* [PATCH 2/7] Have Power use add_active_range() and free_area_init_nodes()
From: Mel Gorman @ 2006-05-01 13:36 UTC (permalink / raw)
To: akpm, davej, tony.luck, linuxppc-dev, linux-kernel, bob.picco, ak,
linux-mm
Cc: Mel Gorman
In-Reply-To: <20060501133530.6379.66000.sendpatchset@skynet>
Size zones and holes in an architecture independent manner for Power.
This has been boot tested on PPC64 with NUMA both enabled and disabled. It
has been compile tested for an older CHRP-based machine.
powerpc/Kconfig | 13 ++--
powerpc/mm/mem.c | 53 ++++++----------
powerpc/mm/numa.c | 157 ++++---------------------------------------------
ppc/Kconfig | 3
ppc/mm/init.c | 26 ++++----
5 files changed, 62 insertions(+), 190 deletions(-)
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-101-add_free_area_init_nodes/arch/powerpc/Kconfig linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/powerpc/Kconfig
--- linux-2.6.17-rc3-mm1-101-add_free_area_init_nodes/arch/powerpc/Kconfig 2006-05-01 11:36:58.000000000 +0100
+++ linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/powerpc/Kconfig 2006-05-01 11:40:07.000000000 +0100
@@ -676,11 +676,16 @@ config ARCH_SPARSEMEM_DEFAULT
def_bool y
depends on SMP && PPC_PSERIES
-source "mm/Kconfig"
-
-config HAVE_ARCH_EARLY_PFN_TO_NID
+config ARCH_POPULATES_NODE_MAP
def_bool y
- depends on NEED_MULTIPLE_NODES
+
+# Value of 256 is MAX_LMB_REGIONS * 2
+config MAX_ACTIVE_REGIONS
+ int
+ default 256
+ depends on ARCH_POPULATES_NODE_MAP
+
+source "mm/Kconfig"
config ARCH_MEMORY_PROBE
def_bool y
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-101-add_free_area_init_nodes/arch/powerpc/mm/mem.c linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/powerpc/mm/mem.c
--- linux-2.6.17-rc3-mm1-101-add_free_area_init_nodes/arch/powerpc/mm/mem.c 2006-05-01 11:36:58.000000000 +0100
+++ linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/powerpc/mm/mem.c 2006-05-01 11:40:07.000000000 +0100
@@ -257,20 +257,22 @@ void __init do_init_bootmem(void)
boot_mapsize = init_bootmem(start >> PAGE_SHIFT, total_pages);
+ /* Add active regions with valid PFNs */
+ for (i = 0; i < lmb.memory.cnt; i++) {
+ unsigned long start_pfn, end_pfn;
+ start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT;
+ end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i);
+ add_active_range(0, start_pfn, end_pfn);
+ }
+
/* Add all physical memory to the bootmem map, mark each area
* present.
*/
- for (i = 0; i < lmb.memory.cnt; i++) {
- unsigned long base = lmb.memory.region[i].base;
- unsigned long size = lmb_size_bytes(&lmb.memory, i);
#ifdef CONFIG_HIGHMEM
- if (base >= total_lowmem)
- continue;
- if (base + size > total_lowmem)
- size = total_lowmem - base;
+ free_bootmem_with_active_regions(0, total_lowmem >> PAGE_SHIFT);
+#else
+ free_bootmem_with_active_regions(0, max_pfn);
#endif
- free_bootmem(base, size);
- }
/* reserve the sections we're already using */
for (i = 0; i < lmb.reserved.cnt; i++)
@@ -278,9 +280,8 @@ void __init do_init_bootmem(void)
lmb_size_bytes(&lmb.reserved, i));
/* XXX need to clip this if using highmem? */
- for (i = 0; i < lmb.memory.cnt; i++)
- memory_present(0, lmb_start_pfn(&lmb.memory, i),
- lmb_end_pfn(&lmb.memory, i));
+ sparse_memory_present_with_active_regions(0);
+
init_bootmem_done = 1;
}
@@ -289,8 +290,6 @@ void __init do_init_bootmem(void)
*/
void __init paging_init(void)
{
- unsigned long zones_size[MAX_NR_ZONES];
- unsigned long zholes_size[MAX_NR_ZONES];
unsigned long total_ram = lmb_phys_mem_size();
unsigned long top_of_ram = lmb_end_of_DRAM();
@@ -308,26 +307,18 @@ void __init paging_init(void)
top_of_ram, total_ram);
printk(KERN_DEBUG "Memory hole size: %ldMB\n",
(top_of_ram - total_ram) >> 20);
- /*
- * All pages are DMA-able so we put them all in the DMA zone.
- */
- memset(zones_size, 0, sizeof(zones_size));
- memset(zholes_size, 0, sizeof(zholes_size));
-
- zones_size[ZONE_DMA] = top_of_ram >> PAGE_SHIFT;
- zholes_size[ZONE_DMA] = (top_of_ram - total_ram) >> PAGE_SHIFT;
-
#ifdef CONFIG_HIGHMEM
- zones_size[ZONE_DMA] = total_lowmem >> PAGE_SHIFT;
- zones_size[ZONE_HIGHMEM] = (total_memory - total_lowmem) >> PAGE_SHIFT;
- zholes_size[ZONE_HIGHMEM] = (top_of_ram - total_ram) >> PAGE_SHIFT;
+ free_area_init_nodes(total_lowmem >> PAGE_SHIFT,
+ total_lowmem >> PAGE_SHIFT,
+ total_lowmem >> PAGE_SHIFT,
+ top_of_ram >> PAGE_SHIFT);
#else
- zones_size[ZONE_DMA] = top_of_ram >> PAGE_SHIFT;
- zholes_size[ZONE_DMA] = (top_of_ram - total_ram) >> PAGE_SHIFT;
-#endif /* CONFIG_HIGHMEM */
+ free_area_init_nodes(top_of_ram >> PAGE_SHIFT,
+ top_of_ram >> PAGE_SHIFT,
+ top_of_ram >> PAGE_SHIFT,
+ top_of_ram >> PAGE_SHIFT);
+#endif
- free_area_init_node(0, NODE_DATA(0), zones_size,
- __pa(PAGE_OFFSET) >> PAGE_SHIFT, zholes_size);
}
#endif /* ! CONFIG_NEED_MULTIPLE_NODES */
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-101-add_free_area_init_nodes/arch/powerpc/mm/numa.c linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/powerpc/mm/numa.c
--- linux-2.6.17-rc3-mm1-101-add_free_area_init_nodes/arch/powerpc/mm/numa.c 2006-05-01 11:36:58.000000000 +0100
+++ linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/powerpc/mm/numa.c 2006-05-01 11:40:07.000000000 +0100
@@ -39,96 +39,6 @@ static bootmem_data_t __initdata plat_no
static int min_common_depth;
static int n_mem_addr_cells, n_mem_size_cells;
-/*
- * We need somewhere to store start/end/node for each region until we have
- * allocated the real node_data structures.
- */
-#define MAX_REGIONS (MAX_LMB_REGIONS*2)
-static struct {
- unsigned long start_pfn;
- unsigned long end_pfn;
- int nid;
-} init_node_data[MAX_REGIONS] __initdata;
-
-int __init early_pfn_to_nid(unsigned long pfn)
-{
- unsigned int i;
-
- for (i = 0; init_node_data[i].end_pfn; i++) {
- unsigned long start_pfn = init_node_data[i].start_pfn;
- unsigned long end_pfn = init_node_data[i].end_pfn;
-
- if ((start_pfn <= pfn) && (pfn < end_pfn))
- return init_node_data[i].nid;
- }
-
- return -1;
-}
-
-void __init add_region(unsigned int nid, unsigned long start_pfn,
- unsigned long pages)
-{
- unsigned int i;
-
- dbg("add_region nid %d start_pfn 0x%lx pages 0x%lx\n",
- nid, start_pfn, pages);
-
- for (i = 0; init_node_data[i].end_pfn; i++) {
- if (init_node_data[i].nid != nid)
- continue;
- if (init_node_data[i].end_pfn == start_pfn) {
- init_node_data[i].end_pfn += pages;
- return;
- }
- if (init_node_data[i].start_pfn == (start_pfn + pages)) {
- init_node_data[i].start_pfn -= pages;
- return;
- }
- }
-
- /*
- * Leave last entry NULL so we dont iterate off the end (we use
- * entry.end_pfn to terminate the walk).
- */
- if (i >= (MAX_REGIONS - 1)) {
- printk(KERN_ERR "WARNING: too many memory regions in "
- "numa code, truncating\n");
- return;
- }
-
- init_node_data[i].start_pfn = start_pfn;
- init_node_data[i].end_pfn = start_pfn + pages;
- init_node_data[i].nid = nid;
-}
-
-/* We assume init_node_data has no overlapping regions */
-void __init get_region(unsigned int nid, unsigned long *start_pfn,
- unsigned long *end_pfn, unsigned long *pages_present)
-{
- unsigned int i;
-
- *start_pfn = -1UL;
- *end_pfn = *pages_present = 0;
-
- for (i = 0; init_node_data[i].end_pfn; i++) {
- if (init_node_data[i].nid != nid)
- continue;
-
- *pages_present += init_node_data[i].end_pfn -
- init_node_data[i].start_pfn;
-
- if (init_node_data[i].start_pfn < *start_pfn)
- *start_pfn = init_node_data[i].start_pfn;
-
- if (init_node_data[i].end_pfn > *end_pfn)
- *end_pfn = init_node_data[i].end_pfn;
- }
-
- /* We didnt find a matching region, return start/end as 0 */
- if (*start_pfn == -1UL)
- *start_pfn = 0;
-}
-
static void __cpuinit map_cpu_to_node(int cpu, int node)
{
numa_cpu_lookup_table[cpu] = node;
@@ -471,8 +381,8 @@ new_range:
continue;
}
- add_region(nid, start >> PAGE_SHIFT,
- size >> PAGE_SHIFT);
+ add_active_range(nid, start >> PAGE_SHIFT,
+ (start >> PAGE_SHIFT) + (size >> PAGE_SHIFT));
if (--ranges)
goto new_range;
@@ -485,6 +395,7 @@ static void __init setup_nonnuma(void)
{
unsigned long top_of_ram = lmb_end_of_DRAM();
unsigned long total_ram = lmb_phys_mem_size();
+ unsigned long start_pfn, end_pfn;
unsigned int i;
printk(KERN_DEBUG "Top of RAM: 0x%lx, Total RAM: 0x%lx\n",
@@ -492,9 +403,11 @@ static void __init setup_nonnuma(void)
printk(KERN_DEBUG "Memory hole size: %ldMB\n",
(top_of_ram - total_ram) >> 20);
- for (i = 0; i < lmb.memory.cnt; ++i)
- add_region(0, lmb.memory.region[i].base >> PAGE_SHIFT,
- lmb_size_pages(&lmb.memory, i));
+ for (i = 0; i < lmb.memory.cnt; ++i) {
+ start_pfn = lmb.memory.region[i].base >> PAGE_SHIFT;
+ end_pfn = start_pfn + lmb_size_pages(&lmb.memory, i);
+ add_active_range(0, start_pfn, end_pfn);
+ }
node_set_online(0);
}
@@ -632,11 +545,11 @@ void __init do_init_bootmem(void)
(void *)(unsigned long)boot_cpuid);
for_each_online_node(nid) {
- unsigned long start_pfn, end_pfn, pages_present;
+ unsigned long start_pfn, end_pfn;
unsigned long bootmem_paddr;
unsigned long bootmap_pages;
- get_region(nid, &start_pfn, &end_pfn, &pages_present);
+ get_pfn_range_for_nid(nid, &start_pfn, &end_pfn);
/* Allocate the node structure node local if possible */
NODE_DATA(nid) = careful_allocation(nid,
@@ -669,19 +582,7 @@ void __init do_init_bootmem(void)
init_bootmem_node(NODE_DATA(nid), bootmem_paddr >> PAGE_SHIFT,
start_pfn, end_pfn);
- /* Add free regions on this node */
- for (i = 0; init_node_data[i].end_pfn; i++) {
- unsigned long start, end;
-
- if (init_node_data[i].nid != nid)
- continue;
-
- start = init_node_data[i].start_pfn << PAGE_SHIFT;
- end = init_node_data[i].end_pfn << PAGE_SHIFT;
-
- dbg("free_bootmem %lx %lx\n", start, end - start);
- free_bootmem_node(NODE_DATA(nid), start, end - start);
- }
+ free_bootmem_with_active_regions(nid, end_pfn);
/* Mark reserved regions on this node */
for (i = 0; i < lmb.reserved.cnt; i++) {
@@ -712,44 +613,14 @@ void __init do_init_bootmem(void)
}
}
- /* Add regions into sparsemem */
- for (i = 0; init_node_data[i].end_pfn; i++) {
- unsigned long start, end;
-
- if (init_node_data[i].nid != nid)
- continue;
-
- start = init_node_data[i].start_pfn;
- end = init_node_data[i].end_pfn;
-
- memory_present(nid, start, end);
- }
+ sparse_memory_present_with_active_regions(nid);
}
}
void __init paging_init(void)
{
- unsigned long zones_size[MAX_NR_ZONES];
- unsigned long zholes_size[MAX_NR_ZONES];
- int nid;
-
- memset(zones_size, 0, sizeof(zones_size));
- memset(zholes_size, 0, sizeof(zholes_size));
-
- for_each_online_node(nid) {
- unsigned long start_pfn, end_pfn, pages_present;
-
- get_region(nid, &start_pfn, &end_pfn, &pages_present);
-
- zones_size[ZONE_DMA] = end_pfn - start_pfn;
- zholes_size[ZONE_DMA] = zones_size[ZONE_DMA] - pages_present;
-
- dbg("free_area_init node %d %lx %lx (hole: %lx)\n", nid,
- zones_size[ZONE_DMA], start_pfn, zholes_size[ZONE_DMA]);
-
- free_area_init_node(nid, NODE_DATA(nid), zones_size, start_pfn,
- zholes_size);
- }
+ unsigned long end_pfn = lmb_end_of_DRAM() >> PAGE_SHIFT;
+ free_area_init_nodes(end_pfn, end_pfn, end_pfn, end_pfn);
}
static int __init early_numa(char *p)
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-101-add_free_area_init_nodes/arch/ppc/Kconfig linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/ppc/Kconfig
--- linux-2.6.17-rc3-mm1-101-add_free_area_init_nodes/arch/ppc/Kconfig 2006-04-27 03:19:25.000000000 +0100
+++ linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/ppc/Kconfig 2006-05-01 11:40:07.000000000 +0100
@@ -949,6 +949,9 @@ config NR_CPUS
config HIGHMEM
bool "High memory support"
+config ARCH_POPULATES_NODE_MAP
+ def_bool y
+
source kernel/Kconfig.hz
source kernel/Kconfig.preempt
source "mm/Kconfig"
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-101-add_free_area_init_nodes/arch/ppc/mm/init.c linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/ppc/mm/init.c
--- linux-2.6.17-rc3-mm1-101-add_free_area_init_nodes/arch/ppc/mm/init.c 2006-04-27 03:19:25.000000000 +0100
+++ linux-2.6.17-rc3-mm1-102-powerpc_use_init_nodes/arch/ppc/mm/init.c 2006-05-01 11:40:07.000000000 +0100
@@ -359,8 +359,7 @@ void __init do_init_bootmem(void)
*/
void __init paging_init(void)
{
- unsigned long zones_size[MAX_NR_ZONES], i;
-
+ unsigned long start_pfn, end_pfn;
#ifdef CONFIG_HIGHMEM
map_page(PKMAP_BASE, 0, 0); /* XXX gross */
pkmap_page_table = pte_offset_kernel(pmd_offset(pgd_offset_k
@@ -370,19 +369,22 @@ void __init paging_init(void)
(KMAP_FIX_BEGIN), KMAP_FIX_BEGIN), KMAP_FIX_BEGIN);
kmap_prot = PAGE_KERNEL;
#endif /* CONFIG_HIGHMEM */
-
- /*
- * All pages are DMA-able so we put them all in the DMA zone.
- */
- zones_size[ZONE_DMA] = total_lowmem >> PAGE_SHIFT;
- for (i = 1; i < MAX_NR_ZONES; i++)
- zones_size[i] = 0;
+ /* All pages are DMA-able so we put them all in the DMA zone. */
+ start_pfn = __pa(PAGE_OFFSET) >> PAGE_SHIFT;
+ end_pfn = start_pfn + (total_memory >> PAGE_SHIFT);
+ add_active_range(0, start_pfn, end_pfn);
#ifdef CONFIG_HIGHMEM
- zones_size[ZONE_HIGHMEM] = (total_memory - total_lowmem) >> PAGE_SHIFT;
+ free_area_init_nodes(total_lowmem >> PAGE_SHIFT,
+ total_lowmem >> PAGE_SHIFT,
+ total_lowmem >> PAGE_SHIFT,
+ total_memory >> PAGE_SHIFT);
+#else
+ free_area_init_nodes(total_memory >> PAGE_SHIFT,
+ total_memory >> PAGE_SHIFT,
+ total_memory >> PAGE_SHIFT,
+ total_memory >> PAGE_SHIFT);
#endif /* CONFIG_HIGHMEM */
-
- free_area_init(zones_size);
}
void __init mem_init(void)
^ permalink raw reply
* [PATCH 1/7] Introduce mechanism for registering active regions of memory
From: Mel Gorman @ 2006-05-01 13:35 UTC (permalink / raw)
To: akpm, davej, tony.luck, linux-mm, linux-kernel, bob.picco, ak,
linuxppc-dev
Cc: Mel Gorman
In-Reply-To: <20060501133530.6379.66000.sendpatchset@skynet>
This patch defines the structure to represent an active range of page
frames within a node in an architecture independent manner. Architectures
are expected to register active ranges of PFNs using add_active_range(nid,
start_pfn, end_pfn) and call free_area_init_nodes() passing the PFNs of
the end of each zone.
include/linux/mm.h | 34 +++
include/linux/mmzone.h | 10 -
mm/page_alloc.c | 402 +++++++++++++++++++++++++++++++++++++++++---
3 files changed, 421 insertions(+), 25 deletions(-)
Signed-off-by: Mel Gorman <mel@csn.ul.ie>
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-clean/include/linux/mm.h linux-2.6.17-rc3-mm1-101-add_free_area_init_nodes/include/linux/mm.h
--- linux-2.6.17-rc3-mm1-clean/include/linux/mm.h 2006-05-01 11:37:01.000000000 +0100
+++ linux-2.6.17-rc3-mm1-101-add_free_area_init_nodes/include/linux/mm.h 2006-05-01 11:39:02.000000000 +0100
@@ -916,6 +916,40 @@ extern void free_area_init(unsigned long
extern void free_area_init_node(int nid, pg_data_t *pgdat,
unsigned long * zones_size, unsigned long zone_start_pfn,
unsigned long *zholes_size);
+#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+/*
+ * Any architecture that supports CONFIG_ARCH_POPULATES_NODE_MAP can
+ * initialise zone and hole information by
+ *
+ * for_all_memory_regions()
+ * add_active_range(nid, start, end)
+ * free_area_init_nodes(max_dma, max_dma32, max_low_pfn, max_pfn);
+ *
+ * Optionally, free_bootmem_with_active_regions() can be used to call
+ * free_bootmem_node() after active regions have been registered with
+ * add_active_range(). Similarly, sparse_memory_present_with_active_regions()
+ * calls memory_present() for active regions when SPARSEMEM is enabled
+ */
+extern void free_area_init_nodes(unsigned long max_dma_pfn,
+ unsigned long max_dma32_pfn,
+ unsigned long max_low_pfn,
+ unsigned long max_high_pfn);
+extern void add_active_range(unsigned int nid, unsigned long start_pfn,
+ unsigned long end_pfn);
+extern void shrink_active_range(unsigned int nid, unsigned long old_end_pfn,
+ unsigned long new_end_pfn);
+extern void remove_all_active_ranges(void);
+extern unsigned long absent_pages_in_range(unsigned long start_pfn,
+ unsigned long end_pfn);
+extern void get_pfn_range_for_nid(unsigned int nid,
+ unsigned long *start_pfn, unsigned long *end_pfn);
+extern unsigned long find_min_pfn_with_active_regions(void);
+extern unsigned long find_max_pfn_with_active_regions(void);
+extern int early_pfn_to_nid(unsigned long pfn);
+extern void free_bootmem_with_active_regions(int nid,
+ unsigned long max_low_pfn);
+extern void sparse_memory_present_with_active_regions(int nid);
+#endif
extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long);
extern void setup_per_zone_pages_min(void);
extern void mem_init(void);
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-clean/include/linux/mmzone.h linux-2.6.17-rc3-mm1-101-add_free_area_init_nodes/include/linux/mmzone.h
--- linux-2.6.17-rc3-mm1-clean/include/linux/mmzone.h 2006-05-01 11:37:01.000000000 +0100
+++ linux-2.6.17-rc3-mm1-101-add_free_area_init_nodes/include/linux/mmzone.h 2006-05-01 11:39:02.000000000 +0100
@@ -271,6 +271,13 @@ struct zonelist {
struct zone *zones[MAX_NUMNODES * MAX_NR_ZONES + 1]; // NULL delimited
};
+#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+struct node_active_region {
+ unsigned long start_pfn;
+ unsigned long end_pfn;
+ int nid;
+};
+#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
/*
* The pg_data_t structure is used in machines with CONFIG_DISCONTIGMEM
@@ -468,7 +475,8 @@ extern struct zone *next_zone(struct zon
#endif
-#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
+#if !defined(CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID) && \
+ !defined(CONFIG_ARCH_POPULATES_NODE_MAP)
#define early_pfn_to_nid(nid) (0UL)
#endif
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc3-mm1-clean/mm/page_alloc.c linux-2.6.17-rc3-mm1-101-add_free_area_init_nodes/mm/page_alloc.c
--- linux-2.6.17-rc3-mm1-clean/mm/page_alloc.c 2006-05-01 11:37:01.000000000 +0100
+++ linux-2.6.17-rc3-mm1-101-add_free_area_init_nodes/mm/page_alloc.c 2006-05-01 11:39:02.000000000 +0100
@@ -38,6 +38,8 @@
#include <linux/vmalloc.h>
#include <linux/mempolicy.h>
#include <linux/stop_machine.h>
+#include <linux/sort.h>
+#include <linux/pfn.h>
#include <asm/tlbflush.h>
#include "internal.h"
@@ -86,6 +88,18 @@ int min_free_kbytes = 1024;
unsigned long __meminitdata nr_kernel_pages;
unsigned long __meminitdata nr_all_pages;
+#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+ #ifdef CONFIG_MAX_ACTIVE_REGIONS
+ #define MAX_ACTIVE_REGIONS CONFIG_MAX_ACTIVE_REGIONS
+ #else
+ #define MAX_ACTIVE_REGIONS (MAX_NR_ZONES * MAX_NUMNODES + 1)
+ #endif
+
+ struct node_active_region __initdata early_node_map[MAX_ACTIVE_REGIONS];
+ unsigned long __initdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
+ unsigned long __initdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
+#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
+
#ifdef CONFIG_DEBUG_VM
static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
{
@@ -1864,25 +1878,6 @@ static inline unsigned long wait_table_b
#define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
-static void __init calculate_zone_totalpages(struct pglist_data *pgdat,
- unsigned long *zones_size, unsigned long *zholes_size)
-{
- unsigned long realtotalpages, totalpages = 0;
- int i;
-
- for (i = 0; i < MAX_NR_ZONES; i++)
- totalpages += zones_size[i];
- pgdat->node_spanned_pages = totalpages;
-
- realtotalpages = totalpages;
- if (zholes_size)
- for (i = 0; i < MAX_NR_ZONES; i++)
- realtotalpages -= zholes_size[i];
- pgdat->node_present_pages = realtotalpages;
- printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id, realtotalpages);
-}
-
-
/*
* Initially all pages are reserved - free ones are freed
* up by free_all_bootmem() once the early boot process is
@@ -2200,6 +2195,215 @@ __meminit int init_currently_empty_zone(
return 0;
}
+#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+/* Note: nid == MAX_NUMNODES returns first region */
+static int __init first_active_region_index_in_nid(int nid)
+{
+ int i;
+ for (i = 0; early_node_map[i].end_pfn; i++) {
+ if (nid == MAX_NUMNODES || early_node_map[i].nid == nid)
+ return i;
+ }
+
+ return MAX_ACTIVE_REGIONS;
+}
+
+/* Note: nid == MAX_NUMNODES returns next region */
+static int __init next_active_region_index_in_nid(unsigned int index, int nid)
+{
+ for (index = index + 1; early_node_map[index].end_pfn; index++) {
+ if (nid == MAX_NUMNODES || early_node_map[index].nid == nid)
+ return index;
+ }
+
+ return MAX_ACTIVE_REGIONS;
+}
+
+#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
+int __init early_pfn_to_nid(unsigned long pfn)
+{
+ int i;
+
+ for (i = 0; early_node_map[i].end_pfn; i++) {
+ unsigned long start_pfn = early_node_map[i].start_pfn;
+ unsigned long end_pfn = early_node_map[i].end_pfn;
+
+ if ((start_pfn <= pfn) && (pfn < end_pfn))
+ return early_node_map[i].nid;
+ }
+
+ return -1;
+}
+#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
+
+#define for_each_active_range_index_in_nid(i, nid) \
+ for (i = first_active_region_index_in_nid(nid); \
+ i != MAX_ACTIVE_REGIONS; \
+ i = next_active_region_index_in_nid(i, nid))
+
+void __init free_bootmem_with_active_regions(int nid,
+ unsigned long max_low_pfn)
+{
+ unsigned int i;
+ for_each_active_range_index_in_nid(i, nid) {
+ unsigned long size_pages = 0;
+ unsigned long end_pfn = early_node_map[i].end_pfn;
+ if (early_node_map[i].start_pfn >= max_low_pfn)
+ continue;
+
+ if (end_pfn > max_low_pfn)
+ end_pfn = max_low_pfn;
+
+ size_pages = end_pfn - early_node_map[i].start_pfn;
+ free_bootmem_node(NODE_DATA(early_node_map[i].nid),
+ PFN_PHYS(early_node_map[i].start_pfn),
+ size_pages << PAGE_SHIFT);
+ }
+}
+
+void __init sparse_memory_present_with_active_regions(int nid)
+{
+ unsigned int i;
+ for_each_active_range_index_in_nid(i, nid)
+ memory_present(early_node_map[i].nid,
+ early_node_map[i].start_pfn,
+ early_node_map[i].end_pfn);
+}
+
+void __init get_pfn_range_for_nid(unsigned int nid,
+ unsigned long *start_pfn, unsigned long *end_pfn)
+{
+ unsigned int i;
+ *start_pfn = -1UL;
+ *end_pfn = 0;
+
+ for_each_active_range_index_in_nid(i, nid) {
+ *start_pfn = min(*start_pfn, early_node_map[i].start_pfn);
+ *end_pfn = max(*end_pfn, early_node_map[i].end_pfn);
+ }
+
+ if (*start_pfn == -1UL) {
+ printk(KERN_WARNING "Node %u active with no memory\n", nid);
+ *start_pfn = 0;
+ }
+}
+
+unsigned long __init zone_present_pages_in_node(int nid,
+ unsigned long zone_type,
+ unsigned long *ignored)
+{
+ unsigned long node_start_pfn, node_end_pfn;
+ unsigned long zone_start_pfn, zone_end_pfn;
+
+ /* Get the start and end of the node and zone */
+ get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn);
+ zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type];
+ zone_end_pfn = arch_zone_highest_possible_pfn[zone_type];
+
+ /* Check that this node has pages within the zone's required range */
+ if (zone_end_pfn < node_start_pfn || zone_start_pfn > node_end_pfn)
+ return 0;
+
+ /* Move the zone boundaries inside the node if necessary */
+ zone_end_pfn = min(zone_end_pfn, node_end_pfn);
+ zone_start_pfn = max(zone_start_pfn, node_start_pfn);
+
+ /* Return the spanned pages */
+ return zone_end_pfn - zone_start_pfn;
+}
+
+unsigned long __init __absent_pages_in_range(int nid,
+ unsigned long range_start_pfn,
+ unsigned long range_end_pfn)
+{
+ int i = 0;
+ unsigned long prev_end_pfn = 0, hole_pages = 0;
+ unsigned long start_pfn;
+
+ /* Find the end_pfn of the first active range of pfns in the node */
+ i = first_active_region_index_in_nid(nid);
+ if (i == MAX_ACTIVE_REGIONS)
+ return 0;
+ prev_end_pfn = early_node_map[i].start_pfn;
+
+ /* Find all holes for the zone within the node */
+ for (; i != MAX_ACTIVE_REGIONS;
+ i = next_active_region_index_in_nid(i, nid)) {
+
+ /* No need to continue if prev_end_pfn is outside the zone */
+ if (prev_end_pfn >= range_end_pfn)
+ break;
+
+ /* Make sure the end of the zone is not within the hole */
+ start_pfn = min(early_node_map[i].start_pfn, range_end_pfn);
+ prev_end_pfn = max(prev_end_pfn, range_start_pfn);
+
+ /* Update the hole size count and move on */
+ if (start_pfn > range_start_pfn) {
+ BUG_ON(prev_end_pfn > start_pfn);
+ hole_pages += start_pfn - prev_end_pfn;
+ }
+ prev_end_pfn = early_node_map[i].end_pfn;
+ }
+
+ return hole_pages;
+}
+
+unsigned long __init absent_pages_in_range(unsigned long start_pfn,
+ unsigned long end_pfn)
+{
+ return __absent_pages_in_range(MAX_NUMNODES, start_pfn, end_pfn);
+}
+
+unsigned long __init zone_absent_pages_in_node(int nid,
+ unsigned long zone_type,
+ unsigned long *ignored)
+{
+ return __absent_pages_in_range(nid,
+ arch_zone_lowest_possible_pfn[zone_type],
+ arch_zone_highest_possible_pfn[zone_type]);
+}
+#else
+static inline unsigned long zone_present_pages_in_node(int nid,
+ unsigned long zone_type,
+ unsigned long *zones_size)
+{
+ return zones_size[zone_type];
+}
+
+static inline unsigned long zone_absent_pages_in_node(int nid,
+ unsigned long zone_type,
+ unsigned long *zholes_size)
+{
+ if (!zholes_size)
+ return 0;
+
+ return zholes_size[zone_type];
+}
+#endif
+
+static void __init calculate_node_totalpages(struct pglist_data *pgdat,
+ unsigned long *zones_size, unsigned long *zholes_size)
+{
+ unsigned long realtotalpages, totalpages = 0;
+ int i;
+
+ for (i = 0; i < MAX_NR_ZONES; i++) {
+ totalpages += zone_present_pages_in_node(pgdat->node_id, i,
+ zones_size);
+ }
+ pgdat->node_spanned_pages = totalpages;
+
+ realtotalpages = totalpages;
+ for (i = 0; i < MAX_NR_ZONES; i++) {
+ realtotalpages -=
+ zone_absent_pages_in_node(pgdat->node_id, i, zholes_size);
+ }
+ pgdat->node_present_pages = realtotalpages;
+ printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id,
+ realtotalpages);
+}
+
/*
* Set up the zone data structures:
* - mark all pages reserved
@@ -2223,10 +2427,9 @@ static void __meminit free_area_init_cor
struct zone *zone = pgdat->node_zones + j;
unsigned long size, realsize;
- realsize = size = zones_size[j];
- if (zholes_size)
- realsize -= zholes_size[j];
-
+ size = zone_present_pages_in_node(nid, j, zones_size);
+ realsize = size - zone_absent_pages_in_node(nid, j,
+ zholes_size);
if (j < ZONE_HIGHMEM)
nr_kernel_pages += realsize;
nr_all_pages += realsize;
@@ -2294,13 +2497,164 @@ void __meminit free_area_init_node(int n
{
pgdat->node_id = nid;
pgdat->node_start_pfn = node_start_pfn;
- calculate_zone_totalpages(pgdat, zones_size, zholes_size);
+ calculate_node_totalpages(pgdat, zones_size, zholes_size);
alloc_node_mem_map(pgdat);
free_area_init_core(pgdat, zones_size, zholes_size);
}
+#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+void __init add_active_range(unsigned int nid, unsigned long start_pfn,
+ unsigned long end_pfn)
+{
+ unsigned int i;
+
+ /* Merge with existing active regions if possible */
+ for (i = 0; early_node_map[i].end_pfn; i++) {
+ if (early_node_map[i].nid != nid)
+ continue;
+
+ /* Skip if an existing region covers this new one */
+ if (start_pfn >= early_node_map[i].start_pfn &&
+ end_pfn <= early_node_map[i].end_pfn)
+ return;
+
+ /* Merge forward if suitable */
+ if (start_pfn <= early_node_map[i].end_pfn &&
+ end_pfn > early_node_map[i].end_pfn) {
+ early_node_map[i].end_pfn = end_pfn;
+ return;
+ }
+
+ /* Merge backward if suitable */
+ if (start_pfn < early_node_map[i].end_pfn &&
+ end_pfn >= early_node_map[i].start_pfn) {
+ early_node_map[i].start_pfn = start_pfn;
+ return;
+ }
+ }
+
+ /* Leave last entry NULL, we use range.end_pfn to terminate the walk */
+ if (i >= MAX_ACTIVE_REGIONS - 1) {
+ printk(KERN_ERR "Too many memory regions, truncating\n");
+ return;
+ }
+
+ early_node_map[i].nid = nid;
+ early_node_map[i].start_pfn = start_pfn;
+ early_node_map[i].end_pfn = end_pfn;
+}
+
+void __init shrink_active_range(unsigned int nid, unsigned long old_end_pfn,
+ unsigned long new_end_pfn)
+{
+ unsigned int i;
+
+ /* Find the old active region end and shrink */
+ for_each_active_range_index_in_nid(i, nid) {
+ if (early_node_map[i].end_pfn == old_end_pfn) {
+ early_node_map[i].end_pfn = new_end_pfn;
+ break;
+ }
+ }
+}
+
+void __init remove_all_active_ranges()
+{
+ memset(early_node_map, 0, sizeof(early_node_map));
+}
+
+/* Compare two active node_active_regions */
+static int __init cmp_node_active_region(const void *a, const void *b)
+{
+ struct node_active_region *arange = (struct node_active_region *)a;
+ struct node_active_region *brange = (struct node_active_region *)b;
+
+ /* Done this way to avoid overflows */
+ if (arange->start_pfn > brange->start_pfn)
+ return 1;
+ if (arange->start_pfn < brange->start_pfn)
+ return -1;
+
+ return 0;
+}
+
+/* sort the node_map by start_pfn */
+static void __init sort_node_map(void)
+{
+ size_t num = 0;
+ while (early_node_map[num].end_pfn)
+ num++;
+
+ sort(early_node_map, num, sizeof(struct node_active_region),
+ cmp_node_active_region, NULL);
+}
+
+/* Find the lowest pfn for a node. This depends on a sorted early_node_map */
+unsigned long __init find_min_pfn_for_node(unsigned long nid)
+{
+ int i;
+
+ /* Assuming a sorted map, the first range found has the starting pfn */
+ for_each_active_range_index_in_nid(i, nid)
+ return early_node_map[i].start_pfn;
+
+ printk(KERN_WARNING "Could not find start_pfn for node %lu\n", nid);
+ return 0;
+}
+
+unsigned long __init find_min_pfn_with_active_regions(void)
+{
+ return find_min_pfn_for_node(MAX_NUMNODES);
+}
+
+unsigned long __init find_max_pfn_with_active_regions(void)
+{
+ int i;
+ unsigned long max_pfn = 0;
+
+ for (i = 0; early_node_map[i].end_pfn; i++)
+ max_pfn = max(max_pfn, early_node_map[i].end_pfn);
+
+ return max_pfn;
+}
+
+void __init free_area_init_nodes(unsigned long arch_max_dma_pfn,
+ unsigned long arch_max_dma32_pfn,
+ unsigned long arch_max_low_pfn,
+ unsigned long arch_max_high_pfn)
+{
+ unsigned long nid;
+ int zone_index;
+
+ /* Record where the zone boundaries are */
+ memset(arch_zone_lowest_possible_pfn, 0,
+ sizeof(arch_zone_lowest_possible_pfn));
+ memset(arch_zone_highest_possible_pfn, 0,
+ sizeof(arch_zone_highest_possible_pfn));
+ arch_zone_lowest_possible_pfn[ZONE_DMA] =
+ find_min_pfn_with_active_regions();
+ arch_zone_highest_possible_pfn[ZONE_DMA] = arch_max_dma_pfn;
+ arch_zone_highest_possible_pfn[ZONE_DMA32] = arch_max_dma32_pfn;
+ arch_zone_highest_possible_pfn[ZONE_NORMAL] = arch_max_low_pfn;
+ arch_zone_highest_possible_pfn[ZONE_HIGHMEM] = arch_max_high_pfn;
+ for (zone_index = 1; zone_index < MAX_NR_ZONES; zone_index++) {
+ arch_zone_lowest_possible_pfn[zone_index] =
+ arch_zone_highest_possible_pfn[zone_index-1];
+ }
+
+ /* Regions in the early_node_map can be in any order */
+ sort_node_map();
+
+ for_each_online_node(nid) {
+ pg_data_t *pgdat = NODE_DATA(nid);
+ free_area_init_node(nid, pgdat, NULL,
+ find_min_pfn_for_node(nid), NULL);
+ }
+}
+#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
+
#ifndef CONFIG_NEED_MULTIPLE_NODES
static bootmem_data_t contig_bootmem_data;
struct pglist_data contig_page_data = { .bdata = &contig_bootmem_data };
^ permalink raw reply
* [PATCH 0/7] Sizing zones and holes in an architecture independent manner V5
From: Mel Gorman @ 2006-05-01 13:35 UTC (permalink / raw)
To: akpm, davej, tony.luck, linuxppc-dev, linux-kernel, bob.picco, ak,
linux-mm
Cc: Mel Gorman
This is V5 of the patchset to size zones and memory holes in
an architecture-independent manner. This has been rebased against
2.6.17-rc3-mm1 and as there were no objections against release V4, I would
like to have it considered for merging. If there are merge conflicts with
later trees, let me know what to rebase against.
The reasons why I'd like to see this merged include:
o Less architecture-specific code - particularly for x86 and ppc64
o More maintainable. Changes to zone layout need only be made in one place
o Zone-sizing and memory hole calculation is one less job that needs to be
done for new architecture ports
o With the architecture-independent representation, zone-based
anti-fragmentation needs a lot less architecture-specific code making it
more portable between architectures. This will be important for future
hugepage-availability work
o Nigel Cunningham has stated that software suspend could potentially
use the architecture-independent representation to discover what pages
need to be saved during suspend
Changelog since V4
o Rebase to 2.6.17-rc3-mm1
o Calculate holes on x86 with SRAT correctly
Changelog since V3
o Rebase to 2.6.17-rc2
o Allow the active regions to be cleared. Needed by x86_64 when it decides
the SRAT table is bad half way through the registering of active regions
o Fix for flatmem x86_64 machines booting
Changelog since V2
o Fix a bug where holes in lower zones get double counted
o Catch the case where a new range is registered that is within an existing range
o Catch the case where a zone boundary is within a hole
o Use the EFI map for registering ranges on x86_64+numa
o On IA64+NUMA, add the active ranges before rounding for granules
o On x86_64, remove e820_hole_size and e820_bootmem_free and use
arch-independent equivalents
o On x86_64, remove the map walk in e820_end_of_ram()
o Rename memory_present_with_active_regions, name ambiguous
o Add absent_pages_in_range() for arches to call
Changelog since V1
o Correctly convert virtual and physical addresses to PFNs on ia64
o Correctly convert physical addresses to PFN on older ppc
o When add_active_range() is called with overlapping pfn ranges, merge them
o When a zone boundary occurs within a memory hole, account correctly
o Minor whitespace damage cleanup
o Debugging patch temporarily included
At a basic level, architectures define structures to record where active
ranges of page frames are located. Once located, the code to calculate
zone sizes and holes in each architecture is very similar. Some of this
zone and hole sizing code is difficult to read for no good reason. This
set of patches eliminates the similar-looking architecture-specific code.
The patches introduce a mechanism where architectures register where the
active ranges of page frames are with add_active_range(). When all areas
have been discovered, free_area_init_nodes() is called to initialise
the pgdat and zones. The zone sizes and holes are then calculated in an
architecture independent manner.
Patch 1 introduces the mechanism for registering and initialising PFN ranges
Patch 2 changes ppc to use the mechanism - 128 arch-specific LOC removed
Patch 3 changes x86 to use the mechanism - 142 arch-specific LOC removed
Patch 4 changes x86_64 to use the mechanism - 94 arch-specific LOC removed
Patch 5 changes ia64 to use the mechanism - 57 arch-specific LOC removed
At this point, there is a reduction of 421 architecture-specific lines of code
and a net reduction of 25 lines. The arch-independent code is a lot easier
to read in comparison to some of the arch-specific stuff, particularly in
arch/i386/ .
For Patch 6, it was also noted that page_alloc.c has a *lot* of
initialisation code which makes the file harder to read than it needs to
be. Patch 6 creates a new file mem_init.c and moves a lot of initialisation
code from page_alloc.c to it. After the patch is applied, there is still a net
loss of 8 lines.
The patches have been successfully boot tested by me and verified that the
zones are the correct size on
o x86, flatmem with 1.5GiB of RAM
o x86, NUMAQ
o x86, NUMA, with SRAT
o x86 with SRAT CONFIG_NUMA=n
o PPC64, NUMA
o PPC64, CONFIG_NUMA=n
o Power, RS6000 (Had difficulty here with missing __udivdi3 symbol in pci_32.o)
o x86_64, NUMA with SRAT
o x86_64, NUMA with broken SRAT that falls back to k8topology discovery
o x86_64, ACPI_NUMA, ACPI_MEMORY_HOTPLUG && !SPARSEMEM to trigger the
hotadd path without sparsemem fun in srat.c (SRAT broken on test machine and
I'm pretty sure the machine does not support physical memory hotadd anyway
so test may not have been effective other than being a compile test.)
o x86_64, CONFIG_NUMA=n
o x86_64, AMD64 desktop machine with flatmem
Tony Luck has successfully tested for ia64 on Itanium with tiger_defconfig,
gensparse_defconfig and defconfig. Bob Picco has also tested and debugged
on IA64. Jack Steiner successfully boot tested on a mammoth SGI IA64-based
machine. These were on patches against 2.6.17-rc1 but there have been no
ia64-changes made between release 3 and 5 of these patches.
There are differences in the zone sizes for x86_64 as the arch-specific code
for x86_64 accounts the kernel image and the starting mem_maps as memory
holes but the architecture-independent code accounts the memory as present.
The net reduction seems small but the big benefit of this set of patches
is the reduction of 421 lines of architecture-specific code, some of
which is very hairy. There should be a greater net reduction when other
architectures use the same mechanisms for zone and hole sizing but I lack
the hardware to test on.
Comments?
Additional credit;
Dave Hansen for the initial suggestion and comments on early patches
Andy Whitcroft for reviewing early versions and catching numerous errors
Tony Luck for testing and debugging on IA64
Bob Picco for testing and fixing bugs related to pfn registration
Jack Steiner and Yasunori for testing on IA64
arch/i386/Kconfig | 8
arch/i386/kernel/setup.c | 19
arch/i386/kernel/srat.c | 101 ---
arch/i386/mm/discontig.c | 65 --
arch/ia64/Kconfig | 3
arch/ia64/mm/contig.c | 60 --
arch/ia64/mm/discontig.c | 41 -
arch/ia64/mm/init.c | 12
arch/powerpc/Kconfig | 13
arch/powerpc/mm/mem.c | 53 --
arch/powerpc/mm/numa.c | 157 ------
arch/ppc/Kconfig | 3
arch/ppc/mm/init.c | 26 -
arch/x86_64/Kconfig | 3
arch/x86_64/kernel/e820.c | 109 +---
arch/x86_64/kernel/setup.c | 7
arch/x86_64/mm/init.c | 62 --
arch/x86_64/mm/k8topology.c | 3
arch/x86_64/mm/numa.c | 18
arch/x86_64/mm/srat.c | 11
include/asm-ia64/meminit.h | 1
include/asm-x86_64/e820.h | 5
include/asm-x86_64/proto.h | 2
include/linux/mm.h | 34 +
include/linux/mmzone.h | 10
mm/Makefile | 2
mm/mem_init.c | 1121 ++++++++++++++++++++++++++++++++++++++++++
mm/page_alloc.c | 750 -----------------------------
--
--
Mel Gorman
Part-time Phd Student Linux Technology Center
University of Limerick IBM Dublin Software Lab
^ permalink raw reply
* Building custom 2.6 kernel
From: Konstantin Boyanov @ 2006-05-01 9:54 UTC (permalink / raw)
To: linuxppc-embedded
[-- Attachment #1: Type: text/plain, Size: 1728 bytes --]
Hi there,
These days I'm trying to compile and get working a 2.6 kernel for the
Motorola MVME6100 VME single board computer. For the purpose I'm using an
ELinOS IDE by the SysGo company (don't ask me why, it's the boss' decision).
Since ELinOS lacks support I post here with the hope of getting some help.
Thus far I have succeeded in only compiling a bare kernel but not getting it
up. There is a running RedHat distro on the board but I want to get a
customized kernel for it, because RedHat does not support certain stuff
(like VME drivers for example).
So, the kernel I compiled seems to be quite messy, as you can see from the
dmesg at startup:
The existing kernel >>
http://www-zeuthen.desy.de/~boyanov/dmesg_normal.txt<http://www-zeuthen.desy.de/%7Eboyanov/dmesg_normal.txt>
The new one >>
http://www-zeuthen.desy.de/~boyanov/dmesg_failed.txt<http://www-zeuthen.desy.de/%7Eboyanov/dmesg_failed.txt>
As you can see in the links above the kernel panics because of insufficient
"init" part of the command line passed to it. Another issue is the SCSI
hard-disk initialization and RAM disk creation. I forgot to mention, because
this is only a test kernel I just want basic functionality included, as I
tried to do so in the kernel configuration.
But nevertheless I failed to do so.
I'm actually only asking for some advice and tips (especially where to find
information on the web, if there is any) on the problems occurring, or if
someone has encountered such problems in the past and is willing to share
his/her experience.
I'm quite a newbie to this stuff, and as such I'm really confused about what's
going on...
So any help and advice is appreciated.
Best regards,
Konstantin
[-- Attachment #2: Type: text/html, Size: 2034 bytes --]
^ permalink raw reply
* Re: [PATCH] convert powermac ide blink to new led infrastructure
From: Benjamin Herrenschmidt @ 2006-05-01 9:35 UTC (permalink / raw)
To: Johannes Berg; +Cc: linuxppc-dev list
In-Reply-To: <1146475513.24172.10.camel@localhost>
On Mon, 2006-05-01 at 11:25 +0200, Johannes Berg wrote:
> On Mon, 2006-05-01 at 19:10 +1000, Benjamin Herrenschmidt wrote:
>
> > Yes, there is a callback, that's the "done" function, but you can also
> > poll/check on request->complete though in this case, polling is not
> > recommended, the LED stuff should be as low latency as possible.
>
> I suppose then what I'll do is request a done callback, and when an LED
> update comes in while not done, just save it into a new 'next state
> should be' variable, which is compared when done so that at worst I
> lose events, but not the final status.
>
> Can I use pmu_request from within the done callback with the same
> structure? Probably not a good idea right? I can schedule_work then.
No need to schedule work, it should work fine to re-queue
Ben.
^ permalink raw reply
* Re: [PATCH] via-pmu: report powerbutton as proper input event
From: Benjamin Herrenschmidt @ 2006-05-01 9:35 UTC (permalink / raw)
To: Johannes Berg; +Cc: linuxppc-dev list
In-Reply-To: <1146475314.24172.7.camel@localhost>
On Mon, 2006-05-01 at 11:21 +0200, Johannes Berg wrote:
> On Mon, 2006-05-01 at 19:09 +1000, Benjamin Herrenschmidt wrote:
> > > What else does it report? As far as I know it doesn't have anything
> > > else, at least on my system.
> >
> > Depends on the machine... earlier ones have the backlight there or
> > volume control.. then LID close could be an event too...
>
> Ok, yea. Well, we need to rework all of that. Currently, to get at the
> lid status, you have to open /dev/adb and do something through that.
>
> I honestly don't understand the reason for this. Is it just historic?
Yes.
Ben.
^ permalink raw reply
* Re: [PATCH] convert powermac ide blink to new led infrastructure
From: Johannes Berg @ 2006-05-01 9:25 UTC (permalink / raw)
To: Benjamin Herrenschmidt; +Cc: linuxppc-dev list
In-Reply-To: <1146474648.30710.48.camel@localhost.localdomain>
[-- Attachment #1: Type: text/plain, Size: 686 bytes --]
On Mon, 2006-05-01 at 19:10 +1000, Benjamin Herrenschmidt wrote:
> Yes, there is a callback, that's the "done" function, but you can also
> poll/check on request->complete though in this case, polling is not
> recommended, the LED stuff should be as low latency as possible.
I suppose then what I'll do is request a done callback, and when an LED
update comes in while not done, just save it into a new 'next state
should be' variable, which is compared when done so that at worst I
lose events, but not the final status.
Can I use pmu_request from within the done callback with the same
structure? Probably not a good idea right? I can schedule_work then.
johannes
[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 793 bytes --]
^ permalink raw reply
* Re: [PATCH] via-pmu: report powerbutton as proper input event
From: Johannes Berg @ 2006-05-01 9:21 UTC (permalink / raw)
To: Benjamin Herrenschmidt; +Cc: linuxppc-dev list
In-Reply-To: <1146474566.30710.45.camel@localhost.localdomain>
[-- Attachment #1: Type: text/plain, Size: 536 bytes --]
On Mon, 2006-05-01 at 19:09 +1000, Benjamin Herrenschmidt wrote:
> > What else does it report? As far as I know it doesn't have anything
> > else, at least on my system.
>
> Depends on the machine... earlier ones have the backlight there or
> volume control.. then LID close could be an event too...
Ok, yea. Well, we need to rework all of that. Currently, to get at the
lid status, you have to open /dev/adb and do something through that.
I honestly don't understand the reason for this. Is it just historic?
johannes
[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 793 bytes --]
^ permalink raw reply
* Re: [PATCH] convert powermac ide blink to new led infrastructure
From: Benjamin Herrenschmidt @ 2006-05-01 9:10 UTC (permalink / raw)
To: Johannes Berg; +Cc: linuxppc-dev list
In-Reply-To: <1146473959.24172.4.camel@localhost>
On Mon, 2006-05-01 at 10:59 +0200, Johannes Berg wrote:
> On Mon, 2006-05-01 at 16:56 +1000, Benjamin Herrenschmidt wrote:
>
> > Hrm... you find out the hard way in general ... we'll have to be careful
> > with this one, maybe have the PMU driver stop the LED after the device
> > suspend dance or do a sysdev with a suspend/resume and make sure that
> > any pending LED request is complete. Also, your code isn't testing if
> > the PMU request is still in progress before possibly sending another
> > one ... that's a bit dangerous (and may cause the LED to get "stuck").
> > You can re-use a request once it's complete though. In addition, my PMU
> > code was written to minimize spinlock usage in the hot path... There
> > might be a way to do the same here.
>
> I don't really understand how the PMU works hence wasn't really sure
> what the whole request thing does. Do we get an interrupt when it
> completes or something? If so, could I get at that notification via the
> request structure?
Yes, there is a callback, that's the "done" function, but you can also
poll/check on request->complete though in this case, polling is not
recommended, the LED stuff should be as low latency as possible.
> I'll take a closer look and rework this, I think it's worthwhile even if
> it requires some more effort.
>
> johannes
^ permalink raw reply
* Re: [PATCH] via-pmu: report powerbutton as proper input event
From: Benjamin Herrenschmidt @ 2006-05-01 9:09 UTC (permalink / raw)
To: Johannes Berg; +Cc: linuxppc-dev list
In-Reply-To: <1146473774.24172.2.camel@localhost>
On Mon, 2006-05-01 at 10:56 +0200, Johannes Berg wrote:
> On Mon, 2006-05-01 at 16:58 +1000, Benjamin Herrenschmidt wrote:
> > On Fri, 2006-04-28 at 21:15 +0200, Johannes Berg wrote:
> > > This patch adds an input device for the power button so that userspace gets
> > > notified about the user pressing it via the standard input layer.
> >
> > Looks interesting. What about other buttons/events the PMU can report ?
>
> What else does it report? As far as I know it doesn't have anything
> else, at least on my system.
Depends on the machine... earlier ones have the backlight there or
volume control.. then LID close could be an event too...
> > > +#define BUS_PMU 0x20
> >
> > Magic numbers... BAD BAD BAD ... Somebody needs to LART the input layer
> > people.
>
> It's not really a "magic" number since it doesn't really carry any
> information, or would you like string identifiers in each device
> descriptor? An enum would be just the same, in fact, the definition
> could be rewritten as an enum :)
>
> > Do we need these here ? Why not an initcall self-contained in each
> > file ?
>
> No heh, good point.
>
> Thanks,
> johannes
^ permalink raw reply
* Re: [PATCH] convert powermac ide blink to new led infrastructure
From: Johannes Berg @ 2006-05-01 8:59 UTC (permalink / raw)
To: Benjamin Herrenschmidt; +Cc: linuxppc-dev list
In-Reply-To: <1146466608.30710.36.camel@localhost.localdomain>
[-- Attachment #1: Type: text/plain, Size: 1047 bytes --]
On Mon, 2006-05-01 at 16:56 +1000, Benjamin Herrenschmidt wrote:
> Hrm... you find out the hard way in general ... we'll have to be careful
> with this one, maybe have the PMU driver stop the LED after the device
> suspend dance or do a sysdev with a suspend/resume and make sure that
> any pending LED request is complete. Also, your code isn't testing if
> the PMU request is still in progress before possibly sending another
> one ... that's a bit dangerous (and may cause the LED to get "stuck").
> You can re-use a request once it's complete though. In addition, my PMU
> code was written to minimize spinlock usage in the hot path... There
> might be a way to do the same here.
I don't really understand how the PMU works hence wasn't really sure
what the whole request thing does. Do we get an interrupt when it
completes or something? If so, could I get at that notification via the
request structure?
I'll take a closer look and rework this, I think it's worthwhile even if
it requires some more effort.
johannes
[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 793 bytes --]
^ permalink raw reply
* Re: [PATCH] via-pmu: report powerbutton as proper input event
From: Johannes Berg @ 2006-05-01 8:56 UTC (permalink / raw)
To: Benjamin Herrenschmidt; +Cc: linuxppc-dev list
In-Reply-To: <1146466716.30710.39.camel@localhost.localdomain>
[-- Attachment #1: Type: text/plain, Size: 943 bytes --]
On Mon, 2006-05-01 at 16:58 +1000, Benjamin Herrenschmidt wrote:
> On Fri, 2006-04-28 at 21:15 +0200, Johannes Berg wrote:
> > This patch adds an input device for the power button so that userspace gets
> > notified about the user pressing it via the standard input layer.
>
> Looks interesting. What about other buttons/events the PMU can report ?
What else does it report? As far as I know it doesn't have anything
else, at least on my system.
> > +#define BUS_PMU 0x20
>
> Magic numbers... BAD BAD BAD ... Somebody needs to LART the input layer
> people.
It's not really a "magic" number since it doesn't really carry any
information, or would you like string identifiers in each device
descriptor? An enum would be just the same, in fact, the definition
could be rewritten as an enum :)
> Do we need these here ? Why not an initcall self-contained in each
> file ?
No heh, good point.
Thanks,
johannes
[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 793 bytes --]
^ permalink raw reply
* Re: [PATCH] powerpc: Use the ibm,pa-features property where available
From: Paul Mackerras @ 2006-05-01 7:53 UTC (permalink / raw)
To: Benjamin Herrenschmidt; +Cc: linuxppc-dev
In-Reply-To: <1146466281.30710.32.camel@localhost.localdomain>
Benjamin Herrenschmidt writes:
> We should also remove all the bits potentially found in the property
> before hand...
That's why I clear the feature bit if the pa-features bit is there and
is zero...
Paul.
^ permalink raw reply
* [PATCH] powerpc: Export flat device tree via debugfs for debugging
From: Michael Ellerman @ 2006-05-01 7:40 UTC (permalink / raw)
To: Paul Mackerras; +Cc: linuxppc-dev
If DEBUG is turned on in prom.c, export the flat device tree via debugfs.
This has been handy on several occasions.
To look at it:
# mount -t debugfs none /sys/kernel/debug
# od -a /sys/kernel/debug/powerpc/flat-device-tree
and/or
# dtc -fI dtb /sys/kernel/debug/powerpc/flat-device-tree -O dts
Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
---
arch/powerpc/kernel/prom.c | 25 +++++++++++++++++++++++++
1 file changed, 25 insertions(+)
Index: to-merge/arch/powerpc/kernel/prom.c
===================================================================
--- to-merge.orig/arch/powerpc/kernel/prom.c
+++ to-merge/arch/powerpc/kernel/prom.c
@@ -30,6 +30,7 @@
#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/kexec.h>
+#include <linux/debugfs.h>
#include <asm/prom.h>
#include <asm/rtas.h>
@@ -2009,3 +2010,27 @@ void kdump_move_device_tree(void)
/* XXX should we unreserve the old DT? */
}
#endif /* CONFIG_KEXEC */
+
+#ifdef DEBUG
+static struct debugfs_blob_wrapper flat_dt_blob;
+
+static int __init export_flat_device_tree(void)
+{
+ struct dentry *d;
+
+ d = debugfs_create_dir("powerpc", NULL);
+ if (!d)
+ return 1;
+
+ flat_dt_blob.data = initial_boot_params;
+ flat_dt_blob.size = initial_boot_params->totalsize;
+
+ d = debugfs_create_blob("flat-device-tree", S_IFREG | S_IRUSR,
+ d, &flat_dt_blob);
+ if (!d)
+ return 1;
+
+ return 0;
+}
+__initcall(export_flat_device_tree);
+#endif
^ permalink raw reply
* Re: windfarm for PM72/PM73/RM31
From: Benjamin Herrenschmidt @ 2006-05-01 7:17 UTC (permalink / raw)
To: Robin H. Johnson; +Cc: linuxppc-dev
In-Reply-To: <20060501002719.GD15141@curie-int.vc.shawcable.net>
> 1. Message naming consistency - there are a number of printk/DBG
> statements that seem to flip randomly between using a prefix of
> 'windfarm:' or 'wf:', and then some that omit it entirely.
> Any consensus on which one is preferred?
Not really... I tended to use "wf" for debug and "windfarm" for normally
visible messages but I may not have been totally consistent.
> 2. Could somebody please send me a tarball of /proc/device-tree/ from a
> RackMac3,1 unit? I've got the data for PM72/73 already.
I'll do so tomorrow, ping me if you don't hear from me.
Ben.
^ permalink raw reply
* Re: windfarm for PM72/PM73/RM31
From: Benjamin Herrenschmidt @ 2006-05-01 7:16 UTC (permalink / raw)
To: Robin H. Johnson; +Cc: linuxppc-dev
In-Reply-To: <20060501064443.GA20481@curie-int.vc.shawcable.net>
On Sun, 2006-04-30 at 23:44 -0700, Robin H. Johnson wrote:
> On Mon, May 01, 2006 at 06:53:09AM +0200, Étienne Bersac wrote:
> > I own an iMac G5 rev C (iSight) with a PowerMac 12,1 included. Do you
> > intend to make support for it ? I will be happy to help you :)
> I'd like to start on the PM72 stuff first, but please tar up your
> /proc/device-tree/ directory and email it to me, and I can see about it.
>
> If Apple stuck to the same sensors (max6690, lm75, lm87, ds1775, ds1631,
> ad7417), it should be reasonably simple to add your machine afterwards.
>
> Is there actually a reasonable archive of the OF node trees somewhere?
> I think it might be handy in developing drivers in future. I've
> presently got: PowerBook4,3 PowerMac7,2 PowerMac7,3 PowerMac11,2
I'm not sure we can publicly post them, there might be some Apple
copyright lurking around. I have a fairly big archive here too.
Ben.
^ permalink raw reply
* Re: Linux 2.6 sources for MPC852T processor
From: David Jander @ 2006-05-01 7:14 UTC (permalink / raw)
To: Marcelo Tosatti; +Cc: linuxppc-embedded
In-Reply-To: <20060430165325.GB4631@dmt>
Hi,
I did the following:
$ patch -p1 <tlbfix-mt.patch
(This is the patch you sent me)
$ cg-seek v2.6.16
Warning: uncommitted local changes, trying to bring them along
patching file arch/ppc/8xx_io/Kconfig
patching file arch/ppc/8xx_io/Makefile
The next patch would create the file arch/ppc/8xx_io/cpm_spi.c,
which already exists! Skipping patch.
1 out of 1 hunk ignored -- saving rejects to file
arch/ppc/8xx_io/cpm_spi.c.rej
patching file arch/ppc/Kconfig
Hunk #2 succeeded at 927 (offset -16 lines).
patching file arch/ppc/kernel/head_8xx.S
patching file arch/ppc/platforms/Makefile
Hunk #1 succeeded at 37 (offset -1 lines).
The next patch would create the file arch/ppc/platforms/prtppc.h,
which already exists! Skipping patch.
1 out of 1 hunk ignored -- saving rejects to file
arch/ppc/platforms/prtppc.h.rej
The next patch would create the file arch/ppc/platforms/prtppc_serial.h,
which already exists! Skipping patch.
1 out of 1 hunk ignored -- saving rejects to file
arch/ppc/platforms/prtppc_serial.h.rej
The next patch would create the file arch/ppc/platforms/prtppc_setup.c,
which already exists! Skipping patch.
1 out of 1 hunk ignored -- saving rejects to file
arch/ppc/platforms/prtppc_setup.c.rej
patching file arch/ppc/syslib/m8xx_setup.c
patching file drivers/input/keyboard/Kconfig
patching file drivers/input/keyboard/Makefile
patching file drivers/usb/Kconfig
patching file drivers/usb/host/isp116x.h
patching file drivers/video/Kconfig
patching file drivers/video/Makefile
The next patch would create the file include/asm-ppc/cpm_spi.h,
which already exists! Skipping patch.
1 out of 1 hunk ignored -- saving rejects to file
include/asm-ppc/cpm_spi.h.rej
patching file include/asm-ppc/mpc8xx.h
patching file include/asm-ppc/ppc_sys.h
patching file include/asm-ppc/serial.h
Adding file arch/ppc/8xx_io/cpm_spi.c
Adding file arch/ppc/platforms/prtppc.h
Adding file arch/ppc/platforms/prtppc_serial.h
Adding file arch/ppc/platforms/prtppc_setup.c
Adding file include/asm-ppc/cpm_spi.h
On commit 7705a8792b0fc82fd7d4dd923724606bbfd9fb20
All the above is BSP stuff and drivers for our board, plus Marcelo's patch.
To verify, I did:
$ cg-diff arch/ppc/kernel/head_8xx.S
diff --git a/arch/ppc/kernel/head_8xx.S b/arch/ppc/kernel/head_8xx.S
--- a/arch/ppc/kernel/head_8xx.S
+++ b/arch/ppc/kernel/head_8xx.S
@@ -357,9 +357,7 @@ InstructionTLBMiss:
. = 0x1200
DataStoreTLBMiss:
-#ifdef CONFIG_8xx_CPU6
stw r3, 8(r0)
-#endif
DO_8xx_CPU6(0x3f80, r3)
mtspr SPRN_M_TW, r10 /* Save a couple of working registers */
mfcr r10
@@ -419,9 +417,7 @@ DataStoreTLBMiss:
lwz r11, 0(r0)
mtcr r11
lwz r11, 4(r0)
-#ifdef CONFIG_8xx_CPU6
lwz r3, 8(r0)
-#endif
rfi
/* This is an instruction TLB error on the MPC8xx. This could be due
Looks good.
$ make uImage
Kernel doesn't boot, no output produced :-(
Greetings,
--
David Jander
^ permalink raw reply
* Re: [PATCH] via-pmu: report powerbutton as proper input event
From: Benjamin Herrenschmidt @ 2006-05-01 6:58 UTC (permalink / raw)
To: Johannes Berg; +Cc: linuxppc-dev list, debian-powerpc
In-Reply-To: <1146251713.5019.4.camel@localhost>
On Fri, 2006-04-28 at 21:15 +0200, Johannes Berg wrote:
> This patch adds an input device for the power button so that userspace gets
> notified about the user pressing it via the standard input layer.
Looks interesting. What about other buttons/events the PMU can report ?
> +#endif /* __VIA_PMU_EVENT_H */
> --- linux-2.6.orig/include/linux/input.h 2006-04-28 20:00:37.417288704 +0200
> +++ linux-2.6/include/linux/input.h 2006-04-28 20:00:48.157288704 +0200
> @@ -658,6 +658,7 @@ struct input_absinfo {
> #define BUS_I2C 0x18
> #define BUS_HOST 0x19
> #define BUS_GSC 0x1A
> +#define BUS_PMU 0x20
Magic numbers... BAD BAD BAD ... Somebody needs to LART the input layer
people.
> @@ -2915,6 +2921,10 @@ static int __init init_pmu_led(void)
> if (pmu_led_init()) {
> printk(KERN_WARNING "via-pmu: LED failed to init\n");
> }
> +
> + /* only on keylargo can the power button be on the pmu ... */
> + if (pmu_event_init())
> + printk(KERN_WARNING "via-pmu: couldn't add event device");
> }
>
> return 0;
Do we need these here ? Why not an initcall self-contained in each
file ?
Ben.
^ permalink raw reply
* Re: [PATCH] convert powermac ide blink to new led infrastructure
From: Benjamin Herrenschmidt @ 2006-05-01 6:56 UTC (permalink / raw)
To: Johannes Berg; +Cc: linuxppc-dev list, debian-powerpc
In-Reply-To: <1146237326.19164.111.camel@localhost>
On Fri, 2006-04-28 at 17:15 +0200, Johannes Berg wrote:
> [resent slightly different patch that adds a comment over the original
> version]
>
> This patch removes the old pmac ide led blink code and
> adds generic LED subsystem support for the LED.
>
> Ben: I removed the retry code that was present in the original. I guess
> LEDs aren't really too important but if it really is necessary please
> tell me and I'll add it back.
>
> Also, I don't have any suspend handling things. It doesn't appear to be
> necessary, how do I find out?
Hrm... you find out the hard way in general ... we'll have to be careful
with this one, maybe have the PMU driver stop the LED after the device
suspend dance or do a sysdev with a suspend/resume and make sure that
any pending LED request is complete. Also, your code isn't testing if
the PMU request is still in progress before possibly sending another
one ... that's a bit dangerous (and may cause the LED to get "stuck").
You can re-use a request once it's complete though. In addition, my PMU
code was written to minimize spinlock usage in the hot path... There
might be a way to do the same here.
Ben.
^ permalink raw reply
* Re: [PATCH] powerpc: Use the ibm,pa-features property where available
From: Benjamin Herrenschmidt @ 2006-05-01 6:51 UTC (permalink / raw)
To: Paul Mackerras; +Cc: linuxppc-dev
In-Reply-To: <17493.35040.453321.909465@cargo.ozlabs.ibm.com>
On Mon, 2006-05-01 at 14:04 +1000, Paul Mackerras wrote:
> Forthcoming IBM machines will have a "ibm,pa-features" property on CPU
> nodes, that contains bits indicating which optional architecture
> features are implemented by the CPU. This adds code to use the
> property, if present, to update our CPU feature bitmaps.
>
> This is based on a patch by Will Schmidt <will_schmidt@vnet.ibm.com>
We should also remove all the bits potentially found in the property
before hand...
Ben.
^ permalink raw reply
* Re: windfarm for PM72/PM73/RM31
From: Robin H. Johnson @ 2006-05-01 6:44 UTC (permalink / raw)
To: linuxppc-dev
In-Reply-To: <A2D81A03-8CD5-41CD-BD49-3EDB1CDE53FD@laposte.net>
[-- Attachment #1: Type: text/plain, Size: 827 bytes --]
On Mon, May 01, 2006 at 06:53:09AM +0200, Étienne Bersac wrote:
> I own an iMac G5 rev C (iSight) with a PowerMac 12,1 included. Do you
> intend to make support for it ? I will be happy to help you :)
I'd like to start on the PM72 stuff first, but please tar up your
/proc/device-tree/ directory and email it to me, and I can see about it.
If Apple stuck to the same sensors (max6690, lm75, lm87, ds1775, ds1631,
ad7417), it should be reasonably simple to add your machine afterwards.
Is there actually a reasonable archive of the OF node trees somewhere?
I think it might be handy in developing drivers in future. I've
presently got: PowerBook4,3 PowerMac7,2 PowerMac7,3 PowerMac11,2
--
Robin Hugh Johnson
E-Mail : robbat2@gentoo.org
GnuPG FP : 11AC BA4F 4778 E3F6 E4ED F38E B27B 944E 3488 4E85
[-- Attachment #2: Type: application/pgp-signature, Size: 241 bytes --]
^ permalink raw reply
* Re: windfarm for PM72/PM73/RM31
From: Étienne Bersac @ 2006-05-01 4:53 UTC (permalink / raw)
To: Robin H. Johnson; +Cc: linuxppc-dev
In-Reply-To: <20060501002719.GD15141@curie-int.vc.shawcable.net>
Hello,
I own an iMac G5 rev C (iSight) with a PowerMac 12,1 included. Do you
intend to make support for it ? I will be happy to help you :)
Thank you.
Étienne.
^ permalink raw reply
* [PATCH] powerpc: Use the ibm,pa-features property where available
From: Paul Mackerras @ 2006-05-01 4:04 UTC (permalink / raw)
To: will_schmidt, linuxppc-dev
Forthcoming IBM machines will have a "ibm,pa-features" property on CPU
nodes, that contains bits indicating which optional architecture
features are implemented by the CPU. This adds code to use the
property, if present, to update our CPU feature bitmaps.
This is based on a patch by Will Schmidt <will_schmidt@vnet.ibm.com>
Signed-off-by: Paul Mackerras <paulus@samba.org>
---
At the moment we're looking at all the ibm,pa-features properties and
processing them all... Maybe we should only look at the one for the
boot cpu.
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index 1cb69e8..9a07f97 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -885,6 +885,74 @@ void __init unflatten_device_tree(void)
DBG(" <- unflatten_device_tree()\n");
}
+/*
+ * ibm,pa-features is a per-cpu property that contains a string of
+ * attribute descriptors, each of which has a 2 byte header plus up
+ * to 254 bytes worth of processor attribute bits. First header
+ * byte specifies the number of bytes following the header.
+ * Second header byte is an "attribute-specifier" type, of which
+ * zero is the only currently-defined value.
+ * Implementation: each entry in the table below gives the byte and
+ * bit offset of a feature that we are interested in. If the property
+ * or its type 0 descriptor is missing, nothing is changed; otherwise
+ * each known bit sets or clears the matching CPU feature flags. Note
+ * that the bit numbers are big-endian to match the definition in PAPR.
+ */
+static struct ibm_pa_feature {
+ unsigned long cpu_features; /* CPU_FTR_xxx bit */
+ unsigned int cpu_user_ftrs; /* PPC_FEATURE_xxx bit */
+ unsigned char pabyte; /* byte number in ibm,pa-features */
+ unsigned char pabit; /* bit number (big-endian) */
+ unsigned char invert; /* if 1, pa bit set => clear feature */
+} ibm_pa_features[] __initdata = {
+ {0, PPC_FEATURE_HAS_MMU, 0, 0, 0},
+ {0, PPC_FEATURE_HAS_FPU, 0, 1, 0},
+ {CPU_FTR_SLB, 0, 0, 2, 0},
+ {CPU_FTR_CTRL, 0, 0, 3, 0},
+ {CPU_FTR_NOEXECUTE, 0, 0, 6, 0},
+ {CPU_FTR_NODSISRALIGN, 0, 1, 1, 1},
+ {CPU_FTR_CI_LARGE_PAGE, 0, 1, 2, 0},
+};
+
+static void __init check_cpu_pa_features(unsigned long node)
+{
+ unsigned char *pa_ftrs;
+ unsigned long len, tablelen, i, bit;
+
+ pa_ftrs = of_get_flat_dt_prop(node, "ibm,pa-features", &tablelen);
+ if (pa_ftrs == NULL)
+ return;
+
+ /* find descriptor with type == 0 */
+ for (;;) {
+ if (tablelen < 3)
+ return;
+ len = 2 + pa_ftrs[0];
+ if (tablelen < len)
+ return; /* descriptor 0 not found */
+ if (pa_ftrs[1] == 0)
+ break;
+ tablelen -= len;
+ pa_ftrs += len;
+ }
+
+ /* loop over bits we know about */
+ for (i = 0; i < ARRAY_SIZE(ibm_pa_features); ++i) {
+ struct ibm_pa_feature *fp = &ibm_pa_features[i];
+
+ if (fp->pabyte >= pa_ftrs[0])
+ continue;
+ bit = (pa_ftrs[2 + fp->pabyte] >> (7 - fp->pabit)) & 1;
+ if (bit ^ fp->invert) {
+ cur_cpu_spec->cpu_features |= fp->cpu_features;
+ cur_cpu_spec->cpu_user_features |= fp->cpu_user_ftrs;
+ } else {
+ cur_cpu_spec->cpu_features &= ~fp->cpu_features;
+ cur_cpu_spec->cpu_user_features &= ~fp->cpu_user_ftrs;
+ }
+ }
+}
+
static int __init early_init_dt_scan_cpus(unsigned long node,
const char *uname, int depth,
void *data)
@@ -968,6 +1036,8 @@ #ifdef CONFIG_ALTIVEC
cur_cpu_spec->cpu_user_features |= PPC_FEATURE_HAS_ALTIVEC;
}
#endif /* CONFIG_ALTIVEC */
+
+ check_cpu_pa_features(node);
#ifdef CONFIG_PPC_PSERIES
if (nthreads > 1)
^ permalink raw reply related
* Re: sign extension for 32bit syscalls on ppc64
From: Stephen Rothwell @ 2006-05-01 3:44 UTC (permalink / raw)
To: schwab; +Cc: linuxppc-dev, paulus
In-Reply-To: <20060501100518.22aedb20.sfr@canb.auug.org.au>
[-- Attachment #1: Type: text/plain, Size: 666 bytes --]
On Mon, 1 May 2006 10:05:18 +1000 Stephen Rothwell <sfr@canb.auug.org.au> wrote:
>
> Try mkdirat. openat has a compat wrapper that has the dfd parameter
> declared as an unsigned int and passes it to sys_openat, whose first
> parameter is declared to be int, so the sign extension gets done.
OK, I actually tried this and it works! :-(
I traced the code path and it turns out that the place we check for the
-100 (in do_path_lookup), the compiler has used a cmpwi instruction and so
ignores the top 32 bits. Thus we get away with the ABI abuse!
--
Cheers,
Stephen Rothwell sfr@canb.auug.org.au
http://www.canb.auug.org.au/~sfr/
[-- Attachment #2: Type: application/pgp-signature, Size: 189 bytes --]
^ permalink raw reply
* [RFC/PATCH 4/4] powerpc: Convert DBG to pr_debug for the rest of arch/powerpc
From: Michael Ellerman @ 2006-05-01 0:53 UTC (permalink / raw)
To: Paul Mackerras; +Cc: linuxppc-dev
In-Reply-To: <1146444821.660434.518701003988.qpush@concordia>
Convert DBG to pr_debug for the rest of arch/powerpc.
Signed-off-by: Michael Ellerman <michael@ellerman.id.au>
---
arch/powerpc/mm/hash_native_64.c | 22 ++++++------------
arch/powerpc/mm/init_64.c | 8 ------
arch/powerpc/mm/lmb.c | 29 +++++++++---------------
arch/powerpc/mm/slb.c | 12 ++--------
arch/powerpc/sysdev/dart_iommu.c | 10 ++++----
arch/powerpc/sysdev/mpic.c | 46 ++++++++++++++++++---------------------
6 files changed, 50 insertions(+), 77 deletions(-)
Index: to-merge/arch/powerpc/mm/hash_native_64.c
===================================================================
--- to-merge.orig/arch/powerpc/mm/hash_native_64.c
+++ to-merge/arch/powerpc/mm/hash_native_64.c
@@ -10,7 +10,7 @@
* 2 of the License, or (at your option) any later version.
*/
-#undef DEBUG_LOW
+#undef DEBUG /* warning: leads to _lots_ of output */
#include <linux/spinlock.h>
#include <linux/bitops.h>
@@ -27,12 +27,6 @@
#include <asm/cputable.h>
#include <asm/udbg.h>
-#ifdef DEBUG_LOW
-#define DBG_LOW(fmt...) udbg_printf(fmt)
-#else
-#define DBG_LOW(fmt...)
-#endif
-
#define HPTE_LOCK_BIT 3
static DEFINE_SPINLOCK(native_tlbie_lock);
@@ -132,7 +126,7 @@ long native_hpte_insert(unsigned long hp
int i;
if (!(vflags & HPTE_V_BOLTED)) {
- DBG_LOW(" insert(group=%lx, va=%016lx, pa=%016lx,"
+ pr_debug(" insert(group=%lx, va=%016lx, pa=%016lx,"
" rflags=%lx, vflags=%lx, psize=%d)\n",
hpte_group, va, pa, rflags, vflags, psize);
}
@@ -156,7 +150,7 @@ long native_hpte_insert(unsigned long hp
hpte_r = hpte_encode_r(pa, psize) | rflags;
if (!(vflags & HPTE_V_BOLTED)) {
- DBG_LOW(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
+ pr_debug(" i=%x hpte_v=%016lx, hpte_r=%016lx\n",
i, hpte_v, hpte_r);
}
@@ -181,7 +175,7 @@ static long native_hpte_remove(unsigned
int slot_offset;
unsigned long hpte_v;
- DBG_LOW(" remove(group=%lx)\n", hpte_group);
+ pr_debug(" remove(group=%lx)\n", hpte_group);
/* pick a random entry to start at */
slot_offset = mftb() & 0x7;
@@ -222,7 +216,7 @@ static long native_hpte_updatepp(unsigne
want_v = hpte_encode_v(va, psize);
- DBG_LOW(" update(va=%016lx, avpnv=%016lx, hash=%016lx, newpp=%x)",
+ pr_debug(" update(va=%016lx, avpnv=%016lx, hash=%016lx, newpp=%x)",
va, want_v & HPTE_V_AVPN, slot, newpp);
native_lock_hpte(hptep);
@@ -231,11 +225,11 @@ static long native_hpte_updatepp(unsigne
/* Even if we miss, we need to invalidate the TLB */
if (!HPTE_V_COMPARE(hpte_v, want_v) || !(hpte_v & HPTE_V_VALID)) {
- DBG_LOW(" -> miss\n");
+ pr_debug(" -> miss\n");
native_unlock_hpte(hptep);
ret = -1;
} else {
- DBG_LOW(" -> hit\n");
+ pr_debug(" -> hit\n");
/* Update the HPTE */
hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) |
(newpp & (HPTE_R_PP | HPTE_R_N));
@@ -321,7 +315,7 @@ static void native_hpte_invalidate(unsig
local_irq_save(flags);
- DBG_LOW(" invalidate(va=%016lx, hash: %x)\n", va, slot);
+ pr_debug(" invalidate(va=%016lx, hash: %x)\n", va, slot);
want_v = hpte_encode_v(va, psize);
native_lock_hpte(hptep);
Index: to-merge/arch/powerpc/mm/init_64.c
===================================================================
--- to-merge.orig/arch/powerpc/mm/init_64.c
+++ to-merge/arch/powerpc/mm/init_64.c
@@ -67,12 +67,6 @@
#include "mmu_decl.h"
-#ifdef DEBUG
-#define DBG(fmt...) printk(fmt)
-#else
-#define DBG(fmt...)
-#endif
-
#if PGTABLE_RANGE > USER_VSID_RANGE
#warning Limited user VSID range means pagetable space is wasted
#endif
@@ -172,7 +166,7 @@ void pgtable_cache_init(void)
int size = pgtable_cache_size[i];
const char *name = pgtable_cache_name[i];
- DBG("Allocating page table cache %s (#%d) "
+ pr_debug("Allocating page table cache %s (#%d) "
"for size: %08x...\n", name, i, size);
pgtable_cache[i] = kmem_cache_create(name,
size, size,
Index: to-merge/arch/powerpc/mm/lmb.c
===================================================================
--- to-merge.orig/arch/powerpc/mm/lmb.c
+++ to-merge/arch/powerpc/mm/lmb.c
@@ -10,6 +10,8 @@
* 2 of the License, or (at your option) any later version.
*/
+#undef DEBUG
+
#include <linux/config.h>
#include <linux/kernel.h>
#include <linux/init.h>
@@ -22,15 +24,6 @@
#include "mmu_decl.h" /* for __max_low_memory */
#endif
-#undef DEBUG
-
-#ifdef DEBUG
-#include <asm/udbg.h>
-#define DBG(fmt...) udbg_printf(fmt)
-#else
-#define DBG(fmt...)
-#endif
-
#define LMB_ALLOC_ANYWHERE 0
struct lmb lmb;
@@ -40,22 +33,22 @@ void lmb_dump_all(void)
#ifdef DEBUG
unsigned long i;
- DBG("lmb_dump_all:\n");
- DBG(" memory.cnt = 0x%lx\n", lmb.memory.cnt);
- DBG(" memory.size = 0x%lx\n", lmb.memory.size);
+ pr_debug("lmb_dump_all:\n");
+ pr_debug(" memory.cnt = 0x%lx\n", lmb.memory.cnt);
+ pr_debug(" memory.size = 0x%lx\n", lmb.memory.size);
for (i=0; i < lmb.memory.cnt ;i++) {
- DBG(" memory.region[0x%x].base = 0x%lx\n",
+ pr_debug(" memory.region[0x%x].base = 0x%lx\n",
i, lmb.memory.region[i].base);
- DBG(" .size = 0x%lx\n",
+ pr_debug(" .size = 0x%lx\n",
lmb.memory.region[i].size);
}
- DBG("\n reserved.cnt = 0x%lx\n", lmb.reserved.cnt);
- DBG(" reserved.size = 0x%lx\n", lmb.reserved.size);
+ pr_debug("\n reserved.cnt = 0x%lx\n", lmb.reserved.cnt);
+ pr_debug(" reserved.size = 0x%lx\n", lmb.reserved.size);
for (i=0; i < lmb.reserved.cnt ;i++) {
- DBG(" reserved.region[0x%x].base = 0x%lx\n",
+ pr_debug(" reserved.region[0x%x].base = 0x%lx\n",
i, lmb.reserved.region[i].base);
- DBG(" .size = 0x%lx\n",
+ pr_debug(" .size = 0x%lx\n",
lmb.reserved.region[i].size);
}
#endif /* DEBUG */
Index: to-merge/arch/powerpc/mm/slb.c
===================================================================
--- to-merge.orig/arch/powerpc/mm/slb.c
+++ to-merge/arch/powerpc/mm/slb.c
@@ -24,12 +24,6 @@
#include <asm/cputable.h>
#include <asm/cacheflush.h>
-#ifdef DEBUG
-#define DBG(fmt...) udbg_printf(fmt)
-#else
-#define DBG(fmt...)
-#endif
-
extern void slb_allocate_realmode(unsigned long ea);
extern void slb_allocate_user(unsigned long ea);
@@ -191,12 +185,12 @@ void slb_initialize(void)
patch_slb_encoding(slb_miss_user_load_normal,
SLB_VSID_USER | virtual_llp);
- DBG("SLB: linear LLP = %04x\n", linear_llp);
- DBG("SLB: virtual LLP = %04x\n", virtual_llp);
+ pr_debug("SLB: linear LLP = %04x\n", linear_llp);
+ pr_debug("SLB: virtual LLP = %04x\n", virtual_llp);
#ifdef CONFIG_HUGETLB_PAGE
patch_slb_encoding(slb_miss_user_load_huge,
SLB_VSID_USER | huge_llp);
- DBG("SLB: huge LLP = %04x\n", huge_llp);
+ pr_debug("SLB: huge LLP = %04x\n", huge_llp);
#endif
}
Index: to-merge/arch/powerpc/sysdev/dart_iommu.c
===================================================================
--- to-merge.orig/arch/powerpc/sysdev/dart_iommu.c
+++ to-merge/arch/powerpc/sysdev/dart_iommu.c
@@ -27,6 +27,8 @@
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
+#undef DEBUG
+
#include <linux/config.h>
#include <linux/init.h>
#include <linux/types.h>
@@ -70,15 +72,13 @@ static int iommu_table_dart_inited;
static int dart_dirty;
static int dart_is_u4;
-#define DBG(...)
-
static inline void dart_tlb_invalidate_all(void)
{
unsigned long l = 0;
unsigned int reg, inv_bit;
unsigned long limit;
- DBG("dart: flush\n");
+ pr_debug("dart: flush\n");
/* To invalidate the DART, set the DARTCNTL_FLUSHTLB bit in the
* control register and wait for it to clear.
@@ -125,7 +125,7 @@ static void dart_build(struct iommu_tabl
unsigned int *dp;
unsigned int rpn;
- DBG("dart: build at: %lx, %lx, addr: %x\n", index, npages, uaddr);
+ pr_debug("dart: build at: %lx, %lx, addr: %x\n", index, npages, uaddr);
index <<= DART_PAGE_FACTOR;
npages <<= DART_PAGE_FACTOR;
@@ -156,7 +156,7 @@ static void dart_free(struct iommu_table
* bad DMAs, but then no 32-bit architecture ever does either.
*/
- DBG("dart: free at: %lx, %lx\n", index, npages);
+ pr_debug("dart: free at: %lx, %lx\n", index, npages);
index <<= DART_PAGE_FACTOR;
npages <<= DART_PAGE_FACTOR;
Index: to-merge/arch/powerpc/sysdev/mpic.c
===================================================================
--- to-merge.orig/arch/powerpc/sysdev/mpic.c
+++ to-merge/arch/powerpc/sysdev/mpic.c
@@ -37,12 +37,6 @@
#include <asm/mpic.h>
#include <asm/smp.h>
-#ifdef DEBUG
-#define DBG(fmt...) printk(fmt)
-#else
-#define DBG(fmt...)
-#endif
-
static struct mpic *mpics;
static struct mpic *mpic_primary;
static DEFINE_SPINLOCK(mpic_lock);
@@ -205,7 +199,7 @@ static void mpic_startup_ht_interrupt(st
if (fixup->base == NULL)
return;
- DBG("startup_ht_interrupt(%u, %u) index: %d\n",
+ pr_debug("startup_ht_interrupt(%u, %u) index: %d\n",
source, irqflags, fixup->index);
spin_lock_irqsave(&mpic->fixup_lock, flags);
/* Enable and configure */
@@ -228,7 +222,7 @@ static void mpic_shutdown_ht_interrupt(s
if (fixup->base == NULL)
return;
- DBG("shutdown_ht_interrupt(%u, %u)\n", source, irqflags);
+ pr_debug("shutdown_ht_interrupt(%u, %u)\n", source, irqflags);
/* Disable */
spin_lock_irqsave(&mpic->fixup_lock, flags);
@@ -271,7 +265,8 @@ static void __init mpic_scan_ht_pic(stru
writeb(0x10 + 2 * i, base + 2);
tmp = readl(base + 4);
irq = (tmp >> 16) & 0xff;
- DBG("HT PIC index 0x%x, irq 0x%x, tmp: %08x\n", i, irq, tmp);
+ pr_debug("HT PIC index 0x%x, irq 0x%x, tmp: %08x\n",
+ i, irq, tmp);
/* mask it , will be unmasked later */
tmp |= 0x1;
writel(tmp, base + 4);
@@ -318,7 +313,7 @@ static void __init mpic_scan_ht_pics(str
u32 l = readl(devbase + PCI_VENDOR_ID);
u16 s;
- DBG("devfn %x, l: %x\n", devfn, l);
+ pr_debug("devfn %x, l: %x\n", devfn, l);
/* If no device, skip */
if (l == 0xffffffff || l == 0x00000000 ||
@@ -417,7 +412,8 @@ static void mpic_enable_irq(unsigned int
struct mpic *mpic = mpic_from_irq(irq);
unsigned int src = irq - mpic->irq_offset;
- DBG("%p: %s: enable_irq: %d (src %d)\n", mpic, mpic->name, irq, src);
+ pr_debug("%p: %s: enable_irq: %d (src %d)\n",
+ mpic, mpic->name, irq, src);
mpic_irq_write(src, MPIC_IRQ_VECTOR_PRI,
mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) &
@@ -464,7 +460,7 @@ static void mpic_disable_irq(unsigned in
struct mpic *mpic = mpic_from_irq(irq);
unsigned int src = irq - mpic->irq_offset;
- DBG("%s: disable_irq: %d (src %d)\n", mpic->name, irq, src);
+ pr_debug("%s: disable_irq: %d (src %d)\n", mpic->name, irq, src);
mpic_irq_write(src, MPIC_IRQ_VECTOR_PRI,
mpic_irq_read(src, MPIC_IRQ_VECTOR_PRI) |
@@ -498,7 +494,7 @@ static void mpic_end_irq(unsigned int ir
struct mpic *mpic = mpic_from_irq(irq);
#ifdef DEBUG_IRQ
- DBG("%s: end_irq: %d\n", mpic->name, irq);
+ pr_debug("%s: end_irq: %d\n", mpic->name, irq);
#endif
/* We always EOI on end_irq() even for edge interrupts since that
* should only lower the priority, the MPIC should have properly
@@ -524,7 +520,7 @@ static void mpic_enable_ipi(unsigned int
struct mpic *mpic = mpic_from_ipi(irq);
unsigned int src = irq - mpic->ipi_offset;
- DBG("%s: enable_ipi: %d (ipi %d)\n", mpic->name, irq, src);
+ pr_debug("%s: enable_ipi: %d (ipi %d)\n", mpic->name, irq, src);
mpic_ipi_write(src, mpic_ipi_read(src) & ~MPIC_VECPRI_MASK);
}
@@ -762,7 +758,7 @@ void __init mpic_init(struct mpic *mpic)
#ifdef CONFIG_MPIC_BROKEN_U3
/* Do the HT PIC fixups on U3 broken mpic */
- DBG("MPIC flags: %x\n", mpic->flags);
+ pr_debug("MPIC flags: %x\n", mpic->flags);
if ((mpic->flags & MPIC_BROKEN_U3) && (mpic->flags & MPIC_PRIMARY))
mpic_scan_ht_pics(mpic);
#endif /* CONFIG_MPIC_BROKEN_U3 */
@@ -802,8 +798,8 @@ void __init mpic_init(struct mpic *mpic)
#endif
}
- DBG("setup source %d, vecpri: %08x, level: %d\n", i, vecpri,
- (level != 0));
+ pr_debug("setup source %d, vecpri: %08x, level: %d\n",
+ i, vecpri, (level != 0));
/* init hw */
mpic_irq_write(i, MPIC_IRQ_VECTOR_PRI, vecpri);
@@ -879,7 +875,8 @@ void mpic_setup_this_cpu(void)
BUG_ON(mpic == NULL);
- DBG("%s: setup_this_cpu(%d)\n", mpic->name, hard_smp_processor_id());
+ pr_debug("%s: setup_this_cpu(%d)\n",
+ mpic->name, hard_smp_processor_id());
spin_lock_irqsave(&mpic_lock, flags);
@@ -930,7 +927,8 @@ void mpic_teardown_this_cpu(int secondar
BUG_ON(mpic == NULL);
- DBG("%s: teardown_this_cpu(%d)\n", mpic->name, hard_smp_processor_id());
+ pr_debug("%s: teardown_this_cpu(%d)\n",
+ mpic->name, hard_smp_processor_id());
spin_lock_irqsave(&mpic_lock, flags);
/* let the mpic know we don't want intrs. */
@@ -952,7 +950,7 @@ void mpic_send_ipi(unsigned int ipi_no,
BUG_ON(mpic == NULL);
#ifdef DEBUG_IPI
- DBG("%s: send_ipi(ipi_no: %d)\n", mpic->name, ipi_no);
+ pr_debug("%s: send_ipi(ipi_no: %d)\n", mpic->name, ipi_no);
#endif
mpic_cpu_write(MPIC_CPU_IPI_DISPATCH_0 + ipi_no * 0x10,
@@ -965,11 +963,11 @@ int mpic_get_one_irq(struct mpic *mpic,
irq = mpic_cpu_read(MPIC_CPU_INTACK) & MPIC_VECPRI_VECTOR_MASK;
#ifdef DEBUG_LOW
- DBG("%s: get_one_irq(): %d\n", mpic->name, irq);
+ pr_debug("%s: get_one_irq(): %d\n", mpic->name, irq);
#endif
if (mpic->cascade && irq == mpic->cascade_vec) {
#ifdef DEBUG_LOW
- DBG("%s: cascading ...\n", mpic->name);
+ pr_debug("%s: cascading ...\n", mpic->name);
#endif
irq = mpic->cascade(regs, mpic->cascade_data);
mpic_eoi(mpic);
@@ -979,12 +977,12 @@ int mpic_get_one_irq(struct mpic *mpic,
return -1;
if (irq < MPIC_VEC_IPI_0) {
#ifdef DEBUG_IRQ
- DBG("%s: irq %d\n", mpic->name, irq + mpic->irq_offset);
+ pr_debug("%s: irq %d\n", mpic->name, irq + mpic->irq_offset);
#endif
return irq + mpic->irq_offset;
}
#ifdef DEBUG_IPI
- DBG("%s: ipi %d !\n", mpic->name, irq - MPIC_VEC_IPI_0);
+ pr_debug("%s: ipi %d !\n", mpic->name, irq - MPIC_VEC_IPI_0);
#endif
return irq - MPIC_VEC_IPI_0 + mpic->ipi_offset;
}
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox