LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [RFC 1/2] powerpc: add all the iSeries virtual devices to the device tree
From: Stephen Rothwell @ 2006-04-25 11:54 UTC (permalink / raw)
  To: ppc-dev

We do this by putting them in the flattened device tree at setup time.
This required the flattened device tree blob to be made bigger.

Currently we don't do anything with these.

Also make a function static.

Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
---

 arch/powerpc/platforms/iseries/setup.c |   87 ++++++++++++++++++++++++++++++++
 arch/powerpc/platforms/iseries/vio.c   |    2 -
 2 files changed, 87 insertions(+), 2 deletions(-)

-- 
Cheers,
Stephen Rothwell                    sfr@canb.auug.org.au
http://www.canb.auug.org.au/~sfr/

676ab187518bfb5e48aa4c3cc418456e67a2ed70
diff --git a/arch/powerpc/platforms/iseries/setup.c b/arch/powerpc/platforms/iseries/setup.c
index 3c51448..9ce2afc 100644
--- a/arch/powerpc/platforms/iseries/setup.c
+++ b/arch/powerpc/platforms/iseries/setup.c
@@ -45,6 +45,7 @@ #include <asm/paca.h>
 #include <asm/cache.h>
 #include <asm/sections.h>
 #include <asm/abs_addr.h>
+#include <asm/iseries/hv_types.h>
 #include <asm/iseries/hv_lp_config.h>
 #include <asm/iseries/hv_call_event.h>
 #include <asm/iseries/hv_call_xm.h>
@@ -710,7 +711,7 @@ define_machine(iseries) {
 };
 
 struct blob {
-	unsigned char data[PAGE_SIZE];
+	unsigned char data[PAGE_SIZE * 2];
 	unsigned long next;
 };
 
@@ -911,6 +912,88 @@ void dt_model(struct iseries_flat_dt *dt
 	dt_prop_str(dt, "compatible", "IBM,iSeries");
 }
 
+void dt_vdevices(struct iseries_flat_dt *dt)
+{
+	u32 reg = 0;
+	HvLpIndexMap vlan_map;
+	int i;
+	char buf[32];
+
+	dt_start_node(dt, "vdevice");
+	dt_prop_u32(dt, "#address-cells", 1);
+	dt_prop_u32(dt, "#size-cells", 0);
+
+	snprintf(buf, sizeof(buf), "viocons@%08x", reg);
+	dt_start_node(dt, buf);
+	dt_prop_str(dt, "device_type", "serial");
+	dt_prop_empty(dt, "compatible");
+	dt_prop_u32(dt, "reg", reg);
+	dt_end_node(dt);
+	reg++;
+
+	snprintf(buf, sizeof(buf), "v-scsi@%08x", reg);
+	dt_start_node(dt, buf);
+	dt_prop_str(dt, "device_type", "vscsi");
+	dt_prop_str(dt, "compatible", "IBM,v-scsi");
+	dt_prop_u32(dt, "reg", reg);
+	dt_end_node(dt);
+	reg++;
+
+	vlan_map = HvLpConfig_getVirtualLanIndexMap();
+	for (i = 0; i < HVMAXARCHITECTEDVIRTUALLANS; i++) {
+		unsigned char mac_addr[6];
+
+		if ((vlan_map & (0x8000 >> i)) == 0)
+			continue;
+		snprintf(buf, 32, "vlan@%08x", reg + i);
+		dt_start_node(dt, buf);
+		dt_prop_str(dt, "device_type", "vlan");
+		dt_prop_empty(dt, "compatible");
+		dt_prop_u32(dt, "reg", reg + i);
+
+		mac_addr[0] = 0x02;
+		mac_addr[1] = 0x01;
+		mac_addr[2] = 0xff;
+		mac_addr[3] = i;
+		mac_addr[4] = 0xff;
+		mac_addr[5] = HvLpConfig_getLpIndex_outline();
+		dt_prop(dt, "local-mac-address", (char *)mac_addr, 6);
+		dt_prop(dt, "mac-address", (char *)mac_addr, 6);
+
+		dt_end_node(dt);
+	}
+	reg += HVMAXARCHITECTEDVIRTUALLANS;
+
+	for (i = 0; i < HVMAXARCHITECTEDVIRTUALDISKS; i++) {
+		snprintf(buf, 32, "viodasd@%08x", reg + i);
+		dt_start_node(dt, buf);
+		dt_prop_str(dt, "device_type", "viodasd");
+		dt_prop_empty(dt, "compatible");
+		dt_prop_u32(dt, "reg", reg + i);
+		dt_end_node(dt);
+	}
+	reg += HVMAXARCHITECTEDVIRTUALDISKS;
+	for (i = 0; i < HVMAXARCHITECTEDVIRTUALCDROMS; i++) {
+		snprintf(buf, 32, "viocd@%08x", reg + i);
+		dt_start_node(dt, buf);
+		dt_prop_str(dt, "device_type", "viocd");
+		dt_prop_empty(dt, "compatible");
+		dt_prop_u32(dt, "reg", reg + i);
+		dt_end_node(dt);
+	}
+	reg += HVMAXARCHITECTEDVIRTUALCDROMS;
+	for (i = 0; i < HVMAXARCHITECTEDVIRTUALTAPES; i++) {
+		snprintf(buf, 32, "viotape@%08x", reg + i);
+		dt_start_node(dt, buf);
+		dt_prop_str(dt, "device_type", "viotape");
+		dt_prop_empty(dt, "compatible");
+		dt_prop_u32(dt, "reg", reg + i);
+		dt_end_node(dt);
+	}
+
+	dt_end_node(dt);
+}
+
 void build_flat_dt(struct iseries_flat_dt *dt, unsigned long phys_mem_size)
 {
 	u64 tmp[2];
@@ -941,6 +1024,8 @@ void build_flat_dt(struct iseries_flat_d
 
 	dt_cpus(dt);
 
+	dt_vdevices(dt);
+
 	dt_end_node(dt);
 
 	dt_push_u32(dt, OF_DT_END);
diff --git a/arch/powerpc/platforms/iseries/vio.c b/arch/powerpc/platforms/iseries/vio.c
index ad36ab0..22045a2 100644
--- a/arch/powerpc/platforms/iseries/vio.c
+++ b/arch/powerpc/platforms/iseries/vio.c
@@ -71,7 +71,7 @@ static struct vio_dev *__init vio_regist
 	return viodev;
 }
 
-void __init probe_bus_iseries(void)
+static void __init probe_bus_iseries(void)
 {
 	HvLpIndexMap vlan_map;
 	struct vio_dev *viodev;
-- 
1.3.1

^ permalink raw reply related

* [PATCH] fix cpm_uart driver for PQ1...
From: David Jander @ 2006-04-25  9:55 UTC (permalink / raw)
  To: linuxppc-embedded

[-- Attachment #1: Type: text/plain, Size: 473 bytes --]


Hi,

This patch fixes the following three problems:

1. Memory mapping virtual<-->dma is broken in the case that 
CONFIG_CONSISTENT_START > CPM_ADDR.

2. SCC uart sends a break sequence each time it is stopped with the 
CPM_CR_STOP_TX command. That means that each time an application closes the 
serial device, a break is transmitted.

3. Implemented default BRG routing for PQ1 (in the same sense as it is done 
for PQ2).

Signed-off-by: David Jander

Regards,

David.


[-- Attachment #2: cpm_uart_patch.diff --]
[-- Type: text/x-diff, Size: 6578 bytes --]

diff --git a/drivers/serial/cpm_uart/cpm_uart_cpm1.c b/drivers/serial/cpm_uart/cpm_uart_cpm1.c
--- a/drivers/serial/cpm_uart/cpm_uart_cpm1.c
+++ b/drivers/serial/cpm_uart/cpm_uart_cpm1.c
@@ -138,25 +138,33 @@ void smc2_lineif(struct uart_cpm_port *p
 
 void scc1_lineif(struct uart_cpm_port *pinfo)
 {
+	volatile cpm8xx_t *cp = cpmp;
 	/* XXX SCC1: insert port configuration here */
+	cp->cp_sicr = (cp->cp_sicr & ~0x0000003f) | 0x00000000; /* Route BRG1 to SCC1 */
 	pinfo->brg = 1;
 }
 
 void scc2_lineif(struct uart_cpm_port *pinfo)
 {
+	volatile cpm8xx_t *cp = cpmp;
 	/* XXX SCC2: insert port configuration here */
+	cp->cp_sicr = (cp->cp_sicr & ~0x00003f00) | 0x00000900; /* Route BRG2 to SCC2 */
 	pinfo->brg = 2;
 }
 
 void scc3_lineif(struct uart_cpm_port *pinfo)
 {
+	volatile cpm8xx_t *cp = cpmp;
 	/* XXX SCC3: insert port configuration here */
+	cp->cp_sicr = (cp->cp_sicr & ~0x003f0000) | 0x00120000; /* Route BRG3 to SCC3 */
 	pinfo->brg = 3;
 }
 
 void scc4_lineif(struct uart_cpm_port *pinfo)
 {
+	volatile cpm8xx_t *cp = cpmp;
 	/* XXX SCC4: insert port configuration here */
+	cp->cp_sicr = (cp->cp_sicr & ~0x3f000000) | 0x1b000000; /* Route BRG4 to SCC4 */
 	pinfo->brg = 4;
 }
 
@@ -191,11 +199,11 @@ int cpm_uart_allocbuf(struct uart_cpm_po
 		/* was hostalloc but changed cause it blows away the */
 		/* large tlb mapping when pinning the kernel area    */
 		mem_addr = (u8 *) cpm_dpram_addr(cpm_dpalloc(memsz, 8));
-		dma_addr = 0;
+		dma_addr = (dma_addr_t)mem_addr;
 	} else
 		mem_addr = dma_alloc_coherent(NULL, memsz, &dma_addr,
 					      GFP_KERNEL);
-
+	
 	if (mem_addr == NULL) {
 		cpm_dpfree(dp_offset);
 		printk(KERN_ERR
@@ -206,6 +214,7 @@ int cpm_uart_allocbuf(struct uart_cpm_po
 	pinfo->dp_addr = dp_offset;
 	pinfo->mem_addr = mem_addr;
 	pinfo->dma_addr = dma_addr;
+	pinfo->dma_offset = (dma_addr_t)((unsigned long)dma_addr - (unsigned long)mem_addr);
 
 	pinfo->rx_buf = mem_addr;
 	pinfo->tx_buf = pinfo->rx_buf + L1_CACHE_ALIGN(pinfo->rx_nrfifos
diff --git a/drivers/serial/cpm_uart/cpm_uart_core.c b/drivers/serial/cpm_uart/cpm_uart_core.c
--- a/drivers/serial/cpm_uart/cpm_uart_core.c
+++ b/drivers/serial/cpm_uart/cpm_uart_core.c
@@ -71,17 +71,19 @@ static void cpm_uart_initbd(struct uart_
 
 /**************************************************************/
 
-static inline unsigned long cpu2cpm_addr(void *addr)
+static inline unsigned long cpu2cpm_addr(void *addr, unsigned long offset)
 {
-	if ((unsigned long)addr >= CPM_ADDR)
-		return (unsigned long)addr;
+	if (((unsigned long)addr >= CPM_ADDR) 
+		|| ((unsigned long)addr >= CONFIG_CONSISTENT_START))
+		return (unsigned long)addr + offset;
 	return virt_to_bus(addr);
 }
 
-static inline void *cpm2cpu_addr(unsigned long addr)
+static inline void *cpm2cpu_addr(unsigned long addr, unsigned long offset)
 {
-	if (addr >= CPM_ADDR)
-		return (void *)addr;
+	if (((unsigned long)(addr - offset) >= CPM_ADDR) 
+		|| ((unsigned long)(addr - offset) >= CONFIG_CONSISTENT_START))
+		return (void *)(addr - offset);
 	return bus_to_virt(addr);
 }
 
@@ -260,7 +262,7 @@ static void cpm_uart_int_rx(struct uart_
 		}
 
 		/* get pointer */
-		cp = cpm2cpu_addr(bdp->cbd_bufaddr);
+		cp = cpm2cpu_addr(bdp->cbd_bufaddr, pinfo->dma_offset);
 
 		/* loop through the buffer */
 		while (i-- > 0) {
@@ -441,7 +443,8 @@ static void cpm_uart_shutdown(struct uar
 		}
 
 		/* Shut them really down */
-		cpm_line_cr_cmd(line, CPM_CR_STOP_TX);
+		if (IS_SMC(pinfo)) cpm_line_cr_cmd(line, CPM_CR_STOP_TX);
+		else cpm_line_cr_cmd(line, CPM_CR_STOP_TX+1); /* Do graceful stop of SCC uart */
 	}
 }
 
@@ -603,7 +606,7 @@ static int cpm_uart_tx_pump(struct uart_
 		/* Pick next descriptor and fill from buffer */
 		bdp = pinfo->tx_cur;
 
-		p = cpm2cpu_addr(bdp->cbd_bufaddr);
+		p = cpm2cpu_addr(bdp->cbd_bufaddr, pinfo->dma_offset);
 
 		*p++ = xmit->buf[xmit->tail];
 		bdp->cbd_datlen = 1;
@@ -630,7 +633,7 @@ static int cpm_uart_tx_pump(struct uart_
 
 	while (!(bdp->cbd_sc & BD_SC_READY) && (xmit->tail != xmit->head)) {
 		count = 0;
-		p = cpm2cpu_addr(bdp->cbd_bufaddr);
+		p = cpm2cpu_addr(bdp->cbd_bufaddr, pinfo->dma_offset);
 		while (count < pinfo->tx_fifosize) {
 			*p++ = xmit->buf[xmit->tail];
 			xmit->tail = (xmit->tail + 1) & (UART_XMIT_SIZE - 1);
@@ -668,6 +671,7 @@ static void cpm_uart_initbd(struct uart_
 {
 	int i;
 	u8 *mem_addr;
+	unsigned long dma_offset;
 	volatile cbd_t *bdp;
 
 	pr_debug("CPM uart[%d]:initbd\n", pinfo->port.line);
@@ -677,14 +681,15 @@ static void cpm_uart_initbd(struct uart_
 	 * virtual address for us to work with.
 	 */
 	mem_addr = pinfo->mem_addr;
+	dma_offset = pinfo->dma_offset;
 	bdp = pinfo->rx_cur = pinfo->rx_bd_base;
 	for (i = 0; i < (pinfo->rx_nrfifos - 1); i++, bdp++) {
-		bdp->cbd_bufaddr = cpu2cpm_addr(mem_addr);
+		bdp->cbd_bufaddr = cpu2cpm_addr(mem_addr, dma_offset);
 		bdp->cbd_sc = BD_SC_EMPTY | BD_SC_INTRPT;
 		mem_addr += pinfo->rx_fifosize;
 	}
 
-	bdp->cbd_bufaddr = cpu2cpm_addr(mem_addr);
+	bdp->cbd_bufaddr = cpu2cpm_addr(mem_addr, dma_offset);
 	bdp->cbd_sc = BD_SC_WRAP | BD_SC_EMPTY | BD_SC_INTRPT;
 
 	/* Set the physical address of the host memory
@@ -694,12 +699,12 @@ static void cpm_uart_initbd(struct uart_
 	mem_addr = pinfo->mem_addr + L1_CACHE_ALIGN(pinfo->rx_nrfifos * pinfo->rx_fifosize);
 	bdp = pinfo->tx_cur = pinfo->tx_bd_base;
 	for (i = 0; i < (pinfo->tx_nrfifos - 1); i++, bdp++) {
-		bdp->cbd_bufaddr = cpu2cpm_addr(mem_addr);
+		bdp->cbd_bufaddr = cpu2cpm_addr(mem_addr, dma_offset);
 		bdp->cbd_sc = BD_SC_INTRPT;
 		mem_addr += pinfo->tx_fifosize;
 	}
 
-	bdp->cbd_bufaddr = cpu2cpm_addr(mem_addr);
+	bdp->cbd_bufaddr = cpu2cpm_addr(mem_addr, dma_offset);
 	bdp->cbd_sc = BD_SC_WRAP | BD_SC_INTRPT;
 }
 
@@ -1029,7 +1034,7 @@ static void cpm_uart_console_write(struc
 		 * If the buffer address is in the CPM DPRAM, don't
 		 * convert it.
 		 */
-		cp = cpm2cpu_addr(bdp->cbd_bufaddr);
+		cp = cpm2cpu_addr(bdp->cbd_bufaddr, pinfo->dma_offset);
 
 		*cp = *s;
 
@@ -1046,7 +1051,7 @@ static void cpm_uart_console_write(struc
 			while ((bdp->cbd_sc & BD_SC_READY) != 0)
 				;
 
-			cp = cpm2cpu_addr(bdp->cbd_bufaddr);
+			cp = cpm2cpu_addr(bdp->cbd_bufaddr, pinfo->dma_offset);
 
 			*cp = 13;
 			bdp->cbd_datlen = 1;
diff --git a/drivers/serial/cpm_uart/cpm_uart.h b/drivers/serial/cpm_uart/cpm_uart.h
--- a/drivers/serial/cpm_uart/cpm_uart.h
+++ b/drivers/serial/cpm_uart/cpm_uart.h
@@ -64,6 +64,7 @@ struct uart_cpm_port {
 	uint			 dp_addr;
 	void			*mem_addr;
 	dma_addr_t		 dma_addr;
+	unsigned long	 dma_offset;
 	/* helpers */
 	int			 baud;
 	int			 bits;

^ permalink raw reply

* Question about porting  linux2.6 on mpc860T
From: bharathi kandimalla @ 2006-04-25  9:29 UTC (permalink / raw)
  To: linuxppc-embedded@ozlabs.org; +Cc: linuxppc-embedded@ozlabs.org

[-- Attachment #1: Type: text/plain, Size: 464 bytes --]

Hi
  I know there exits a lot of changes in the linux2.6
and i want to know what are the changes 
related to architecture? 
  As mpc860T is a slow processor 
Is it considerably slow for mpc860T on linux 2.6  ?

I came to know there are some bugs on mpc860T for linux2.6 
Is there any bugs?

what version of linux(2.4/2.6) is good for mpc860T ? 
regards  
thank you.

---------------------------------
Yahoo! Mail goes everywhere you do.  Get it on your phone.

[-- Attachment #2: Type: text/html, Size: 682 bytes --]

^ permalink raw reply

* Re: [PATCH 0/7] [RFC] Sizing zones and holes in an architecture independent manner V4
From: Mel Gorman @ 2006-04-25  9:05 UTC (permalink / raw)
  To: KAMEZAWA Hiroyuki
  Cc: davej, tony.luck, linux-mm, ak, bob.picco, linux-kernel,
	linuxppc-dev
In-Reply-To: <20060425104855.42c6ca62.kamezawa.hiroyu@jp.fujitsu.com>

On Tue, 25 Apr 2006, KAMEZAWA Hiroyuki wrote:

> On Mon, 24 Apr 2006 21:20:09 +0100 (IST)
> Mel Gorman <mel@csn.ul.ie> wrote:
>
>> This is V4 of the patchset to size zones and memory holes in an
>> architecture-independent manner.
>>
>
> Could you add some documentation about 'how to use' your generic funcs ?
> I think more archs can use your generic routine if well documented.
>
> All initialization path can be written in following way ?
> ==
> for_all_memory_region()
> 	add_active_range(nid, start, end)
> free_area_init_nodes(max_dma, max_dma32, max_low_pfn, max_pfn);
> ==

Yes, that is accurate. I can write in some documentation.

>
> And following functions are really needed ?

Most of them, yes. Without them, the arch-specific could would need to be 
able to iterate through the list of active regions which would lead to 
more similar-looking code.

> ==

> +extern void remove_all_active_ranges(void);

Used by x86_64 when it finds the SRAT table is bad and needs to rediscover 
active regions in another way. Spontaneous reboots were possible without 
it.

> +extern void get_pfn_range_for_nid(unsigned int nid,
> +			unsigned long *start_pfn, unsigned long *end_pfn);

Currently only used by power when initialising the boot allocator. There 
were no obvious canditates for use elsewhere.

> +extern unsigned long find_min_pfn_with_active_regions(void);

This does not need to be in a header.

> +extern unsigned long find_max_pfn_with_active_regions(void);

Could be dropped, only required by x86_64.

> +extern int early_pfn_to_nid(unsigned long pfn);

Removed two copies of similar functions from power and x86. I thought I 
could replace the IA64 one as well, but the code was unusual enough there 
that I decided not to.

> +extern void free_bootmem_with_active_regions(int nid,
> +						unsigned long max_low_pfn);

Removes similar-looking code from power and x86_64.

> +extern void sparse_memory_present_with_active_regions(int nid);

This is only used by power.

> +extern unsigned long absent_pages_in_range(unsigned long start_pfn,
> +						unsigned long end_pfn);
>

Removes similar looking code from x86_64. For all other architectures, it 
does the job of the existing functions that calculate zholes_size[].

-- 
Mel Gorman
Part-time Phd Student                          Linux Technology Center
University of Limerick                         IBM Dublin Software Lab

^ permalink raw reply

* [PATCH] ppc32: add 440GX erratum 440_43 workaround
From: Eugene Surovegin @ 2006-04-25  8:22 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: linuxppc-embedded

This patch adds workaround for PPC 440GX erratum 440_43. According to 
this erratum spurious MachineChecks (caused by L1 cache parity) can 
happen during DataTLB miss processing. We disable L1 cache parity 
checking for 440GX rev.C and rev.F

Signed-off-by: Eugene Surovegin <ebs@ebshome.net>

diff --git a/arch/ppc/platforms/4xx/ocotea.c b/arch/ppc/platforms/4xx/ocotea.c
index f841972..554776d 100644
--- a/arch/ppc/platforms/4xx/ocotea.c
+++ b/arch/ppc/platforms/4xx/ocotea.c
@@ -331,7 +331,7 @@ static void __init ocotea_init(void)
 void __init platform_init(unsigned long r3, unsigned long r4,
 		unsigned long r5, unsigned long r6, unsigned long r7)
 {
-	ibm44x_platform_init(r3, r4, r5, r6, r7);
+	ibm440gx_platform_init(r3, r4, r5, r6, r7);
 
 	ppc_md.setup_arch = ocotea_setup_arch;
 	ppc_md.show_cpuinfo = ocotea_show_cpuinfo;
diff --git a/arch/ppc/syslib/ibm440gx_common.c b/arch/ppc/syslib/ibm440gx_common.c
index a7dd55f..f6cc168 100644
--- a/arch/ppc/syslib/ibm440gx_common.c
+++ b/arch/ppc/syslib/ibm440gx_common.c
@@ -2,7 +2,7 @@
  * PPC440GX system library
  *
  * Eugene Surovegin <eugene.surovegin@zultys.com> or <ebs@ebshome.net>
- * Copyright (c) 2003, 2004 Zultys Technologies
+ * Copyright (c) 2003 - 2006 Zultys Technologies
  *
  * This program is free software; you can redistribute  it and/or modify it
  * under  the terms of  the GNU General  Public License as published by the
@@ -282,3 +282,14 @@ int ibm440gx_show_cpuinfo(struct seq_fil
 	return 0;
 }
 
+void __init ibm440gx_platform_init(unsigned long r3, unsigned long r4,
+				   unsigned long r5, unsigned long r6,
+				   unsigned long r7)
+{
+	/* Erratum 440_43 workaround, disable L1 cache parity checking */
+	if (!strcmp(cur_cpu_spec->cpu_name, "440GX Rev. C") ||
+	    !strcmp(cur_cpu_spec->cpu_name, "440GX Rev. F"))
+		mtspr(SPRN_CCR1, mfspr(SPRN_CCR1) | CCR1_DPC);
+
+	ibm44x_platform_init(r3, r4, r5, r6, r7);
+}
diff --git a/arch/ppc/syslib/ibm440gx_common.h b/arch/ppc/syslib/ibm440gx_common.h
index a2ab9fa..a03ec60 100644
--- a/arch/ppc/syslib/ibm440gx_common.h
+++ b/arch/ppc/syslib/ibm440gx_common.h
@@ -29,6 +29,10 @@ #include <syslib/ibm44x_common.h>
 void ibm440gx_get_clocks(struct ibm44x_clocks*, unsigned int sys_clk,
 	unsigned int ser_clk) __init;
 
+/* common 440GX platform init */
+void ibm440gx_platform_init(unsigned long r3, unsigned long r4, unsigned long r5,
+			    unsigned long r6, unsigned long r7) __init;
+
 /* Enable L2 cache */
 void ibm440gx_l2c_enable(void) __init;
 
diff --git a/include/asm-ppc/reg_booke.h b/include/asm-ppc/reg_booke.h
index 00ad9c7..4944c0f 100644
--- a/include/asm-ppc/reg_booke.h
+++ b/include/asm-ppc/reg_booke.h
@@ -237,6 +237,7 @@ #define SPRN_CSRR1	SPRN_SRR3 /* Critical
 #endif
 
 /* Bit definitions for CCR1. */
+#define	CCR1_DPC	0x00000100 /* Disable L1 I-Cache/D-Cache parity checking */
 #define	CCR1_TCS	0x00000080 /* Timer Clock Select */
 
 /* Bit definitions for the MCSR. */

^ permalink raw reply related

* Re: 85xx FDT updates?
From: Eugene Surovegin @ 2006-04-25  7:49 UTC (permalink / raw)
  To: Kumar Gala; +Cc: linuxppc-dev@ozlabs.org
In-Reply-To: <34C11E8A-ED04-490F-B601-841DC1F94AD6@kernel.crashing.org>

On Tue, Apr 25, 2006 at 01:08:00AM -0500, Kumar Gala wrote:
> 
> On Apr 24, 2006, at 5:31 PM, Dan Malek wrote:
> 
> >
> > On Apr 24, 2006, at 5:43 PM, Kumar Gala wrote:
> >
> >> However, I am against removing the arch/ppc support until the u-boot
> >> patches are picked up.  I think its bad form to give people a kernel
> >> they can't easily boot.
> >
> > What about systems that can't update u-boot, but want to run
> > a newer kernel?
> 
> Does this situation exist for any in tree boards that are supported?

Kumar, with all due respect, I don't like this attitude. I can have 
eval board where I don't want to upgrade firmware.

Also, this "let's screw everybody who doesn't have their board support 
in tree" is very counter productive. FYI, almost all my contributed 
kernel work was done on custom hardware, not on reference boards.

It seems some people here lost touch with reality - embedded Linux is 
mostly run on custom hardware, not on evaluation boards.

-- 
Eugene

^ permalink raw reply

* Re: 85xx FDT updates?
From: Kumar Gala @ 2006-04-25  6:08 UTC (permalink / raw)
  To: Dan Malek; +Cc: linuxppc-dev@ozlabs.org
In-Reply-To: <17017F03-078C-4B7F-A961-EC371F534E27@embeddedalley.com>


On Apr 24, 2006, at 5:31 PM, Dan Malek wrote:

>
> On Apr 24, 2006, at 5:43 PM, Kumar Gala wrote:
>
>> However, I am against removing the arch/ppc support until the u-boot
>> patches are picked up.  I think its bad form to give people a kernel
>> they can't easily boot.
>
> What about systems that can't update u-boot, but want to run
> a newer kernel?

Does this situation exist for any in tree boards that are supported?

- kumar

^ permalink raw reply

* Re: Clock running fast on Mac Mini?
From: Benjamin Herrenschmidt @ 2006-04-25  4:14 UTC (permalink / raw)
  To: Mich Lanners; +Cc: linuxppc-dev
In-Reply-To: <E1FY8kd-0000mH-PO@owl.grunz.lu>

On Mon, 2006-04-24 at 23:37 +0200, Mich Lanners wrote:
> Hello all,
> 
> I have a problem on my Mac Mini with the kernel clock, which is running
> fast.
> 
> In fact, it is gaining 10 ms every 5 seconds, as can be seen here, with
> ntpdate called every 5 seconds:
> 
> owl:~# while true; do ntpdate -u be.pool.ntp.org; sleep 5; done
> 24 Apr 23:14:17 ntpdate[2601]: adjust time server 195.47.215.226 offset 0.151197 sec
> 24 Apr 23:14:22 ntpdate[2607]: adjust time server 195.47.215.226 offset 0.160348 sec
> 24 Apr 23:14:28 ntpdate[2613]: adjust time server 195.47.215.226 offset 0.170311 sec
> [...]
> 24 Apr 23:17:15 ntpdate[2813]: adjust time server 195.47.215.226 offset 0.489635 sec
> 24 Apr 23:17:20 ntpdate[2819]: adjust time server 195.47.215.226 offset 0.499386 sec
> 24 Apr 23:17:25 ntpdate[2825]: step time server 195.47.215.226 offset 0.509342 sec
> 24 Apr 23:17:31 ntpdate[2831]: adjust time server 195.47.215.226 offset 0.013693 sec
> 24 Apr 23:17:36 ntpdate[2837]: adjust time server 195.47.215.226 offset 0.021668 sec
> 
> and so on.
> 
> This is with ntpd stopped, after a fresh boot with /etc/adjtime removed
> to make sure hwclock is not falsly tuning the kernel timekeeping (I did
> this because I once had a 2.6.16 kernel running, where clock problems
> have been reported. I thought that may have falsified ntp's drift file
> and/or hwclock's /etc/adjtime).
> 
> All this is with a 2.6.15 kernel.
> 
> My Alu powerbook with a 2.6.15 kernel does not show this problem.

Could be several things... the clock calibration data from the
device-tree incorrect, or a rouding bug we fixed recently... I'd expect
2.6.16 to be good.

Ben

^ permalink raw reply

* Re: [PATCH 0/7] [RFC] Sizing zones and holes in an architecture independent manner V4
From: KAMEZAWA Hiroyuki @ 2006-04-25  1:48 UTC (permalink / raw)
  To: Mel Gorman
  Cc: davej, tony.luck, linux-mm, mel, ak, bob.picco, linux-kernel,
	linuxppc-dev
In-Reply-To: <20060424202009.20409.89016.sendpatchset@skynet>

On Mon, 24 Apr 2006 21:20:09 +0100 (IST)
Mel Gorman <mel@csn.ul.ie> wrote:

> This is V4 of the patchset to size zones and memory holes in an
> architecture-independent manner.
> 

Could you add some documentation about 'how to use' your generic funcs ?
I think more archs can use your generic routine if well documented.

All initialization path can be written in following way ?
==
for_all_memory_region()
	add_active_range(nid, start, end)
free_area_init_nodes(max_dma, max_dma32, max_low_pfn, max_pfn);
==

And following functions are really needed ?
==
+extern void remove_all_active_ranges(void);
+extern void get_pfn_range_for_nid(unsigned int nid,
+			unsigned long *start_pfn, unsigned long *end_pfn);
+extern unsigned long find_min_pfn_with_active_regions(void);
+extern unsigned long find_max_pfn_with_active_regions(void);
+extern int early_pfn_to_nid(unsigned long pfn);
+extern void free_bootmem_with_active_regions(int nid,
+						unsigned long max_low_pfn);
+extern void sparse_memory_present_with_active_regions(int nid);
+extern unsigned long absent_pages_in_range(unsigned long start_pfn,
+						unsigned long end_pfn);

-Kame

^ permalink raw reply

* Re: [PATCH] Wire up *at syscalls
From: David Woodhouse @ 2006-04-25  0:13 UTC (permalink / raw)
  To: arnd; +Cc: linuxppc-dev
In-Reply-To: <jeu08ippb9.fsf@sykes.suse.de>


>   * please add new calls to arch/powerpc/platforms/cell/spu_callbacks.c 

Hm, do we really need that? How about something based on this instead...
note that I used CONFIG_PPC_CELL because dependencies on CONFIG_*_MODULE
in the static kernel are evil.

Syscall 224 is missing from the existing spu_syscall_table, btw.

diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.S
index 8d15226..8371f14 100644
--- a/arch/powerpc/kernel/systbl.S
+++ b/arch/powerpc/kernel/systbl.S
@@ -18,17 +18,29 @@ #include <linux/config.h>
 #include <asm/ppc_asm.h>
 
 #ifdef CONFIG_PPC64
-#define SYSCALL(func)		.llong	.sys_##func,.sys_##func
-#define COMPAT_SYS(func)	.llong	.sys_##func,.compat_sys_##func
-#define PPC_SYS(func)		.llong	.ppc_##func,.ppc_##func
-#define OLDSYS(func)		.llong	.sys_ni_syscall,.sys_ni_syscall
-#define SYS32ONLY(func)		.llong	.sys_ni_syscall,.compat_sys_##func
-#define SYSX(f, f3264, f32)	.llong	.f,.f3264
+
+#ifdef CONFIG_PPC_CELL
+#define SPU(x) ,x
+#else
+#define SPU(x)
+#endif
+
+#define SYSCALL(func)		.llong	.sys_##func,.sys_##func SPU(.sys_ni_syscall)
+#define SYSCALL_SPU(func)	.llong	.sys_##func,.sys_##func SPU(.sys_##func)
+#define COMPAT_SYS(func)	.llong	.sys_##func,.compat_sys_##func SPU(.sys_ni_syscall)
+#define COMPAT_SYS_SPU(func)	.llong	.sys_##func,.compat_sys_##func SPU(.sys_##func)
+#define PPC_SYS(func)		.llong	.ppc_##func,.ppc_##func SPU(.sys_ni_syscall)
+#define PPC_SYS_SPU(func)	.llong	.ppc_##func,.ppc_##func SPU(.ppc_##func)
+#define OLDSYS(func)		.llong	.sys_ni_syscall,.sys_ni_syscall SPU(.sys_ni_syscall)
+#define SYS32ONLY(func)		.llong	.sys_ni_syscall,.compat_sys_##func SPU(.sys_ni_syscall)
+#define SYSX(f, f3264, f32)	.llong	.f,.f3264 SPU(.f)
 #else
-#define SYSCALL(func)		.long	sys_##func
 #define COMPAT_SYS(func)	.long	sys_##func
+#define COMPAT_SYS_SPU(func)	.long	sys_##func
 #define PPC_SYS(func)		.long	ppc_##func
+#define PPC_SYS_SPU(func)	.long	ppc_##func
 #define OLDSYS(func)		.long	sys_##func
+#define OLDSYS_SPU(func)	.long	sys_##func
 #define SYS32ONLY(func)		.long	sys_##func
 #define SYSX(f, f3264, f32)	.long	f32
 #endif
@@ -42,175 +54,175 @@ _GLOBAL(sys_call_table)
 SYSCALL(restart_syscall)
 SYSCALL(exit)
 PPC_SYS(fork)
-SYSCALL(read)
-SYSCALL(write)
-COMPAT_SYS(open)
-SYSCALL(close)
-COMPAT_SYS(waitpid)
-COMPAT_SYS(creat)
-SYSCALL(link)
-SYSCALL(unlink)
+SYSCALL_SPU(read)
+SYSCALL_SPU(write)
+COMPAT_SYS_SPU(open)
+SYSCALL_SPU(close)
+COMPAT_SYS_SPU(waitpid)
+COMPAT_SYS_SPU(creat)
+SYSCALL_SPU(link)
+SYSCALL_SPU(unlink)
 COMPAT_SYS(execve)
-SYSCALL(chdir)
-COMPAT_SYS(time)
-SYSCALL(mknod)
-SYSCALL(chmod)
-SYSCALL(lchown)
+SYSCALL_SPU(chdir)
+COMPAT_SYS_SPU(time)
+SYSCALL_SPU(mknod)
+SYSCALL_SPU(chmod)
+SYSCALL_SPU(lchown)
 SYSCALL(ni_syscall)
 OLDSYS(stat)
-SYSX(sys_lseek,ppc32_lseek,sys_lseek)
-SYSCALL(getpid)
+SYSX_SPU(sys_lseek,ppc32_lseek,sys_lseek)
+SYSCALL_SPU(getpid)
 COMPAT_SYS(mount)
 SYSX(sys_ni_syscall,sys_oldumount,sys_oldumount)
-SYSCALL(setuid)
-SYSCALL(getuid)
-COMPAT_SYS(stime)
+SYSCALL_SPU(setuid)
+SYSCALL_SPU(getuid)
+COMPAT_SYS_SPU(stime)
 COMPAT_SYS(ptrace)
-SYSCALL(alarm)
+SYSCALL_SPU(alarm)
 OLDSYS(fstat)
 COMPAT_SYS(pause)
 COMPAT_SYS(utime)
 SYSCALL(ni_syscall)
 SYSCALL(ni_syscall)
-COMPAT_SYS(access)
-COMPAT_SYS(nice)
+COMPAT_SYS_SPU(access)
+COMPAT_SYS_SPU(nice)
 SYSCALL(ni_syscall)
-SYSCALL(sync)
-COMPAT_SYS(kill)
-SYSCALL(rename)
-COMPAT_SYS(mkdir)
-SYSCALL(rmdir)
-SYSCALL(dup)
-SYSCALL(pipe)
-COMPAT_SYS(times)
+SYSCALL_SPU(sync)
+COMPAT_SYS_SPU(kill)
+SYSCALL_SPU(rename)
+COMPAT_SYS_SPU(mkdir)
+SYSCALL_SPU(rmdir)
+SYSCALL_SPU(dup)
+SYSCALL_SPU(pipe)
+COMPAT_SYS_SPU(times)
 SYSCALL(ni_syscall)
-SYSCALL(brk)
-SYSCALL(setgid)
-SYSCALL(getgid)
+SYSCALL_SPU(brk)
+SYSCALL_SPU(setgid)
+SYSCALL_SPU(getgid)
 SYSCALL(signal)
-SYSCALL(geteuid)
-SYSCALL(getegid)
+SYSCALL_SPU(geteuid)
+SYSCALL_SPU(getegid)
 SYSCALL(acct)
 SYSCALL(umount)
 SYSCALL(ni_syscall)
-COMPAT_SYS(ioctl)
-COMPAT_SYS(fcntl)
+COMPAT_SYS_SPU(ioctl)
+COMPAT_SYS_SPU(fcntl)
 SYSCALL(ni_syscall)
-COMPAT_SYS(setpgid)
+COMPAT_SYS_SPU(setpgid)
 SYSCALL(ni_syscall)
 SYSX(sys_ni_syscall,sys_olduname, sys_olduname)
-COMPAT_SYS(umask)
-SYSCALL(chroot)
+COMPAT_SYS_SPU(umask)
+SYSCALL_SPU(chroot)
 SYSCALL(ustat)
-SYSCALL(dup2)
-SYSCALL(getppid)
-SYSCALL(getpgrp)
-SYSCALL(setsid)
+SYSCALL_SPU(dup2)
+SYSCALL_SPU(getppid)
+SYSCALL_SPU(getpgrp)
+SYSCALL_SPU(setsid)
 SYS32ONLY(sigaction)
-SYSCALL(sgetmask)
-COMPAT_SYS(ssetmask)
-SYSCALL(setreuid)
-SYSCALL(setregid)
+SYSCALL_SPU(sgetmask)
+COMPAT_SYS_SPU(ssetmask)
+SYSCALL_SPU(setreuid)
+SYSCALL_SPU(setregid)
 SYS32ONLY(sigsuspend)
 COMPAT_SYS(sigpending)
-COMPAT_SYS(sethostname)
-COMPAT_SYS(setrlimit)
+COMPAT_SYS_SPU(sethostname)
+COMPAT_SYS_SPU(setrlimit)
 COMPAT_SYS(old_getrlimit)
-COMPAT_SYS(getrusage)
-COMPAT_SYS(gettimeofday)
-COMPAT_SYS(settimeofday)
-COMPAT_SYS(getgroups)
-COMPAT_SYS(setgroups)
+COMPAT_SYS_SPU(getrusage)
+COMPAT_SYS_SPU(gettimeofday)
+COMPAT_SYS_SPU(settimeofday)
+COMPAT_SYS_SPU(getgroups)
+COMPAT_SYS_SPU(setgroups)
 SYSX(sys_ni_syscall,sys_ni_syscall,ppc_select)
-SYSCALL(symlink)
+SYSCALL_SPU(symlink)
 OLDSYS(lstat)
-COMPAT_SYS(readlink)
+COMPAT_SYS_SPU(readlink)
 SYSCALL(uselib)
 SYSCALL(swapon)
 SYSCALL(reboot)
 SYSX(sys_ni_syscall,old32_readdir,old_readdir)
-SYSCALL(mmap)
-SYSCALL(munmap)
-SYSCALL(truncate)
-SYSCALL(ftruncate)
-SYSCALL(fchmod)
-SYSCALL(fchown)
-COMPAT_SYS(getpriority)
-COMPAT_SYS(setpriority)
+SYSCALL_SPU(mmap)
+SYSCALL_SPU(munmap)
+SYSCALL_SPU(truncate)
+SYSCALL_SPU(ftruncate)
+SYSCALL_SPU(fchmod)
+SYSCALL_SPU(fchown)
+COMPAT_SYS_SPU(getpriority)
+COMPAT_SYS_SPU(setpriority)
 SYSCALL(ni_syscall)
 COMPAT_SYS(statfs)
 COMPAT_SYS(fstatfs)
 SYSCALL(ni_syscall)
-COMPAT_SYS(socketcall)
-COMPAT_SYS(syslog)
-COMPAT_SYS(setitimer)
-COMPAT_SYS(getitimer)
-COMPAT_SYS(newstat)
-COMPAT_SYS(newlstat)
-COMPAT_SYS(newfstat)
+COMPAT_SYS_SPU(socketcall)
+COMPAT_SYS_SPU(syslog)
+COMPAT_SYS_SPU(setitimer)
+COMPAT_SYS_SPU(getitimer)
+COMPAT_SYS_SPU(newstat)
+COMPAT_SYS_SPU(newlstat)
+COMPAT_SYS_SPU(newfstat)
 SYSX(sys_ni_syscall,sys_uname,sys_uname)
 SYSCALL(ni_syscall)
-SYSCALL(vhangup)
+SYSCALL_SPU(vhangup)
 SYSCALL(ni_syscall)
 SYSCALL(ni_syscall)
-COMPAT_SYS(wait4)
+COMPAT_SYS_SPU(wait4)
 SYSCALL(swapoff)
-COMPAT_SYS(sysinfo)
+COMPAT_SYS_SPU(sysinfo)
 COMPAT_SYS(ipc)
-SYSCALL(fsync)
+SYSCALL_SPU(fsync)
 SYS32ONLY(sigreturn)
 PPC_SYS(clone)
-COMPAT_SYS(setdomainname)
-PPC_SYS(newuname)
+COMPAT_SYS_SPU(setdomainname)
+PPC_SYS_SPU(newuname)
 SYSCALL(ni_syscall)
-COMPAT_SYS(adjtimex)
-SYSCALL(mprotect)
+COMPAT_SYS_SPU(adjtimex)
+SYSCALL_SPU(mprotect)
 SYSX(sys_ni_syscall,compat_sys_sigprocmask,sys_sigprocmask)
 SYSCALL(ni_syscall)
 SYSCALL(init_module)
 SYSCALL(delete_module)
 SYSCALL(ni_syscall)
 SYSCALL(quotactl)
-COMPAT_SYS(getpgid)
-SYSCALL(fchdir)
-SYSCALL(bdflush)
+COMPAT_SYS_SPU(getpgid)
+SYSCALL_SPU(fchdir)
+SYSCALL_SPU(bdflush)
 COMPAT_SYS(sysfs)
-SYSX(ppc64_personality,ppc64_personality,sys_personality)
+SYSX_SPU(ppc64_personality,ppc64_personality,sys_personality)
 SYSCALL(ni_syscall)
-SYSCALL(setfsuid)
-SYSCALL(setfsgid)
-SYSCALL(llseek)
-COMPAT_SYS(getdents)
-SYSX(sys_select,ppc32_select,ppc_select)
-SYSCALL(flock)
-SYSCALL(msync)
-COMPAT_SYS(readv)
-COMPAT_SYS(writev)
-COMPAT_SYS(getsid)
-SYSCALL(fdatasync)
+SYSCALL_SPU(setfsuid)
+SYSCALL_SPU(setfsgid)
+SYSCALL_SPU(llseek)
+COMPAT_SYS_SPU(getdents)
+SYSX_SPU(sys_select,ppc32_select,ppc_select)
+SYSCALL_SPU(flock)
+SYSCALL_SPU(msync)
+COMPAT_SYS_SPU(readv)
+COMPAT_SYS_SPU(writev)
+COMPAT_SYS_SPU(getsid)
+SYSCALL_SPU(fdatasync)
 COMPAT_SYS(sysctl)
-SYSCALL(mlock)
-SYSCALL(munlock)
-SYSCALL(mlockall)
-SYSCALL(munlockall)
-COMPAT_SYS(sched_setparam)
-COMPAT_SYS(sched_getparam)
-COMPAT_SYS(sched_setscheduler)
-COMPAT_SYS(sched_getscheduler)
-SYSCALL(sched_yield)
-COMPAT_SYS(sched_get_priority_max)
-COMPAT_SYS(sched_get_priority_min)
-COMPAT_SYS(sched_rr_get_interval)
-COMPAT_SYS(nanosleep)
-SYSCALL(mremap)
-SYSCALL(setresuid)
-SYSCALL(getresuid)
+SYSCALL_SPU(mlock)
+SYSCALL_SPU(munlock)
+SYSCALL_SPU(mlockall)
+SYSCALL_SPU(munlockall)
+COMPAT_SYS_SPU(sched_setparam)
+COMPAT_SYS_SPU(sched_getparam)
+COMPAT_SYS_SPU(sched_setscheduler)
+COMPAT_SYS_SPU(sched_getscheduler)
+SYSCALL_SPU(sched_yield)
+COMPAT_SYS_SPU(sched_get_priority_max)
+COMPAT_SYS_SPU(sched_get_priority_min)
+COMPAT_SYS_SPU(sched_rr_get_interval)
+COMPAT_SYS_SPU(nanosleep)
+SYSCALL_SPU(mremap)
+SYSCALL_SPU(setresuid)
+SYSCALL_SPU(getresuid)
 SYSCALL(ni_syscall)
-SYSCALL(poll)
+SYSCALL_SPU(poll)
 COMPAT_SYS(nfsservctl)
-SYSCALL(setresgid)
-SYSCALL(getresgid)
-COMPAT_SYS(prctl)
+SYSCALL_SPU(setresgid)
+SYSCALL_SPU(getresgid)
+COMPAT_SYS_SPU(prctl)
 COMPAT_SYS(rt_sigreturn)
 COMPAT_SYS(rt_sigaction)
 COMPAT_SYS(rt_sigprocmask)
@@ -218,19 +230,19 @@ COMPAT_SYS(rt_sigpending)
 COMPAT_SYS(rt_sigtimedwait)
 COMPAT_SYS(rt_sigqueueinfo)
 COMPAT_SYS(rt_sigsuspend)
-COMPAT_SYS(pread64)
-COMPAT_SYS(pwrite64)
-SYSCALL(chown)
-SYSCALL(getcwd)
-SYSCALL(capget)
-SYSCALL(capset)
+COMPAT_SYS_SPU(pread64)
+COMPAT_SYS_SPU(pwrite64)
+SYSCALL_SPU(chown)
+SYSCALL_SPU(getcwd)
+SYSCALL_SPU(capget)
+SYSCALL_SPU(capset)
 COMPAT_SYS(sigaltstack)
-SYSX(sys_sendfile64,compat_sys_sendfile,sys_sendfile)
+SYSX_SPU(sys_sendfile64,compat_sys_sendfile,sys_sendfile)
 SYSCALL(ni_syscall)
 SYSCALL(ni_syscall)
 PPC_SYS(vfork)
-COMPAT_SYS(getrlimit)
-COMPAT_SYS(readahead)
+COMPAT_SYS_SPU(getrlimit)
+COMPAT_SYS_SPU(readahead)
 SYS32ONLY(mmap2)
 SYS32ONLY(truncate64)
 SYS32ONLY(ftruncate64)
@@ -241,60 +253,60 @@ SYSCALL(pciconfig_read)
 SYSCALL(pciconfig_write)
 SYSCALL(pciconfig_iobase)
 SYSCALL(ni_syscall)
-SYSCALL(getdents64)
-SYSCALL(pivot_root)
+SYSCALL_SPU(getdents64)
+SYSCALL_SPU(pivot_root)
 SYSX(sys_ni_syscall,compat_sys_fcntl64,sys_fcntl64)
-SYSCALL(madvise)
-SYSCALL(mincore)
-SYSCALL(gettid)
-SYSCALL(tkill)
-SYSCALL(setxattr)
-SYSCALL(lsetxattr)
-SYSCALL(fsetxattr)
-SYSCALL(getxattr)
-SYSCALL(lgetxattr)
-SYSCALL(fgetxattr)
-SYSCALL(listxattr)
-SYSCALL(llistxattr)
-SYSCALL(flistxattr)
-SYSCALL(removexattr)
-SYSCALL(lremovexattr)
-SYSCALL(fremovexattr)
-COMPAT_SYS(futex)
-COMPAT_SYS(sched_setaffinity)
-COMPAT_SYS(sched_getaffinity)
+SYSCALL_SPU(madvise)
+SYSCALL_SPU(mincore)
+SYSCALL_SPU(gettid)
+SYSCALL_SPU(tkill)
+SYSCALL_SPU(setxattr)
+SYSCALL_SPU(lsetxattr)
+SYSCALL_SPU(fsetxattr)
+SYSCALL_SPU(getxattr)
+SYSCALL_SPU(lgetxattr)
+SYSCALL_SPU(fgetxattr)
+SYSCALL_SPU(listxattr)
+SYSCALL_SPU(llistxattr)
+SYSCALL_SPU(flistxattr)
+SYSCALL_SPU(removexattr)
+SYSCALL_SPU(lremovexattr)
+SYSCALL_SPU(fremovexattr)
+COMPAT_SYS_SPU(futex)
+COMPAT_SYS_SPU(sched_setaffinity)
+COMPAT_SYS_SPU(sched_getaffinity)
 SYSCALL(ni_syscall)
 SYSCALL(ni_syscall)
 SYS32ONLY(sendfile64)
-COMPAT_SYS(io_setup)
-SYSCALL(io_destroy)
-COMPAT_SYS(io_getevents)
-COMPAT_SYS(io_submit)
-SYSCALL(io_cancel)
+COMPAT_SYS_SPU(io_setup)
+SYSCALL_SPU(io_destroy)
+COMPAT_SYS_SPU(io_getevents)
+COMPAT_SYS_SPU(io_submit)
+SYSCALL_SPU(io_cancel)
 SYSCALL(set_tid_address)
-SYSX(sys_fadvise64,ppc32_fadvise64,sys_fadvise64)
+SYSX_SPU(sys_fadvise64,ppc32_fadvise64,sys_fadvise64)
 SYSCALL(exit_group)
 SYSX(sys_lookup_dcookie,ppc32_lookup_dcookie,sys_lookup_dcookie)
-SYSCALL(epoll_create)
-SYSCALL(epoll_ctl)
-SYSCALL(epoll_wait)
-SYSCALL(remap_file_pages)
-SYSX(sys_timer_create,compat_sys_timer_create,sys_timer_create)
-COMPAT_SYS(timer_settime)
-COMPAT_SYS(timer_gettime)
-SYSCALL(timer_getoverrun)
-SYSCALL(timer_delete)
-COMPAT_SYS(clock_settime)
-COMPAT_SYS(clock_gettime)
-COMPAT_SYS(clock_getres)
-COMPAT_SYS(clock_nanosleep)
+SYSCALL_SPU(epoll_create)
+SYSCALL_SPU(epoll_ctl)
+SYSCALL_SPU(epoll_wait)
+SYSCALL_SPU(remap_file_pages)
+SYSX_SPU(sys_timer_create,compat_sys_timer_create,sys_timer_create)
+COMPAT_SYS_SPU(timer_settime)
+COMPAT_SYS_SPU(timer_gettime)
+SYSCALL_SPU(timer_getoverrun)
+SYSCALL_SPU(timer_delete)
+COMPAT_SYS_SPU(clock_settime)
+COMPAT_SYS_SPU(clock_gettime)
+COMPAT_SYS_SPU(clock_getres)
+COMPAT_SYS_SPU(clock_nanosleep)
 SYSX(ppc64_swapcontext,ppc32_swapcontext,ppc_swapcontext)
-COMPAT_SYS(tgkill)
-COMPAT_SYS(utimes)
-COMPAT_SYS(statfs64)
-COMPAT_SYS(fstatfs64)
+COMPAT_SYS_SPU(tgkill)
+COMPAT_SYS_SPU(utimes)
+COMPAT_SYS_SPU(statfs64)
+COMPAT_SYS_SPU(fstatfs64)
 SYSX(sys_ni_syscall, ppc_fadvise64_64, ppc_fadvise64_64)
-PPC_SYS(rtas)
+PPC_SYS_SPU(rtas)
 OLDSYS(debug_setcontext)
 SYSCALL(ni_syscall)
 SYSCALL(ni_syscall)
@@ -321,11 +333,6 @@ SYSCALL(spu_run)
 SYSCALL(spu_create)
 COMPAT_SYS(pselect6)
 COMPAT_SYS(ppoll)
-SYSCALL(unshare)
-SYSCALL(splice)
-SYSCALL(tee)
-
-/*
- * please add new calls to arch/powerpc/platforms/cell/spu_callbacks.c
- * as well when appropriate.
- */
+SYSCALL_SPU(unshare)
+SYSCALL_SPU(splice)
+SYSCALL_SPU(tee)

-- 
dwmw2

^ permalink raw reply related

* Re: [PATCH] Wire up *at syscalls
From: Andreas Schwab @ 2006-04-25  0:03 UTC (permalink / raw)
  To: Arnd Bergmann; +Cc: linuxppc-dev
In-Reply-To: <200604250131.22154.arnd@arndb.de>

Arnd Bergmann <arnd@arndb.de> writes:

> Simply removing the #ifdef looks wrong. AFAICT, powerpc and ia64 are the
> only ones that want both sys_newfstatat and sys_fstatat64, maybe you
> can simply do 
>
> #if !defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_NEWSTATFSAT)

Updated patch below.

Andreas.

Signed-off-by: Andreas Schwab <schwab@suse.de>

---
 arch/powerpc/kernel/systbl.S                |   13 +++++++++++++
 arch/powerpc/platforms/cell/spu_callbacks.c |   13 +++++++++++++
 fs/stat.c                                   |    2 +-
 include/asm-powerpc/unistd.h                |   20 +++++++++++++++++++-
 4 files changed, 46 insertions(+), 2 deletions(-)

Index: linux-2.6.17-rc2-git5/arch/powerpc/kernel/systbl.S
===================================================================
--- linux-2.6.17-rc2-git5.orig/arch/powerpc/kernel/systbl.S	2006-04-24 16:24:47.000000000 +0200
+++ linux-2.6.17-rc2-git5/arch/powerpc/kernel/systbl.S	2006-04-24 19:53:50.000000000 +0200
@@ -324,6 +324,19 @@ COMPAT_SYS(ppoll)
 SYSCALL(unshare)
 SYSCALL(splice)
 SYSCALL(tee)
+COMPAT_SYS(openat)
+SYSCALL(mkdirat)
+SYSCALL(mknodat)
+SYSCALL(fchownat)
+COMPAT_SYS(futimesat)
+SYSX(sys_newfstatat, sys_fstatat64, sys_fstatat64)
+SYSCALL(unlinkat)
+SYSCALL(renameat)
+SYSCALL(linkat)
+SYSCALL(symlinkat)
+SYSCALL(readlinkat)
+SYSCALL(fchmodat)
+SYSCALL(faccessat)
 
 /*
  * please add new calls to arch/powerpc/platforms/cell/spu_callbacks.c
Index: linux-2.6.17-rc2-git5/include/asm-powerpc/unistd.h
===================================================================
--- linux-2.6.17-rc2-git5.orig/include/asm-powerpc/unistd.h	2006-04-24 16:24:15.000000000 +0200
+++ linux-2.6.17-rc2-git5/include/asm-powerpc/unistd.h	2006-04-25 01:35:38.000000000 +0200
@@ -303,8 +303,25 @@
 #define __NR_unshare		282
 #define __NR_splice		283
 #define __NR_tee		284
+#define __NR_openat		285
+#define __NR_mkdirat		286
+#define __NR_mknodat		287
+#define __NR_fchownat		288
+#define __NR_futimesat		289
+#ifdef __powerpc64__
+#define __NR_newfstatat		290
+#else
+#define __NR_fstatat64		290
+#endif
+#define __NR_unlinkat		291
+#define __NR_renameat		292
+#define __NR_linkat		293
+#define __NR_symlinkat		294
+#define __NR_readlinkat		295
+#define __NR_fchmodat		296
+#define __NR_faccessat		297
 
-#define __NR_syscalls		285
+#define __NR_syscalls		298
 
 #ifdef __KERNEL__
 #define __NR__exit __NR_exit
@@ -457,6 +474,7 @@ type name(type1 arg1, type2 arg2, type3 
 #ifdef CONFIG_PPC64
 #define __ARCH_WANT_COMPAT_SYS_TIME
 #define __ARCH_WANT_COMPAT_SYS_RT_SIGSUSPEND
+#define __ARCH_WANT_SYS_NEWFSTATAT
 #endif
 
 /*
Index: linux-2.6.17-rc2-git5/arch/powerpc/platforms/cell/spu_callbacks.c
===================================================================
--- linux-2.6.17-rc2-git5.orig/arch/powerpc/platforms/cell/spu_callbacks.c	2006-04-24 16:24:47.000000000 +0200
+++ linux-2.6.17-rc2-git5/arch/powerpc/platforms/cell/spu_callbacks.c	2006-04-24 23:51:16.000000000 +0200
@@ -318,6 +318,19 @@ void *spu_syscall_table[] = {
 	[__NR_unshare]			sys_unshare,
 	[__NR_splice]			sys_splice,
 	[__NR_tee]			sys_tee,
+	[__NR_openat]			sys_openat,
+	[__NR_mkdirat]			sys_mkdirat,
+	[__NR_mknodat]			sys_mknodat,
+	[__NR_fchownat]			sys_fchownat,
+	[__NR_futimesat]		sys_futimesat,
+	[__NR_newfstatat]		sys_newfstatat,
+	[__NR_unlinkat]			sys_unlinkat,
+	[__NR_renameat]			sys_renameat,
+	[__NR_linkat]			sys_linkat,
+	[__NR_symlinkat]		sys_symlinkat,
+	[__NR_readlinkat]		sys_readlinkat,
+	[__NR_fchmodat]			sys_fchmodat,
+	[__NR_faccessat]		sys_faccessat,
 };
 
 long spu_sys_callback(struct spu_syscall_block *s)
Index: linux-2.6.17-rc2-git5/fs/stat.c
===================================================================
--- linux-2.6.17-rc2-git5.orig/fs/stat.c	2006-04-24 18:05:23.000000000 +0200
+++ linux-2.6.17-rc2-git5/fs/stat.c	2006-04-25 01:37:06.000000000 +0200
@@ -261,7 +261,7 @@ asmlinkage long sys_newlstat(char __user
 	return error;
 }
 
-#ifndef __ARCH_WANT_STAT64
+#if !defined(__ARCH_WANT_STAT64) || defined(__ARCH_WANT_SYS_NEWFSTATAT)
 asmlinkage long sys_newfstatat(int dfd, char __user *filename,
 				struct stat __user *statbuf, int flag)
 {

-- 
Andreas Schwab, SuSE Labs, schwab@suse.de
SuSE Linux Products GmbH, Maxfeldstraße 5, 90409 Nürnberg, Germany
PGP key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."

^ permalink raw reply

* Re: [PATCH] Wire up *at syscalls
From: Arnd Bergmann @ 2006-04-24 23:31 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <jeu08ippb9.fsf@sykes.suse.de>

QW0gVHVlc2RheSAyNSBBcHJpbCAyMDA2IDAwOjQzIHNjaHJpZWIgQW5kcmVhcyBTY2h3YWI6Cj4g
SW5kZXg6IGxpbnV4LTIuNi4xNy1yYzItZ2l0NS9hcmNoL3Bvd2VycGMvcGxhdGZvcm1zL2NlbGwv
c3B1X2NhbGxiYWNrcy5jCj4gPT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09
PT09PT09PT09PT09PT09PT09PT09PT09PT09PQo+IC0tLQo+IGxpbnV4LTIuNi4xNy1yYzItZ2l0
NS5vcmlnL2FyY2gvcG93ZXJwYy9wbGF0Zm9ybXMvY2VsbC9zcHVfY2FsbGJhY2tzLmOgoKCgoAo+
oDIwMDYtMDQtMjQgMTY6MjQ6NDcuMDAwMDAwMDAwICswMjAwICsrKwo+IGxpbnV4LTIuNi4xNy1y
YzItZ2l0NS9hcmNoL3Bvd2VycGMvcGxhdGZvcm1zL2NlbGwvc3B1X2NhbGxiYWNrcy5joKCgMjAw
Ni0wNAo+LTI0IDE4OjA3OjU5LjAwMDAwMDAwMCArMDIwMCBAQCAtMzE4LDYgKzMxOCwxOSBAQCB2
b2lkICpzcHVfc3lzY2FsbF90YWJsZVtdCj4gPSB7Cj4goKCgoKCgoKBbX19OUl91bnNoYXJlXaCg
oKCgoKCgoKCgoKCgoKCgoHN5c191bnNoYXJlLAo+IKCgoKCgoKCgW19fTlJfc3BsaWNlXaCgoKCg
oKCgoKCgoKCgoKCgoKBzeXNfc3BsaWNlLAo+IKCgoKCgoKCgW19fTlJfdGVlXaCgoKCgoKCgoKCg
oKCgoKCgoKCgoKBzeXNfdGVlLAo+ICugoKCgoKCgW19fTlJfb3BlbmF0XaCgoKCgoKCgoKCgoKCg
oKCgoKBzeXNfb3BlbmF0LAo+ICugoKCgoKCgW19fTlJfbWtkaXJhdF2goKCgoKCgoKCgoKCgoKCg
oKBzeXNfbWtkaXJhdCwKPiAroKCgoKCgoFtfX05SX21rbm9kYXRdoKCgoKCgoKCgoKCgoKCgoKCg
c3lzX21rbm9kYXQsCj4gK6CgoKCgoKBbX19OUl9mY2hvd25hdF2goKCgoKCgoKCgoKCgoKCgoHN5
c19mY2hvd25hdCwKPiAroKCgoKCgoFtfX05SX2Z1dGltZXNhdF2goKCgoKCgoKCgoKCgoKCgc3lz
X2Z1dGltZXNhdCwKPiAroKCgoKCgoFtfX05SX25ld2ZzdGF0YXRdoKCgoKCgoKCgoKCgoKCgc3lz
X25ld2ZzdGF0YXQsCj4gK6CgoKCgoKBbX19OUl91bmxpbmthdF2goKCgoKCgoKCgoKCgoKCgoHN5
c191bmxpbmthdCwKPiAroKCgoKCgoFtfX05SX3JlbmFtZWF0XaCgoKCgoKCgoKCgoKCgoKCgc3lz
X3JlbmFtZWF0LAo+ICugoKCgoKCgW19fTlJfbGlua2F0XaCgoKCgoKCgoKCgoKCgoKCgoKBzeXNf
bGlua2F0LAo+ICugoKCgoKCgW19fTlJfc3ltbGlua2F0XaCgoKCgoKCgoKCgoKCgoKBzeXNfc3lt
bGlua2F0LAo+ICugoKCgoKCgW19fTlJfcmVhZGxpbmthdF2goKCgoKCgoKCgoKCgoKBzeXNfcmVh
ZGxpbmthdCwKPiAroKCgoKCgoFtfX05SX2ZjaG1vZGF0XaCgoKCgoKCgoKCgoKCgoKCgc3lzX2Zj
aG1vZGF0LAo+ICugoKCgoKCgW19fTlJfZmFjY2Vzc2F0XaCgoKCgoKCgoKCgoKCgoKBzeXNfZmFj
Y2Vzc2F0LAo+IKB9Owo+IKAKClRoZSBzcHUgcGFydHMgbG9vayBmaW5lLCB0aGFua3MuCgo+IKBs
b25nIHNwdV9zeXNfY2FsbGJhY2soc3RydWN0IHNwdV9zeXNjYWxsX2Jsb2NrICpzKQo+IEluZGV4
OiBsaW51eC0yLjYuMTctcmMyLWdpdDUvZnMvc3RhdC5jCj4gPT09PT09PT09PT09PT09PT09PT09
PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PT09PQo+IC0tLSBsaW51
eC0yLjYuMTctcmMyLWdpdDUub3JpZy9mcy9zdGF0LmOgoKCgoKCgoDIwMDYtMDQtMjQKPiAxODow
NToyMy4wMDAwMDAwMDAgKzAyMDAgKysrIGxpbnV4LTIuNi4xNy1yYzItZ2l0NS9mcy9zdGF0LmOg
oKCgoDIwMDYtMDQtMjQKPiAxODowNTo0NC4wMDAwMDAwMDAgKzAyMDAgQEAgLTI2MSw3ICsyNjEs
NiBAQCBhc21saW5rYWdlIGxvbmcKPiBzeXNfbmV3bHN0YXQoY2hhciBfX3VzZXIKPiCgoKCgoKCg
oHJldHVybiBlcnJvcjsKPiCgfQo+IKAKPiAtI2lmbmRlZiBfX0FSQ0hfV0FOVF9TVEFUNjQKPiCg
YXNtbGlua2FnZSBsb25nIHN5c19uZXdmc3RhdGF0KGludCBkZmQsIGNoYXIgX191c2VyICpmaWxl
bmFtZSwKPiCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoKCgoHN0cnVjdCBzdGF0IF9fdXNl
ciAqc3RhdGJ1ZiwgaW50IGZsYWcpCj4goHsKPiBAQCAtMjgyLDcgKzI4MSw2IEBAIGFzbWxpbmth
Z2UgbG9uZyBzeXNfbmV3ZnN0YXRhdChpbnQgZGZkLAo+IKBvdXQ6Cj4goKCgoKCgoKByZXR1cm4g
ZXJyb3I7Cj4goH0KPiAtI2VuZGlmCj4goAo+IKBhc21saW5rYWdlIGxvbmcgc3lzX25ld2ZzdGF0
KHVuc2lnbmVkIGludCBmZCwgc3RydWN0IHN0YXQgX191c2VyICpzdGF0YnVmKQo+IKB7CgpTaW1w
bHkgcmVtb3ZpbmcgdGhlICNpZmRlZiBsb29rcyB3cm9uZy4gQUZBSUNULCBwb3dlcnBjIGFuZCBp
YTY0IGFyZSB0aGUKb25seSBvbmVzIHRoYXQgd2FudCBib3RoIHN5c19uZXdmc3RhdGF0IGFuZCBz
eXNfZnN0YXRhdDY0LCBtYXliZSB5b3UKY2FuIHNpbXBseSBkbyAKCiNpZiAhZGVmaW5lZChfX0FS
Q0hfV0FOVF9TVEFUNjQpIHx8IGRlZmluZWQoX19BUkNIX1dBTlRfTkVXU1RBVEZTQVQpCgoJQXJu
ZCA8PjwK

^ permalink raw reply

* Re: alignment exceptionhandler sleeps in invalid context
From: Paul Mackerras @ 2006-04-24 23:12 UTC (permalink / raw)
  To: Olaf Hering; +Cc: linuxppc-dev
In-Reply-To: <20060424173246.GA22318@suse.de>

Olaf Hering writes:

> I'm not sure where the bug is. Does it mean the network stack does
> something nasty, or is the exception handler itself broken? (probably the latter)
> This is 2.6.16.9 on a p270.

The alignment handler does a get_user to read the instruction.  I
suppose it will have to read the instruction directly if the exception
occurred in kernel mode.

Paul.

^ permalink raw reply

* Re: Xilinx Virtex-2 PRO FPGA ppc 405 on ML310 board
From: Aidan Williams @ 2006-04-24 23:02 UTC (permalink / raw)
  To: Vincent Winstead; +Cc: linuxppc-embedded list
In-Reply-To: <20060424203047.86885.qmail@web52005.mail.yahoo.com>


After generating a new auto-config.in using the BSP,
you'll need to:

   0. put new auto-config.in into kernel directory
      linux-2.4.x/arch/ppc/platforms/xilinx_auto

   1. change to the top uClinux-dist directory
      (*NOT* the kernel directory)

   2. In "Vendor/Product Selection  --->"
      Choose "Xilinx"
      Choose "powerpc-auto"

   3. In "Kernel/Library/Defaults Selection  --->"
      Choose linux-2.4.x as your kernel, making sure that
      this is a symlink to or copy of the linuxppc-2.4
      tarball from UQ.

   4. In "Kernel/Library/Defaults Selection  --->"
      Choose "[*] Default all settings (lose changes)"

   5. Save and quit..

This process will pull in default Xilinx/powerpc config
files from vendors/Xilinx/powerpc-auto/config.*
for the kernel and for other uClinux components.

It will also generate a new linux-2.4.x/.config
based on your auto-config.in and the defaults
in vendors/Xilinx/powerpc-auto/config.linux-2.4.x.

btw, I don't have an ML310 board.  I have however gotten
the UQ kernel + uclinux to work on a v2pro FF1152 board
and a virtex-4 FX-12 minimodule.

another btw, the linuxppc-2.4 code has an auto-config.in
inside it which, if you follow the above steps (without
doing step 0), should compile.  The supplied auto-config.in
won't match your board, but it should at least compile.

- aidan


Vincent Winstead wrote:
> guess what - I got the BSP to work!  But now I have a problem with the 
> make command.  I put the auto-config.in where it needs to be then I did 
> a make menuconfig to configure it, then I did a make dep, but when I did 
> a make is when  an error came up that I can't seem to figure out:
> 
> In file included from 
> /home/ml310_linux/uClinux-dist/linuxppc-2.4/include/linux/pagemap.h:16,
>                  from 
> /home/ml310_linux/uClinux-dist/linuxppc-2.4/include/linux/locks.h:8,
>                  from 
> /home/ml310_linux/uClinux-dist/linuxppc-2.4/include/linux/blk.h:5,
>                  from init/main.c:25:
> /home/ml310_linux/uClinux-dist/linuxppc-2.4/include/linux/highmem.h: In 
> function `kmap':
> /home/ml310_linux/uClinux-dist/linuxppc-2.4/include/linux/highmem.h:68: 
> error: `CONFIG_KERNEL_START' undeclared (first use in this function)
> init/main.c: In function `start_kernel':
> init/main.c:393: error: `CONFIG_KERNEL_START' undeclared (first use in 
> this function)
> make[1]: *** [init/main.o] Error 1
> make[1]: Leaving directory `/home/ml310_linux/uClinux-dist/linuxppc-2.4'
> make: *** [linux] Error 1
> 
> This CONFIG_KERNEL_START is the problem.  It doesn't seem to be defined 
> anywhere and I guess it needs to be.  Is this something I need to get 
> from somewhere?  Or is it maybe generated along with the BSP so I would 
> have to put a start number into the platform Studio configuration?
> 
> -Vincent
> 
> 
> */Aidan Williams /* wrote:
> 
> 
>     Vincent Winstead wrote:
>      > Now, as far as step 5, am I supposed to have a symbolic link that is
>      > named linux-2.4.x placed into the uClinux-dist directory? Because
>      > there's already a folder named linux-2.4.x which was in there
>     already
>      > when I untarred everything. At the command prompt in the
>     uClinux-dist
>      > directory I entered the following line:
>      >
>      > ln -s ../linuxppc-2.4 linux-2.4.x
>      >
>      > and the result of this operation was to put a symbolic link into my
>      > linuxppc-2.4 directory with the name of linux-2.4.x - is this
>     correct?
>      >
> 
>     First, you'll need to move the existing directory aside using
>     a command like:
> 
>     mv linux-2.4.x linux-2.4.x-dist
> 
>     and then re-run the ln -s command above.
> 
>      > Now on to Step 6 problem.
>      > How am I supposed to make use uClinux EDK Board Support Package 1.0
>      > files? I'm not sure how to go about using them in the Xilinx
>     Platform
>      > Studio in order to generate the necessary auto-config.in file.
>      >
> 
>     See the document below for the general approach:
> 
>      > Even though it is about the microblaze rather than
>      > the PPC, a helpful "getting started" document is:
>      >
>     http://www.itee.uq.edu.au/~wu/downloads/uClinux_ready_Microblaze_design.pdf
>      >
> 
>     Look particularly at the section "Software Platform Settings"
>     on page 29, steps 67,68.
> 
>     If you are not overly familiar with the EDK, it would
>     be best to find someone locally who can help walk you
>     through the process of generating a system.
> 
>     - aidan
> 
> 
>     --------------------------------------------------------------------------
>     This email and any attachments may be confidential. They may contain
>     legally
>     privileged information or copyright material. You should not read, copy,
>     use or disclose them without authorisation. If you are not an intended
>     recipient, please contact us at once by return email and then delete
>     both
>     messages. We do not accept liability in connection with computer virus,
>     data corruption, delay, interruption, unauthorised access or
>     unauthorised
>     amendment. This notice should not be removed.
> 
> 

^ permalink raw reply

* [PATCH] Wire up *at syscalls
From: Andreas Schwab @ 2006-04-24 22:43 UTC (permalink / raw)
  To: linuxppc-dev

This patch has been tested on ppc64 (using glibc's testsuite, both 32bit
and 64bit), and compile-tested for ppc32 (I have currently no ppc32 system
available, but I expect no problems).

Signed-off-by: Andreas Schwab <schwab@suse.de>

---
 arch/powerpc/kernel/systbl.S                |   13 +++++++++++++
 arch/powerpc/platforms/cell/spu_callbacks.c |   13 +++++++++++++
 fs/stat.c                                   |    2 --
 include/asm-powerpc/unistd.h                |   19 ++++++++++++++++++-
 4 files changed, 44 insertions(+), 3 deletions(-)

Index: linux-2.6.17-rc2-git5/arch/powerpc/kernel/systbl.S
===================================================================
--- linux-2.6.17-rc2-git5.orig/arch/powerpc/kernel/systbl.S	2006-04-24 16:24:47.000000000 +0200
+++ linux-2.6.17-rc2-git5/arch/powerpc/kernel/systbl.S	2006-04-24 19:53:50.000000000 +0200
@@ -324,6 +324,19 @@ COMPAT_SYS(ppoll)
 SYSCALL(unshare)
 SYSCALL(splice)
 SYSCALL(tee)
+COMPAT_SYS(openat)
+SYSCALL(mkdirat)
+SYSCALL(mknodat)
+SYSCALL(fchownat)
+COMPAT_SYS(futimesat)
+SYSX(sys_newfstatat, sys_fstatat64, sys_fstatat64)
+SYSCALL(unlinkat)
+SYSCALL(renameat)
+SYSCALL(linkat)
+SYSCALL(symlinkat)
+SYSCALL(readlinkat)
+SYSCALL(fchmodat)
+SYSCALL(faccessat)
 
 /*
  * please add new calls to arch/powerpc/platforms/cell/spu_callbacks.c
Index: linux-2.6.17-rc2-git5/include/asm-powerpc/unistd.h
===================================================================
--- linux-2.6.17-rc2-git5.orig/include/asm-powerpc/unistd.h	2006-04-24 16:24:15.000000000 +0200
+++ linux-2.6.17-rc2-git5/include/asm-powerpc/unistd.h	2006-04-24 18:06:53.000000000 +0200
@@ -303,8 +303,25 @@
 #define __NR_unshare		282
 #define __NR_splice		283
 #define __NR_tee		284
+#define __NR_openat		285
+#define __NR_mkdirat		286
+#define __NR_mknodat		287
+#define __NR_fchownat		288
+#define __NR_futimesat		289
+#ifdef __powerpc64__
+#define __NR_newfstatat		290
+#else
+#define __NR_fstatat64		290
+#endif
+#define __NR_unlinkat		291
+#define __NR_renameat		292
+#define __NR_linkat		293
+#define __NR_symlinkat		294
+#define __NR_readlinkat		295
+#define __NR_fchmodat		296
+#define __NR_faccessat		297
 
-#define __NR_syscalls		285
+#define __NR_syscalls		298
 
 #ifdef __KERNEL__
 #define __NR__exit __NR_exit
Index: linux-2.6.17-rc2-git5/arch/powerpc/platforms/cell/spu_callbacks.c
===================================================================
--- linux-2.6.17-rc2-git5.orig/arch/powerpc/platforms/cell/spu_callbacks.c	2006-04-24 16:24:47.000000000 +0200
+++ linux-2.6.17-rc2-git5/arch/powerpc/platforms/cell/spu_callbacks.c	2006-04-24 18:07:59.000000000 +0200
@@ -318,6 +318,19 @@ void *spu_syscall_table[] = {
 	[__NR_unshare]			sys_unshare,
 	[__NR_splice]			sys_splice,
 	[__NR_tee]			sys_tee,
+	[__NR_openat]			sys_openat,
+	[__NR_mkdirat]			sys_mkdirat,
+	[__NR_mknodat]			sys_mknodat,
+	[__NR_fchownat]			sys_fchownat,
+	[__NR_futimesat]		sys_futimesat,
+	[__NR_newfstatat]		sys_newfstatat,
+	[__NR_unlinkat]			sys_unlinkat,
+	[__NR_renameat]			sys_renameat,
+	[__NR_linkat]			sys_linkat,
+	[__NR_symlinkat]		sys_symlinkat,
+	[__NR_readlinkat]		sys_readlinkat,
+	[__NR_fchmodat]			sys_fchmodat,
+	[__NR_faccessat]		sys_faccessat,
 };
 
 long spu_sys_callback(struct spu_syscall_block *s)
Index: linux-2.6.17-rc2-git5/fs/stat.c
===================================================================
--- linux-2.6.17-rc2-git5.orig/fs/stat.c	2006-04-24 18:05:23.000000000 +0200
+++ linux-2.6.17-rc2-git5/fs/stat.c	2006-04-24 18:05:44.000000000 +0200
@@ -261,7 +261,6 @@ asmlinkage long sys_newlstat(char __user
 	return error;
 }
 
-#ifndef __ARCH_WANT_STAT64
 asmlinkage long sys_newfstatat(int dfd, char __user *filename,
 				struct stat __user *statbuf, int flag)
 {
@@ -282,7 +281,6 @@ asmlinkage long sys_newfstatat(int dfd, 
 out:
 	return error;
 }
-#endif
 
 asmlinkage long sys_newfstat(unsigned int fd, struct stat __user *statbuf)
 {

Andreas.

-- 
Andreas Schwab, SuSE Labs, schwab@suse.de
SuSE Linux Products GmbH, Maxfeldstraße 5, 90409 Nürnberg, Germany
PGP key fingerprint = 58CA 54C7 6D53 942B 1756  01D3 44D5 214B 8276 4ED5
"And now for something completely different."

^ permalink raw reply

* Re: 85xx FDT updates?
From: Dan Malek @ 2006-04-24 22:31 UTC (permalink / raw)
  To: Kumar Gala; +Cc: linuxppc-dev@ozlabs.org
In-Reply-To: <4C61B597-BD91-4D05-BB40-43DE0319F123@kernel.crashing.org>


On Apr 24, 2006, at 5:43 PM, Kumar Gala wrote:

> However, I am against removing the arch/ppc support until the u-boot
> patches are picked up.  I think its bad form to give people a kernel
> they can't easily boot.

What about systems that can't update u-boot, but want to run
a newer kernel?


	-- Dan

^ permalink raw reply

* [PATCH] Use check_legacy_ioport() on ppc32 too.
From: David Woodhouse @ 2006-04-24 22:22 UTC (permalink / raw)
  To: paulus, benh; +Cc: linuxppc-dev

Some people report that we die on some Macs when we are expecting to
catch machine checks after poking at some random I/O address. I'd seen
it happen on my dual G4 with serial ports until we fixed those to use
OF, but now other users are reporting it with i8042.

This expands the use of check_legacy_ioport() to avoid that situation
even on 32-bit kernels.

Signed-off-by: David Woodhouse <dwmw2@infradead.org>

diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 1d93e73..684ab1d 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -516,3 +516,11 @@ void probe_machine(void)
 
 	printk(KERN_INFO "Using %s machine description\n", ppc_md.name);
 }
+
+int check_legacy_ioport(unsigned long base_port)
+{
+	if (ppc_md.check_legacy_ioport == NULL)
+		return 0;
+	return ppc_md.check_legacy_ioport(base_port);
+}
+EXPORT_SYMBOL(check_legacy_ioport);
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 13e91c4..4467c49 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -594,14 +594,6 @@ void ppc64_terminate_msg(unsigned int sr
 	printk("[terminate]%04x %s\n", src, msg);
 }
 
-int check_legacy_ioport(unsigned long base_port)
-{
-	if (ppc_md.check_legacy_ioport == NULL)
-		return 0;
-	return ppc_md.check_legacy_ioport(base_port);
-}
-EXPORT_SYMBOL(check_legacy_ioport);
-
 void cpu_die(void)
 {
 	if (ppc_md.cpu_die)
diff --git a/drivers/block/floppy.c b/drivers/block/floppy.c
index bedb689..dff1e67 100644
--- a/drivers/block/floppy.c
+++ b/drivers/block/floppy.c
@@ -4301,7 +4301,7 @@ #endif
 	}
 
 	use_virtual_dma = can_use_virtual_dma & 1;
-#if defined(CONFIG_PPC64)
+#if defined(CONFIG_PPC_MERGE)
 	if (check_legacy_ioport(FDC1)) {
 		del_timer(&fd_timeout);
 		err = -ENODEV;
diff --git a/drivers/input/serio/i8042-io.h b/drivers/input/serio/i8042-io.h
index 9a92216..cc21914 100644
--- a/drivers/input/serio/i8042-io.h
+++ b/drivers/input/serio/i8042-io.h
@@ -67,14 +67,14 @@ static inline int i8042_platform_init(vo
  * On some platforms touching the i8042 data register region can do really
  * bad things. Because of this the region is always reserved on such boxes.
  */
-#if !defined(__sh__) && !defined(__alpha__) && !defined(__mips__) && !defined(CONFIG_PPC64)
+#if !defined(__sh__) && !defined(__alpha__) && !defined(__mips__) && !defined(CONFIG_PPC_MERGE)
 	if (!request_region(I8042_DATA_REG, 16, "i8042"))
 		return -EBUSY;
 #endif
 
         i8042_reset = 1;
 
-#if defined(CONFIG_PPC64)
+#if defined(CONFIG_PPC_MERGE)
 	if (check_legacy_ioport(I8042_DATA_REG))
 		return -EBUSY;
 	if (!request_region(I8042_DATA_REG, 16, "i8042"))
diff --git a/include/asm-powerpc/io.h b/include/asm-powerpc/io.h
index 68efbea..f1c2469 100644
--- a/include/asm-powerpc/io.h
+++ b/include/asm-powerpc/io.h
@@ -9,6 +9,9 @@ #ifdef __KERNEL__
  * 2 of the License, or (at your option) any later version.
  */
 
+/* Check of existence of legacy devices */
+extern int check_legacy_ioport(unsigned long base_port);
+
 #ifndef CONFIG_PPC64
 #include <asm-ppc/io.h>
 #else
@@ -437,9 +440,6 @@ #define dma_cache_inv(_start,_size)		do 
 #define dma_cache_wback(_start,_size)		do { } while (0)
 #define dma_cache_wback_inv(_start,_size)	do { } while (0)
 
-/* Check of existence of legacy devices */
-extern int check_legacy_ioport(unsigned long base_port);
-
 
 /*
  * Convert a physical pointer to a virtual kernel pointer for /dev/mem

-- 
dwmw2

^ permalink raw reply related

* Re: Clock running fast on Mac Mini?
From: Brent Cook @ 2006-04-24 22:06 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <E1FY8kd-0000mH-PO@owl.grunz.lu>

On Monday 24 April 2006 16:37, Mich Lanners wrote:
> Hello all,
>
> I have a problem on my Mac Mini with the kernel clock, which is running
> fast.
>
> In fact, it is gaining 10 ms every 5 seconds, as can be seen here, with
> ntpdate called every 5 seconds:
>

I can confirm this. I can also trigger make's 'File has future modification 
date' warning message when compiling a file on the local filesystem.

 - Brent

^ permalink raw reply

* Clock running fast on Mac Mini?
From: Mich Lanners @ 2006-04-24 21:37 UTC (permalink / raw)
  To: linuxppc-dev

Hello all,

I have a problem on my Mac Mini with the kernel clock, which is running
fast.

In fact, it is gaining 10 ms every 5 seconds, as can be seen here, with
ntpdate called every 5 seconds:

owl:~# while true; do ntpdate -u be.pool.ntp.org; sleep 5; done
24 Apr 23:14:17 ntpdate[2601]: adjust time server 195.47.215.226 offset 0.151197 sec
24 Apr 23:14:22 ntpdate[2607]: adjust time server 195.47.215.226 offset 0.160348 sec
24 Apr 23:14:28 ntpdate[2613]: adjust time server 195.47.215.226 offset 0.170311 sec
[...]
24 Apr 23:17:15 ntpdate[2813]: adjust time server 195.47.215.226 offset 0.489635 sec
24 Apr 23:17:20 ntpdate[2819]: adjust time server 195.47.215.226 offset 0.499386 sec
24 Apr 23:17:25 ntpdate[2825]: step time server 195.47.215.226 offset 0.509342 sec
24 Apr 23:17:31 ntpdate[2831]: adjust time server 195.47.215.226 offset 0.013693 sec
24 Apr 23:17:36 ntpdate[2837]: adjust time server 195.47.215.226 offset 0.021668 sec

and so on.

This is with ntpd stopped, after a fresh boot with /etc/adjtime removed
to make sure hwclock is not falsly tuning the kernel timekeeping (I did
this because I once had a 2.6.16 kernel running, where clock problems
have been reported. I thought that may have falsified ntp's drift file
and/or hwclock's /etc/adjtime).

All this is with a 2.6.15 kernel.

My Alu powerbook with a 2.6.15 kernel does not show this problem.

Anybody know what's up?

Thanks, and cheers

Michel

-------------------------------------------------------------------------
Michel Lanners                 |  " Read Philosophy.  Study Art.
23, Rue Paul Henkes            |    Ask Questions.  Make Mistakes.
L-1710 Luxembourg              |
email   mlan@cpu.lu            |
http://www.cpu.lu/~mlan        |                     Learn Always. "

^ permalink raw reply

* Rebuilding FS MDS 8349 BSP & JFFS2 integration
From: Krawczuk, Victor @ 2006-04-24 21:30 UTC (permalink / raw)
  To: linuxppc-embedded

[-- Attachment #1: Type: text/plain, Size: 3374 bytes --]

Hi,

I am a newbie to Linux but not to embedded PPC. I was hoping someone
could point me to the direction here. I apologize in advance if this is
kindergarten stuff.

I downloaded the MDS-8349 Linux BSP from FreeScale. I was able to burn
the default prebuilt images "uboot" and "jffs2.img" to my MDS-8349 (PB)
board and have uboot tftp the prebuilt "uImage" to my PB. I used
"tftpboot 200000 uImage" to download and "bootm" to boot.  As per
included instructions, I had "setenv bootargs root=/dev/mtdblock1
rootfstype=jffs2 rw console=ttyS0,115200" in uboot, followed by a
"saveenv". The prebuilt Linux (uImage) came up, no problem.

As I need to include USB Host support to the PPC kernel, I used the ltib
tool to reconfigure and rebuild Linux, as well as a matching jffs2 (I
would think). Ltib produced vmlinux.gz.uboot and rootfs.jffs2 for me.
For some reason, rootfs.jffs2 was significantly smaller than the
prebuilt "jffs2.img" (2483184 bytes vs. 4325376 bytes). I'm not sure
why. I then did the following:

bootargs is still set to "root=/dev/mtdblock1 rootfstype=jffs2 rw
console=ttyS0,115200"  in uboot:

Using the same (prebuilt) uboot as before:

- tftpboot 400000 rootfs.jffs2 (rebuilt jffs2)
- erase fe020000 fe5fffff
- cp.b 400000 fe020000 25e3f0

- tftpboot 200000 vmlinux.gz.uboot (rebuilt Linux containing USB Host
stuff)
- bootm

Linux could not boot!  I got the following root FS error:

[stuff deleted...]
io scheduler deadline registered
io scheduler cfq registered
RAMDISK driver initialized: 16 RAM disks of 32768K size 1024 blocksize
loop: loaded (max 8 devices)
eth0: Gianfar Ethernet Controller Version 1.1, 00:04:9f:00:2d:b7
eth0: Running with NAPI disabled
eth0: 64/64 RX/TX BD ring size
eth1: Gianfar Ethernet Controller Version 1.1, 00:04:9f:00:2d:b8
eth1: Running with NAPI disabled
eth1: 64/64 RX/TX BD ring size
i2c /dev entries driver
NET: Registered protocol family 2
IP: routing cache hash table of 2048 buckets, 16Kbytes
TCP established hash table entries: 16384 (order: 5, 131072 bytes)
TCP bind hash table entries: 16384 (order: 4, 65536 bytes)
TCP: Hash tables configured (established 16384 bind 16384)
NET: Registered protocol family 1
NET: Registered protocol family 17
Root-NFS: No NFS server available, giving up.
VFS: Unable to mount root fs via NFS, trying floppy.
VFS: Cannot open root device "mtdblock1" or unknown-block(2,0)
Please append a correct "root=" boot option
Kernel panic - not syncing: VFS: Unable to mount root fs on
unknown-block(2,0)
 <0>Rebooting in 180 seconds..

Would anybody happen to know what is going wrong?  How am I supposed to
know the correct "root=" boot option for a rebuilt jffs2 using LTIB, if
that is my problem?

Thanks in advance for any hints,

Victor.

The information contained in this e-mail message is PRIVATE. It may contain confidential information and may be legally privileged. It is intended for the exclusive use of the addressee(s). If you are not the intended recipient, you are hereby notified that any dissemination, distribution or reproduction of this communication is strictly prohibited. If the intended recipient(s) cannot be reached or if a transmission problem has occurred, please notify the sender immediately by return e-mail and destroy all copies of this message. 
Thank you. 

[-- Attachment #2: Type: text/html, Size: 5900 bytes --]

^ permalink raw reply

* Re: 85xx FDT updates?
From: Kumar Gala @ 2006-04-24 21:43 UTC (permalink / raw)
  To: Jon Loeliger; +Cc: linuxppc-dev@ozlabs.org
In-Reply-To: <1145900516.4251.18.camel@cashmere.sps.mot.com>


On Apr 24, 2006, at 12:41 PM, Jon Loeliger wrote:

> OK, so what is the deal with the 85xx patches
> that have been submitted for more FDT support?
> I get a variety of mixed stories, message and
> vague comments.  I want some facts and direction.
>
> Can anyone verify, validate, refute, deny or
> support any of these statements?  Not trying to
> be mean, point fingers or sound negative here,
> I'm just trying to pinpoint the apparent log-jam.
>
>
> 1. The 85xx patches that Andy submitted, that
>    have been pulled into Kumar's 85xx git tree
>    are going to sit there until further patches
>    are submitted that _remove_all_85xx_support_
>    from the arch/ppc tree as well.
>
> 2. The same patches are going to sit in Kumar's
>    tree until 8560 patches with support for CPM
>    are released and all 85xx ports appear fully
>    and completely in arch/powerpc.  BTW, CPM is
>    waiting on approval of Vitaly's CPM proposal
>    for OF FDT support.
>
> 3. Statements 1 and 2 combined.
>
> 4. The same 85xx patches have just slipped through
>    some mental crack and simply need to be pulled,
>    re-applied, re-drafted, re-submitted.  Oops, sorry.
>    The action item is in _________'s lap.
>
> 5. There was an unresolved comment/issue that
>    FSL still needs to address.  BTW, that issue
>    is ________.
>
> 6. We are waiting on something else and that
>    item is _______.
>
> 7. You know, you really need to address the _other_
>    85xx ports (such as TQMs) as well.  Can you
>    please do something about those as well?
>
> 8. These Linux patches aren't going to be picked
>    up until the corresponding U-Boot patches are
>    picked up as well.

My understanding is that paulus should pull them into powerpc.git.   
He hasn't given me any reason to believe that shouldn't just happen.   
I dont see any reason that we need to have other dependancies for  
code that works.  I'll push on paulus to pull the patches in my tree  
into powerpc.git.

However, I am against removing the arch/ppc support until the u-boot  
patches are picked up.  I think its bad form to give people a kernel  
they can't easily boot.

- k

^ permalink raw reply

* Re: [PATCH] DTC - validation by device_type
From: Paul Nasrat @ 2006-04-24 21:09 UTC (permalink / raw)
  To: Segher Boessenkool; +Cc: linuxppc-dev@ozlabs.org, Jon Loeliger
In-Reply-To: <5E473F0A-C2E2-4ABD-A9FA-E83C4B4CB6C3@kernel.crashing.org>

On Mon, 2006-04-24 at 22:55 +0200, Segher Boessenkool wrote:

> 1) It's hard to see how any (physical) network device would
> not have a "reg" property.  It is not required though.
> 
> 2) "local-mac-address" is not a required property.
> 
> 3) "mac-address" is only required for nodes that have been
> opened before.
> 
> 4) "address-bits" is not required (taken as 48 if absent).
> 
> Please see Annex A (look at the property names, and perhaps
> at the "network" device type).  _Always_ look at Annex A!
> It's normative.

My bad.  Jon, just drop this from your queue then. 

Paul 

^ permalink raw reply

* Re: [PATCH] DTC - validation by device_type
From: Segher Boessenkool @ 2006-04-24 20:55 UTC (permalink / raw)
  To: Paul Nasrat; +Cc: linuxppc-dev@ozlabs.org, Jon Loeliger
In-Reply-To: <1145905793.4082.27.camel@enki.eridu>

>>> +	CHECK_HAVE(net, "reg");
>>> +	CHECK_HAVE(net, "local-mac-address");
>>> +	CHECK_HAVE(net, "mac-address");
>>> +	CHECK_HAVE(net, "address-bits");

1) It's hard to see how any (physical) network device would
not have a "reg" property.  It is not required though.

2) "local-mac-address" is not a required property.

3) "mac-address" is only required for nodes that have been
opened before.

4) "address-bits" is not required (taken as 48 if absent).

Please see Annex A (look at the property names, and perhaps
at the "network" device type).  _Always_ look at Annex A!
It's normative.

Segher

^ permalink raw reply

* [PATCH 7/7] Print out debugging information during initialisation
From: Mel Gorman @ 2006-04-24 20:22 UTC (permalink / raw)
  To: davej, tony.luck, linux-mm, ak, bob.picco, linux-kernel,
	linuxppc-dev
  Cc: Mel Gorman
In-Reply-To: <20060424202009.20409.89016.sendpatchset@skynet>


The zone and hole sizing code is new and unexpected problems showed up
on machines that were not covered by the pre-release tests. This patch
prints out useful information when those unexpected situations occur.

It is not expected that this patch become a permanent part of the set.


 mem_init.c |   54 ++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 50 insertions(+), 4 deletions(-)

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc2-106-breakout_mem_init/mm/mem_init.c linux-2.6.17-rc2-107-debug/mm/mem_init.c
--- linux-2.6.17-rc2-106-breakout_mem_init/mm/mem_init.c	2006-04-24 15:53:22.000000000 +0100
+++ linux-2.6.17-rc2-107-debug/mm/mem_init.c	2006-04-24 15:54:11.000000000 +0100
@@ -593,6 +593,7 @@ static __meminit void init_currently_emp
 }
 
 #ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+
 /* Note: nid == MAX_NUMNODES returns first region */
 static int __init first_active_region_index_in_nid(int nid)
 {
@@ -645,13 +646,24 @@ void __init free_bootmem_with_active_reg
 	for_each_active_range_index_in_nid(i, nid) {
 		unsigned long size_pages = 0;
 		unsigned long end_pfn = early_node_map[i].end_pfn;
-		if (early_node_map[i].start_pfn >= max_low_pfn)
+		if (early_node_map[i].start_pfn >= max_low_pfn) {
+			printk("start_pfn %lu >= %lu\n", early_node_map[i].start_pfn,
+								max_low_pfn);
 			continue;
+		}
 
-		if (end_pfn > max_low_pfn)
+		if (end_pfn > max_low_pfn) {
+			printk("end_pfn %lu going back to %lu\n", early_node_map[i].end_pfn,
+									max_low_pfn);
 			end_pfn = max_low_pfn;
+		}
 
 		size_pages = end_pfn - early_node_map[i].start_pfn;
+		printk("free_bootmem_node(%d, %lu, %lu) :::: pfn ranges (%d, %lu, %lu)\n",
+				early_node_map[i].nid,
+				PFN_PHYS(early_node_map[i].start_pfn),
+				PFN_PHYS(size_pages),
+				early_node_map[i].nid, early_node_map[i].start_pfn, end_pfn);
 		free_bootmem_node(NODE_DATA(early_node_map[i].nid),
 				PFN_PHYS(early_node_map[i].start_pfn),
 				PFN_PHYS(size_pages));
@@ -661,10 +673,15 @@ void __init free_bootmem_with_active_reg
 void __init sparse_memory_present_with_active_regions(int nid)
 {
 	unsigned int i;
-	for_each_active_range_index_in_nid(i, nid)
+	for_each_active_range_index_in_nid(i, nid) {
+		printk("memory_present(%d, %lu, %lu)\n",
+			early_node_map[i].nid,
+			early_node_map[i].start_pfn,
+			early_node_map[i].end_pfn);
 		memory_present(early_node_map[i].nid,
 				early_node_map[i].start_pfn,
 				early_node_map[i].end_pfn);
+	}
 }
 
 void __init get_pfn_range_for_nid(unsigned int nid,
@@ -722,6 +739,8 @@ unsigned long __init __absent_pages_in_r
 	unsigned long prev_end_pfn = 0, hole_pages = 0;
 	unsigned long start_pfn;
 
+	printk("__absent_pages_in_range(%d, %lu, %lu) = ", nid,
+					range_start_pfn, range_end_pfn);
 	/* Find the end_pfn of the first active range of pfns in the node */
 	i = first_active_region_index_in_nid(nid);
 	prev_end_pfn = early_node_map[i].start_pfn;
@@ -749,6 +768,8 @@ unsigned long __init __absent_pages_in_r
 		prev_end_pfn = early_node_map[i].end_pfn;
 	}
 
+	printk("%lu\n", hole_pages);
+
 	return hole_pages;
 }
 
@@ -911,6 +932,9 @@ void __init add_active_range(unsigned in
 {
 	unsigned int i;
 
+	printk("add_active_range(%d, %lu, %lu): ",
+			nid, start_pfn, end_pfn);
+
 	/* Merge with existing active regions if possible */
 	for (i = 0; early_node_map[i].end_pfn; i++) {
 		if (early_node_map[i].nid != nid)
@@ -918,12 +942,15 @@ void __init add_active_range(unsigned in
 
 		/* Skip if an existing region covers this new one */
 		if (start_pfn >= early_node_map[i].start_pfn &&
-				end_pfn <= early_node_map[i].end_pfn)
+				end_pfn <= early_node_map[i].end_pfn) {
+			printk("Existing\n");
 			return;
+		}
 
 		/* Merge forward if suitable */
 		if (start_pfn <= early_node_map[i].end_pfn &&
 				end_pfn > early_node_map[i].end_pfn) {
+			printk("Merging forward\n");
 			early_node_map[i].end_pfn = end_pfn;
 			return;
 		}
@@ -931,6 +958,7 @@ void __init add_active_range(unsigned in
 		/* Merge backward if suitable */
 		if (start_pfn < early_node_map[i].end_pfn &&
 				end_pfn >= early_node_map[i].start_pfn) {
+			printk("Merging backwards\n");
 			early_node_map[i].start_pfn = start_pfn;
 			return;
 		}
@@ -942,6 +970,7 @@ void __init add_active_range(unsigned in
 		return;
 	}
 
+	printk("New\n");
 	early_node_map[i].nid = nid;
 	early_node_map[i].start_pfn = start_pfn;
 	early_node_map[i].end_pfn = end_pfn;
@@ -949,6 +978,7 @@ void __init add_active_range(unsigned in
 
 void __init remove_all_active_ranges()
 {
+	printk("remove_all_active_ranges()\n");
 	memset(early_node_map, 0, sizeof(early_node_map));
 }
 
@@ -976,6 +1006,14 @@ static void __init sort_node_map(void)
 
 	sort(early_node_map, num, sizeof(struct node_active_region),
 						cmp_node_active_region, NULL);
+
+	printk("Dumping sorted node map\n");
+	for (num = 0; early_node_map[num].end_pfn; num++) {
+		printk("entry %lu: %d  %lu -> %lu\n", num,
+				early_node_map[num].nid,
+				early_node_map[num].start_pfn,
+				early_node_map[num].end_pfn);
+	}
 }
 
 /* Find the lowest pfn for a node. This depends on a sorted early_node_map */
@@ -992,6 +1030,7 @@ unsigned long __init find_min_pfn_for_no
 	return 0;
 }
 
+
 unsigned long __init find_min_pfn_with_active_regions(void)
 {
 	return find_min_pfn_for_node(MAX_NUMNODES);
@@ -1005,6 +1044,7 @@ unsigned long __init find_max_pfn_with_a
 	for (i = 0; early_node_map[i].end_pfn; i++)
 		max_pfn = max(max_pfn, early_node_map[i].end_pfn);
 
+	printk("find_max_pfn_with_active_regions() == %lu\n", max_pfn);
 	return max_pfn;
 }
 
@@ -1016,6 +1056,10 @@ void __init free_area_init_nodes(unsigne
 	unsigned long nid;
 	int zone_index;
 
+	printk("free_area_init_nodes(%lu, %lu, %lu, %lu)\n",
+			arch_max_dma_pfn, arch_max_dma32_pfn,
+			arch_max_low_pfn, arch_max_high_pfn);
+
 	/* Record where the zone boundaries are */
 	memset(arch_zone_lowest_possible_pfn, 0,
 				sizeof(arch_zone_lowest_possible_pfn));
@@ -1032,6 +1076,8 @@ void __init free_area_init_nodes(unsigne
 			arch_zone_highest_possible_pfn[zone_index-1];
 	}
 
+	printk("free_area_init_nodes(): find_min_pfn = %lu\n", find_min_pfn_with_active_regions());
+
 	/* Regions in the early_node_map can be in any order */
 	sort_node_map();
 

^ permalink raw reply

* [PATCH 6/7] Break out memory initialisation code from page_alloc.c to mem_init.c
From: Mel Gorman @ 2006-04-24 20:22 UTC (permalink / raw)
  To: davej, tony.luck, linuxppc-dev, linux-kernel, bob.picco, ak,
	linux-mm
  Cc: Mel Gorman
In-Reply-To: <20060424202009.20409.89016.sendpatchset@skynet>


page_alloc.c contains a large amount of memory initialisation code. This patch
breaks out the initialisation code to a separate file to make page_alloc.c
a bit easier to read.


 Makefile     |    2 
 mem_init.c   | 1044 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 page_alloc.c | 1025 -----------------------------------------------------
 3 files changed, 1045 insertions(+), 1026 deletions(-)

Signed-off-by: Mel Gorman <mel@csn.ul.ie>
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc2-105-ia64_use_init_nodes/mm/Makefile linux-2.6.17-rc2-106-breakout_mem_init/mm/Makefile
--- linux-2.6.17-rc2-105-ia64_use_init_nodes/mm/Makefile	2006-04-19 04:00:49.000000000 +0100
+++ linux-2.6.17-rc2-106-breakout_mem_init/mm/Makefile	2006-04-24 15:53:22.000000000 +0100
@@ -8,7 +8,7 @@ mmu-$(CONFIG_MMU)	:= fremap.o highmem.o 
 			   vmalloc.o
 
 obj-y			:= bootmem.o filemap.o mempool.o oom_kill.o fadvise.o \
-			   page_alloc.o page-writeback.o pdflush.o \
+			   page_alloc.o mem_init.o page-writeback.o pdflush.o \
 			   readahead.o swap.o truncate.o vmscan.o \
 			   prio_tree.o util.o mmzone.o $(mmu-y)
 
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc2-105-ia64_use_init_nodes/mm/mem_init.c linux-2.6.17-rc2-106-breakout_mem_init/mm/mem_init.c
--- linux-2.6.17-rc2-105-ia64_use_init_nodes/mm/mem_init.c	2006-04-24 09:36:35.000000000 +0100
+++ linux-2.6.17-rc2-106-breakout_mem_init/mm/mem_init.c	2006-04-24 15:53:22.000000000 +0100
@@ -0,0 +1,1044 @@
+/*
+ * mm/mem_init.c
+ * Initialises the architecture independant view of memory. pgdats, zones, etc
+ *
+ *  Copyright (C) 1991, 1992, 1993, 1994  Linus Torvalds
+ *  Copyright (C) 1995, Stephen Tweedie
+ *  Copyright (C) July 1999, Gerhard Wichert, Siemens AG
+ *  Copyright (C) 1999, Ingo Molnar, Red Hat
+ *  Copyright (C) 1999, 2000, Kanoj Sarcar, SGI
+ *  Copyright (C) Sept 2000, Martin J. Bligh
+ *	(lots of bits borrowed from Ingo Molnar & Andrew Morton)
+ *  Copyright (C) Apr 2006, Mel Gorman, IBM
+ *	(lots of bits taken from architecture-specific code)
+ */
+#include <linux/config.h>
+#include <linux/sort.h>
+#include <linux/pfn.h>
+#include <linux/mm.h>
+#include <linux/bootmem.h>
+#include <linux/cpuset.h>
+#include <linux/mempolicy.h>
+#include <linux/sysctl.h>
+#include <linux/swap.h>
+#include <linux/cpu.h>
+
+static char *zone_names[MAX_NR_ZONES] = { "DMA", "DMA32", "Normal", "HighMem" };
+int percpu_pagelist_fraction;
+
+#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+  #ifdef CONFIG_MAX_ACTIVE_REGIONS
+    #define MAX_ACTIVE_REGIONS CONFIG_MAX_ACTIVE_REGIONS
+  #else
+    #define MAX_ACTIVE_REGIONS (MAX_NR_ZONES * MAX_NUMNODES + 1)
+  #endif
+
+  struct node_active_region __initdata early_node_map[MAX_ACTIVE_REGIONS];
+  unsigned long __initdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
+  unsigned long __initdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
+#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
+
+/*
+ * Builds allocation fallback zone lists.
+ *
+ * Add all populated zones of a node to the zonelist.
+ */
+static int __init build_zonelists_node(pg_data_t *pgdat,
+			struct zonelist *zonelist, int nr_zones, int zone_type)
+{
+	struct zone *zone;
+
+	BUG_ON(zone_type > ZONE_HIGHMEM);
+
+	do {
+		zone = pgdat->node_zones + zone_type;
+		if (populated_zone(zone)) {
+#ifndef CONFIG_HIGHMEM
+			BUG_ON(zone_type > ZONE_NORMAL);
+#endif
+			zonelist->zones[nr_zones++] = zone;
+			check_highest_zone(zone_type);
+		}
+		zone_type--;
+
+	} while (zone_type >= 0);
+	return nr_zones;
+}
+
+static inline int highest_zone(int zone_bits)
+{
+	int res = ZONE_NORMAL;
+	if (zone_bits & (__force int)__GFP_HIGHMEM)
+		res = ZONE_HIGHMEM;
+	if (zone_bits & (__force int)__GFP_DMA32)
+		res = ZONE_DMA32;
+	if (zone_bits & (__force int)__GFP_DMA)
+		res = ZONE_DMA;
+	return res;
+}
+
+#ifdef CONFIG_NUMA
+#define MAX_NODE_LOAD (num_online_nodes())
+static int __initdata node_load[MAX_NUMNODES];
+/**
+ * find_next_best_node - find the next node that should appear in a given node's fallback list
+ * @node: node whose fallback list we're appending
+ * @used_node_mask: nodemask_t of already used nodes
+ *
+ * We use a number of factors to determine which is the next node that should
+ * appear on a given node's fallback list.  The node should not have appeared
+ * already in @node's fallback list, and it should be the next closest node
+ * according to the distance array (which contains arbitrary distance values
+ * from each node to each node in the system), and should also prefer nodes
+ * with no CPUs, since presumably they'll have very little allocation pressure
+ * on them otherwise.
+ * It returns -1 if no node is found.
+ */
+static int __init find_next_best_node(int node, nodemask_t *used_node_mask)
+{
+	int n, val;
+	int min_val = INT_MAX;
+	int best_node = -1;
+
+	/* Use the local node if we haven't already */
+	if (!node_isset(node, *used_node_mask)) {
+		node_set(node, *used_node_mask);
+		return node;
+	}
+
+	for_each_online_node(n) {
+		cpumask_t tmp;
+
+		/* Don't want a node to appear more than once */
+		if (node_isset(n, *used_node_mask))
+			continue;
+
+		/* Use the distance array to find the distance */
+		val = node_distance(node, n);
+
+		/* Penalize nodes under us ("prefer the next node") */
+		val += (n < node);
+
+		/* Give preference to headless and unused nodes */
+		tmp = node_to_cpumask(n);
+		if (!cpus_empty(tmp))
+			val += PENALTY_FOR_NODE_WITH_CPUS;
+
+		/* Slight preference for less loaded node */
+		val *= (MAX_NODE_LOAD*MAX_NUMNODES);
+		val += node_load[n];
+
+		if (val < min_val) {
+			min_val = val;
+			best_node = n;
+		}
+	}
+
+	if (best_node >= 0)
+		node_set(best_node, *used_node_mask);
+
+	return best_node;
+}
+
+static void __init build_zonelists(pg_data_t *pgdat)
+{
+	int i, j, k, node, local_node;
+	int prev_node, load;
+	struct zonelist *zonelist;
+	nodemask_t used_mask;
+
+	/* initialize zonelists */
+	for (i = 0; i < GFP_ZONETYPES; i++) {
+		zonelist = pgdat->node_zonelists + i;
+		zonelist->zones[0] = NULL;
+	}
+
+	/* NUMA-aware ordering of nodes */
+	local_node = pgdat->node_id;
+	load = num_online_nodes();
+	prev_node = local_node;
+	nodes_clear(used_mask);
+	while ((node = find_next_best_node(local_node, &used_mask)) >= 0) {
+		int distance = node_distance(local_node, node);
+
+		/*
+		 * If another node is sufficiently far away then it is better
+		 * to reclaim pages in a zone before going off node.
+		 */
+		if (distance > RECLAIM_DISTANCE)
+			zone_reclaim_mode = 1;
+
+		/*
+		 * We don't want to pressure a particular node.
+		 * So adding penalty to the first node in same
+		 * distance group to make it round-robin.
+		 */
+
+		if (distance != node_distance(local_node, prev_node))
+			node_load[node] += load;
+		prev_node = node;
+		load--;
+		for (i = 0; i < GFP_ZONETYPES; i++) {
+			zonelist = pgdat->node_zonelists + i;
+			for (j = 0; zonelist->zones[j] != NULL; j++);
+
+			k = highest_zone(i);
+
+	 		j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
+			zonelist->zones[j] = NULL;
+		}
+	}
+}
+
+#else	/* CONFIG_NUMA */
+
+static void __init build_zonelists(pg_data_t *pgdat)
+{
+	int i, j, k, node, local_node;
+
+	local_node = pgdat->node_id;
+	for (i = 0; i < GFP_ZONETYPES; i++) {
+		struct zonelist *zonelist;
+
+		zonelist = pgdat->node_zonelists + i;
+
+		j = 0;
+		k = highest_zone(i);
+ 		j = build_zonelists_node(pgdat, zonelist, j, k);
+ 		/*
+ 		 * Now we build the zonelist so that it contains the zones
+ 		 * of all the other nodes.
+ 		 * We don't want to pressure a particular node, so when
+ 		 * building the zones for node N, we make sure that the
+ 		 * zones coming right after the local ones are those from
+ 		 * node N+1 (modulo N)
+ 		 */
+		for (node = local_node + 1; node < MAX_NUMNODES; node++) {
+			if (!node_online(node))
+				continue;
+			j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
+		}
+		for (node = 0; node < local_node; node++) {
+			if (!node_online(node))
+				continue;
+			j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
+		}
+
+		zonelist->zones[j] = NULL;
+	}
+}
+
+#endif	/* CONFIG_NUMA */
+
+void __init build_all_zonelists(void)
+{
+	int i;
+
+	for_each_online_node(i)
+		build_zonelists(NODE_DATA(i));
+	printk("Built %i zonelists\n", num_online_nodes());
+	cpuset_init_current_mems_allowed();
+}
+
+/*
+ * Helper functions to size the waitqueue hash table.
+ * Essentially these want to choose hash table sizes sufficiently
+ * large so that collisions trying to wait on pages are rare.
+ * But in fact, the number of active page waitqueues on typical
+ * systems is ridiculously low, less than 200. So this is even
+ * conservative, even though it seems large.
+ *
+ * The constant PAGES_PER_WAITQUEUE specifies the ratio of pages to
+ * waitqueues, i.e. the size of the waitq table given the number of pages.
+ */
+#define PAGES_PER_WAITQUEUE	256
+
+static inline unsigned long wait_table_size(unsigned long pages)
+{
+	unsigned long size = 1;
+
+	pages /= PAGES_PER_WAITQUEUE;
+
+	while (size < pages)
+		size <<= 1;
+
+	/*
+	 * Once we have dozens or even hundreds of threads sleeping
+	 * on IO we've got bigger problems than wait queue collision.
+	 * Limit the size of the wait table to a reasonable size.
+	 */
+	size = min(size, 4096UL);
+
+	return max(size, 4UL);
+}
+
+/*
+ * This is an integer logarithm so that shifts can be used later
+ * to extract the more random high bits from the multiplicative
+ * hash function before the remainder is taken.
+ */
+static inline unsigned long wait_table_bits(unsigned long size)
+{
+	return ffz(~size);
+}
+
+#define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
+
+#ifndef __HAVE_ARCH_MEMMAP_INIT
+#define memmap_init(size, nid, zone, start_pfn) \
+	memmap_init_zone((size), (nid), (zone), (start_pfn))
+#endif
+
+/*
+ * Initially all pages are reserved - free ones are freed
+ * up by free_all_bootmem() once the early boot process is
+ * done. Non-atomic initialization, single-pass.
+ */
+void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
+		unsigned long start_pfn)
+{
+	struct page *page;
+	unsigned long end_pfn = start_pfn + size;
+	unsigned long pfn;
+
+	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
+		if (!early_pfn_valid(pfn))
+			continue;
+		page = pfn_to_page(pfn);
+		set_page_links(page, zone, nid, pfn);
+		init_page_count(page);
+		reset_page_mapcount(page);
+		SetPageReserved(page);
+		INIT_LIST_HEAD(&page->lru);
+#ifdef WANT_PAGE_VIRTUAL
+		/* The shift won't overflow because ZONE_NORMAL is below 4G. */
+		if (!is_highmem_idx(zone))
+			set_page_address(page, __va(pfn << PAGE_SHIFT));
+#endif
+	}
+}
+
+void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone,
+				unsigned long size)
+{
+	int order;
+	for (order = 0; order < MAX_ORDER ; order++) {
+		INIT_LIST_HEAD(&zone->free_area[order].free_list);
+		zone->free_area[order].nr_free = 0;
+	}
+}
+
+#define ZONETABLE_INDEX(x, zone_nr)	((x << ZONES_SHIFT) | zone_nr)
+void zonetable_add(struct zone *zone, int nid, int zid, unsigned long pfn,
+		unsigned long size)
+{
+	unsigned long snum = pfn_to_section_nr(pfn);
+	unsigned long end = pfn_to_section_nr(pfn + size);
+
+	if (FLAGS_HAS_NODE)
+		zone_table[ZONETABLE_INDEX(nid, zid)] = zone;
+	else
+		for (; snum <= end; snum++)
+			zone_table[ZONETABLE_INDEX(snum, zid)] = zone;
+}
+
+static __meminit
+void zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
+{
+	int i;
+	struct pglist_data *pgdat = zone->zone_pgdat;
+
+	/*
+	 * The per-page waitqueue mechanism uses hashed waitqueues
+	 * per zone.
+	 */
+	zone->wait_table_size = wait_table_size(zone_size_pages);
+	zone->wait_table_bits =	wait_table_bits(zone->wait_table_size);
+	zone->wait_table = (wait_queue_head_t *)
+		alloc_bootmem_node(pgdat, zone->wait_table_size
+					* sizeof(wait_queue_head_t));
+
+	for(i = 0; i < zone->wait_table_size; ++i)
+		init_waitqueue_head(zone->wait_table + i);
+}
+
+/*
+ * setup_pagelist_highmark() sets the high water mark for hot per_cpu_pagelist
+ * to the value high for the pageset p.
+ */
+static void setup_pagelist_highmark(struct per_cpu_pageset *p,
+				unsigned long high)
+{
+	struct per_cpu_pages *pcp;
+
+	pcp = &p->pcp[0]; /* hot list */
+	pcp->high = high;
+	pcp->batch = max(1UL, high/4);
+	if ((high/4) > (PAGE_SHIFT * 8))
+		pcp->batch = PAGE_SHIFT * 8;
+}
+
+/*
+ * percpu_pagelist_fraction - changes the pcp->high for each zone on each
+ * cpu.  It is the fraction of total pages in each zone that a hot per cpu pagelist
+ * can have before it gets flushed back to buddy allocator.
+ */
+int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
+	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
+{
+	struct zone *zone;
+	unsigned int cpu;
+	int ret;
+
+	ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
+	if (!write || (ret == -EINVAL))
+		return ret;
+	for_each_zone(zone) {
+		for_each_online_cpu(cpu) {
+			unsigned long  high;
+			high = zone->present_pages / percpu_pagelist_fraction;
+			setup_pagelist_highmark(zone_pcp(zone, cpu), high);
+		}
+	}
+	return 0;
+}
+
+static int __cpuinit zone_batchsize(struct zone *zone)
+{
+	int batch;
+
+	/*
+	 * The per-cpu-pages pools are set to around 1000th of the
+	 * size of the zone.  But no more than 1/2 of a meg.
+	 *
+	 * OK, so we don't know how big the cache is.  So guess.
+	 */
+	batch = zone->present_pages / 1024;
+	if (batch * PAGE_SIZE > 512 * 1024)
+		batch = (512 * 1024) / PAGE_SIZE;
+	batch /= 4;		/* We effectively *= 4 below */
+	if (batch < 1)
+		batch = 1;
+
+	/*
+	 * Clamp the batch to a 2^n - 1 value. Having a power
+	 * of 2 value was found to be more likely to have
+	 * suboptimal cache aliasing properties in some cases.
+	 *
+	 * For example if 2 tasks are alternately allocating
+	 * batches of pages, one task can end up with a lot
+	 * of pages of one half of the possible page colors
+	 * and the other with pages of the other colors.
+	 */
+	batch = (1 << (fls(batch + batch/2)-1)) - 1;
+
+	return batch;
+}
+
+inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
+{
+	struct per_cpu_pages *pcp;
+
+	memset(p, 0, sizeof(*p));
+
+	pcp = &p->pcp[0];		/* hot */
+	pcp->count = 0;
+	pcp->high = 6 * batch;
+	pcp->batch = max(1UL, 1 * batch);
+	INIT_LIST_HEAD(&pcp->list);
+
+	pcp = &p->pcp[1];		/* cold*/
+	pcp->count = 0;
+	pcp->high = 2 * batch;
+	pcp->batch = max(1UL, batch/2);
+	INIT_LIST_HEAD(&pcp->list);
+}
+
+#ifdef CONFIG_NUMA
+/*
+ * Boot pageset table. One per cpu which is going to be used for all
+ * zones and all nodes. The parameters will be set in such a way
+ * that an item put on a list will immediately be handed over to
+ * the buddy list. This is safe since pageset manipulation is done
+ * with interrupts disabled.
+ *
+ * Some NUMA counter updates may also be caught by the boot pagesets.
+ *
+ * The boot_pagesets must be kept even after bootup is complete for
+ * unused processors and/or zones. They do play a role for bootstrapping
+ * hotplugged processors.
+ *
+ * zoneinfo_show() and maybe other functions do
+ * not check if the processor is online before following the pageset pointer.
+ * Other parts of the kernel may not check if the zone is available.
+ */
+static struct per_cpu_pageset boot_pageset[NR_CPUS];
+
+/*
+ * Dynamically allocate memory for the
+ * per cpu pageset array in struct zone.
+ */
+static int __cpuinit process_zones(int cpu)
+{
+	struct zone *zone, *dzone;
+
+	for_each_zone(zone) {
+
+		zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset),
+					 GFP_KERNEL, cpu_to_node(cpu));
+		if (!zone_pcp(zone, cpu))
+			goto bad;
+
+		setup_pageset(zone_pcp(zone, cpu), zone_batchsize(zone));
+
+		if (percpu_pagelist_fraction)
+			setup_pagelist_highmark(zone_pcp(zone, cpu),
+			 	(zone->present_pages / percpu_pagelist_fraction));
+	}
+
+	return 0;
+bad:
+	for_each_zone(dzone) {
+		if (dzone == zone)
+			break;
+		kfree(zone_pcp(dzone, cpu));
+		zone_pcp(dzone, cpu) = NULL;
+	}
+	return -ENOMEM;
+}
+
+static inline void free_zone_pagesets(int cpu)
+{
+	struct zone *zone;
+
+	for_each_zone(zone) {
+		struct per_cpu_pageset *pset = zone_pcp(zone, cpu);
+
+		zone_pcp(zone, cpu) = NULL;
+		kfree(pset);
+	}
+}
+
+static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb,
+		unsigned long action,
+		void *hcpu)
+{
+	int cpu = (long)hcpu;
+	int ret = NOTIFY_OK;
+
+	switch (action) {
+		case CPU_UP_PREPARE:
+			if (process_zones(cpu))
+				ret = NOTIFY_BAD;
+			break;
+		case CPU_UP_CANCELED:
+		case CPU_DEAD:
+			free_zone_pagesets(cpu);
+			break;
+		default:
+			break;
+	}
+	return ret;
+}
+
+static struct notifier_block pageset_notifier =
+	{ &pageset_cpuup_callback, NULL, 0 };
+
+void __init setup_per_cpu_pageset(void)
+{
+	int err;
+
+	/* Initialize per_cpu_pageset for cpu 0.
+	 * A cpuup callback will do this for every cpu
+	 * as it comes online
+	 */
+	err = process_zones(smp_processor_id());
+	BUG_ON(err);
+	register_cpu_notifier(&pageset_notifier);
+}
+#endif
+
+static __meminit void zone_pcp_init(struct zone *zone)
+{
+	int cpu;
+	unsigned long batch = zone_batchsize(zone);
+
+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
+#ifdef CONFIG_NUMA
+		/* Early boot. Slab allocator not functional yet */
+		zone_pcp(zone, cpu) = &boot_pageset[cpu];
+		setup_pageset(&boot_pageset[cpu],0);
+#else
+		setup_pageset(zone_pcp(zone,cpu), batch);
+#endif
+	}
+	if (zone->present_pages)
+		printk(KERN_DEBUG "  %s zone: %lu pages, LIFO batch:%lu\n",
+			zone->name, zone->present_pages, batch);
+}
+
+static __meminit void init_currently_empty_zone(struct zone *zone,
+		unsigned long zone_start_pfn, unsigned long size)
+{
+	struct pglist_data *pgdat = zone->zone_pgdat;
+
+	zone_wait_table_init(zone, size);
+	pgdat->nr_zones = zone_idx(zone) + 1;
+
+	zone->zone_start_pfn = zone_start_pfn;
+
+	memmap_init(size, pgdat->node_id, zone_idx(zone), zone_start_pfn);
+
+	zone_init_free_lists(pgdat, zone, zone->spanned_pages);
+}
+
+#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+/* Note: nid == MAX_NUMNODES returns first region */
+static int __init first_active_region_index_in_nid(int nid)
+{
+	int i;
+	for (i = 0; early_node_map[i].end_pfn; i++) {
+		if (nid == MAX_NUMNODES || early_node_map[i].nid == nid)
+			return i;
+	}
+
+	return MAX_ACTIVE_REGIONS;
+}
+
+/* Note: nid == MAX_NUMNODES returns next region */
+static int __init next_active_region_index_in_nid(unsigned int index, int nid)
+{
+	for (index = index + 1; early_node_map[index].end_pfn; index++) {
+		if (nid == MAX_NUMNODES || early_node_map[index].nid == nid)
+			return index;
+	}
+
+	return MAX_ACTIVE_REGIONS;
+}
+
+#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
+int __init early_pfn_to_nid(unsigned long pfn)
+{
+	int i;
+
+	for (i = 0; early_node_map[i].end_pfn; i++) {
+		unsigned long start_pfn = early_node_map[i].start_pfn;
+		unsigned long end_pfn = early_node_map[i].end_pfn;
+
+		if ((start_pfn <= pfn) && (pfn < end_pfn))
+			return early_node_map[i].nid;
+	}
+
+	return -1;
+}
+#endif
+
+#define for_each_active_range_index_in_nid(i, nid) \
+	for (i = first_active_region_index_in_nid(nid); \
+				i != MAX_ACTIVE_REGIONS; \
+				i = next_active_region_index_in_nid(i, nid))
+
+void __init free_bootmem_with_active_regions(int nid,
+						unsigned long max_low_pfn)
+{
+	unsigned int i;
+	for_each_active_range_index_in_nid(i, nid) {
+		unsigned long size_pages = 0;
+		unsigned long end_pfn = early_node_map[i].end_pfn;
+		if (early_node_map[i].start_pfn >= max_low_pfn)
+			continue;
+
+		if (end_pfn > max_low_pfn)
+			end_pfn = max_low_pfn;
+
+		size_pages = end_pfn - early_node_map[i].start_pfn;
+		free_bootmem_node(NODE_DATA(early_node_map[i].nid),
+				PFN_PHYS(early_node_map[i].start_pfn),
+				PFN_PHYS(size_pages));
+	}
+}
+
+void __init sparse_memory_present_with_active_regions(int nid)
+{
+	unsigned int i;
+	for_each_active_range_index_in_nid(i, nid)
+		memory_present(early_node_map[i].nid,
+				early_node_map[i].start_pfn,
+				early_node_map[i].end_pfn);
+}
+
+void __init get_pfn_range_for_nid(unsigned int nid,
+			unsigned long *start_pfn, unsigned long *end_pfn)
+{
+	unsigned int i;
+	*start_pfn = -1UL;
+	*end_pfn = 0;
+
+	for_each_active_range_index_in_nid(i, nid) {
+		if (early_node_map[i].start_pfn < *start_pfn)
+			*start_pfn = early_node_map[i].start_pfn;
+
+		if (early_node_map[i].end_pfn > *end_pfn)
+			*end_pfn = early_node_map[i].end_pfn;
+	}
+
+	if (*start_pfn == -1UL) {
+		printk(KERN_WARNING "Node %u active with no memory\n", nid);
+		*start_pfn = 0;
+	}
+}
+
+unsigned long __init zone_present_pages_in_node(int nid,
+					unsigned long zone_type,
+					unsigned long *ignored)
+{
+	unsigned long node_start_pfn, node_end_pfn;
+	unsigned long zone_start_pfn, zone_end_pfn;
+
+	/* Get the start and end of the node and zone */
+	get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn);
+	zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type];
+	zone_end_pfn = arch_zone_highest_possible_pfn[zone_type];
+
+	/* Check that this node has pages within the zone's required range */
+	if (zone_end_pfn < node_start_pfn || zone_start_pfn > node_end_pfn)
+		return 0;
+
+	/* Move the zone boundaries inside the node if necessary */
+	if (zone_end_pfn > node_end_pfn)
+		zone_end_pfn = node_end_pfn;
+	if (zone_start_pfn < node_start_pfn)
+		zone_start_pfn = node_start_pfn;
+
+	/* Return the spanned pages */
+	return zone_end_pfn - zone_start_pfn;
+}
+
+unsigned long __init __absent_pages_in_range(int nid,
+				unsigned long range_start_pfn,
+				unsigned long range_end_pfn)
+{
+	int i = 0;
+	unsigned long prev_end_pfn = 0, hole_pages = 0;
+	unsigned long start_pfn;
+
+	/* Find the end_pfn of the first active range of pfns in the node */
+	i = first_active_region_index_in_nid(nid);
+	prev_end_pfn = early_node_map[i].start_pfn;
+
+	/* Find all holes for the zone within the node */
+	for (; i != MAX_ACTIVE_REGIONS;
+			i = next_active_region_index_in_nid(i, nid)) {
+
+		/* No need to continue if prev_end_pfn is outside the zone */
+		if (prev_end_pfn >= range_end_pfn)
+			break;
+
+		/* Make sure the end of the zone is not within the hole */
+		start_pfn = early_node_map[i].start_pfn;
+		if (start_pfn > range_end_pfn)
+			start_pfn = range_end_pfn;
+		if (prev_end_pfn < range_start_pfn)
+			prev_end_pfn = range_start_pfn;
+
+		/* Update the hole size cound and move on */
+		if (start_pfn > range_start_pfn) {
+			BUG_ON(prev_end_pfn > start_pfn);
+			hole_pages += start_pfn - prev_end_pfn;
+		}
+		prev_end_pfn = early_node_map[i].end_pfn;
+	}
+
+	return hole_pages;
+}
+
+unsigned long __init absent_pages_in_range(unsigned long start_pfn,
+							unsigned long end_pfn)
+{
+	return __absent_pages_in_range(MAX_NUMNODES, start_pfn, end_pfn);
+}
+
+unsigned long __init zone_absent_pages_in_node(int nid,
+					unsigned long zone_type,
+					unsigned long *ignored)
+{
+	return __absent_pages_in_range(nid,
+				arch_zone_lowest_possible_pfn[zone_type],
+				arch_zone_highest_possible_pfn[zone_type]);
+}
+#else
+static inline unsigned long zone_present_pages_in_node(int nid,
+					unsigned long zone_type,
+					unsigned long *zones_size)
+{
+	return zones_size[zone_type];
+}
+
+static inline unsigned long zone_absent_pages_in_node(int nid,
+						unsigned long zone_type,
+						unsigned long *zholes_size)
+{
+	if (!zholes_size)
+		return 0;
+
+	return zholes_size[zone_type];
+}
+#endif
+
+static void __init calculate_node_totalpages(struct pglist_data *pgdat,
+		unsigned long *zones_size, unsigned long *zholes_size)
+{
+	unsigned long realtotalpages, totalpages = 0;
+	int i;
+
+	for (i = 0; i < MAX_NR_ZONES; i++) {
+		totalpages += zone_present_pages_in_node(pgdat->node_id, i,
+								zones_size);
+	}
+	pgdat->node_spanned_pages = totalpages;
+
+	realtotalpages = totalpages;
+	for (i = 0; i < MAX_NR_ZONES; i++) {
+		realtotalpages -=
+			zone_absent_pages_in_node(pgdat->node_id, i, zholes_size);
+	}
+	pgdat->node_present_pages = realtotalpages;
+	printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id,
+							realtotalpages);
+}
+
+/*
+ * Set up the zone data structures:
+ *   - mark all pages reserved
+ *   - mark all memory queues empty
+ *   - clear the memory bitmaps
+ */
+static void __init free_area_init_core(struct pglist_data *pgdat,
+		unsigned long *zones_size, unsigned long *zholes_size)
+{
+	unsigned long j;
+	int nid = pgdat->node_id;
+	unsigned long zone_start_pfn = pgdat->node_start_pfn;
+
+	pgdat_resize_init(pgdat);
+	pgdat->nr_zones = 0;
+	init_waitqueue_head(&pgdat->kswapd_wait);
+	pgdat->kswapd_max_order = 0;
+
+	for (j = 0; j < MAX_NR_ZONES; j++) {
+		struct zone *zone = pgdat->node_zones + j;
+		unsigned long size, realsize;
+
+		size = zone_present_pages_in_node(nid, j, zones_size);
+		realsize = size - zone_absent_pages_in_node(nid, j,
+								zholes_size);
+		if (j < ZONE_HIGHMEM)
+			nr_kernel_pages += realsize;
+		nr_all_pages += realsize;
+
+		zone->spanned_pages = size;
+		zone->present_pages = realsize;
+		zone->name = zone_names[j];
+		spin_lock_init(&zone->lock);
+		spin_lock_init(&zone->lru_lock);
+		zone_seqlock_init(zone);
+		zone->zone_pgdat = pgdat;
+		zone->free_pages = 0;
+
+		zone->temp_priority = zone->prev_priority = DEF_PRIORITY;
+
+		zone_pcp_init(zone);
+		INIT_LIST_HEAD(&zone->active_list);
+		INIT_LIST_HEAD(&zone->inactive_list);
+		zone->nr_scan_active = 0;
+		zone->nr_scan_inactive = 0;
+		zone->nr_active = 0;
+		zone->nr_inactive = 0;
+		atomic_set(&zone->reclaim_in_progress, 0);
+		if (!size)
+			continue;
+
+		zonetable_add(zone, nid, j, zone_start_pfn, size);
+		init_currently_empty_zone(zone, zone_start_pfn, size);
+		zone_start_pfn += size;
+	}
+}
+
+static void __init alloc_node_mem_map(struct pglist_data *pgdat)
+{
+	/* Skip empty nodes */
+	if (!pgdat->node_spanned_pages)
+		return;
+
+#ifdef CONFIG_FLAT_NODE_MEM_MAP
+	/* ia64 gets its own node_mem_map, before this, without bootmem */
+	if (!pgdat->node_mem_map) {
+		unsigned long size;
+		struct page *map;
+
+		size = (pgdat->node_spanned_pages + 1) * sizeof(struct page);
+		map = alloc_remap(pgdat->node_id, size);
+		if (!map)
+			map = alloc_bootmem_node(pgdat, size);
+		pgdat->node_mem_map = map;
+	}
+#ifdef CONFIG_FLATMEM
+	/*
+	 * With no DISCONTIG, the global mem_map is just set as node 0's
+	 */
+	if (pgdat == NODE_DATA(0))
+		mem_map = NODE_DATA(0)->node_mem_map;
+#endif
+#endif /* CONFIG_FLAT_NODE_MEM_MAP */
+}
+
+void __init free_area_init_node(int nid, struct pglist_data *pgdat,
+		unsigned long *zones_size, unsigned long node_start_pfn,
+		unsigned long *zholes_size)
+{
+	pgdat->node_id = nid;
+	pgdat->node_start_pfn = node_start_pfn;
+	calculate_node_totalpages(pgdat, zones_size, zholes_size);
+
+	alloc_node_mem_map(pgdat);
+
+	free_area_init_core(pgdat, zones_size, zholes_size);
+}
+
+#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
+void __init add_active_range(unsigned int nid, unsigned long start_pfn,
+						unsigned long end_pfn)
+{
+	unsigned int i;
+
+	/* Merge with existing active regions if possible */
+	for (i = 0; early_node_map[i].end_pfn; i++) {
+		if (early_node_map[i].nid != nid)
+			continue;
+
+		/* Skip if an existing region covers this new one */
+		if (start_pfn >= early_node_map[i].start_pfn &&
+				end_pfn <= early_node_map[i].end_pfn)
+			return;
+
+		/* Merge forward if suitable */
+		if (start_pfn <= early_node_map[i].end_pfn &&
+				end_pfn > early_node_map[i].end_pfn) {
+			early_node_map[i].end_pfn = end_pfn;
+			return;
+		}
+
+		/* Merge backward if suitable */
+		if (start_pfn < early_node_map[i].end_pfn &&
+				end_pfn >= early_node_map[i].start_pfn) {
+			early_node_map[i].start_pfn = start_pfn;
+			return;
+		}
+	}
+
+	/* Leave last entry NULL, we use range.end_pfn to terminate the walk */
+	if (i >= MAX_ACTIVE_REGIONS - 1) {
+		printk(KERN_ERR "Too many memory regions, truncating\n");
+		return;
+	}
+
+	early_node_map[i].nid = nid;
+	early_node_map[i].start_pfn = start_pfn;
+	early_node_map[i].end_pfn = end_pfn;
+}
+
+void __init remove_all_active_ranges()
+{
+	memset(early_node_map, 0, sizeof(early_node_map));
+}
+
+/* Compare two active node_active_regions */
+static int __init cmp_node_active_region(const void *a, const void *b)
+{
+	struct node_active_region *arange = (struct node_active_region *)a;
+	struct node_active_region *brange = (struct node_active_region *)b;
+
+	/* Done this way to avoid overflows */
+	if (arange->start_pfn > brange->start_pfn)
+		return 1;
+	if (arange->start_pfn < brange->start_pfn)
+		return -1;
+
+	return 0;
+}
+
+/* sort the node_map by start_pfn */
+static void __init sort_node_map(void)
+{
+	size_t num = 0;
+	while (early_node_map[num].end_pfn)
+		num++;
+
+	sort(early_node_map, num, sizeof(struct node_active_region),
+						cmp_node_active_region, NULL);
+}
+
+/* Find the lowest pfn for a node. This depends on a sorted early_node_map */
+unsigned long __init find_min_pfn_for_node(unsigned long nid)
+{
+	int i;
+
+	/* Assuming a sorted map, the first range found has the starting pfn */
+	for_each_active_range_index_in_nid(i, nid)
+		return early_node_map[i].start_pfn;
+
+	/* nid does not exist in early_node_map */
+	printk(KERN_WARNING "Could not find start_pfn for node %lu\n", nid);
+	return 0;
+}
+
+unsigned long __init find_min_pfn_with_active_regions(void)
+{
+	return find_min_pfn_for_node(MAX_NUMNODES);
+}
+
+unsigned long __init find_max_pfn_with_active_regions(void)
+{
+	int i;
+	unsigned long max_pfn = 0;
+
+	for (i = 0; early_node_map[i].end_pfn; i++)
+		max_pfn = max(max_pfn, early_node_map[i].end_pfn);
+
+	return max_pfn;
+}
+
+void __init free_area_init_nodes(unsigned long arch_max_dma_pfn,
+				unsigned long arch_max_dma32_pfn,
+				unsigned long arch_max_low_pfn,
+				unsigned long arch_max_high_pfn)
+{
+	unsigned long nid;
+	int zone_index;
+
+	/* Record where the zone boundaries are */
+	memset(arch_zone_lowest_possible_pfn, 0,
+				sizeof(arch_zone_lowest_possible_pfn));
+	memset(arch_zone_highest_possible_pfn, 0,
+				sizeof(arch_zone_highest_possible_pfn));
+	arch_zone_lowest_possible_pfn[ZONE_DMA] =
+					find_min_pfn_with_active_regions();
+	arch_zone_highest_possible_pfn[ZONE_DMA] = arch_max_dma_pfn;
+	arch_zone_highest_possible_pfn[ZONE_DMA32] = arch_max_dma32_pfn;
+	arch_zone_highest_possible_pfn[ZONE_NORMAL] = arch_max_low_pfn;
+	arch_zone_highest_possible_pfn[ZONE_HIGHMEM] = arch_max_high_pfn;
+	for (zone_index = 1; zone_index < MAX_NR_ZONES; zone_index++) {
+		arch_zone_lowest_possible_pfn[zone_index] = 
+			arch_zone_highest_possible_pfn[zone_index-1];
+	}
+
+	/* Regions in the early_node_map can be in any order */
+	sort_node_map();
+
+	for_each_online_node(nid) {
+		pg_data_t *pgdat = NODE_DATA(nid);
+		free_area_init_node(nid, pgdat, NULL,
+				find_min_pfn_for_node(nid), NULL);
+	}
+}
+#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
diff -rup -X /usr/src/patchset-0.5/bin//dontdiff linux-2.6.17-rc2-105-ia64_use_init_nodes/mm/page_alloc.c linux-2.6.17-rc2-106-breakout_mem_init/mm/page_alloc.c
--- linux-2.6.17-rc2-105-ia64_use_init_nodes/mm/page_alloc.c	2006-04-24 15:49:25.000000000 +0100
+++ linux-2.6.17-rc2-106-breakout_mem_init/mm/page_alloc.c	2006-04-24 15:53:22.000000000 +0100
@@ -37,8 +37,6 @@
 #include <linux/nodemask.h>
 #include <linux/vmalloc.h>
 #include <linux/mempolicy.h>
-#include <linux/sort.h>
-#include <linux/pfn.h>
 
 #include <asm/tlbflush.h>
 #include "internal.h"
@@ -55,7 +53,6 @@ unsigned long totalram_pages __read_most
 unsigned long totalhigh_pages __read_mostly;
 unsigned long totalreserve_pages __read_mostly;
 long nr_swap_pages;
-int percpu_pagelist_fraction;
 
 static void __free_pages_ok(struct page *page, unsigned int order);
 
@@ -81,24 +78,11 @@ EXPORT_SYMBOL(totalram_pages);
 struct zone *zone_table[1 << ZONETABLE_SHIFT] __read_mostly;
 EXPORT_SYMBOL(zone_table);
 
-static char *zone_names[MAX_NR_ZONES] = { "DMA", "DMA32", "Normal", "HighMem" };
 int min_free_kbytes = 1024;
 
 unsigned long __initdata nr_kernel_pages;
 unsigned long __initdata nr_all_pages;
 
-#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
-  #ifdef CONFIG_MAX_ACTIVE_REGIONS
-    #define MAX_ACTIVE_REGIONS CONFIG_MAX_ACTIVE_REGIONS
-  #else
-    #define MAX_ACTIVE_REGIONS (MAX_NR_ZONES * MAX_NUMNODES + 1)
-  #endif
-
-  struct node_active_region __initdata early_node_map[MAX_ACTIVE_REGIONS];
-  unsigned long __initdata arch_zone_lowest_possible_pfn[MAX_NR_ZONES];
-  unsigned long __initdata arch_zone_highest_possible_pfn[MAX_NR_ZONES];
-#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
-
 #ifdef CONFIG_DEBUG_VM
 static int page_outside_zone_boundaries(struct zone *zone, struct page *page)
 {
@@ -1517,989 +1501,6 @@ void show_free_areas(void)
 	show_swap_cache_info();
 }
 
-/*
- * Builds allocation fallback zone lists.
- *
- * Add all populated zones of a node to the zonelist.
- */
-static int __init build_zonelists_node(pg_data_t *pgdat,
-			struct zonelist *zonelist, int nr_zones, int zone_type)
-{
-	struct zone *zone;
-
-	BUG_ON(zone_type > ZONE_HIGHMEM);
-
-	do {
-		zone = pgdat->node_zones + zone_type;
-		if (populated_zone(zone)) {
-#ifndef CONFIG_HIGHMEM
-			BUG_ON(zone_type > ZONE_NORMAL);
-#endif
-			zonelist->zones[nr_zones++] = zone;
-			check_highest_zone(zone_type);
-		}
-		zone_type--;
-
-	} while (zone_type >= 0);
-	return nr_zones;
-}
-
-static inline int highest_zone(int zone_bits)
-{
-	int res = ZONE_NORMAL;
-	if (zone_bits & (__force int)__GFP_HIGHMEM)
-		res = ZONE_HIGHMEM;
-	if (zone_bits & (__force int)__GFP_DMA32)
-		res = ZONE_DMA32;
-	if (zone_bits & (__force int)__GFP_DMA)
-		res = ZONE_DMA;
-	return res;
-}
-
-#ifdef CONFIG_NUMA
-#define MAX_NODE_LOAD (num_online_nodes())
-static int __initdata node_load[MAX_NUMNODES];
-/**
- * find_next_best_node - find the next node that should appear in a given node's fallback list
- * @node: node whose fallback list we're appending
- * @used_node_mask: nodemask_t of already used nodes
- *
- * We use a number of factors to determine which is the next node that should
- * appear on a given node's fallback list.  The node should not have appeared
- * already in @node's fallback list, and it should be the next closest node
- * according to the distance array (which contains arbitrary distance values
- * from each node to each node in the system), and should also prefer nodes
- * with no CPUs, since presumably they'll have very little allocation pressure
- * on them otherwise.
- * It returns -1 if no node is found.
- */
-static int __init find_next_best_node(int node, nodemask_t *used_node_mask)
-{
-	int n, val;
-	int min_val = INT_MAX;
-	int best_node = -1;
-
-	/* Use the local node if we haven't already */
-	if (!node_isset(node, *used_node_mask)) {
-		node_set(node, *used_node_mask);
-		return node;
-	}
-
-	for_each_online_node(n) {
-		cpumask_t tmp;
-
-		/* Don't want a node to appear more than once */
-		if (node_isset(n, *used_node_mask))
-			continue;
-
-		/* Use the distance array to find the distance */
-		val = node_distance(node, n);
-
-		/* Penalize nodes under us ("prefer the next node") */
-		val += (n < node);
-
-		/* Give preference to headless and unused nodes */
-		tmp = node_to_cpumask(n);
-		if (!cpus_empty(tmp))
-			val += PENALTY_FOR_NODE_WITH_CPUS;
-
-		/* Slight preference for less loaded node */
-		val *= (MAX_NODE_LOAD*MAX_NUMNODES);
-		val += node_load[n];
-
-		if (val < min_val) {
-			min_val = val;
-			best_node = n;
-		}
-	}
-
-	if (best_node >= 0)
-		node_set(best_node, *used_node_mask);
-
-	return best_node;
-}
-
-static void __init build_zonelists(pg_data_t *pgdat)
-{
-	int i, j, k, node, local_node;
-	int prev_node, load;
-	struct zonelist *zonelist;
-	nodemask_t used_mask;
-
-	/* initialize zonelists */
-	for (i = 0; i < GFP_ZONETYPES; i++) {
-		zonelist = pgdat->node_zonelists + i;
-		zonelist->zones[0] = NULL;
-	}
-
-	/* NUMA-aware ordering of nodes */
-	local_node = pgdat->node_id;
-	load = num_online_nodes();
-	prev_node = local_node;
-	nodes_clear(used_mask);
-	while ((node = find_next_best_node(local_node, &used_mask)) >= 0) {
-		int distance = node_distance(local_node, node);
-
-		/*
-		 * If another node is sufficiently far away then it is better
-		 * to reclaim pages in a zone before going off node.
-		 */
-		if (distance > RECLAIM_DISTANCE)
-			zone_reclaim_mode = 1;
-
-		/*
-		 * We don't want to pressure a particular node.
-		 * So adding penalty to the first node in same
-		 * distance group to make it round-robin.
-		 */
-
-		if (distance != node_distance(local_node, prev_node))
-			node_load[node] += load;
-		prev_node = node;
-		load--;
-		for (i = 0; i < GFP_ZONETYPES; i++) {
-			zonelist = pgdat->node_zonelists + i;
-			for (j = 0; zonelist->zones[j] != NULL; j++);
-
-			k = highest_zone(i);
-
-	 		j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
-			zonelist->zones[j] = NULL;
-		}
-	}
-}
-
-#else	/* CONFIG_NUMA */
-
-static void __init build_zonelists(pg_data_t *pgdat)
-{
-	int i, j, k, node, local_node;
-
-	local_node = pgdat->node_id;
-	for (i = 0; i < GFP_ZONETYPES; i++) {
-		struct zonelist *zonelist;
-
-		zonelist = pgdat->node_zonelists + i;
-
-		j = 0;
-		k = highest_zone(i);
- 		j = build_zonelists_node(pgdat, zonelist, j, k);
- 		/*
- 		 * Now we build the zonelist so that it contains the zones
- 		 * of all the other nodes.
- 		 * We don't want to pressure a particular node, so when
- 		 * building the zones for node N, we make sure that the
- 		 * zones coming right after the local ones are those from
- 		 * node N+1 (modulo N)
- 		 */
-		for (node = local_node + 1; node < MAX_NUMNODES; node++) {
-			if (!node_online(node))
-				continue;
-			j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
-		}
-		for (node = 0; node < local_node; node++) {
-			if (!node_online(node))
-				continue;
-			j = build_zonelists_node(NODE_DATA(node), zonelist, j, k);
-		}
-
-		zonelist->zones[j] = NULL;
-	}
-}
-
-#endif	/* CONFIG_NUMA */
-
-void __init build_all_zonelists(void)
-{
-	int i;
-
-	for_each_online_node(i)
-		build_zonelists(NODE_DATA(i));
-	printk("Built %i zonelists\n", num_online_nodes());
-	cpuset_init_current_mems_allowed();
-}
-
-/*
- * Helper functions to size the waitqueue hash table.
- * Essentially these want to choose hash table sizes sufficiently
- * large so that collisions trying to wait on pages are rare.
- * But in fact, the number of active page waitqueues on typical
- * systems is ridiculously low, less than 200. So this is even
- * conservative, even though it seems large.
- *
- * The constant PAGES_PER_WAITQUEUE specifies the ratio of pages to
- * waitqueues, i.e. the size of the waitq table given the number of pages.
- */
-#define PAGES_PER_WAITQUEUE	256
-
-static inline unsigned long wait_table_size(unsigned long pages)
-{
-	unsigned long size = 1;
-
-	pages /= PAGES_PER_WAITQUEUE;
-
-	while (size < pages)
-		size <<= 1;
-
-	/*
-	 * Once we have dozens or even hundreds of threads sleeping
-	 * on IO we've got bigger problems than wait queue collision.
-	 * Limit the size of the wait table to a reasonable size.
-	 */
-	size = min(size, 4096UL);
-
-	return max(size, 4UL);
-}
-
-/*
- * This is an integer logarithm so that shifts can be used later
- * to extract the more random high bits from the multiplicative
- * hash function before the remainder is taken.
- */
-static inline unsigned long wait_table_bits(unsigned long size)
-{
-	return ffz(~size);
-}
-
-#define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1))
-
-/*
- * Initially all pages are reserved - free ones are freed
- * up by free_all_bootmem() once the early boot process is
- * done. Non-atomic initialization, single-pass.
- */
-void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
-		unsigned long start_pfn)
-{
-	struct page *page;
-	unsigned long end_pfn = start_pfn + size;
-	unsigned long pfn;
-
-	for (pfn = start_pfn; pfn < end_pfn; pfn++) {
-		if (!early_pfn_valid(pfn))
-			continue;
-		page = pfn_to_page(pfn);
-		set_page_links(page, zone, nid, pfn);
-		init_page_count(page);
-		reset_page_mapcount(page);
-		SetPageReserved(page);
-		INIT_LIST_HEAD(&page->lru);
-#ifdef WANT_PAGE_VIRTUAL
-		/* The shift won't overflow because ZONE_NORMAL is below 4G. */
-		if (!is_highmem_idx(zone))
-			set_page_address(page, __va(pfn << PAGE_SHIFT));
-#endif
-	}
-}
-
-void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone,
-				unsigned long size)
-{
-	int order;
-	for (order = 0; order < MAX_ORDER ; order++) {
-		INIT_LIST_HEAD(&zone->free_area[order].free_list);
-		zone->free_area[order].nr_free = 0;
-	}
-}
-
-#define ZONETABLE_INDEX(x, zone_nr)	((x << ZONES_SHIFT) | zone_nr)
-void zonetable_add(struct zone *zone, int nid, int zid, unsigned long pfn,
-		unsigned long size)
-{
-	unsigned long snum = pfn_to_section_nr(pfn);
-	unsigned long end = pfn_to_section_nr(pfn + size);
-
-	if (FLAGS_HAS_NODE)
-		zone_table[ZONETABLE_INDEX(nid, zid)] = zone;
-	else
-		for (; snum <= end; snum++)
-			zone_table[ZONETABLE_INDEX(snum, zid)] = zone;
-}
-
-#ifndef __HAVE_ARCH_MEMMAP_INIT
-#define memmap_init(size, nid, zone, start_pfn) \
-	memmap_init_zone((size), (nid), (zone), (start_pfn))
-#endif
-
-static int __cpuinit zone_batchsize(struct zone *zone)
-{
-	int batch;
-
-	/*
-	 * The per-cpu-pages pools are set to around 1000th of the
-	 * size of the zone.  But no more than 1/2 of a meg.
-	 *
-	 * OK, so we don't know how big the cache is.  So guess.
-	 */
-	batch = zone->present_pages / 1024;
-	if (batch * PAGE_SIZE > 512 * 1024)
-		batch = (512 * 1024) / PAGE_SIZE;
-	batch /= 4;		/* We effectively *= 4 below */
-	if (batch < 1)
-		batch = 1;
-
-	/*
-	 * Clamp the batch to a 2^n - 1 value. Having a power
-	 * of 2 value was found to be more likely to have
-	 * suboptimal cache aliasing properties in some cases.
-	 *
-	 * For example if 2 tasks are alternately allocating
-	 * batches of pages, one task can end up with a lot
-	 * of pages of one half of the possible page colors
-	 * and the other with pages of the other colors.
-	 */
-	batch = (1 << (fls(batch + batch/2)-1)) - 1;
-
-	return batch;
-}
-
-inline void setup_pageset(struct per_cpu_pageset *p, unsigned long batch)
-{
-	struct per_cpu_pages *pcp;
-
-	memset(p, 0, sizeof(*p));
-
-	pcp = &p->pcp[0];		/* hot */
-	pcp->count = 0;
-	pcp->high = 6 * batch;
-	pcp->batch = max(1UL, 1 * batch);
-	INIT_LIST_HEAD(&pcp->list);
-
-	pcp = &p->pcp[1];		/* cold*/
-	pcp->count = 0;
-	pcp->high = 2 * batch;
-	pcp->batch = max(1UL, batch/2);
-	INIT_LIST_HEAD(&pcp->list);
-}
-
-/*
- * setup_pagelist_highmark() sets the high water mark for hot per_cpu_pagelist
- * to the value high for the pageset p.
- */
-
-static void setup_pagelist_highmark(struct per_cpu_pageset *p,
-				unsigned long high)
-{
-	struct per_cpu_pages *pcp;
-
-	pcp = &p->pcp[0]; /* hot list */
-	pcp->high = high;
-	pcp->batch = max(1UL, high/4);
-	if ((high/4) > (PAGE_SHIFT * 8))
-		pcp->batch = PAGE_SHIFT * 8;
-}
-
-
-#ifdef CONFIG_NUMA
-/*
- * Boot pageset table. One per cpu which is going to be used for all
- * zones and all nodes. The parameters will be set in such a way
- * that an item put on a list will immediately be handed over to
- * the buddy list. This is safe since pageset manipulation is done
- * with interrupts disabled.
- *
- * Some NUMA counter updates may also be caught by the boot pagesets.
- *
- * The boot_pagesets must be kept even after bootup is complete for
- * unused processors and/or zones. They do play a role for bootstrapping
- * hotplugged processors.
- *
- * zoneinfo_show() and maybe other functions do
- * not check if the processor is online before following the pageset pointer.
- * Other parts of the kernel may not check if the zone is available.
- */
-static struct per_cpu_pageset boot_pageset[NR_CPUS];
-
-/*
- * Dynamically allocate memory for the
- * per cpu pageset array in struct zone.
- */
-static int __cpuinit process_zones(int cpu)
-{
-	struct zone *zone, *dzone;
-
-	for_each_zone(zone) {
-
-		zone_pcp(zone, cpu) = kmalloc_node(sizeof(struct per_cpu_pageset),
-					 GFP_KERNEL, cpu_to_node(cpu));
-		if (!zone_pcp(zone, cpu))
-			goto bad;
-
-		setup_pageset(zone_pcp(zone, cpu), zone_batchsize(zone));
-
-		if (percpu_pagelist_fraction)
-			setup_pagelist_highmark(zone_pcp(zone, cpu),
-			 	(zone->present_pages / percpu_pagelist_fraction));
-	}
-
-	return 0;
-bad:
-	for_each_zone(dzone) {
-		if (dzone == zone)
-			break;
-		kfree(zone_pcp(dzone, cpu));
-		zone_pcp(dzone, cpu) = NULL;
-	}
-	return -ENOMEM;
-}
-
-static inline void free_zone_pagesets(int cpu)
-{
-	struct zone *zone;
-
-	for_each_zone(zone) {
-		struct per_cpu_pageset *pset = zone_pcp(zone, cpu);
-
-		zone_pcp(zone, cpu) = NULL;
-		kfree(pset);
-	}
-}
-
-static int __cpuinit pageset_cpuup_callback(struct notifier_block *nfb,
-		unsigned long action,
-		void *hcpu)
-{
-	int cpu = (long)hcpu;
-	int ret = NOTIFY_OK;
-
-	switch (action) {
-		case CPU_UP_PREPARE:
-			if (process_zones(cpu))
-				ret = NOTIFY_BAD;
-			break;
-		case CPU_UP_CANCELED:
-		case CPU_DEAD:
-			free_zone_pagesets(cpu);
-			break;
-		default:
-			break;
-	}
-	return ret;
-}
-
-static struct notifier_block pageset_notifier =
-	{ &pageset_cpuup_callback, NULL, 0 };
-
-void __init setup_per_cpu_pageset(void)
-{
-	int err;
-
-	/* Initialize per_cpu_pageset for cpu 0.
-	 * A cpuup callback will do this for every cpu
-	 * as it comes online
-	 */
-	err = process_zones(smp_processor_id());
-	BUG_ON(err);
-	register_cpu_notifier(&pageset_notifier);
-}
-
-#endif
-
-static __meminit
-void zone_wait_table_init(struct zone *zone, unsigned long zone_size_pages)
-{
-	int i;
-	struct pglist_data *pgdat = zone->zone_pgdat;
-
-	/*
-	 * The per-page waitqueue mechanism uses hashed waitqueues
-	 * per zone.
-	 */
-	zone->wait_table_size = wait_table_size(zone_size_pages);
-	zone->wait_table_bits =	wait_table_bits(zone->wait_table_size);
-	zone->wait_table = (wait_queue_head_t *)
-		alloc_bootmem_node(pgdat, zone->wait_table_size
-					* sizeof(wait_queue_head_t));
-
-	for(i = 0; i < zone->wait_table_size; ++i)
-		init_waitqueue_head(zone->wait_table + i);
-}
-
-static __meminit void zone_pcp_init(struct zone *zone)
-{
-	int cpu;
-	unsigned long batch = zone_batchsize(zone);
-
-	for (cpu = 0; cpu < NR_CPUS; cpu++) {
-#ifdef CONFIG_NUMA
-		/* Early boot. Slab allocator not functional yet */
-		zone_pcp(zone, cpu) = &boot_pageset[cpu];
-		setup_pageset(&boot_pageset[cpu],0);
-#else
-		setup_pageset(zone_pcp(zone,cpu), batch);
-#endif
-	}
-	if (zone->present_pages)
-		printk(KERN_DEBUG "  %s zone: %lu pages, LIFO batch:%lu\n",
-			zone->name, zone->present_pages, batch);
-}
-
-static __meminit void init_currently_empty_zone(struct zone *zone,
-		unsigned long zone_start_pfn, unsigned long size)
-{
-	struct pglist_data *pgdat = zone->zone_pgdat;
-
-	zone_wait_table_init(zone, size);
-	pgdat->nr_zones = zone_idx(zone) + 1;
-
-	zone->zone_start_pfn = zone_start_pfn;
-
-	memmap_init(size, pgdat->node_id, zone_idx(zone), zone_start_pfn);
-
-	zone_init_free_lists(pgdat, zone, zone->spanned_pages);
-}
-
-#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
-/* Note: nid == MAX_NUMNODES returns first region */
-static int __init first_active_region_index_in_nid(int nid)
-{
-	int i;
-	for (i = 0; early_node_map[i].end_pfn; i++) {
-		if (nid == MAX_NUMNODES || early_node_map[i].nid == nid)
-			return i;
-	}
-
-	return MAX_ACTIVE_REGIONS;
-}
-
-/* Note: nid == MAX_NUMNODES returns next region */
-static int __init next_active_region_index_in_nid(unsigned int index, int nid)
-{
-	for (index = index + 1; early_node_map[index].end_pfn; index++) {
-		if (nid == MAX_NUMNODES || early_node_map[index].nid == nid)
-			return index;
-	}
-
-	return MAX_ACTIVE_REGIONS;
-}
-
-#ifndef CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID
-int __init early_pfn_to_nid(unsigned long pfn)
-{
-	int i;
-
-	for (i = 0; early_node_map[i].end_pfn; i++) {
-		unsigned long start_pfn = early_node_map[i].start_pfn;
-		unsigned long end_pfn = early_node_map[i].end_pfn;
-
-		if ((start_pfn <= pfn) && (pfn < end_pfn))
-			return early_node_map[i].nid;
-	}
-
-	return -1;
-}
-#endif /* CONFIG_HAVE_ARCH_EARLY_PFN_TO_NID */
-
-#define for_each_active_range_index_in_nid(i, nid) \
-	for (i = first_active_region_index_in_nid(nid); \
-				i != MAX_ACTIVE_REGIONS; \
-				i = next_active_region_index_in_nid(i, nid))
-
-void __init free_bootmem_with_active_regions(int nid,
-						unsigned long max_low_pfn)
-{
-	unsigned int i;
-	for_each_active_range_index_in_nid(i, nid) {
-		unsigned long size_pages = 0;
-		unsigned long end_pfn = early_node_map[i].end_pfn;
-		if (early_node_map[i].start_pfn >= max_low_pfn)
-			continue;
-
-		if (end_pfn > max_low_pfn)
-			end_pfn = max_low_pfn;
-
-		size_pages = end_pfn - early_node_map[i].start_pfn;
-		free_bootmem_node(NODE_DATA(early_node_map[i].nid),
-				PFN_PHYS(early_node_map[i].start_pfn),
-				PFN_PHYS(size_pages));
-	}
-}
-
-void __init sparse_memory_present_with_active_regions(int nid)
-{
-	unsigned int i;
-	for_each_active_range_index_in_nid(i, nid)
-		memory_present(early_node_map[i].nid,
-				early_node_map[i].start_pfn,
-				early_node_map[i].end_pfn);
-}
-
-void __init get_pfn_range_for_nid(unsigned int nid,
-			unsigned long *start_pfn, unsigned long *end_pfn)
-{
-	unsigned int i;
-	*start_pfn = -1UL;
-	*end_pfn = 0;
-
-	for_each_active_range_index_in_nid(i, nid) {
-		if (early_node_map[i].start_pfn < *start_pfn)
-			*start_pfn = early_node_map[i].start_pfn;
-
-		if (early_node_map[i].end_pfn > *end_pfn)
-			*end_pfn = early_node_map[i].end_pfn;
-	}
-
-	if (*start_pfn == -1UL) {
-		printk(KERN_WARNING "Node %u active with no memory\n", nid);
-		*start_pfn = 0;
-	}
-}
-
-unsigned long __init zone_present_pages_in_node(int nid,
-					unsigned long zone_type,
-					unsigned long *ignored)
-{
-	unsigned long node_start_pfn, node_end_pfn;
-	unsigned long zone_start_pfn, zone_end_pfn;
-
-	/* Get the start and end of the node and zone */
-	get_pfn_range_for_nid(nid, &node_start_pfn, &node_end_pfn);
-	zone_start_pfn = arch_zone_lowest_possible_pfn[zone_type];
-	zone_end_pfn = arch_zone_highest_possible_pfn[zone_type];
-
-	/* Check that this node has pages within the zone's required range */
-	if (zone_end_pfn < node_start_pfn || zone_start_pfn > node_end_pfn)
-		return 0;
-
-	/* Move the zone boundaries inside the node if necessary */
-	if (zone_end_pfn > node_end_pfn)
-		zone_end_pfn = node_end_pfn;
-	if (zone_start_pfn < node_start_pfn)
-		zone_start_pfn = node_start_pfn;
-
-	/* Return the spanned pages */
-	return zone_end_pfn - zone_start_pfn;
-}
-
-unsigned long __init __absent_pages_in_range(int nid,
-				unsigned long range_start_pfn,
-				unsigned long range_end_pfn)
-{
-	int i = 0;
-	unsigned long prev_end_pfn = 0, hole_pages = 0;
-	unsigned long start_pfn;
-
-	/* Find the end_pfn of the first active range of pfns in the node */
-	i = first_active_region_index_in_nid(nid);
-	prev_end_pfn = early_node_map[i].start_pfn;
-
-	/* Find all holes for the zone within the node */
-	for (; i != MAX_ACTIVE_REGIONS;
-			i = next_active_region_index_in_nid(i, nid)) {
-
-		/* No need to continue if prev_end_pfn is outside the zone */
-		if (prev_end_pfn >= range_end_pfn)
-			break;
-
-		/* Make sure the end of the zone is not within the hole */
-		start_pfn = early_node_map[i].start_pfn;
-		if (start_pfn > range_end_pfn)
-			start_pfn = range_end_pfn;
-		if (prev_end_pfn < range_start_pfn)
-			prev_end_pfn = range_start_pfn;
-
-		/* Update the hole size cound and move on */
-		if (start_pfn > range_start_pfn) {
-			BUG_ON(prev_end_pfn > start_pfn);
-			hole_pages += start_pfn - prev_end_pfn;
-		}
-		prev_end_pfn = early_node_map[i].end_pfn;
-	}
-
-	return hole_pages;
-}
-
-unsigned long __init absent_pages_in_range(unsigned long start_pfn,
-							unsigned long end_pfn)
-{
-	return __absent_pages_in_range(MAX_NUMNODES, start_pfn, end_pfn);
-}
-
-unsigned long __init zone_absent_pages_in_node(int nid,
-					unsigned long zone_type,
-					unsigned long *ignored)
-{
-	return __absent_pages_in_range(nid,
-				arch_zone_lowest_possible_pfn[zone_type],
-				arch_zone_highest_possible_pfn[zone_type]);
-}
-#else
-static inline unsigned long zone_present_pages_in_node(int nid,
-					unsigned long zone_type,
-					unsigned long *zones_size)
-{
-	return zones_size[zone_type];
-}
-
-static inline unsigned long zone_absent_pages_in_node(int nid,
-						unsigned long zone_type,
-						unsigned long *zholes_size)
-{
-	if (!zholes_size)
-		return 0;
-
-	return zholes_size[zone_type];
-}
-#endif
-
-static void __init calculate_node_totalpages(struct pglist_data *pgdat,
-		unsigned long *zones_size, unsigned long *zholes_size)
-{
-	unsigned long realtotalpages, totalpages = 0;
-	int i;
-
-	for (i = 0; i < MAX_NR_ZONES; i++) {
-		totalpages += zone_present_pages_in_node(pgdat->node_id, i,
-								zones_size);
-	}
-	pgdat->node_spanned_pages = totalpages;
-
-	realtotalpages = totalpages;
-	for (i = 0; i < MAX_NR_ZONES; i++) {
-		realtotalpages -=
-			zone_absent_pages_in_node(pgdat->node_id, i, zholes_size);
-	}
-	pgdat->node_present_pages = realtotalpages;
-	printk(KERN_DEBUG "On node %d totalpages: %lu\n", pgdat->node_id,
-							realtotalpages);
-}
-
-/*
- * Set up the zone data structures:
- *   - mark all pages reserved
- *   - mark all memory queues empty
- *   - clear the memory bitmaps
- */
-static void __init free_area_init_core(struct pglist_data *pgdat,
-		unsigned long *zones_size, unsigned long *zholes_size)
-{
-	unsigned long j;
-	int nid = pgdat->node_id;
-	unsigned long zone_start_pfn = pgdat->node_start_pfn;
-
-	pgdat_resize_init(pgdat);
-	pgdat->nr_zones = 0;
-	init_waitqueue_head(&pgdat->kswapd_wait);
-	pgdat->kswapd_max_order = 0;
-	
-	for (j = 0; j < MAX_NR_ZONES; j++) {
-		struct zone *zone = pgdat->node_zones + j;
-		unsigned long size, realsize;
-
-		size = zone_present_pages_in_node(nid, j, zones_size);
-		realsize = size - zone_absent_pages_in_node(nid, j,
-								zholes_size);
-		if (j < ZONE_HIGHMEM)
-			nr_kernel_pages += realsize;
-		nr_all_pages += realsize;
-
-		zone->spanned_pages = size;
-		zone->present_pages = realsize;
-		zone->name = zone_names[j];
-		spin_lock_init(&zone->lock);
-		spin_lock_init(&zone->lru_lock);
-		zone_seqlock_init(zone);
-		zone->zone_pgdat = pgdat;
-		zone->free_pages = 0;
-
-		zone->temp_priority = zone->prev_priority = DEF_PRIORITY;
-
-		zone_pcp_init(zone);
-		INIT_LIST_HEAD(&zone->active_list);
-		INIT_LIST_HEAD(&zone->inactive_list);
-		zone->nr_scan_active = 0;
-		zone->nr_scan_inactive = 0;
-		zone->nr_active = 0;
-		zone->nr_inactive = 0;
-		atomic_set(&zone->reclaim_in_progress, 0);
-		if (!size)
-			continue;
-
-		zonetable_add(zone, nid, j, zone_start_pfn, size);
-		init_currently_empty_zone(zone, zone_start_pfn, size);
-		zone_start_pfn += size;
-	}
-}
-
-static void __init alloc_node_mem_map(struct pglist_data *pgdat)
-{
-	/* Skip empty nodes */
-	if (!pgdat->node_spanned_pages)
-		return;
-
-#ifdef CONFIG_FLAT_NODE_MEM_MAP
-	/* ia64 gets its own node_mem_map, before this, without bootmem */
-	if (!pgdat->node_mem_map) {
-		unsigned long size;
-		struct page *map;
-
-		size = (pgdat->node_spanned_pages + 1) * sizeof(struct page);
-		map = alloc_remap(pgdat->node_id, size);
-		if (!map)
-			map = alloc_bootmem_node(pgdat, size);
-		pgdat->node_mem_map = map;
-	}
-#ifdef CONFIG_FLATMEM
-	/*
-	 * With no DISCONTIG, the global mem_map is just set as node 0's
-	 */
-	if (pgdat == NODE_DATA(0))
-		mem_map = NODE_DATA(0)->node_mem_map;
-#endif
-#endif /* CONFIG_FLAT_NODE_MEM_MAP */
-}
-
-void __init free_area_init_node(int nid, struct pglist_data *pgdat,
-		unsigned long *zones_size, unsigned long node_start_pfn,
-		unsigned long *zholes_size)
-{
-	pgdat->node_id = nid;
-	pgdat->node_start_pfn = node_start_pfn;
-	calculate_node_totalpages(pgdat, zones_size, zholes_size);
-
-	alloc_node_mem_map(pgdat);
-
-	free_area_init_core(pgdat, zones_size, zholes_size);
-}
-
-#ifdef CONFIG_ARCH_POPULATES_NODE_MAP
-void __init add_active_range(unsigned int nid, unsigned long start_pfn,
-						unsigned long end_pfn)
-{
-	unsigned int i;
-
-	/* Merge with existing active regions if possible */
-	for (i = 0; early_node_map[i].end_pfn; i++) {
-		if (early_node_map[i].nid != nid)
-			continue;
-
-		/* Skip if an existing region covers this new one */
-		if (start_pfn >= early_node_map[i].start_pfn &&
-				end_pfn <= early_node_map[i].end_pfn)
-			return;
-
-		/* Merge forward if suitable */
-		if (start_pfn <= early_node_map[i].end_pfn &&
-				end_pfn > early_node_map[i].end_pfn) {
-			early_node_map[i].end_pfn = end_pfn;
-			return;
-		}
-
-		/* Merge backward if suitable */
-		if (start_pfn < early_node_map[i].end_pfn &&
-				end_pfn >= early_node_map[i].start_pfn) {
-			early_node_map[i].start_pfn = start_pfn;
-			return;
-		}
-	}
-
-	/* Leave last entry NULL, we use range.end_pfn to terminate the walk */
-	if (i >= MAX_ACTIVE_REGIONS - 1) {
-		printk(KERN_ERR "Too many memory regions, truncating\n");
-		return;
-	}
-
-	early_node_map[i].nid = nid;
-	early_node_map[i].start_pfn = start_pfn;
-	early_node_map[i].end_pfn = end_pfn;
-}
-
-void __init remove_all_active_ranges()
-{
-	memset(early_node_map, 0, sizeof(early_node_map));
-}
-
-/* Compare two active node_active_regions */
-static int __init cmp_node_active_region(const void *a, const void *b)
-{
-	struct node_active_region *arange = (struct node_active_region *)a;
-	struct node_active_region *brange = (struct node_active_region *)b;
-
-	/* Done this way to avoid overflows */
-	if (arange->start_pfn > brange->start_pfn)
-		return 1;
-	if (arange->start_pfn < brange->start_pfn)
-		return -1;
-
-	return 0;
-}
-
-/* sort the node_map by start_pfn */
-static void __init sort_node_map(void)
-{
-	size_t num = 0;
-	while (early_node_map[num].end_pfn)
-		num++;
-
-	sort(early_node_map, num, sizeof(struct node_active_region),
-						cmp_node_active_region, NULL);
-}
-
-/* Find the lowest pfn for a node. This depends on a sorted early_node_map */
-unsigned long __init find_min_pfn_for_node(unsigned long nid)
-{
-	int i;
-
-	/* Assuming a sorted map, the first range found has the starting pfn */
-	for_each_active_range_index_in_nid(i, nid)
-		return early_node_map[i].start_pfn;
-
-	/* nid does not exist in early_node_map */
-	printk(KERN_WARNING "Could not find start_pfn for node %lu\n", nid);
-	return 0;
-}
-
-unsigned long __init find_min_pfn_with_active_regions(void)
-{
-	return find_min_pfn_for_node(MAX_NUMNODES);
-}
-
-unsigned long __init find_max_pfn_with_active_regions(void)
-{
-	int i;
-	unsigned long max_pfn = 0;
-
-	for (i = 0; early_node_map[i].end_pfn; i++)
-		max_pfn = max(max_pfn, early_node_map[i].end_pfn);
-
-	return max_pfn;
-}
-
-void __init free_area_init_nodes(unsigned long arch_max_dma_pfn,
-				unsigned long arch_max_dma32_pfn,
-				unsigned long arch_max_low_pfn,
-				unsigned long arch_max_high_pfn)
-{
-	unsigned long nid;
-	int zone_index;
-
-	/* Record where the zone boundaries are */
-	memset(arch_zone_lowest_possible_pfn, 0,
-				sizeof(arch_zone_lowest_possible_pfn));
-	memset(arch_zone_highest_possible_pfn, 0,
-				sizeof(arch_zone_highest_possible_pfn));
-	arch_zone_lowest_possible_pfn[ZONE_DMA] =
-					find_min_pfn_with_active_regions();
-	arch_zone_highest_possible_pfn[ZONE_DMA] = arch_max_dma_pfn;
-	arch_zone_highest_possible_pfn[ZONE_DMA32] = arch_max_dma32_pfn;
-	arch_zone_highest_possible_pfn[ZONE_NORMAL] = arch_max_low_pfn;
-	arch_zone_highest_possible_pfn[ZONE_HIGHMEM] = arch_max_high_pfn;
-	for (zone_index = 1; zone_index < MAX_NR_ZONES; zone_index++) {
-		arch_zone_lowest_possible_pfn[zone_index] =
-			arch_zone_highest_possible_pfn[zone_index-1];
-	}
-
-	/* Regions in the early_node_map can be in any order */
-	sort_node_map();
-
-	for_each_online_node(nid) {
-		pg_data_t *pgdat = NODE_DATA(nid);
-		free_area_init_node(nid, pgdat, NULL,
-				find_min_pfn_for_node(nid), NULL);
-	}
-}
-#endif /* CONFIG_ARCH_POPULATES_NODE_MAP */
-
 #ifndef CONFIG_NEED_MULTIPLE_NODES
 static bootmem_data_t contig_bootmem_data;
 struct pglist_data contig_page_data = { .bdata = &contig_bootmem_data };
@@ -3020,32 +2021,6 @@ int lowmem_reserve_ratio_sysctl_handler(
 	return 0;
 }
 
-/*
- * percpu_pagelist_fraction - changes the pcp->high for each zone on each
- * cpu.  It is the fraction of total pages in each zone that a hot per cpu pagelist
- * can have before it gets flushed back to buddy allocator.
- */
-
-int percpu_pagelist_fraction_sysctl_handler(ctl_table *table, int write,
-	struct file *file, void __user *buffer, size_t *length, loff_t *ppos)
-{
-	struct zone *zone;
-	unsigned int cpu;
-	int ret;
-
-	ret = proc_dointvec_minmax(table, write, file, buffer, length, ppos);
-	if (!write || (ret == -EINVAL))
-		return ret;
-	for_each_zone(zone) {
-		for_each_online_cpu(cpu) {
-			unsigned long  high;
-			high = zone->present_pages / percpu_pagelist_fraction;
-			setup_pagelist_highmark(zone_pcp(zone, cpu), high);
-		}
-	}
-	return 0;
-}
-
 __initdata int hashdist = HASHDIST_DEFAULT;
 
 #ifdef CONFIG_NUMA

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox