LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 4/7] [RFC][V2] enable BGP_L1_WRITETHROUGH mode for BG/P
From: Eric Van Hensbergen @ 2011-05-19 21:42 UTC (permalink / raw)
  To: linux-kernel; +Cc: linuxppc-dev, bg-linux
In-Reply-To: <1305753895-24845-4-git-send-email-ericvh@gmail.com>

BG/P nodes need to be configured for writethrough to work in SMP
configurations.  This patch adds the right hooks in the MMU code
to make sure BGP_L1_WRITETHROUGH configurations are set up for BG/P.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 arch/powerpc/include/asm/mmu-44x.h     |    2 ++
 arch/powerpc/kernel/head_44x.S         |   24 ++++++++++++++++++++++--
 arch/powerpc/kernel/misc_32.S          |   15 +++++++++++++++
 arch/powerpc/lib/copy_32.S             |   10 ++++++++++
 arch/powerpc/mm/44x_mmu.c              |    7 +++++--
 arch/powerpc/platforms/Kconfig         |    5 +++++
 arch/powerpc/platforms/Kconfig.cputype |    1 +
 7 files changed, 60 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu-44x.h b/arch/powerpc/include/asm/mmu-44x.h
index bf52d70..ca1b90c 100644
--- a/arch/powerpc/include/asm/mmu-44x.h
+++ b/arch/powerpc/include/asm/mmu-44x.h
@@ -8,6 +8,7 @@
 
 #define PPC44x_MMUCR_TID	0x000000ff
 #define PPC44x_MMUCR_STS	0x00010000
+#define PPC44x_MMUCR_U2		0x00200000
 
 #define	PPC44x_TLB_PAGEID	0
 #define	PPC44x_TLB_XLAT		1
@@ -32,6 +33,7 @@
 
 /* Storage attribute and access control fields */
 #define PPC44x_TLB_ATTR_MASK	0x0000ff80
+#define PPC44x_TLB_WL1		0x00100000	/* Write-through L1 */
 #define PPC44x_TLB_U0		0x00008000      /* User 0 */
 #define PPC44x_TLB_U1		0x00004000      /* User 1 */
 #define PPC44x_TLB_U2		0x00002000      /* User 2 */
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index 5e12b74..f10ac53 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -429,7 +429,16 @@ finish_tlb_load_44x:
 	andi.	r10,r12,_PAGE_USER		/* User page ? */
 	beq	1f				/* nope, leave U bits empty */
 	rlwimi	r11,r11,3,26,28			/* yes, copy S bits to U */
-1:	tlbwe	r11,r13,PPC44x_TLB_ATTRIB	/* Write ATTRIB */
+1:
+#ifdef CONFIG_BGP_L1_WRITETHROUGH
+	andi.	r10, r11, PPC44x_TLB_I
+	bne	2f
+	oris    r11,r11,PPC44x_TLB_WL1@h	/* Add coherency for */
+						/* non-inhibited */
+	ori	r11,r11,PPC44x_TLB_U2|PPC44x_TLB_M
+2:
+#endif /* CONFIG_BGP_L1_WRITETHROUGH */
+	tlbwe	r11,r13,PPC44x_TLB_ATTRIB	/* Write ATTRIB */
 
 	/* Done...restore registers and get out of here.
 	*/
@@ -799,7 +808,11 @@ skpinv:	addi	r4,r4,1				/* Increment */
 	sync
 
 	/* Initialize MMUCR */
+#ifdef CONFIG_BGP_L1_WRITETHROUGH
+	lis	r5, PPC44x_MMUCR_U2@h
+#else
 	li	r5,0
+#endif /* CONFIG_BGP_L1_WRITETHROUGH */
 	mtspr	SPRN_MMUCR,r5
 	sync
 
@@ -814,7 +827,14 @@ skpinv:	addi	r4,r4,1				/* Increment */
 	/* attrib fields */
 	/* Added guarded bit to protect against speculative loads/stores */
 	li	r5,0
-	ori	r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G)
+#ifdef CONFIG_BGP_L1_WRITETHROUGH
+	ori	r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | \
+						PPC44x_TLB_G | PPC44x_TLB_U2)
+	oris	r5,r5,PPC44x_TLB_WL1@h
+#else
+	ori	r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | \
+			PPC44x_TLB_G)
+#endif /* CONFIG_BGP_L1_WRITETHROUGH */
 
         li      r0,63                    /* TLB slot 63 */
 
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 094bd98..3f56d7b 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -506,7 +506,20 @@ _GLOBAL(clear_pages)
 	li	r0,PAGE_SIZE/L1_CACHE_BYTES
 	slw	r0,r0,r4
 	mtctr	r0
+#ifdef CONFIG_BGP_L1_WRITETHROUGH
+	/* assuming 32 byte cacheline */
+	li      r4, 0
+1:	stw     r4, 0(r3)
+	stw     r4, 4(r3)
+	stw     r4, 8(r3)
+	stw     r4, 12(r3)
+	stw     r4, 16(r3)
+	stw     r4, 20(r3)
+	stw     r4, 24(r3)
+	stw     r4, 28(r3)
+#else
 1:	dcbz	0,r3
+#endif /* CONFIG_BGP_L1_WRITETHROUGH */
 	addi	r3,r3,L1_CACHE_BYTES
 	bdnz	1b
 	blr
@@ -550,7 +563,9 @@ _GLOBAL(copy_page)
 	mtctr	r0
 1:
 	dcbt	r11,r4
+#ifndef CONFIG_BGP_L1_WRITETHROUGH
 	dcbz	r5,r3
+#endif /* CONFIG_BGP_L1_WRITETHROUGH */
 	COPY_16_BYTES
 #if L1_CACHE_BYTES >= 32
 	COPY_16_BYTES
diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index 55f19f9..552df54 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -98,7 +98,11 @@ _GLOBAL(cacheable_memzero)
 	bdnz	4b
 3:	mtctr	r9
 	li	r7,4
+#ifdef CONFIG_BGP_L1_WRITETHROUGH
+10:
+#else
 10:	dcbz	r7,r6
+#endif /* CONFIG_BGP_L1_WRITETHROUGH */
 	addi	r6,r6,CACHELINE_BYTES
 	bdnz	10b
 	clrlwi	r5,r8,32-LG_CACHELINE_BYTES
@@ -187,7 +191,9 @@ _GLOBAL(cacheable_memcpy)
 	mtctr	r0
 	beq	63f
 53:
+#ifndef CONFIG_BGP_L1_WRITETHROUGH
 	dcbz	r11,r6
+#endif /* CONFIG_BGP_L1_WRITETHROUGH */
 	COPY_16_BYTES
 #if L1_CACHE_BYTES >= 32
 	COPY_16_BYTES
@@ -368,7 +374,11 @@ _GLOBAL(__copy_tofrom_user)
 	mtctr	r8
 
 53:	dcbt	r3,r4
+#ifdef CONFIG_BGP_L1_WRITETHROUGH
+54:
+#else
 54:	dcbz	r11,r6
+#endif
 	.section __ex_table,"a"
 	.align	2
 	.long	54b,105f
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c
index 024acab..f5c60b3 100644
--- a/arch/powerpc/mm/44x_mmu.c
+++ b/arch/powerpc/mm/44x_mmu.c
@@ -80,9 +80,12 @@ static void __init ppc44x_pin_tlb(unsigned int virt, unsigned int phys)
 	:
 #ifdef CONFIG_PPC47x
 	: "r" (PPC47x_TLB2_S_RWX),
-#else
+#elseif CONFIG_BGP_L1_WRITETHROUGH
+	: "r" (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_WL1 \
+		| PPC44x_TLB_U2 | PPC44x_TLB_M),
+#else /* neither CONFIG_PPC47x nor CONFIG_BGP_L1_WRITETHROUGH */
 	: "r" (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G),
-#endif
+#endif /* CONFIG_PPC47x */
 	  "r" (phys),
 	  "r" (virt | PPC44x_TLB_VALID | PPC44x_TLB_256M),
 	  "r" (entry),
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index f7b0772..7defe94 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -348,4 +348,9 @@ config XILINX_PCI
 	bool "Xilinx PCI host bridge support"
 	depends on PCI && XILINX_VIRTEX
 
+config BGP_L1_WRITETHROUGH
+	bool "Blue Gene/P enabled writethrough mode"
+	depends on BGP
+	default y
+
 endmenu
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 1ae59c5..caa3bbf 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -333,6 +333,7 @@ config NOT_COHERENT_CACHE
 	bool
 	depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
 	default n if PPC_47x
+	default n if BGP
 	default y
 
 config CHECK_CACHE_COHERENCY
-- 
1.7.4.1

^ permalink raw reply related

* RE: Unable to handle kernel paging request for data at address 0x00000000
From: Burton Samograd @ 2011-05-19 21:52 UTC (permalink / raw)
  To: linuxppc-dev
In-Reply-To: <C5362BC88E4CB545B18EA43E89EC0D56024427BD@cgyexchg.Interalia.ca>

[-- Attachment #1: Type: text/plain, Size: 11909 bytes --]

Reply to myself,

 

Looked like I had HIMEM enabled.  Disabling it caused the problem to go
away.

 

Sorry for the noise.

 

--

Burton Samograd

 

 

________________________________

From: linuxppc-dev-bounces+bsamograd=interalia.com@lists.ozlabs.org
[mailto:linuxppc-dev-bounces+bsamograd=interalia.com@lists.ozlabs.org]
On Behalf Of Burton Samograd
Sent: May 19, 2011 2:12 PM
To: linuxppc-dev@lists.ozlabs.org
Subject: Unable to handle kernel paging request for data at address
0x00000000

 

Hello,

 

I'm trying to get 2.6.38.6 up and running on my ppc880 board and I've
run into a problem when the kernel is attempting to perform its first
exec (/sbin/init).  I've tried to debug it as much as I can so I thought
I would post this to see if anybody has any ideas or suggestions about
what might be going wrong.

 

Following is a full output of the board bootup messages from U-boot and
the kernel:

 

------------------------------------------------------------------------
-----------------------------------------------

U-Boot 1.3.1 (May 19 2011 - 09:55:57)

 

CPU:   MPC885ZPnn at 131.666 MHz [40.0...133.0 MHz]

       8 kB I-Cache 8 kB D-Cache FEC present

DRAM:  64 MB

*** Warning - bad CRC, using default environment

 

In:    serial

Out:   serial

Err:   serial

Net:   FEC ETHERNET, FEC2 ETHERNET

IDE:   Hit any key to stop autoboot:  0

Using FEC ETHERNET device

TFTP from server 192.168.0.6; our IP address is 192.168.0.4

Filename 'uImage'.

Load address: 0xe00000

Loading: T
#################################################################

         ##############

done

Bytes transferred = 1155044 (119fe4 hex)

## Booting image at 00e00000 ...

   Image Name:   Linux-2.6.38.6sbx-dirty

   Image Type:   PowerPC Linux Kernel Image (gzip compressed)

   Data Size:    1154980 Bytes =  1.1 MB

   Load Address: 00400000

   Entry Point:  0040055c

   Verifying Checksum ... OK

   Uncompressing Kernel Image ... OK

## Current stack ends at 0x03F91AB8 => set upper limit to 0x00800000

## cmdline at 0x007FFF00 ... 0x007FFF79

memstart    = 0x00000000

memsize     = 0x04000000

flashstart  = 0x00000000

flashsize   = 0x00000000

flashoffset = 0x00000000

sramstart   = 0x00000000

sramsize    = 0x00000000

immr_base   = 0xFF000000

bootflags   = 0x00000001

intfreq     = 131.666 MHz

busfreq     = 65.833 MHz

ethaddr     = 00:07:07:00:4C:64

IP addr     = 192.168.0.4

baudrate    =  38400 bps

No initrd

## Transferring control to Linux (at address 0040055c) ...

Kernel Command Line: console=ttyCPM0,38400 root=/dev/nfs,rw
nfsroot=192.168.0.6:/sbx ip=192.168.0.15:192.168.0.6:::sbx:eth0:off
init=/bin/bash

Memory <- <0x0 0x4000000> (64MB)

ENET0: local-mac-address <- 00:07:07:00:4c:64

ENET1: local-mac-address <- 00:3b:f5:21:63:6f

CPU clock-frequency <- 0x7d912de (132MHz)

CPU timebase-frequency <- 0x3ec896 (4MHz)

CPU bus-frequency <- 0x3ec896f (66MHz)

 

zImage starting: loaded at 0x00400000 (sp: 0x03f91978)

Allocating 0x251480 bytes for kernel ...

gunzipping (0x00000000 <- 0x0040c000:0x0068b948)...done 0x23cde0 bytes

 

Linux/PowerPC load: console=ttyCPM0,38400 root=/dev/nfs,rw
nfsroot=192.168.0.6:/sbx ip=192.168.0.15:192.168.0.6:::sbx:eth0:off
init=/bin/bash

Finalizing device tree... flat tree at 0x698300

Using Freescale MPC885 ADS machine description

Linux version 2.6.38.6sbx-dirty (root@burton-fedora-14.interalia.com)
(gcc version 4.2.2) #43 Thu May 19 13:55:22 MDT 2011

Zone PFN ranges:

  DMA      0x00000000 -> 0x00000800

  Normal   empty

  HighMem  0x00000800 -> 0x00004000

Movable zone start PFN for each node

early_node_map[1] active PFN ranges

    0: 0x00000000 -> 0x00004000

MMU: Allocated 72 bytes of context maps for 16 contexts

Built 1 zonelists in Zone order, mobility grouping on.  Total pages:
16256

Kernel command line: console=ttyCPM0,38400 root=/dev/nfs,rw
nfsroot=192.168.0.6:/sbx ip=192.168.0.15:192.168.0.6:::sbx:eth0:off
init=/bin/bash

PID hash table entries: 32 (order: -5, 128 bytes)

Dentry cache hash table entries: 1024 (order: 0, 4096 bytes)

Inode-cache hash table entries: 1024 (order: 0, 4096 bytes)

Memory: 62544k/65536k available (2212k kernel code, 2992k reserved, 80k
data, 81k bss, 96k init)

Kernel virtual memory layout:

  * 0xfffcf000..0xfffff000  : fixmap

  * 0xff800000..0xffc00000  : highmem PTEs

  * 0xff600000..0xff800000  : consistent mem

  * 0xff5f8000..0xff600000  : early ioremap

  * 0xc1000000..0xff5f8000  : vmalloc & ioremap

SLUB: Genslabs=14, HWalign=16, Order=0-3, MinObjects=0, CPUs=1, Nodes=1

NR_IRQS:512 nr_irqs:512 16

Decrementer Frequency = 0x7d912d

clocksource: timebase mult[1e61377e] shift[22] registered

console [ttyCPM0] enabled

pid_max: default: 4096 minimum: 301

Mount-cache hash table entries: 512

NET: Registered protocol family 16

bio: create slab <bio-0> at 0

Switching to clocksource timebase

NET: Registered protocol family 2

Switched to NOHz mode on CPU #0

IP route cache hash table entries: 1024 (order: 0, 4096 bytes)

TCP established hash table entries: 512 (order: 0, 4096 bytes)

TCP bind hash table entries: 512 (order: -1, 2048 bytes)

TCP: Hash tables configured (established 512 bind 512)

TCP reno registered

NET: Registered protocol family 1

RPC: Registered udp transport module.

RPC: Registered tcp transport module.

RPC: Registered tcp NFSv4.1 backchannel transport module.

highmem bounce pool size: 64 pages

msgmni has been set to 16

io scheduler noop registered

io scheduler deadline registered (default)

ff000a80.serial: ttyCPM0 at MMIO 0xc1010a80 (irq = 19) is a CPM UART

ff000a90.serial: ttyCPM1 at MMIO 0xc1018a90 (irq = 24) is a CPM UART

Generic RTC Driver v1.07

Uniform Multi-Platform E-IDE driver

ide-gd driver 1.18

eth0: fs_enet: 00:07:07:00:4c:64

eth1: fs_enet: 00:3b:f5:21:63:6f

FEC MII Bus: probed

mdio_bus ff000e00: error probing PHY at address 3

TCP cubic registered

NET: Registered protocol family 17

IP-Config: Guessing netmask 255.255.255.0

IP-Config: Complete:

     device=eth0, addr=192.168.0.15, mask=255.255.255.0,
gw=255.255.255.255,

     host=sbx, domain=, nis-domain=(none),

     bootserver=192.168.0.6, rootserver=192.168.0.6, rootpath=

VFS: Mounted root (nfs filesystem) readonly on device 0:10.

Freeing unused kernel memory: 96k init

Failed to execute /bin/bash.  Attempting defaults...

Unable to handle kernel paging request for data at address 0x00000000

Faulting instruction address: 0xc000b77c

Oops: Kernel access of bad area, sig: 11 [#1]

Freescale MPC885 ADS

last sysfs file:

NIP: c000b77c LR: c000deb0 CTR: 00000100

REGS: c02e7cc0 TRAP: 0300   Not tainted  (2.6.38.6sbx-dirty)

MSR: 00009032 <EE,ME,IR,DR>  CR: 22248042  XER: 0000f800

DAR: 00000000, DSISR: c0000000

TASK = c02e4000[1] 'swapper' THREAD: c02e6000

GPR00: c000deac c02e7d70 c02e4000 00000000 00000100 c03ecffc 00000000
c022e8ec

GPR08: c02445a0 c02445a0 00009032 000005e0 22248042 10000900 22242044
22242044

GPR16: 22248044 00000000 00000000 00000001 c03e1000 c02e7e38 00000030
00000022

GPR24: c03e1000 c03e27fc c0240000 7ffffff1 c03ecffc 7ffffff1 03c05d21
c02cd0a0

NIP [c000b77c] __flush_dcache_icache+0x14/0x40

LR [c000deb0] flush_dcache_icache_page+0x14/0x24

Call Trace:

[c02e7d70] [c000deac] flush_dcache_icache_page+0x10/0x24 (unreliable)

[c02e7d80] [c000e0c8] set_pte_at+0x50/0x7c

[c02e7da0] [c0060548] handle_pte_fault+0x228/0x3ac

[c02e7dd0] [c00617d8] __get_user_pages+0x2a4/0x3cc

[c02e7e30] [c007536c] get_arg_page+0x40/0xb8

[c02e7e50] [c00755dc] copy_strings+0x114/0x2e8

[c02e7ea0] [c0075ca0] do_execve+0x120/0x254

[c02e7ee0] [c000719c] sys_execve+0x50/0x7c

[c02e7f00] [c000be6c] ret_from_syscall+0x0/0x38

[c02e7fc0] [00001032] 0x1032

[c02e7fd0] [c0002404] init_post+0x88/0xb8

[c02e7fe0] [c0211424] kernel_init+0x144/0x15c

[c02e7ff0] [c000b994] kernel_thread+0x4c/0x68

Instruction dump:

4d820020 7c8903a6 7c001bac 38630010 4200fff8 7c0004ac 4e800020 60000000

54630026 38800100 7c8903a6 7c661b78 <7c00186c> 38630010 4200fff8
7c0004ac

---[ end trace 5e2c5aac0577498f ]---

note: swapper[1] exited with preempt_count 1

BUG: scheduling while atomic: swapper/1/0x00000001

Call Trace:

[c02e7950] [c0006908] show_stack+0x50/0x154 (unreliable)

[c02e7990] [c0016b54] __schedule_bug+0x54/0x68

[c02e79a0] [c01b6560] schedule+0x60/0x344

[c02e7a90] [c01b6d84] schedule_timeout+0x148/0x17c

[c02e7ac0] [c00247c0] msleep_interruptible+0x1c/0x54

[c02e7ad0] [c01139d4] __uart_wait_until_sent+0x90/0xec

[c02e7af0] [c01156c0] uart_close+0x19c/0x2c4

[c02e7b40] [c0109cfc] tty_release+0x1b4/0x45c

[c02e7be0] [c00713ac] fput+0xa8/0x14c

[c02e7c00] [c006e77c] filp_close+0x78/0x90

[c02e7c20] [c001c18c] put_files_struct+0x88/0xe8

[c02e7c40] [c001d640] do_exit+0x168/0x544

[c02e7c80] [c0009528] die+0x184/0x19c

[c02e7ca0] [c000d7c0] bad_page_fault+0xe8/0xfc

[c02e7cb0] [c000c30c] handle_page_fault+0x7c/0x80

[c02e7d70] [c000deac] flush_dcache_icache_page+0x10/0x24

[c02e7d80] [c000e0c8] set_pte_at+0x50/0x7c

[c02e7da0] [c0060548] handle_pte_fault+0x228/0x3ac

[c02e7dd0] [c00617d8] __get_user_pages+0x2a4/0x3cc

[c02e7e30] [c007536c] get_arg_page+0x40/0xb8

[c02e7e50] [c00755dc] copy_strings+0x114/0x2e8

[c02e7ea0] [c0075ca0] do_execve+0x120/0x254

[c02e7ee0] [c000719c] sys_execve+0x50/0x7c

[c02e7f00] [c000be6c] ret_from_syscall+0x0/0x38

[c02e7fc0] [00001032] 0x1032

[c02e7fd0] [c0002404] init_post+0x88/0xb8

[c02e7fe0] [c0211424] kernel_init+0x144/0x15c

[c02e7ff0] [c000b994] kernel_thread+0x4c/0x68

BUG: scheduling while atomic: swapper/1/0x00000001

Call Trace:

[c02e7950] [c0006908] show_stack+0x50/0x154 (unreliable)

[c02e7990] [c0016b54] __schedule_bug+0x54/0x68

[c02e79a0] [c01b6560] schedule+0x60/0x344

[c02e7a90] [c01b6d84] schedule_timeout+0x148/0x17c

[c02e7ac0] [c00247c0] msleep_interruptible+0x1c/0x54

[c02e7ad0] [c01139d4] __uart_wait_until_sent+0x90/0xec

[c02e7af0] [c01156fc] uart_close+0x1d8/0x2c4

[c02e7b40] [c0109cfc] tty_release+0x1b4/0x45c

[c02e7be0] [c00713ac] fput+0xa8/0x14c

[c02e7c00] [c006e77c] filp_close+0x78/0x90

[c02e7c20] [c001c18c] put_files_struct+0x88/0xe8

[c02e7c40] [c001d640] do_exit+0x168/0x544

[c02e7c80] [c0009528] die+0x184/0x19c

[c02e7ca0] [c000d7c0] bad_page_fault+0xe8/0xfc

[c02e7cb0] [c000c30c] handle_page_fault+0x7c/0x80

[c02e7d70] [c000deac] flush_dcache_icache_page+0x10/0x24

[c02e7d80] [c000e0c8] set_pte_at+0x50/0x7c

[c02e7da0] [c0060548] handle_pte_fault+0x228/0x3ac

[c02e7dd0] [c00617d8] __get_user_pages+0x2a4/0x3cc

[c02e7e30] [c007536c] get_arg_page+0x40/0xb8

[c02e7e50] [c00755dc] copy_strings+0x114/0x2e8

[c02e7ea0] [c0075ca0] do_execve+0x120/0x254

[c02e7ee0] [c000719c] sys_execve+0x50/0x7c

[c02e7f00] [c000be6c] ret_from_syscall+0x0/0x38

[c02e7fc0] [00001032] 0x1032

[c02e7fd0] [c0002404] init_post+0x88/0xb8

[c02e7fe0] [c0211424] kernel_init+0x144/0x15c

[c02e7ff0] [c000b994] kernel_thread+0x4c/0x68

Kernel panic - not syncing: Attempted to kill init!

Rebooting in 180 seconds..

------------------------------------------------------------------------
-----------------------------------------------

 

Note: I specify /bin/bash for init on the command line (which currently
isn't present on my current root file system), but it's defaulting to
/sbin/init, which is present in my root filesystem.  This problem
occurred whether /bin/bash or /sbin/init were set.

 

Any advice or help appreciated.

 

--

Burton Samograd


[-- Attachment #2: Type: text/html, Size: 46631 bytes --]

^ permalink raw reply

* Re: [PATCH 3/7] [RFC] add support for BlueGene/P FPU
From: Eric Van Hensbergen @ 2011-05-19 21:55 UTC (permalink / raw)
  To: Michael Neuling; +Cc: linuxppc-dev, linux-kernel, bg-linux
In-Reply-To: <29601.1305840992@neuling.org>

Damnit Mikey, just after I hit send on [V2].....

On Thu, May 19, 2011 at 4:36 PM, Michael Neuling <mikey@neuling.org> wrote:
> In message <BANLkTimKhApFW8G1-pG0u_9Kv2YB0R1O0w@mail.gmail.com> you wrote:
>> On Thu, May 19, 2011 at 12:58 AM, Michael Neuling <mikey@neuling.org> wrote:
>> > Eric,
>> >
>> >> This patch adds save/restore register support for the BlueGene/P
>> >> double hummer FPU.
>> >
>> > What does this mean?  Needs more details here.
>> >

okay, I've changed it a bit in [V2], if you want more I can do my best.

>> "Each of the two FPU units contains 32 64-bit floating point registers
>> for a total of 64 FP registers per processor." which would seem to
>> point to the kittyhawk version - but they have a second SAVE_32SFPRS
>> for the second hummer.  What wasn't clear to me with this version of
>> the code was whether or not they were doing something clever like
>> saving the pair of the 64-bit FPU registers in a single 128-bit slot
>> (seems plausible).
>
> Ok, sounds like there is 32*8*2 bytes of data, rather than the normal
> 32*8 bytes for FP only (ignoring VSX).  If this is the case, then you'll
> need make 'fpr' in the thread struct bigger which you can do by setting
> TS_FPRWIDTH = 2 like we do for VSX.
>

Okay, I'll incorporate that into [V3].

> If there is some instruction that saves and restores two of these at a
> time (which LFPDX/STFPDX might I guess), then we can use that, otherwise
> we'll have to do 64 saves/restores.  Double load/stores will be faster
> I'm guessing though.

I assume that's true.

>
> If two at a time, do we need to increase the index in pairs?
>

I don't believe so.

>> If this is not the way to go, I can certainly
>> switch the kittyhawk version of the patch with the *, the extra
>> SAVE32SFPR and the extra double hummer specific storage space in the
>> thread_struct.
>
> I'd be tempted to keep it in the 'fpr' part of the struct so you can
> then access it with ptrace/signals/core dumps.
>
>> If it would help I can post an alternate version of the patch for
>> discussion with the kittyhawk version.
>
> Sure.
>

Kittyhawk version can be seen here:

http://git.kernel.org/?p=linux/kernel/git/ericvh/bluegene.git;a=commitdiff;h=94bffe786324b9bd07187b11afd836e3ec362d95

>
> The most useful thing would be to see the instruction definition for
> STFPDX/LFPDX.
>

https://wiki.alcf.anl.gov/images/d/d9/PPC440_FP2_arch.pdf

>>
>> >> =3DA0/*
>> >> diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platfo=
rms=3D
>> /44x/
>> > Kconfig
>> >> index f485fc5f..24a515e 100644
>> >> --- a/arch/powerpc/platforms/44x/Kconfig
>> >> +++ b/arch/powerpc/platforms/44x/Kconfig
>> >> @@ -169,6 +169,15 @@ config YOSEMITE
>> >> =3DA0 =3DA0 =3DA0 help
>> >> =3DA0 =3DA0 =3DA0 =3DA0 This option enables support for the AMCC PPC4=
40EP evalua=3D
>> tion board.
>> >>
>> >> +config =3DA0 =3DA0 =3DA0 BGP
>> >
>> > Does this FPU feature have a specific name like double hammer? =3DA0I'=
d
>> > rather have the BGP defconfig depend on PPC_FPU_DOUBLE_HUMMER, or
>> > something like that...
>> >
>> >> + =3DA0 =3DA0 bool "Blue Gene/P"
>> >> + =3DA0 =3DA0 depends on 44x
>> >> + =3DA0 =3DA0 default n
>> >> + =3DA0 =3DA0 select PPC_FPU
>> >> + =3DA0 =3DA0 select PPC_DOUBLE_FPU
>> >
>> > ... in fact, it seem you are doing something like these here but you
>> > don't use PPC_DOUBLE_FPU anywhere?
>> >
>>
>> A fair point.  I'm fine with calling it DOUBLE_HUMMER, but I wasn't sure if
>> that was "too internal" of a name for the kernel.  Let me know and
>> I'll fix it up.
>
> What I'm mostly concerned about is disassociating it with a particular
> CPU.
>
> If it has an external name, then all the better.
>

Since it isn't available on other chips, should it just be PPC_BGP_FPU
or PPC_BGP_DOUBLE_FPU?

      -eric

^ permalink raw reply

* Re: Kernel cannot see PCI device
From: Benjamin Herrenschmidt @ 2011-05-19 23:12 UTC (permalink / raw)
  To: Bjorn Helgaas; +Cc: linux-pci, Prashant Bhole, linuxppc-dev
In-Reply-To: <BANLkTimoyzERHjw_kQ5SAbNWudWJoX7yMA@mail.gmail.com>

On Thu, 2011-05-19 at 11:58 -0600, Bjorn Helgaas wrote:
> The scan below PCIX0 (bus 0001:00) doesn't find anything.  You really
> need a powerpc expert to help here, but in their absence, my guess
> would be something's wrong with config space access, so I would start
> by just adding some printks to ppc4xx_probe_pcix_bridge() to see if
> the rsrc_cfg address looks reasonable.  You might need a chip spec or
> maybe you can compare it to the device tree (I have no idea what the
> relation between the device tree and OF is).
> 
> You mentioned the u-boot "pci 2" command earlier.  It found a device
> on bus 2, which means there must be at least one P2P bridge to get you
> from bus 0 to bus 2.  So the output of "pci 0", "pci 1", "pci 80", and
> "pci 81" (to compare with what Linux found) would be interesting.

Well, if it's PCIe, there's the "virtual" P2P bridge of the root
complex.

The question is on what PCIe is his device connected, the one that we
see or the one that's disabled in the device-tree.

In the former case, maybe something Linux does kills it. IE. we reset
and re-train the link on these chips afaik, at least from memory, check
what's happening in ppc4xx_pci.c there. Maybe the card doesn't like it
or we're doing something wrong....

In the latter case, something in the device-tree is telling Linux not
to touch that bridge, usually the "status" property.

Cheers,
Ben.

^ permalink raw reply

* Re: [PATCH 3/7] [RFC] add support for BlueGene/P FPU
From: Michael Neuling @ 2011-05-19 23:16 UTC (permalink / raw)
  To: Eric Van Hensbergen; +Cc: linuxppc-dev, linux-kernel, bg-linux
In-Reply-To: <BANLkTi=rc5vZm3xAXHpHSxSH1wBWKhv92A@mail.gmail.com>

In message <BANLkTi=rc5vZm3xAXHpHSxSH1wBWKhv92A@mail.gmail.com> you wrote:
> Damnit Mikey, just after I hit send on [V2].....
> 
> On Thu, May 19, 2011 at 4:36 PM, Michael Neuling <mikey@neuling.org> wrote:
> > In message <BANLkTimKhApFW8G1-pG0u_9Kv2YB0R1O0w@mail.gmail.com> you wrote:
> >> On Thu, May 19, 2011 at 12:58 AM, Michael Neuling <mikey@neuling.org> wrote:
> >> > Eric,
> >> >
> >> >> This patch adds save/restore register support for the BlueGene/P
> >> >> double hummer FPU.
> >> >
> >> > What does this mean?  Needs more details here.
> >> >
> 
> okay, I've changed it a bit in [V2], if you want more I can do my best.

If you can describe the whole primary and secondary registers that'd be
cool.  ASCII art would be awesome! :-)


> 
> >> "Each of the two FPU units contains 32 64-bit floating point registers
> >> for a total of 64 FP registers per processor." which would seem to
> >> point to the kittyhawk version - but they have a second SAVE_32SFPRS
> >> for the second hummer.  What wasn't clear to me with this version of
> >> the code was whether or not they were doing something clever like
> >> saving the pair of the 64-bit FPU registers in a single 128-bit slot
> >> (seems plausible).
> >
> > Ok, sounds like there is 32*8*2 bytes of data, rather than the normal
> > 32*8 bytes for FP only (ignoring VSX).  If this is the case, then you'll
> > need make 'fpr' in the thread struct bigger which you can do by setting
> > TS_FPRWIDTH = 2 like we do for VSX.
> >
> 
> Okay, I'll incorporate that into [V3].
> 
> > If there is some instruction that saves and restores two of these at a
> > time (which LFPDX/STFPDX might I guess), then we can use that, otherwise
> > we'll have to do 64 saves/restores.  Double load/stores will be faster
> > I'm guessing though.
> 
> I assume that's true.
> 
> >
> > If two at a time, do we need to increase the index in pairs?
> >
> 
> I don't believe so.
> 
> >> If this is not the way to go, I can certainly
> >> switch the kittyhawk version of the patch with the *, the extra
> >> SAVE32SFPR and the extra double hummer specific storage space in the
> >> thread_struct.
> >
> > I'd be tempted to keep it in the 'fpr' part of the struct so you can
> > then access it with ptrace/signals/core dumps.
> >
> >> If it would help I can post an alternate version of the patch for
> >> discussion with the kittyhawk version.
> >
> > Sure.
> >
> 
> Kittyhawk version can be seen here:
> 
> http://git.kernel.org/?p=linux/kernel/git/ericvh/bluegene.git;a=commitdiff;h=94bffe786324b9bd07187b11afd836e3ec362d95

OK.  I can see the secondary.

BTW I think it's buggy in a different way.

--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -51,6 +51,9 @@ _GLOBAL(load_up_fpu)
        toreal(r4)
        addi    r4,r4,THREAD            /* want last_task_used_math->thread */
        SAVE_32FPRS(0, r4)
+#ifdef CONFIG_DOUBLE_HUMMER
+        SAVE_32SFPRS(0, r10, r3)
+#endif /* CONFIG_DOUBLE_HUMMER */
        mffs    fr0
        stfd    fr0,THREAD_FPSCR(r4)
        PPC_LL  r5,PT_REGS(r4)
@@ -78,6 +81,9 @@ _GLOBAL(load_up_fpu)
        lfd     fr0,THREAD_FPSCR(r5)
        MTFSF_L(fr0)
        REST_32FPRS(0, r5)
+#ifdef  CONFIG_DOUBLE_HUMMER
+        REST_32SFPRS(0, r10, r5)
+#endif  /* CONFIG_DOUBLE_HUMMER */

REST uses r5 as the base in both cases (primary and secondary) which is
good.  SAVE uses r4 in the primary case and r3 in the secondary, which
is the wrong base. 

> 
> >
> > The most useful thing would be to see the instruction definition for
> > STFPDX/LFPDX.
> >
> 
> https://wiki.alcf.anl.gov/images/d/d9/PPC440_FP2_arch.pdf

stfpdx  does Primary->DW[EA]  Secondary->DW[EA+8]

I'm tempted to continue to use this and store the data in 'fpr' in the
thread_struct.  Doing it this way the primary register will continue to
be in the same location as before, which will mean ptrace etc will
continue to work at least for the primary.  The secondary will be
accessible using ptrace etc as well, but it'll be a bit of kludge
because it'll appear in the VSX location.

Putting the secondary register in a new area in the thread struct will
mean it's totally inaccessible for debugging without extra code in
ptrace.c/signals.c etc

We are going to need 16x spacing but you are going to have to increase
the size using TS_FPRWIDTH = 2.

> >>
> >> >>  /*
> >> >> diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig
> >> >> index f485fc5f..24a515e 100644
> >> >> --- a/arch/powerpc/platforms/44x/Kconfig
> >> >> +++ b/arch/powerpc/platforms/44x/Kconfig
> >> >> @@ -169,6 +169,15 @@ config YOSEMITE
> >> >>       help
> >> >>         This option enables support for the AMCC PPC440EP evaluation board.
> >> >>
> >> >> +config       BGP
> >> >
> >> > Does this FPU feature have a specific name like double hammer?  I'd
> >> > rather have the BGP defconfig depend on PPC_FPU_DOUBLE_HUMMER, or
> >> > something like that...
> >> >
> >> >> +     bool "Blue Gene/P"
> >> >> +     depends on 44x
> >> >> +     default n
> >> >> +     select PPC_FPU
> >> >> +     select PPC_DOUBLE_FPU
> >> >
> >> > ... in fact, it seem you are doing something like these here but you
> >> > don't use PPC_DOUBLE_FPU anywhere?
> >> >
> >>
> >> A fair point.  I'm fine with calling it DOUBLE_HUMMER, but I wasn't sure if
> >> that was "too internal" of a name for the kernel.  Let me know and
> >> I'll fix it up.
> >
> > What I'm mostly concerned about is disassociating it with a particular
> > CPU.
> >
> > If it has an external name, then all the better.
> >
> 
> Since it isn't available on other chips, should it just be PPC_BGP_FPU
> or PPC_BGP_DOUBLE_FPU?

I'd probably still prefer it disassociated with the CPU name, but we are
really bike shedding here.  I'm not too fussed.

Mikey

^ permalink raw reply

* Re: Kernel cannot see PCI device
From: Bjorn Helgaas @ 2011-05-19 23:19 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: linux-pci@vger.kernel.org, Prashant Bhole, linuxppc-dev
In-Reply-To: <1305846751.7481.55.camel@pasglop>

On Thu, May 19, 2011 at 5:12 PM, Benjamin Herrenschmidt
<benh@kernel.crashing.org> wrote:
> On Thu, 2011-05-19 at 11:58 -0600, Bjorn Helgaas wrote:
>> The scan below PCIX0 (bus 0001:00) doesn't find anything.  You really
>> need a powerpc expert to help here, but in their absence, my guess
>> would be something's wrong with config space access, so I would start
>> by just adding some printks to ppc4xx_probe_pcix_bridge() to see if
>> the rsrc_cfg address looks reasonable.  You might need a chip spec or
>> maybe you can compare it to the device tree (I have no idea what the
>> relation between the device tree and OF is).
>>
>> You mentioned the u-boot "pci 2" command earlier.  It found a device
>> on bus 2, which means there must be at least one P2P bridge to get you
>> from bus 0 to bus 2.  So the output of "pci 0", "pci 1", "pci 80", and
>> "pci 81" (to compare with what Linux found) would be interesting.
>
> Well, if it's PCIe, there's the "virtual" P2P bridge of the root
> complex.
>
> The question is on what PCIe is his device connected, the one that we
> see or the one that's disabled in the device-tree.

I *think* the device Prashant is looking for ("02.00.00   0x1000
0x0072     Mass storage controller 0x00") is below the PCI-X bridge;
at least the canyonlands.dts he posted says that PCIX0 leads to buses
0-3f.

^ permalink raw reply

* Re: [PATCH 3/7] [RFC] add support for BlueGene/P FPU
From: Eric Van Hensbergen @ 2011-05-20  0:30 UTC (permalink / raw)
  To: Michael Neuling; +Cc: linuxppc-dev, linux-kernel, bg-linux
In-Reply-To: <6241.1305847001@neuling.org>

On Thu, May 19, 2011 at 6:16 PM, Michael Neuling <mikey@neuling.org> wrote:
> In message <BANLkTi=rc5vZm3xAXHpHSxSH1wBWKhv92A@mail.gmail.com> you wrote:
>> On Thu, May 19, 2011 at 4:36 PM, Michael Neuling <mikey@neuling.org> wrote:
>> > In message <BANLkTimKhApFW8G1-pG0u_9Kv2YB0R1O0w@mail.gmail.com> you wrote:
>> >> On Thu, May 19, 2011 at 12:58 AM, Michael Neuling <mikey@neuling.org> wrote:
>> >> > Eric,
>> >> >
>> >> >> This patch adds save/restore register support for the BlueGene/P
>> >> >> double hummer FPU.
>> >> >
>> >> > What does this mean?  Needs more details here.
>> >> >
>>
>> okay, I've changed it a bit in [V2], if you want more I can do my best.
>
> If you can describe the whole primary and secondary registers that'd be
> cool.  ASCII art would be awesome! :-)
>

You sure you don't just want a bitfield.conf? :)  I'll do my best, but my
ASCII art isn't what it used to be.  I'll also include a reference to the PDF.

>> >
>> > Ok, sounds like there is 32*8*2 bytes of data, rather than the normal
>> > 32*8 bytes for FP only (ignoring VSX).  If this is the case, then you'll
>> > need make 'fpr' in the thread struct bigger which you can do by setting
>> > TS_FPRWIDTH = 2 like we do for VSX.
>> >

Okay - so basically what I have now and TS_FPRWIDTH=2 ?

>>
>> Since it isn't available on other chips, should it just be PPC_BGP_FPU
>> or PPC_BGP_DOUBLE_FPU?
>
> I'd probably still prefer it disassociated with the CPU name, but we are
> really bike shedding here.  I'm not too fussed.
>

I'll leave it separate and switch it to PPC_FP2 (or would you prefer
PPC_FP2_FPU to make it clear) since the public PDF refers to it this way.

If that all sounds good, I'll spin [V3] tomorrow.

     -eric

^ permalink raw reply

* Re: [PATCH 2/7] [RFC] add bluegene entry to cputable
From: Benjamin Herrenschmidt @ 2011-05-20  0:35 UTC (permalink / raw)
  To: Eric Van Hensbergen; +Cc: linuxppc-dev, linux-kernel, bg-linux
In-Reply-To: <1305753895-24845-2-git-send-email-ericvh@gmail.com>

On Wed, 2011-05-18 at 16:24 -0500, Eric Van Hensbergen wrote:
> Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
> ---
>  arch/powerpc/kernel/cputable.c |   14 ++++++++++++++
>  1 files changed, 14 insertions(+), 0 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c
> index b9602ee..0eb245e 100644
> --- a/arch/powerpc/kernel/cputable.c
> +++ b/arch/powerpc/kernel/cputable.c
> @@ -1732,6 +1732,20 @@ static struct cpu_spec __initdata cpu_specs[] = {
>  		.machine_check		= machine_check_440A,
>  		.platform		= "ppc440",
>  	},
> +	{ /* Blue Gene/P */
> +		.pvr_mask		= 0xfffffff0,
> +		.pvr_value		= 0x52131880,
> +		.cpu_name		= "450 Blue Gene/P",
> +		.cpu_features		= CPU_FTRS_440x6,
> +		.cpu_user_features	= COMMON_USER_BOOKE |
> +						PPC_FEATURE_HAS_FPU,
> +		.mmu_features		= MMU_FTR_TYPE_44x,
> +		.icache_bsize		= 32,
> +		.dcache_bsize		= 32,
> +		.cpu_setup		= __setup_cpu_460gt,
                                            ^^^^^^^^^^^^^^^^^^
Are you sure ?

Cheers,
Ben.

> +		.machine_check		= machine_check_440A,
> +		.platform		= "ppc440",
> +	},
>  	{ /* 460EX */
>  		.pvr_mask		= 0xffff0006,
>  		.pvr_value		= 0x13020002,

^ permalink raw reply

* Re: [PATCH 5/7] [RFC] force 32-byte aligned kmallocs
From: Benjamin Herrenschmidt @ 2011-05-20  0:36 UTC (permalink / raw)
  To: Eric Van Hensbergen; +Cc: linuxppc-dev, linux-kernel, bg-linux
In-Reply-To: <1305753895-24845-5-git-send-email-ericvh@gmail.com>

On Wed, 2011-05-18 at 16:24 -0500, Eric Van Hensbergen wrote:
> For BGP, it is convenient for 'kmalloc' to come back with 32-byte
> aligned units for torus DMA
> 
> Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
> ---
>  arch/powerpc/include/asm/page_32.h |    2 +-
>  1 files changed, 1 insertions(+), 1 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h
> index 68d73b2..fb0a7ae 100644
> --- a/arch/powerpc/include/asm/page_32.h
> +++ b/arch/powerpc/include/asm/page_32.h
> @@ -9,7 +9,7 @@
>  
>  #define VM_DATA_DEFAULT_FLAGS	VM_DATA_DEFAULT_FLAGS32
>  
> -#ifdef CONFIG_NOT_COHERENT_CACHE
> +#if defined(CONFIG_NOT_COHERENT_CACHE) || defined(CONFIG_BGP)
>  #define ARCH_DMA_MINALIGN	L1_CACHE_BYTES
>  #endif

Is DMA cache coherent on BG/P ? That's odd for a 4xx base :-)

Cheers,
Ben.

^ permalink raw reply

* Re: [PATCH 6/7] [RFC] enable early TLBs for BG/P
From: Benjamin Herrenschmidt @ 2011-05-20  0:39 UTC (permalink / raw)
  To: Eric Van Hensbergen; +Cc: linuxppc-dev, linux-kernel, bg-linux
In-Reply-To: <1305753895-24845-6-git-send-email-ericvh@gmail.com>

On Wed, 2011-05-18 at 16:24 -0500, Eric Van Hensbergen wrote:
> BG/P maps firmware with an early TLB

That's a bit gross. How often do you call that firmware in practice ?
Aren't you better off instead inserting a TLB entry for it when you call
it instead ? A simple tlbsx. + tlbwe sequence would do. That would free
up a TLB entry for normal use.

Cheers,
Ben.

> Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
> ---
>  arch/powerpc/include/asm/mmu-44x.h |    6 +++++-
>  1 files changed, 5 insertions(+), 1 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/mmu-44x.h b/arch/powerpc/include/asm/mmu-44x.h
> index ca1b90c..2807d6e 100644
> --- a/arch/powerpc/include/asm/mmu-44x.h
> +++ b/arch/powerpc/include/asm/mmu-44x.h
> @@ -115,8 +115,12 @@ typedef struct {
>  #endif /* !__ASSEMBLY__ */
>  
>  #ifndef CONFIG_PPC_EARLY_DEBUG_44x
> +#ifndef CONFIG_BGP
>  #define PPC44x_EARLY_TLBS	1
> -#else
> +#else /* CONFIG_BGP */
> +#define PPC44x_EARLY_TLBS	2
> +#endif /* CONFIG_BGP */
> +#else /* CONFIG_PPC_EARLY_DEBUG_44x */
>  #define PPC44x_EARLY_TLBS	2
>  #define PPC44x_EARLY_DEBUG_VIRTADDR	(ASM_CONST(0xf0000000) \
>  	| (ASM_CONST(CONFIG_PPC_EARLY_DEBUG_44x_PHYSLOW) & 0xffff))

^ permalink raw reply

* Re: [PATCH 3/7] [RFC] add support for BlueGene/P FPU
From: Michael Neuling @ 2011-05-20  0:43 UTC (permalink / raw)
  To: Eric Van Hensbergen; +Cc: linuxppc-dev, linux-kernel, bg-linux
In-Reply-To: <BANLkTindzEFRjEGkc9bLccrFUdVtvbA73A@mail.gmail.com>

> >> >> > Eric,
> >> >> >
> >> >> >> This patch adds save/restore register support for the BlueGene/P
> >> >> >> double hummer FPU.
> >> >> >
> >> >> > What does this mean?  Needs more details here.
> >> >> >
> >>
> >> okay, I've changed it a bit in [V2], if you want more I can do my best.
> >
> > If you can describe the whole primary and secondary registers that'd be
> > cool.  ASCII art would be awesome! :-)
> >
> 
> You sure you don't just want a bitfield.conf? :)  

hehe, maybe an interpretive dance video posted on youtube?

> I'll do my best, but my ASCII art isn't what it used to be.  I'll also
> include a reference to the PDF.

Something self contained in the comments would be great as external
links tend to disappear.  

> >> > Ok, sounds like there is 32*8*2 bytes of data, rather than the normal
> >> > 32*8 bytes for FP only (ignoring VSX). If this is the case, then you'll
> >> > need make 'fpr' in the thread struct bigger which you can do by setting
> >> > TS_FPRWIDTH = 2 like we do for VSX.
> >> >
> 
> Okay - so basically what I have now and TS_FPRWIDTH=2 ?

Yes.

> >>
> >> Since it isn't available on other chips, should it just be PPC_BGP_FPU
> >> or PPC_BGP_DOUBLE_FPU?
> >
> > I'd probably still prefer it disassociated with the CPU name, but we are
> > really bike shedding here.  I'm not too fussed.
> >
> 
> I'll leave it separate and switch it to PPC_FP2 (or would you prefer
> PPC_FP2_FPU to make it clear) since the public PDF refers to it this
> way.

PPC_FPU_FP2 would be my vote.

> If that all sounds good, I'll spin [V3] tomorrow.

Thanks!

Mikey

^ permalink raw reply

* Re: [PATCH 5/7] [RFC] force 32-byte aligned kmallocs
From: Eric Van Hensbergen @ 2011-05-20  0:47 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, linux-kernel, bg-linux
In-Reply-To: <1305851769.7481.90.camel@pasglop>

On Thu, May 19, 2011 at 7:36 PM, Benjamin Herrenschmidt
<benh@kernel.crashing.org> wrote:
> On Wed, 2011-05-18 at 16:24 -0500, Eric Van Hensbergen wrote:
>>
>> -#ifdef CONFIG_NOT_COHERENT_CACHE
>> +#if defined(CONFIG_NOT_COHERENT_CACHE) || defined(CONFIG_BGP)
>>  #define ARCH_DMA_MINALIGN    L1_CACHE_BYTES
>>  #endif
>
> Is DMA cache coherent on BG/P ? That's odd for a 4xx base :-)
>

My understanding of things (which could be totally wrong) is that the
DMA we care about on BG/P (namely the Torus and Collective networks)
is coherent at the L2.  Of course the change in question is talking
about L1_CACHE_BYTES, so my reading of this is that it's a sleazy way
of getting aligned mallocs that make interactions with the tightly
coupled networks easier/more-efficient.  I'm open to alternative
suggestions.

      -eric

^ permalink raw reply

* Re: [PATCH 3/7] [RFC] add support for BlueGene/P FPU
From: Benjamin Herrenschmidt @ 2011-05-20  0:52 UTC (permalink / raw)
  To: Michael Neuling; +Cc: linuxppc-dev, linux-kernel, bg-linux
In-Reply-To: <425.1305784718@neuling.org>

On Thu, 2011-05-19 at 15:58 +1000, Michael Neuling wrote:

> > +
> >  #define SAVE_2GPRS(n, base)	SAVE_GPR(n, base); SAVE_GPR(n+1, base)
> >  #define SAVE_4GPRS(n, base)	SAVE_2GPRS(n, base); SAVE_2GPRS(n+2, base)
> >  #define SAVE_8GPRS(n, base)	SAVE_4GPRS(n, base); SAVE_4GPRS(n+4, base)
> > @@ -97,18 +104,26 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
> >  #define REST_8GPRS(n, base)	REST_4GPRS(n, base); REST_4GPRS(n+4, base)
> >  #define REST_10GPRS(n, base)	REST_8GPRS(n, base); REST_2GPRS(n+8, base)
> >  
> > -#define SAVE_FPR(n, base)	stfd	n,THREAD_FPR0+8*TS_FPRWIDTH*(n)(base)
> > -#define SAVE_2FPRS(n, base)	SAVE_FPR(n, base); SAVE_FPR(n+1, base)
> > -#define SAVE_4FPRS(n, base)	SAVE_2FPRS(n, base); SAVE_2FPRS(n+2, base)
> > -#define SAVE_8FPRS(n, base)	SAVE_4FPRS(n, base); SAVE_4FPRS(n+4, base)
> > -#define SAVE_16FPRS(n, base)	SAVE_8FPRS(n, base); SAVE_8FPRS(n+8, base)
> > -#define SAVE_32FPRS(n, base)	SAVE_16FPRS(n, base); SAVE_16FPRS(n+16, base)
> > -#define REST_FPR(n, base)	lfd	n,THREAD_FPR0+8*TS_FPRWIDTH*(n)(base)
> > -#define REST_2FPRS(n, base)	REST_FPR(n, base); REST_FPR(n+1, base)
> > -#define REST_4FPRS(n, base)	REST_2FPRS(n, base); REST_2FPRS(n+2, base)
> > -#define REST_8FPRS(n, base)	REST_4FPRS(n, base); REST_4FPRS(n+4, base)
> > -#define REST_16FPRS(n, base)	REST_8FPRS(n, base); REST_8FPRS(n+8, base)
> > -#define REST_32FPRS(n, base)	REST_16FPRS(n, base); REST_16FPRS(n+16, base)
> > +#ifdef CONFIG_BGP
> > +#define SAVE_FPR(n, b, base)	li b, THREAD_FPR0+(16*(n)); STFPDX(n, base, b)
> > +#define REST_FPR(n, b, base)	li b, THREAD_FPR0+(16*(n)); LFPDX(n, base, b)
> 
> 16*?  Are these FP regs 64 or 128 bits wide?  If 128 you are going to
> have to play with TS_WIDTH to get the size of the FPs correct in the
> thread_struct.
> 
> I think there's a bug here.

Regardless of that, btw, I don't think it's very sane to change those
macros that way. I'd rather have a separate set to save/restore the BG
stuff and separate code altogether for loading/saving/flushing/etc...
like FSP SPE. The FPU save/restore code is already too complex as it is.

Also, should we aim to have this co-exist with other 4xx platforms in a
multiplatform kernel ? In that case it should not break the normal FP
case. Feel free to use CPU feature bits, there are 2 or 3 left available
in the 32-bit space, maybe pick a "combo" one for BGP (or one for hummer
and a MMU bit for the odd SMP tricks).

Hrm... thinking of which, what about doing it using the alternate
feature section ? This allows two "alternate" piece of codes to overlay,
the kernel will replace the original one with the alternative one if the
feature bits match. That way you can just stick an alternate around
SAVE/REST_32FPRS that replace them with your new SAVE/REST_32HFPRS (or
whatever you want to call your new set of macros).

Of course you'll probably need a separate area in the thread
struct/pt_regs etc... which means a userspace ABI change, a change of the
sig context etc etc ....

Cheers,
Ben.

^ permalink raw reply

* Re: [PATCH 3/7] [RFC] add support for BlueGene/P FPU
From: Benjamin Herrenschmidt @ 2011-05-20  0:53 UTC (permalink / raw)
  To: Eric Van Hensbergen; +Cc: Michael Neuling, linuxppc-dev, linux-kernel, bg-linux
In-Reply-To: <BANLkTimKhApFW8G1-pG0u_9Kv2YB0R1O0w@mail.gmail.com>

On Thu, 2011-05-19 at 08:53 -0500, Eric Van Hensbergen wrote:
> On Thu, May 19, 2011 at 12:58 AM, Michael Neuling <mikey@neuling.org> wrote:
> > Eric,
> >
> >> This patch adds save/restore register support for the BlueGene/P
> >> double hummer FPU.
> >
> > What does this mean?  Needs more details here.
> >
> 
> Hi Mikey,
> 
> any specific details you are looking for here?  AFAIK these patches
> are required for the kernel to save/restore the double hummer
> properly.

A description of the double hummer would be good.

Cheers,
Ben.

> >>
> >> +#ifdef CONFIG_BGP
> >> +#define LFPDX(frt, ra, rb)   .long (31<<26)|((frt)<<21)|((ra)<<16)| \
> >> +                                                     ((rb)<<11)|(462<<1)
> >> +#define STFPDX(frt, ra, rb)  .long (31<<26)|((frt)<<21)|((ra)<<16)| \
> >> +                                                     ((rb)<<11)|(974<<1)
> >> +#endif /* CONFIG_BGP */
> >
> > Put these in arch/powerpc/include/asm/ppc-opcode.h and reformat to fit
> > whats there already.
> >
> > Also, don't need to put these defines inside a #ifdef.
> >
> 
> Sure, I'll fix that up.
> 
> >> +#ifdef CONFIG_BGP
> >> +#define SAVE_FPR(n, b, base) li b, THREAD_FPR0+(16*(n)); STFPDX(n, base, b)
> >> +#define REST_FPR(n, b, base) li b, THREAD_FPR0+(16*(n)); LFPDX(n, base, b)
> >
> > 16*?  Are these FP regs 64 or 128 bits wide?  If 128 you are going to
> > have to play with TS_WIDTH to get the size of the FPs correct in the
> > thread_struct.
> >
> > I think there's a bug here.
> >
> 
> I actually have three different versions of this code from different
> source patches that I'm drawing from - so your help in figuring out
> the best way to approach this is appreciated.  The kittyhawk version
> of the code has 8* instead of 16*.  According to the docs:
> "Each of the two FPU units contains 32 64-bit floating point registers
> for a total of 64 FP registers per processor." which would seem to
> point to the kittyhawk version - but they have a second SAVE_32SFPRS
> for the second hummer.  What wasn't clear to me with this version of
> the code was whether or not they were doing something clever like
> saving the pair of the 64-bit FPU registers in a single 128-bit slot
> (seems plausible).  If this is not the way to go, I can certainly
> switch the kittyhawk version of the patch with the *, the extra
> SAVE32SFPR and the extra double hummer specific storage space in the
> thread_struct.  If it would help I can post an alternate version of
> the patch for discussion with the kittyhawk version.
> 
> >>  /*
> >> diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/
> > Kconfig
> >> index f485fc5f..24a515e 100644
> >> --- a/arch/powerpc/platforms/44x/Kconfig
> >> +++ b/arch/powerpc/platforms/44x/Kconfig
> >> @@ -169,6 +169,15 @@ config YOSEMITE
> >>       help
> >>         This option enables support for the AMCC PPC440EP evaluation board.
> >>
> >> +config       BGP
> >
> > Does this FPU feature have a specific name like double hammer?  I'd
> > rather have the BGP defconfig depend on PPC_FPU_DOUBLE_HUMMER, or
> > something like that...
> >
> >> +     bool "Blue Gene/P"
> >> +     depends on 44x
> >> +     default n
> >> +     select PPC_FPU
> >> +     select PPC_DOUBLE_FPU
> >
> > ... in fact, it seem you are doing something like these here but you
> > don't use PPC_DOUBLE_FPU anywhere?
> >
> 
> A fair point.  I'm fine with calling it DOUBLE_HUMMER, but I wasn't sure if
> that was "too internal" of a name for the kernel.  Let me know and
> I'll fix it up.
> I'll also change the CONFIG_BGP defines in the FPU code to PPC_DOUBLE_FPU
> or PPC_DOUBLE_HUMMER depending on what the community decides.
> 
> Thanks for the feedback!
> 
>         -eric
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev

^ permalink raw reply

* Re: [PATCH 4/7] [RFC] enable L1_WRITETHROUGH mode for BG/P
From: Benjamin Herrenschmidt @ 2011-05-20  1:01 UTC (permalink / raw)
  To: Eric Van Hensbergen; +Cc: linuxppc-dev, linux-kernel, bg-linux
In-Reply-To: <1305753895-24845-4-git-send-email-ericvh@gmail.com>

On Wed, 2011-05-18 at 16:24 -0500, Eric Van Hensbergen wrote:
> BG/P nodes need to be configured for writethrough to work in SMP
> configurations.  This patch adds the right hooks in the MMU code
> to make sure L1_WRITETHROUGH configurations are setup for BG/P.

>  /* Storage attribute and access control fields */
>  #define PPC44x_TLB_ATTR_MASK	0x0000ff80
> +#define PPC44x_TLB_WL1		0x00100000	/* Write-through L1 */
>  #define PPC44x_TLB_U0		0x00008000      /* User 0 */
>  #define PPC44x_TLB_U1		0x00004000      /* User 1 */
>  #define PPC44x_TLB_U2		0x00002000      /* User 2 */
> diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
> index 5e12b74..1f7ae60 100644
> --- a/arch/powerpc/kernel/head_44x.S
> +++ b/arch/powerpc/kernel/head_44x.S
> @@ -429,7 +429,16 @@ finish_tlb_load_44x:
>  	andi.	r10,r12,_PAGE_USER		/* User page ? */
>  	beq	1f				/* nope, leave U bits empty */
>  	rlwimi	r11,r11,3,26,28			/* yes, copy S bits to U */
> -1:	tlbwe	r11,r13,PPC44x_TLB_ATTRIB	/* Write ATTRIB */
> +1:
> +#ifdef CONFIG_L1_WRITETHROUGH
> +	andi.	r10, r11, PPC44x_TLB_I
> +	bne	2f
> +	oris    r11,r11,PPC44x_TLB_WL1@h	/* Add coherency for */
> +						/* non-inhibited */
> +	ori	r11,r11,PPC44x_TLB_U2|PPC44x_TLB_M
> +2:
> +#endif /* CONFIG_L1_WRITETHROUGH */

Make it an MMU feature so it's done at runtime rather than compile time.

Also, you should aim toward avoiding that conditional branch in such a
critical hot path :-) A way to do so would be to shove these in the PTE
instead, there's plenty of unused bits in the top part for example.

> +	tlbwe	r11,r13,PPC44x_TLB_ATTRIB	/* Write ATTRIB */
>  
>  	/* Done...restore registers and get out of here.
>  	*/
> @@ -799,7 +808,11 @@ skpinv:	addi	r4,r4,1				/* Increment */
>  	sync
>  
>  	/* Initialize MMUCR */
> +#ifdef CONFIG_L1_WRITETHROUGH
> +	lis	r5, PPC44x_MMUCR_U2@h
> +#else
>  	li	r5,0
> +#endif /* CONFIG_L1_WRITETHROUGH */
>  	mtspr	SPRN_MMUCR,r5
>  	sync
>  
> @@ -814,7 +827,14 @@ skpinv:	addi	r4,r4,1				/* Increment */
>  	/* attrib fields */
>  	/* Added guarded bit to protect against speculative loads/stores */
>  	li	r5,0
> -	ori	r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G)
> +#ifdef CONFIG_L1_WRITETHROUGH
> +	ori	r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | \
> +						PPC44x_TLB_G | PPC44x_TLB_U2)
> +	oris	r5,r5,PPC44x_TLB_WL1@h
> +#else
> +	ori	r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | \
> +			PPC44x_TLB_G)
> +#endif /* CONFIG_L1_WRITETHROUGH
>  
>          li      r0,63                    /* TLB slot 63 */
>  
> diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
> index 094bd98..d88369b 100644
> --- a/arch/powerpc/kernel/misc_32.S
> +++ b/arch/powerpc/kernel/misc_32.S
> @@ -506,7 +506,20 @@ _GLOBAL(clear_pages)
>  	li	r0,PAGE_SIZE/L1_CACHE_BYTES
>  	slw	r0,r0,r4
>  	mtctr	r0
> +#ifdef CONFIG_L1_WRITETHROUGH
> +	/* assuming 32 byte cacheline */
> +	li      r4, 0
> +1:	stw     r4, 0(r3)
> +	stw     r4, 4(r3)
> +	stw     r4, 8(r3)
> +	stw     r4, 12(r3)
> +	stw     r4, 16(r3)
> +	stw     r4, 20(r3)
> +	stw     r4, 24(r3)
> +	stw     r4, 28(r3)
> +#else
>  1:	dcbz	0,r3
> +#endif /* CONFIG_L1_WRITETHROUGH */

wtf ? dcbz doesn't work ? yuck ! This isn't a HW design, it's a hack :-)

make it an mmu feature btw, as I said, I'd like to keep it a unified
kernel.

>  	addi	r3,r3,L1_CACHE_BYTES
>  	bdnz	1b
>  	blr
> @@ -550,7 +563,9 @@ _GLOBAL(copy_page)
>  	mtctr	r0
>  1:
>  	dcbt	r11,r4
> +#ifndef CONFIG_L1_WRITETHROUGH
>  	dcbz	r5,r3
> +#endif
>  	COPY_16_BYTES
>  #if L1_CACHE_BYTES >= 32
>  	COPY_16_BYTES
> diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
> index 55f19f9..98a07e3 100644
> --- a/arch/powerpc/lib/copy_32.S
> +++ b/arch/powerpc/lib/copy_32.S
> @@ -98,7 +98,11 @@ _GLOBAL(cacheable_memzero)
>  	bdnz	4b
>  3:	mtctr	r9
>  	li	r7,4
> +#ifdef CONFIG_L1_WRITETHROUGH
> +10:
> +#else
>  10:	dcbz	r7,r6
> +#endif /* CONFIG_L1_WRITETHROUGH */
>  	addi	r6,r6,CACHELINE_BYTES
>  	bdnz	10b
>  	clrlwi	r5,r8,32-LG_CACHELINE_BYTES
> @@ -187,7 +191,9 @@ _GLOBAL(cacheable_memcpy)
>  	mtctr	r0
>  	beq	63f
>  53:
> +#ifndef CONFIG_L1_WRITETHROUGH
>  	dcbz	r11,r6
> +#endif /* CONFIG_L1_WRITETHROUGH */
>  	COPY_16_BYTES
>  #if L1_CACHE_BYTES >= 32
>  	COPY_16_BYTES
> @@ -368,7 +374,11 @@ _GLOBAL(__copy_tofrom_user)
>  	mtctr	r8
>  
>  53:	dcbt	r3,r4
> +#ifdef CONFIG_L1_WRITETHROUGH
> +54:
> +#else
>  54:	dcbz	r11,r6
> +#endif
>  	.section __ex_table,"a"
>  	.align	2
>  	.long	54b,105f
> diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c
> index 024acab..b684c8a 100644
> --- a/arch/powerpc/mm/44x_mmu.c
> +++ b/arch/powerpc/mm/44x_mmu.c
> @@ -80,9 +80,12 @@ static void __init ppc44x_pin_tlb(unsigned int virt, unsigned int phys)
>  	:
>  #ifdef CONFIG_PPC47x
>  	: "r" (PPC47x_TLB2_S_RWX),
> -#else
> +#elseif CONFIG_L1_WRITETHROUGH
> +	: "r" (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_WL1 \
> +		| PPC44x_TLB_U2 | PPC44x_TLB_M),
> +#else /* neither CONFIG_PPC47x or CONFIG_L1_WRITETHROUGH */
>  	: "r" (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G),
> -#endif
> +#endif /* CONFIG_PPC47x */
>  	  "r" (phys),
>  	  "r" (virt | PPC44x_TLB_VALID | PPC44x_TLB_256M),
>  	  "r" (entry),
> diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
> index f7b0772..684a281 100644
> --- a/arch/powerpc/platforms/Kconfig
> +++ b/arch/powerpc/platforms/Kconfig
> @@ -348,4 +348,9 @@ config XILINX_PCI
>  	bool "Xilinx PCI host bridge support"
>  	depends on PCI && XILINX_VIRTEX
>  
> +config L1_WRITETHROUGH
> +	bool "Blue Gene/P enabled writethrough mode"
> +	depends on BGP
> +	default y
> +
>  endmenu
> diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
> index 111138c..3a3c711 100644
> --- a/arch/powerpc/platforms/Kconfig.cputype
> +++ b/arch/powerpc/platforms/Kconfig.cputype
> @@ -329,9 +329,13 @@ config NOT_COHERENT_CACHE
>  	bool
>  	depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
>  	default n if PPC_47x
> +	default n if BGP
>  	default y
>  
>  config CHECK_CACHE_COHERENCY
>  	bool
>  
> +config L1_WRITETHROUGH
> +	bool
> +
>  endmenu

^ permalink raw reply

* Re: [PATCH 7/7] [RFC] SMP support code
From: Benjamin Herrenschmidt @ 2011-05-20  1:05 UTC (permalink / raw)
  To: Eric Van Hensbergen; +Cc: linuxppc-dev, linux-kernel, bg-linux
In-Reply-To: <1305753895-24845-7-git-send-email-ericvh@gmail.com>

On Wed, 2011-05-18 at 16:24 -0500, Eric Van Hensbergen wrote:

> +#ifdef CONFIG_BGP
> +/*
> + * The icbi instruction does not broadcast to all cpus in the ppc450
> + * processor used by Blue Gene/P.  It is unlikely this problem will
> + * be exhibited in other processors so this remains ifdef'ed for BGP
> + * specifically.
> + *
> + * We deal with this by marking executable pages either writable, or
> + * executable, but never both.  The permissions will fault back and
> + * forth if the thread is actively writing to executable sections.
> + * Each time we fault to become executable we flush the dcache into
> + * icache on all cpus.
> + *

I know that hack :-) I think I wrote it even (or a version of it, that
was a long time ago) ;-) That doesn't make it pretty tho ...
> 

> +struct bgp_fixup_parm {
> +	struct page		*page;
> +	unsigned long		address;
> +	struct vm_area_struct	*vma;
> +};
> +
> +static void bgp_fixup_cache_tlb(void *parm)
> +{
> +	struct bgp_fixup_parm	*p = parm;
> +
> +	if (!PageHighMem(p->page))
> +		flush_dcache_icache_page(p->page);
> +	local_flush_tlb_page(p->vma, p->address);
> +}
> +
> +static void bgp_fixup_access_perms(struct vm_area_struct *vma,
> +				  unsigned long address,
> +				  int is_write, int is_exec)
> +{
> +	struct mm_struct *mm = vma->vm_mm;
> +	pte_t *ptep = NULL;
> +	pmd_t *pmdp;
> +
> +	if (get_pteptr(mm, address, &ptep, &pmdp)) {
> +		spinlock_t *ptl = pte_lockptr(mm, pmdp);
> +		pte_t old;
> +
> +		spin_lock(ptl);
> +		old = *ptep;
> +		if (pte_present(old)) {
> +			struct page *page = pte_page(old);
> +
> +			if (is_exec) {
> +				struct bgp_fixup_parm param = {
> +					.page		= page,
> +					.address	= address,
> +					.vma		= vma,
> +				};
> +				pte_update(ptep, _PAGE_HWWRITE, 0);
> +				on_each_cpu(bgp_fixup_cache_tlb, &param, 1);

Gotta be very careful with on_each_cpu() done within a lock. I wonder if
we could fast-path & simplify that using crits, is there a way to shoot
critical IPIs to the other cores ? Might even be able in this case to
do it entirely in asm in the page fault path.

> +				pte_update(ptep, 0, _PAGE_EXEC);
> +				pte_unmap_unlock(ptep, ptl);
> +				return;
> +			}
> +			if (is_write &&
> +			    (pte_val(old) & _PAGE_RW) &&
> +			    (pte_val(old) & _PAGE_DIRTY) &&
> +			    !(pte_val(old) & _PAGE_HWWRITE)) {
> +				pte_update(ptep, _PAGE_EXEC, _PAGE_HWWRITE);
> +			}
> +		}
> +		if (!pte_same(old, *ptep))
> +			flush_tlb_page(vma, address);
> +		pte_unmap_unlock(ptep, ptl);
> +	}
> +}
> +#endif /* CONFIG_BGP */
> +
>  /*
>   * For 600- and 800-family processors, the error_code parameter is DSISR
>   * for a data fault, SRR1 for an instruction fault. For 400-family processors
> @@ -333,6 +404,12 @@ good_area:
>  		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1, 0,
>  				     regs, address);
>  	}
> +
> +#ifdef CONFIG_BGP
> +	/* Fixup _PAGE_EXEC and _PAGE_HWWRITE if necessary */
> +	bgp_fixup_access_perms(vma, address, is_write, is_exec);
> +#endif /* CONFIG_BGP */
> +
>  	up_read(&mm->mmap_sem);
>  	return 0;
>  
> diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
> index 3a3c711..b77a25f 100644
> --- a/arch/powerpc/platforms/Kconfig.cputype
> +++ b/arch/powerpc/platforms/Kconfig.cputype
> @@ -300,7 +300,7 @@ config PPC_PERF_CTRS
>           This enables the powerpc-specific perf_event back-end.
>  
>  config SMP
> -	depends on PPC_BOOK3S || PPC_BOOK3E || FSL_BOOKE || PPC_47x
> +	depends on PPC_BOOK3S || PPC_BOOK3E || FSL_BOOKE || PPC_47x || BGP
>  	bool "Symmetric multi-processing support"
>  	---help---
>  	  This enables support for systems with more than one CPU. If you have

^ permalink raw reply

* Re: [PATCH 2/7] [RFC] add bluegene entry to cputable
From: Eric Van Hensbergen @ 2011-05-20  1:08 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, linux-kernel, bg-linux
In-Reply-To: <1305851709.7481.89.camel@pasglop>

On Thu, May 19, 2011 at 7:35 PM, Benjamin Herrenschmidt
<benh@kernel.crashing.org> wrote:
> On Wed, 2011-05-18 at 16:24 -0500, Eric Van Hensbergen wrote:
>> +             .dcache_bsize           = 32,
>> +             .cpu_setup              = __setup_cpu_460gt,
>                                            ^^^^^^^^^^^^^^^^^^
> Are you sure ?
>

That surprised me too, I figured it must have been a close enough
match (at least in the 2.6.29.1 time frame, which is where I'm trying
to merge the BG/P patches up from).  The kittyhawk patches don't even
use this, so it's possible we could just remove it.


_GLOBAL(__setup_cpu_460ex)
_GLOBAL(__setup_cpu_460gt)
        mflr    r4
        bl      __init_fpu_44x
        bl      __fixup_440A_mcheck
        mtlr    r4
        blr

Looks like the 460 setup invokes a bunch of 440 calls!  Would you
prefer I setup my own entry point (setup_cpu_bgp or setup_cpu_450)
which makes the same calls?

     -eric

^ permalink raw reply

* Re: [PATCH] agp/uninorth: Fix lockups with radeon KMS and >1x.
From: Benjamin Herrenschmidt @ 2011-05-20  1:10 UTC (permalink / raw)
  To: Michel Dänzer; +Cc: linuxppc-dev, Dave Airlie, dri-devel
In-Reply-To: <1305814119-32288-1-git-send-email-michel@daenzer.net>

On Thu, 2011-05-19 at 16:08 +0200, Michel Dänzer wrote:
> From: Michel Dänzer <daenzer@vmware.com>
> 
> This was based on a description by Ben Herrenschmidt:
> 
> > I've removed that SBA reset from the normal TLB invalidation path and
> > left it only once after turning AGP on.
> 
> About six months ago, he said:
> 
> > I did it a bit differently, but yeah, you get the idea. I'm doing a
> > patch series so don't bother pushing things too hard yet.
> 
> But I haven't seen anything from him about this since then, and people are
> regularly hitting these lockups, so here we are...
> 
> Signed-off-by: Michel Dänzer <daenzer@vmware.com>

Oops. I do have a pile of patches, but I never got something "stable"
enough and got distracted by more important stuff. Dave, please merge
this for now.

Acked-by: Benjamin Herrenschmidt <benh@kernel.crashing.org>

Thanks !

Cheers,
Ben.

> ---
>  drivers/char/agp/uninorth-agp.c |    2 +-
>  1 files changed, 1 insertions(+), 1 deletions(-)
> 
> diff --git a/drivers/char/agp/uninorth-agp.c b/drivers/char/agp/uninorth-agp.c
> index 47c2218..55af723 100644
> --- a/drivers/char/agp/uninorth-agp.c
> +++ b/drivers/char/agp/uninorth-agp.c
> @@ -80,7 +80,7 @@ static void uninorth_tlbflush(struct agp_memory *mem)
>  			       ctrl | UNI_N_CFG_GART_INVAL);
>  	pci_write_config_dword(agp_bridge->dev, UNI_N_CFG_GART_CTRL, ctrl);
>  
> -	if (uninorth_rev <= 0x30) {
> +	if (!mem && uninorth_rev <= 0x30) {
>  		pci_write_config_dword(agp_bridge->dev, UNI_N_CFG_GART_CTRL,
>  				       ctrl | UNI_N_CFG_GART_2xRESET);
>  		pci_write_config_dword(agp_bridge->dev, UNI_N_CFG_GART_CTRL,

^ permalink raw reply

* Re: [PATCH 6/7] [RFC] enable early TLBs for BG/P
From: Eric Van Hensbergen @ 2011-05-20  1:21 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, linux-kernel, bg-linux
In-Reply-To: <1305851941.7481.92.camel@pasglop>

On Thu, May 19, 2011 at 7:39 PM, Benjamin Herrenschmidt
<benh@kernel.crashing.org> wrote:
> On Wed, 2011-05-18 at 16:24 -0500, Eric Van Hensbergen wrote:
>> BG/P maps firmware with an early TLB
>
> That's a bit gross. How often do you call that firmware in practice ?
> Aren't you better off instead inserting a TLB entry for it when you call
> it instead ? A simple tlbsx. + tlbwe sequence would do. That would free
> up a TLB entry for normal use.
>

Well, it depends on who you talk to.  The production software BG/P
guys use the firmware
constantly, it's the primary interface to the networks, the console,
and the management software
which runs the machine.  As such the IO Node guys, the Compute Node
Kernel guys and the
ZeptoOS guys use it quite a bit.  The kittyhawk guys on the other hand
barely use it at all, in fact
I believe they do all the interaction with it during uboot and then shut it off.

IIRC, the sticky question is RAS support, there are certain things it
wants to jump to firmware
to deal with and expects things to be mapped and pinned into memory.
Furthermore, I think it
may make assumptions about where in the TLB the mappings are.  Since
the kittyhawk guys
obviously ignore this by shutting it down, it's not clear just how
important this is.  I'm game to
try the dynamic mapping as you suggest if you would prefer it.

It's worth mentioning that I believe with BG/Q, the plan is to rely on
the firmware even more
extensively, but I haven't looked at any of the code yet to verify
whether or not this is true.

     -eric

^ permalink raw reply

* Re: [bg-linux] [PATCH 5/7] [RFC] force 32-byte aligned kmallocs
From: Kazutomo Yoshii @ 2011-05-20  1:32 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: linuxppc-dev, linux-kernel, bg-linux
In-Reply-To: <1305851769.7481.90.camel@pasglop>

[-- Attachment #1: Type: text/plain, Size: 1361 bytes --]

On 05/19/2011 07:36 PM, Benjamin Herrenschmidt wrote:
> On Wed, 2011-05-18 at 16:24 -0500, Eric Van Hensbergen wrote:
>    
>> For BGP, it is convenient for 'kmalloc' to come back with 32-byte
>> aligned units for torus DMA
>>
>> Signed-off-by: Eric Van Hensbergen<ericvh@gmail.com>
>> ---
>>   arch/powerpc/include/asm/page_32.h |    2 +-
>>   1 files changed, 1 insertions(+), 1 deletions(-)
>>
>> diff --git a/arch/powerpc/include/asm/page_32.h b/arch/powerpc/include/asm/page_32.h
>> index 68d73b2..fb0a7ae 100644
>> --- a/arch/powerpc/include/asm/page_32.h
>> +++ b/arch/powerpc/include/asm/page_32.h
>> @@ -9,7 +9,7 @@
>>
>>   #define VM_DATA_DEFAULT_FLAGS	VM_DATA_DEFAULT_FLAGS32
>>
>> -#ifdef CONFIG_NOT_COHERENT_CACHE
>> +#if defined(CONFIG_NOT_COHERENT_CACHE) || defined(CONFIG_BGP)
>>   #define ARCH_DMA_MINALIGN	L1_CACHE_BYTES
>>   #endif
>>      
> Is DMA cache coherent on BG/P ? That's odd for a 4xx base :-)
>    

Actually DMA sends invalidate requests to the snoop unit (L2 level).
BGP SoC is a bit different from other 4xx base.

Some details can be found in
www.scc.acad.bg/documentation/team.pdf

- kaz

> Cheers,
> Ben.
>
>
>
> _______________________________________________
> bg-linux mailing list
> bg-linux@lists.anl-external.org
> https://lists.anl-external.org/mailman/listinfo/bg-linux
> http://bg-linux.anl-external.org/wiki
>    


[-- Attachment #2: Type: text/html, Size: 2316 bytes --]

^ permalink raw reply

* Re: [PATCH 5/7] [RFC] force 32-byte aligned kmallocs
From: Benjamin Herrenschmidt @ 2011-05-20  1:50 UTC (permalink / raw)
  To: Eric Van Hensbergen; +Cc: linuxppc-dev, linux-kernel, bg-linux
In-Reply-To: <BANLkTimc3jQTOJdqAiqbPxGQNVWe39VqfQ@mail.gmail.com>

On Thu, 2011-05-19 at 19:47 -0500, Eric Van Hensbergen wrote:
> On Thu, May 19, 2011 at 7:36 PM, Benjamin Herrenschmidt
> <benh@kernel.crashing.org> wrote:
> > On Wed, 2011-05-18 at 16:24 -0500, Eric Van Hensbergen wrote:
> >>
> >> -#ifdef CONFIG_NOT_COHERENT_CACHE
> >> +#if defined(CONFIG_NOT_COHERENT_CACHE) || defined(CONFIG_BGP)
> >>  #define ARCH_DMA_MINALIGN    L1_CACHE_BYTES
> >>  #endif
> >
> > Is DMA cache coherent on BG/P ? That's odd for a 4xx base :-)
> >
> 
> My understanding of things (which could be totally wrong) is that the
> DMA we care about on BG/P (namely the Torus and Collective networks)
> is coherent at the L2.  Of course the change in question is talking
> about L1_CACHE_BYTES, so my reading of this is that its a sleazy way
> of getting aligned mallocs that make interactions with the tightly
> coupled networks easier/more-efficient.  I'm open to alternative
> suggestions.

But if it's not coherent with L1, then you should have
CONFIG_NOT_COHERENT_CACHE set and not need that patch... or am I missing
something ?

One thing we should do some day as well is make that whole non-coherent
be runtime selected, on the list of things to fix 440+47x in the same
kernel. Pfiew....

Cheers,
Ben.

^ permalink raw reply

* Re: [PATCH 2/7] [RFC] add bluegene entry to cputable
From: Benjamin Herrenschmidt @ 2011-05-20  1:50 UTC (permalink / raw)
  To: Eric Van Hensbergen; +Cc: linuxppc-dev, linux-kernel, bg-linux
In-Reply-To: <BANLkTimEqYnTn10rV2=gfDkXfgxX+2OSCg@mail.gmail.com>

On Thu, 2011-05-19 at 20:08 -0500, Eric Van Hensbergen wrote:
> On Thu, May 19, 2011 at 7:35 PM, Benjamin Herrenschmidt
> <benh@kernel.crashing.org> wrote:
> > On Wed, 2011-05-18 at 16:24 -0500, Eric Van Hensbergen wrote:
> >> +             .dcache_bsize           = 32,
> >> +             .cpu_setup              = __setup_cpu_460gt,
> >                                            ^^^^^^^^^^^^^^^^^^
> > Are you sure ?
> >
> 
> That surprised me too, I figured it must have been a close enough
> match (at least in the 2.6.29.1 time frame which is where I'm trying
> to merge the BG/P patches up from.  The kittyhawk patches don't even
> use this, so its possible we could just remove it.
>
> _GLOBAL(__setup_cpu_460ex)
> _GLOBAL(__setup_cpu_460gt)
>         mflr    r4
>         bl      __init_fpu_44x
>         bl      __fixup_440A_mcheck
>         mtlr    r4
>         blr
> 
> Looks like the 460 setup invokes a bunch of 440 calls!  Would you
> prefer I setup my own entry point (setup_cpu_bgp or setup_cpu_450)
> which makes the same calls?

Yes, add an entry. 460's are just 440's btw :-)

Cheers,
Ben.

^ permalink raw reply

* Re: [PATCH 6/7] [RFC] enable early TLBs for BG/P
From: Benjamin Herrenschmidt @ 2011-05-20  1:54 UTC (permalink / raw)
  To: Eric Van Hensbergen; +Cc: linuxppc-dev, linux-kernel, bg-linux
In-Reply-To: <BANLkTinAqeu7C+U_HFrOAOctFTuSUwG-iA@mail.gmail.com>

On Thu, 2011-05-19 at 20:21 -0500, Eric Van Hensbergen wrote:
> On Thu, May 19, 2011 at 7:39 PM, Benjamin Herrenschmidt
> <benh@kernel.crashing.org> wrote:
> > On Wed, 2011-05-18 at 16:24 -0500, Eric Van Hensbergen wrote:
> >> BG/P maps firmware with an early TLB
> >
> > That's a bit gross. How often do you call that firmware in practice ?
> > Aren't you better off instead inserting a TLB entry for it when you call
> > it instead ? A simple tlbsx. + tlbwe sequence would do. That would free
> > up a TLB entry for normal use.
> >
> 
> Well, it depends on who you talk to.  The production software BG/P
> guys use the firmware constantly, its the primary interface to the networks, the console,
> and the management software which runs the machine.

Yuck.

> As such the IO Node guys, the Compute Node Kernel guys and the
> ZeptoOS guys use it quite a bit.  The kittyhawk guys on the other hand
> barely use it at all, in fact I believe they do all the interaction with
> it during uboot and then shut it off.

I would prefer that approach.

> IIRC, the sticky question is RAS support, there are certain things it
> wants to jump to firmware to deal with and expects things to be mapped
> an pinned into memory.
>
> Furthermore, I think it may make assumptions about where in the TLB the
> mappings are.  

This is gross, especially on a system with only 64 SW loaded TLB
entries :-(

> Since the kittyhawk guys
> obviously ignore this by shutting it down, its not clear just how
> important this is.  I'm game to
> try the dynamic mapping as you suggest if you would prefer it.

I would yes, we can sort things out later for RAS.

> Its worth mentioning that I believe with BG/Q, the plan is to rely on
> the firmware even more extensively, but I haven't looked at any of the code yet to verify
> whether or not this is true.

This is tantamount to linking a binary blob with the kernel ... it's a
fine line. At some point we might refuse the patches if they go too far
in that direction.

Cheers,
Ben.

>      -eric
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/

^ permalink raw reply

* Re: [bg-linux] [PATCH 5/7] [RFC] force 32-byte aligned kmallocs
From: Benjamin Herrenschmidt @ 2011-05-20  2:08 UTC (permalink / raw)
  To: Kazutomo Yoshii; +Cc: linuxppc-dev, linux-kernel, bg-linux
In-Reply-To: <4DD5C49E.4050103@gmail.com>

On Thu, 2011-05-19 at 20:32 -0500, Kazutomo Yoshii wrote:
> 
> Actually DMA sends invalidate requests to the snoop unit(L2 level).
> BGP SoC is a bit different from other 4xx base. 

Well, some other 44x also have a snooping L2 (more or less), but L1 is
usually the problem.

Cheers,
Ben.

^ permalink raw reply

* Re: [bg-linux] [PATCH 5/7] [RFC] force 32-byte aligned kmallocs
From: Benjamin Herrenschmidt @ 2011-05-20  2:13 UTC (permalink / raw)
  To: Kazutomo Yoshii; +Cc: linuxppc-dev, linux-kernel, bg-linux
In-Reply-To: <1305857301.7481.121.camel@pasglop>

On Fri, 2011-05-20 at 12:08 +1000, Benjamin Herrenschmidt wrote:
> On Thu, 2011-05-19 at 20:32 -0500, Kazutomo Yoshii wrote:
> > 
> > Actually DMA sends invalidate requests to the snoop unit(L2 level).
> > BGP SoC is a bit different from other 4xx base. 
> 
> Well, some other 44x also have a snooping L2 (more or less), but L1 is
> usually the problem.

Hrm... looking at the doco you pointed me to, it looks like the L1 -is-
coherent, it gets snoop kills from the L2.

Ok so we do need to make this non-coherent cache stuff a runtime option
(well, we need that for 440+476 too anyways so may as well do it now).

As for the alignment of kmalloc, it looks like a hack that should be
done in the torus code itself.

Cheers,
Ben.

^ permalink raw reply


This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox