LinuxPPC-Dev Archive on lore.kernel.org

LinuxPPC-Dev Archive on lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH 4/7] [RFC][V2] enable BGP_L1_WRITETHROUGH mode for BG/P
From: Eric Van Hensbergen @ 2011-05-19 21:42 UTC (permalink / raw)
  To: linux-kernel; +Cc: linuxppc-dev, bg-linux
In-Reply-To: <1305753895-24845-4-git-send-email-ericvh@gmail.com>

BG/P nodes need to be configured for writethrough to work in SMP
configurations.  This patch adds the right hooks in the MMU code
to make sure BGP_L1_WRITETHROUGH configurations are set up for BG/P.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 arch/powerpc/include/asm/mmu-44x.h     |    2 ++
 arch/powerpc/kernel/head_44x.S         |   24 ++++++++++++++++++++++--
 arch/powerpc/kernel/misc_32.S          |   15 +++++++++++++++
 arch/powerpc/lib/copy_32.S             |   10 ++++++++++
 arch/powerpc/mm/44x_mmu.c              |    7 +++++--
 arch/powerpc/platforms/Kconfig         |    5 +++++
 arch/powerpc/platforms/Kconfig.cputype |    1 +
 7 files changed, 60 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/include/asm/mmu-44x.h b/arch/powerpc/include/asm/mmu-44x.h
index bf52d70..ca1b90c 100644
--- a/arch/powerpc/include/asm/mmu-44x.h
+++ b/arch/powerpc/include/asm/mmu-44x.h
@@ -8,6 +8,7 @@
 
 #define PPC44x_MMUCR_TID	0x000000ff
 #define PPC44x_MMUCR_STS	0x00010000
+#define PPC44x_MMUCR_U2		0x00200000
 
 #define	PPC44x_TLB_PAGEID	0
 #define	PPC44x_TLB_XLAT		1
@@ -32,6 +33,7 @@
 
 /* Storage attribute and access control fields */
 #define PPC44x_TLB_ATTR_MASK	0x0000ff80
+#define PPC44x_TLB_WL1		0x00100000	/* Write-through L1 */
 #define PPC44x_TLB_U0		0x00008000      /* User 0 */
 #define PPC44x_TLB_U1		0x00004000      /* User 1 */
 #define PPC44x_TLB_U2		0x00002000      /* User 2 */
diff --git a/arch/powerpc/kernel/head_44x.S b/arch/powerpc/kernel/head_44x.S
index 5e12b74..f10ac53 100644
--- a/arch/powerpc/kernel/head_44x.S
+++ b/arch/powerpc/kernel/head_44x.S
@@ -429,7 +429,16 @@ finish_tlb_load_44x:
 	andi.	r10,r12,_PAGE_USER		/* User page ? */
 	beq	1f				/* nope, leave U bits empty */
 	rlwimi	r11,r11,3,26,28			/* yes, copy S bits to U */
-1:	tlbwe	r11,r13,PPC44x_TLB_ATTRIB	/* Write ATTRIB */
+1:
+#ifdef CONFIG_BGP_L1_WRITETHROUGH
+	andi.	r10, r11, PPC44x_TLB_I
+	bne	2f
+	oris    r11,r11,PPC44x_TLB_WL1@h	/* Add coherency for */
+						/* non-inhibited */
+	ori	r11,r11,PPC44x_TLB_U2|PPC44x_TLB_M
+2:
+#endif /* CONFIG_BGP_L1_WRITETHROUGH */
+	tlbwe	r11,r13,PPC44x_TLB_ATTRIB	/* Write ATTRIB */
 
 	/* Done...restore registers and get out of here.
 	*/
@@ -799,7 +808,11 @@ skpinv:	addi	r4,r4,1				/* Increment */
 	sync
 
 	/* Initialize MMUCR */
+#ifdef CONFIG_BGP_L1_WRITETHROUGH
+	lis	r5, PPC44x_MMUCR_U2@h
+#else
 	li	r5,0
+#endif /* CONFIG_BGP_L1_WRITETHROUGH */
 	mtspr	SPRN_MMUCR,r5
 	sync
 
@@ -814,7 +827,14 @@ skpinv:	addi	r4,r4,1				/* Increment */
 	/* attrib fields */
 	/* Added guarded bit to protect against speculative loads/stores */
 	li	r5,0
-	ori	r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G)
+#ifdef CONFIG_BGP_L1_WRITETHROUGH
+	ori	r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | \
+						PPC44x_TLB_G | PPC44x_TLB_U2)
+	oris	r5,r5,PPC44x_TLB_WL1@h
+#else
+	ori	r5,r5,(PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | \
+			PPC44x_TLB_G)
+#endif /* CONFIG_BGP_L1_WRITETHROUGH */
 
         li      r0,63                    /* TLB slot 63 */
 
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 094bd98..3f56d7b 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -506,7 +506,20 @@ _GLOBAL(clear_pages)
 	li	r0,PAGE_SIZE/L1_CACHE_BYTES
 	slw	r0,r0,r4
 	mtctr	r0
+#ifdef CONFIG_BGP_L1_WRITETHROUGH
+	/* assuming 32 byte cacheline */
+	li      r4, 0
+1:	stw     r4, 0(r3)
+	stw     r4, 4(r3)
+	stw     r4, 8(r3)
+	stw     r4, 12(r3)
+	stw     r4, 16(r3)
+	stw     r4, 20(r3)
+	stw     r4, 24(r3)
+	stw     r4, 28(r3)
+#else
 1:	dcbz	0,r3
+#endif /* CONFIG_BGP_L1_WRITETHROUGH */
 	addi	r3,r3,L1_CACHE_BYTES
 	bdnz	1b
 	blr
@@ -550,7 +563,9 @@ _GLOBAL(copy_page)
 	mtctr	r0
 1:
 	dcbt	r11,r4
+#ifndef CONFIG_BGP_L1_WRITETHROUGH
 	dcbz	r5,r3
+#endif /* CONFIG_BGP_L1_WRITETHROUGH */
 	COPY_16_BYTES
 #if L1_CACHE_BYTES >= 32
 	COPY_16_BYTES
diff --git a/arch/powerpc/lib/copy_32.S b/arch/powerpc/lib/copy_32.S
index 55f19f9..552df54 100644
--- a/arch/powerpc/lib/copy_32.S
+++ b/arch/powerpc/lib/copy_32.S
@@ -98,7 +98,11 @@ _GLOBAL(cacheable_memzero)
 	bdnz	4b
 3:	mtctr	r9
 	li	r7,4
+#ifdef CONFIG_BGP_L1_WRITETHROUGH
+10:
+#else
 10:	dcbz	r7,r6
+#endif /* CONFIG_BGP_L1_WRITETHROUGH */
 	addi	r6,r6,CACHELINE_BYTES
 	bdnz	10b
 	clrlwi	r5,r8,32-LG_CACHELINE_BYTES
@@ -187,7 +191,9 @@ _GLOBAL(cacheable_memcpy)
 	mtctr	r0
 	beq	63f
 53:
+#ifndef CONFIG_BGP_L1_WRITETHROUGH
 	dcbz	r11,r6
+#endif /* CONFIG_BGP_L1_WRITETHROUGH */
 	COPY_16_BYTES
 #if L1_CACHE_BYTES >= 32
 	COPY_16_BYTES
@@ -368,7 +374,11 @@ _GLOBAL(__copy_tofrom_user)
 	mtctr	r8
 
 53:	dcbt	r3,r4
+#ifdef CONFIG_BGP_L1_WRITETHROUGH
+54:
+#else
 54:	dcbz	r11,r6
+#endif
 	.section __ex_table,"a"
 	.align	2
 	.long	54b,105f
diff --git a/arch/powerpc/mm/44x_mmu.c b/arch/powerpc/mm/44x_mmu.c
index 024acab..f5c60b3 100644
--- a/arch/powerpc/mm/44x_mmu.c
+++ b/arch/powerpc/mm/44x_mmu.c
@@ -80,9 +80,12 @@ static void __init ppc44x_pin_tlb(unsigned int virt, unsigned int phys)
 	:
 #ifdef CONFIG_PPC47x
 	: "r" (PPC47x_TLB2_S_RWX),
-#else
+#elseif CONFIG_BGP_L1_WRITETHROUGH
+	: "r" (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_WL1 \
+		| PPC44x_TLB_U2 | PPC44x_TLB_M),
+#else /* neither CONFIG_PPC47x or CONFIG_BGP_L1_WRITETHROUGH */
 	: "r" (PPC44x_TLB_SW | PPC44x_TLB_SR | PPC44x_TLB_SX | PPC44x_TLB_G),
-#endif
+#endif /* CONFIG_PPC47x */
 	  "r" (phys),
 	  "r" (virt | PPC44x_TLB_VALID | PPC44x_TLB_256M),
 	  "r" (entry),
diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig
index f7b0772..7defe94 100644
--- a/arch/powerpc/platforms/Kconfig
+++ b/arch/powerpc/platforms/Kconfig
@@ -348,4 +348,9 @@ config XILINX_PCI
 	bool "Xilinx PCI host bridge support"
 	depends on PCI && XILINX_VIRTEX
 
+config BGP_L1_WRITETHROUGH
+	bool "Blue Gene/P enabled writethrough mode"
+	depends on BGP
+	default y
+
 endmenu
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 1ae59c5..caa3bbf 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -333,6 +333,7 @@ config NOT_COHERENT_CACHE
 	bool
 	depends on 4xx || 8xx || E200 || PPC_MPC512x || GAMECUBE_COMMON
 	default n if PPC_47x
+	default n if BGP
 	default y
 
 config CHECK_CACHE_COHERENCY
-- 
1.7.4.1

^ permalink raw reply related

* [PATCH 3/7] [RFC][V2] add support for BlueGene/P Double FPU
From: Eric Van Hensbergen @ 2011-05-19 21:41 UTC (permalink / raw)
  To: linux-kernel; +Cc: linuxppc-dev, bg-linux
In-Reply-To: <1305753895-24845-3-git-send-email-ericvh@gmail.com>

This patch adds save/restore register support for the BlueGene/P
double FPU.  Since there are two FPUs, we need to save and restore
twice the registers.  Fortunately BG/P gives us some opcodes to
assist with that task.

Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
 arch/powerpc/include/asm/ppc-opcode.h  |    9 +++++++++
 arch/powerpc/include/asm/ppc_asm.h     |   32 ++++++++++++++++++++------------
 arch/powerpc/kernel/fpu.S              |    8 ++++----
 arch/powerpc/platforms/44x/Kconfig     |    9 +++++++++
 arch/powerpc/platforms/Kconfig.cputype |    4 ++++
 5 files changed, 46 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h
index 1255569..12a3cc9 100644
--- a/arch/powerpc/include/asm/ppc-opcode.h
+++ b/arch/powerpc/include/asm/ppc-opcode.h
@@ -56,6 +56,9 @@
 #define PPC_INST_TLBSRX_DOT		0x7c0006a5
 #define PPC_INST_XXLOR			0xf0000510
 
+#define PPC_INST_LFPDX			0x7c00039c
+#define PPC_INST_STFPDX			0x7c00079c
+
 /* macros to insert fields into opcodes */
 #define __PPC_RA(a)	(((a) & 0x1f) << 16)
 #define __PPC_RB(b)	(((b) & 0x1f) << 11)
@@ -126,4 +129,10 @@
 #define XXLOR(t, a, b)		stringify_in_c(.long PPC_INST_XXLOR | \
 					       VSX_XX3((t), (a), (b)))
 
+#define LFPDX(t, a, b)		stringify_in_c(.long PPC_INST_LFPDX | \
+				    __PPC_RT(t) | __PPC_RA(a) | __PPC_RB(b)))
+#define STFPDX(t, a, b)		stringify_in_c(.long PPC_INST_STFPDX | \
+				    __PPC_RT(t) | __PPC_RA(a) | __PPC_RB(b)))
+
+
 #endif /* _ASM_POWERPC_PPC_OPCODE_H */
diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h
index 9821006..c5f05ad 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -97,18 +97,26 @@ END_FW_FTR_SECTION_IFSET(FW_FEATURE_SPLPAR)
 #define REST_8GPRS(n, base)	REST_4GPRS(n, base); REST_4GPRS(n+4, base)
 #define REST_10GPRS(n, base)	REST_8GPRS(n, base); REST_2GPRS(n+8, base)
 
-#define SAVE_FPR(n, base)	stfd	n,THREAD_FPR0+8*TS_FPRWIDTH*(n)(base)
-#define SAVE_2FPRS(n, base)	SAVE_FPR(n, base); SAVE_FPR(n+1, base)
-#define SAVE_4FPRS(n, base)	SAVE_2FPRS(n, base); SAVE_2FPRS(n+2, base)
-#define SAVE_8FPRS(n, base)	SAVE_4FPRS(n, base); SAVE_4FPRS(n+4, base)
-#define SAVE_16FPRS(n, base)	SAVE_8FPRS(n, base); SAVE_8FPRS(n+8, base)
-#define SAVE_32FPRS(n, base)	SAVE_16FPRS(n, base); SAVE_16FPRS(n+16, base)
-#define REST_FPR(n, base)	lfd	n,THREAD_FPR0+8*TS_FPRWIDTH*(n)(base)
-#define REST_2FPRS(n, base)	REST_FPR(n, base); REST_FPR(n+1, base)
-#define REST_4FPRS(n, base)	REST_2FPRS(n, base); REST_2FPRS(n+2, base)
-#define REST_8FPRS(n, base)	REST_4FPRS(n, base); REST_4FPRS(n+4, base)
-#define REST_16FPRS(n, base)	REST_8FPRS(n, base); REST_8FPRS(n+8, base)
-#define REST_32FPRS(n, base)	REST_16FPRS(n, base); REST_16FPRS(n+16, base)
+#ifdef CONFIG_DOUBLE_FPU
+#define SAVE_FPR(n, b, base)	li b, THREAD_FPR0+(16*(n)); STFPDX(n, base, b)
+#define REST_FPR(n, b, base)	li b, THREAD_FPR0+(16*(n)); LFPDX(n, base, b)
+#else /* CONFIG_DOUBLE_FPU */
+#define SAVE_FPR(n, b, base)	stfd	n, THREAD_FPR0+8*TS_FPRWIDTH*(n)(base)
+#define REST_FPR(n, b, base)	lfd	n, THREAD_FPR0+8*TS_FPRWIDTH*(n)(base)
+#endif /* CONFIG_DOUBLE_FPU */
+
+#define SAVE_2FPRS(n, b, base)	SAVE_FPR(n, b, base); SAVE_FPR(n+1, b, base)
+#define SAVE_4FPRS(n, b, base)	SAVE_2FPRS(n, b, base); SAVE_2FPRS(n+2, b, base)
+#define SAVE_8FPRS(n, b, base)	SAVE_4FPRS(n, b, base); SAVE_4FPRS(n+4, b, base)
+#define SAVE_16FPRS(n, b, base)	SAVE_8FPRS(n, b, base); SAVE_8FPRS(n+8, b, base)
+#define SAVE_32FPRS(n, b, base)	SAVE_16FPRS(n, b, base); \
+				SAVE_16FPRS(n+16, b, base)
+#define REST_2FPRS(n, b, base)	REST_FPR(n, b, base); REST_FPR(n+1, b, base)
+#define REST_4FPRS(n, b, base)	REST_2FPRS(n, b, base); REST_2FPRS(n+2, b, base)
+#define REST_8FPRS(n, b, base)	REST_4FPRS(n, b, base); REST_4FPRS(n+4, b, base)
+#define REST_16FPRS(n, b, base)	REST_8FPRS(n, b, base); REST_8FPRS(n+8, b, base)
+#define REST_32FPRS(n, b, base)	REST_16FPRS(n, b, base); \
+				REST_16FPRS(n+16, b, base)
 
 #define SAVE_VR(n,b,base)	li b,THREAD_VR0+(16*(n));  stvx n,base,b
 #define SAVE_2VRS(n,b,base)	SAVE_VR(n,b,base); SAVE_VR(n+1,b,base)
diff --git a/arch/powerpc/kernel/fpu.S b/arch/powerpc/kernel/fpu.S
index de36955..9f11c66 100644
--- a/arch/powerpc/kernel/fpu.S
+++ b/arch/powerpc/kernel/fpu.S
@@ -30,7 +30,7 @@
 BEGIN_FTR_SECTION							\
 	b	2f;							\
 END_FTR_SECTION_IFSET(CPU_FTR_VSX);					\
-	REST_32FPRS(n,base);						\
+	REST_32FPRS(n,c,base);						\
 	b	3f;							\
 2:	REST_32VSRS(n,c,base);						\
 3:
@@ -39,13 +39,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_VSX);					\
 BEGIN_FTR_SECTION							\
 	b	2f;							\
 END_FTR_SECTION_IFSET(CPU_FTR_VSX);					\
-	SAVE_32FPRS(n,base);						\
+	SAVE_32FPRS(n,c,base);						\
 	b	3f;							\
 2:	SAVE_32VSRS(n,c,base);						\
 3:
 #else
-#define REST_32FPVSRS(n,b,base)	REST_32FPRS(n, base)
-#define SAVE_32FPVSRS(n,b,base)	SAVE_32FPRS(n, base)
+#define REST_32FPVSRS(n,b,base)	REST_32FPRS(n,b,base)
+#define SAVE_32FPVSRS(n,b,base)	SAVE_32FPRS(n,b,base)
 #endif
 
 /*
diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig
index f485fc5f..24a515e 100644
--- a/arch/powerpc/platforms/44x/Kconfig
+++ b/arch/powerpc/platforms/44x/Kconfig
@@ -169,6 +169,15 @@ config YOSEMITE
 	help
 	  This option enables support for the AMCC PPC440EP evaluation board.
 
+config	BGP
+	bool "Blue Gene/P"
+	depends on 44x
+	default n
+	select PPC_FPU
+	select PPC_DOUBLE_FPU
+	help
+	  This option enables support for the IBM BlueGene/P supercomputer.
+
 config ISS4xx
 	bool "ISS 4xx Simulator"
 	depends on (44x || 40x)
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index 111138c..1ae59c5 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -137,6 +137,10 @@ config PPC_FPU
 	bool
 	default y if PPC64
 
+config PPC_DOUBLE_FPU
+	bool "Bluegene/P Double FPU Support"
+	depends on BGP
+
 config FSL_EMB_PERFMON
 	bool "Freescale Embedded Perfmon"
 	depends on E500 || PPC_83xx
-- 
1.7.4.1

^ permalink raw reply related

* Re: [PATCH 3/7] [RFC] add support for BlueGene/P FPU
From: Michael Neuling @ 2011-05-19 21:36 UTC (permalink / raw)
  To: Eric Van Hensbergen; +Cc: linuxppc-dev, linux-kernel, bg-linux
In-Reply-To: <BANLkTimKhApFW8G1-pG0u_9Kv2YB0R1O0w@mail.gmail.com>

In message <BANLkTimKhApFW8G1-pG0u_9Kv2YB0R1O0w@mail.gmail.com> you wrote:
> On Thu, May 19, 2011 at 12:58 AM, Michael Neuling <mikey@neuling.org> wrote:
> > Eric,
> >
> >> This patch adds save/restore register support for the BlueGene/P
> >> double hummer FPU.
> >
> > What does this mean?  Needs more details here.
> >
> 
> Hi Mikey,
> 
> any specific details you are looking for here?  AFAIK these patches
> are required for the kernel to save/restore the double hummer
> properly.

I should have been more specific.  What does double hummer mean?

A description of how double hummer differs from normal and why a change
in the fpu code is needed would be great.

> 
> >>
> >> +#ifdef CONFIG_BGP
> >> +#define LFPDX(frt, ra, rb)   .long (31<<26)|((frt)<<21)|((ra)<<16)| \
> >> +                                                     ((rb)<<11)|(462<<1)
> >> +#define STFPDX(frt, ra, rb)  .long (31<<26)|((frt)<<21)|((ra)<<16)| \
> >> +                                                     ((rb)<<11)|(974<<1)
> >> +#endif /* CONFIG_BGP */
> >
> > Put these in arch/powerpc/include/asm/ppc-opcode.h and reformat to fit
> > whats there already.
> >
> > Also, don't need to put these defines inside a #ifdef.
> >
> 
> Sure, I'll fix that up.
> 
> >> +#ifdef CONFIG_BGP
> >> +#define SAVE_FPR(n, b, base) li b, THREAD_FPR0+(16*(n)); STFPDX(n, base, b)
> >> +#define REST_FPR(n, b, base) li b, THREAD_FPR0+(16*(n)); LFPDX(n, base, b)
> >
> > 16*?  Are these FP regs 64 or 128 bits wide?  If 128 you are going to
> > have to play with TS_WIDTH to get the size of the FPs correct in the
> > thread_struct.
> >
> > I think there's a bug here.
> >
> 
> I actually have three different versions of this code from different
> source patches that I'm drawing from - so your help in figuring out
> the best way to approach this is appreciated.  The kittyhawk version
> of the code has 8* instead of 16*.  According to the docs:
> "Each of the two FPU units contains 32 64-bit floating point registers
> for a total of 64 FP registers per processor." which would seem to
> point to the kittyhawk version - but they have a second SAVE_32SFPRS
> for the second hummer.  What wasn't clear to me with this version of
> the code was whether or not they were doing something clever like
> saving the pair of the 64-bit FPU registers in a single 128-bit slot
> (seems plausible).  

Ok, sounds like there is 32*8*2 bytes of data, rather than the normal
32*8 bytes for FP only (ignoring VSX).  If this is the case, then you'll
need to make 'fpr' in the thread struct bigger which you can do by setting
TS_FPRWIDTH = 2 like we do for VSX.

If there is some instruction that saves and restores two of these at a
time (which LFPDX/STFPDX might I guess), then we can use that, otherwise
we'll have to do 64 saves/restores.  Double load/stores will be faster
I'm guessing though.  

If two at a time, do we need to increase the index in pairs?

> If this is not the way to go, I can certainly
> switch the kittyhawk version of the patch with the *, the extra
> SAVE32SFPR and the extra double hummer specific storage space in the
> thread_struct.  

I'd be tempted to keep it in the 'fpr' part of the struct so you can
then access it with ptrace/signals/core dumps.

> If it would help I can post an alternate version of the patch for
> discussion with the kittyhawk version.

Sure.

The most useful thing would be to see the instruction definition for
STFPDX/LFPDX.

> 
> >>  /*
> >> diff --git a/arch/powerpc/platforms/44x/Kconfig b/arch/powerpc/platforms/44x/Kconfig
> >> index f485fc5f..24a515e 100644
> >> --- a/arch/powerpc/platforms/44x/Kconfig
> >> +++ b/arch/powerpc/platforms/44x/Kconfig
> >> @@ -169,6 +169,15 @@ config YOSEMITE
> >>       help
> >>         This option enables support for the AMCC PPC440EP evaluation board.
> >>
> >> +config       BGP
> >
> > Does this FPU feature have a specific name like double hummer?  I'd
> > rather have the BGP defconfig depend on PPC_FPU_DOUBLE_HUMMER, or
> > something like that...
> >
> >> +     bool "Blue Gene/P"
> >> +     depends on 44x
> >> +     default n
> >> +     select PPC_FPU
> >> +     select PPC_DOUBLE_FPU
> >
> > ... in fact, it seem you are doing something like these here but you
> > don't use PPC_DOUBLE_FPU anywhere?
> >
> 
> A fair point.  I'm fine with calling it DOUBLE_HUMMER, but I wasn't sure if
> that was "too internal" of a name for the kernel.  Let me know and
> I'll fix it up.

What I'm mostly concerned about is disassociating it with a particular
CPU.  

If it has an external name, then all the better.

> I'll also change the CONFIG_BGP defines in the FPU code to PPC_DOUBLE_FPU
> or PPC_DOUBLE_HUMMER depending on what the community decides.

Mikey

^ permalink raw reply

* Re: [PATCH 3/5] v2 seccomp_filters: Enable ftrace-based system call filtering
From: Will Drewry @ 2011-05-19 21:05 UTC (permalink / raw)
  To: Steven Rostedt, Ingo Molnar, Peter Zijlstra, Frederic Weisbecker
  Cc: linux-mips, linux-sh, Heiko Carstens, Oleg Nesterov,
	David Howells, Paul Mackerras, Ralf Baechle, H. Peter Anvin,
	sparclinux, Jiri Slaby, linux-s390, Russell King, x86,
	James Morris, Linus Torvalds, Ingo Molnar, kees.cook,
	Serge E. Hallyn, Tejun Heo, Thomas Gleixner, linux-arm-kernel,
	Michal Marek, Michal Simek, linuxppc-dev, linux-kernel,
	Eric Paris, Paul Mundt, Martin Schwidefsky, linux390,
	Andrew Morton, agl, David S. Miller
In-Reply-To: <1305807728.11267.25.camel@gandalf.stny.rr.com>

On Thu, May 19, 2011 at 7:22 AM, Steven Rostedt <rostedt@goodmis.org> wrote:
> On Wed, 2011-05-18 at 21:07 -0700, Will Drewry wrote:
>
>> Do event_* that return non-void exist in the tree at all now?  I've
>> looked at the various tracepoint macros as well as some of the other
>> handlers (trace_function, perf_tp_event, etc) and I'm not seeing any
>> places where a return value is honored nor could be.  At best, the
>> perf_tp_event can be short-circuited in the hlist_for_each, but
>> it'd still need a way to bubble up a failure and result in not calling
>> the trace/event that the hook precedes.
>
> No, none of the current trace hooks have return values. That was what I
> was talking about how to implement in my previous emails.

Led on by complete ignorance, I think I'm finally beginning to untwist
the different pieces of the tracing infrastructure.  Unfortunately,
that means I took a few wrong turns along the way...

I think function tracing looks something like this:

ftrace_call has been injected at a specific callsite.  Upon hit:
1. ftrace_call triggers
2. does some checks then calls ftrace_trace_function (via mcount instrs)
3. ftrace_trace_function may be a single func or a list. For a list it
will be: ftrace_list_func
4. ftrace_list_func calls each registered hook for that function in a
while loop ignoring return values
5. registered hook funcs may then track the call, farm it out to
specific sub-handlers, etc.

This seems to be a red herring for my use case :/ though this helped
me understand your back and forth (Ingo & Steve) regarding dynamic
versus explicit events.

System call tracing is done via kernel/tracepoint.c events fed in via
arch/[arch]/kernel/ptrace.c where it calls trace_sys_enter.  This
yields direct sys_enter and sys_exit event sources (and an event class
to hook up per-system call events).  This means that
ftrace_syscall_enter() does the event prep prior to doing a filter
check comparing the ftrace_event object for the given syscall_nr to
the event data.  perf_sysenter_enter() is similar but it pushes the
info over to perf_tp_event to be matched not against the global
syscall event entry, but against any sub-event in the linked list on
that syscall's event.

Is that roughly an accurate representation of the two?  I wish I
hadn't gotten distracted along the function path, but at least I
learned something (and it is relevant to the larger scope of this
thread's discussion).

After doing that digging, it looks like providing hook call
pre-emption and return value propagation will be a unique and fun task
for each tracer and event subsystem.  If I look solely at tracepoints,
a generic change would be to make the trace_##subsys function return
an int (which I think was the event_vfs_getname proposal?).  The other
option is to change the trace_sys_enter proto to include a 'int
*retcode'.

That change would allow the propagation of some sort of policy
information.  To put it to use, seccomp mode 1 could be implemented on
top of trace_syscalls.  The following changes would need to happen:
1. dummy metadata should be inserted for all unhooked system calls
2. perf_trace_buf_submit would need to return an int or a new
TRACE_REG_SECCOMP_REGISTER handler would need to be setup in
syscall_enter_register.
3. If perf is abused, a "kill/fail_on_discard" bit would be added to
event->attrs.
4. perf_trace_buf_submit/perf_tp_event will return 0 for no matches,
'n' for the number of event matches, and -EACCES/? if a
'fail_on_discard' event is seen.
5. perf_syscall_enter would set *retcode = perf_trace_buf_submit()'s retcode
6. trace_sys_enter() would need to be moved to be the first entry in
arch/../kernel/ptrace.c for incoming syscalls
7. if trace_sys_enter() yields a negative return code, then
do_exit(SIGKILL) the process and return.

Entering into seccomp mode 1 would require adding a  "0" filter for
every system call number (which is why we need a dummy event call for
them since failing to check the bitmask can't be flagged
fail_on_discard) with the fail_on_discard bit.  For the three calls
that are allowed, a '1' filter would be set.

That would roughly implement seccomp mode 1.  It's pretty ugly and the
fact that every system call that's disallowed has to be blacklisted is
not ideal.  An alternate model would be to just use the seccomp mode
as we do today and let secure_computing() handle the return code of "#
of matches".  If the # of matches is 0, it terminates. A
'fail_on_discard' bit then would only be good to stop further
tracepoint callback evaluation.  This approach would also *almost* nix
the need to provide dummy syscall hooks.  (Since sigreturn isn't
hooked on x86 because it uses ptregs fixup, a dummy would still be
needed to apply a "1" filter to.)

Even with that tweak to move to a whitelist model, the perf event
evaluation and tracepoint callback ordering is still not guaranteed.
Without changing tracepoint itself, all other TPs will still execute.
And for perf events, it'll be first-come-first-serve until a
fail_on_discard is hit.

After using seccomp mode=1 as the sample case to reduce scope, it's
possible to ignore it for now :) and look at the seccomp-filter/mode=2
case.  The same mechanism could be used to inject more expressive
filters.  This would be done over the sys_perf_event_open() interface
assuming the new attr is added to stop perf event list enumeration.
Assuming a whitelist model, a call to prctl(PR_SET_SECCOMP, 2) would
be needed (maybe with the ON_EXEC flag option too to mirror the
event->attr on-exec bit). That would yield the ability to register
perf events for system calls then use ioctl() to SET_FILTER on them.

Reducing the privileges of the filters after installation could be
done with another attribute bit like 'set_filter_ands'.  If that is
also set on the event, and a filter is installed to ensure that
sys_perf_event_open is blocked, then it should be reasonably sane.

I'd need to add a PERF_EVENT_IOC_GET_FILTER handler to allow
extracting the settings.

Clearly, I haven't written the code for that yet, though making the
change for a single platform shouldn't be too much code.

So that leaves me with some questions:
- Is this the type of reuse that was being encouraged?
- Does it really make sense to cram this through the perf interface
and events?  While the changed attributes are innocuous and
potentially reusable, it seems that a large amount of the perf
facilities are exposed that could have weird side effects, but I'm
sure I still don't fully grok the perf infrastructure.
- Does extending one tracepoint to provide return values via a pointer
make sense? I'd hesitate to make all tracepoint hooks return an int by
default.
- If all tracepoints returned an int, what would the standard value
look like - or would it need to be per tracepoint impl?
- How is ambiguity resolved if multiple perf_events are registered for
a syscall with different filters?  Maybe a 'stop_on_match'? though
ordering is still a problem then.
- Is there a way to affect a task-wide change without a seccomp flag
(or new task_struct entry) via the existing sys_perf_event_open
syscall?  I considered suggesting an attr bit called 'secure_computing'
that when an event with the bit is enabled, it automagically forces
the task into seccomp enforcement mode, but that, again, seemed
hackish.

While I like the idea of sharing the tracepoints infrastructure and
the trace_syscalls hooks as well as using a pre-existing interface
with very minor changes, I'm worried that the complexity of the
interface and of the infrastructure might undermine the ability to
continue meeting the desired security goals.  I had originally stayed
very close to the seccomp world because of how trivial it is to review
the code and verify its accuracy/effectiveness.  This approach leaves
a lot of gaps for kernel code to seep through and a fair amount of
ambiguity in what locked down syscall filters might look like.

To me, the best part of the above is that it shows that even if we go
with a prctl SET_SECCOMP_FILTER-style interface, it is completely
certain that if a perf/ftrace-based security infrastructure is in our
future, it will be entirely compatible -- even if the prctl()
interface is just the "simpler" interface at that point somewhere down
the road.

Regardless, I'll hack up a proof of concept based on the outline
above. Perhaps something more elegant will emerge once I start
tweaking the source, but I'm still seeing too many gaps to be
comfortable so far.

[[There is a whole other approach to this too. We could continue with
the prctl() interface and mirror the trace_sys_enter model for
secure_computing().   Instead of keeping a seccomp_t-based hlist of
events, they could be stored in a new hlist for seccomp_events in
struct ftrace_event_call.  The ftrace filters would be installed there
and the seccomp_syscall_enter() function could do the checks and pass
up some state data on the task's seccomp_t that indicates it needs to
do_exit().  That would likely reduce the amount of code in
seccomp_filter.c pretty seriously (though not entirely
beneficially).]]

Thanks again for all the feedback and insights! I really hope we can
come to an agreeable approach for implementing kernel attack surface
reduction.
will

^ permalink raw reply

* Unable to handle kernel paging request for data at address 0x00000000
From: Burton Samograd @ 2011-05-19 20:11 UTC (permalink / raw)
  To: linuxppc-dev

[-- Attachment #1: Type: text/plain, Size: 11375 bytes --]

Hello,

 

I'm trying to get 2.6.38.6 up and running on my ppc880 board and I've
run into a problem when the kernel is attempting to perform its first
exec (/sbin/init).  I've tried to debug it as much as I can so I thought
I would post this to see if anybody has any ideas or suggestions about
what might be going wrong.

 

Following is a full output of the board bootup messages from U-boot and
the kernel:

 

------------------------------------------------------------------------
-----------------------------------------------

U-Boot 1.3.1 (May 19 2011 - 09:55:57)

 

CPU:   MPC885ZPnn at 131.666 MHz [40.0...133.0 MHz]

       8 kB I-Cache 8 kB D-Cache FEC present

DRAM:  64 MB

*** Warning - bad CRC, using default environment

 

In:    serial

Out:   serial

Err:   serial

Net:   FEC ETHERNET, FEC2 ETHERNET

IDE:   Hit any key to stop autoboot:  0

Using FEC ETHERNET device

TFTP from server 192.168.0.6; our IP address is 192.168.0.4

Filename 'uImage'.

Load address: 0xe00000

Loading: T
#################################################################

         ##############

done

Bytes transferred = 1155044 (119fe4 hex)

## Booting image at 00e00000 ...

   Image Name:   Linux-2.6.38.6sbx-dirty

   Image Type:   PowerPC Linux Kernel Image (gzip compressed)

   Data Size:    1154980 Bytes =  1.1 MB

   Load Address: 00400000

   Entry Point:  0040055c

   Verifying Checksum ... OK

   Uncompressing Kernel Image ... OK

## Current stack ends at 0x03F91AB8 => set upper limit to 0x00800000

## cmdline at 0x007FFF00 ... 0x007FFF79

memstart    = 0x00000000

memsize     = 0x04000000

flashstart  = 0x00000000

flashsize   = 0x00000000

flashoffset = 0x00000000

sramstart   = 0x00000000

sramsize    = 0x00000000

immr_base   = 0xFF000000

bootflags   = 0x00000001

intfreq     = 131.666 MHz

busfreq     = 65.833 MHz

ethaddr     = 00:07:07:00:4C:64

IP addr     = 192.168.0.4

baudrate    =  38400 bps

No initrd

## Transferring control to Linux (at address 0040055c) ...

Kernel Command Line: console=ttyCPM0,38400 root=/dev/nfs,rw
nfsroot=192.168.0.6:/sbx ip=192.168.0.15:192.168.0.6:::sbx:eth0:off
init=/bin/bash

Memory <- <0x0 0x4000000> (64MB)

ENET0: local-mac-address <- 00:07:07:00:4c:64

ENET1: local-mac-address <- 00:3b:f5:21:63:6f

CPU clock-frequency <- 0x7d912de (132MHz)

CPU timebase-frequency <- 0x3ec896 (4MHz)

CPU bus-frequency <- 0x3ec896f (66MHz)

 

zImage starting: loaded at 0x00400000 (sp: 0x03f91978)

Allocating 0x251480 bytes for kernel ...

gunzipping (0x00000000 <- 0x0040c000:0x0068b948)...done 0x23cde0 bytes

 

Linux/PowerPC load: console=ttyCPM0,38400 root=/dev/nfs,rw
nfsroot=192.168.0.6:/sbx ip=192.168.0.15:192.168.0.6:::sbx:eth0:off
init=/bin/bash

Finalizing device tree... flat tree at 0x698300

Using Freescale MPC885 ADS machine description

Linux version 2.6.38.6sbx-dirty (root@burton-fedora-14.interalia.com)
(gcc version 4.2.2) #43 Thu May 19 13:55:22 MDT 2011

Zone PFN ranges:

  DMA      0x00000000 -> 0x00000800

  Normal   empty

  HighMem  0x00000800 -> 0x00004000

Movable zone start PFN for each node

early_node_map[1] active PFN ranges

    0: 0x00000000 -> 0x00004000

MMU: Allocated 72 bytes of context maps for 16 contexts

Built 1 zonelists in Zone order, mobility grouping on.  Total pages:
16256

Kernel command line: console=ttyCPM0,38400 root=/dev/nfs,rw
nfsroot=192.168.0.6:/sbx ip=192.168.0.15:192.168.0.6:::sbx:eth0:off
init=/bin/bash

PID hash table entries: 32 (order: -5, 128 bytes)

Dentry cache hash table entries: 1024 (order: 0, 4096 bytes)

Inode-cache hash table entries: 1024 (order: 0, 4096 bytes)

Memory: 62544k/65536k available (2212k kernel code, 2992k reserved, 80k
data, 81k bss, 96k init)

Kernel virtual memory layout:

  * 0xfffcf000..0xfffff000  : fixmap

  * 0xff800000..0xffc00000  : highmem PTEs

  * 0xff600000..0xff800000  : consistent mem

  * 0xff5f8000..0xff600000  : early ioremap

  * 0xc1000000..0xff5f8000  : vmalloc & ioremap

SLUB: Genslabs=14, HWalign=16, Order=0-3, MinObjects=0, CPUs=1, Nodes=1

NR_IRQS:512 nr_irqs:512 16

Decrementer Frequency = 0x7d912d

clocksource: timebase mult[1e61377e] shift[22] registered

console [ttyCPM0] enabled

pid_max: default: 4096 minimum: 301

Mount-cache hash table entries: 512

NET: Registered protocol family 16

bio: create slab <bio-0> at 0

Switching to clocksource timebase

NET: Registered protocol family 2

Switched to NOHz mode on CPU #0

IP route cache hash table entries: 1024 (order: 0, 4096 bytes)

TCP established hash table entries: 512 (order: 0, 4096 bytes)

TCP bind hash table entries: 512 (order: -1, 2048 bytes)

TCP: Hash tables configured (established 512 bind 512)

TCP reno registered

NET: Registered protocol family 1

RPC: Registered udp transport module.

RPC: Registered tcp transport module.

RPC: Registered tcp NFSv4.1 backchannel transport module.

highmem bounce pool size: 64 pages

msgmni has been set to 16

io scheduler noop registered

io scheduler deadline registered (default)

ff000a80.serial: ttyCPM0 at MMIO 0xc1010a80 (irq = 19) is a CPM UART

ff000a90.serial: ttyCPM1 at MMIO 0xc1018a90 (irq = 24) is a CPM UART

Generic RTC Driver v1.07

Uniform Multi-Platform E-IDE driver

ide-gd driver 1.18

eth0: fs_enet: 00:07:07:00:4c:64

eth1: fs_enet: 00:3b:f5:21:63:6f

FEC MII Bus: probed

mdio_bus ff000e00: error probing PHY at address 3

TCP cubic registered

NET: Registered protocol family 17

IP-Config: Guessing netmask 255.255.255.0

IP-Config: Complete:

     device=eth0, addr=192.168.0.15, mask=255.255.255.0,
gw=255.255.255.255,

     host=sbx, domain=, nis-domain=(none),

     bootserver=192.168.0.6, rootserver=192.168.0.6, rootpath=

VFS: Mounted root (nfs filesystem) readonly on device 0:10.

Freeing unused kernel memory: 96k init

Failed to execute /bin/bash.  Attempting defaults...

Unable to handle kernel paging request for data at address 0x00000000

Faulting instruction address: 0xc000b77c

Oops: Kernel access of bad area, sig: 11 [#1]

Freescale MPC885 ADS

last sysfs file:

NIP: c000b77c LR: c000deb0 CTR: 00000100

REGS: c02e7cc0 TRAP: 0300   Not tainted  (2.6.38.6sbx-dirty)

MSR: 00009032 <EE,ME,IR,DR>  CR: 22248042  XER: 0000f800

DAR: 00000000, DSISR: c0000000

TASK = c02e4000[1] 'swapper' THREAD: c02e6000

GPR00: c000deac c02e7d70 c02e4000 00000000 00000100 c03ecffc 00000000
c022e8ec

GPR08: c02445a0 c02445a0 00009032 000005e0 22248042 10000900 22242044
22242044

GPR16: 22248044 00000000 00000000 00000001 c03e1000 c02e7e38 00000030
00000022

GPR24: c03e1000 c03e27fc c0240000 7ffffff1 c03ecffc 7ffffff1 03c05d21
c02cd0a0

NIP [c000b77c] __flush_dcache_icache+0x14/0x40

LR [c000deb0] flush_dcache_icache_page+0x14/0x24

Call Trace:

[c02e7d70] [c000deac] flush_dcache_icache_page+0x10/0x24 (unreliable)

[c02e7d80] [c000e0c8] set_pte_at+0x50/0x7c

[c02e7da0] [c0060548] handle_pte_fault+0x228/0x3ac

[c02e7dd0] [c00617d8] __get_user_pages+0x2a4/0x3cc

[c02e7e30] [c007536c] get_arg_page+0x40/0xb8

[c02e7e50] [c00755dc] copy_strings+0x114/0x2e8

[c02e7ea0] [c0075ca0] do_execve+0x120/0x254

[c02e7ee0] [c000719c] sys_execve+0x50/0x7c

[c02e7f00] [c000be6c] ret_from_syscall+0x0/0x38

[c02e7fc0] [00001032] 0x1032

[c02e7fd0] [c0002404] init_post+0x88/0xb8

[c02e7fe0] [c0211424] kernel_init+0x144/0x15c

[c02e7ff0] [c000b994] kernel_thread+0x4c/0x68

Instruction dump:

4d820020 7c8903a6 7c001bac 38630010 4200fff8 7c0004ac 4e800020 60000000

54630026 38800100 7c8903a6 7c661b78 <7c00186c> 38630010 4200fff8
7c0004ac

---[ end trace 5e2c5aac0577498f ]---

note: swapper[1] exited with preempt_count 1

BUG: scheduling while atomic: swapper/1/0x00000001

Call Trace:

[c02e7950] [c0006908] show_stack+0x50/0x154 (unreliable)

[c02e7990] [c0016b54] __schedule_bug+0x54/0x68

[c02e79a0] [c01b6560] schedule+0x60/0x344

[c02e7a90] [c01b6d84] schedule_timeout+0x148/0x17c

[c02e7ac0] [c00247c0] msleep_interruptible+0x1c/0x54

[c02e7ad0] [c01139d4] __uart_wait_until_sent+0x90/0xec

[c02e7af0] [c01156c0] uart_close+0x19c/0x2c4

[c02e7b40] [c0109cfc] tty_release+0x1b4/0x45c

[c02e7be0] [c00713ac] fput+0xa8/0x14c

[c02e7c00] [c006e77c] filp_close+0x78/0x90

[c02e7c20] [c001c18c] put_files_struct+0x88/0xe8

[c02e7c40] [c001d640] do_exit+0x168/0x544

[c02e7c80] [c0009528] die+0x184/0x19c

[c02e7ca0] [c000d7c0] bad_page_fault+0xe8/0xfc

[c02e7cb0] [c000c30c] handle_page_fault+0x7c/0x80

[c02e7d70] [c000deac] flush_dcache_icache_page+0x10/0x24

[c02e7d80] [c000e0c8] set_pte_at+0x50/0x7c

[c02e7da0] [c0060548] handle_pte_fault+0x228/0x3ac

[c02e7dd0] [c00617d8] __get_user_pages+0x2a4/0x3cc

[c02e7e30] [c007536c] get_arg_page+0x40/0xb8

[c02e7e50] [c00755dc] copy_strings+0x114/0x2e8

[c02e7ea0] [c0075ca0] do_execve+0x120/0x254

[c02e7ee0] [c000719c] sys_execve+0x50/0x7c

[c02e7f00] [c000be6c] ret_from_syscall+0x0/0x38

[c02e7fc0] [00001032] 0x1032

[c02e7fd0] [c0002404] init_post+0x88/0xb8

[c02e7fe0] [c0211424] kernel_init+0x144/0x15c

[c02e7ff0] [c000b994] kernel_thread+0x4c/0x68

BUG: scheduling while atomic: swapper/1/0x00000001

Call Trace:

[c02e7950] [c0006908] show_stack+0x50/0x154 (unreliable)

[c02e7990] [c0016b54] __schedule_bug+0x54/0x68

[c02e79a0] [c01b6560] schedule+0x60/0x344

[c02e7a90] [c01b6d84] schedule_timeout+0x148/0x17c

[c02e7ac0] [c00247c0] msleep_interruptible+0x1c/0x54

[c02e7ad0] [c01139d4] __uart_wait_until_sent+0x90/0xec

[c02e7af0] [c01156fc] uart_close+0x1d8/0x2c4

[c02e7b40] [c0109cfc] tty_release+0x1b4/0x45c

[c02e7be0] [c00713ac] fput+0xa8/0x14c

[c02e7c00] [c006e77c] filp_close+0x78/0x90

[c02e7c20] [c001c18c] put_files_struct+0x88/0xe8

[c02e7c40] [c001d640] do_exit+0x168/0x544

[c02e7c80] [c0009528] die+0x184/0x19c

[c02e7ca0] [c000d7c0] bad_page_fault+0xe8/0xfc

[c02e7cb0] [c000c30c] handle_page_fault+0x7c/0x80

[c02e7d70] [c000deac] flush_dcache_icache_page+0x10/0x24

[c02e7d80] [c000e0c8] set_pte_at+0x50/0x7c

[c02e7da0] [c0060548] handle_pte_fault+0x228/0x3ac

[c02e7dd0] [c00617d8] __get_user_pages+0x2a4/0x3cc

[c02e7e30] [c007536c] get_arg_page+0x40/0xb8

[c02e7e50] [c00755dc] copy_strings+0x114/0x2e8

[c02e7ea0] [c0075ca0] do_execve+0x120/0x254

[c02e7ee0] [c000719c] sys_execve+0x50/0x7c

[c02e7f00] [c000be6c] ret_from_syscall+0x0/0x38

[c02e7fc0] [00001032] 0x1032

[c02e7fd0] [c0002404] init_post+0x88/0xb8

[c02e7fe0] [c0211424] kernel_init+0x144/0x15c

[c02e7ff0] [c000b994] kernel_thread+0x4c/0x68

Kernel panic - not syncing: Attempted to kill init!

Rebooting in 180 seconds..

------------------------------------------------------------------------
-----------------------------------------------

 

Note: I specify /bin/bash for init on the command line, but it isn't
present on my current root file system, so the kernel falls back to
/sbin/init, which is present.  The problem occurs whether init is set
to /bin/bash or /sbin/init.

 

Any advice or help appreciated.

 

--

Burton Samograd


[-- Attachment #2: Type: text/html, Size: 43092 bytes --]

^ permalink raw reply

* [PATCH 6/7] [v2] tty/powerpc: introduce the ePAPR embedded hypervisor byte channel driver
From: Timur Tabi @ 2011-05-19 18:34 UTC (permalink / raw)
  To: kumar.gala, benh, greg, akpm, linuxppc-dev, linux-kernel,
	linux-console, alan, arnd

The ePAPR embedded hypervisor specification provides an API for "byte
channels", which are serial-like virtual devices for sending and receiving
streams of bytes.  This driver provides Linux kernel support for byte
channels via three distinct interfaces:

1) An early-console (udbg) driver.  This provides early console output
through a byte channel.  The byte channel handle must be specified in a
Kconfig option.

2) A normal console driver.  Output is sent to the byte channel designated
for stdout in the device tree.  The console driver is for handling kernel
printk calls.

3) A tty driver, which is used to handle user-space input and output.  The
byte channel used for the console is designated as the default tty.

Signed-off-by: Timur Tabi <timur@freescale.com>
---
 arch/powerpc/include/asm/udbg.h |    1 +
 arch/powerpc/kernel/udbg.c      |    2 +
 drivers/tty/Kconfig             |   33 ++
 drivers/tty/Makefile            |    1 +
 drivers/tty/ehv_bytechan.c      |  888 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 925 insertions(+), 0 deletions(-)
 create mode 100644 drivers/tty/ehv_bytechan.c

diff --git a/arch/powerpc/include/asm/udbg.h b/arch/powerpc/include/asm/udbg.h
index 11ae699..bb9f6b1 100644
--- a/arch/powerpc/include/asm/udbg.h
+++ b/arch/powerpc/include/asm/udbg.h
@@ -52,6 +52,7 @@ extern void __init udbg_init_44x_as1(void);
 extern void __init udbg_init_40x_realmode(void);
 extern void __init udbg_init_cpm(void);
 extern void __init udbg_init_usbgecko(void);
+extern void __init udbg_init_ehv_bc(void);
 
 #endif /* __KERNEL__ */
 #endif /* _ASM_POWERPC_UDBG_H */
diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c
index e39cad8..d117368 100644
--- a/arch/powerpc/kernel/udbg.c
+++ b/arch/powerpc/kernel/udbg.c
@@ -62,6 +62,8 @@ void __init udbg_early_init(void)
 	udbg_init_cpm();
 #elif defined(CONFIG_PPC_EARLY_DEBUG_USBGECKO)
 	udbg_init_usbgecko();
+#elif defined(CONFIG_PPC_EARLY_DEBUG_EHV_BC)
+	udbg_init_ehv_bc();
 #endif
 
 #ifdef CONFIG_PPC_EARLY_DEBUG
diff --git a/drivers/tty/Kconfig b/drivers/tty/Kconfig
index 3fd7199..9fe0212 100644
--- a/drivers/tty/Kconfig
+++ b/drivers/tty/Kconfig
@@ -319,3 +319,36 @@ config N_GSM
 	  This line discipline provides support for the GSM MUX protocol and
 	  presents the mux as a set of 61 individual tty devices.
 
+config PPC_EPAPR_HV_BYTECHAN
+	tristate "ePAPR hypervisor byte channel driver"
+	depends on PPC
+	help
+	  This driver creates /dev entries for each ePAPR hypervisor byte
+	  channel, thereby allowing applications to communicate with byte
+	  channels as if they were serial ports.
+
+config PPC_EARLY_DEBUG_EHV_BC
+	bool "Early console (udbg) support for ePAPR hypervisors"
+	depends on PPC_EPAPR_HV_BYTECHAN
+	help
+	  Select this option to enable early console (a.k.a. "udbg") support
+	  via an ePAPR byte channel.  You also need to choose the byte channel
+	  handle below.
+
+config PPC_EARLY_DEBUG_EHV_BC_HANDLE
+	int "Byte channel handle for early console (udbg)"
+	depends on PPC_EARLY_DEBUG_EHV_BC
+	default 0
+	help
+	  If you want early console (udbg) output through a byte channel,
+	  specify the handle of the byte channel to use.
+
+	  For this to work, the byte channel driver must be compiled
+	  in-kernel, not as a module.
+
+	  Note that only one early console driver can be enabled, so don't
+	  enable any others if you enable this one.
+
+	  If the number you specify is not a valid byte channel handle, then
+	  there simply will be no early console output.  This is true also
+	  if you don't boot under a hypervisor at all.
diff --git a/drivers/tty/Makefile b/drivers/tty/Makefile
index 690522f..4afebd2 100644
--- a/drivers/tty/Makefile
+++ b/drivers/tty/Makefile
@@ -24,5 +24,6 @@ obj-$(CONFIG_ROCKETPORT)	+= rocket.o
 obj-$(CONFIG_SYNCLINK_GT)	+= synclink_gt.o
 obj-$(CONFIG_SYNCLINKMP)	+= synclinkmp.o
 obj-$(CONFIG_SYNCLINK)		+= synclink.o
+obj-$(CONFIG_PPC_EPAPR_HV_BYTECHAN) += ehv_bytechan.o
 
 obj-y += ipwireless/
diff --git a/drivers/tty/ehv_bytechan.c b/drivers/tty/ehv_bytechan.c
new file mode 100644
index 0000000..e67f70b
--- /dev/null
+++ b/drivers/tty/ehv_bytechan.c
@@ -0,0 +1,888 @@
+/* ePAPR hypervisor byte channel device driver
+ *
+ * Copyright 2009-2011 Freescale Semiconductor, Inc.
+ *
+ * Author: Timur Tabi <timur@freescale.com>
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2.  This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ *
+ * This driver supports three distinct interfaces, all of which are related to
+ * ePAPR hypervisor byte channels.
+ *
+ * 1) An early-console (udbg) driver.  This provides early console output
+ * through a byte channel.  The byte channel handle must be specified in a
+ * Kconfig option.
+ *
+ * 2) A normal console driver.  Output is sent to the byte channel designated
+ * for stdout in the device tree.  The console driver is for handling kernel
+ * printk calls.
+ *
+ * 3) A tty driver, which is used to handle user-space input and output.  The
+ * byte channel used for the console is designated as the default tty.
+ */
+
+#include <linux/module.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <linux/err.h>
+#include <linux/interrupt.h>
+#include <linux/fs.h>
+#include <linux/poll.h>
+#include <asm/epapr_hcalls.h>
+#include <linux/of.h>
+#include <linux/platform_device.h>
+#include <linux/cdev.h>
+#include <linux/console.h>
+#include <linux/tty.h>
+#include <linux/tty_flip.h>
+#include <linux/circ_buf.h>
+#include <asm/udbg.h>
+
+/* The size of the transmit circular buffer.  This must be a power of two. */
+#define BUF_SIZE	2048
+
+/* Per-byte channel private data */
+struct ehv_bc_data {
+	struct device *dev;
+	struct tty_port port;
+	uint32_t handle;
+	unsigned int rx_irq;
+	unsigned int tx_irq;
+
+	spinlock_t lock;	/* lock for transmit buffer */
+	unsigned char buf[BUF_SIZE];	/* transmit circular buffer */
+	unsigned int head;	/* circular buffer head */
+	unsigned int tail;	/* circular buffer tail */
+
+	int tx_irq_enabled;	/* true == TX interrupt is enabled */
+};
+
+/* Array of byte channel objects */
+static struct ehv_bc_data *bcs;
+
+/* Byte channel handle for stdout (and stdin), taken from device tree */
+static unsigned int stdout_bc;
+
+/* Virtual IRQ for the byte channel handle for stdin, taken from device tree */
+static unsigned int stdout_irq;
+
+/**************************** SUPPORT FUNCTIONS ****************************/
+
+/*
+ * Enable the transmit interrupt
+ *
+ * Unlike a serial device, byte channels have no mechanism for disabling their
+ * own receive or transmit interrupts.  To emulate that feature, we toggle
+ * the IRQ in the kernel.
+ *
+ * We cannot just blindly call enable_irq() or disable_irq(), because these
+ * calls are reference counted.  This means that we cannot call enable_irq()
+ * if interrupts are already enabled.  This can happen in two situations:
+ *
+ * 1. The tty layer makes two back-to-back calls to ehv_bc_tty_write()
+ * 2. A transmit interrupt occurs while executing ehv_bc_tx_dequeue()
+ *
+ * To work around this, we keep a flag to tell us if the IRQ is enabled or not.
+ */
+static void enable_tx_interrupt(struct ehv_bc_data *bc)
+{
+	if (!bc->tx_irq_enabled) {
+		enable_irq(bc->tx_irq);
+		bc->tx_irq_enabled = 1;
+	}
+}
+
+static void disable_tx_interrupt(struct ehv_bc_data *bc)
+{
+	if (bc->tx_irq_enabled) {
+		disable_irq_nosync(bc->tx_irq);
+		bc->tx_irq_enabled = 0;
+	}
+}
+
+/*
+ * find the byte channel handle to use for the console
+ *
+ * The byte channel to be used for the console is specified via a
+ * "stdout-path" property in the /chosen node.
+ *
+ * For compatibility with legacy device trees, we also check a "stdout" alias.
+ */
+static int find_console_handle(void)
+{
+	struct device_node *np, *np2;
+	const char *sprop = NULL;
+	const uint32_t *iprop;
+
+	np = of_find_node_by_path("/chosen");
+	if (np)
+		sprop = of_get_property(np, "stdout-path", NULL);
+
+	if (!np || !sprop) {
+		of_node_put(np);
+		np = of_find_node_by_name(NULL, "aliases");
+		if (np)
+			sprop = of_get_property(np, "stdout", NULL);
+	}
+
+	if (!sprop) {
+		of_node_put(np);
+		return 0;
+	}
+
+	/* We don't care what the aliased node is actually called.  We only
+	 * care if it's compatible with "epapr,hv-byte-channel", because that
+	 * indicates that it's a byte channel node.  We use a temporary
+	 * variable, 'np2', because we can't release 'np' until we're done with
+	 * 'sprop'.
+	 */
+	np2 = of_find_node_by_path(sprop);
+	of_node_put(np);
+	np = np2;
+	if (!np) {
+		pr_warning("ehv-bc: stdout node '%s' does not exist\n", sprop);
+		return 0;
+	}
+
+	/* Is it a byte channel? */
+	if (!of_device_is_compatible(np, "epapr,hv-byte-channel")) {
+		of_node_put(np);
+		return 0;
+	}
+
+	stdout_irq = irq_of_parse_and_map(np, 0);
+	if (stdout_irq == NO_IRQ) {
+		pr_err("ehv-bc: no 'interrupts' property in %s node\n", sprop);
+		of_node_put(np);
+		return 0;
+	}
+
+	/*
+	 * The 'hv-handle' property contains the handle for this byte channel.
+	 */
+	iprop = of_get_property(np, "hv-handle", NULL);
+	if (!iprop) {
+		pr_err("ehv-bc: no 'hv-handle' property in %s node\n",
+		       np->name);
+		of_node_put(np);
+		return 0;
+	}
+	stdout_bc = be32_to_cpu(*iprop);
+
+	of_node_put(np);
+	return 1;
+}
+
+/*************************** EARLY CONSOLE DRIVER ***************************/
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_EHV_BC
+
+/*
+ * send a byte to a byte channel, wait if necessary
+ *
+ * This function sends a byte to a byte channel, and it waits and
+ * retries if the byte channel is full.  It returns if the character
+ * has been sent, or if some error has occurred.
+ *
+ */
+static void byte_channel_spin_send(const char data)
+{
+	int ret, count;
+
+	do {
+		count = 1;
+		ret = ev_byte_channel_send(CONFIG_PPC_EARLY_DEBUG_EHV_BC_HANDLE,
+					   &count, &data);
+	} while (ret == EV_EAGAIN);
+}
+
+/*
+ * The udbg subsystem calls this function to display a single character.
+ * We convert LF to CR/LF.
+ */
+static void ehv_bc_udbg_putc(char c)
+{
+	if (c == '\n')
+		byte_channel_spin_send('\r');
+
+	byte_channel_spin_send(c);
+}
+
+/*
+ * early console initialization
+ *
+ * PowerPC kernels support an early printk console, also known as udbg.
+ * This function is called from udbg_early_init() when
+ * CONFIG_PPC_EARLY_DEBUG_EHV_BC is set.  The byte channel handle is not read
+ * from the device tree; it comes from CONFIG_PPC_EARLY_DEBUG_EHV_BC_HANDLE.
+ *
+ * We only support displaying of characters (putc).  We do not support
+ * keyboard input.
+ */
+void __init udbg_init_ehv_bc(void)
+{
+	unsigned int rx_count, tx_count;
+	unsigned int ret;
+
+	/* Check if we're running as a guest of a hypervisor */
+	if (!(mfmsr() & MSR_GS))
+		return;
+
+	/* Verify the byte channel handle */
+	ret = ev_byte_channel_poll(CONFIG_PPC_EARLY_DEBUG_EHV_BC_HANDLE,
+				   &rx_count, &tx_count);
+	if (ret)
+		return;
+
+	udbg_putc = ehv_bc_udbg_putc;
+	register_early_udbg_console();
+
+	udbg_printf("ehv-bc: early console using byte channel handle %u\n",
+		    CONFIG_PPC_EARLY_DEBUG_EHV_BC_HANDLE);
+}
+
+#endif
+
+/****************************** CONSOLE DRIVER ******************************/
+
+static struct tty_driver *ehv_bc_driver;
+
+/*
+ * Byte channel console sending worker function.
+ *
+ * For consoles, if the output buffer is full, we should just spin until it
+ * clears.
+ */
+static int ehv_bc_console_byte_channel_send(unsigned int handle, const char *s,
+			     unsigned int count)
+{
+	unsigned int len;
+	int ret = 0;
+
+	while (count) {
+		len = min_t(unsigned int, count, EV_BYTE_CHANNEL_MAX_BYTES);
+		do {
+			ret = ev_byte_channel_send(handle, &len, s);
+		} while (ret == EV_EAGAIN);
+		count -= len;
+		s += len;
+	}
+
+	return ret;
+}
+
+/*
+ * write a string to the console
+ *
+ * This function gets called to write a string from the kernel, typically from
+ * a printk().  This function spins until all data is written.
+ *
+ * We copy the data to a temporary buffer because we need to insert a \r in
+ * front of every \n.  It's more efficient to copy the data to the buffer than
+ * it is to make multiple hcalls for each character or each newline.
+ */
+static void ehv_bc_console_write(struct console *co, const char *s,
+				 unsigned int count)
+{
+	unsigned int handle = (unsigned int)co->data;
+	char s2[EV_BYTE_CHANNEL_MAX_BYTES];
+	unsigned int i, j = 0;
+	char c;
+
+	for (i = 0; i < count; i++) {
+		c = *s++;
+
+		if (c == '\n')
+			s2[j++] = '\r';
+
+		s2[j++] = c;
+		if (j >= (EV_BYTE_CHANNEL_MAX_BYTES - 1)) {
+			if (ehv_bc_console_byte_channel_send(handle, s2, j))
+				return;
+			j = 0;
+		}
+	}
+
+	if (j)
+		ehv_bc_console_byte_channel_send(handle, s2, j);
+}
+
+/*
+ * When /dev/console is opened, the kernel iterates the console list looking
+ * for one with ->device and then calls that method. On success, it expects
+ * the passed-in int* to contain the minor number to use.
+ */
+static struct tty_driver *ehv_bc_console_device(struct console *co, int *index)
+{
+	*index = co->index;
+
+	return ehv_bc_driver;
+}
+
+static struct console ehv_bc_console = {
+	.name		= "ttyEHV",
+	.write		= ehv_bc_console_write,
+	.device		= ehv_bc_console_device,
+	.flags		= CON_PRINTBUFFER | CON_ENABLED,
+};
+
+/*
+ * Console initialization
+ *
+ * This is the first function that is called after the device tree is
+ * available, so here is where we determine the byte channel handle and IRQ for
+ * stdout/stdin, even though that information is used by the tty and character
+ * drivers.
+ */
+static int __init ehv_bc_console_init(void)
+{
+	if (!find_console_handle()) {
+		pr_debug("ehv-bc: stdout is not a byte channel\n");
+		return -ENODEV;
+	}
+
+#ifdef CONFIG_PPC_EARLY_DEBUG_EHV_BC
+	/* Print a friendly warning if the user chose the wrong byte channel
+	 * handle for udbg.
+	 */
+	if (stdout_bc != CONFIG_PPC_EARLY_DEBUG_EHV_BC_HANDLE)
+		pr_warning("ehv-bc: udbg handle %u is not the stdout handle\n",
+			   CONFIG_PPC_EARLY_DEBUG_EHV_BC_HANDLE);
+#endif
+
+	ehv_bc_console.data = (void *)stdout_bc;
+
+	/* add_preferred_console() must be called before register_console(),
+	   otherwise it won't work.  However, we don't want to enumerate all the
+	   byte channels here, either, since we only care about one. */
+
+	add_preferred_console(ehv_bc_console.name, ehv_bc_console.index, NULL);
+	register_console(&ehv_bc_console);
+
+	pr_info("ehv-bc: registered console driver for byte channel %u\n",
+		stdout_bc);
+
+	return 0;
+}
+console_initcall(ehv_bc_console_init);
+
+/******************************** TTY DRIVER ********************************/
+
+/*
+ * byte channel receive interrupt handler
+ *
+ * This ISR is called whenever data is available on a byte channel.
+ */
+static irqreturn_t ehv_bc_tty_rx_isr(int irq, void *data)
+{
+	struct ehv_bc_data *bc = data;
+	struct tty_struct *ttys = tty_port_tty_get(&bc->port);
+	unsigned int rx_count, tx_count, len;
+	int count;
+	char buffer[EV_BYTE_CHANNEL_MAX_BYTES];
+	int ret;
+
+	/* ttys could be NULL during a hangup */
+	if (!ttys)
+		return IRQ_HANDLED;
+
+	/* Find out how much data needs to be read, and then ask the TTY layer
+	 * if it can handle that much.  We want to ensure that every byte we
+	 * read from the byte channel will be accepted by the TTY layer.
+	 */
+	ev_byte_channel_poll(bc->handle, &rx_count, &tx_count);
+	count = tty_buffer_request_room(ttys, rx_count);
+
+	/* 'count' is the maximum amount of data the TTY layer can accept at
+	 * this time.  However, during testing, I was never able to get 'count'
+	 * to be less than 'rx_count'.  I'm not sure whether I'm calling it
+	 * correctly.
+	 */
+
+	while (count > 0) {
+		len = min_t(unsigned int, count, sizeof(buffer));
+
+		/* Read some data from the byte channel.  This function will
+		 * never return more than EV_BYTE_CHANNEL_MAX_BYTES bytes.
+		 */
+		ev_byte_channel_receive(bc->handle, &len, buffer);
+
+		/* 'len' is now the amount of data that's been received. 'len'
+		 * can't be zero, and most likely it's equal to one.
+		 */
+
+		/* Pass the received data to the tty layer. */
+		ret = tty_insert_flip_string(ttys, buffer, len);
+
+		/* 'ret' is the number of bytes that the TTY layer accepted.
+		 * If it's not equal to 'len', then it means the buffer is
+		 * full, which should never happen.  If it does happen, we can
+		 * exit gracefully, but we drop the last 'len - ret' characters
+		 * that we read from the byte channel.
+		 */
+		if (ret != len)
+			break;
+
+		count -= len;
+	}
+
+	/* Tell the tty layer that we're done. */
+	tty_flip_buffer_push(ttys);
+
+	tty_kref_put(ttys);
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * dequeue the transmit buffer to the hypervisor
+ *
+ * This function, which can be called in interrupt context, dequeues as much
+ * data as possible from the transmit buffer to the byte channel.
+ */
+static void ehv_bc_tx_dequeue(struct ehv_bc_data *bc)
+{
+	unsigned int count;
+	unsigned int len, ret;
+	unsigned long flags;
+
+	do {
+		spin_lock_irqsave(&bc->lock, flags);
+		len = min_t(unsigned int,
+			    CIRC_CNT_TO_END(bc->head, bc->tail, BUF_SIZE),
+			    EV_BYTE_CHANNEL_MAX_BYTES);
+
+		ret = ev_byte_channel_send(bc->handle, &len, bc->buf + bc->tail);
+
+		/* 'len' is valid only if the return code is 0 or EV_EAGAIN */
+		if (!ret || (ret == EV_EAGAIN))
+			bc->tail = (bc->tail + len) & (BUF_SIZE - 1);
+
+		count = CIRC_CNT(bc->head, bc->tail, BUF_SIZE);
+		spin_unlock_irqrestore(&bc->lock, flags);
+	} while (count && !ret);
+
+	spin_lock_irqsave(&bc->lock, flags);
+	if (CIRC_CNT(bc->head, bc->tail, BUF_SIZE))
+		/*
+		 * If we haven't emptied the buffer, then enable the TX IRQ.
+		 * We'll get an interrupt when there's more room in the
+		 * hypervisor's output buffer.
+		 */
+		enable_tx_interrupt(bc);
+	else
+		disable_tx_interrupt(bc);
+	spin_unlock_irqrestore(&bc->lock, flags);
+}
+
+/*
+ * byte channel transmit interrupt handler
+ *
+ * This ISR is called whenever space becomes available for transmitting
+ * characters on a byte channel.
+ */
+static irqreturn_t ehv_bc_tty_tx_isr(int irq, void *data)
+{
+	struct ehv_bc_data *bc = data;
+	struct tty_struct *ttys = tty_port_tty_get(&bc->port);
+
+	ehv_bc_tx_dequeue(bc);
+	if (ttys) {
+		tty_wakeup(ttys);
+		tty_kref_put(ttys);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/*
+ * This function is called when the tty layer has data for us to send.  We
+ * store the data first in a circular buffer, and then dequeue as much of that
+ * data as possible.
+ *
+ * We don't need to worry about whether there is enough room in the buffer for
+ * all the data.  The purpose of ehv_bc_tty_write_room() is to tell the tty
+ * layer how much data it can safely send to us.  We guarantee that
+ * ehv_bc_tty_write_room() will never lie, so the tty layer will never send us
+ * too much data.
+ */
+static int ehv_bc_tty_write(struct tty_struct *ttys, const unsigned char *s,
+			    int count)
+{
+	struct ehv_bc_data *bc = ttys->driver_data;
+	unsigned long flags;
+	unsigned int len;
+	unsigned int written = 0;
+
+	while (1) {
+		spin_lock_irqsave(&bc->lock, flags);
+		len = CIRC_SPACE_TO_END(bc->head, bc->tail, BUF_SIZE);
+		if (count < len)
+			len = count;
+		if (len) {
+			memcpy(bc->buf + bc->head, s, len);
+			bc->head = (bc->head + len) & (BUF_SIZE - 1);
+		}
+		spin_unlock_irqrestore(&bc->lock, flags);
+		if (!len)
+			break;
+
+		s += len;
+		count -= len;
+		written += len;
+	}
+
+	ehv_bc_tx_dequeue(bc);
+
+	return written;
+}
+
+/*
+ * Open a byte channel tty.
+ *
+ * The tty layer can call this several times for the same tty_struct, so the
+ * one-time setup (ttys->driver_data and the IRQs) is done in
+ * ehv_bc_tty_port_activate() rather than here.
+ *
+ * Because tty_register_device() is optional and some legacy drivers never
+ * call it, the tty layer will also call this function for an index whose
+ * device was never registered.  Such an entry has no bc->dev, so reject it.
+ */
+static int ehv_bc_tty_open(struct tty_struct *ttys, struct file *filp)
+{
+	struct ehv_bc_data *bc = &bcs[ttys->index];
+
+	return bc->dev ? tty_port_open(&bc->port, ttys, filp) : -ENODEV;
+}
+
+/*
+ * Close a byte channel tty.
+ *
+ * The tty layer calls this function even when ehv_bc_tty_open() returned an
+ * error, so the tty port may never have been initialized.  Only entries with
+ * a registered device (bc->dev set) have a port that can be closed.
+ */
+static void ehv_bc_tty_close(struct tty_struct *ttys, struct file *filp)
+{
+	struct ehv_bc_data *bc = &bcs[ttys->index];
+
+	if (!bc->dev)
+		return;
+
+	tty_port_close(&bc->port, ttys, filp);
+}
+
+/*
+ * Report how many bytes are free in the output circular buffer.
+ *
+ * The value is a promise to the tty layer: that many bytes passed to
+ * ehv_bc_tty_write() will be either sent or buffered.  The driver MUST honor
+ * the value it returns here.
+ */
+static int ehv_bc_tty_write_room(struct tty_struct *ttys)
+{
+	struct ehv_bc_data *bc = ttys->driver_data;
+	unsigned long irqflags;
+	int room;
+
+	/* Sample head and tail together under the buffer lock */
+	spin_lock_irqsave(&bc->lock, irqflags);
+	room = CIRC_SPACE(bc->head, bc->tail, BUF_SIZE);
+	spin_unlock_irqrestore(&bc->lock, irqflags);
+
+	return room;
+}
+
+/*
+ * Stop sending data to the tty layer
+ *
+ * This function is called when the tty layer's input buffers are getting full,
+ * so the driver should stop sending it data.  The easiest way to do this is to
+ * disable the RX IRQ, which will prevent ehv_bc_tty_rx_isr() from being
+ * called.
+ *
+ * The hypervisor will continue to queue up any incoming data.  If there is any
+ * data in the queue when the RX interrupt is enabled, we'll immediately get an
+ * RX interrupt.
+ */
+static void ehv_bc_tty_throttle(struct tty_struct *ttys)
+{
+	/* driver_data was set in ehv_bc_tty_port_activate() */
+	struct ehv_bc_data *bc = ttys->driver_data;
+
+	disable_irq(bc->rx_irq);
+}
+
+/*
+ * Resume sending data to the tty layer
+ *
+ * This function is called after previously calling ehv_bc_tty_throttle().  The
+ * tty layer's input buffers now have more room, so the driver can resume
+ * sending it data.  It re-enables the RX IRQ that ehv_bc_tty_throttle()
+ * disabled.
+ */
+static void ehv_bc_tty_unthrottle(struct tty_struct *ttys)
+{
+	/* driver_data was set in ehv_bc_tty_port_activate() */
+	struct ehv_bc_data *bc = ttys->driver_data;
+
+	/* If there is any data in the queue when the RX interrupt is enabled,
+	 * we'll immediately get an RX interrupt.
+	 */
+	enable_irq(bc->rx_irq);
+}
+
+/*
+ * Hang up the tty
+ *
+ * Makes one more attempt to dequeue buffered output, then lets the tty_port
+ * helper do the actual hangup work.
+ */
+static void ehv_bc_tty_hangup(struct tty_struct *ttys)
+{
+	struct ehv_bc_data *bc = ttys->driver_data;
+
+	ehv_bc_tx_dequeue(bc);
+	tty_port_hangup(&bc->port);
+}
+
+/*
+ * TTY driver operations
+ *
+ * If we could ask the hypervisor how much data is still in the TX buffer, or
+ * at least how big the TX buffers are, then we could implement the
+ * .wait_until_sent and .chars_in_buffer functions.
+ *
+ * Callbacks that are not listed here are left unset.
+ */
+static const struct tty_operations ehv_bc_ops = {
+	.open		= ehv_bc_tty_open,
+	.close		= ehv_bc_tty_close,
+	.write		= ehv_bc_tty_write,
+	.write_room	= ehv_bc_tty_write_room,
+	.throttle	= ehv_bc_tty_throttle,
+	.unthrottle	= ehv_bc_tty_unthrottle,
+	.hangup		= ehv_bc_tty_hangup,
+};
+
+/*
+ * initialize the TTY port
+ *
+ * This function will only be called once, no matter how many times
+ * ehv_bc_tty_open() is called.  That's why we register the ISR here, and also
+ * why we initialize tty_struct-related variables here.
+ *
+ * Returns 0 on success, or the negative error code from request_irq().  If
+ * the TX IRQ cannot be requested, the already-requested RX IRQ is freed
+ * again, so no IRQ is left requested on failure.
+ */
+static int ehv_bc_tty_port_activate(struct tty_port *port,
+				    struct tty_struct *ttys)
+{
+	struct ehv_bc_data *bc = container_of(port, struct ehv_bc_data, port);
+	int ret;
+
+	/* The other tty callbacks find their byte channel via driver_data */
+	ttys->driver_data = bc;
+
+	ret = request_irq(bc->rx_irq, ehv_bc_tty_rx_isr, 0, "ehv-bc", bc);
+	if (ret < 0) {
+		dev_err(bc->dev, "could not request rx irq %u (ret=%i)\n",
+		       bc->rx_irq, ret);
+		return ret;
+	}
+
+	/* request_irq also enables the IRQ, so mark the TX IRQ as enabled
+	 * before requesting it -- presumably so that the flag is already
+	 * accurate if the TX ISR runs right away (TODO confirm against
+	 * enable_tx_interrupt()/disable_tx_interrupt()).
+	 */
+	bc->tx_irq_enabled = 1;
+
+	ret = request_irq(bc->tx_irq, ehv_bc_tty_tx_isr, 0, "ehv-bc", bc);
+	if (ret < 0) {
+		dev_err(bc->dev, "could not request tx irq %u (ret=%i)\n",
+		       bc->tx_irq, ret);
+		free_irq(bc->rx_irq, bc);
+		return ret;
+	}
+
+	/* The TX IRQ is enabled only when we can't write all the data to the
+	 * byte channel at once, so by default it's disabled.
+	 */
+	disable_tx_interrupt(bc);
+
+	return 0;
+}
+
+/*
+ * shut down the TTY port
+ *
+ * Undoes ehv_bc_tty_port_activate() by releasing the TX and RX IRQs that were
+ * requested there.
+ */
+static void ehv_bc_tty_port_shutdown(struct tty_port *port)
+{
+	struct ehv_bc_data *bc = container_of(port, struct ehv_bc_data, port);
+
+	free_irq(bc->tx_irq, bc);
+	free_irq(bc->rx_irq, bc);
+}
+
+/* tty_port callbacks; installed on each port by ehv_bc_tty_probe() */
+static const struct tty_port_operations ehv_bc_tty_port_ops = {
+	.activate = ehv_bc_tty_port_activate,
+	.shutdown = ehv_bc_tty_port_shutdown,
+};
+
+/*
+ * Probe one "epapr,hv-byte-channel" device tree node
+ *
+ * Picks the bcs[] slot (and therefore the tty index) for this byte channel,
+ * maps its RX and TX interrupts, initializes its tty port and registers the
+ * tty device.
+ *
+ * Returns 0 on success, -ENODEV if the node lacks the required properties or
+ * no tty index is left for it, or the error from tty_register_device().
+ */
+static int __devinit ehv_bc_tty_probe(struct platform_device *pdev)
+{
+	struct device_node *np = pdev->dev.of_node;
+	struct ehv_bc_data *bc;
+	struct device *dev;
+	const uint32_t *iprop;
+	unsigned int handle;
+	int ret;
+	static unsigned int index = 1;
+	unsigned int i;
+
+	iprop = of_get_property(np, "hv-handle", NULL);
+	if (!iprop) {
+		dev_err(&pdev->dev, "no 'hv-handle' property in %s node\n",
+			np->name);
+		return -ENODEV;
+	}
+
+	/* We already told the console layer that the index for the console
+	 * device is zero, so we need to make sure that we use that index when
+	 * we probe the console byte channel node.
+	 */
+	handle = be32_to_cpu(*iprop);
+	i = (handle == stdout_bc) ? 0 : index++;
+
+	/* bcs[] and the tty driver are both sized to the number of byte
+	 * channel nodes.  Index 0 is reserved for the console, so if no node
+	 * turns out to be the console (or a node is probed again), 'index'
+	 * can run past the end of the array.  Refuse instead of overrunning.
+	 */
+	if (i >= (unsigned int)ehv_bc_driver->num) {
+		dev_err(&pdev->dev, "no tty index left for byte channel %u\n",
+			handle);
+		return -ENODEV;
+	}
+	bc = &bcs[i];
+
+	bc->handle = handle;
+	bc->head = 0;
+	bc->tail = 0;
+	spin_lock_init(&bc->lock);
+
+	bc->rx_irq = irq_of_parse_and_map(np, 0);
+	bc->tx_irq = irq_of_parse_and_map(np, 1);
+	if ((bc->rx_irq == NO_IRQ) || (bc->tx_irq == NO_IRQ)) {
+		dev_err(&pdev->dev, "no 'interrupts' property in %s node\n",
+			np->name);
+		ret = -ENODEV;
+		goto error;
+	}
+
+	/* The port must be ready before the device is registered, because the
+	 * tty can be opened as soon as tty_register_device() returns.
+	 */
+	tty_port_init(&bc->port);
+	bc->port.ops = &ehv_bc_tty_port_ops;
+
+	/* Use a local so that bc->dev never holds an ERR_PTR value, which
+	 * would pass the "!bc->dev" tests in ehv_bc_tty_open()/_close().
+	 */
+	dev = tty_register_device(ehv_bc_driver, i, &pdev->dev);
+	if (IS_ERR(dev)) {
+		ret = PTR_ERR(dev);
+		dev_err(&pdev->dev, "could not register tty (ret=%i)\n", ret);
+		goto error;
+	}
+	bc->dev = dev;
+
+	dev_set_drvdata(&pdev->dev, bc);
+
+	dev_info(&pdev->dev, "registered /dev/%s%u for byte channel %u\n",
+		ehv_bc_driver->name, i, bc->handle);
+
+	return 0;
+
+error:
+	irq_dispose_mapping(bc->tx_irq);
+	irq_dispose_mapping(bc->rx_irq);
+
+	/* Return the slot to its "not registered" (all zero) state */
+	memset(bc, 0, sizeof(struct ehv_bc_data));
+	return ret;
+}
+
+/*
+ * Remove one byte channel device
+ *
+ * Unregisters the tty device and disposes of the IRQ mappings created by
+ * ehv_bc_tty_probe().
+ */
+static int ehv_bc_tty_remove(struct platform_device *pdev)
+{
+	struct ehv_bc_data *bc = dev_get_drvdata(&pdev->dev);
+
+	/* The position of an element in bcs[] is its tty index */
+	tty_unregister_device(ehv_bc_driver, bc - bcs);
+
+	irq_dispose_mapping(bc->tx_irq);
+	irq_dispose_mapping(bc->rx_irq);
+
+	return 0;
+}
+
+/* Device tree match table: one platform device per byte channel node */
+static const struct of_device_id ehv_bc_tty_of_ids[] = {
+	{ .compatible = "epapr,hv-byte-channel" },
+	{}
+};
+
+/* Platform driver that binds ehv_bc_tty_probe()/_remove() to those nodes */
+static struct platform_driver ehv_bc_tty_driver = {
+	.driver = {
+		.owner = THIS_MODULE,
+		.name = "ehv-bc",
+		.of_match_table = ehv_bc_tty_of_ids,
+	},
+	.probe		= ehv_bc_tty_probe,
+	.remove		= ehv_bc_tty_remove,
+};
+
+/**
+ * ehv_bc_init - ePAPR hypervisor byte channel driver initialization
+ *
+ * This function is called when this module is loaded.  It sizes bcs[] and the
+ * tty driver from the number of "epapr,hv-byte-channel" nodes in the device
+ * tree, then registers the tty driver followed by the platform driver.
+ *
+ * Returns 0 on success, -ENODEV if there are no byte channel nodes, -ENOMEM
+ * if an allocation fails, or the error code from tty_register_driver() or
+ * platform_driver_register().  On failure, only the steps that actually
+ * succeeded are undone.
+ */
+static int __init ehv_bc_init(void)
+{
+	struct device_node *np;
+	unsigned int count = 0; /* Number of elements in bcs[] */
+	int ret;
+
+	pr_info("ePAPR hypervisor byte channel driver\n");
+
+	/* Count the number of byte channels */
+	for_each_compatible_node(np, NULL, "epapr,hv-byte-channel")
+		count++;
+
+	if (!count)
+		return -ENODEV;
+
+	/* The array index of an element in bcs[] is the same as the tty index
+	 * for that element.  If you know the address of an element in the
+	 * array, then you can use pointer math (e.g. "bc - bcs") to get its
+	 * tty index.
+	 */
+	bcs = kcalloc(count, sizeof(*bcs), GFP_KERNEL);
+	if (!bcs)
+		return -ENOMEM;
+
+	ehv_bc_driver = alloc_tty_driver(count);
+	if (!ehv_bc_driver) {
+		ret = -ENOMEM;
+		goto err_free_bcs;
+	}
+
+	ehv_bc_driver->owner = THIS_MODULE;
+	ehv_bc_driver->driver_name = "ehv-bc";
+	ehv_bc_driver->name = ehv_bc_console.name;
+	ehv_bc_driver->type = TTY_DRIVER_TYPE_CONSOLE;
+	ehv_bc_driver->subtype = SYSTEM_TYPE_CONSOLE;
+	ehv_bc_driver->init_termios = tty_std_termios;
+	ehv_bc_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_DYNAMIC_DEV;
+	tty_set_operations(ehv_bc_driver, &ehv_bc_ops);
+
+	ret = tty_register_driver(ehv_bc_driver);
+	if (ret) {
+		pr_err("ehv-bc: could not register tty driver (ret=%i)\n", ret);
+		/* Not registered, so it must not be unregistered */
+		goto err_put_tty_driver;
+	}
+
+	ret = platform_driver_register(&ehv_bc_tty_driver);
+	if (ret) {
+		pr_err("ehv-bc: could not register platform driver (ret=%i)\n",
+		       ret);
+		goto err_unregister_tty_driver;
+	}
+
+	return 0;
+
+err_unregister_tty_driver:
+	tty_unregister_driver(ehv_bc_driver);
+err_put_tty_driver:
+	put_tty_driver(ehv_bc_driver);
+err_free_bcs:
+	kfree(bcs);
+
+	return ret;
+}
+
+
+/**
+ * ehv_bc_exit - ePAPR hypervisor byte channel driver termination
+ *
+ * This function is called when this driver is unloaded.  It undoes
+ * ehv_bc_init() in reverse order: the platform driver is unregistered first,
+ * so that no probe or remove callback can run against the tty driver or
+ * bcs[] while they are being torn down.
+ */
+static void __exit ehv_bc_exit(void)
+{
+	platform_driver_unregister(&ehv_bc_tty_driver);
+	tty_unregister_driver(ehv_bc_driver);
+	put_tty_driver(ehv_bc_driver);
+	kfree(bcs);
+}
+
+/* Module entry and exit points, followed by the module metadata */
+module_init(ehv_bc_init);
+module_exit(ehv_bc_exit);
+
+MODULE_AUTHOR("Timur Tabi <timur@freescale.com>");
+MODULE_DESCRIPTION("ePAPR hypervisor byte channel driver");
+MODULE_LICENSE("GPL v2");
-- 
1.7.3.4

^ permalink raw reply related

* Re: [PATCH 1/3] mpt2sas: remove the use of writeq, since writeq is not atomic
From: Ingo Molnar @ 2011-05-19 18:15 UTC (permalink / raw)
  To: Benjamin Herrenschmidt, Thomas Gleixner, H. Peter Anvin
  Cc: Roland Dreier, Prakash, Sathya, linux-arch, Desai, Kashyap,
	linux scsi dev, Matthew Wilcox, Hitoshi Mitake, linux powerpc dev,
	Milton Miller, linux kernel, James Bottomley, Ingo Molnar,
	paulus@samba.org, linux pci, Sam Ravnborg
In-Reply-To: <1305783242.7481.42.camel@pasglop>


* Benjamin Herrenschmidt <benh@kernel.crashing.org> wrote:

> On Wed, 2011-05-18 at 21:16 -0700, Roland Dreier wrote:
> > On Wed, May 18, 2011 at 11:31 AM, Milton Miller <miltonm@bga.com> wrote:
> > > So the real question should be why is x86-32 supplying a broken writeq
> > > instead of letting drivers work out what to do it when needed?
> > 
> > Sounds a lot like what I was asking a couple of years ago :)
> > http://lkml.org/lkml/2009/4/19/164
> > 
> > But Ingo insisted that non-atomic writeq would be fine:
> > http://lkml.org/lkml/2009/4/19/167
> 
> Yuck... Ingo, I think that was very wrong.
> 
> Those are for MMIO, which must almost ALWAYS know precisely what the
> resulting access size is going to be. It's not even about atomicity
> between multiple CPUs. I have seen plenty of HW for which a 64-bit
> access to a register is -not- equivalent to two 32-bit ones. In fact, in
> some case, you can get the side effects twice ... or none at all.
> 
> The only case where you can be lax is when you explicitly know that
> there is no side effects -and- the HW copes with different access sizes.
> This is not the general case and drivers need at the very least a way to
> know what the behaviour will be.

Ok, that's pretty convincing.

Unless hpa or tglx disagrees with reverting this, could any of you send a patch 
with a proper changelog etc. that applies cleanly to v2.6.39?

Thanks,

	Ingo

^ permalink raw reply

* [11/71] hw_breakpoints, powerpc: Fix CONFIG_HAVE_HW_BREAKPOINT off-case in ptrace_set_debugreg()
From: Greg KH @ 2011-05-19 18:04 UTC (permalink / raw)
  To: linux-kernel, stable
  Cc: Frederic Weisbecker, Ingo Molnar, torvalds, Prasad, akpm, LPPC,
	stable-review, alan
In-Reply-To: <20110519180626.GA16555@kroah.com>

2.6.38-stable review patch.  If anyone has any objections, please let us know.

------------------

From: Frederic Weisbecker <fweisbec@gmail.com>

commit 925f83c085e1bb08435556c5b4844a60de002e31 upstream.

We make use of ptrace_get_breakpoints() / ptrace_put_breakpoints() to
protect ptrace_set_debugreg() even if CONFIG_HAVE_HW_BREAKPOINT is off.
However in this case, these APIs are not implemented.

To fix this, push the protection down inside the relevant ifdef.
Best would be to export the code inside
CONFIG_HAVE_HW_BREAKPOINT into a standalone function to cleanup
the ifdefury there and call the breakpoint ref API inside. But
as it is more invasive, this should be rather made in an -rc1.

Fixes this build error:

  arch/powerpc/kernel/ptrace.c:1594: error: implicit declaration of function 'ptrace_get_breakpoints' make[2]: ***

Reported-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: LPPC <linuxppc-dev@lists.ozlabs.org>
Cc: Prasad <prasad@linux.vnet.ibm.com>
Link: http://lkml.kernel.org/r/1304639598-4707-1-git-send-email-fweisbec@gmail.com
Signed-off-by: Ingo Molnar <mingo@elte.hu>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

---
 arch/powerpc/kernel/ptrace.c |   12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

--- a/arch/powerpc/kernel/ptrace.c
+++ b/arch/powerpc/kernel/ptrace.c
@@ -924,12 +924,16 @@ int ptrace_set_debugreg(struct task_stru
 	if (data && !(data & DABR_TRANSLATION))
 		return -EIO;
 #ifdef CONFIG_HAVE_HW_BREAKPOINT
+	if (ptrace_get_breakpoints(task) < 0)
+		return -ESRCH;
+
 	bp = thread->ptrace_bps[0];
 	if ((!data) || !(data & (DABR_DATA_WRITE | DABR_DATA_READ))) {
 		if (bp) {
 			unregister_hw_breakpoint(bp);
 			thread->ptrace_bps[0] = NULL;
 		}
+		ptrace_put_breakpoints(task);
 		return 0;
 	}
 	if (bp) {
@@ -939,9 +943,12 @@ int ptrace_set_debugreg(struct task_stru
 					(DABR_DATA_WRITE | DABR_DATA_READ),
 							&attr.bp_type);
 		ret =  modify_user_hw_breakpoint(bp, &attr);
-		if (ret)
+		if (ret) {
+			ptrace_put_breakpoints(task);
 			return ret;
+		}
 		thread->ptrace_bps[0] = bp;
+		ptrace_put_breakpoints(task);
 		thread->dabr = data;
 		return 0;
 	}
@@ -956,9 +963,12 @@ int ptrace_set_debugreg(struct task_stru
 							ptrace_triggered, task);
 	if (IS_ERR(bp)) {
 		thread->ptrace_bps[0] = NULL;
+		ptrace_put_breakpoints(task);
 		return PTR_ERR(bp);
 	}
 
+	ptrace_put_breakpoints(task);
+
 #endif /* CONFIG_HAVE_HW_BREAKPOINT */
 
 	/* Move contents to the DABR register */

^ permalink raw reply

* Re: Kernel cannot see PCI device
From: Bjorn Helgaas @ 2011-05-19 17:58 UTC (permalink / raw)
  To: Prashant Bhole; +Cc: linux-pci, linuxppc-dev
In-Reply-To: <BANLkTinNSXCQJ=WJm6CkfttOgpVz1XOCBQ@mail.gmail.com>

On Thu, May 19, 2011 at 6:41 AM, Prashant Bhole
<prashantsmailcenter@gmail.com> wrote:
> On Wed, May 18, 2011 at 7:44 PM, Bjorn Helgaas <bhelgaas@google.com> wrote:
>> On Wed, May 18, 2011 at 4:02 AM, Prashant Bhole
>> <prashantsmailcenter@gmail.com> wrote:
>>> On Mon, May 2, 2011 at 10:21 AM, Prashant Bhole
>>> <prashantsmailcenter@gmail.com> wrote:
>>>>
>>>> I have a custom made powerpc 460EX board. On that board u-boot
>>>> can see a PCI device but Linux kernel cannot see it. What could be the problem?
>>>>
>>>> On u-boot "pci  2" commands displays following device:
>>>> Scanning PCI devices on bus 2
>>>> BusDevFun  VendorId   DeviceId   Device Class       Sub-Class
>>>> _____________________________________________________________
>>>> 02.00.00   0x1000     0x0072     Mass storage controller 0x00
>>>>
>>>> And when the kernel is booted, there is only one pci device (bridge):
>>>> #ls /sys/bus/pci/devices
>>>> 0000:80:00.0
>>>
>>> a call to pci_bus_read_config_dword(bus, devfn, PCI_VENDOR_ID, &l) returns
>>> positive value in the function pci_scan_device(), which means VENDOR_ID reading
>>> failed. I could not find the reason. Any hints?
>>
>> My guess would be that Linux didn't find the host bridge to the
>> hierarchy containing bus 2.  I would guess the host bridge info is
>> supposed to come from OF.  More information, like the complete u-boot
>> PCI scan and the kernel dmesg log, would be useful.  And maybe u-boot
>> has a way to dump the OF device tree?
>
> I am not using OF for booting u-boot. OF is being used for booting the kernel.
> Not sure whether the host bridge info is coming from OF.
> Device tree has two pciex nodes with following property:
>
> compatible = "ibm,plb-pciex-460ex", "ibm,plb-pciex"
> I think that node represents a pciex bus not the bridge.

I think you have three PCI host bridges:

  PCIE1 to domain 0000 [bus 80-bf]  /plb/pciex@d20000000
  PCIX0 to domain 0001 [bus 00-3f]  /plb/pci@c0ec00000
  PCIE0 to [bus 40-7f] ("disabled via device-tree")

The scan below PCIE1 seems to work; at least, we found a P2P bridge at
0000:80:00.0.

The scan below PCIX0 (bus 0001:00) doesn't find anything.  You really
need a powerpc expert to help here, but in their absence, my guess
would be something's wrong with config space access, so I would start
by just adding some printks to ppc4xx_probe_pcix_bridge() to see if
the rsrc_cfg address looks reasonable.  You might need a chip spec or
maybe you can compare it to the device tree (I have no idea what the
relation between the device tree and OF is).

You mentioned the u-boot "pci 2" command earlier.  It found a device
on bus 2, which means there must be at least one P2P bridge to get you
from bus 0 to bus 2.  So the output of "pci 0", "pci 1", "pci 80", and
"pci 81" (to compare with what Linux found) would be interesting.

Bjorn

^ permalink raw reply

* Re: [PATCH 5/8] powerpc: override dma_get_required_mask by platform hook and ops
From: Nishanth Aravamudan @ 2011-05-19 17:46 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: cbe-oss-dev, FUJITA Tomonori, Arnd Bergmann, Sonny Rao,
	devicetree-discuss, linux-kernel, Milton Miller, Paul Mackerras,
	Anton Blanchard, Will Schmidt, Andrew Morton, linuxppc-dev
In-Reply-To: <1305791036.7481.52.camel@pasglop>

On 19.05.2011 [17:43:56 +1000], Benjamin Herrenschmidt wrote:
> On Wed, 2011-05-11 at 15:25 -0700, Nishanth Aravamudan wrote:
> > From: Milton Miller <miltonm@bga.com>
> > 
> > The hook dma_get_required_mask is supposed to return the mask required
> > by the platform to operate efficiently.  The generic version of
> > dma_get_required_mask in driver/base/platform.c returns a mask based
> > only on max_pfn.  However, this is likely too big for iommu systems
> > and could be too small for platforms that require a dma offset or have
> > a secondary window at a high offset.
> 
> The result of those 3 patches doesn't build on top of my current tree,
> the generic dma_ops lacks the dma_get_required_mask hook. I'll have a
> look again after the merge window.

Hrm, I think it's because for whatever reason [1] I forgot to cc you on 6/8?

https://lkml.org/lkml/2011/5/11/473

-Nish

[1] AKA over-reliance on get_maintainer.pl and lack of mental oversight!

-- 
Nishanth Aravamudan <nacc@us.ibm.com>
IBM Linux Technology Center

^ permalink raw reply

* Re: [PATCH 6/7] tty/powerpc: introduce the ePAPR embedded hypervisor byte channel driver
From: Alan Cox @ 2011-05-19 17:25 UTC (permalink / raw)
  To: Timur Tabi
  Cc: kumar.gala, linux-kernel, akpm, linux-console, greg, linuxppc-dev
In-Reply-To: <4DD545E4.9090408@freescale.com>

On Thu, 19 May 2011 11:31:32 -0500
Timur Tabi <timur@freescale.com> wrote:

> Alan Cox wrote:
> > You really also need a hangup method so vhangup() does the right thing
> > and you can securely do logins etc and sessions on your console. As
> > you've got no hardware entangled in this and you already use tty_port
> > helpers the hangup helper will do the work for you.
> 
> So all I need is this?
> 
> static void ehv_bc_tty_hangup(struct tty_struct *ttys)
> {
> 	struct ehv_bc_data *bc = ttys->driver_data;
> 
> 	tty_port_hangup(&bc->port);
> }
> 
> I've noticed that some drivers flush their transmit buffers before calling
> tty_port_hangup(), but some others don't.  Should I do this too?  I don't know
> if hangup should be as quick as possible.

Doesn't matter too much. If you can flush it quickly then do so

^ permalink raw reply

* Re: [PATCH v2 0/7] Consolidate sdhci pltfm & OF drivers and get them self registered
From: Wolfram Sang @ 2011-05-19 17:05 UTC (permalink / raw)
  To: Shawn Guo
  Cc: Chris Ball, Anton Vorontsov, sameo, Arnd Bergmann, patches,
	devicetree-discuss, linux-mmc, Saeed Bishara, Xiaobo Xie, kernel,
	Mike Rapoport, Olof Johansson, Shawn Guo, linuxppc-dev,
	Albert Herranz, linux-arm-kernel
In-Reply-To: <20110519160901.GC26816@S2100-06.ap.freescale.net>

[-- Attachment #1: Type: text/plain, Size: 784 bytes --]

Hi Shawn,

> Should I go for v3 right now to address the patch applying problems
> and that ESDHC_IMX build issue, or hold for a while to see if you
> have more comments on v2?

Please wait a little bit more.

> And what is your position on patch #5 which merges esdhc imx and mpc
> support into one?  As Anton has voted a NO there, I would probably
> drop the patch if there is another person has strong opinion to get
> imx and mpc stay separated.

This was the other main issue I spotted so far. I wanted to have another look
tomorrow, yet the tendency is that I agree with Anton.

Regards,

   Wolfram

-- 
Pengutronix e.K.                           | Wolfram Sang                |
Industrial Linux Solutions                 | http://www.pengutronix.de/  |

[-- Attachment #2: Digital signature --]
[-- Type: application/pgp-signature, Size: 198 bytes --]

^ permalink raw reply

* [PATCH] Maple: register CPC925 EDAC device on all boards with CPC925
From: Dmitry Eremin-Solenikov @ 2011-05-19 16:36 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: Paul Mackerras

Currently Maple setup code creates cpc925_edac device only on
Motorola ATCA-6101 blade. Make setup code check bridge revision
and enable EDAC on all U3 bridges.

Verified on Momentum MapleD (ppc970fx kit) board.

Signed-off-by: Dmitry Eremin-Solenikov <dbaryshkov@gmail.com>
---
 arch/powerpc/platforms/maple/setup.c |   41 +++++++++++++++------------------
 1 files changed, 19 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c
index fe34c3d..2c48a91 100644
--- a/arch/powerpc/platforms/maple/setup.c
+++ b/arch/powerpc/platforms/maple/setup.c
@@ -338,35 +338,16 @@ define_machine(maple) {
 #ifdef CONFIG_EDAC
 /*
  * Register a platform device for CPC925 memory controller on
- * Motorola ATCA-6101 blade.
+ * all boards with U3 (CPC925) bridge.
  */
-#define MAPLE_CPC925_MODEL	"Motorola,ATCA-6101"
 static int __init maple_cpc925_edac_setup(void)
 {
 	struct platform_device *pdev;
 	struct device_node *np = NULL;
 	struct resource r;
-	const unsigned char *model;
 	int ret;
-
-	np = of_find_node_by_path("/");
-	if (!np) {
-		printk(KERN_ERR "%s: Unable to get root node\n", __func__);
-		return -ENODEV;
-	}
-
-	model = (const unsigned char *)of_get_property(np, "model", NULL);
-	if (!model) {
-		printk(KERN_ERR "%s: Unabel to get model info\n", __func__);
-		of_node_put(np);
-		return -ENODEV;
-	}
-
-	ret = strcmp(model, MAPLE_CPC925_MODEL);
-	of_node_put(np);
-
-	if (ret != 0)
-		return 0;
+	volatile void __iomem *mem;
+	u32 rev;
 
 	np = of_find_node_by_type(NULL, "memory-controller");
 	if (!np) {
@@ -384,6 +365,22 @@ static int __init maple_cpc925_edac_setup(void)
 		return -ENODEV;
 	}
 
+	mem = ioremap(r.start, resource_size(&r));
+	if (!mem) {
+		printk(KERN_ERR "%s: Unable to map memory-controller memory\n",
+				__func__);
+		return -ENOMEM;
+	}
+
+	rev = __raw_readl(mem);
+	iounmap(mem);
+
+	if ((rev & 0xf0) != 0x30) { /* U3 */
+		printk(KERN_ERR "%s: Non-CPC925(U3) bridge revision: %02x\n",
+			__func__, rev);
+		return -ENODEV;
+	}
+
 	pdev = platform_device_register_simple("cpc925_edac", 0, &r, 1);
 	if (IS_ERR(pdev))
 		return PTR_ERR(pdev);
-- 
1.7.4.4

^ permalink raw reply related

* Re: [PATCH 6/7] tty/powerpc: introduce the ePAPR embedded hypervisor byte channel driver
From: Timur Tabi @ 2011-05-19 16:31 UTC (permalink / raw)
  To: Alan Cox
  Cc: kumar.gala, linux-kernel, akpm, linux-console, greg, linuxppc-dev
In-Reply-To: <20110519153358.5876f310@lxorguk.ukuu.org.uk>

Alan Cox wrote:
> You really also need a hangup method so vhangup() does the right thing
> and you can securely do logins etc and sessions on your console. As
> you've got no hardware entangled in this and you already use tty_port
> helpers the hangup helper will do the work for you.

So all I need is this?

static void ehv_bc_tty_hangup(struct tty_struct *ttys)
{
	struct ehv_bc_data *bc = ttys->driver_data;

	tty_port_hangup(&bc->port);
}

I've noticed that some drivers flush their transmit buffers before calling
tty_port_hangup(), but some others don't.  Should I do this too?  I don't know
if hangup should be as quick as possible.

-- 
Timur Tabi
Linux kernel developer at Freescale

^ permalink raw reply

* Re: [PATCH 6/7] tty/powerpc: introduce the ePAPR embedded hypervisor byte channel driver
From: Timur Tabi @ 2011-05-19 16:18 UTC (permalink / raw)
  To: Greg KH
  Cc: kumar.gala, linux-kernel, akpm, linux-console, linuxppc-dev,
	Alan Cox
In-Reply-To: <20110519160239.GB25606@kroah.com>

Greg KH wrote:
> It's too late, it needed to be in linux-next _before_ the window opened.
> 
> sorry,

Curses!  Foiled again!

Well, then I'd like to get this patchset fixed up and approved soon after the
window closes, so that there's no excuse for missing 2.6.41.

-- 
Timur Tabi
Linux kernel developer at Freescale

^ permalink raw reply

* Re: [PATCH RFCv7 0/2] CARMA Board Support
From: Ira W. Snyder @ 2011-05-19 16:10 UTC (permalink / raw)
  To: Benjamin Herrenschmidt; +Cc: dmitry.torokhov, linuxppc-dev, linux-kernel
In-Reply-To: <1305778421.7481.38.camel@pasglop>

On Thu, May 19, 2011 at 02:13:41PM +1000, Benjamin Herrenschmidt wrote:
> On Fri, 2011-02-11 at 15:34 -0800, Ira W. Snyder wrote:
> > Hello everyone,
> > 
> > This is the seventh posting of these drivers, taking into account comments
> > from earlier postings. I've made sure that the drivers both pass checkpatch
> > without any errors or warnings. I would appreciate as much review as you
> > can offer, so that these can get into the next merge cycle. They've been
> > sitting outside mainline for far too long.
> 
> This has been bitrotting for way too long indeed. I'm sticking this into
> powerpc -next today.
> 

Thanks Ben.

I'll grab the -next tree and make sure it builds on my board. I don't
think any API's have changed, but I will send an updated version if they
have.

Thanks,
Ira

> > RFCv6 -> RFCv7:
> > - reference count private data structure (to support unbind)
> > - use #defines instead of hex values for registers
> > - keep lines <=80 characters
> > 
> > RFCv5 -> RFCv6:
> > - change locking in several functions
> > - use list_move_tail() to simplify code
> > - remove unused helper functions
> > 
> > RFCv4 -> RFCv5:
> > - remove unecessary locking per review comments
> > - do not clobber return values from *_interruptible()
> > - explicitly track buffer DMA mapping
> > - use #defines instead of raw hex addresses
> > - change enable sysfs attribute to root-writeable only
> > 
> > RFCv3 -> RFCv4:
> > - updates for DATA-FPGA version 2
> > 
> > RFCv2 -> RFCv3:
> > - use miscdevice framework (removing the carma class)
> > - add bitfile readback capability to the programmer
> > 
> > RFCv1 -> RFCv2:
> > - change comments to kerneldoc format
> > - Kconfig improvements
> > - use the videobuf_dma_sg API in the programmer
> > - updates for Freescale DMAEngine DMA_SLAVE API changes
> > 
> > KNOWN ISSUES:
> > - untested with a setup that can generate interrupts (will get access soon)
> > - does not handle runtime "unbind"
> > 
> > Information about the CARMA board:
> > 
> > The CARMA board is essentially an MPC8349EA MDS reference design with a
> > 1GHz ADC and 4 high powered data processing FPGAs connected to the local
> > bus. It is all packed into a compact PCI form factor. It is used at the
> > Owens Valley Radio Observatory as the main component in the correlator
> > system.
> > 
> > For board information, see:
> > http://www.mmarray.org/~dwh/carma_board/index.html
> > 
> > For DATA-FPGA register layout, see:
> > http://www.mmarray.org/memos/carma_memo46.pdf
> > 
> > These drivers are the necessary pieces to get the data processing FPGAs
> > working and producing data. Despite the fact that the hardware is custom
> > and we are the only users, I'd still like to get the drivers upstream.
> > Several people have suggested that this is possible.
> > 
> > Some further patches will be forthcoming. I have a driver for the LED
> > subsystem and the PPS subsystem. The LED register layout is expected to
> > change soon, so I won't post the driver until that is finished. The PPS
> > driver will be posted seperately from this patch series; it is very
> > generic.
> > 
> > Thanks to everyone who has provided comments on earlier versions!
> > 
> > Ira W. Snyder (2):
> >   misc: add CARMA DATA-FPGA Access Driver
> >   misc: add CARMA DATA-FPGA Programmer support
> > 
> >  drivers/misc/Kconfig                    |    1 +
> >  drivers/misc/Makefile                   |    1 +
> >  drivers/misc/carma/Kconfig              |   18 +
> >  drivers/misc/carma/Makefile             |    2 +
> >  drivers/misc/carma/carma-fpga-program.c | 1141 ++++++++++++++++++++++++
> >  drivers/misc/carma/carma-fpga.c         | 1433 +++++++++++++++++++++++++++++++
> >  6 files changed, 2596 insertions(+), 0 deletions(-)
> >  create mode 100644 drivers/misc/carma/Kconfig
> >  create mode 100644 drivers/misc/carma/Makefile
> >  create mode 100644 drivers/misc/carma/carma-fpga-program.c
> >  create mode 100644 drivers/misc/carma/carma-fpga.c
> > 
> 
> 

^ permalink raw reply

* Re: [PATCH v2 0/7] Consolidate sdhci pltfm & OF drivers and get them self registered
From: Shawn Guo @ 2011-05-19 16:09 UTC (permalink / raw)
  To: Wolfram Sang
  Cc: Chris Ball, Anton Vorontsov, sameo, Arnd Bergmann, patches,
	devicetree-discuss, linux-mmc, Saeed Bishara, Xiaobo Xie, kernel,
	Mike Rapoport, Olof Johansson, Shawn Guo, linuxppc-dev,
	Albert Herranz, linux-arm-kernel
In-Reply-To: <20110519094046.GA2219@pengutronix.de>

Hi Wolfram,

On Thu, May 19, 2011 at 11:40:46AM +0200, Wolfram Sang wrote:
> Hi Shawn,
> 
> > Changes since v1:
> >  * Rebase on cjb's mmc-next tree
> 
> Is it maybe possible that you get access to
> http://opensource.freescale.com/git or another machine? A branch to pull
> from would be more convenient, because the series does not apply to
> mmc-next anymore, so an extra step to go "back in time" is needed.
> 
> (minor) When applying I got:
> 
> Applying: mmc: sdhci: make sdhci-pltfm device drivers self registered
> /home/wsa/Kernel/linux-2.6/.git/rebase-apply/patch:384: trailing whitespace.
> /home/wsa/Kernel/linux-2.6/.git/rebase-apply/patch:817: space before tab in indent.
>  	struct tegra_sdhci_platform_data *plat;
> /home/wsa/Kernel/linux-2.6/.git/rebase-apply/patch:867: trailing whitespace.
> 
> Applying: sdhci: rename sdhci-esdhc-imx.c to sdhci-esdhc.c
> /home/wsa/Kernel/linux-2.6/.git/rebase-apply/patch:780: trailing whitespace.
> 
> See later comments for further issues.
> 
Should I go for v3 right now to address the patch applying problems
and that ESDHC_IMX build issue, or hold for a while to see if you
have more comments on v2?

And what is your position on patch #5 which merges esdhc imx and mpc
support into one?  As Anton has voted a NO there, I would probably
drop the patch if there is another person has strong opinion to get
imx and mpc stay separated.

-- 
Regards,
Shawn

^ permalink raw reply

* Re: [PATCH 6/7] tty/powerpc: introduce the ePAPR embedded hypervisor byte channel driver
From: Timur Tabi @ 2011-05-19 16:05 UTC (permalink / raw)
  To: Alan Cox
  Cc: kumar.gala, linux-kernel, akpm, linux-console, greg, linuxppc-dev
In-Reply-To: <20110519153358.5876f310@lxorguk.ukuu.org.uk>

Alan Cox wrote:
>> > +		/* Pass the received data to the tty layer.  Note that this
>> > +		 * function calls tty_buffer_request_room(), so I'm not sure if
>> > +		 * we should have also called tty_buffer_request_room().
>> > +		 */
>> > +		ret = tty_insert_flip_string(ttys, buffer, len);

> You only need to request_room in advance if you can't handle the case
> where the insert_flip_string returns less than you stuffed down it.

If tty_insert_flip_string() returns less than I stuffed down it, the characters
it didn't accept will be dropped.  That's because once I receive them, I have
nowhere else to put them.  I suppose I could implement a receive FIFO, but that
seems overkill.  If calling tty_buffer_request_room() ensures that
tty_insert_flip_string() always accepts all the characters, I would rather do that.

-- 
Timur Tabi
Linux kernel developer at Freescale

^ permalink raw reply

* Re: [PATCH 6/7] tty/powerpc: introduce the ePAPR embedded hypervisor byte channel driver
From: Greg KH @ 2011-05-19 16:02 UTC (permalink / raw)
  To: Timur Tabi
  Cc: kumar.gala, linux-kernel, akpm, linux-console, linuxppc-dev,
	Alan Cox
In-Reply-To: <4DD53D1B.4060600@freescale.com>

On Thu, May 19, 2011 at 10:54:03AM -0500, Timur Tabi wrote:
> > Depends if the functionality is useful in your environment or not
> 
> It is, but I'd like to add it later so that I can make the 2.6.40 window (if it
> isn't already too late).

It's too late, it needed to be in linux-next _before_ the window opened.

sorry,

greg k-h

^ permalink raw reply

* Re: [PATCH] powerpc/e5500: set non-base IVORs
From: Scott Wood @ 2011-05-19 15:59 UTC (permalink / raw)
  To: Kumar Gala; +Cc: linuxppc-dev
In-Reply-To: <B794A6BF-1B01-4764-92BA-A486A0166EB8@kernel.crashing.org>

On Thu, 19 May 2011 00:41:29 -0500
Kumar Gala <galak@kernel.crashing.org> wrote:

> 
> On May 9, 2011, at 4:26 PM, Scott Wood wrote:
> 
> > Without this, we attempt to use doorbells for IPIs, and end up
> > branching to some bad address.  Plus, even for the exceptions
> > we don't implement, it's good to handle it and get a message out.
> > 
> > Signed-off-by: Scott Wood <scottwood@freescale.com>
> > ---
> > arch/powerpc/include/asm/reg_booke.h      |    4 ++
> > arch/powerpc/kernel/cpu_setup_fsl_booke.S |    3 ++
> > arch/powerpc/kernel/exceptions-64e.S      |   47 +++++++++++++++++++++++++++++
> > 3 files changed, 54 insertions(+), 0 deletions(-)
> 
> applied to next

It's actually not quite right, as I misused MASKABLE_EXCEPTION and some of
those ivors are critical/guest rather than normal.  I'll send a followup.

-Scott

^ permalink raw reply

* Re: [PATCH 6/7] tty/powerpc: introduce the ePAPR embedded hypervisor byte channel driver
From: Alan Cox @ 2011-05-19 16:00 UTC (permalink / raw)
  To: Timur Tabi
  Cc: kumar.gala, linux-kernel, akpm, linux-console, greg, linuxppc-dev
In-Reply-To: <4DD53D1B.4060600@freescale.com>

> Ok, I can do that.
> 
> > Depends if the functionality is useful in your environment or not
> 
> It is, but I'd like to add it later so that I can make the 2.6.40 window (if it
> isn't already too late).

Seems sensible.

Alan

^ permalink raw reply

* Re: [PATCH 6/7] tty/powerpc: introduce the ePAPR embedded hypervisor byte channel driver
From: Timur Tabi @ 2011-05-19 15:54 UTC (permalink / raw)
  To: Alan Cox
  Cc: kumar.gala, linux-kernel, akpm, linux-console, greg, linuxppc-dev
In-Reply-To: <20110519165039.7fd8ec34@lxorguk.ukuu.org.uk>

Alan Cox wrote:
>>> > > The kfifo API is probably faster and cleaner. Much of tty still uses
>>> > > CIRC_* because they predate the new APIs.
>> > 
>> > Ok, I'll change it.
> I flag that one up as a general comment - don't feel you need to change
> it if CIRC_* works in your case.

CIRC_* does work for me, so I'll keep it as-is.

>>> > > I guess the only other thing to consider is whether you want to implement
>>> > > a SYSRQ interface on your console ?
>> > 
>> > I don't think byte channels can support SYSRQ, but I'll look into it.

> What some drivers do in this case is nominate some obscure ctrl sequence
> to mean 'sysrq' unless doubled (eg ctrl-^ etc)

Ok, I can do that.

> Depends if the functionality is useful in your environment or not

It is, but I'd like to add it later so that I can make the 2.6.40 window (if it
isn't already too late).


-- 
Timur Tabi
Linux kernel developer at Freescale

^ permalink raw reply

* Re: [PATCH 6/7] tty/powerpc: introduce the ePAPR embedded hypervisor byte channel driver
From: Alan Cox @ 2011-05-19 15:50 UTC (permalink / raw)
  To: Timur Tabi
  Cc: kumar.gala, linux-kernel, akpm, linux-console, greg, linuxppc-dev
In-Reply-To: <4DD533DE.1020705@freescale.com>

> Under what circumstances can ttys be NULL?  I currently only use this code in
> the RX and TX interrupt handlers, which are both enabled in the
> tty_port_operations.activate() function.

When you add hangup support.

> 
> Is this right for the TX handler:
> 
> static irqreturn_t ehv_bc_tty_tx_isr(int irq, void *data)
> {
> 	struct ehv_bc_data *bc = data;
> 	struct tty_struct *ttys = tty_port_tty_get(&bc->port);
> 
> 	ehv_bc_tx_dequeue(bc);
> 	if (ttys) {
> 		tty_wakeup(ttys);
> 		tty_kref_put(ttys);
> 	}
> 
> 	return IRQ_HANDLED;

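Yes.

> >> > +		len = min_t(unsigned int,
> >> > +			    CIRC_CNT_TO_END(bc->head, bc->tail, BUF_SIZE),
> >> > +			    EV_BYTE_CHANNEL_MAX_BYTES);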
> > The kfifo API is probably faster and cleaner. Much of tty still uses
> > CIRC_* because they predate the new APIs.
> 
> Ok, I'll change it.

I flag that one up as a general comment - don't feel you need to change
it if CIRC_* works in your case.

> > I guess the only other thing to consider is whether you want to implement
> > a SYSRQ interface on your console ?
> 
> I don't think byte channels can support SYSRQ, but I'll look into it.

What some drivers do in this case is nominate some obscure ctrl sequence
to mean 'sysrq' unless doubled (eg ctrl-^ etc)

Depends if the functionality is useful in your environment or not

^ permalink raw reply

* Re: [bg-linux] [PATCH 3/7] [RFC] add support for BlueGene/P FPU
From: Kazutomo Yoshii @ 2011-05-19 15:22 UTC (permalink / raw)
  To: linuxppc-dev; +Cc: bg-linux
In-Reply-To: <BANLkTimKhApFW8G1-pG0u_9Kv2YB0R1O0w@mail.gmail.com>

On 05/19/2011 08:53 AM, Eric Van Hensbergen wrote:
>>> >>  +#ifdef CONFIG_BGP
>>> >>  +#define SAVE_FPR(n, b, base) li b, THREAD_FPR0+(16*(n)); STFPDX(n, base, b)
>>> >>  +#define REST_FPR(n, b, base) li b, THREAD_FPR0+(16*(n)); LFPDX(n, base, b)
>>>        
>> >
>> >  16*?  Are these FP regs 64 or 128 bits wide?  If 128 you are going to
>> >  have to play with TS_WIDTH to get the size of the FPs correct in the
>> >  thread_struct.
>> >
>> >  I think there's a bug here.
>> >
>>      
> I actually have three different versions of this code from different
> source patches that I'm drawing from - so your help in figuring out
> the best way to approach this is appreciated.  The kittyhawk version
> of the code has 8* instead of 16*.  According to the docs:
> "Each of the two FPU units contains 32 64-bit floating point registers
> for a total of 64 FP registers per processor." which would seem to
> point to the kittyhawk version - but they have a second SAVE_32SFPRS
> for the second hummer.  What wasn't clear to me with this version of
> the code was whether or not they were doing something clever like
> saving the pair of the 64-bit FPU registers in a single 128-bit slot
> (seems plausible).
Yes, it does.  SIMD-like instructions are added to BGP PPC.
stfpdx or lfpdx, for example, handle two FPU registers (primary and
secondary).

Thanks,
Kaz

^ permalink raw reply

* Re: [PATCH 6/7] tty/powerpc: introduce the ePAPR embedded hypervisor byte channel driver
From: Timur Tabi @ 2011-05-19 15:14 UTC (permalink / raw)
  To: Alan Cox
  Cc: kumar.gala, linux-kernel, akpm, linux-console, greg, linuxppc-dev
In-Reply-To: <20110519153358.5876f310@lxorguk.ukuu.org.uk>

Alan Cox wrote:
> ttys = tty_port_tty_get(&bc->port);
> stuff
> if (ttys != NULL)
> 	tty stuff
> 	tty_kref_put(ttys);

Under what circumstances can ttys be NULL?  I currently only use this code in
the RX and TX interrupt handlers, which are both enabled in the
tty_port_operations.activate() function.

Is this right for the TX handler:

static irqreturn_t ehv_bc_tty_tx_isr(int irq, void *data)
{
	struct ehv_bc_data *bc = data;
	struct tty_struct *ttys = tty_port_tty_get(&bc->port);

	ehv_bc_tx_dequeue(bc);
	if (ttys) {
		tty_wakeup(ttys);
		tty_kref_put(ttys);
	}

	return IRQ_HANDLED;
}

I just want to make sure that testing for NULL is really necessary in my
interrupt handlers.

>> > +		len = min_t(unsigned int,
>> > +			    CIRC_CNT_TO_END(bc->head, bc->tail, BUF_SIZE),
>> > +			    EV_BYTE_CHANNEL_MAX_BYTES);
> The kfifo API is probably faster and cleaner. Much of tty still uses
> CIRC_* because they predate the new APIs.

Ok, I'll change it.

> You really also need a hangup method so vhangup() does the right thing
> and you can securely do logins etc and sessions on your console. As
> you've got no hardware entangled in this and you already use tty_port
> helpers the hangup helper will do the work for you.

Ok.

> 
> I guess the only other thing to consider is whether you want to implement
> a SYSRQ interface on your console ?

I don't think byte channels can support SYSRQ, but I'll look into it.

-- 
Timur Tabi
Linux kernel developer at Freescale

^ permalink raw reply

page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox