linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] [POWERPC] Rework EXC_LEVEL_EXCEPTION_PROLOG code
@ 2008-04-30  9:27 Kumar Gala
  2008-04-30 21:54 ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 26+ messages in thread
From: Kumar Gala @ 2008-04-30  9:27 UTC (permalink / raw)
  To: Paul Mackerras, Benjamin Herrenschmidt; +Cc: linuxppc-dev

* Cleanup the code a bit my allocating an INT_FRAME on our exception
  stack there by make references go from GPR11-INT_FRAME_SIZE(r8) to
  just GPR(r8)
* simplify {lvl}_transfer_to_handler code by moving the copying of the
  temp registers we use if we come from user space into the PROLOG
* If the exception came from kernel mode copy thread_info flags,
  preempt, and task pointer from the process thread_info.

Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
---

I'm not sure if the copying of TI_FLAGS, TI_PREEMPT, and TI_TASK
are really needed.  I'm a bit concerned what to do if we get a
CriticalInput while in kernel mode and the handler causes a reschedule.

 arch/powerpc/kernel/entry_32.S   |   13 ----------
 arch/powerpc/kernel/head_booke.h |   47 ++++++++++++++++++++++++-------------
 2 files changed, 30 insertions(+), 30 deletions(-)

diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 0c8614d..816dd54 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -44,29 +44,16 @@
 #endif

 #ifdef CONFIG_BOOKE
-#include "head_booke.h"
-#define TRANSFER_TO_HANDLER_EXC_LEVEL(exc_level)	\
-	mtspr	exc_level##_SPRG,r8;			\
-	BOOKE_LOAD_EXC_LEVEL_STACK(exc_level);		\
-	lwz	r0,GPR10-INT_FRAME_SIZE(r8);		\
-	stw	r0,GPR10(r11);				\
-	lwz	r0,GPR11-INT_FRAME_SIZE(r8);		\
-	stw	r0,GPR11(r11);				\
-	mfspr	r8,exc_level##_SPRG
-
 	.globl	mcheck_transfer_to_handler
 mcheck_transfer_to_handler:
-	TRANSFER_TO_HANDLER_EXC_LEVEL(MCHECK)
 	b	transfer_to_handler_full

 	.globl	debug_transfer_to_handler
 debug_transfer_to_handler:
-	TRANSFER_TO_HANDLER_EXC_LEVEL(DEBUG)
 	b	transfer_to_handler_full

 	.globl	crit_transfer_to_handler
 crit_transfer_to_handler:
-	TRANSFER_TO_HANDLER_EXC_LEVEL(CRIT)
 	/* fall through */
 #endif

diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
index d647e05..78baec5 100644
--- a/arch/powerpc/kernel/head_booke.h
+++ b/arch/powerpc/kernel/head_booke.h
@@ -78,12 +78,12 @@
 	slwi	r8,r8,2;				\
 	addis	r8,r8,level##_STACK_TOP@ha;		\
 	lwz	r8,level##_STACK_TOP@l(r8);		\
-	addi	r8,r8,THREAD_SIZE;
+	addi	r8,r8,THREAD_SIZE-INT_FRAME_SIZE;
 #else
 #define BOOKE_LOAD_EXC_LEVEL_STACK(level)		\
 	lis	r8,level##_STACK_TOP@ha;		\
 	lwz	r8,level##_STACK_TOP@l(r8);		\
-	addi	r8,r8,THREAD_SIZE;
+	addi	r8,r8,THREAD_SIZE-INT_FRAME_SIZE;
 #endif

 /*
@@ -97,22 +97,35 @@
 #define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, exc_level_srr0, exc_level_srr1) \
 	mtspr	exc_level##_SPRG,r8;					     \
 	BOOKE_LOAD_EXC_LEVEL_STACK(exc_level);/* r8 points to the exc_level stack*/ \
-	stw	r10,GPR10-INT_FRAME_SIZE(r8);				     \
-	stw	r11,GPR11-INT_FRAME_SIZE(r8);				     \
+	stw	r9,GPR9(r8);		/* save various registers	   */\
+	stw	r10,GPR10(r8);						     \
+	stw	r11,GPR11(r8);						     \
+	mfspr	r11,SPRN_SPRG3;		/* if from user, start at top of   */\
+	lwz	r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\
+	addi	r11,r11,THREAD_SIZE-INT_FRAME_SIZE; /* Alloc exception frm */\
 	mfcr	r10;			/* save CR in r10 for now	   */\
-	mfspr	r11,exc_level_srr1;	/* check whether user or kernel    */\
-	andi.	r11,r11,MSR_PR;						     \
-	mr	r11,r8;							     \
-	mfspr	r8,exc_level##_SPRG;					     \
+	mfspr	r9,exc_level_srr1;	/* check whether user or kernel    */\
+	andi.	r9,r9,MSR_PR;						     \
 	beq	1f;							     \
 	/* COMING FROM USER MODE */					     \
-	mfspr	r11,SPRN_SPRG3;		/* if from user, start at top of   */\
-	lwz	r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\
-	addi	r11,r11,THREAD_SIZE;					     \
-1:	subi	r11,r11,INT_FRAME_SIZE;	/* Allocate an exception frame     */\
+	lwz	r9,GPR9(r8);		/* copy regs from exception stack  */\
+	stw	r9,GPR9(r11);						     \
+	lwz	r9,GPR10(r8);						     \
+	stw	r9,GPR10(r11);						     \
+	lwz	r9,GPR11(r8);						     \
+	stw	r9,GPR11(r11);						     \
+	b	2f;							     \
+	/* COMING FROM PRIV MODE */					     \
+1:	lwz	r9,TI_FLAGS-THREAD_SIZE(r11);				     \
+	stw	r9,TI_FLAGS-THREAD_SIZE(r8);				     \
+	lwz	r9,TI_PREEMPT-THREAD_SIZE(r11);				     \
+	stw	r9,TI_PREEMPT-THREAD_SIZE(r8);				     \
+	lwz	r9,TI_TASK-THREAD_SIZE(r11);				     \
+	stw	r9,TI_TASK-THREAD_SIZE(r8);				     \
+	mr	r11,r8;							     \
+2:	mfspr	r8,exc_level##_SPRG;					     \
 	stw	r10,_CCR(r11);          /* save various registers	   */\
 	stw	r12,GPR12(r11);						     \
-	stw	r9,GPR9(r11);						     \
 	mflr	r10;							     \
 	stw	r10,_LINK(r11);						     \
 	mfspr	r12,SPRN_DEAR;		/* save DEAR and ESR in the frame  */\
@@ -255,8 +268,8 @@ label:
 	lwz	r12,GPR12(r11);						      \
 	mtspr	DEBUG_SPRG,r8;						      \
 	BOOKE_LOAD_EXC_LEVEL_STACK(DEBUG); /* r8 points to the debug stack */ \
-	lwz	r10,GPR10-INT_FRAME_SIZE(r8);				      \
-	lwz	r11,GPR11-INT_FRAME_SIZE(r8);				      \
+	lwz	r10,GPR10(r8);						      \
+	lwz	r11,GPR11(r8);						      \
 	mfspr	r8,DEBUG_SPRG;						      \
 									      \
 	RFDI;								      \
@@ -308,8 +321,8 @@ label:
 	lwz	r12,GPR12(r11);						      \
 	mtspr	CRIT_SPRG,r8;						      \
 	BOOKE_LOAD_EXC_LEVEL_STACK(CRIT); /* r8 points to the debug stack */  \
-	lwz	r10,GPR10-INT_FRAME_SIZE(r8);				      \
-	lwz	r11,GPR11-INT_FRAME_SIZE(r8);				      \
+	lwz	r10,GPR10(r8);						      \
+	lwz	r11,GPR11(r8);						      \
 	mfspr	r8,CRIT_SPRG;						      \
 									      \
 	rfci;								      \
-- 
1.5.4.1

^ permalink raw reply related	[flat|nested] 26+ messages in thread

* Re: [PATCH] [POWERPC] Rework EXC_LEVEL_EXCEPTION_PROLOG code
  2008-04-30  9:27 [PATCH] [POWERPC] Rework EXC_LEVEL_EXCEPTION_PROLOG code Kumar Gala
@ 2008-04-30 21:54 ` Benjamin Herrenschmidt
  2008-04-30 22:13   ` Kumar Gala
  0 siblings, 1 reply; 26+ messages in thread
From: Benjamin Herrenschmidt @ 2008-04-30 21:54 UTC (permalink / raw)
  To: Kumar Gala; +Cc: linuxppc-dev, Paul Mackerras


On Wed, 2008-04-30 at 04:27 -0500, Kumar Gala wrote:
> * Cleanup the code a bit my allocating an INT_FRAME on our exception
>   stack there by make references go from GPR11-INT_FRAME_SIZE(r8) to
>   just GPR(r8)
            ^^ 11 ?

> * simplify {lvl}_transfer_to_handler code by moving the copying of the
>   temp registers we use if we come from user space into the PROLOG
> * If the exception came from kernel mode copy thread_info flags,
>   preempt, and task pointer from the process thread_info.
> 
> Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
> ---
> 
> I'm not sure if the copying of TI_FLAGS, TI_PREEMPT, and TI_TASK
> are really needed.  I'm a bit concerned what to do if we get a
> CriticalInput while in kernel mode and the handler causes a reschedule.

Well, reschedule or signal, either way, you can't handle them on the way
out. Maybe an option there is to set the flag in the normal kernel
stack's thread info and trigger an interrupt asap via the PIT so things
get handled ?

> 
> diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/head_booke.h
> index d647e05..78baec5 100644
> --- a/arch/powerpc/kernel/head_booke.h
> +++ b/arch/powerpc/kernel/head_booke.h
> @@ -78,12 +78,12 @@
>  	slwi	r8,r8,2;				\
>  	addis	r8,r8,level##_STACK_TOP@ha;		\
>  	lwz	r8,level##_STACK_TOP@l(r8);		\
> -	addi	r8,r8,THREAD_SIZE;
> +	addi	r8,r8,THREAD_SIZE-INT_FRAME_SIZE;
>  #else
>  #define BOOKE_LOAD_EXC_LEVEL_STACK(level)		\
>  	lis	r8,level##_STACK_TOP@ha;		\
>  	lwz	r8,level##_STACK_TOP@l(r8);		\
> -	addi	r8,r8,THREAD_SIZE;
> +	addi	r8,r8,THREAD_SIZE-INT_FRAME_SIZE;
>  #endif

Nothing specific to your patch, but those level##_STACK_TOP seem to
be pretty badly named if you end up -adding- THREAD_SIZE to actually
get to the stack's top. Do they really contain the stack top or do
they in fact contain the stack base/bottom ?

>  /*
> @@ -97,22 +97,35 @@
>  #define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, exc_level_srr0, exc_level_srr1) \
>  	mtspr	exc_level##_SPRG,r8;					     \
>  	BOOKE_LOAD_EXC_LEVEL_STACK(exc_level);/* r8 points to the exc_level stack*/ \
> -	stw	r10,GPR10-INT_FRAME_SIZE(r8);				     \
> -	stw	r11,GPR11-INT_FRAME_SIZE(r8);				     \
> +	stw	r9,GPR9(r8);		/* save various registers	   */\
> +	stw	r10,GPR10(r8);						     \
> +	stw	r11,GPR11(r8);						     \
> +	mfspr	r11,SPRN_SPRG3;		/* if from user, start at top of   */\
> +	lwz	r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\
> +	addi	r11,r11,THREAD_SIZE-INT_FRAME_SIZE; /* Alloc exception frm */\

So you do the above whether it will actually be needed or not right ?
 
>  	mfcr	r10;			/* save CR in r10 for now	   */\
> -	mfspr	r11,exc_level_srr1;	/* check whether user or kernel    */\
> -	andi.	r11,r11,MSR_PR;						     \
> -	mr	r11,r8;							     \
> -	mfspr	r8,exc_level##_SPRG;					     \
> +	mfspr	r9,exc_level_srr1;	/* check whether user or kernel    */\
> +	andi.	r9,r9,MSR_PR;						     \
>  	beq	1f;							     \
>  	/* COMING FROM USER MODE */					     \
> -	mfspr	r11,SPRN_SPRG3;		/* if from user, start at top of   */\
> -	lwz	r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\
> -	addi	r11,r11,THREAD_SIZE;					     \
> -1:	subi	r11,r11,INT_FRAME_SIZE;	/* Allocate an exception frame     */\
> +	lwz	r9,GPR9(r8);		/* copy regs from exception stack  */\
> +	stw	r9,GPR9(r11);						     \
> +	lwz	r9,GPR10(r8);						     \
> +	stw	r9,GPR10(r11);						     \
> +	lwz	r9,GPR11(r8);						     \
> +	stw	r9,GPR11(r11);						     \
> +	b	2f;							     \

Are we concerned with performances here or not ? Because using the same
reg all along isn't the best ... 

> +	/* COMING FROM PRIV MODE */					     \
> +1:	lwz	r9,TI_FLAGS-THREAD_SIZE(r11);				     \
> +	stw	r9,TI_FLAGS-THREAD_SIZE(r8);				     \
> +	lwz	r9,TI_PREEMPT-THREAD_SIZE(r11);				     \
> +	stw	r9,TI_PREEMPT-THREAD_SIZE(r8);				     \
> +	lwz	r9,TI_TASK-THREAD_SIZE(r11);				     \
> +	stw	r9,TI_TASK-THREAD_SIZE(r8);				     \
> +	mr	r11,r8;							     \

Don't you want to also stick in HARDIRQ_OFFSET in preempt_count or
leave that to C code ? Also, you might want at one point to tell
lockdep about IRQs being disabled in case they were enabled when
you took the exception. (Do that after you've saved enough
stuff of crouse)

Ben.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] [POWERPC] Rework EXC_LEVEL_EXCEPTION_PROLOG code
  2008-04-30 21:54 ` Benjamin Herrenschmidt
@ 2008-04-30 22:13   ` Kumar Gala
  2008-04-30 22:18     ` Benjamin Herrenschmidt
                       ` (2 more replies)
  0 siblings, 3 replies; 26+ messages in thread
From: Kumar Gala @ 2008-04-30 22:13 UTC (permalink / raw)
  To: benh; +Cc: linuxppc-dev, Paul Mackerras


On Apr 30, 2008, at 4:54 PM, Benjamin Herrenschmidt wrote:
>
> On Wed, 2008-04-30 at 04:27 -0500, Kumar Gala wrote:
>> * Cleanup the code a bit my allocating an INT_FRAME on our exception
>>  stack there by make references go from GPR11-INT_FRAME_SIZE(r8) to
>>  just GPR(r8)
>            ^^ 11 ?

r8 is correct for my example.  My point is s/GPR11-INT_FRAME_SIZE/GPR11/

>> * simplify {lvl}_transfer_to_handler code by moving the copying of  
>> the
>>  temp registers we use if we come from user space into the PROLOG
>> * If the exception came from kernel mode copy thread_info flags,
>>  preempt, and task pointer from the process thread_info.
>>
>> Signed-off-by: Kumar Gala <galak@kernel.crashing.org>
>> ---
>>
>> I'm not sure if the copying of TI_FLAGS, TI_PREEMPT, and TI_TASK
>> are really needed.  I'm a bit concerned what to do if we get a
>> CriticalInput while in kernel mode and the handler causes a  
>> reschedule.
>
> Well, reschedule or signal, either way, you can't handle them on the  
> way
> out. Maybe an option there is to set the flag in the normal kernel
> stack's thread info and trigger an interrupt asap via the PIT so  
> things
> get handled ?

If we don't handle reschedule or signal will we actually not function  
properly?  I assume reschedule isn't an issue, but could we lose a  
signal?

>> diff --git a/arch/powerpc/kernel/head_booke.h b/arch/powerpc/kernel/ 
>> head_booke.h
>> index d647e05..78baec5 100644
>> --- a/arch/powerpc/kernel/head_booke.h
>> +++ b/arch/powerpc/kernel/head_booke.h
>> @@ -78,12 +78,12 @@
>> 	slwi	r8,r8,2;				\
>> 	addis	r8,r8,level##_STACK_TOP@ha;		\
>> 	lwz	r8,level##_STACK_TOP@l(r8);		\
>> -	addi	r8,r8,THREAD_SIZE;
>> +	addi	r8,r8,THREAD_SIZE-INT_FRAME_SIZE;
>> #else
>> #define BOOKE_LOAD_EXC_LEVEL_STACK(level)		\
>> 	lis	r8,level##_STACK_TOP@ha;		\
>> 	lwz	r8,level##_STACK_TOP@l(r8);		\
>> -	addi	r8,r8,THREAD_SIZE;
>> +	addi	r8,r8,THREAD_SIZE-INT_FRAME_SIZE;
>> #endif
>
> Nothing specific to your patch, but those level##_STACK_TOP seem to
> be pretty badly named if you end up -adding- THREAD_SIZE to actually
> get to the stack's top. Do they really contain the stack top or do
> they in fact contain the stack base/bottom ?

That might be stale from how the old code worked.  I can never  
remember what we consider the top and bottom of the stack.  (please  
remind me and I'll fixup the comments and variables).

>> /*
>> @@ -97,22 +97,35 @@
>> #define EXC_LEVEL_EXCEPTION_PROLOG(exc_level, exc_level_srr0,  
>> exc_level_srr1) \
>> 	mtspr	exc_level##_SPRG,r8;					     \
>> 	BOOKE_LOAD_EXC_LEVEL_STACK(exc_level);/* r8 points to the  
>> exc_level stack*/ \
>> -	stw	r10,GPR10-INT_FRAME_SIZE(r8);				     \
>> -	stw	r11,GPR11-INT_FRAME_SIZE(r8);				     \
>> +	stw	r9,GPR9(r8);		/* save various registers	   */\
>> +	stw	r10,GPR10(r8);						     \
>> +	stw	r11,GPR11(r8);						     \
>> +	mfspr	r11,SPRN_SPRG3;		/* if from user, start at top of   */\
>> +	lwz	r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\
>> +	addi	r11,r11,THREAD_SIZE-INT_FRAME_SIZE; /* Alloc exception frm */\
>
> So you do the above whether it will actually be needed or not right ?

good point I can move this down.

>> 	mfcr	r10;			/* save CR in r10 for now	   */\
>> -	mfspr	r11,exc_level_srr1;	/* check whether user or kernel    */\
>> -	andi.	r11,r11,MSR_PR;						     \
>> -	mr	r11,r8;							     \
>> -	mfspr	r8,exc_level##_SPRG;					     \
>> +	mfspr	r9,exc_level_srr1;	/* check whether user or kernel    */\
>> +	andi.	r9,r9,MSR_PR;						     \
>> 	beq	1f;							     \
>> 	/* COMING FROM USER MODE */					     \
>> -	mfspr	r11,SPRN_SPRG3;		/* if from user, start at top of   */\
>> -	lwz	r11,THREAD_INFO-THREAD(r11); /* this thread's kernel stack */\
>> -	addi	r11,r11,THREAD_SIZE;					     \
>> -1:	subi	r11,r11,INT_FRAME_SIZE;	/* Allocate an exception frame      
>> */\
>> +	lwz	r9,GPR9(r8);		/* copy regs from exception stack  */\
>> +	stw	r9,GPR9(r11);						     \
>> +	lwz	r9,GPR10(r8);						     \
>> +	stw	r9,GPR10(r11);						     \
>> +	lwz	r9,GPR11(r8);						     \
>> +	stw	r9,GPR11(r11);						     \
>> +	b	2f;							     \
>
> Are we concerned with performances here or not ? Because using the  
> same
> reg all along isn't the best ...

I thought about that.  The only case its a bit of an issue is for  
CriticalInput.  I don't see Watchdog, Debug, or MachineCheck as being  
performance critical.  I can use r10 if I save and restored the CR or  
some other register.

>> +	/* COMING FROM PRIV MODE */					     \
>> +1:	lwz	r9,TI_FLAGS-THREAD_SIZE(r11);				     \
>> +	stw	r9,TI_FLAGS-THREAD_SIZE(r8);				     \
>> +	lwz	r9,TI_PREEMPT-THREAD_SIZE(r11);				     \
>> +	stw	r9,TI_PREEMPT-THREAD_SIZE(r8);				     \
>> +	lwz	r9,TI_TASK-THREAD_SIZE(r11);				     \
>> +	stw	r9,TI_TASK-THREAD_SIZE(r8);				     \
>> +	mr	r11,r8;							     \
>
> Don't you want to also stick in HARDIRQ_OFFSET in preempt_count or
> leave that to C code ?

just leaving it to C code.  I assume that preempt_count should be the  
same value on entry and exit.  Do think we should set HARDIRQ_OFFSET  
for debug level exceptions?

> Also, you might want at one point to tell
> lockdep about IRQs being disabled in case they were enabled when
> you took the exception. (Do that after you've saved enough
> stuff of crouse)

Where should I look for an example of how to convey that information  
to lockdep?

- k

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] [POWERPC] Rework EXC_LEVEL_EXCEPTION_PROLOG code
  2008-04-30 22:13   ` Kumar Gala
@ 2008-04-30 22:18     ` Benjamin Herrenschmidt
  2008-04-30 23:24       ` Kumar Gala
  2008-04-30 22:22     ` Benjamin Herrenschmidt
  2008-04-30 23:47     ` Paul Mackerras
  2 siblings, 1 reply; 26+ messages in thread
From: Benjamin Herrenschmidt @ 2008-04-30 22:18 UTC (permalink / raw)
  To: Kumar Gala; +Cc: linuxppc-dev, Paul Mackerras


On Wed, 2008-04-30 at 17:13 -0500, Kumar Gala wrote:
> >>  stack there by make references go from GPR11-INT_FRAME_SIZE(r8) to
> >>  just GPR(r8)
> >            ^^ 11 ?
> 
> r8 is correct for my example.  My point is
> s/GPR11-INT_FRAME_SIZE/GPR11/

Sorry, I put the ^^ in the wrong place :-) I meant the example should
say GPR11(r8)

Ben.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] [POWERPC] Rework EXC_LEVEL_EXCEPTION_PROLOG code
  2008-04-30 22:13   ` Kumar Gala
  2008-04-30 22:18     ` Benjamin Herrenschmidt
@ 2008-04-30 22:22     ` Benjamin Herrenschmidt
  2008-04-30 23:28       ` Kumar Gala
  2008-04-30 23:47     ` Paul Mackerras
  2 siblings, 1 reply; 26+ messages in thread
From: Benjamin Herrenschmidt @ 2008-04-30 22:22 UTC (permalink / raw)
  To: Kumar Gala; +Cc: linuxppc-dev, Paul Mackerras


On Wed, 2008-04-30 at 17:13 -0500, Kumar Gala wrote:
> 
> If we don't handle reschedule or signal will we actually not
> function  
> properly?  I assume reschedule isn't an issue, but could we lose a  
> signal?

Well, you can be in trouble if you lose TIF_SIGPENDING. You don't -have-
to actually take the signal, it will then be done on the next return to
userspace (next timer interrupt, next syscall, ...), but TIF_SIGPENDING
must not be lost. 

> > Nothing specific to your patch, but those level##_STACK_TOP seem to
> > be pretty badly named if you end up -adding- THREAD_SIZE to actually
> > get to the stack's top. Do they really contain the stack top or do
> > they in fact contain the stack base/bottom ?
> 
> That might be stale from how the old code worked.  I can never  
> remember what we consider the top and bottom of the stack.  (please  
> remind me and I'll fixup the comments and variables).

Well, top is the high address and bottom is the low address in my view
of things :-)

> I thought about that.  The only case its a bit of an issue is for  
> CriticalInput.  I don't see Watchdog, Debug, or MachineCheck as
> being  
> performance critical.  I can use r10 if I save and restored the CR
> or  
> some other register.

You can save CR first yeah and then interleave a bit using 2 registers
(ie. 2 loads, 2 stores, or something like load 1, load 2, store 1, load
3, store 2, etc...)

> just leaving it to C code.  I assume that preempt_count should be
> the  
> same value on entry and exit.  Do think we should set HARDIRQ_OFFSET  
> for debug level exceptions?

Well... I think all those things should run with EE disabled, and thus
be considered as far as linux is concerned, as interrupts. So the C code
should do irq_enter/exit.

> Where should I look for an example of how to convey that information  
> to lockdep?

The irqtrace stuff in the asm would do maybe unless transfer_to_handler
does it already, I have to look at the code with all your patches
applied...

Ben.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] [POWERPC] Rework EXC_LEVEL_EXCEPTION_PROLOG code
  2008-04-30 22:18     ` Benjamin Herrenschmidt
@ 2008-04-30 23:24       ` Kumar Gala
  0 siblings, 0 replies; 26+ messages in thread
From: Kumar Gala @ 2008-04-30 23:24 UTC (permalink / raw)
  To: benh; +Cc: linuxppc-dev, Paul Mackerras


On Apr 30, 2008, at 5:18 PM, Benjamin Herrenschmidt wrote:
>
> On Wed, 2008-04-30 at 17:13 -0500, Kumar Gala wrote:
>>>> stack there by make references go from GPR11-INT_FRAME_SIZE(r8) to
>>>> just GPR(r8)
>>>           ^^ 11 ?
>>
>> r8 is correct for my example.  My point is
>> s/GPR11-INT_FRAME_SIZE/GPR11/
>
> Sorry, I put the ^^ in the wrong place :-) I meant the example should
> say GPR11(r8)

ah, that makes sense.  Will fix in the next revision.

- k

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] [POWERPC] Rework EXC_LEVEL_EXCEPTION_PROLOG code
  2008-04-30 22:22     ` Benjamin Herrenschmidt
@ 2008-04-30 23:28       ` Kumar Gala
  2008-04-30 23:52         ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 26+ messages in thread
From: Kumar Gala @ 2008-04-30 23:28 UTC (permalink / raw)
  To: benh; +Cc: linuxppc-dev, Paul Mackerras


On Apr 30, 2008, at 5:22 PM, Benjamin Herrenschmidt wrote:
>
> On Wed, 2008-04-30 at 17:13 -0500, Kumar Gala wrote:
>>
>> If we don't handle reschedule or signal will we actually not
>> function
>> properly?  I assume reschedule isn't an issue, but could we lose a
>> signal?
>
> Well, you can be in trouble if you lose TIF_SIGPENDING. You don't - 
> have-
> to actually take the signal, it will then be done on the next return  
> to
> userspace (next timer interrupt, next syscall, ...), but  
> TIF_SIGPENDING
> must not be lost.

Interesting.  It seems like causing a signal from an async interrupt  
from kernel space shouldn't be allowed.  At worse we can store back  
the flags into the "real" thread_info.

>>> Nothing specific to your patch, but those level##_STACK_TOP seem to
>>> be pretty badly named if you end up -adding- THREAD_SIZE to actually
>>> get to the stack's top. Do they really contain the stack top or do
>>> they in fact contain the stack base/bottom ?
>>
>> That might be stale from how the old code worked.  I can never
>> remember what we consider the top and bottom of the stack.  (please
>> remind me and I'll fixup the comments and variables).
>
> Well, top is the high address and bottom is the low address in my view
> of things :-)

yeah, so we are now pointing to stack bottom.  will fix this up.

>> I thought about that.  The only case its a bit of an issue is for
>> CriticalInput.  I don't see Watchdog, Debug, or MachineCheck as
>> being
>> performance critical.  I can use r10 if I save and restored the CR
>> or
>> some other register.
>
> You can save CR first yeah and then interleave a bit using 2 registers
> (ie. 2 loads, 2 stores, or something like load 1, load 2, store 1,  
> load
> 3, store 2, etc...)

Ok, will look at that.

>> just leaving it to C code.  I assume that preempt_count should be
>> the
>> same value on entry and exit.  Do think we should set HARDIRQ_OFFSET
>> for debug level exceptions?
>
> Well... I think all those things should run with EE disabled, and thus
> be considered as far as linux is concerned, as interrupts. So the C  
> code
> should do irq_enter/exit.

I agree we run all these interrupt levels with EE disabled.  Not sure  
I follow what you mean by irq_enter/ext.

>> Where should I look for an example of how to convey that information
>> to lockdep?
>
> The irqtrace stuff in the asm would do maybe unless  
> transfer_to_handler
> does it already, I have to look at the code with all your patches
> applied...

ok.

- k

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] [POWERPC] Rework EXC_LEVEL_EXCEPTION_PROLOG code
  2008-04-30 22:13   ` Kumar Gala
  2008-04-30 22:18     ` Benjamin Herrenschmidt
  2008-04-30 22:22     ` Benjamin Herrenschmidt
@ 2008-04-30 23:47     ` Paul Mackerras
  2008-05-01  6:01       ` Kumar Gala
  2 siblings, 1 reply; 26+ messages in thread
From: Paul Mackerras @ 2008-04-30 23:47 UTC (permalink / raw)
  To: Kumar Gala; +Cc: linuxppc-dev

Kumar Gala writes:

> If we don't handle reschedule or signal will we actually not function  
> properly?  I assume reschedule isn't an issue, but could we lose a  
> signal?

It depends on whether a critical or machine check handler can ever do
anything to generate a signal or a reschedule.  If they can't, then
there is no problem.

If they can, then we have to be very careful.  If a critical or
machine check happens at a point where normal interrupts are disabled
then we have to be extremely careful not to do anything that the code
we've interrupted assumes can't happen - so we'd better not try to
take any spinlocks, for example.  That severely limits what the
handler can do.  It probably shouldn't even call printk, for
instance, or wake any process up, and definitely shouldn't call
schedule (or schedule_preempt) on the way out.

If the critical/mcheck interrupt happens at a point where a normal
interrupt could have happened (including when userspace is running)
then we could treat it pretty much as a normal interrupt, including
handling signals or reschedules on the way out if appropriate.

Paul.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] [POWERPC] Rework EXC_LEVEL_EXCEPTION_PROLOG code
  2008-04-30 23:28       ` Kumar Gala
@ 2008-04-30 23:52         ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 26+ messages in thread
From: Benjamin Herrenschmidt @ 2008-04-30 23:52 UTC (permalink / raw)
  To: Kumar Gala; +Cc: linuxppc-dev, Paul Mackerras


On Wed, 2008-04-30 at 18:28 -0500, Kumar Gala wrote:
> > Well, you can be in trouble if you lose TIF_SIGPENDING. You don't - 
> > have-
> > to actually take the signal, it will then be done on the next
> return  
> > to
> > userspace (next timer interrupt, next syscall, ...), but  
> > TIF_SIGPENDING
> > must not be lost.
> 
> Interesting.  It seems like causing a signal from an async interrupt  
> from kernel space shouldn't be allowed.  At worse we can store back  
> the flags into the "real" thread_info.

Why shouldn't it be allowed ? We do store back flags in the real thread
info when doing irqstacks... though I just noticed we don't do that for
softirqs... paulus, isn't there a chance that we may lose a signal
there ? I wouldn't expect softirq's to often send signals to current
thread info (ie, they would use targetted flag sending which means the
SIGPENDING flag will be set in the right thread_info obtained from the
target task struct) but it still sounds safer to copy the flags back
just in case...

> yeah, so we are now pointing to stack bottom.  will fix this up.

Call it "base" rather than "bottom", sounds nicer :-)

> I agree we run all these interrupt levels with EE disabled.  Not
> sure  
> I follow what you mean by irq_enter/ext.

That should do the right thing with preempt_count(), look at do_IRQ().

Ben.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] [POWERPC] Rework EXC_LEVEL_EXCEPTION_PROLOG code
  2008-04-30 23:47     ` Paul Mackerras
@ 2008-05-01  6:01       ` Kumar Gala
  2008-05-01  7:53         ` Benjamin Herrenschmidt
  2008-05-01  8:24         ` Paul Mackerras
  0 siblings, 2 replies; 26+ messages in thread
From: Kumar Gala @ 2008-05-01  6:01 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: linuxppc-dev


On Apr 30, 2008, at 6:47 PM, Paul Mackerras wrote:
> Kumar Gala writes:
>
>> If we don't handle reschedule or signal will we actually not function
>> properly?  I assume reschedule isn't an issue, but could we lose a
>> signal?
>
> It depends on whether a critical or machine check handler can ever do
> anything to generate a signal or a reschedule.  If they can't, then
> there is no problem.

They can if the come from user space.  I'm question what it means to  
send a signal based on receiving an async exception.

> If they can, then we have to be very careful.  If a critical or
> machine check happens at a point where normal interrupts are disabled
> then we have to be extremely careful not to do anything that the code
> we've interrupted assumes can't happen - so we'd better not try to
> take any spinlocks, for example.  That severely limits what the
> handler can do.  It probably shouldn't even call printk, for
> instance, or wake any process up, and definitely shouldn't call
> schedule (or schedule_preempt) on the way out.

how do we provide someone stick a kprobe on such code today?

> If the critical/mcheck interrupt happens at a point where a normal
> interrupt could have happened (including when userspace is running)
> then we could treat it pretty much as a normal interrupt, including
> handling signals or reschedules on the way out if appropriate.

So we have 4 actual exceptions:
* CriticalInput (some external device signaled this.  There are two  
concepts of critical.  One is error the other is high priority)   
However this would have the same caveats as any ExternalInput handler.

* Watchdog - pretty severe if this fires.

* Debug - user space debug is pretty straight forward.  However we  
have features like kprobes that require kernel level support.

* MachineCheck - pretty serve if this fires.

So I'm not if there is any good way to preclude the handlers  
associated with these exceptions from doing the things you listed.

- k

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] [POWERPC] Rework EXC_LEVEL_EXCEPTION_PROLOG code
  2008-05-01  6:01       ` Kumar Gala
@ 2008-05-01  7:53         ` Benjamin Herrenschmidt
  2008-05-01 13:22           ` Kumar Gala
  2008-05-01  8:24         ` Paul Mackerras
  1 sibling, 1 reply; 26+ messages in thread
From: Benjamin Herrenschmidt @ 2008-05-01  7:53 UTC (permalink / raw)
  To: Kumar Gala; +Cc: linuxppc-dev, Paul Mackerras


On Thu, 2008-05-01 at 01:01 -0500, Kumar Gala wrote:
> On Apr 30, 2008, at 6:47 PM, Paul Mackerras wrote:
> > Kumar Gala writes:
> >
> >> If we don't handle reschedule or signal will we actually not function
> >> properly?  I assume reschedule isn't an issue, but could we lose a
> >> signal?
> >
> > It depends on whether a critical or machine check handler can ever do
> > anything to generate a signal or a reschedule.  If they can't, then
> > there is no problem.
> 
> They can if the come from user space.  I'm question what it means to  
> send a signal based on receiving an async exception.

For "normal" async exceptions (ie. external interrupts), it could be
anything... EE -> network rx -> SIGIO, or you get out of EE, hit the
softirq path, figure out there was something there waiting processing
(ie, some tty task) and that results in a signal being sent...

> So we have 4 actual exceptions:
> * CriticalInput (some external device signaled this.  There are two  
> concepts of critical.  One is error the other is high priority)   
> However this would have the same caveats as any ExternalInput handler.

No, it's worse. It can interrupt code that normally has
local_irq_disabled() and thus doesn't expect to be interrupted. That
means that everything becomes unsafe including locks etc....

Note that driver that want to make active use of that probably want some
explicit local_crit_irq_disable/enable functions to be able to implement
some sort of synchronization.

> * Watchdog - pretty severe if this fires.
> 
> * Debug - user space debug is pretty straight forward.  However we  
> have features like kprobes that require kernel level support.

Which means we have to be extra careful, in fact, I consider it a design
bug of BookE to have made debug be a critical interrupt...

> * MachineCheck - pretty serve if this fires.
> 
> So I'm not if there is any good way to preclude the handlers  
> associated with these exceptions from doing the things you listed.

Ben.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] [POWERPC] Rework EXC_LEVEL_EXCEPTION_PROLOG code
  2008-05-01  6:01       ` Kumar Gala
  2008-05-01  7:53         ` Benjamin Herrenschmidt
@ 2008-05-01  8:24         ` Paul Mackerras
  2008-05-01 13:17           ` Kumar Gala
  1 sibling, 1 reply; 26+ messages in thread
From: Paul Mackerras @ 2008-05-01  8:24 UTC (permalink / raw)
  To: Kumar Gala; +Cc: linuxppc-dev

Kumar Gala writes:

> > It depends on whether a critical or machine check handler can ever do
> > anything to generate a signal or a reschedule.  If they can't, then
> > there is no problem.
> 
> They can if the come from user space.  I'm question what it means to  
> send a signal based on receiving an async exception.

The most common cases are (a) something that ultimately generates
input on a tty (e.g. a character arriving on a serial port) and that
input turns out to be a ^C or similar, or (b) something that signals
I/O completion and the program doing the I/O has requested
notification by a SIGIO.  But in general any driver code can send a
signal to userspace if it wants.

> > If they can, then we have to be very careful.  If a critical or
> > machine check happens at a point where normal interrupts are disabled
> > then we have to be extremely careful not to do anything that the code
> > we've interrupted assumes can't happen - so we'd better not try to
> > take any spinlocks, for example.  That severely limits what the
> > handler can do.  It probably shouldn't even call printk, for
> > instance, or wake any process up, and definitely shouldn't call
> > schedule (or schedule_preempt) on the way out.
> 
> how do we provide someone stick a kprobe on such code today?

-ENOPARSE

> So I'm not if there is any good way to preclude the handlers  
> associated with these exceptions from doing the things you listed.

In that case, you'd better expect to see system freezes, memory
corruption and general instability.

Paul.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] [POWERPC] Rework EXC_LEVEL_EXCEPTION_PROLOG code
  2008-05-01  8:24         ` Paul Mackerras
@ 2008-05-01 13:17           ` Kumar Gala
  2008-05-01 16:14             ` Scott Wood
  2008-05-01 23:34             ` Paul Mackerras
  0 siblings, 2 replies; 26+ messages in thread
From: Kumar Gala @ 2008-05-01 13:17 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: linuxppc-dev


On May 1, 2008, at 3:24 AM, Paul Mackerras wrote:

> Kumar Gala writes:
>
>>> It depends on whether a critical or machine check handler can ever  
>>> do
>>> anything to generate a signal or a reschedule.  If they can't, then
>>> there is no problem.
>>
>> They can if the come from user space.  I'm question what it means to
>> send a signal based on receiving an async exception.
>
> The most common cases are (a) something that ultimately generates
> input on a tty (e.g. a character arriving on a serial port) and that
> input turns out to be a ^C or similar, or (b) something that signals
> I/O completion and the program doing the I/O has requested
> notification by a SIGIO.  But in general any driver code can send a
> signal to userspace if it wants.

ok.  Was just wondering how the async exception know that the signal  
it wanted to send belonged to the particular process that is running.   
But I guess there are cases that the signal is really intended for who  
ever is currently running?

>>> If they can, then we have to be very careful.  If a critical or
>>> machine check happens at a point where normal interrupts are  
>>> disabled
>>> then we have to be extremely careful not to do anything that the  
>>> code
>>> we've interrupted assumes can't happen - so we'd better not try to
>>> take any spinlocks, for example.  That severely limits what the
>>> handler can do.  It probably shouldn't even call printk, for
>>> instance, or wake any process up, and definitely shouldn't call
>>> schedule (or schedule_preempt) on the way out.

Do we ensure that synchronous exceptions will not occur in these cases  
that kernel code things interrupts are disabled in?

>> how do we provide someone stick a kprobe on such code today?
>
> -ENOPARSE

I was asking how we prevent the cases you were describing working w/ 
kprobes today.  Since it ends up single stepping in kernel codes its  
possible that someone sets a kprobe in code that shouldn't be  
interrupted, yet we'd cause a SingleStep Exception.

>> So I'm not if there is any good way to preclude the handlers
>> associated with these exceptions from doing the things you listed.
>
> In that case, you'd better expect to see system freezes, memory
> corruption and general instability.

So the case I'm trying to make work is debug and kprobes.  This case  
seems like we have pretty good control over what the "handler" does.   
Are there checks we can add to BUG_ON() so we are at least aware of  
the code attempts to do something it shouldnt?

- k

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] [POWERPC] Rework EXC_LEVEL_EXCEPTION_PROLOG code
  2008-05-01  7:53         ` Benjamin Herrenschmidt
@ 2008-05-01 13:22           ` Kumar Gala
  2008-05-01 14:26             ` Josh Boyer
  2008-05-05 11:57             ` Gabriel Paubert
  0 siblings, 2 replies; 26+ messages in thread
From: Kumar Gala @ 2008-05-01 13:22 UTC (permalink / raw)
  To: benh; +Cc: linuxppc-dev, Paul Mackerras

>> So we have 4 actual exceptions:
>> * CriticalInput (some external device signaled this.  There are two
>> concepts of critical.  One is error the other is high priority)
>> However this would have the same caveats as any ExternalInput  
>> handler.
>
> No, it's worse. It can interrupt code that normally has
> local_irq_disabled() and thus doesn't expect to be interrupted. That
> means that everything becomes unsafe including locks etc....

Fair.  Should local_irq_disable() clear MSR_SE & MSR_BE on classic  
parts?

> Note that driver that want to make active use of that probably want  
> some
> explicit local_crit_irq_disable/enable functions to be able to  
> implement
> some sort of synchronization.

Or we could just have local_irq_disable -- clear MSR_EE and MSR_CE

>> * Watchdog - pretty severe if this fires.
>>
>> * Debug - user space debug is pretty straight forward.  However we
>> have features like kprobes that require kernel level support.
>
> Which means we have to be extra careful, in fact, I consider it a  
> design
> bug of BookE to have made debug be a critical interrupt...

I consider the whole BookE debug arch a design bug :)

- k

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] [POWERPC] Rework EXC_LEVEL_EXCEPTION_PROLOG code
  2008-05-01 13:22           ` Kumar Gala
@ 2008-05-01 14:26             ` Josh Boyer
  2008-05-01 14:31               ` Kumar Gala
  2008-05-05 11:57             ` Gabriel Paubert
  1 sibling, 1 reply; 26+ messages in thread
From: Josh Boyer @ 2008-05-01 14:26 UTC (permalink / raw)
  To: Kumar Gala; +Cc: Paul Mackerras, linuxppc-dev

On Thu, 2008-05-01 at 08:22 -0500, Kumar Gala wrote:
> >> So we have 4 actual exceptions:
> >> * CriticalInput (some external device signaled this.  There are two
> >> concepts of critical.  One is error the other is high priority)
> >> However this would have the same caveats as any ExternalInput  
> >> handler.
> >
> > No, it's worse. It can interrupt code that normally has
> > local_irq_disabled() and thus doesn't expect to be interrupted. That
> > means that everything becomes unsafe including locks etc....
> 
> Fair.  Should local_irq_disable() clear MSR_SE & MSR_BE on classic  
> parts?
> 
> > Note that driver that want to make active use of that probably want  
> > some
> > explicit local_crit_irq_disable/enable functions to be able to  
> > implement
> > some sort of synchronization.
> 
> Or we could just have local_irq_disable -- clear MSR_EE and MSR_CE

That sort of defeats the purpose of using critical interrupts in the
"higher priority" sense, doesn't it?  Seems it would effectively
eliminate that level altogether.  And it would also mask off the
watchdog interrupt, which I don't think you want to do.

Ben suggested local_crit_irq_disable.  That would suffice for drivers
that know they are configured as CE and need to do locking primitives,
but that isn't very common.  On 4xx there isn't even an interface for a
driver to request it's interrupts be set to CE and we init all UICs to
have UIC_CR be 0.

So for 4xx at least, most of this discussion is theoretical for now.
However, I can see value in having CEs being used for certain devices,
etc so it would be nice to figure it out in the long run.

josh

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] [POWERPC] Rework EXC_LEVEL_EXCEPTION_PROLOG code
  2008-05-01 14:26             ` Josh Boyer
@ 2008-05-01 14:31               ` Kumar Gala
  0 siblings, 0 replies; 26+ messages in thread
From: Kumar Gala @ 2008-05-01 14:31 UTC (permalink / raw)
  To: jwboyer; +Cc: Paul Mackerras, linuxppc-dev


On May 1, 2008, at 9:26 AM, Josh Boyer wrote:

> On Thu, 2008-05-01 at 08:22 -0500, Kumar Gala wrote:
>>>> So we have 4 actual exceptions:
>>>> * CriticalInput (some external device signaled this.  There are two
>>>> concepts of critical.  One is error the other is high priority)
>>>> However this would have the same caveats as any ExternalInput
>>>> handler.
>>>
>>> No, it's worse. It can interrupt code that normally has
>>> local_irq_disabled() and thus doesn't expect to be interrupted. That
>>> means that everything becomes unsafe including locks etc....
>>
>> Fair.  Should local_irq_disable() clear MSR_SE & MSR_BE on classic
>> parts?
>>
>>> Note that driver that want to make active use of that probably want
>>> some
>>> explicit local_crit_irq_disable/enable functions to be able to
>>> implement
>>> some sort of synchronization.
>>
>> Or we could just have local_irq_disable -- clear MSR_EE and MSR_CE
>
> That sort of defeats the purpose of using critical interrupts in the
> "higher priority" sense, doesn't it?  Seems it would effectively
> eliminate that level altogether.  And it would also mask off the
> watchdog interrupt, which I don't think you want to do.
>
> Ben suggested local_crit_irq_disable.  That would suffice for drivers
> that know they are configured as CE and need to do locking primitives,
> but that isn't very common.  On 4xx there isn't even an interface  
> for a
> driver to request it's interrupts be set to CE and we init all UICs to
> have UIC_CR be 0.
>
> So for 4xx at least, most of this discussion is theoretical for now.
> However, I can see value in having CEs being used for certain devices,
> etc so it would be nice to figure it out in the long run.

I agree.  my first priority is to get debug interrupts cleaned up so  
we can kprobe and do other debug things in the kernel.

I'm only aware of one case of someone asking for critical inputs on  
our parts w/linux.

- k

- k

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] [POWERPC] Rework EXC_LEVEL_EXCEPTION_PROLOG code
  2008-05-01 13:17           ` Kumar Gala
@ 2008-05-01 16:14             ` Scott Wood
  2008-05-01 16:22               ` Scott Wood
  2008-05-01 16:33               ` Kumar Gala
  2008-05-01 23:34             ` Paul Mackerras
  1 sibling, 2 replies; 26+ messages in thread
From: Scott Wood @ 2008-05-01 16:14 UTC (permalink / raw)
  To: Kumar Gala; +Cc: linuxppc-dev, Paul Mackerras

On Thu, May 01, 2008 at 08:17:07AM -0500, Kumar Gala wrote:
> On May 1, 2008, at 3:24 AM, Paul Mackerras wrote:
> >The most common cases are (a) something that ultimately generates
> >input on a tty (e.g. a character arriving on a serial port) and that
> >input turns out to be a ^C or similar, or (b) something that signals
> >I/O completion and the program doing the I/O has requested
> >notification by a SIGIO.  But in general any driver code can send a
> >signal to userspace if it wants.

And, of course, SIGALRM and similar timer mechanisms.

> ok.  Was just wondering how the async exception know that the signal  
> it wanted to send belonged to the particular process that is running.   
> But I guess there are cases that the signal is really intended for who  
> ever is currently running?

No, it knows based on its own data structures who it's intended for --
and sometimes that happens to be the currently running process.

-Scott

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] [POWERPC] Rework EXC_LEVEL_EXCEPTION_PROLOG code
  2008-05-01 16:14             ` Scott Wood
@ 2008-05-01 16:22               ` Scott Wood
  2008-05-01 16:33               ` Kumar Gala
  1 sibling, 0 replies; 26+ messages in thread
From: Scott Wood @ 2008-05-01 16:22 UTC (permalink / raw)
  To: Kumar Gala; +Cc: linuxppc-dev, Paul Mackerras

On Thu, May 01, 2008 at 11:14:51AM -0500, Scott Wood wrote:
> On Thu, May 01, 2008 at 08:17:07AM -0500, Kumar Gala wrote:
> > ok.  Was just wondering how the async exception know that the signal  
> > it wanted to send belonged to the particular process that is running.   
> > But I guess there are cases that the signal is really intended for who  
> > ever is currently running?
> 
> No, it knows based on its own data structures who it's intended for --
> and sometimes that happens to be the currently running process.

Oh, except for things like SIGXCPU and SIGVTALRM, which do target the
currently running process.

-Scott

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] [POWERPC] Rework EXC_LEVEL_EXCEPTION_PROLOG code
  2008-05-01 16:14             ` Scott Wood
  2008-05-01 16:22               ` Scott Wood
@ 2008-05-01 16:33               ` Kumar Gala
  2008-05-01 16:42                 ` Scott Wood
  1 sibling, 1 reply; 26+ messages in thread
From: Kumar Gala @ 2008-05-01 16:33 UTC (permalink / raw)
  To: Scott Wood; +Cc: linuxppc-dev, Paul Mackerras


On May 1, 2008, at 11:14 AM, Scott Wood wrote:

> On Thu, May 01, 2008 at 08:17:07AM -0500, Kumar Gala wrote:
>> On May 1, 2008, at 3:24 AM, Paul Mackerras wrote:
>>> The most common cases are (a) something that ultimately generates
>>> input on a tty (e.g. a character arriving on a serial port) and that
>>> input turns out to be a ^C or similar, or (b) something that signals
>>> I/O completion and the program doing the I/O has requested
>>> notification by a SIGIO.  But in general any driver code can send a
>>> signal to userspace if it wants.
>
> And, of course, SIGALRM and similar timer mechanisms.
>
>> ok.  Was just wondering how the async exception know that the signal
>> it wanted to send belonged to the particular process that is running.
>> But I guess there are cases that the signal is really intended for  
>> who
>> ever is currently running?
>
> No, it knows based on its own data structures who it's intended for --
> and sometimes that happens to be the currently running process.

Let me ask the question differently.  Are there cases that some event  
occurs in the system and a signal is delivered to the current process  
regardless of what that process is.  I'm guessing so, based on the tty  
example.

So for the specific case I'm looking at (kprobes & debug exceptions  
from kernel space), I think its reasonable to BUG_ON() if thread_info- 
 >flags changes such that TIF_SIGPENDING or TIF_NEED_RESCHED get set  
we aren't from user-space.

- k

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] [POWERPC] Rework EXC_LEVEL_EXCEPTION_PROLOG code
  2008-05-01 16:33               ` Kumar Gala
@ 2008-05-01 16:42                 ` Scott Wood
  2008-05-01 17:48                   ` Kumar Gala
  0 siblings, 1 reply; 26+ messages in thread
From: Scott Wood @ 2008-05-01 16:42 UTC (permalink / raw)
  To: Kumar Gala; +Cc: linuxppc-dev, Paul Mackerras

On Thu, May 01, 2008 at 11:33:34AM -0500, Kumar Gala wrote:
> Let me ask the question differently.  Are there cases that some event  
> occurs in the system and a signal is delivered to the current process  
> regardless of what that process is.

Yes, timers that expire based on CPU usage.

>  I'm guessing so, based on the tty example.

I don't think tty interrupts would -- it'd go to the process group
associated with the tty.

> So for the specific case I'm looking at (kprobes & debug exceptions  
> from kernel space), I think its reasonable to BUG_ON() if thread_info- 
> >flags changes such that TIF_SIGPENDING or TIF_NEED_RESCHED get set  
> we aren't from user-space.

Why?  It may not happen currently, but it seems more future-proof to just
copy the flags.  And it's certainly not reasonable for normal interrupts.

-Scott

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] [POWERPC] Rework EXC_LEVEL_EXCEPTION_PROLOG code
  2008-05-01 16:42                 ` Scott Wood
@ 2008-05-01 17:48                   ` Kumar Gala
  2008-05-01 19:02                     ` Scott Wood
  0 siblings, 1 reply; 26+ messages in thread
From: Kumar Gala @ 2008-05-01 17:48 UTC (permalink / raw)
  To: Scott Wood; +Cc: linuxppc-dev, Paul Mackerras


On May 1, 2008, at 11:42 AM, Scott Wood wrote:

> On Thu, May 01, 2008 at 11:33:34AM -0500, Kumar Gala wrote:
>> Let me ask the question differently.  Are there cases that some event
>> occurs in the system and a signal is delivered to the current process
>> regardless of what that process is.
>
> Yes, timers that expire based on CPU usage.
>
>> I'm guessing so, based on the tty example.
>
> I don't think tty interrupts would -- it'd go to the process group
> associated with the tty.
>
>> So for the specific case I'm looking at (kprobes & debug exceptions
>> from kernel space), I think its reasonable to BUG_ON() if  
>> thread_info-
>>> flags changes such that TIF_SIGPENDING or TIF_NEED_RESCHED get set
>> we aren't from user-space.
>
> Why?  It may not happen currently, but it seems more future-proof to  
> just
> copy the flags.  And it's certainly not reasonable for normal  
> interrupts.

copying the flags isn't the issue.  Its acting on the flags thats the  
problem.  I'm not 100% sure the C code that might clear the flags is  
consistent on how it access them.  So if one bit of code clears  
task_struct->stack->thread_info->flags and other clears  
thread_info(STACK)->flags we get into an issue on how to merge after  
that.

I'd rather be safe for now since we don't need to send a signal in the  
debug from kernel case.

- k

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] [POWERPC] Rework EXC_LEVEL_EXCEPTION_PROLOG code
  2008-05-01 17:48                   ` Kumar Gala
@ 2008-05-01 19:02                     ` Scott Wood
  0 siblings, 0 replies; 26+ messages in thread
From: Scott Wood @ 2008-05-01 19:02 UTC (permalink / raw)
  To: Kumar Gala; +Cc: linuxppc-dev, Paul Mackerras

Kumar Gala wrote:
> copying the flags isn't the issue.  Its acting on the flags thats the 
> problem.  I'm not 100% sure the C code that might clear the flags is 
> consistent on how it access them.  

Actually *delivering* the signal should never be done except when 
returning to user.  That's different from sending the signal, though.

BTW, it doesn't seem all that unreasonable for a kernel 
profiling/tracing exception to signal a process that, for example, an 
event buffer is over a certain threshold.

 > So if one bit of code clears
 > task_struct->stack->thread_info->flags and other clears
 > thread_info(STACK)->flags we get into an issue on how to merge after
 > that.

It appears that TIF_SIGPENDING is always accessed through the task 
struct, though not so for TIF_NEED_RESCHED.

-Scott

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] [POWERPC] Rework EXC_LEVEL_EXCEPTION_PROLOG code
  2008-05-01 13:17           ` Kumar Gala
  2008-05-01 16:14             ` Scott Wood
@ 2008-05-01 23:34             ` Paul Mackerras
  2008-05-02  4:02               ` Kumar Gala
  1 sibling, 1 reply; 26+ messages in thread
From: Paul Mackerras @ 2008-05-01 23:34 UTC (permalink / raw)
  To: Kumar Gala; +Cc: linuxppc-dev

Kumar Gala writes:

> ok.  Was just wondering how the async exception know that the signal  
> it wanted to send belonged to the particular process that is running.   

Usually the driver would have a reference to the task_struct of the
task that should get the signal, and it would send the signal to that
task, rather than the current task.  That task could be the current
task, of course, but it might not be.

I can't think of a case where an asynchronous interrupt would always
result in a signal being sent to the current task.

> >> how do we provide someone stick a kprobe on such code today?
> >
> > -ENOPARSE
> 
> I was asking how we prevent the cases you were describing working w/ 
> kprobes today.  Since it ends up single stepping in kernel codes its  
> possible that someone sets a kprobe in code that shouldn't be  
> interrupted, yet we'd cause a SingleStep Exception.

I'd have to look more closely at the kprobes code to answer that in
detail.  I assume the kprobes exception handlers are sufficiently
careful about what they do because they are aware they could have
interrupted interrupt-disabled code.

> >> So I'm not if there is any good way to preclude the handlers
> >> associated with these exceptions from doing the things you listed.
> >
> > In that case, you'd better expect to see system freezes, memory
> > corruption and general instability.
> 
> So the case I'm trying to make work is debug and kprobes.  This case  
> seems like we have pretty good control over what the "handler" does.   
> Are there checks we can add to BUG_ON() so we are at least aware of  
> the code attempts to do something it shouldnt?

Well, there's in_interrupt(), and there's the __kprobes marking, which
is used to mark functions where kprobes must not put a breakpoint.
Apart from that I would need to read through the kprobes code to
comment further...

Paul.

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] [POWERPC] Rework EXC_LEVEL_EXCEPTION_PROLOG code
  2008-05-01 23:34             ` Paul Mackerras
@ 2008-05-02  4:02               ` Kumar Gala
  0 siblings, 0 replies; 26+ messages in thread
From: Kumar Gala @ 2008-05-02  4:02 UTC (permalink / raw)
  To: Paul Mackerras; +Cc: linuxppc-dev


On May 1, 2008, at 6:34 PM, Paul Mackerras wrote:

> Kumar Gala writes:
>
>> ok.  Was just wondering how the async exception know that the signal
>> it wanted to send belonged to the particular process that is running.
>
> Usually the driver would have a reference to the task_struct of the
> task that should get the signal, and it would send the signal to that
> task, rather than the current task.  That task could be the current
> task, of course, but it might not be.
>
> I can't think of a case where an asynchronous interrupt would always
> result in a signal being sent to the current task.
>
>>>> how do we provide someone stick a kprobe on such code today?
>>>
>>> -ENOPARSE
>>
>> I was asking how we prevent the cases you were describing working w/
>> kprobes today.  Since it ends up single stepping in kernel codes its
>> possible that someone sets a kprobe in code that shouldn't be
>> interrupted, yet we'd cause a SingleStep Exception.
>
> I'd have to look more closely at the kprobes code to answer that in
> detail.  I assume the kprobes exception handlers are sufficiently
> careful about what they do because they are aware they could have
> interrupted interrupt-disabled code.

Can you be a bit more specific about what we have to be careful about?

Also, how does this work in a hypervisor world that has no idea about  
when it might interrupt a guest.

>>>> So I'm not if there is any good way to preclude the handlers
>>>> associated with these exceptions from doing the things you listed.
>>>
>>> In that case, you'd better expect to see system freezes, memory
>>> corruption and general instability.
>>
>> So the case I'm trying to make work is debug and kprobes.  This case
>> seems like we have pretty good control over what the "handler" does.
>> Are there checks we can add to BUG_ON() so we are at least aware of
>> the code attempts to do something it shouldnt?
>
> Well, there's in_interrupt(), and there's the __kprobes marking, which
> is used to mark functions where kprobes must not put a breakpoint.
> Apart from that I would need to read through the kprobes code to
> comment further...

I would appreciate a review here to make sure we are ok.  I'm assuming  
if the current code is already safe that calling into it from a  
separate exception stack isn't going to make any difference.

- k

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] [POWERPC] Rework EXC_LEVEL_EXCEPTION_PROLOG code
  2008-05-01 13:22           ` Kumar Gala
  2008-05-01 14:26             ` Josh Boyer
@ 2008-05-05 11:57             ` Gabriel Paubert
  2008-05-05 12:06               ` Benjamin Herrenschmidt
  1 sibling, 1 reply; 26+ messages in thread
From: Gabriel Paubert @ 2008-05-05 11:57 UTC (permalink / raw)
  To: Kumar Gala; +Cc: Paul Mackerras, linuxppc-dev

On Thu, May 01, 2008 at 08:22:08AM -0500, Kumar Gala wrote:
> >>So we have 4 actual exceptions:
> >>* CriticalInput (some external device signaled this.  There are two
> >>concepts of critical.  One is error the other is high priority)
> >>However this would have the same caveats as any ExternalInput  
> >>handler.
> >
> >No, it's worse. It can interrupt code that normally has
> >local_irq_disabled() and thus doesn't expect to be interrupted. That
> >means that everything becomes unsafe including locks etc....
> 
> Fair.  Should local_irq_disable() clear MSR_SE & MSR_BE on classic  
> parts?
> 
> >Note that driver that want to make active use of that probably want  
> >some
> >explicit local_crit_irq_disable/enable functions to be able to  
> >implement
> >some sort of synchronization.
> 
> Or we could just have local_irq_disable -- clear MSR_EE and MSR_CE
> 
> >>* Watchdog - pretty severe if this fires.
> >>
> >>* Debug - user space debug is pretty straight forward.  However we
> >>have features like kprobes that require kernel level support.
> >
> >Which means we have to be extra careful, in fact, I consider it a  
> >design
> >bug of BookE to have made debug be a critical interrupt...
> 
> I consider the whole BookE debug arch a design bug :)

Why only the debug part? :-)

	Gabriel

^ permalink raw reply	[flat|nested] 26+ messages in thread

* Re: [PATCH] [POWERPC] Rework EXC_LEVEL_EXCEPTION_PROLOG code
  2008-05-05 11:57             ` Gabriel Paubert
@ 2008-05-05 12:06               ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 26+ messages in thread
From: Benjamin Herrenschmidt @ 2008-05-05 12:06 UTC (permalink / raw)
  To: Gabriel Paubert; +Cc: linuxppc-dev, Paul Mackerras


On Mon, 2008-05-05 at 13:57 +0200, Gabriel Paubert wrote:
> > I consider the whole BookE debug arch a design bug :)
> 
> Why only the debug part? :-)

Hehe :-)

At least the old broken 64 bits BookE was ditched before any
implementation was done :-)

Ben.

^ permalink raw reply	[flat|nested] 26+ messages in thread

end of thread, other threads:[~2008-05-05 12:06 UTC | newest]

Thread overview: 26+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-04-30  9:27 [PATCH] [POWERPC] Rework EXC_LEVEL_EXCEPTION_PROLOG code Kumar Gala
2008-04-30 21:54 ` Benjamin Herrenschmidt
2008-04-30 22:13   ` Kumar Gala
2008-04-30 22:18     ` Benjamin Herrenschmidt
2008-04-30 23:24       ` Kumar Gala
2008-04-30 22:22     ` Benjamin Herrenschmidt
2008-04-30 23:28       ` Kumar Gala
2008-04-30 23:52         ` Benjamin Herrenschmidt
2008-04-30 23:47     ` Paul Mackerras
2008-05-01  6:01       ` Kumar Gala
2008-05-01  7:53         ` Benjamin Herrenschmidt
2008-05-01 13:22           ` Kumar Gala
2008-05-01 14:26             ` Josh Boyer
2008-05-01 14:31               ` Kumar Gala
2008-05-05 11:57             ` Gabriel Paubert
2008-05-05 12:06               ` Benjamin Herrenschmidt
2008-05-01  8:24         ` Paul Mackerras
2008-05-01 13:17           ` Kumar Gala
2008-05-01 16:14             ` Scott Wood
2008-05-01 16:22               ` Scott Wood
2008-05-01 16:33               ` Kumar Gala
2008-05-01 16:42                 ` Scott Wood
2008-05-01 17:48                   ` Kumar Gala
2008-05-01 19:02                     ` Scott Wood
2008-05-01 23:34             ` Paul Mackerras
2008-05-02  4:02               ` Kumar Gala

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).