linuxppc-dev.lists.ozlabs.org archive mirror
 help / color / mirror / Atom feed
* [RFC patch powerpc,trace] Avoid suspicious RCU usage reporting for some tracepoints
@ 2012-09-10  4:58 Li Zhong
  2012-09-10  5:10 ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 6+ messages in thread
From: Li Zhong @ 2012-09-10  4:58 UTC (permalink / raw)
  To: PowerPC email list; +Cc: Paul E. McKenney, Paul Mackerras, Steven Rostedt

There are a few tracepoints in the interrupt code path, which is before
irq_enter(), or after irq_exit(), like
trace_irq_entry()/trace_irq_exit() in do_IRQ(),
trace_timer_interrupt_entry()/trace_timer_interrupt_exit() in
timer_interrupt(). 

If the interrupt is from idle(), and because tracepoint contains RCU
read-side critical section, we could see following suspicious RCU usage
reported:

[  145.127743] ===============================
[  145.127747] [ INFO: suspicious RCU usage. ]
[  145.127752] 3.6.0-rc3+ #1 Not tainted
[  145.127755] -------------------------------
[  145.127759] /root/.workdir/linux/arch/powerpc/include/asm/trace.h:33
suspicious rcu_dereference_check() usage!
[  145.127765] 
[  145.127765] other info that might help us debug this:
[  145.127765] 
[  145.127771] 
[  145.127771] RCU used illegally from idle CPU!
[  145.127771] rcu_scheduler_active = 1, debug_locks = 0
[  145.127777] RCU used illegally from extended quiescent state!
[  145.127781] no locks held by swapper/0/0.
[  145.127785] 
[  145.127785] stack backtrace:
[  145.127789] Call Trace:
[  145.127796] [c00000000108b530] [c000000000013c40] .show_stack
+0x70/0x1c0 (unreliable)
[  145.127806] [c00000000108b5e0]
[c0000000000f59d8] .lockdep_rcu_suspicious+0x118/0x150
[  145.127813] [c00000000108b680] [c00000000000fc58] .do_IRQ+0x498/0x500
[  145.127820] [c00000000108b750] [c000000000003950]
hardware_interrupt_common+0x150/0x180
[  145.127828] --- Exception: 501 at .plpar_hcall_norets+0x84/0xd4
[  145.127828]     LR = .check_and_cede_processor+0x38/0x70
[  145.127836] [c00000000108bab0] [c0000000000665dc] .shared_cede_loop
+0x5c/0x100
[  145.127844] [c00000000108bb70] [c000000000588ab0] .cpuidle_enter
+0x30/0x50
[  145.127850] [c00000000108bbe0]
[c000000000588b0c] .cpuidle_enter_state+0x3c/0xb0
[  145.127857] [c00000000108bc60] [c000000000589730] .cpuidle_idle_call
+0x150/0x6c0
[  145.127863] [c00000000108bd30] [c000000000058440] .pSeries_idle
+0x10/0x40
[  145.127870] [c00000000108bda0] [c00000000001683c] .cpu_idle
+0x18c/0x2d0
[  145.127876] [c00000000108be60] [c00000000000b434] .rest_init
+0x124/0x1b0
[  145.127884] [c00000000108bef0] [c0000000009d0d28] .start_kernel
+0x568/0x588
[  145.127890] [c00000000108bf90] [c000000000009660] .start_here_common
+0x20/0x40

This is because the RCU usage in interrupt context should be used in
area marked by rcu_irq_enter()/rcu_irq_exit(), called in
irq_enter()/irq_exit() respectively. 

Could we add a new tracepoint trace_***_rcuirq, like trace_***_rcuidle
to avoid the report? like the code attached below.

Or could we just move these tracepoints inside the
irq_enter()/irq_exit() area? (Seems not good for the timer_interrupt
case). 

Thanks, Zhong

Signed-off-by: Li Zhong <zhong@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/irq.c  |    4 ++--
 arch/powerpc/kernel/time.c |    4 ++--
 include/linux/tracepoint.h |   10 ++++++++++
 3 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 1f017bb..f0ac4e7 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -489,7 +489,7 @@ void do_IRQ(struct pt_regs *regs)
 	struct pt_regs *old_regs = set_irq_regs(regs);
 	unsigned int irq;
 
-	trace_irq_entry(regs);
+	trace_irq_entry_rcuirq(regs);
 
 	irq_enter();
 
@@ -514,7 +514,7 @@ void do_IRQ(struct pt_regs *regs)
 	irq_exit();
 	set_irq_regs(old_regs);
 
-	trace_irq_exit(regs);
+	trace_irq_exit_rcuirq(regs);
 }
 
 void __init init_IRQ(void)
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index e49e931..cbd6607 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -493,7 +493,7 @@ void timer_interrupt(struct pt_regs * regs)
 	 */
 	may_hard_irq_enable();
 
-	trace_timer_interrupt_entry(regs);
+	trace_timer_interrupt_entry_rcuirq(regs);
 
 	__get_cpu_var(irq_stat).timer_irqs++;
 
@@ -532,7 +532,7 @@ void timer_interrupt(struct pt_regs * regs)
 	irq_exit();
 	set_irq_regs(old_regs);
 
-	trace_timer_interrupt_exit(regs);
+	trace_timer_interrupt_exit_rcuirq(regs);
 }
 
 /*
diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
index 802de56..f7672d5 100644
--- a/include/linux/tracepoint.h
+++ b/include/linux/tracepoint.h
@@ -161,6 +161,16 @@ static inline void tracepoint_synchronize_unregister(void)
 				rcu_idle_exit(),			\
 				rcu_idle_enter());			\
 	}								\
+	static inline void trace_##name##_rcuirq(proto)			\
+	{								\
+		if (static_key_false(&__tracepoint_##name.key))		\
+			__DO_TRACE(&__tracepoint_##name,		\
+				TP_PROTO(data_proto),			\
+				TP_ARGS(data_args),			\
+				TP_CONDITION(cond),			\
+				rcu_irq_enter(),			\
+				rcu_irq_exit());			\
+	}								\
 	static inline int						\
 	register_trace_##name(void (*probe)(data_proto), void *data)	\
 	{								\
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [RFC patch powerpc,trace] Avoid suspicious RCU usage reporting for some tracepoints
  2012-09-10  4:58 [RFC patch powerpc,trace] Avoid suspicious RCU usage reporting for some tracepoints Li Zhong
@ 2012-09-10  5:10 ` Benjamin Herrenschmidt
  2012-09-10 14:02   ` Steven Rostedt
  2012-09-11  1:23   ` [RFC patch " Anton Blanchard
  0 siblings, 2 replies; 6+ messages in thread
From: Benjamin Herrenschmidt @ 2012-09-10  5:10 UTC (permalink / raw)
  To: Li Zhong
  Cc: Anton Blanchard, Paul Mackerras, Paul E. McKenney,
	PowerPC email list, Steven Rostedt

On Mon, 2012-09-10 at 12:58 +0800, Li Zhong wrote:
> There are a few tracepoints in the interrupt code path, which is before
> irq_enter(), or after irq_exit(), like
> trace_irq_entry()/trace_irq_exit() in do_IRQ(),
> trace_timer_interrupt_entry()/trace_timer_interrupt_exit() in
> timer_interrupt(). 
> 
> If the interrupt is from idle(), and because tracepoint contains RCU
> read-side critical section, we could see following suspicious RCU usage
> reported:

 .../...

> This is because the RCU usage in interrupt context should be used in
> area marked by rcu_irq_enter()/rcu_irq_exit(), called in
> irq_enter()/irq_exit() respectively. 
> 
> Could we add a new tracepoint trace_***_rcuirq, like trace_***_rcuidle
> to avoid the report? like the code attached below.
> 
> Or could we just move these tracepoints inside the
> irq_enter()/irq_exit() area? (Seems not good for the timer_interrupt
> case). 

I'd say just move them in. Anton, any objection ?

> Thanks, Zhong
> 
> Signed-off-by: Li Zhong <zhong@linux.vnet.ibm.com>
> ---
>  arch/powerpc/kernel/irq.c  |    4 ++--
>  arch/powerpc/kernel/time.c |    4 ++--
>  include/linux/tracepoint.h |   10 ++++++++++
>  3 files changed, 14 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
> index 1f017bb..f0ac4e7 100644
> --- a/arch/powerpc/kernel/irq.c
> +++ b/arch/powerpc/kernel/irq.c
> @@ -489,7 +489,7 @@ void do_IRQ(struct pt_regs *regs)
>  	struct pt_regs *old_regs = set_irq_regs(regs);
>  	unsigned int irq;
>  
> -	trace_irq_entry(regs);
> +	trace_irq_entry_rcuirq(regs);
>  
>  	irq_enter();
>  
> @@ -514,7 +514,7 @@ void do_IRQ(struct pt_regs *regs)
>  	irq_exit();
>  	set_irq_regs(old_regs);
>  
> -	trace_irq_exit(regs);
> +	trace_irq_exit_rcuirq(regs);
>  }
>  
>  void __init init_IRQ(void)
> diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
> index e49e931..cbd6607 100644
> --- a/arch/powerpc/kernel/time.c
> +++ b/arch/powerpc/kernel/time.c
> @@ -493,7 +493,7 @@ void timer_interrupt(struct pt_regs * regs)
>  	 */
>  	may_hard_irq_enable();
>  
> -	trace_timer_interrupt_entry(regs);
> +	trace_timer_interrupt_entry_rcuirq(regs);
>  
>  	__get_cpu_var(irq_stat).timer_irqs++;
>  
> @@ -532,7 +532,7 @@ void timer_interrupt(struct pt_regs * regs)
>  	irq_exit();
>  	set_irq_regs(old_regs);
>  
> -	trace_timer_interrupt_exit(regs);
> +	trace_timer_interrupt_exit_rcuirq(regs);
>  }
>  
>  /*
> diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h
> index 802de56..f7672d5 100644
> --- a/include/linux/tracepoint.h
> +++ b/include/linux/tracepoint.h
> @@ -161,6 +161,16 @@ static inline void tracepoint_synchronize_unregister(void)
>  				rcu_idle_exit(),			\
>  				rcu_idle_enter());			\
>  	}								\
> +	static inline void trace_##name##_rcuirq(proto)			\
> +	{								\
> +		if (static_key_false(&__tracepoint_##name.key))		\
> +			__DO_TRACE(&__tracepoint_##name,		\
> +				TP_PROTO(data_proto),			\
> +				TP_ARGS(data_args),			\
> +				TP_CONDITION(cond),			\
> +				rcu_irq_enter(),			\
> +				rcu_irq_exit());			\
> +	}								\
>  	static inline int						\
>  	register_trace_##name(void (*probe)(data_proto), void *data)	\
>  	{								\

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC patch powerpc,trace] Avoid suspicious RCU usage reporting for some tracepoints
  2012-09-10  5:10 ` Benjamin Herrenschmidt
@ 2012-09-10 14:02   ` Steven Rostedt
  2012-09-10 22:14     ` Benjamin Herrenschmidt
  2012-09-11  1:23   ` [RFC patch " Anton Blanchard
  1 sibling, 1 reply; 6+ messages in thread
From: Steven Rostedt @ 2012-09-10 14:02 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Anton Blanchard, Paul Mackerras, Paul E. McKenney,
	PowerPC email list, Li Zhong

On Mon, 2012-09-10 at 15:10 +1000, Benjamin Herrenschmidt wrote:
> On Mon, 2012-09-10 at 12:58 +0800, Li Zhong wrote:
> > There are a few tracepoints in the interrupt code path, which is before
> > irq_enter(), or after irq_exit(), like
> > trace_irq_entry()/trace_irq_exit() in do_IRQ(),
> > trace_timer_interrupt_entry()/trace_timer_interrupt_exit() in
> > timer_interrupt(). 
> > 
> > If the interrupt is from idle(), and because tracepoint contains RCU
> > read-side critical section, we could see following suspicious RCU usage
> > reported:
> 
>  .../...
> 
> > This is because the RCU usage in interrupt context should be used in
> > area marked by rcu_irq_enter()/rcu_irq_exit(), called in
> > irq_enter()/irq_exit() respectively. 
> > 
> > Could we add a new tracepoint trace_***_rcuirq, like trace_***_rcuidle
> > to avoid the report? like the code attached below.
> > 
> > Or could we just move these tracepoints inside the
> > irq_enter()/irq_exit() area? (Seems not good for the timer_interrupt
> > case). 
> 
> I'd say just move them in. Anton, any objection ?
> 

I agree too. I'm a bit concerned about the lack of coverage the
irq_enter() and irq_exit() have in the timer interrupt:

	may_hard_irq_enable();

	trace_timer_interrupt_entry(regs);

	__get_cpu_var(irq_stat).timer_irqs++;

#if defined(CONFIG_PPC32) && defined(CONFIG_PMAC)
	if (atomic_read(&ppc_n_lost_interrupts) != 0)
		do_IRQ(regs);
#endif

	old_regs = set_irq_regs(regs);
	irq_enter();


I'm guessing that call to do_IRQ() has to do with lazy irq handling?

Anyway, there may be a reason to have the tracepoint before this call,
but I'm not sure it really is that important. It should probably be best
to move it after the irq_enter(). There's side-effects with calling
things from interrupt context outside of irq_enter/exit().

-- Steve

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC patch powerpc,trace] Avoid suspicious RCU usage reporting for some tracepoints
  2012-09-10 14:02   ` Steven Rostedt
@ 2012-09-10 22:14     ` Benjamin Herrenschmidt
  2012-09-11  1:37       ` [PATCH " Li Zhong
  0 siblings, 1 reply; 6+ messages in thread
From: Benjamin Herrenschmidt @ 2012-09-10 22:14 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Anton Blanchard, Paul Mackerras, Paul E. McKenney,
	PowerPC email list, Li Zhong

On Mon, 2012-09-10 at 10:02 -0400, Steven Rostedt wrote:
> I agree too. I'm a bit concerned about the lack of coverage the
> irq_enter() and irq_exit() have in the timer interrupt:
> 
> 	may_hard_irq_enable();
> 
> 	trace_timer_interrupt_entry(regs);
> 
> 	__get_cpu_var(irq_stat).timer_irqs++;
> 
> #if defined(CONFIG_PPC32) && defined(CONFIG_PMAC)
> 	if (atomic_read(&ppc_n_lost_interrupts) != 0)
> 		do_IRQ(regs);
> #endif
> 
> 	old_regs = set_irq_regs(regs);
> 	irq_enter();
> 
> 
> I'm guessing that call to do_IRQ() has to do with lazy irq handling?

Not exactly. may_hard_irq_enable() does, it will hard-enable interrupts
(while keeping them soft-disabled) if there isn't a recorded "level"
interrupt pending already (in which case it's pointless, we'll just be
interrupted again and re-disable).

The do_IRQ() hack is an old hack for ancient 32-bit powermacs who could
lose interrupts, it's a sideband mechanism we have to re-emit them,
which nowadays could probably be replaced with something more modern,
it's just that nobody ever wants to touch that code.

> Anyway, there may be a reason to have the tracepoint before this call,
> but I'm not sure it really is that important. It should probably be best
> to move it after the irq_enter(). There's side-effects with calling
> things from interrupt context outside of irq_enter/exit().

Cheers,
Ben.

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [RFC patch powerpc,trace] Avoid suspicious RCU usage reporting for some tracepoints
  2012-09-10  5:10 ` Benjamin Herrenschmidt
  2012-09-10 14:02   ` Steven Rostedt
@ 2012-09-11  1:23   ` Anton Blanchard
  1 sibling, 0 replies; 6+ messages in thread
From: Anton Blanchard @ 2012-09-11  1:23 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Paul Mackerras, Paul E. McKenney, PowerPC email list,
	Steven Rostedt, Li Zhong

Hi Ben,

> > Or could we just move these tracepoints inside the
> > irq_enter()/irq_exit() area? (Seems not good for the timer_interrupt
> > case). 
> 
> I'd say just move them in. Anton, any objection ?

Sounds reasonable, no objections from me.

Anton

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [PATCH powerpc,trace] Avoid suspicious RCU usage reporting for some tracepoints
  2012-09-10 22:14     ` Benjamin Herrenschmidt
@ 2012-09-11  1:37       ` Li Zhong
  0 siblings, 0 replies; 6+ messages in thread
From: Li Zhong @ 2012-09-11  1:37 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: Anton Blanchard, Paul Mackerras, Paul E. McKenney,
	PowerPC email list, Steven Rostedt

Thank you all, this is the updated patch for your review:

There are a few tracepoints in the interrupt code path, which is before
irq_enter(), or after irq_exit(), like
trace_irq_entry()/trace_irq_exit() in do_IRQ(),
trace_timer_interrupt_entry()/trace_timer_interrupt_exit() in
timer_interrupt(). 

If the interrupt is from idle(), and because tracepoint contains RCU
read-side critical section, we could see following suspicious RCU usage
reported:

[  145.127743] ===============================
[  145.127747] [ INFO: suspicious RCU usage. ]
[  145.127752] 3.6.0-rc3+ #1 Not tainted
[  145.127755] -------------------------------
[  145.127759] /root/.workdir/linux/arch/powerpc/include/asm/trace.h:33
suspicious rcu_dereference_check() usage!
[  145.127765] 
[  145.127765] other info that might help us debug this:
[  145.127765] 
[  145.127771] 
[  145.127771] RCU used illegally from idle CPU!
[  145.127771] rcu_scheduler_active = 1, debug_locks = 0
[  145.127777] RCU used illegally from extended quiescent state!
[  145.127781] no locks held by swapper/0/0.
[  145.127785] 
[  145.127785] stack backtrace:
[  145.127789] Call Trace:
[  145.127796] [c00000000108b530] [c000000000013c40] .show_stack
+0x70/0x1c0 (unreliable)
[  145.127806] [c00000000108b5e0]
[c0000000000f59d8] .lockdep_rcu_suspicious+0x118/0x150
[  145.127813] [c00000000108b680] [c00000000000fc58] .do_IRQ+0x498/0x500
[  145.127820] [c00000000108b750] [c000000000003950]
hardware_interrupt_common+0x150/0x180
[  145.127828] --- Exception: 501 at .plpar_hcall_norets+0x84/0xd4
[  145.127828]     LR = .check_and_cede_processor+0x38/0x70
[  145.127836] [c00000000108bab0] [c0000000000665dc] .shared_cede_loop
+0x5c/0x100
[  145.127844] [c00000000108bb70] [c000000000588ab0] .cpuidle_enter
+0x30/0x50
[  145.127850] [c00000000108bbe0]
[c000000000588b0c] .cpuidle_enter_state+0x3c/0xb0
[  145.127857] [c00000000108bc60] [c000000000589730] .cpuidle_idle_call
+0x150/0x6c0
[  145.127863] [c00000000108bd30] [c000000000058440] .pSeries_idle
+0x10/0x40
[  145.127870] [c00000000108bda0] [c00000000001683c] .cpu_idle
+0x18c/0x2d0
[  145.127876] [c00000000108be60] [c00000000000b434] .rest_init
+0x124/0x1b0
[  145.127884] [c00000000108bef0] [c0000000009d0d28] .start_kernel
+0x568/0x588
[  145.127890] [c00000000108bf90] [c000000000009660] .start_here_common
+0x20/0x40

This is because the RCU usage in interrupt context should be used in
area marked by rcu_irq_enter()/rcu_irq_exit(), called in
irq_enter()/irq_exit() respectively. 

Move them into the irq_enter()/irq_exit() area to avoid the reporting.

Signed-off-by: Li Zhong <zhong@linux.vnet.ibm.com>
---
 arch/powerpc/kernel/irq.c  |    8 ++++----
 arch/powerpc/kernel/time.c |    8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c
index 1f017bb..71413f4 100644
--- a/arch/powerpc/kernel/irq.c
+++ b/arch/powerpc/kernel/irq.c
@@ -489,10 +489,10 @@ void do_IRQ(struct pt_regs *regs)
 	struct pt_regs *old_regs = set_irq_regs(regs);
 	unsigned int irq;
 
-	trace_irq_entry(regs);
-
 	irq_enter();
 
+	trace_irq_entry(regs);
+
 	check_stack_overflow();
 
 	/*
@@ -511,10 +511,10 @@ void do_IRQ(struct pt_regs *regs)
 	else
 		__get_cpu_var(irq_stat).spurious_irqs++;
 
+	trace_irq_exit(regs);
+
 	irq_exit();
 	set_irq_regs(old_regs);
-
-	trace_irq_exit(regs);
 }
 
 void __init init_IRQ(void)
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index e49e931..bd693a1 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -493,8 +493,6 @@ void timer_interrupt(struct pt_regs * regs)
 	 */
 	may_hard_irq_enable();
 
-	trace_timer_interrupt_entry(regs);
-
 	__get_cpu_var(irq_stat).timer_irqs++;
 
 #if defined(CONFIG_PPC32) && defined(CONFIG_PMAC)
@@ -505,6 +503,8 @@ void timer_interrupt(struct pt_regs * regs)
 	old_regs = set_irq_regs(regs);
 	irq_enter();
 
+	trace_timer_interrupt_entry(regs);
+
 	if (test_irq_work_pending()) {
 		clear_irq_work_pending();
 		irq_work_run();
@@ -529,10 +529,10 @@ void timer_interrupt(struct pt_regs * regs)
 	}
 #endif
 
+	trace_timer_interrupt_exit(regs);
+
 	irq_exit();
 	set_irq_regs(old_regs);
-
-	trace_timer_interrupt_exit(regs);
 }
 
 /*
-- 
1.7.1

^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2012-09-11  1:37 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-09-10  4:58 [RFC patch powerpc,trace] Avoid suspicious RCU usage reporting for some tracepoints Li Zhong
2012-09-10  5:10 ` Benjamin Herrenschmidt
2012-09-10 14:02   ` Steven Rostedt
2012-09-10 22:14     ` Benjamin Herrenschmidt
2012-09-11  1:37       ` [PATCH " Li Zhong
2012-09-11  1:23   ` [RFC patch " Anton Blanchard

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).