* BUG: dead loop in PowerPC hcall tracepoint (Was: [LTP] [PATCH v2] Add ftrace-stress-test to LTP)
[not found] ` <1286995066.4893.17.camel@subratamodak.linux.ibm.com>
@ 2010-10-18 3:19 ` Li Zefan
2010-10-18 10:05 ` Benjamin Herrenschmidt
` (2 more replies)
0 siblings, 3 replies; 9+ messages in thread
From: Li Zefan @ 2010-10-18 3:19 UTC (permalink / raw)
To: subrata
Cc: ltp-list, Peter Zijlstra, LKML, Steven Rostedt, Paul Mackerras,
Anton Blanchard, Ingo Molnar, linuxppc-dev
Cc: Steven
Cc: Ingo
Cc: Peter
Cc: Anton Blanchard <anton@samba.org>
Cc: Paul Mackerras <paulus@samba.org>
For those Cc'ed, more information here:
http://marc.info/?l=ltp-list&m=128569007015044&w=2
http://marc.info/?l=ltp-list&m=128696942432669&w=2
02:37, Subrata Modak wrote:
> I get a bigger trace with this patch:
>
> Unable to handle kernel paging request for data at address 0x00000000
> Faulting instruction address: 0xc0000000002133f0
> cpu 0x2: Vector: 300 (Data Access) at [c0000000d9f8b560]
> pc: c0000000002133f0: .trace_clock_global+0xb4/0x2a0
> lr: c000000000213458: .trace_clock_global+0x11c/0x2a0
> sp: c0000000d9f8b7e0
> msr: 800000000200b032
> dar: 0
> dsisr: 40000000
> current = 0xc0000000d9f7d100
> paca = 0xc000000007fc8e00
> pid = 1667, comm = ftrace_stack_tr
> Unrecoverable FP Unavailable Exception 800 at c0000000016a9540
> cpu 0x0: Vector: 8Unable to handle0 kernel paging r0 equest for data (at
> address 0xbffFffffe0175b688
> PU UnavaFaulting instruciltion address: 0xac0000000001017fcb
> le) at [c0000000d9f8a6a0]
> p pc: c0000000016a9540: etnetre r? ?f ofro rh ehlepl
>
>
> lr: [c000000000016a9540: key_type_dns_resolver+0x15110/0x365f8
> sp: c0000000018804e8
> msr: 8000000000001032
> current = 0xc0000000d838d100
> paca = 0xc000000007fc8000
> pid = 1668, comm = ftrace_stack_ma
> pid = 1668, cc0000000002226b0 .rb_reserve_next_event+0x20c/0x804
> [c0000000d9f8b9b0] c000000000223178 .ring_buffer_lock_reserve
> +0x24c/0x2a4
> [c0000000d9f8ba40] c00000000022d6f4 .trace_buffer_lock_reserve+0x58/0xe4
> [c0000000d9f8baf0] c00000000022ec9c .trace_current_buffer_lock_reserve
> +0x44/0x6c
> [c0000000d9f8bb80] c000000000011c5c .ftrace_raw_event_hcall_entry
> +0x7c/0x144
> [c0000000d9f8bc40] c000000000096624 .__trace_hcall_entry+0xa0/0xec
> [c0000000d9f8bcd0] c00000000009786c .plpar_hcall_norets+0x50/0xd0
> [c0000000d9f8bd40] c0000000000749c8 .__spin_yield+0x130/0x15c
> [c0000000d9f8bdd0] c000000000213458 .trace_clock_global+0x11c/0x2a0
This is a dead loop:
trace_hcall_entry() -> trace_clock_global() -> trace_hcall_entry() ..
And this is a PPC specific bug. Hope some ppc guys will fix it?
Or we kill trace_clock_global() if no one actually uses it..
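For reference, a rough sketch of the cycle as I read it from the backtrace
above (the lock-contention step is my reading of the trace, not something
I have verified separately):

	/*
	 * trace_hcall_entry()              hcall tracepoint fires
	 *   -> trace_clock_global()        ring buffer asks for a timestamp
	 *     -> arch_spin_lock()          the global clock is lock-protected
	 *       -> __spin_yield()          lock contended on a shared-processor
	 *                                  LPAR, so yield to the lock holder...
	 *         -> plpar_hcall_norets()  ...which is itself an hcall
	 *           -> trace_hcall_entry() and the whole thing recurses
	 */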
--
Li Zefan
> [c0000000d9f8be90] c0000000002226b0 .rb_reserve_next_event+0x20c/0x804
> [c0000000d9f8bfa0] c000000000223178 .ring_buffer_lock_reserve
> +0x24c/0x2a4
> [c0000000d9f8c030] c00000000022d6f4 .trace_buffer_lock_reserve+0x58/0xe4
> [c0000000d9f8c0e0] c00000000022ec9c .trace_current_buffer_lock_reserve
> +0x44/0x6c
> [c0000000d9f8c170] c000000000011c5c .ftrace_raw_event_hcall_entry
> +0x7c/0x144
> [c0000000d9f8c230] c000000000096624 .__trace_hcall_entry+0xa0/0xec
> [c0000000d9f8c2c0] c00000000009786c .plpar_hcall_norets+0x50/0xd0
> [c0000000d9f8c330] c0000000000749c8 .__spin_yield+0x130/0x15c
> [c0000000d9f8c3c0] c000000000213458 .trace_clock_global+0x11c/0x2a0
> [c0000000d9f8c480] c0000000002226b0 .rb_reserve_next_event+0x20c/0x804
> [c0000000d9f8c590] c000000000223178 .ring_buffer_lock_reserve
> +0x24c/0x2a4
> [c0000000d9f8c620] c00000000022d6f4 .trace_buffer_lock_reserve+0x58/0xe4
> [c0000000d9f8c6d0] c00000000022ec9c .trace_current_buffer_lock_reserve
> +0x44/0x6c
> [c0000000d9f8c760] c000000000011c5c .ftrace_raw_event_hcall_entry
> +0x7c/0x144
> [c0000000d9f8c820] c000000000096624 .__trace_hcall_entry+0xa0/0xec
> [c0000000d9f8c8b0] c00000000009786c .plpar_hcall_norets+0x50/0xd0
> [c0000000d9f8c920] c0000000000749c8 .__spin_yield+0x130/0x15c
> [c0000000d9f8c9b0] c000000000213458 .trace_clock_global+0x11c/0x2a0
> [c0000000d9f8ca70] c0000000002226b0 .rb_reserve_next_event+0x20c/0x804
> [c0000000d9f8cb80] c000000000223178 .ring_buffer_lock_reserve
> +0x24c/0x2a4
> [c0000000d9f8cc10] c00000000022d6f4 .trace_buffer_lock_reserve+0x58/0xe4
> [c0000000d9f8ccc0] c00000000022ec9c .trace_current_buffer_lock_reserve
> +0x44/0x6c
> [c0000000d9f8cd50] c000000000011c5c .ftrace_raw_event_hcall_entry
> +0x7c/0x144
> [c0000000d9f8ce10] c000000000096624 .__trace_hcall_entry+0xa0/0xec
> [c0000000d9f8cea0] c00000000009786c .plpar_hcall_norets+0x50/0xd0
> [c0000000d9f8cf10] c0000000000749c8 .__spin_yield+0x130/0x15c
> [c0000000d9f8cfa0] c000000000213458 .trace_clock_global+0x11c/0x2a0
> [c0000000d9f8d060] c0000000002226b0 .rb_reserve_next_event+0x20c/0x804
> [c0000000d9f8d170] c000000000223178 .ring_buffer_lock_reserve
> +0x24c/0x2a4
> [c0000000d9f8d200] c00000000022d6f4 .trace_buffer_lock_reserve+0x58/0xe4
> [c0000000d9f8d2b0] c00000000022ec9c .trace_current_buffer_lock_reserve
> +0x44/0x6c
> [c0000000d9f8d340] c000000000011c5c .ftrace_raw_event_hcall_entry
> +0x7c/0x144
> [c0000000d9f8d400] c000000000096624 .__trace_hcall_entry+0xa0/0xec
> [c0000000d9f8d490] c00000000009786c .plpar_hcall_norets+0x50/0xd0
> [c0000000d9f8d500] c0000000000749c8 .__spin_yield+0x130/0x15c
> [c0000000d9f8d590] c000000000213458 .trace_clock_global+0x11c/0x2a0
> [c0000000d9f8d650] c0000000002226b0 .rb_reserve_next_event+0x20c/0x804
> [c0000000d9f8d760] c000000000223178 .ring_buffer_lock_reserve
> +0x24c/0x2a4
> [c0000000d9f8d7f0] c00000000022d6f4 .trace_buffer_lock_reserve+0x58/0xe4
> [c0000000d9f8d8a0] c00000000022ec9c .trace_current_buffer_lock_reserve
> +0x44/0x6c
> [c0000000d9f8d930] c000000000011c5c .ftrace_raw_event_hcall_entry
> +0x7c/0x144
> [c0000000d9f8d9f0] c000000000096624 .__trace_hcall_entry+0xa0/0xec
> [c0000000d9f8da80] c00000000009786c .plpar_hcall_norets+0x50/0xd0
> [c0000000d9f8daf0] c0000000000749c8 .__spin_yield+0x130/0x15c
> [c0000000d9f8db80] c000000000213458 .trace_clock_global+0x11c/0x2a0
> [c0000000d9f8dc40] c0000000002226b0 .rb_reserve_next_event+0x20c/0x804
> [c0000000d9f8dd50] c000000000223178 .ring_buffer_lock_reserve
> +0x24c/0x2a4
> [c0000000d9f8dde0] c00000000022d6f4 .trace_buffer_lock_reserve+0x58/0xe4
> [c0000000d9f8de90] c00000000022ec9c .trace_current_buffer_lock_reserve
> +0x44/0x6c
> [c0000000d9f8df20] c000000000011c5c .ftrace_raw_event_hcall_entry
> +0x7c/0x144
> [c0000000d9f8dfe0] c000000000096624 .__trace_hcall_entry+0xa0/0xec
> [c0000000d9f8e070] c00000000009786c .plpar_hcall_norets+0x50/0xd0
> [c0000000d9f8e0e0] c0000000000749c8 .__spin_yield+0x130/0x15c
> [c0000000d9f8e170] c000000000213458 .trace_clock_global+0x11c/0x2a0
> [c0000000d9f8e230] c0000000002226b0 .rb_reserve_next_event+0x20c/0x804
>
> Regards--
> Subrata
>
> On Wed, 2010-10-13 at 19:29 +0800, Li Zefan wrote:
>> Li Zefan wrote:
>>> Li Zefan wrote:
>>>> Subrata Modak wrote:
>>>>> Li,
>>>>>
>>>>> Can you kindly fix the below issues and send me a new patch ?
>>>>>
>>>> Sure, I will.
>>>>
>>> At first glance, the kernel bug doesn't seem to come from ftrace,
>>> at least not directly from it.
>>>
>>> Question:
>>>
>>> Can you trigger this bug in latest kernel (2.6.36-rc7)? If you
>>> haven't tried it, could you test it? Thanks!
>>>
>> I got a patch from Peter, though he's not sure about the bug yet.
>> So could you try this?
>>
>> Note, I don't know if this patch can be applied to 2.6.35-4..
>>
>> ======================
>>
>> (From Peter)
>>
>> Hrmm, not something I've seen before.. but since the migration thread
>> and stop_machine are involved, does the below patch which rewrites the
>> migration-thread/stop_machine/scheduler interaction cure this?
>>
>> This patch is probably .37 fodder, but who knows..
>>
>> ---
>> Subject: sched: Create special class for stop/migrate work
>> From: Peter Zijlstra <a.p.zijlstra@chello.nl>
>> Date: Wed Sep 22 13:53:15 CEST 2010
>>
>> In order to separate the stop/migrate work thread from the SCHED_FIFO
>> implementation, create a special class for it that is of higher priority
>> than SCHED_FIFO itself.
>>
>> This currently solves a problem where cpu-hotplug consumes so much cpu-time
>> that the SCHED_FIFO class gets throttled, but has the bandwidth replenishment
>> timer pending on the now dead cpu.
>>
>> It is also required for when we add the planned deadline scheduling class
>> above SCHED_FIFO, as the stop/migrate thread still needs to transcend those
>> tasks.
>>
>> Tested-by: Heiko Carstens <heiko.carstens@de.ibm.com>
>> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
>> LKML-Reference: <1285165776.2275.1022.camel@laptop>
>> ---
>> kernel/sched.c | 54 ++++++++++++++++++++----
>> kernel/sched_stoptask.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++++
>> kernel/stop_machine.c | 8 ++-
>> 3 files changed, 158 insertions(+), 12 deletions(-)
>>
>> Index: linux-2.6/kernel/sched.c
>> ===================================================================
>> --- linux-2.6.orig/kernel/sched.c
>> +++ linux-2.6/kernel/sched.c
>> @@ -486,7 +486,7 @@ struct rq {
>> */
>> unsigned long nr_uninterruptible;
>>
>> - struct task_struct *curr, *idle;
>> + struct task_struct *curr, *idle, *stop;
>> unsigned long next_balance;
>> struct mm_struct *prev_mm;
>>
>> @@ -1837,7 +1837,7 @@ static inline void __set_task_cpu(struct
>>
>> static const struct sched_class rt_sched_class;
>>
>> -#define sched_class_highest (&rt_sched_class)
>> +#define sched_class_highest (&stop_sched_class)
>> #define for_each_class(class) \
>> for (class = sched_class_highest; class; class = class->next)
>>
>> @@ -1917,10 +1917,41 @@ static void deactivate_task(struct rq *r
>> #include "sched_idletask.c"
>> #include "sched_fair.c"
>> #include "sched_rt.c"
>> +#include "sched_stoptask.c"
>> #ifdef CONFIG_SCHED_DEBUG
>> # include "sched_debug.c"
>> #endif
>>
>> +void sched_set_stop_task(int cpu, struct task_struct *stop)
>> +{
>> + struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
>> + struct task_struct *old_stop = cpu_rq(cpu)->stop;
>> +
>> + if (stop) {
>> + /*
>> + * Make it appear like a SCHED_FIFO task, its something
>> + * userspace knows about and won't get confused about.
>> + *
>> + * Also, it will make PI more or less work without too
>> + * much confusion -- but then, stop work should not
>> + * rely on PI working anyway.
>> + */
>> + sched_setscheduler_nocheck(stop, SCHED_FIFO, &param);
>> +
>> + stop->sched_class = &stop_sched_class;
>> + }
>> +
>> + cpu_rq(cpu)->stop = stop;
>> +
>> + if (old_stop) {
>> + /*
>> + * Reset it back to a normal scheduling class so that
>> + * it can die in pieces.
>> + */
>> + old_stop->sched_class = &rt_sched_class;
>> + }
>> +}
>> +
>> /*
>> * __normal_prio - return the priority that is based on the static prio
>> */
>> @@ -3720,17 +3751,13 @@ pick_next_task(struct rq *rq)
>> return p;
>> }
>>
>> - class = sched_class_highest;
>> - for ( ; ; ) {
>> + for_each_class(class) {
>> p = class->pick_next_task(rq);
>> if (p)
>> return p;
>> - /*
>> - * Will never be NULL as the idle class always
>> - * returns a non-NULL p:
>> - */
>> - class = class->next;
>> }
>> +
>> + BUG(); /* the idle class will always have a runnable task */
>> }
>>
>> /*
>> @@ -4659,6 +4686,15 @@ static int __sched_setscheduler(struct t
>> */
>> rq = __task_rq_lock(p);
>>
>> + /*
>> + * Changing the policy of the stop threads its a very bad idea
>> + */
>> + if (p == rq->stop) {
>> + __task_rq_unlock(rq);
>> + raw_spin_unlock_irqrestore(&p->pi_lock, flags);
>> + return -EINVAL;
>> + }
>> +
>> #ifdef CONFIG_RT_GROUP_SCHED
>> if (user) {
>> /*
>> Index: linux-2.6/kernel/stop_machine.c
>> ===================================================================
>> --- linux-2.6.orig/kernel/stop_machine.c
>> +++ linux-2.6/kernel/stop_machine.c
>> @@ -287,11 +287,12 @@ static int cpu_stopper_thread(void *data
>> goto repeat;
>> }
>>
>> +extern void sched_set_stop_task(int cpu, struct task_struct *stop);
>> +
>> /* manage stopper for a cpu, mostly lifted from sched migration thread mgmt */
>> static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
>> unsigned long action, void *hcpu)
>> {
>> - struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
>> unsigned int cpu = (unsigned long)hcpu;
>> struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
>> struct task_struct *p;
>> @@ -304,13 +305,13 @@ static int __cpuinit cpu_stop_cpu_callba
>> cpu);
>> if (IS_ERR(p))
>> return NOTIFY_BAD;
>> - sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
>> get_task_struct(p);
>> + kthread_bind(p, cpu);
>> + sched_set_stop_task(cpu, p);
>> stopper->thread = p;
>> break;
>>
>> case CPU_ONLINE:
>> - kthread_bind(stopper->thread, cpu);
>> /* strictly unnecessary, as first user will wake it */
>> wake_up_process(stopper->thread);
>> /* mark enabled */
>> @@ -325,6 +326,7 @@ static int __cpuinit cpu_stop_cpu_callba
>> {
>> struct cpu_stop_work *work;
>>
>> + sched_set_stop_task(cpu, NULL);
>> /* kill the stopper */
>> kthread_stop(stopper->thread);
>> /* drain remaining works */
>> Index: linux-2.6/kernel/sched_stoptask.c
>> ===================================================================
>> --- /dev/null
>> +++ linux-2.6/kernel/sched_stoptask.c
>> @@ -0,0 +1,108 @@
>> +/*
>> + * stop-task scheduling class.
>> + *
>> + * The stop task is the highest priority task in the system, it preempts
>> + * everything and will be preempted by nothing.
>> + *
>> + * See kernel/stop_machine.c
>> + */
>> +
>> +#ifdef CONFIG_SMP
>> +static int
>> +select_task_rq_stop(struct rq *rq, struct task_struct *p,
>> + int sd_flag, int flags)
>> +{
>> + return task_cpu(p); /* stop tasks as never migrate */
>> +}
>> +#endif /* CONFIG_SMP */
>> +
>> +static void
>> +check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags)
>> +{
>> + resched_task(rq->curr); /* we preempt everything */
>> +}
>> +
>> +static struct task_struct *pick_next_task_stop(struct rq *rq)
>> +{
>> + struct task_struct *stop = rq->stop;
>> +
>> + if (stop && stop->state == TASK_RUNNING)
>> + return stop;
>> +
>> + return NULL;
>> +}
>> +
>> +static void
>> +enqueue_task_stop(struct rq *rq, struct task_struct *p, int flags)
>> +{
>> +}
>> +
>> +static void
>> +dequeue_task_stop(struct rq *rq, struct task_struct *p, int flags)
>> +{
>> +}
>> +
>> +static void yield_task_stop(struct rq *rq)
>> +{
>> + BUG(); /* the stop task should never yield, its pointless. */
>> +}
>> +
>> +static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
>> +{
>> +}
>> +
>> +static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued)
>> +{
>> +}
>> +
>> +static void set_curr_task_stop(struct rq *rq)
>> +{
>> +}
>> +
>> +static void switched_to_stop(struct rq *rq, struct task_struct *p,
>> + int running)
>> +{
>> + BUG(); /* its impossible to change to this class */
>> +}
>> +
>> +static void prio_changed_stop(struct rq *rq, struct task_struct *p,
>> + int oldprio, int running)
>> +{
>> + BUG(); /* how!?, what priority? */
>> +}
>> +
>> +static unsigned int
>> +get_rr_interval_stop(struct rq *rq, struct task_struct *task)
>> +{
>> + return 0;
>> +}
>> +
>> +/*
>> + * Simple, special scheduling class for the per-CPU stop tasks:
>> + */
>> +static const struct sched_class stop_sched_class = {
>> + .next = &rt_sched_class,
>> +
>> + .enqueue_task = enqueue_task_stop,
>> + .dequeue_task = dequeue_task_stop,
>> + .yield_task = yield_task_stop,
>> +
>> + .check_preempt_curr = check_preempt_curr_stop,
>> +
>> + .pick_next_task = pick_next_task_stop,
>> + .put_prev_task = put_prev_task_stop,
>> +
>> +#ifdef CONFIG_SMP
>> + .select_task_rq = select_task_rq_stop,
>> +#endif
>> +
>> + .set_curr_task = set_curr_task_stop,
>> + .task_tick = task_tick_stop,
>> +
>> + .get_rr_interval = get_rr_interval_stop,
>> +
>> + .prio_changed = prio_changed_stop,
>> + .switched_to = switched_to_stop,
>> +
>> + /* no .task_new for stop tasks */
>> +};
>>
>>
>>
>
>
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: BUG: dead loop in PowerPC hcall tracepoint (Was: [LTP] [PATCH v2] Add ftrace-stress-test to LTP)
2010-10-18 3:19 ` BUG: dead loop in PowerPC hcall tracepoint (Was: [LTP] [PATCH v2] Add ftrace-stress-test to LTP) Li Zefan
@ 2010-10-18 10:05 ` Benjamin Herrenschmidt
2010-10-18 14:25 ` Steven Rostedt
2010-10-21 10:52 ` [PATCH] powerpc: Fix hcall tracepoint recursion Anton Blanchard
2 siblings, 0 replies; 9+ messages in thread
From: Benjamin Herrenschmidt @ 2010-10-18 10:05 UTC (permalink / raw)
To: Li Zefan
Cc: ltp-list, Peter Zijlstra, linuxppc-dev, LKML, Steven Rostedt,
Paul Mackerras, Anton Blanchard, Ingo Molnar, subrata
On Mon, 2010-10-18 at 11:19 +0800, Li Zefan wrote:
> Cc: Steven
> Cc: Ingo
> Cc: Peter
> Cc: Anton Blanchard <anton@samba.org>
> Cc: Paul Mackerras <paulus@samba.org>
>
> For those Cc'ed, more information here:
>
> http://marc.info/?l=ltp-list&m=128569007015044&w=2
> http://marc.info/?l=ltp-list&m=128696942432669&w=2
Hrm... that's nasty...
Should we have some kind of flag to avoid spin_yield() calling H_CEDE
(or whatever it calls) when tracing an hcall, to prevent that? Anton,
Paulus, any other smart ideas? A TLF_ flag would do...
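Roughly what I have in mind (completely untested, and _TLF_HCALL_TRACE is a
made-up name; the exit tracepoint would need the same treatment):

	/* untested sketch: _TLF_HCALL_TRACE is a made-up local_flags bit */

	void __spin_yield(arch_spinlock_t *lock)
	{
		/* don't cede/confer while we're inside an hcall tracepoint */
		if (current_thread_info()->local_flags & _TLF_HCALL_TRACE)
			return;

		/* ... existing yield-to-lock-holder code unchanged ... */
	}

	void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
	{
		current_thread_info()->local_flags |= _TLF_HCALL_TRACE;
		trace_hcall_entry(opcode, args);
		current_thread_info()->local_flags &= ~_TLF_HCALL_TRACE;
	}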
Cheers,
Ben.
> 02:37, Subrata Modak wrote:
> > I get a bigger trace with this patch:
> >
> > Unable to handle kernel paging request for data at address 0x00000000
> > Faulting instruction address: 0xc0000000002133f0
> > cpu 0x2: Vector: 300 (Data Access) at [c0000000d9f8b560]
> > pc: c0000000002133f0: .trace_clock_global+0xb4/0x2a0
> > lr: c000000000213458: .trace_clock_global+0x11c/0x2a0
> > sp: c0000000d9f8b7e0
> > msr: 800000000200b032
> > dar: 0
> > dsisr: 40000000
> > current = 0xc0000000d9f7d100
> > paca = 0xc000000007fc8e00
> > pid = 1667, comm = ftrace_stack_tr
> > Unrecoverable FP Unavailable Exception 800 at c0000000016a9540
> > cpu 0x0: Vector: 8Unable to handle0 kernel paging r0 equest for data (at
> > address 0xbffFffffe0175b688
> > PU UnavaFaulting instruciltion address: 0xac0000000001017fcb
> > le) at [c0000000d9f8a6a0]
> > p pc: c0000000016a9540: etnetre r? ?f ofro rh ehlepl
> >
> >
> > lr: [c000000000016a9540: key_type_dns_resolver+0x15110/0x365f8
> > sp: c0000000018804e8
> > msr: 8000000000001032
> > current = 0xc0000000d838d100
> > paca = 0xc000000007fc8000
> > pid = 1668, comm = ftrace_stack_ma
> > pid = 1668, cc0000000002226b0 .rb_reserve_next_event+0x20c/0x804
> > [c0000000d9f8b9b0] c000000000223178 .ring_buffer_lock_reserve
> > +0x24c/0x2a4
> > [c0000000d9f8ba40] c00000000022d6f4 .trace_buffer_lock_reserve+0x58/0xe4
> > [c0000000d9f8baf0] c00000000022ec9c .trace_current_buffer_lock_reserve
> > +0x44/0x6c
> > [c0000000d9f8bb80] c000000000011c5c .ftrace_raw_event_hcall_entry
> > +0x7c/0x144
> > [c0000000d9f8bc40] c000000000096624 .__trace_hcall_entry+0xa0/0xec
> > [c0000000d9f8bcd0] c00000000009786c .plpar_hcall_norets+0x50/0xd0
> > [c0000000d9f8bd40] c0000000000749c8 .__spin_yield+0x130/0x15c
> > [c0000000d9f8bdd0] c000000000213458 .trace_clock_global+0x11c/0x2a0
>
> This is a dead loop:
>
> trace_hcall_entry() -> trace_clock_global() -> trace_hcall_entry() ..
>
> And this is a PPC specific bug. Hope some ppc guys will fix it?
> Or we kill trace_clock_global() if no one actually uses it..
>
> --
> Li Zefan
>
> > [c0000000d9f8be90] c0000000002226b0 .rb_reserve_next_event+0x20c/0x804
> > [c0000000d9f8bfa0] c000000000223178 .ring_buffer_lock_reserve
> > +0x24c/0x2a4
> > [c0000000d9f8c030] c00000000022d6f4 .trace_buffer_lock_reserve+0x58/0xe4
> > [c0000000d9f8c0e0] c00000000022ec9c .trace_current_buffer_lock_reserve
> > +0x44/0x6c
> > [c0000000d9f8c170] c000000000011c5c .ftrace_raw_event_hcall_entry
> > +0x7c/0x144
> > [c0000000d9f8c230] c000000000096624 .__trace_hcall_entry+0xa0/0xec
> > [c0000000d9f8c2c0] c00000000009786c .plpar_hcall_norets+0x50/0xd0
> > [c0000000d9f8c330] c0000000000749c8 .__spin_yield+0x130/0x15c
> > [c0000000d9f8c3c0] c000000000213458 .trace_clock_global+0x11c/0x2a0
> > [c0000000d9f8c480] c0000000002226b0 .rb_reserve_next_event+0x20c/0x804
> > [c0000000d9f8c590] c000000000223178 .ring_buffer_lock_reserve
> > +0x24c/0x2a4
> > [c0000000d9f8c620] c00000000022d6f4 .trace_buffer_lock_reserve+0x58/0xe4
> > [c0000000d9f8c6d0] c00000000022ec9c .trace_current_buffer_lock_reserve
> > +0x44/0x6c
> > [c0000000d9f8c760] c000000000011c5c .ftrace_raw_event_hcall_entry
> > +0x7c/0x144
> > [c0000000d9f8c820] c000000000096624 .__trace_hcall_entry+0xa0/0xec
> > [c0000000d9f8c8b0] c00000000009786c .plpar_hcall_norets+0x50/0xd0
> > [c0000000d9f8c920] c0000000000749c8 .__spin_yield+0x130/0x15c
> > [c0000000d9f8c9b0] c000000000213458 .trace_clock_global+0x11c/0x2a0
> > [c0000000d9f8ca70] c0000000002226b0 .rb_reserve_next_event+0x20c/0x804
> > [c0000000d9f8cb80] c000000000223178 .ring_buffer_lock_reserve
> > +0x24c/0x2a4
> > [c0000000d9f8cc10] c00000000022d6f4 .trace_buffer_lock_reserve+0x58/0xe4
> > [c0000000d9f8ccc0] c00000000022ec9c .trace_current_buffer_lock_reserve
> > +0x44/0x6c
> > [c0000000d9f8cd50] c000000000011c5c .ftrace_raw_event_hcall_entry
> > +0x7c/0x144
> > [c0000000d9f8ce10] c000000000096624 .__trace_hcall_entry+0xa0/0xec
> > [c0000000d9f8cea0] c00000000009786c .plpar_hcall_norets+0x50/0xd0
> > [c0000000d9f8cf10] c0000000000749c8 .__spin_yield+0x130/0x15c
> > [c0000000d9f8cfa0] c000000000213458 .trace_clock_global+0x11c/0x2a0
> > [c0000000d9f8d060] c0000000002226b0 .rb_reserve_next_event+0x20c/0x804
> > [c0000000d9f8d170] c000000000223178 .ring_buffer_lock_reserve
> > +0x24c/0x2a4
> > [c0000000d9f8d200] c00000000022d6f4 .trace_buffer_lock_reserve+0x58/0xe4
> > [c0000000d9f8d2b0] c00000000022ec9c .trace_current_buffer_lock_reserve
> > +0x44/0x6c
> > [c0000000d9f8d340] c000000000011c5c .ftrace_raw_event_hcall_entry
> > +0x7c/0x144
> > [c0000000d9f8d400] c000000000096624 .__trace_hcall_entry+0xa0/0xec
> > [c0000000d9f8d490] c00000000009786c .plpar_hcall_norets+0x50/0xd0
> > [c0000000d9f8d500] c0000000000749c8 .__spin_yield+0x130/0x15c
> > [c0000000d9f8d590] c000000000213458 .trace_clock_global+0x11c/0x2a0
> > [c0000000d9f8d650] c0000000002226b0 .rb_reserve_next_event+0x20c/0x804
> > [c0000000d9f8d760] c000000000223178 .ring_buffer_lock_reserve
> > +0x24c/0x2a4
> > [c0000000d9f8d7f0] c00000000022d6f4 .trace_buffer_lock_reserve+0x58/0xe4
> > [c0000000d9f8d8a0] c00000000022ec9c .trace_current_buffer_lock_reserve
> > +0x44/0x6c
> > [c0000000d9f8d930] c000000000011c5c .ftrace_raw_event_hcall_entry
> > +0x7c/0x144
> > [c0000000d9f8d9f0] c000000000096624 .__trace_hcall_entry+0xa0/0xec
> > [c0000000d9f8da80] c00000000009786c .plpar_hcall_norets+0x50/0xd0
> > [c0000000d9f8daf0] c0000000000749c8 .__spin_yield+0x130/0x15c
> > [c0000000d9f8db80] c000000000213458 .trace_clock_global+0x11c/0x2a0
> > [c0000000d9f8dc40] c0000000002226b0 .rb_reserve_next_event+0x20c/0x804
> > [c0000000d9f8dd50] c000000000223178 .ring_buffer_lock_reserve
> > +0x24c/0x2a4
> > [c0000000d9f8dde0] c00000000022d6f4 .trace_buffer_lock_reserve+0x58/0xe4
> > [c0000000d9f8de90] c00000000022ec9c .trace_current_buffer_lock_reserve
> > +0x44/0x6c
> > [c0000000d9f8df20] c000000000011c5c .ftrace_raw_event_hcall_entry
> > +0x7c/0x144
> > [c0000000d9f8dfe0] c000000000096624 .__trace_hcall_entry+0xa0/0xec
> > [c0000000d9f8e070] c00000000009786c .plpar_hcall_norets+0x50/0xd0
> > [c0000000d9f8e0e0] c0000000000749c8 .__spin_yield+0x130/0x15c
> > [c0000000d9f8e170] c000000000213458 .trace_clock_global+0x11c/0x2a0
> > [c0000000d9f8e230] c0000000002226b0 .rb_reserve_next_event+0x20c/0x804
> >
> > Regards--
> > Subrata
> >
> > On Wed, 2010-10-13 at 19:29 +0800, Li Zefan wrote:
> >> Li Zefan wrote:
> >>> Li Zefan wrote:
> >>>> Subrata Modak wrote:
> >>>>> Li,
> >>>>>
> >>>>> Can you kindly fix the below issues and send me a new patch ?
> >>>>>
> >>>> Sure, I will.
> >>>>
> >>> At first glance, the kernel bug doesn't seem to come from ftrace,
> >>> at least not directly from it.
> >>>
> >>> Question:
> >>>
> >>> Can you trigger this bug in latest kernel (2.6.36-rc7)? If you
> >>> haven't tried it, could you test it? Thanks!
> >>>
> >> I got a patch from Peter, though he's not sure about the bug yet.
> >> So could you try this?
> >>
> >> Note, I don't know if this patch can be applied to 2.6.35-4..
> >>
> >> ======================
> >>
> >> (From Peter)
> >>
> >> Hrmm, not something I've seen before.. but since the migration thread
> >> and stop_machine are involved, does the below patch which rewrites the
> >> migration-thread/stop_machine/scheduler interaction cure this?
> >>
> >> This patch is probably .37 fodder, but who knows..
> >>
> >> ---
> >> Subject: sched: Create special class for stop/migrate work
> >> From: Peter Zijlstra <a.p.zijlstra@chello.nl>
> >> Date: Wed Sep 22 13:53:15 CEST 2010
> >>
> >> In order to separate the stop/migrate work thread from the SCHED_FIFO
> >> implementation, create a special class for it that is of higher priority
> >> than SCHED_FIFO itself.
> >>
> >> This currently solves a problem where cpu-hotplug consumes so much cpu-time
> >> that the SCHED_FIFO class gets throttled, but has the bandwidth replenishment
> >> timer pending on the now dead cpu.
> >>
> >> It is also required for when we add the planned deadline scheduling class
> >> above SCHED_FIFO, as the stop/migrate thread still needs to transcend those
> >> tasks.
> >>
> >> Tested-by: Heiko Carstens <heiko.carstens@de.ibm.com>
> >> Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
> >> LKML-Reference: <1285165776.2275.1022.camel@laptop>
> >> ---
> >> kernel/sched.c | 54 ++++++++++++++++++++----
> >> kernel/sched_stoptask.c | 108 ++++++++++++++++++++++++++++++++++++++++++++++++
> >> kernel/stop_machine.c | 8 ++-
> >> 3 files changed, 158 insertions(+), 12 deletions(-)
> >>
> >> Index: linux-2.6/kernel/sched.c
> >> ===================================================================
> >> --- linux-2.6.orig/kernel/sched.c
> >> +++ linux-2.6/kernel/sched.c
> >> @@ -486,7 +486,7 @@ struct rq {
> >> */
> >> unsigned long nr_uninterruptible;
> >>
> >> - struct task_struct *curr, *idle;
> >> + struct task_struct *curr, *idle, *stop;
> >> unsigned long next_balance;
> >> struct mm_struct *prev_mm;
> >>
> >> @@ -1837,7 +1837,7 @@ static inline void __set_task_cpu(struct
> >>
> >> static const struct sched_class rt_sched_class;
> >>
> >> -#define sched_class_highest (&rt_sched_class)
> >> +#define sched_class_highest (&stop_sched_class)
> >> #define for_each_class(class) \
> >> for (class = sched_class_highest; class; class = class->next)
> >>
> >> @@ -1917,10 +1917,41 @@ static void deactivate_task(struct rq *r
> >> #include "sched_idletask.c"
> >> #include "sched_fair.c"
> >> #include "sched_rt.c"
> >> +#include "sched_stoptask.c"
> >> #ifdef CONFIG_SCHED_DEBUG
> >> # include "sched_debug.c"
> >> #endif
> >>
> >> +void sched_set_stop_task(int cpu, struct task_struct *stop)
> >> +{
> >> + struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
> >> + struct task_struct *old_stop = cpu_rq(cpu)->stop;
> >> +
> >> + if (stop) {
> >> + /*
> >> + * Make it appear like a SCHED_FIFO task, its something
> >> + * userspace knows about and won't get confused about.
> >> + *
> >> + * Also, it will make PI more or less work without too
> >> + * much confusion -- but then, stop work should not
> >> + * rely on PI working anyway.
> >> + */
> >> + sched_setscheduler_nocheck(stop, SCHED_FIFO, &param);
> >> +
> >> + stop->sched_class = &stop_sched_class;
> >> + }
> >> +
> >> + cpu_rq(cpu)->stop = stop;
> >> +
> >> + if (old_stop) {
> >> + /*
> >> + * Reset it back to a normal scheduling class so that
> >> + * it can die in pieces.
> >> + */
> >> + old_stop->sched_class = &rt_sched_class;
> >> + }
> >> +}
> >> +
> >> /*
> >> * __normal_prio - return the priority that is based on the static prio
> >> */
> >> @@ -3720,17 +3751,13 @@ pick_next_task(struct rq *rq)
> >> return p;
> >> }
> >>
> >> - class = sched_class_highest;
> >> - for ( ; ; ) {
> >> + for_each_class(class) {
> >> p = class->pick_next_task(rq);
> >> if (p)
> >> return p;
> >> - /*
> >> - * Will never be NULL as the idle class always
> >> - * returns a non-NULL p:
> >> - */
> >> - class = class->next;
> >> }
> >> +
> >> + BUG(); /* the idle class will always have a runnable task */
> >> }
> >>
> >> /*
> >> @@ -4659,6 +4686,15 @@ static int __sched_setscheduler(struct t
> >> */
> >> rq = __task_rq_lock(p);
> >>
> >> + /*
> >> + * Changing the policy of the stop threads its a very bad idea
> >> + */
> >> + if (p == rq->stop) {
> >> + __task_rq_unlock(rq);
> >> + raw_spin_unlock_irqrestore(&p->pi_lock, flags);
> >> + return -EINVAL;
> >> + }
> >> +
> >> #ifdef CONFIG_RT_GROUP_SCHED
> >> if (user) {
> >> /*
> >> Index: linux-2.6/kernel/stop_machine.c
> >> ===================================================================
> >> --- linux-2.6.orig/kernel/stop_machine.c
> >> +++ linux-2.6/kernel/stop_machine.c
> >> @@ -287,11 +287,12 @@ static int cpu_stopper_thread(void *data
> >> goto repeat;
> >> }
> >>
> >> +extern void sched_set_stop_task(int cpu, struct task_struct *stop);
> >> +
> >> /* manage stopper for a cpu, mostly lifted from sched migration thread mgmt */
> >> static int __cpuinit cpu_stop_cpu_callback(struct notifier_block *nfb,
> >> unsigned long action, void *hcpu)
> >> {
> >> - struct sched_param param = { .sched_priority = MAX_RT_PRIO - 1 };
> >> unsigned int cpu = (unsigned long)hcpu;
> >> struct cpu_stopper *stopper = &per_cpu(cpu_stopper, cpu);
> >> struct task_struct *p;
> >> @@ -304,13 +305,13 @@ static int __cpuinit cpu_stop_cpu_callba
> >> cpu);
> >> if (IS_ERR(p))
> >> return NOTIFY_BAD;
> >> - sched_setscheduler_nocheck(p, SCHED_FIFO, &param);
> >> get_task_struct(p);
> >> + kthread_bind(p, cpu);
> >> + sched_set_stop_task(cpu, p);
> >> stopper->thread = p;
> >> break;
> >>
> >> case CPU_ONLINE:
> >> - kthread_bind(stopper->thread, cpu);
> >> /* strictly unnecessary, as first user will wake it */
> >> wake_up_process(stopper->thread);
> >> /* mark enabled */
> >> @@ -325,6 +326,7 @@ static int __cpuinit cpu_stop_cpu_callba
> >> {
> >> struct cpu_stop_work *work;
> >>
> >> + sched_set_stop_task(cpu, NULL);
> >> /* kill the stopper */
> >> kthread_stop(stopper->thread);
> >> /* drain remaining works */
> >> Index: linux-2.6/kernel/sched_stoptask.c
> >> ===================================================================
> >> --- /dev/null
> >> +++ linux-2.6/kernel/sched_stoptask.c
> >> @@ -0,0 +1,108 @@
> >> +/*
> >> + * stop-task scheduling class.
> >> + *
> >> + * The stop task is the highest priority task in the system, it preempts
> >> + * everything and will be preempted by nothing.
> >> + *
> >> + * See kernel/stop_machine.c
> >> + */
> >> +
> >> +#ifdef CONFIG_SMP
> >> +static int
> >> +select_task_rq_stop(struct rq *rq, struct task_struct *p,
> >> + int sd_flag, int flags)
> >> +{
> >> + return task_cpu(p); /* stop tasks as never migrate */
> >> +}
> >> +#endif /* CONFIG_SMP */
> >> +
> >> +static void
> >> +check_preempt_curr_stop(struct rq *rq, struct task_struct *p, int flags)
> >> +{
> >> + resched_task(rq->curr); /* we preempt everything */
> >> +}
> >> +
> >> +static struct task_struct *pick_next_task_stop(struct rq *rq)
> >> +{
> >> + struct task_struct *stop = rq->stop;
> >> +
> >> + if (stop && stop->state == TASK_RUNNING)
> >> + return stop;
> >> +
> >> + return NULL;
> >> +}
> >> +
> >> +static void
> >> +enqueue_task_stop(struct rq *rq, struct task_struct *p, int flags)
> >> +{
> >> +}
> >> +
> >> +static void
> >> +dequeue_task_stop(struct rq *rq, struct task_struct *p, int flags)
> >> +{
> >> +}
> >> +
> >> +static void yield_task_stop(struct rq *rq)
> >> +{
> >> + BUG(); /* the stop task should never yield, its pointless. */
> >> +}
> >> +
> >> +static void put_prev_task_stop(struct rq *rq, struct task_struct *prev)
> >> +{
> >> +}
> >> +
> >> +static void task_tick_stop(struct rq *rq, struct task_struct *curr, int queued)
> >> +{
> >> +}
> >> +
> >> +static void set_curr_task_stop(struct rq *rq)
> >> +{
> >> +}
> >> +
> >> +static void switched_to_stop(struct rq *rq, struct task_struct *p,
> >> + int running)
> >> +{
> >> + BUG(); /* its impossible to change to this class */
> >> +}
> >> +
> >> +static void prio_changed_stop(struct rq *rq, struct task_struct *p,
> >> + int oldprio, int running)
> >> +{
> >> + BUG(); /* how!?, what priority? */
> >> +}
> >> +
> >> +static unsigned int
> >> +get_rr_interval_stop(struct rq *rq, struct task_struct *task)
> >> +{
> >> + return 0;
> >> +}
> >> +
> >> +/*
> >> + * Simple, special scheduling class for the per-CPU stop tasks:
> >> + */
> >> +static const struct sched_class stop_sched_class = {
> >> + .next = &rt_sched_class,
> >> +
> >> + .enqueue_task = enqueue_task_stop,
> >> + .dequeue_task = dequeue_task_stop,
> >> + .yield_task = yield_task_stop,
> >> +
> >> + .check_preempt_curr = check_preempt_curr_stop,
> >> +
> >> + .pick_next_task = pick_next_task_stop,
> >> + .put_prev_task = put_prev_task_stop,
> >> +
> >> +#ifdef CONFIG_SMP
> >> + .select_task_rq = select_task_rq_stop,
> >> +#endif
> >> +
> >> + .set_curr_task = set_curr_task_stop,
> >> + .task_tick = task_tick_stop,
> >> +
> >> + .get_rr_interval = get_rr_interval_stop,
> >> +
> >> + .prio_changed = prio_changed_stop,
> >> + .switched_to = switched_to_stop,
> >> +
> >> + /* no .task_new for stop tasks */
> >> +};
> >>
> >>
> >>
> >
> >
> >
> _______________________________________________
> Linuxppc-dev mailing list
> Linuxppc-dev@lists.ozlabs.org
> https://lists.ozlabs.org/listinfo/linuxppc-dev
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: BUG: dead loop in PowerPC hcall tracepoint (Was: [LTP] [PATCH v2] Add ftrace-stress-test to LTP)
2010-10-18 3:19 ` BUG: dead loop in PowerPC hcall tracepoint (Was: [LTP] [PATCH v2] Add ftrace-stress-test to LTP) Li Zefan
2010-10-18 10:05 ` Benjamin Herrenschmidt
@ 2010-10-18 14:25 ` Steven Rostedt
2010-10-19 0:49 ` Li Zefan
2010-10-21 10:52 ` [PATCH] powerpc: Fix hcall tracepoint recursion Anton Blanchard
2 siblings, 1 reply; 9+ messages in thread
From: Steven Rostedt @ 2010-10-18 14:25 UTC (permalink / raw)
To: Li Zefan
Cc: ltp-list, Peter Zijlstra, linuxppc-dev, LKML, Paul Mackerras,
Anton Blanchard, Ingo Molnar, subrata
On Mon, 2010-10-18 at 11:19 +0800, Li Zefan wrote:
> This is a dead loop:
>
> trace_hcall_entry() -> trace_clock_global() -> trace_hcall_entry() ..
>
> And this is a PPC specific bug. Hope some ppc guys will fix it?
> Or we kill trace_clock_global() if no one actually uses it..
trace_clock_global() is used by many. I use it (and recommend using it)
on boxes where the TSC is horribly out of sync, and the trace needs
synchronization between CPUs.
The trace_hcall_entry and exit tracepoints already have wrappers. Just add
recursion protection there.
Perhaps something like this:
(Not compiled nor run)
+static DEFINE_PER_CPU(unsigned int, hcall_trace_disable);
+
void hcall_tracepoint_regfunc(void)
{
hcall_tracepoint_refcount++;
}
void hcall_tracepoint_unregfunc(void)
{
hcall_tracepoint_refcount--;
}
+int __trace_disable_check(void)
+{
+ if (!hcall_tracepoint_refcount)
+ return 1;
+
+ if (get_cpu_var(hcall_trace_disable)) {
+ put_cpu_var(hcall_trace_disable);
+ return 1;
+ }
+
+ __get_cpu_var(hcall_trace_disable)++;
+
+ return 0;
+}
+
+void __trace_disable_put(void)
+{
+ __get_cpu_var(hcall_trace_disable)--;
+ put_cpu_var(hcall_trace_disable);
+}
+
void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
{
+ int trace_disable;
+
+ if (__trace_disable_check())
+ return;
+
trace_hcall_entry(opcode, args);
+ __trace_disable_put();
}
void __trace_hcall_exit(long opcode, unsigned long retval,
unsigned long *retbuf)
{
+ if (__trace_disable_check())
+ return;
+
trace_hcall_exit(opcode, retval, retbuf);
+ __trace_disable_put();
}
-- Steve
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: BUG: dead loop in PowerPC hcall tracepoint (Was: [LTP] [PATCH v2] Add ftrace-stress-test to LTP)
2010-10-18 14:25 ` Steven Rostedt
@ 2010-10-19 0:49 ` Li Zefan
0 siblings, 0 replies; 9+ messages in thread
From: Li Zefan @ 2010-10-19 0:49 UTC (permalink / raw)
To: Steven Rostedt
Cc: ltp-list, Peter Zijlstra, linuxppc-dev, LKML, Paul Mackerras,
Anton Blanchard, Ingo Molnar, subrata
Steven Rostedt wrote:
> On Mon, 2010-10-18 at 11:19 +0800, Li Zefan wrote:
>
>> This is a dead loop:
>>
>> trace_hcall_entry() -> trace_clock_global() -> trace_hcall_entry() ..
>>
>> And this is a PPC specific bug. Hope some ppc guys will fix it?
>> Or we kill trace_clock_global() if no one actually uses it..
>
> trace_clock_global() is used by many. I use it (and recommend using it)
> on boxes where the TSC is horribly out of sync, and the trace needs
> synchronization between CPUs.
>
> The trace_hcall_entry and exit tracepoints already have wrappers. Just add
> recursion protection there.
>
Right, I thought of this. But as I have no machine to test on, I'll leave
this to others.
> Perhaps something like this:
>
> (Not compiled nor run)
>
> +static DEFINE_PER_CPU(unsigned int, hcall_trace_disable);
> +
> void hcall_tracepoint_regfunc(void)
> {
> hcall_tracepoint_refcount++;
> }
>
> void hcall_tracepoint_unregfunc(void)
> {
> hcall_tracepoint_refcount--;
> }
>
> +int __trace_disable_check(void)
> +{
> + if (!hcall_tracepoint_refcount)
> + return 1;
> +
> + if (get_cpu_var(hcall_trace_disable)) {
> + put_cpu_var(hcall_trace_disable);
> + return 1;
> + }
> +
> + __get_cpu_var(hcall_trace_disable)++;
> +
> + return 0;
> +}
> +
> +void __trace_disable_put(void)
> +{
> + __get_cpu_var(hcall_trace_disable)--;
> + put_cpu_var(hcall_trace_disable);
> +}
> +
> void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
> {
> + int trace_disable;
> +
> + if (__trace_disable_check())
> + return;
> +
> trace_hcall_entry(opcode, args);
> + __trace_disable_put();
> }
>
> void __trace_hcall_exit(long opcode, unsigned long retval,
> unsigned long *retbuf)
> {
> + if (__trace_disable_check())
> + return;
> +
> trace_hcall_exit(opcode, retval, retbuf);
> + __trace_disable_put();
> }
>
> -- Steve
>
>
>
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* [PATCH] powerpc: Fix hcall tracepoint recursion
2010-10-18 3:19 ` BUG: dead loop in PowerPC hcall tracepoint (Was: [LTP] [PATCH v2] Add ftrace-stress-test to LTP) Li Zefan
2010-10-18 10:05 ` Benjamin Herrenschmidt
2010-10-18 14:25 ` Steven Rostedt
@ 2010-10-21 10:52 ` Anton Blanchard
2010-10-22 7:22 ` Li Zefan
2010-10-22 14:14 ` Steven Rostedt
2 siblings, 2 replies; 9+ messages in thread
From: Anton Blanchard @ 2010-10-21 10:52 UTC (permalink / raw)
To: Li Zefan
Cc: ltp-list, Peter Zijlstra, linuxppc-dev, LKML, Steven Rostedt,
Paul Mackerras, Ingo Molnar, subrata
Hi,
> This is a dead loop:
>
> trace_hcall_entry() -> trace_clock_global() -> trace_hcall_entry() ..
>
> And this is a PPC specific bug. Hope some ppc guys will fix it?
> Or we kill trace_clock_global() if no one actually uses it..
Nasty! How does the patch below look? I had to disable irqs, otherwise
we would sometimes drop valid events (if we take an interrupt anywhere
in the region where depth is elevated, then the entire interrupt will
be blocked from calling hcall tracepoints).
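To illustrate the interrupt case (a hypothetical interleaving, not an
actual trace):

	/*
	 * Without local_irq_save(), a bare per-cpu depth counter can eat
	 * valid events from interrupt context:
	 *
	 *   __trace_hcall_entry()
	 *     (*depth)++;                        depth is now 1
	 *       --> interrupt arrives here
	 *           the handler makes an hcall
	 *           __trace_hcall_entry() sees *depth != 0 and bails out,
	 *           silently dropping a perfectly valid event
	 *       <-- interrupt returns
	 *     trace_hcall_entry(opcode, args);
	 *     (*depth)--;
	 */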
Anton
--
Subject: [PATCH] powerpc: Fix hcall tracepoint recursion
Spinlocks on shared processor partitions use H_YIELD to notify the
hypervisor we are waiting on another virtual CPU. Unfortunately this means
the hcall tracepoints can recurse.
The patch below adds a percpu depth and checks it on both the entry and
exit hcall tracepoints.
Signed-off-by: Anton Blanchard <anton@samba.org>
---
Index: powerpc.git/arch/powerpc/platforms/pseries/lpar.c
===================================================================
--- powerpc.git.orig/arch/powerpc/platforms/pseries/lpar.c 2010-10-21 17:32:00.980003644 +1100
+++ powerpc.git/arch/powerpc/platforms/pseries/lpar.c 2010-10-21 17:34:54.942681273 +1100
@@ -701,6 +701,13 @@ EXPORT_SYMBOL(arch_free_page);
/* NB: reg/unreg are called while guarded with the tracepoints_mutex */
extern long hcall_tracepoint_refcount;
+/*
+ * Since the tracing code might execute hcalls we need to guard against
+ * recursion. One example of this are spinlocks calling H_YIELD on
+ * shared processor partitions.
+ */
+static DEFINE_PER_CPU(unsigned int, hcall_trace_depth);
+
void hcall_tracepoint_regfunc(void)
{
hcall_tracepoint_refcount++;
@@ -713,12 +720,42 @@ void hcall_tracepoint_unregfunc(void)
void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
{
+ unsigned long flags;
+ unsigned int *depth;
+
+ local_irq_save(flags);
+
+ depth = &__get_cpu_var(hcall_trace_depth);
+
+ if (*depth)
+ goto out;
+
+ (*depth)++;
trace_hcall_entry(opcode, args);
+ (*depth)--;
+
+out:
+ local_irq_restore(flags);
}
void __trace_hcall_exit(long opcode, unsigned long retval,
unsigned long *retbuf)
{
+ unsigned long flags;
+ unsigned int *depth;
+
+ local_irq_save(flags);
+
+ depth = &__get_cpu_var(hcall_trace_depth);
+
+ if (*depth)
+ goto out;
+
+ (*depth)++;
trace_hcall_exit(opcode, retval, retbuf);
+ (*depth)--;
+
+out:
+ local_irq_restore(flags);
}
#endif
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] powerpc: Fix hcall tracepoint recursion
2010-10-21 10:52 ` [PATCH] powerpc: Fix hcall tracepoint recursion Anton Blanchard
@ 2010-10-22 7:22 ` Li Zefan
2010-10-22 7:25 ` Subrata Modak
2010-11-11 7:57 ` Subrata Modak
2010-10-22 14:14 ` Steven Rostedt
1 sibling, 2 replies; 9+ messages in thread
From: Li Zefan @ 2010-10-22 7:22 UTC (permalink / raw)
To: Anton Blanchard, subrata
Cc: ltp-list, Peter Zijlstra, LKML, Steven Rostedt, Paul Mackerras,
Ingo Molnar, linuxppc-dev
Anton Blanchard wrote:
> Hi,
>
>> This is a dead loop:
>>
>> trace_hcall_entry() -> trace_clock_global() -> trace_hcall_entry() ..
>>
>> And this is a PPC specific bug. Hope some ppc guys will fix it?
>> Or we kill trace_clock_global() if no one actually uses it..
>
> Nasty! How does the patch below look? I had to disable irqs, otherwise
> we would sometimes drop valid events (if we take an interrupt anywhere
> in the region where depth is elevated, then the entire interrupt will
> be blocked from calling hcall tracepoints).
>
Thanks!
Subrata, could you test the patch below?
> Anton
> --
>
> Subject: [PATCH] powerpc: Fix hcall tracepoint recursion
>
> Spinlocks on shared processor partitions use H_YIELD to notify the
> hypervisor we are waiting on another virtual CPU. Unfortunately this means
> the hcall tracepoints can recurse.
>
> The patch below adds a percpu depth and checks it on both the entry and
> exit hcall tracepoints.
>
> Signed-off-by: Anton Blanchard <anton@samba.org>
> ---
>
> Index: powerpc.git/arch/powerpc/platforms/pseries/lpar.c
> ===================================================================
> --- powerpc.git.orig/arch/powerpc/platforms/pseries/lpar.c 2010-10-21 17:32:00.980003644 +1100
> +++ powerpc.git/arch/powerpc/platforms/pseries/lpar.c 2010-10-21 17:34:54.942681273 +1100
> @@ -701,6 +701,13 @@ EXPORT_SYMBOL(arch_free_page);
> /* NB: reg/unreg are called while guarded with the tracepoints_mutex */
> extern long hcall_tracepoint_refcount;
>
> +/*
> + * Since the tracing code might execute hcalls we need to guard against
> + * recursion. One example of this are spinlocks calling H_YIELD on
> + * shared processor partitions.
> + */
> +static DEFINE_PER_CPU(unsigned int, hcall_trace_depth);
> +
> void hcall_tracepoint_regfunc(void)
> {
> hcall_tracepoint_refcount++;
> @@ -713,12 +720,42 @@ void hcall_tracepoint_unregfunc(void)
>
> void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
> {
> + unsigned long flags;
> + unsigned int *depth;
> +
> + local_irq_save(flags);
> +
> + depth = &__get_cpu_var(hcall_trace_depth);
> +
> + if (*depth)
> + goto out;
> +
> + (*depth)++;
> trace_hcall_entry(opcode, args);
> + (*depth)--;
> +
> +out:
> + local_irq_restore(flags);
> }
>
> void __trace_hcall_exit(long opcode, unsigned long retval,
> unsigned long *retbuf)
> {
> + unsigned long flags;
> + unsigned int *depth;
> +
> + local_irq_save(flags);
> +
> + depth = &__get_cpu_var(hcall_trace_depth);
> +
> + if (*depth)
> + goto out;
> +
> + (*depth)++;
> trace_hcall_exit(opcode, retval, retbuf);
> + (*depth)--;
> +
> +out:
> + local_irq_restore(flags);
> }
> #endif
>
>
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] powerpc: Fix hcall tracepoint recursion
2010-10-22 7:22 ` Li Zefan
@ 2010-10-22 7:25 ` Subrata Modak
2010-11-11 7:57 ` Subrata Modak
1 sibling, 0 replies; 9+ messages in thread
From: Subrata Modak @ 2010-10-22 7:25 UTC (permalink / raw)
To: Li Zefan
Cc: ltp-list, Peter Zijlstra, LKML, Steven Rostedt, Paul Mackerras,
Anton Blanchard, Ingo Molnar, linuxppc-dev
On Fri, 2010-10-22 at 15:22 +0800, Li Zefan wrote:
> Anton Blanchard wrote:
> > Hi,
> >
> >> This is a dead loop:
> >>
> >> trace_hcall_entry() -> trace_clock_global() -> trace_hcall_entry() ..
> >>
> >> And this is a PPC specific bug. Hope some ppc guys will fix it?
> >> Or we kill trace_clock_global() if no one actually uses it..
> >
> > Nasty! How does the patch below look? I had to disable irqs, otherwise
> > we would sometimes drop valid events (if we take an interrupt anywhere
> > in the region where depth is elevated, then the entire interrupt will
> > be blocked from calling hcall tracepoints).
> >
>
> Thanks!
>
> Subrata, could you test the patch below?
Yes, definitely. Give me some time.
Regards--
Subrata
>
> > Anton
> > --
> >
> > Subject: [PATCH] powerpc: Fix hcall tracepoint recursion
> >
> > Spinlocks on shared processor partitions use H_YIELD to notify the
> > hypervisor we are waiting on another virtual CPU. Unfortunately this means
> > the hcall tracepoints can recurse.
> >
> > The patch below adds a percpu depth and checks it on both the entry and
> > exit hcall tracepoints.
> >
> > Signed-off-by: Anton Blanchard <anton@samba.org>
> > ---
> >
> > Index: powerpc.git/arch/powerpc/platforms/pseries/lpar.c
> > ===================================================================
> > --- powerpc.git.orig/arch/powerpc/platforms/pseries/lpar.c 2010-10-21 17:32:00.980003644 +1100
> > +++ powerpc.git/arch/powerpc/platforms/pseries/lpar.c 2010-10-21 17:34:54.942681273 +1100
> > @@ -701,6 +701,13 @@ EXPORT_SYMBOL(arch_free_page);
> > /* NB: reg/unreg are called while guarded with the tracepoints_mutex */
> > extern long hcall_tracepoint_refcount;
> >
> > +/*
> > + * Since the tracing code might execute hcalls we need to guard against
> > + * recursion. One example of this are spinlocks calling H_YIELD on
> > + * shared processor partitions.
> > + */
> > +static DEFINE_PER_CPU(unsigned int, hcall_trace_depth);
> > +
> > void hcall_tracepoint_regfunc(void)
> > {
> > hcall_tracepoint_refcount++;
> > @@ -713,12 +720,42 @@ void hcall_tracepoint_unregfunc(void)
> >
> > void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
> > {
> > + unsigned long flags;
> > + unsigned int *depth;
> > +
> > + local_irq_save(flags);
> > +
> > + depth = &__get_cpu_var(hcall_trace_depth);
> > +
> > + if (*depth)
> > + goto out;
> > +
> > + (*depth)++;
> > trace_hcall_entry(opcode, args);
> > + (*depth)--;
> > +
> > +out:
> > + local_irq_restore(flags);
> > }
> >
> > void __trace_hcall_exit(long opcode, unsigned long retval,
> > unsigned long *retbuf)
> > {
> > + unsigned long flags;
> > + unsigned int *depth;
> > +
> > + local_irq_save(flags);
> > +
> > + depth = &__get_cpu_var(hcall_trace_depth);
> > +
> > + if (*depth)
> > + goto out;
> > +
> > + (*depth)++;
> > trace_hcall_exit(opcode, retval, retbuf);
> > + (*depth)--;
> > +
> > +out:
> > + local_irq_restore(flags);
> > }
> > #endif
> >
> >
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] powerpc: Fix hcall tracepoint recursion
2010-10-21 10:52 ` [PATCH] powerpc: Fix hcall tracepoint recursion Anton Blanchard
2010-10-22 7:22 ` Li Zefan
@ 2010-10-22 14:14 ` Steven Rostedt
1 sibling, 0 replies; 9+ messages in thread
From: Steven Rostedt @ 2010-10-22 14:14 UTC (permalink / raw)
To: Anton Blanchard
Cc: ltp-list, Peter Zijlstra, linuxppc-dev, Li Zefan, LKML,
Paul Mackerras, Ingo Molnar, subrata
On Thu, 2010-10-21 at 21:52 +1100, Anton Blanchard wrote:
> Anton
> --
>
> Subject: [PATCH] powerpc: Fix hcall tracepoint recursion
>
> Spinlocks on shared processor partitions use H_YIELD to notify the
> hypervisor we are waiting on another virtual CPU. Unfortunately this means
> the hcall tracepoints can recurse.
>
> The patch below adds a percpu depth and checks it on both the entry and
> exit hcall tracepoints.
>
> Signed-off-by: Anton Blanchard <anton@samba.org>
> ---
>
> Index: powerpc.git/arch/powerpc/platforms/pseries/lpar.c
> ===================================================================
> --- powerpc.git.orig/arch/powerpc/platforms/pseries/lpar.c 2010-10-21 17:32:00.980003644 +1100
> +++ powerpc.git/arch/powerpc/platforms/pseries/lpar.c 2010-10-21 17:34:54.942681273 +1100
> @@ -701,6 +701,13 @@ EXPORT_SYMBOL(arch_free_page);
> /* NB: reg/unreg are called while guarded with the tracepoints_mutex */
> extern long hcall_tracepoint_refcount;
>
> +/*
> + * Since the tracing code might execute hcalls we need to guard against
> + * recursion. One example of this are spinlocks calling H_YIELD on
> + * shared processor partitions.
> + */
> +static DEFINE_PER_CPU(unsigned int, hcall_trace_depth);
> +
> void hcall_tracepoint_regfunc(void)
> {
> hcall_tracepoint_refcount++;
> @@ -713,12 +720,42 @@ void hcall_tracepoint_unregfunc(void)
>
> void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
> {
> + unsigned long flags;
> + unsigned int *depth;
> +
> + local_irq_save(flags);
> +
> + depth = &__get_cpu_var(hcall_trace_depth);
> +
> + if (*depth)
> + goto out;
> +
> + (*depth)++;
> trace_hcall_entry(opcode, args);
> + (*depth)--;
> +
> +out:
> + local_irq_restore(flags);
> }
>
> void __trace_hcall_exit(long opcode, unsigned long retval,
> unsigned long *retbuf)
> {
> + unsigned long flags;
> + unsigned int *depth;
> +
> + local_irq_save(flags);
> +
> + depth = &__get_cpu_var(hcall_trace_depth);
> +
> + if (*depth)
> + goto out;
> +
> + (*depth)++;
> trace_hcall_exit(opcode, retval, retbuf);
> + (*depth)--;
> +
> +out:
> + local_irq_restore(flags);
> }
> #endif
That's similar to the example that I wrote, but without the encapsulated
code and with disabling interrupts for the reasons you gave.
Acked-by: Steven Rostedt <rostedt@goodmis.org>
-- Steve
^ permalink raw reply [flat|nested] 9+ messages in thread
* Re: [PATCH] powerpc: Fix hcall tracepoint recursion
2010-10-22 7:22 ` Li Zefan
2010-10-22 7:25 ` Subrata Modak
@ 2010-11-11 7:57 ` Subrata Modak
1 sibling, 0 replies; 9+ messages in thread
From: Subrata Modak @ 2010-11-11 7:57 UTC (permalink / raw)
To: Li Zefan
Cc: ltp-list, Peter Zijlstra, LKML, Steven Rostedt, Paul Mackerras,
Anton Blanchard, Ingo Molnar, linuxppc-dev
I tested and reported back the results. The patch works fine. Can you
please find out whether it has been committed to Linus' tree and, if so,
what the commit id is?
Regards--
Subrata
On Fri, 2010-10-22 at 15:22 +0800, Li Zefan wrote:
> Anton Blanchard wrote:
> > Hi,
> >
> >> This is a dead loop:
> >>
> >> trace_hcall_entry() -> trace_clock_global() -> trace_hcall_entry() ..
> >>
> >> And this is a PPC specific bug. Hope some ppc guys will fix it?
> >> Or we kill trace_clock_global() if no one actually uses it..
> >
> > Nasty! How does the patch below look? I had to disable irqs, otherwise
> > we would sometimes drop valid events (if we take an interrupt anywhere
> > in the region where depth is elevated, then the entire interrupt will
> > be blocked from calling hcall tracepoints).
> >
>
> Thanks!
>
> Subrata, could you test the patch below?
>
> > Anton
> > --
> >
> > Subject: [PATCH] powerpc: Fix hcall tracepoint recursion
> >
> > Spinlocks on shared processor partitions use H_YIELD to notify the
> > hypervisor we are waiting on another virtual CPU. Unfortunately this means
> > the hcall tracepoints can recurse.
> >
> > The patch below adds a percpu depth and checks it on both the entry and
> > exit hcall tracepoints.
> >
> > Signed-off-by: Anton Blanchard <anton@samba.org>
> > ---
> >
> > Index: powerpc.git/arch/powerpc/platforms/pseries/lpar.c
> > ===================================================================
> > --- powerpc.git.orig/arch/powerpc/platforms/pseries/lpar.c 2010-10-21 17:32:00.980003644 +1100
> > +++ powerpc.git/arch/powerpc/platforms/pseries/lpar.c 2010-10-21 17:34:54.942681273 +1100
> > @@ -701,6 +701,13 @@ EXPORT_SYMBOL(arch_free_page);
> > /* NB: reg/unreg are called while guarded with the tracepoints_mutex */
> > extern long hcall_tracepoint_refcount;
> >
> > +/*
> > + * Since the tracing code might execute hcalls we need to guard against
> > + * recursion. One example of this are spinlocks calling H_YIELD on
> > + * shared processor partitions.
> > + */
> > +static DEFINE_PER_CPU(unsigned int, hcall_trace_depth);
> > +
> > void hcall_tracepoint_regfunc(void)
> > {
> > hcall_tracepoint_refcount++;
> > @@ -713,12 +720,42 @@ void hcall_tracepoint_unregfunc(void)
> >
> > void __trace_hcall_entry(unsigned long opcode, unsigned long *args)
> > {
> > + unsigned long flags;
> > + unsigned int *depth;
> > +
> > + local_irq_save(flags);
> > +
> > + depth = &__get_cpu_var(hcall_trace_depth);
> > +
> > + if (*depth)
> > + goto out;
> > +
> > + (*depth)++;
> > trace_hcall_entry(opcode, args);
> > + (*depth)--;
> > +
> > +out:
> > + local_irq_restore(flags);
> > }
> >
> > void __trace_hcall_exit(long opcode, unsigned long retval,
> > unsigned long *retbuf)
> > {
> > + unsigned long flags;
> > + unsigned int *depth;
> > +
> > + local_irq_save(flags);
> > +
> > + depth = &__get_cpu_var(hcall_trace_depth);
> > +
> > + if (*depth)
> > + goto out;
> > +
> > + (*depth)++;
> > trace_hcall_exit(opcode, retval, retbuf);
> > + (*depth)--;
> > +
> > +out:
> > + local_irq_restore(flags);
> > }
> > #endif
> >
> >
^ permalink raw reply [flat|nested] 9+ messages in thread
end of thread, other threads:[~2010-11-11 7:57 UTC | newest]
Thread overview: 9+ messages
[not found] <4C85A88D.10700@cn.fujitsu.com>
[not found] ` <1285689961.11429.12.camel@subratamodak.linux.ibm.com>
[not found] ` <1286954486.4893.15.camel@subratamodak.linux.ibm.com>
[not found] ` <4CB55FE6.6000604@cn.fujitsu.com>
[not found] ` <4CB5615C.4070406@cn.fujitsu.com>
[not found] ` <4CB59825.7060504@cn.fujitsu.com>
[not found] ` <1286995066.4893.17.camel@subratamodak.linux.ibm.com>
2010-10-18 3:19 ` BUG: dead loop in PowerPC hcall tracepoint (Was: [LTP] [PATCH v2] Add ftrace-stress-test to LTP) Li Zefan
2010-10-18 10:05 ` Benjamin Herrenschmidt
2010-10-18 14:25 ` Steven Rostedt
2010-10-19 0:49 ` Li Zefan
2010-10-21 10:52 ` [PATCH] powerpc: Fix hcall tracepoint recursion Anton Blanchard
2010-10-22 7:22 ` Li Zefan
2010-10-22 7:25 ` Subrata Modak
2010-11-11 7:57 ` Subrata Modak
2010-10-22 14:14 ` Steven Rostedt