* [PATCH 3/3] tracing/function-return-tracer: add the overrun field
@ 2008-11-17 2:22 Frederic Weisbecker
2008-11-17 8:49 ` Ingo Molnar
0 siblings, 1 reply; 29+ messages in thread
From: Frederic Weisbecker @ 2008-11-17 2:22 UTC (permalink / raw)
To: Ingo Molnar; +Cc: Steven Rostedt, Linux Kernel
Impact: help to find the better depth of trace
We decided to arbitrarily define the depth of function return trace as "20".
Perhaps this is not enough. To help finding an optimal depth, we measure now
the overrun: the number of functions that have been missed for the current thread.
By default this is not displayed, we have to set a particular flag on the return
tracer: echo overrun > /debug/tracing/trace_options
And the overrun will be printed on the right.
As the trace shows below, the current 20 depth is not enough.
update_wall_time+0x37f/0x8c0 -> update_xtime_cache (345 ns) (Overruns: 2838)
update_wall_time+0x384/0x8c0 -> clocksource_get_next (1141 ns) (Overruns: 2838)
do_timer+0x23/0x100 -> update_wall_time (3882 ns) (Overruns: 2838)
tick_do_update_jiffies64+0xbf/0x160 -> do_timer (5339 ns) (Overruns: 2838)
tick_sched_timer+0x6a/0xf0 -> tick_do_update_jiffies64 (7209 ns) (Overruns: 2838)
vgacon_set_cursor_size+0x98/0x120 -> native_io_delay (2613 ns) (Overruns: 274)
vgacon_cursor+0x16e/0x1d0 -> vgacon_set_cursor_size (33151 ns) (Overruns: 274)
set_cursor+0x5f/0x80 -> vgacon_cursor (36432 ns) (Overruns: 274)
con_flush_chars+0x34/0x40 -> set_cursor (38790 ns) (Overruns: 274)
release_console_sem+0x1ec/0x230 -> up (721 ns) (Overruns: 274)
release_console_sem+0x225/0x230 -> wake_up_klogd (316 ns) (Overruns: 274)
con_flush_chars+0x39/0x40 -> release_console_sem (2996 ns) (Overruns: 274)
con_write+0x22/0x30 -> con_flush_chars (46067 ns) (Overruns: 274)
n_tty_write+0x1cc/0x360 -> con_write (292670 ns) (Overruns: 274)
smp_apic_timer_interrupt+0x2a/0x90 -> native_apic_mem_write (330 ns) (Overruns: 274)
irq_enter+0x17/0x70 -> idle_cpu (413 ns) (Overruns: 274)
smp_apic_timer_interrupt+0x2f/0x90 -> irq_enter (1525 ns) (Overruns: 274)
ktime_get_ts+0x40/0x70 -> getnstimeofday (465 ns) (Overruns: 274)
ktime_get_ts+0x60/0x70 -> set_normalized_timespec (436 ns) (Overruns: 274)
ktime_get+0x16/0x30 -> ktime_get_ts (2501 ns) (Overruns: 274)
hrtimer_interrupt+0x77/0x1a0 -> ktime_get (3439 ns) (Overruns: 274)
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
arch/x86/include/asm/thread_info.h | 7 ++++++
arch/x86/kernel/ftrace.c | 10 ++++++--
include/linux/ftrace.h | 2 +
include/linux/sched.h | 1 +
kernel/trace/trace.c | 1 +
kernel/trace/trace.h | 1 +
kernel/trace/trace_functions_return.c | 38 +++++++++++++++++++++++++++-----
7 files changed, 51 insertions(+), 9 deletions(-)
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index a711583..e90e81e 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -21,6 +21,7 @@ struct task_struct;
struct exec_domain;
#include <asm/processor.h>
#include <asm/ftrace.h>
+#include <asm/atomic.h>
struct thread_info {
struct task_struct *task; /* main task structure */
@@ -45,6 +46,11 @@ struct thread_info {
int curr_ret_stack;
/* Stack of return addresses for return function tracing */
struct ftrace_ret_stack ret_stack[FTRACE_RET_STACK_SIZE];
+ /*
+ * Number of functions that haven't been traced
+ * because of depth overrun.
+ */
+ atomic_t trace_overrun;
#endif
};
@@ -61,6 +67,7 @@ struct thread_info {
.fn = do_no_restart_syscall, \
}, \
.curr_ret_stack = -1,\
+ .trace_overrun = ATOMIC_INIT(0) \
}
#else
#define INIT_THREAD_INFO(tsk) \
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index 924153e..356bb1e 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -353,8 +353,10 @@ static int push_return_trace(unsigned long ret, unsigned long long time,
struct thread_info *ti = current_thread_info();
/* The return trace stack is full */
- if (ti->curr_ret_stack == FTRACE_RET_STACK_SIZE - 1)
+ if (ti->curr_ret_stack == FTRACE_RET_STACK_SIZE - 1) {
+ atomic_inc(&ti->trace_overrun);
return -EBUSY;
+ }
index = ++ti->curr_ret_stack;
barrier();
@@ -367,7 +369,7 @@ static int push_return_trace(unsigned long ret, unsigned long long time,
/* Retrieve a function return address to the trace stack on thread info.*/
static void pop_return_trace(unsigned long *ret, unsigned long long *time,
- unsigned long *func)
+ unsigned long *func, unsigned long *overrun)
{
int index;
@@ -376,6 +378,7 @@ static void pop_return_trace(unsigned long *ret, unsigned long long *time,
*ret = ti->ret_stack[index].ret;
*func = ti->ret_stack[index].func;
*time = ti->ret_stack[index].calltime;
+ *overrun = atomic_read(&ti->trace_overrun);
ti->curr_ret_stack--;
}
@@ -386,7 +389,8 @@ static void pop_return_trace(unsigned long *ret, unsigned long long *time,
unsigned long ftrace_return_to_handler(void)
{
struct ftrace_retfunc trace;
- pop_return_trace(&trace.ret, &trace.calltime, &trace.func);
+ pop_return_trace(&trace.ret, &trace.calltime, &trace.func,
+ &trace.overrun);
trace.rettime = cpu_clock(raw_smp_processor_id());
ftrace_function_return(&trace);
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index f1af1aa..f7ba4ea 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -318,6 +318,8 @@ struct ftrace_retfunc {
unsigned long func; /* Current function */
unsigned long long calltime;
unsigned long long rettime;
+ /* Number of functions that overran the depth limit for current task */
+ unsigned long overrun;
};
#ifdef CONFIG_FUNCTION_RET_TRACER
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 979da3c..deba0ed 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2013,6 +2013,7 @@ static inline void setup_thread_stack(struct task_struct *p, struct task_struct
* used.
*/
task_thread_info(p)->curr_ret_stack = -1;
+ atomic_set(&task_thread_info(p)->trace_overrun, 0);
#endif
}
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8c4eb80..c13e4ec 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -833,6 +833,7 @@ static void __trace_function_return(struct trace_array *tr,
entry->parent_ip = trace->ret;
entry->rettime = trace->rettime;
entry->calltime = trace->calltime;
+ entry->overrun = trace->overrun;
ring_buffer_unlock_commit(global_trace.buffer, event, irq_flags);
}
#endif
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index fc06485..0eb20f1 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -60,6 +60,7 @@ struct ftrace_ret_entry {
unsigned long parent_ip;
unsigned long long calltime;
unsigned long long rettime;
+ unsigned long overrun;
};
extern struct tracer boot_tracer;
diff --git a/kernel/trace/trace_functions_return.c b/kernel/trace/trace_functions_return.c
index a68564a..e00d645 100644
--- a/kernel/trace/trace_functions_return.c
+++ b/kernel/trace/trace_functions_return.c
@@ -14,6 +14,19 @@
#include "trace.h"
+#define TRACE_RETURN_PRINT_OVERRUN 0x1
+static struct tracer_opt trace_opts[] = {
+ /* Display overruns or not */
+ { TRACER_OPT(overrun, TRACE_RETURN_PRINT_OVERRUN) },
+ { } /* Empty entry */
+};
+
+static struct tracer_flags tracer_flags = {
+ .val = 0, /* Don't display overruns by default */
+ .opts = trace_opts
+};
+
+
static int return_trace_init(struct trace_array *tr)
{
int cpu;
@@ -42,26 +55,39 @@ print_return_function(struct trace_iterator *iter)
ret = trace_seq_printf(s, "%pF -> ", (void *)field->parent_ip);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
+
ret = seq_print_ip_sym(s, field->ip,
trace_flags & TRACE_ITER_SYM_MASK);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
- ret = trace_seq_printf(s, " (%llu ns)\n",
+
+ ret = trace_seq_printf(s, " (%llu ns)",
field->rettime - field->calltime);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
- else
- return TRACE_TYPE_HANDLED;
+
+ if (tracer_flags.val & TRACE_RETURN_PRINT_OVERRUN) {
+ ret = trace_seq_printf(s, " (Overruns: %lu)",
+ field->overrun);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ ret = trace_seq_printf(s, "\n");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ return TRACE_TYPE_HANDLED;
}
return TRACE_TYPE_UNHANDLED;
}
-static struct tracer return_trace __read_mostly =
-{
+static struct tracer return_trace __read_mostly = {
.name = "return",
.init = return_trace_init,
.reset = return_trace_reset,
- .print_line = print_return_function
+ .print_line = print_return_function,
+ .flags = &tracer_flags,
};
static __init int init_return_trace(void)
--
1.5.2.5
^ permalink raw reply related [flat|nested] 29+ messages in thread
* Re: [PATCH 3/3] tracing/function-return-tracer: add the overrun field
2008-11-17 2:22 [PATCH 3/3] tracing/function-return-tracer: add the overrun field Frederic Weisbecker
@ 2008-11-17 8:49 ` Ingo Molnar
2008-11-17 18:38 ` Frederic Weisbecker
2008-11-18 14:21 ` Steven Rostedt
0 siblings, 2 replies; 29+ messages in thread
From: Ingo Molnar @ 2008-11-17 8:49 UTC (permalink / raw)
To: Frederic Weisbecker; +Cc: Steven Rostedt, Linux Kernel
* Frederic Weisbecker <fweisbec@gmail.com> wrote:
> Impact: help to find the better depth of trace
>
> We decided to arbitrary define the depth of function return trace as
> "20". Perhaps this is not enough. To help finding an optimal depth,
> we measure now the overrun: the number of functions that have been
> missed for the current thread. By default this is not displayed, we
> have to do set a particular flag on the return tracer: echo overrun
> > /debug/tracing/trace_options And the overrun will be printed on
> the right.
>
> As the trace shows below, the current 20 depth is not enough.
>
> update_wall_time+0x37f/0x8c0 -> update_xtime_cache (345 ns) (Overruns: 2838)
> update_wall_time+0x384/0x8c0 -> clocksource_get_next (1141 ns) (Overruns: 2838)
> do_timer+0x23/0x100 -> update_wall_time (3882 ns) (Overruns: 2838)
hm, interesting. Have you tried to figure out what a practical depth
limit would be?
With lockdep we made the experience that function call stacks can be
very deep - if we count IRQ contexts too it can be up to 100 in the
extreme cases. (but at that stage kernel stack limits start hitting
us)
I'd say 50 would be needed.
Ingo
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 3/3] tracing/function-return-tracer: add the overrun field
2008-11-17 8:49 ` Ingo Molnar
@ 2008-11-17 18:38 ` Frederic Weisbecker
2008-11-18 8:47 ` Ingo Molnar
2008-11-18 14:21 ` Steven Rostedt
1 sibling, 1 reply; 29+ messages in thread
From: Frederic Weisbecker @ 2008-11-17 18:38 UTC (permalink / raw)
To: Ingo Molnar; +Cc: Steven Rostedt, Linux Kernel
Ingo Molnar a écrit :
> * Frederic Weisbecker <fweisbec@gmail.com> wrote:
>
>> Impact: help to find the better depth of trace
>>
>> We decided to arbitrary define the depth of function return trace as
>> "20". Perhaps this is not enough. To help finding an optimal depth,
>> we measure now the overrun: the number of functions that have been
>> missed for the current thread. By default this is not displayed, we
>> have to do set a particular flag on the return tracer: echo overrun
>>> /debug/tracing/trace_options And the overrun will be printed on
>> the right.
>>
>> As the trace shows below, the current 20 depth is not enough.
>>
>> update_wall_time+0x37f/0x8c0 -> update_xtime_cache (345 ns) (Overruns: 2838)
>> update_wall_time+0x384/0x8c0 -> clocksource_get_next (1141 ns) (Overruns: 2838)
>> do_timer+0x23/0x100 -> update_wall_time (3882 ns) (Overruns: 2838)
>
> hm, interesting. Have you tried to figure out what a practical depth
> limit would be?
>
> With lockdep we made the experience that function call stacks can be
> very deep - if we count IRQ contexts too it can be up to 100 in the
> extreme cases. (but at that stage kernel stack limits start hitting
> us)
>
> I'd say 50 would be needed.
>
> Ingo
Ok I will try with 50. If there are still a lot and often missing traces with this depth, perhaps
should we consider a hybrid solution between ret stack and trampolines?
We could use the normal ret stack on struct info for most common cases and the trampoline when we are exceeding the depth....
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 3/3] tracing/function-return-tracer: add the overrun field
2008-11-17 18:38 ` Frederic Weisbecker
@ 2008-11-18 8:47 ` Ingo Molnar
2008-11-18 14:23 ` Steven Rostedt
0 siblings, 1 reply; 29+ messages in thread
From: Ingo Molnar @ 2008-11-18 8:47 UTC (permalink / raw)
To: Frederic Weisbecker; +Cc: Steven Rostedt, Linux Kernel
* Frederic Weisbecker <fweisbec@gmail.com> wrote:
> Ingo Molnar a écrit :
> > * Frederic Weisbecker <fweisbec@gmail.com> wrote:
> >
> >> Impact: help to find the better depth of trace
> >>
> >> We decided to arbitrary define the depth of function return trace as
> >> "20". Perhaps this is not enough. To help finding an optimal depth,
> >> we measure now the overrun: the number of functions that have been
> >> missed for the current thread. By default this is not displayed, we
> >> have to do set a particular flag on the return tracer: echo overrun
> >>> /debug/tracing/trace_options And the overrun will be printed on
> >> the right.
> >>
> >> As the trace shows below, the current 20 depth is not enough.
> >>
> >> update_wall_time+0x37f/0x8c0 -> update_xtime_cache (345 ns) (Overruns: 2838)
> >> update_wall_time+0x384/0x8c0 -> clocksource_get_next (1141 ns) (Overruns: 2838)
> >> do_timer+0x23/0x100 -> update_wall_time (3882 ns) (Overruns: 2838)
> >
> > hm, interesting. Have you tried to figure out what a practical depth
> > limit would be?
> >
> > With lockdep we made the experience that function call stacks can be
> > very deep - if we count IRQ contexts too it can be up to 100 in the
> > extreme cases. (but at that stage kernel stack limits start hitting
> > us)
> >
> > I'd say 50 would be needed.
> >
> > Ingo
>
>
> Ok I will try with 50. If there are still a lot and often missing
> traces with this depth, perhaps should we consider a hybrid solution
> between ret stack and trampolines? We could use the normal ret stack
> on struct info for most common cases and the trampoline when we are
> exceeding the depth....
dunno, trampolines make me feel uneasy.
Could you set it to some really large value (200) and add a "max depth
seen" variable perhaps, and see the maximum depth?
Ingo
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 3/3] tracing/function-return-tracer: add the overrun field
2008-11-17 8:49 ` Ingo Molnar
2008-11-17 18:38 ` Frederic Weisbecker
@ 2008-11-18 14:21 ` Steven Rostedt
2008-11-18 14:48 ` Ingo Molnar
1 sibling, 1 reply; 29+ messages in thread
From: Steven Rostedt @ 2008-11-18 14:21 UTC (permalink / raw)
To: Ingo Molnar; +Cc: Frederic Weisbecker, Linux Kernel
On Mon, 17 Nov 2008, Ingo Molnar wrote:
>
> * Frederic Weisbecker <fweisbec@gmail.com> wrote:
>
> > Impact: help to find the better depth of trace
> >
> > We decided to arbitrary define the depth of function return trace as
> > "20". Perhaps this is not enough. To help finding an optimal depth,
> > we measure now the overrun: the number of functions that have been
> > missed for the current thread. By default this is not displayed, we
> > have to do set a particular flag on the return tracer: echo overrun
> > > /debug/tracing/trace_options And the overrun will be printed on
> > the right.
> >
> > As the trace shows below, the current 20 depth is not enough.
> >
> > update_wall_time+0x37f/0x8c0 -> update_xtime_cache (345 ns) (Overruns: 2838)
> > update_wall_time+0x384/0x8c0 -> clocksource_get_next (1141 ns) (Overruns: 2838)
> > do_timer+0x23/0x100 -> update_wall_time (3882 ns) (Overruns: 2838)
>
> hm, interesting. Have you tried to figure out what a practical depth
> limit would be?
>
> With lockdep we made the experience that function call stacks can be
> very deep - if we count IRQ contexts too it can be up to 100 in the
> extreme cases. (but at that stage kernel stack limits start hitting
> us)
>
> I'd say 50 would be needed.
I was just looking at the stack tracer, and it pretty much gives us the
answer ;-) I'm hitting on max traces around 55, but some of those are asm
calls. We could do 50 or 60? We probably want to make sure that the two
do not come close to hitting. That is, the bottom of the stack to
overwrite the saved return addresses.
-- Steve
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 3/3] tracing/function-return-tracer: add the overrun field
2008-11-18 8:47 ` Ingo Molnar
@ 2008-11-18 14:23 ` Steven Rostedt
2008-11-18 14:51 ` Ingo Molnar
0 siblings, 1 reply; 29+ messages in thread
From: Steven Rostedt @ 2008-11-18 14:23 UTC (permalink / raw)
To: Ingo Molnar; +Cc: Frederic Weisbecker, Linux Kernel
On Tue, 18 Nov 2008, Ingo Molnar wrote:
> >
> > Ok I will try with 50. If there are still a lot and often missing
> > traces with this depth, perhaps should we consider a hybrid solution
> > between ret stack and trampolines? We could use the normal ret stack
> > on struct info for most common cases and the trampoline when we are
> > exceeding the depth....
>
> dunno, trampolines make me feel uneasy.
>
> Could you set it to some really large value (200) and add a "max depth
> seen" variable perhaps, and see the maximum depth?
Don't run that on a box you care about ;-) But hopefully the stacks will
not collide. This should also depend on IRQSTACKS.
-- Steve
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 3/3] tracing/function-return-tracer: add the overrun field
2008-11-18 14:21 ` Steven Rostedt
@ 2008-11-18 14:48 ` Ingo Molnar
2008-11-18 14:58 ` Steven Rostedt
0 siblings, 1 reply; 29+ messages in thread
From: Ingo Molnar @ 2008-11-18 14:48 UTC (permalink / raw)
To: Steven Rostedt; +Cc: Frederic Weisbecker, Linux Kernel
* Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Mon, 17 Nov 2008, Ingo Molnar wrote:
>
> >
> > * Frederic Weisbecker <fweisbec@gmail.com> wrote:
> >
> > > Impact: help to find the better depth of trace
> > >
> > > We decided to arbitrary define the depth of function return trace as
> > > "20". Perhaps this is not enough. To help finding an optimal depth,
> > > we measure now the overrun: the number of functions that have been
> > > missed for the current thread. By default this is not displayed, we
> > > have to do set a particular flag on the return tracer: echo overrun
> > > > /debug/tracing/trace_options And the overrun will be printed on
> > > the right.
> > >
> > > As the trace shows below, the current 20 depth is not enough.
> > >
> > > update_wall_time+0x37f/0x8c0 -> update_xtime_cache (345 ns) (Overruns: 2838)
> > > update_wall_time+0x384/0x8c0 -> clocksource_get_next (1141 ns) (Overruns: 2838)
> > > do_timer+0x23/0x100 -> update_wall_time (3882 ns) (Overruns: 2838)
> >
> > hm, interesting. Have you tried to figure out what a practical depth
> > limit would be?
> >
> > With lockdep we made the experience that function call stacks can be
> > very deep - if we count IRQ contexts too it can be up to 100 in the
> > extreme cases. (but at that stage kernel stack limits start hitting
> > us)
> >
> > I'd say 50 would be needed.
>
> I was just looking at the stack tracer, and it pretty much gives us
> the answer ;-) I'm hitting on max traces around 55, but some of
> those are asm calls. We could do 50 or 60? We probably want to make
> sure that the two do not come close to hitting. That is, the bottom
> of the stack to overwrite the saved return addresses.
does the stack tracer properly nest across IRQ entry boundaries
already on x86? We used to have problems in that area.
Ingo
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 3/3] tracing/function-return-tracer: add the overrun field
2008-11-18 14:23 ` Steven Rostedt
@ 2008-11-18 14:51 ` Ingo Molnar
2008-11-18 15:06 ` Steven Rostedt
0 siblings, 1 reply; 29+ messages in thread
From: Ingo Molnar @ 2008-11-18 14:51 UTC (permalink / raw)
To: Steven Rostedt; +Cc: Frederic Weisbecker, Linux Kernel
* Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Tue, 18 Nov 2008, Ingo Molnar wrote:
> > >
> > > Ok I will try with 50. If there are still a lot and often missing
> > > traces with this depth, perhaps should we consider a hybrid solution
> > > between ret stack and trampolines? We could use the normal ret stack
> > > on struct info for most common cases and the trampoline when we are
> > > exceeding the depth....
> >
> > dunno, trampolines make me feel uneasy.
> >
> > Could you set it to some really large value (200) and add a "max
> > depth seen" variable perhaps, and see the maximum depth?
>
> Don't run that on a box you care about ;-) But hopefully the stacks
> will not collide. This should also depend on IRQSTACKS.
that reminds me: ti->ret_stack[] should be moved to task->ret_stack[].
That way we decouple its size from any kernel stack size limits.
(thread-info resides at one end of the kernel stack, on x86)
Ingo
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 3/3] tracing/function-return-tracer: add the overrun field
2008-11-18 14:48 ` Ingo Molnar
@ 2008-11-18 14:58 ` Steven Rostedt
2008-11-18 15:02 ` Ingo Molnar
0 siblings, 1 reply; 29+ messages in thread
From: Steven Rostedt @ 2008-11-18 14:58 UTC (permalink / raw)
To: Ingo Molnar; +Cc: Frederic Weisbecker, Linux Kernel
On Tue, 18 Nov 2008, Ingo Molnar wrote:
> * Steven Rostedt <rostedt@goodmis.org> wrote:
> >
> > I was just looking at the stack tracer, and it pretty much gives us
> > the answer ;-) I'm hitting on max traces around 55, but some of
> > those are asm calls. We could do 50 or 60? We probably want to make
> > sure that the two do not come close to hitting. That is, the bottom
> > of the stack to overwrite the saved return addresses.
>
> does the stack tracer properly nest across IRQ entry boundaries
> already on x86? We used to have problems in that area.
Actually, because the stack tracer is in generic code, we punt on IRQ
stacks:
/* we do not handle interrupt stacks yet */
if (!object_is_on_stack(&this_size))
return;
I check if the local variable "this_size" is on the current->stack and if
it is not then this means that we are using some other stack, and we do
not record it.
What would be needed is to make a per-arch stack call. Perhaps have a:
arch_check_stack(&this_size, &max_stack_trace, &max_stack_size);
Where a weak function can be defined to return nothing. But the arch can
check which stack the "this_size" variable is on and run the stack tracer
against that stack.
Maybe we should have two stack traces, a stack_trace file and a
stack_trace_irq ?
Because, some archs, like x86_64 have different size stacks. The thread
stack is 8K where as the IRQ stack is 4K. We may want to see which IRQ
stack call is the worst, and not compare it to the thread stack call.
-- Steve
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 3/3] tracing/function-return-tracer: add the overrun field
2008-11-18 14:58 ` Steven Rostedt
@ 2008-11-18 15:02 ` Ingo Molnar
2008-11-18 15:11 ` Steven Rostedt
0 siblings, 1 reply; 29+ messages in thread
From: Ingo Molnar @ 2008-11-18 15:02 UTC (permalink / raw)
To: Steven Rostedt; +Cc: Frederic Weisbecker, Linux Kernel
* Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Tue, 18 Nov 2008, Ingo Molnar wrote:
> > * Steven Rostedt <rostedt@goodmis.org> wrote:
> > >
> > > I was just looking at the stack tracer, and it pretty much gives us
> > > the answer ;-) I'm hitting on max traces around 55, but some of
> > > those are asm calls. We could do 50 or 60? We probably want to make
> > > sure that the two do not come close to hitting. That is, the bottom
> > > of the stack to overwrite the saved return addresses.
> >
> > does the stack tracer properly nest across IRQ entry boundaries
> > already on x86? We used to have problems in that area.
>
> Actually, because the stack tracer is in generic code, we punt on IRQ
> stacks:
>
> /* we do not handle interrupt stacks yet */
> if (!object_is_on_stack(&this_size))
> return;
>
> I check if the local variable "this_size" is on the current->stack
> and if it is not then this means that we are using some other stack,
> and we do not record it.
>
> What would be needed is to make a per-arch stack call. Perhaps have
> a:
>
> arch_check_stack(&this_size, &max_stack_trace, &max_stack_size);
>
> Where a weak function can be defined to return nothing. But the arch
> can check which stack the "this_size" variable is on and run the
> stack tracer against that stack.
>
> Maybe we should have two stack traces, a stack_trace file and a
> stack_trace_irq ?
>
> Because, some archs, like x86_64 have different size stacks. The
> thread stack is 8K where as the IRQ stack is 4K. We may want to see
> which IRQ stack call is the worst, and not compare it to the thread
> stack call.
... and on 64-bit x86 the IRQ stacks are 16K, and some of the IST
exception stacks have different sizes as well.
Ingo
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 3/3] tracing/function-return-tracer: add the overrun field
2008-11-18 14:51 ` Ingo Molnar
@ 2008-11-18 15:06 ` Steven Rostedt
2008-11-18 15:13 ` Ingo Molnar
0 siblings, 1 reply; 29+ messages in thread
From: Steven Rostedt @ 2008-11-18 15:06 UTC (permalink / raw)
To: Ingo Molnar; +Cc: Frederic Weisbecker, Linux Kernel
On Tue, 18 Nov 2008, Ingo Molnar wrote:
>
> * Steven Rostedt <rostedt@goodmis.org> wrote:
>
> >
> > On Tue, 18 Nov 2008, Ingo Molnar wrote:
> > > >
> > > > Ok I will try with 50. If there are still a lot and often missing
> > > > traces with this depth, perhaps should we consider a hybrid solution
> > > > between ret stack and trampolines? We could use the normal ret stack
> > > > on struct info for most common cases and the trampoline when we are
> > > > exceeding the depth....
> > >
> > > dunno, trampolines make me feel uneasy.
> > >
> > > Could you set it to some really large value (200) and add a "max
> > > depth seen" variable perhaps, and see the maximum depth?
> >
> > Don't run that on a box you care about ;-) But hopefully the stacks
> > will not collide. This should also depend on IRQSTACKS.
>
> that reminds me: ti->ret_stack[] should be moved to task->ret_stack[].
> That way we decouple its size from any kernel stack size limits.
> (thread-info resides at one end of the kernel stack, on x86)
Yeah, I recommended that to Frederic to save space. But that can be
dangerous. Using task instead would be safer with the downside of making
the task struct even bigger.
-- Steve
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 3/3] tracing/function-return-tracer: add the overrun field
2008-11-18 15:02 ` Ingo Molnar
@ 2008-11-18 15:11 ` Steven Rostedt
0 siblings, 0 replies; 29+ messages in thread
From: Steven Rostedt @ 2008-11-18 15:11 UTC (permalink / raw)
To: Ingo Molnar; +Cc: Frederic Weisbecker, Linux Kernel
On Tue, 18 Nov 2008, Ingo Molnar wrote:
> >
> > What would be needed is to make a per-arch stack call. Perhaps have
> > a:
> >
> > arch_check_stack(&this_size, &max_stack_trace, &max_stack_size);
> >
> > Where a weak function can be defined to return nothing. But the arch
> > can check which stack the "this_size" variable is on and run the
> > stack tracer against that stack.
> >
> > Maybe we should have two stack traces, a stack_trace file and a
> > stack_trace_irq ?
> >
> > Because, some archs, like x86_64 have different size stacks. The
> > thread stack is 8K where as the IRQ stack is 4K. We may want to see
> > which IRQ stack call is the worst, and not compare it to the thread
> > stack call.
>
> ... and on 64-bit x86 the IRQ stacks are 16K, and some of the IST
> exception stacks have different sizes as well.
Heh, I expect that we will not support IRQ stacks until 2.6.30 or 31 :-/
-- Steve
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 3/3] tracing/function-return-tracer: add the overrun field
2008-11-18 15:06 ` Steven Rostedt
@ 2008-11-18 15:13 ` Ingo Molnar
2008-11-18 15:22 ` Steven Rostedt
0 siblings, 1 reply; 29+ messages in thread
From: Ingo Molnar @ 2008-11-18 15:13 UTC (permalink / raw)
To: Steven Rostedt; +Cc: Frederic Weisbecker, Linux Kernel
* Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Tue, 18 Nov 2008, Ingo Molnar wrote:
>
> >
> > * Steven Rostedt <rostedt@goodmis.org> wrote:
> >
> > >
> > > On Tue, 18 Nov 2008, Ingo Molnar wrote:
> > > > >
> > > > > Ok I will try with 50. If there are still a lot and often missing
> > > > > traces with this depth, perhaps should we consider a hybrid solution
> > > > > between ret stack and trampolines? We could use the normal ret stack
> > > > > on struct info for most common cases and the trampoline when we are
> > > > > exceeding the depth....
> > > >
> > > > dunno, trampolines make me feel uneasy.
> > > >
> > > > Could you set it to some really large value (200) and add a "max
> > > > depth seen" variable perhaps, and see the maximum depth?
> > >
> > > Don't run that on a box you care about ;-) But hopefully the stacks
> > > will not collide. This should also depend on IRQSTACKS.
> >
> > that reminds me: ti->ret_stack[] should be moved to task->ret_stack[].
> > That way we decouple its size from any kernel stack size limits.
> > (thread-info resides at one end of the kernel stack, on x86)
>
> Yeah, I recommended that to Frederic to save space. But that can be
> dangerous. Using task instead would be safer with the downside of
> making the task struct even bigger.
We almost never put new stuff into thread_info - we have the lockdep
lock stack in the task structure too, for similar reasons.
Ingo
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 3/3] tracing/function-return-tracer: add the overrun field
2008-11-18 15:13 ` Ingo Molnar
@ 2008-11-18 15:22 ` Steven Rostedt
2008-11-18 15:50 ` Ingo Molnar
0 siblings, 1 reply; 29+ messages in thread
From: Steven Rostedt @ 2008-11-18 15:22 UTC (permalink / raw)
To: Ingo Molnar; +Cc: Frederic Weisbecker, Linux Kernel
On Tue, 18 Nov 2008, Ingo Molnar wrote:
> > >
> > > that reminds me: ti->ret_stack[] should be moved to task->ret_stack[].
> > > That way we decouple its size from any kernel stack size limits.
> > > (thread-info resides at one end of the kernel stack, on x86)
> >
> > Yeah, I recommended that to Frederic to save space. But that can be
> > dangerous. Using task instead would be safer with the downside of
> > making the task struct even bigger.
>
> We almost never put new stuff into thread_info - we have the lockdep
> lock stack in the task structure too, for similar reasons.
Yeah, it was just a recommendation, and perhaps not a good one ;-)
Frederic, it is better if you move the array from the thread info to the
task struct. It will take up more memory but it is a hell of a lot safer.
The pro here definitely outweighs the con.
-- Steve
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 3/3] tracing/function-return-tracer: add the overrun field
2008-11-18 15:22 ` Steven Rostedt
@ 2008-11-18 15:50 ` Ingo Molnar
2008-11-18 16:31 ` Frédéric Weisbecker
0 siblings, 1 reply; 29+ messages in thread
From: Ingo Molnar @ 2008-11-18 15:50 UTC (permalink / raw)
To: Steven Rostedt; +Cc: Frederic Weisbecker, Linux Kernel
* Steven Rostedt <rostedt@goodmis.org> wrote:
>
> On Tue, 18 Nov 2008, Ingo Molnar wrote:
> > > >
> > > > that reminds me: ti->ret_stack[] should be moved to task->ret_stack[].
> > > > That way we decouple its size from any kernel stack size limits.
> > > > (thread-info resides at one end of the kernel stack, on x86)
> > >
> > > Yeah, I recommended that to Frederic to save space. But that can be
> > > dangerous. Using task instead would be safer with the downside of
> > > making the task struct even bigger.
> >
> > We almost never put new stuff into thread_info - we have the
> > lockdep lock stack in the task structure too, for similar reasons.
>
> Yeah, it was just a recommendation, and perhaps not a good one ;-)
>
> Frederic, it is better if you move the array from the thread info to
> the task struct. It will take up more memory but it is a hell of a
> lot safer. The pro here definitely outways the con.
if the memory footprint starts mattering we could turn this into a
single pointer to an array - and add/remove these arrays (from all
tasks currently running) as the tracer is turned on/off.
Ingo
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 3/3] tracing/function-return-tracer: add the overrun field
2008-11-18 15:50 ` Ingo Molnar
@ 2008-11-18 16:31 ` Frédéric Weisbecker
2008-11-18 16:40 ` Ingo Molnar
2008-11-18 16:43 ` Steven Rostedt
0 siblings, 2 replies; 29+ messages in thread
From: Frédéric Weisbecker @ 2008-11-18 16:31 UTC (permalink / raw)
To: Ingo Molnar; +Cc: Steven Rostedt, Linux Kernel
2008/11/18 Ingo Molnar <mingo@elte.hu>:
>
> * Steven Rostedt <rostedt@goodmis.org> wrote:
>
>>
>> On Tue, 18 Nov 2008, Ingo Molnar wrote:
>> > > >
>> > > > that reminds me: ti->ret_stack[] should be moved to task->ret_stack[].
>> > > > That way we decouple its size from any kernel stack size limits.
>> > > > (thread-info resides at one end of the kernel stack, on x86)
>> > >
>> > > Yeah, I recommended that to Frederic to save space. But that can be
>> > > dangerous. Using task instead would be safer with the downside of
>> > > making the task struct even bigger.
>> >
>> > We almost never put new stuff into thread_info - we have the
>> > lockdep lock stack in the task structure too, for similar reasons.
>>
>> Yeah, it was just a recommendation, and perhaps not a good one ;-)
>>
>> Frederic, it is better if you move the array from the thread info to
>> the task struct. It will take up more memory but it is a hell of a
>> lot safer. The pro here definitely outweighs the con.
>
> if the memory footprint starts mattering we could turn this into a
> single pointer to an array - and add/remove these arrays (from all
> tasks currently running) as the tracer is turned on/off.
>
> Ingo
>
Ok. So what do you suggest, then? Do I begin to move the array from
thread info to struct task but by keeping the static
array or should I directly use a dynamic allocation and add/remove dynamically?
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 3/3] tracing/function-return-tracer: add the overrun field
2008-11-18 16:31 ` Frédéric Weisbecker
@ 2008-11-18 16:40 ` Ingo Molnar
2008-11-18 16:47 ` Frédéric Weisbecker
2008-11-18 16:43 ` Steven Rostedt
1 sibling, 1 reply; 29+ messages in thread
From: Ingo Molnar @ 2008-11-18 16:40 UTC (permalink / raw)
To: Frédéric Weisbecker; +Cc: Steven Rostedt, Linux Kernel
* Frédéric Weisbecker <fweisbec@gmail.com> wrote:
> 2008/11/18 Ingo Molnar <mingo@elte.hu>:
> >
> > * Steven Rostedt <rostedt@goodmis.org> wrote:
> >
> >>
> >> On Tue, 18 Nov 2008, Ingo Molnar wrote:
> >> > > >
> >> > > > that reminds me: ti->ret_stack[] should be moved to task->ret_stack[].
> >> > > > That way we decouple its size from any kernel stack size limits.
> >> > > > (thread-info resides at one end of the kernel stack, on x86)
> >> > >
> >> > > Yeah, I recommended that to Frederic to save space. But that can be
> >> > > dangerous. Using task instead would be safer with the downside of
> >> > > making the task struct even bigger.
> >> >
> >> > We almost never put new stuff into thread_info - we have the
> >> > lockdep lock stack in the task structure too, for similar reasons.
> >>
> >> Yeah, it was just a recommendation, and perhaps not a good one ;-)
> >>
> >> Frederic, it is better if you move the array from the thread info to
> >> the task struct. It will take up more memory but it is a hell of a
> >> lot safer. The pro here definitely outweighs the con.
> >
> > if the memory footprint starts mattering we could turn this into a
> > single pointer to an array - and add/remove these arrays (from all
> > tasks currently running) as the tracer is turned on/off.
> >
> > Ingo
> >
>
> Ok. So what do you suggest, then? Do I begin to move the array from
> thread info to struct task but by keeping the static array or should
> I directly use a dynamic allocation and add/remove dynamically?
Would be nice to have the dynamic allocation straight away - the
tracer looks rather useful but people won't enable it by default for
sure. This way distros could enable it all by default without worrying
about the memory overhead.
And with a dynamic array the tracer could even have a tunable 'max
depth' option [only changeable when the tracer is not active].
So it all looks much nicer to me that way ... if you succeed in coding
it up that is ;-) Changing all tasks at once is tricky business: you'd
have to take the tasklist_lock and iterate over every user and kernel
task - including idle tasks.
Ingo
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 3/3] tracing/function-return-tracer: add the overrun field
2008-11-18 16:31 ` Frédéric Weisbecker
2008-11-18 16:40 ` Ingo Molnar
@ 2008-11-18 16:43 ` Steven Rostedt
1 sibling, 0 replies; 29+ messages in thread
From: Steven Rostedt @ 2008-11-18 16:43 UTC (permalink / raw)
To: Frédéric Weisbecker; +Cc: Ingo Molnar, Linux Kernel
On Tue, 18 Nov 2008, Frédéric Weisbecker wrote:
> >>
> >> Yeah, it was just a recommendation, and perhaps not a good one ;-)
> >>
> >> Frederic, it is better if you move the array from the thread info to
> >> the task struct. It will take up more memory but it is a hell of a
> >> lot safer. The pro here definitely outweighs the con.
> >
> > if the memory footprint starts mattering we could turn this into a
> > single pointer to an array - and add/remove these arrays (from all
> > tasks currently running) as the tracer is turned on/off.
> >
> > Ingo
> >
>
> Ok. So what do you suggest, then? Do I begin to move the array from
> thread info to struct task but by keeping the static
> array or should I directly use a dynamic allocation and add/remove dynamically?
I would recommend using a static array in task struct (say 200?) and keep
the max recorded for later output. This way we can find a better size.
As for the dynamic use of different arrays. Put that towards the end. We
want to get this working solid first before adding more variables to the
equation.
-- Steve
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 3/3] tracing/function-return-tracer: add the overrun field
2008-11-18 16:40 ` Ingo Molnar
@ 2008-11-18 16:47 ` Frédéric Weisbecker
2008-11-18 16:53 ` Steven Rostedt
2008-11-18 21:03 ` Ingo Molnar
0 siblings, 2 replies; 29+ messages in thread
From: Frédéric Weisbecker @ 2008-11-18 16:47 UTC (permalink / raw)
To: Ingo Molnar; +Cc: Steven Rostedt, Linux Kernel
2008/11/18 Ingo Molnar <mingo@elte.hu>:
> Would be nice to have the dynamic allocation straight away
2008/11/18 Steven Rostedt <rostedt@goodmis.org>:
> I would recommend using a static array
:-)
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 3/3] tracing/function-return-tracer: add the overrun field
2008-11-18 16:47 ` Frédéric Weisbecker
@ 2008-11-18 16:53 ` Steven Rostedt
2008-11-18 16:58 ` Frédéric Weisbecker
2008-11-18 21:03 ` Ingo Molnar
1 sibling, 1 reply; 29+ messages in thread
From: Steven Rostedt @ 2008-11-18 16:53 UTC (permalink / raw)
To: Frédéric Weisbecker; +Cc: Ingo Molnar, Linux Kernel
On Tue, 18 Nov 2008, Frédéric Weisbecker wrote:
> 2008/11/18 Ingo Molnar <mingo@elte.hu>:
> > Would be nice to have the dynamic allocation straight away
>
> 2008/11/18 Steven Rostedt <rostedt@goodmis.org>:
> > I would recommend using a static array
>
> :-)
How about a compromise, start off the patch series getting it working with
task struct static array, and then finish the series with the dynamic
array.
This is my development model, because it lets me know where the bugs are
better. If we find some strange bug, this can help pin point via a bisect
if the bug is with the general code, or with the use of a dynamic array.
Just my preference ;-)
-- Steve
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 3/3] tracing/function-return-tracer: add the overrun field
2008-11-18 16:53 ` Steven Rostedt
@ 2008-11-18 16:58 ` Frédéric Weisbecker
2008-11-18 17:00 ` Frédéric Weisbecker
0 siblings, 1 reply; 29+ messages in thread
From: Frédéric Weisbecker @ 2008-11-18 16:58 UTC (permalink / raw)
To: Steven Rostedt; +Cc: Ingo Molnar, Linux Kernel
2008/11/18 Steven Rostedt <rostedt@goodmis.org>:
> How about a compromise, start off the patch series getting it working with
> task struct static array, and then finish the series with the dynamic
> array.
>
> This is my development model, because it lets me know where the bugs are
> better. If we find some strange bug, this can help pin point via a bisect
> if the bug is with the general code, or with the use of a dynamic array.
>
> Just my preference ;-)
Ooh. I first agreed with Ingo's arguments about the fact that distros
can enable it without worrying.
But as I read your message, I guess that would be better to start with
static arrays to better find the bugs,
state by state...
Ingo, what do you think?
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 3/3] tracing/function-return-tracer: add the overrun field
2008-11-18 16:58 ` Frédéric Weisbecker
@ 2008-11-18 17:00 ` Frédéric Weisbecker
2008-11-18 21:01 ` Ingo Molnar
0 siblings, 1 reply; 29+ messages in thread
From: Frédéric Weisbecker @ 2008-11-18 17:00 UTC (permalink / raw)
To: Steven Rostedt; +Cc: Ingo Molnar, Linux Kernel
2008/11/18 Frédéric Weisbecker <fweisbec@gmail.com>:
> 2008/11/18 Steven Rostedt <rostedt@goodmis.org>:
>> How about a compromise, start off the patch series getting it working with
>> task struct static array, and then finish the series with the dynamic
>> array.
>>
>> This is my development model, because it lets me know where the bugs are
>> better. If we find some strange bug, this can help pin point via a bisect
>> if the bug is with the general code, or with the use of a dynamic array.
>>
>> Just my preference ;-)
>
>
> Ooh. I first agreed with Ingo's arguments about the fact that distros
> can enable it without worrying.
> But as I read your message, I guess that would be better to start with
> static arrays to better find the bugs,
> state by state...
>
> Ingo, what do you think?
>
And then a last state with dynamic arrays...
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 3/3] tracing/function-return-tracer: add the overrun field
2008-11-18 17:00 ` Frédéric Weisbecker
@ 2008-11-18 21:01 ` Ingo Molnar
0 siblings, 0 replies; 29+ messages in thread
From: Ingo Molnar @ 2008-11-18 21:01 UTC (permalink / raw)
To: Frédéric Weisbecker; +Cc: Steven Rostedt, Linux Kernel
* Frédéric Weisbecker <fweisbec@gmail.com> wrote:
> 2008/11/18 Frédéric Weisbecker <fweisbec@gmail.com>:
> > 2008/11/18 Steven Rostedt <rostedt@goodmis.org>:
> >> How about a compromise, start off the patch series getting it working with
> >> task struct static array, and then finish the series with the dynamic
> >> array.
> >>
> >> This is my development model, because it lets me know where the bugs are
> >> better. If we find some strange bug, this can help pin point via a bisect
> >> if the bug is with the general code, or with the use of a dynamic array.
> >>
> >> Just my preference ;-)
> >
> >
> > Ooh. I first agreed with Ingo's arguments about the fact that distros
> > can enable it whithout worrying.
> > But as I read your message, I guess that would be better to start with
> > static arrays to better find the bugs,
> > state by state...
> >
> > Ingo, what do you think?
> >
>
> And then a last state with dynamic arrays...
it's your call really, either way is fine as long as the end result
works! :-)
Generally it's indeed much easier to do as small steps as possible,
and to validate every step in practice.
Ingo
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 3/3] tracing/function-return-tracer: add the overrun field
2008-11-18 16:47 ` Frédéric Weisbecker
2008-11-18 16:53 ` Steven Rostedt
@ 2008-11-18 21:03 ` Ingo Molnar
2008-11-19 7:35 ` Frédéric Weisbecker
2008-11-21 19:39 ` Frédéric Weisbecker
1 sibling, 2 replies; 29+ messages in thread
From: Ingo Molnar @ 2008-11-18 21:03 UTC (permalink / raw)
To: Frédéric Weisbecker; +Cc: Steven Rostedt, Linux Kernel
* Frédéric Weisbecker <fweisbec@gmail.com> wrote:
> 2008/11/18 Ingo Molnar <mingo@elte.hu>:
> > Would be nice to have the dynamic allocation straight away
>
> 2008/11/18 Steven Rostedt <rostedt@goodmis.org>:
> > I would recommend using a static array
>
> :-)
hey, look at the bright side of it: whichever variant you pick, if it
goes wrong, you'll always have someone else to blame for that
unbelievably stupid suggestion ;-)
Ingo
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 3/3] tracing/function-return-tracer: add the overrun field
2008-11-18 21:03 ` Ingo Molnar
@ 2008-11-19 7:35 ` Frédéric Weisbecker
2008-11-21 19:39 ` Frédéric Weisbecker
1 sibling, 0 replies; 29+ messages in thread
From: Frédéric Weisbecker @ 2008-11-19 7:35 UTC (permalink / raw)
To: Ingo Molnar; +Cc: Steven Rostedt, Linux Kernel
2008/11/18 Ingo Molnar <mingo@elte.hu>:
> it's your call really, either way is fine as long as the end result
> works! :-)
>
> Generally it's indeed much easier to do as small steps as possible,
> and to validate every step in practice.
>
> Ingo
>
That's right, I will start with a static array first and end with
dynamic allocation.
I'm not sure if it will be ready for merge window of 2.6.29 but that
doesn't really matter.
> hey, look at the bright side of it: whichever variant you pick, if it
> goes wrong, you'll always have someone else to blame for that
> unbelievably stupid suggestion ;-)
>
> Ingo
>
Hehe! Indeed :-)
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 3/3] tracing/function-return-tracer: add the overrun field
2008-11-18 21:03 ` Ingo Molnar
2008-11-19 7:35 ` Frédéric Weisbecker
@ 2008-11-21 19:39 ` Frédéric Weisbecker
2008-11-21 19:48 ` Ingo Molnar
1 sibling, 1 reply; 29+ messages in thread
From: Frédéric Weisbecker @ 2008-11-21 19:39 UTC (permalink / raw)
To: Ingo Molnar; +Cc: Steven Rostedt, Linux Kernel
2008/11/18 Ingo Molnar <mingo@elte.hu>:
> hey, look at the bright side of it: whichever variant you pick, if it
> goes wrong, you'll always have someone else to blame for that
> unbelievably stupid suggestion ;-)
Actually, since the current implementation of function-return-tracer
works well with static arrays of return stack,
I guess the moving of this array to struct task would not be a real
problem. I think I should directly start with
the dynamic arrays.
But I'm facing a problem with the allocation of these arrays.
When the tracer will be launched, I will hold the tasklist_lock to
allocate/insert the dynamic arrays.
So in this atomic context, I will not be able to call kmalloc with
GFP_KERNEL. And I fear that using GFP_ATOMIC
for possibly hundreds of tasks would be clearly unacceptable.
What do you think of this way:
_tracer activates
_a function enters the tracer entry-hooker. If the array is allocated
for the current task, that's well. If not I launch a kernel thread
that will later allocate an array for the current task (I will pass
the pid as a parameter). So the current task will soon be traced.
_ when a process forks, I can allocate a dynamic array for the new
task without problem (I hope).
So some tasks will not be traced at the early beginning of tracing but
they will soon all be traced....
There is perhaps a problem with tasks that are sleeping for long
times... There will be some losses once they are awakened...
What do you think?
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 3/3] tracing/function-return-tracer: add the overrun field
2008-11-21 19:39 ` Frédéric Weisbecker
@ 2008-11-21 19:48 ` Ingo Molnar
2008-11-21 20:07 ` Frédéric Weisbecker
0 siblings, 1 reply; 29+ messages in thread
From: Ingo Molnar @ 2008-11-21 19:48 UTC (permalink / raw)
To: Frédéric Weisbecker; +Cc: Steven Rostedt, Linux Kernel
* Frédéric Weisbecker <fweisbec@gmail.com> wrote:
> When the tracer will be launched, I will hold the tasklist_lock to
> allocate/insert the dynamic arrays. So in this atomic context, I
> will not be able to call kmalloc with GFP_KERNEL. And I fear that
> using GFP_ATOMIC for possible hundreds of tasks would be clearly
> unacceptable.
>
> What do you think of this way:
>
> _tracer activates
> _a function enters the tracer entry-hooker. If the array is allocated
> for the current task, that's well. If not I launch a kernel thread
> that will later allocate an array for the current task (I will pass
> the pid as a parameter). So the current task will soon be traced.
> _ when a process forks, I can allocate a dynamic array for the new
> task without problem (I hope).
>
> So some tasks will not be traced at the early beginning of tracing
> but they will soon all be traced.... There is perhaps a problem with
> tasks that are sleeping for long times... There will be some losses
once they are awakened...
i'd suggest a different approach that is simpler:
- step0: set flag that "all newly created tasks need the array
allocated from now on".
- step1: allocate N arrays outside tasklist_lock
- step2: take tasklist_lock, loop over all tasks that exist and pass
in the N arrays to all tasks that still need it.
If tasks were 'refilled', drop tasklist_lock and go back to step 1.
- step3: free N (superfluously allocated) arrays
Make N something like 32 to not get into a bad quadratic nr_tasks
double loop in practice. (Possibly allocate arrays[32] dynamically as
well at step0 and not have it on the kernel stack - so 32 can be
changed to 128 or so.)
Ingo
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 3/3] tracing/function-return-tracer: add the overrun field
2008-11-21 19:48 ` Ingo Molnar
@ 2008-11-21 20:07 ` Frédéric Weisbecker
2008-11-23 13:18 ` Ingo Molnar
0 siblings, 1 reply; 29+ messages in thread
From: Frédéric Weisbecker @ 2008-11-21 20:07 UTC (permalink / raw)
To: Ingo Molnar; +Cc: Steven Rostedt, Linux Kernel
2008/11/21 Ingo Molnar <mingo@elte.hu>:
>
> * Frédéric Weisbecker <fweisbec@gmail.com> wrote:
>
>> When the tracer will be launched, I will hold the tasklist_lock to
>> allocate/insert the dynamic arrays. So in this atomic context, I
>> will not be able to call kmalloc with GFP_KERNEL. And I fear that
>> using GFP_ATOMIC for possible hundreds of tasks would be clearly
>> unacceptable.
>>
>> What do you think of this way:
>>
>> _tracer activates
>> _a function enters the tracer entry-hooker. If the array is allocated
>> for the current task, that's well. If not I launch a kernel thread
>> that will later allocate an array for the current task (I will pass
>> the pid as a parameter). So the current task will soon be traced.
>> _ when a process forks, I can allocate a dynamic array for the new
>> task without problem (I hope).
>>
>> So some tasks will not be traced at the early beginning of tracing
>> but they will soon all be traced.... There is perhaps a problem with
>> tasks that are sleeping for long times... There will be some losses
>> once they are awakened...
>
> i'd suggest a different approach that is simpler:
>
> - step0: set flag that "all newly created tasks need the array
> allocated from now on".
>
> - step1: allocate N arrays outside tasklist_lock
>
> - step2: take tasklist_lock, loop over all tasks that exist and pass
> in the N arrays to all tasks that still need it.
>
> If tasks were 'refilled', drop tasklist_lock and go back to step 1.
>
> - step3: free N (superfluously allocated) arrays
>
> Make N something like 32 to not get into a bad quadratic nr_tasks
> double loop in practice. (Possibly allocate arrays[32] dynamically as
> well at step0 and not have it on the kernel stack - so 32 can be
> changed to 128 or so.)
>
> Ingo
>
Ok. I thought about this method but wondered about the fact that
kmalloc can schedule and then I could run in an infinite loop (or a
too long one).
I will try this. Thanks.
^ permalink raw reply [flat|nested] 29+ messages in thread
* Re: [PATCH 3/3] tracing/function-return-tracer: add the overrun field
2008-11-21 20:07 ` Frédéric Weisbecker
@ 2008-11-23 13:18 ` Ingo Molnar
0 siblings, 0 replies; 29+ messages in thread
From: Ingo Molnar @ 2008-11-23 13:18 UTC (permalink / raw)
To: Frédéric Weisbecker; +Cc: Steven Rostedt, Linux Kernel
* Frédéric Weisbecker <fweisbec@gmail.com> wrote:
> 2008/11/21 Ingo Molnar <mingo@elte.hu>:
> >
> > * Frédéric Weisbecker <fweisbec@gmail.com> wrote:
> >
> >> When the tracer will be launched, I will hold the tasklist_lock to
> >> allocate/insert the dynamic arrays. So in this atomic context, I
> >> will not be able to call kmalloc with GFP_KERNEL. And I fear that
> >> using GFP_ATOMIC for possible hundreds of tasks would be clearly
> >> unacceptable.
> >>
> >> What do you think of this way:
> >>
> >> _tracer activates
> >> _a function enters the tracer entry-hooker. If the array is allocated
> >> for the current task, that's well. If not I launch a kernel thread
> >> that will later allocate an array for the current task (I will pass
> >> the pid as a parameter). So the current task will soon be traced.
> >> _ when a process forks, I can allocate a dynamic array for the new
> >> task without problem (I hope).
> >>
> >> So some tasks will not be traced at the early beginning of tracing
> >> but they will soon all be traced.... There is perhaps a problem with
> >> tasks that are sleeping for long times... There will be some losses
> >> once they are awakened...
> >
> > i'd suggest a different approach that is simpler:
> >
> > - step0: set flag that "all newly created tasks need the array
> > allocated from now on".
> >
> > - step1: allocate N arrays outside tasklist_lock
> >
> > - step2: take tasklist_lock, loop over all tasks that exist and pass
> > in the N arrays to all tasks that still need it.
> >
> > If tasks were 'refilled', drop tasklist_lock and go back to step 1.
> >
> > - step3: free N (superfluously allocated) arrays
> >
> > Make N something like 32 to not get into a bad quadratic nr_tasks
> > double loop in practice. (Possibly allocate arrays[32] dynamically as
> > well at step0 and not have it on the kernel stack - so 32 can be
> > changed to 128 or so.)
> >
> > Ingo
> >
>
> Ok. I thought about this method but wondered about the fact that
> kmalloc can schedule and then I could run in an infinite loop (or a
> too long one).
the retry loop should solve that aspect - and the chunking solves the
"dont run too long with a lock held" problem.
> I will try this. Thanks.
looks good, applied :)
Ingo
^ permalink raw reply [flat|nested] 29+ messages in thread
end of thread, other threads:[~2008-11-23 13:19 UTC | newest]
Thread overview: 29+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-11-17 2:22 [PATCH 3/3] tracing/function-return-tracer: add the overrun field Frederic Weisbecker
2008-11-17 8:49 ` Ingo Molnar
2008-11-17 18:38 ` Frederic Weisbecker
2008-11-18 8:47 ` Ingo Molnar
2008-11-18 14:23 ` Steven Rostedt
2008-11-18 14:51 ` Ingo Molnar
2008-11-18 15:06 ` Steven Rostedt
2008-11-18 15:13 ` Ingo Molnar
2008-11-18 15:22 ` Steven Rostedt
2008-11-18 15:50 ` Ingo Molnar
2008-11-18 16:31 ` Frédéric Weisbecker
2008-11-18 16:40 ` Ingo Molnar
2008-11-18 16:47 ` Frédéric Weisbecker
2008-11-18 16:53 ` Steven Rostedt
2008-11-18 16:58 ` Frédéric Weisbecker
2008-11-18 17:00 ` Frédéric Weisbecker
2008-11-18 21:01 ` Ingo Molnar
2008-11-18 21:03 ` Ingo Molnar
2008-11-19 7:35 ` Frédéric Weisbecker
2008-11-21 19:39 ` Frédéric Weisbecker
2008-11-21 19:48 ` Ingo Molnar
2008-11-21 20:07 ` Frédéric Weisbecker
2008-11-23 13:18 ` Ingo Molnar
2008-11-18 16:43 ` Steven Rostedt
2008-11-18 14:21 ` Steven Rostedt
2008-11-18 14:48 ` Ingo Molnar
2008-11-18 14:58 ` Steven Rostedt
2008-11-18 15:02 ` Ingo Molnar
2008-11-18 15:11 ` Steven Rostedt
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox