* [RFC][PATCH 1/3] ftrace: Make ret_stack usable by other than function graph
2011-07-12 18:51 [RFC][PATCH 0/3] ftrace: entend notrace to notrace called functions Steven Rostedt
@ 2011-07-12 18:51 ` Steven Rostedt
2011-07-12 18:51 ` [RFC][PATCH 2/3] ftrace: Add extend-notrace to not trace sub funcs of notrace funcs Steven Rostedt
2011-07-12 18:51 ` [RFC][PATCH 3/3] ftrace: Add extend-notrace-debug to see what is not traced Steven Rostedt
2 siblings, 0 replies; 6+ messages in thread
From: Steven Rostedt @ 2011-07-12 18:51 UTC (permalink / raw)
To: linux-kernel
Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, Peter Zijlstra,
Frederic Weisbecker
[-- Attachment #1: 0001-ftrace-Make-ret_stack-usable-by-other-than-function-.patch --]
[-- Type: text/plain, Size: 13962 bytes --]
From: Steven Rostedt <srostedt@redhat.com>
Move the ret_stack code out of the CONFIG_FUNCTION_GRAPH_TRACER
macro section so that it can be used by other function tracers.
Requested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
include/linux/ftrace.h | 17 ++--
include/linux/sched.h | 10 +-
kernel/fork.c | 4 +-
kernel/sched.c | 2 +-
kernel/trace/ftrace.c | 278 +++++++++++++++++++++++++++---------------------
kernel/trace/trace.h | 6 +
6 files changed, 181 insertions(+), 136 deletions(-)
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 9d88e1c..60e38c0 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -99,6 +99,10 @@ void clear_ftrace_function(void);
extern void ftrace_stub(unsigned long a0, unsigned long a1);
+extern void ftrace_init_task(struct task_struct *t);
+extern void ftrace_exit_task(struct task_struct *t);
+extern void ftrace_init_idle_task(struct task_struct *t, int cpu);
+
#else /* !CONFIG_FUNCTION_TRACER */
/*
* (un)register_ftrace_function must be a macro since the ops parameter
@@ -110,6 +114,11 @@ static inline void clear_ftrace_function(void) { }
static inline void ftrace_kill(void) { }
static inline void ftrace_stop(void) { }
static inline void ftrace_start(void) { }
+
+static inline void ftrace_init_task(struct task_struct *t) { }
+static inline void ftrace_exit_task(struct task_struct *t) { }
+static inline void ftrace_init_idle_task(struct task_struct *t, int cpu) { }
+
#endif /* CONFIG_FUNCTION_TRACER */
#ifdef CONFIG_STACK_TRACER
@@ -443,10 +452,6 @@ extern trace_func_graph_ent_t ftrace_graph_entry;
extern void unregister_ftrace_graph(void);
-extern void ftrace_graph_init_task(struct task_struct *t);
-extern void ftrace_graph_exit_task(struct task_struct *t);
-extern void ftrace_graph_init_idle_task(struct task_struct *t, int cpu);
-
static inline int task_curr_ret_stack(struct task_struct *t)
{
return t->curr_ret_stack;
@@ -467,10 +472,6 @@ static inline void unpause_graph_tracing(void)
#define __irq_entry
#define INIT_FTRACE_GRAPH
-static inline void ftrace_graph_init_task(struct task_struct *t) { }
-static inline void ftrace_graph_exit_task(struct task_struct *t) { }
-static inline void ftrace_graph_init_idle_task(struct task_struct *t, int cpu) { }
-
static inline int register_ftrace_graph(trace_func_graph_ret_t retfunc,
trace_func_graph_ent_t entryfunc)
{
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 496770a..c61b5bd 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1530,10 +1530,6 @@ struct task_struct {
struct list_head *scm_work_list;
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
- /* Index of current stored address in ret_stack */
- int curr_ret_stack;
- /* Stack of return addresses for return function tracing */
- struct ftrace_ret_stack *ret_stack;
/* time stamp for last schedule */
unsigned long long ftrace_timestamp;
/*
@@ -1544,6 +1540,12 @@ struct task_struct {
/* Pause for the tracing */
atomic_t tracing_graph_pause;
#endif
+#ifdef CONFIG_FUNCTION_TRACER
+ /* Index of current stored address in ret_stack */
+ int curr_ret_stack;
+ /* Stack of return addresses for return function tracing */
+ struct ftrace_ret_stack *ret_stack;
+#endif
#ifdef CONFIG_TRACING
/* state flags for use by tracers */
unsigned long trace;
diff --git a/kernel/fork.c b/kernel/fork.c
index 0276c30..8516893e 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -167,7 +167,7 @@ void free_task(struct task_struct *tsk)
account_kernel_stack(tsk->stack, -1);
free_thread_info(tsk->stack);
rt_mutex_debug_task_free(tsk);
- ftrace_graph_exit_task(tsk);
+ ftrace_exit_task(tsk);
free_task_struct(tsk);
}
EXPORT_SYMBOL(free_task);
@@ -1095,7 +1095,7 @@ static struct task_struct *copy_process(unsigned long clone_flags,
if (!p)
goto fork_out;
- ftrace_graph_init_task(p);
+ ftrace_init_task(p);
rt_mutex_init_task(p);
diff --git a/kernel/sched.c b/kernel/sched.c
index 9769c75..6059180 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -5903,7 +5903,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
* The idle tasks have their own, simple scheduling class:
*/
idle->sched_class = &idle_sched_class;
- ftrace_graph_init_idle_task(idle, cpu);
+ ftrace_init_idle_task(idle, cpu);
}
/*
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 908038f..20bdbd6 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3895,20 +3895,88 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
return ret;
}
+static int ret_stack_active;
+static DEFINE_PER_CPU(struct ftrace_ret_stack *, idle_ret_stack);
+static DEFINE_MUTEX(ret_stack_mutex);
+
+static void
+trace_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack)
+{
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+ atomic_set(&t->tracing_graph_pause, 0);
+ atomic_set(&t->trace_overrun, 0);
+ t->ftrace_timestamp = 0;
+#endif
+ /* make curr_ret_stack visible before we add the ret_stack */
+ smp_wmb();
+ t->ret_stack = ret_stack;
+}
-static int ftrace_graph_active;
-static struct notifier_block ftrace_suspend_notifier;
+/*
+ * Allocate a return stack for the idle task. May be the first
+ * time through, or it may be done by CPU hotplug online.
+ */
+void ftrace_init_idle_task(struct task_struct *t, int cpu)
+{
+ t->curr_ret_stack = -1;
+ /*
+ * The idle task has no parent, it either has its own
+ * stack or no stack at all.
+ */
+ if (t->ret_stack)
+ WARN_ON(t->ret_stack != per_cpu(idle_ret_stack, cpu));
-int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
+ /*
+ * We don't care about races with ret_stack_active
+ * being cleared here. If we allocate one too many, so
+ * be it. The allocated stacks stay for the life of the
+ * task anyway.
+ */
+ if (ret_stack_active) {
+ struct ftrace_ret_stack *ret_stack;
+
+ ret_stack = per_cpu(idle_ret_stack, cpu);
+ if (!ret_stack) {
+ ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
+ * sizeof(struct ftrace_ret_stack),
+ GFP_KERNEL);
+ if (!ret_stack)
+ return;
+ per_cpu(idle_ret_stack, cpu) = ret_stack;
+ }
+ trace_init_task(t, ret_stack);
+ }
+}
+
+/* Allocate a return stack for newly created task */
+void ftrace_init_task(struct task_struct *t)
{
- return 0;
+ /* Make sure we do not use the parent ret_stack */
+ t->ret_stack = NULL;
+ t->curr_ret_stack = -1;
+
+ if (ret_stack_active) {
+ struct ftrace_ret_stack *ret_stack;
+
+ ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
+ * sizeof(struct ftrace_ret_stack),
+ GFP_KERNEL);
+ if (!ret_stack)
+ return;
+ trace_init_task(t, ret_stack);
+ }
}
-/* The callbacks that hook a function */
-trace_func_graph_ret_t ftrace_graph_return =
- (trace_func_graph_ret_t)ftrace_stub;
-trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub;
+void ftrace_exit_task(struct task_struct *t)
+{
+ struct ftrace_ret_stack *ret_stack = t->ret_stack;
+
+ t->ret_stack = NULL;
+ /* NULL must become visible to IRQs before we free it: */
+ barrier();
+
+ kfree(ret_stack);
+}
/* Try to assign a return stack array on FTRACE_RETSTACK_ALLOC_SIZE tasks. */
static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
@@ -3939,12 +4007,8 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
}
if (t->ret_stack == NULL) {
- atomic_set(&t->tracing_graph_pause, 0);
- atomic_set(&t->trace_overrun, 0);
t->curr_ret_stack = -1;
- /* Make sure the tasks see the -1 first: */
- smp_wmb();
- t->ret_stack = ret_stack_list[start++];
+ trace_init_task(t, ret_stack_list[start++]);
}
} while_each_thread(g, t);
@@ -3956,6 +4020,73 @@ free:
return ret;
}
+/* Allocate a return stack for each task */
+int trace_ret_stack_enable(void)
+{
+ struct ftrace_ret_stack **ret_stack_list;
+ int ret = -EBUSY;
+ int cpu;
+
+ mutex_lock(&ret_stack_mutex);
+
+ if (ret_stack_active)
+ goto out_unlock;
+
+ /* set ret_stack_active, as some functions need it set now */
+ ret_stack_active = 1;
+
+ ret_stack_list = kmalloc(FTRACE_RETSTACK_ALLOC_SIZE *
+ sizeof(struct ftrace_ret_stack *),
+ GFP_KERNEL);
+
+ if (!ret_stack_list) {
+ ret_stack_active = 0;
+ ret = -ENOMEM;
+ goto out_unlock;
+ }
+
+ /* The cpu_boot init_task->ret_stack will never be freed */
+ for_each_online_cpu(cpu) {
+ if (!idle_task(cpu)->ret_stack)
+ ftrace_init_idle_task(idle_task(cpu), cpu);
+ }
+
+ do {
+ ret = alloc_retstack_tasklist(ret_stack_list);
+ } while (ret == -EAGAIN);
+
+ if (ret)
+ ret_stack_active = 0;
+
+ kfree(ret_stack_list);
+ out_unlock:
+ mutex_unlock(&ret_stack_mutex);
+ return ret;
+}
+
+/* Does not free anything. Only makes new tasks not create a ret_stack */
+void trace_ret_stack_disable(void)
+{
+ mutex_lock(&ret_stack_mutex);
+ ret_stack_active = 0;
+ mutex_unlock(&ret_stack_mutex);
+}
+
+#ifdef CONFIG_FUNCTION_GRAPH_TRACER
+
+static int ftrace_graph_active;
+static struct notifier_block ftrace_suspend_notifier;
+
+int ftrace_graph_entry_stub(struct ftrace_graph_ent *trace)
+{
+ return 0;
+}
+
+/* The callbacks that hook a function */
+trace_func_graph_ret_t ftrace_graph_return =
+ (trace_func_graph_ret_t)ftrace_stub;
+trace_func_graph_ent_t ftrace_graph_entry = ftrace_graph_entry_stub;
+
static void
ftrace_graph_probe_sched_switch(void *ignore,
struct task_struct *prev, struct task_struct *next)
@@ -3988,40 +4119,6 @@ ftrace_graph_probe_sched_switch(void *ignore,
next->ret_stack[index].calltime += timestamp;
}
-/* Allocate a return stack for each task */
-static int start_graph_tracing(void)
-{
- struct ftrace_ret_stack **ret_stack_list;
- int ret, cpu;
-
- ret_stack_list = kmalloc(FTRACE_RETSTACK_ALLOC_SIZE *
- sizeof(struct ftrace_ret_stack *),
- GFP_KERNEL);
-
- if (!ret_stack_list)
- return -ENOMEM;
-
- /* The cpu_boot init_task->ret_stack will never be freed */
- for_each_online_cpu(cpu) {
- if (!idle_task(cpu)->ret_stack)
- ftrace_graph_init_idle_task(idle_task(cpu), cpu);
- }
-
- do {
- ret = alloc_retstack_tasklist(ret_stack_list);
- } while (ret == -EAGAIN);
-
- if (!ret) {
- ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
- if (ret)
- pr_info("ftrace_graph: Couldn't activate tracepoint"
- " probe to kernel_sched_switch\n");
- }
-
- kfree(ret_stack_list);
- return ret;
-}
-
/*
* Hibernation protection.
* The state of the current task is too much unstable during
@@ -4060,12 +4157,23 @@ int register_ftrace_graph(trace_func_graph_ret_t retfunc,
register_pm_notifier(&ftrace_suspend_notifier);
ftrace_graph_active++;
- ret = start_graph_tracing();
+ ret = trace_ret_stack_enable();
+
+ if (!ret) {
+ ret = register_trace_sched_switch(ftrace_graph_probe_sched_switch, NULL);
+ if (ret) {
+ pr_info("ftrace_graph: Couldn't activate tracepoint"
+ " probe to kernel_sched_switch\n");
+ trace_ret_stack_disable();
+ }
+ }
+
if (ret) {
ftrace_graph_active--;
goto out;
}
+
ftrace_graph_return = retfunc;
ftrace_graph_entry = entryfunc;
@@ -4083,6 +4191,7 @@ void unregister_ftrace_graph(void)
if (unlikely(!ftrace_graph_active))
goto out;
+ trace_ret_stack_disable();
ftrace_graph_active--;
ftrace_graph_return = (trace_func_graph_ret_t)ftrace_stub;
ftrace_graph_entry = ftrace_graph_entry_stub;
@@ -4094,79 +4203,6 @@ void unregister_ftrace_graph(void)
mutex_unlock(&ftrace_lock);
}
-static DEFINE_PER_CPU(struct ftrace_ret_stack *, idle_ret_stack);
-
-static void
-graph_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack)
-{
- atomic_set(&t->tracing_graph_pause, 0);
- atomic_set(&t->trace_overrun, 0);
- t->ftrace_timestamp = 0;
- /* make curr_ret_stack visible before we add the ret_stack */
- smp_wmb();
- t->ret_stack = ret_stack;
-}
-
-/*
- * Allocate a return stack for the idle task. May be the first
- * time through, or it may be done by CPU hotplug online.
- */
-void ftrace_graph_init_idle_task(struct task_struct *t, int cpu)
-{
- t->curr_ret_stack = -1;
- /*
- * The idle task has no parent, it either has its own
- * stack or no stack at all.
- */
- if (t->ret_stack)
- WARN_ON(t->ret_stack != per_cpu(idle_ret_stack, cpu));
-
- if (ftrace_graph_active) {
- struct ftrace_ret_stack *ret_stack;
-
- ret_stack = per_cpu(idle_ret_stack, cpu);
- if (!ret_stack) {
- ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
- * sizeof(struct ftrace_ret_stack),
- GFP_KERNEL);
- if (!ret_stack)
- return;
- per_cpu(idle_ret_stack, cpu) = ret_stack;
- }
- graph_init_task(t, ret_stack);
- }
-}
-
-/* Allocate a return stack for newly created task */
-void ftrace_graph_init_task(struct task_struct *t)
-{
- /* Make sure we do not use the parent ret_stack */
- t->ret_stack = NULL;
- t->curr_ret_stack = -1;
-
- if (ftrace_graph_active) {
- struct ftrace_ret_stack *ret_stack;
-
- ret_stack = kmalloc(FTRACE_RETFUNC_DEPTH
- * sizeof(struct ftrace_ret_stack),
- GFP_KERNEL);
- if (!ret_stack)
- return;
- graph_init_task(t, ret_stack);
- }
-}
-
-void ftrace_graph_exit_task(struct task_struct *t)
-{
- struct ftrace_ret_stack *ret_stack = t->ret_stack;
-
- t->ret_stack = NULL;
- /* NULL must become visible to IRQs before we free it: */
- barrier();
-
- kfree(ret_stack);
-}
-
void ftrace_graph_stop(void)
{
ftrace_stop();
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 229f859..fa439f0 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -537,11 +537,17 @@ static inline int ftrace_trace_task(struct task_struct *task)
return test_tsk_trace_trace(task);
}
+
+int trace_ret_stack_enable(void);
+void trace_ret_stack_disable(void);
+
#else
static inline int ftrace_trace_task(struct task_struct *task)
{
return 1;
}
+static inline int trace_ret_stack_enable(void) { return -ENODEV; }
+static inline void trace_ret_stack_disable(void) { }
#endif
/*
--
1.7.5.4
^ permalink raw reply related [flat|nested] 6+ messages in thread* [RFC][PATCH 2/3] ftrace: Add extend-notrace to not trace sub funcs of notrace funcs
2011-07-12 18:51 [RFC][PATCH 0/3] ftrace: entend notrace to notrace called functions Steven Rostedt
2011-07-12 18:51 ` [RFC][PATCH 1/3] ftrace: Make ret_stack usable by other than function graph Steven Rostedt
@ 2011-07-12 18:51 ` Steven Rostedt
2011-07-12 19:45 ` Peter Zijlstra
2011-07-12 18:51 ` [RFC][PATCH 3/3] ftrace: Add extend-notrace-debug to see what is not traced Steven Rostedt
2 siblings, 1 reply; 6+ messages in thread
From: Steven Rostedt @ 2011-07-12 18:51 UTC (permalink / raw)
To: linux-kernel
Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, Peter Zijlstra,
Frederic Weisbecker
[-- Attachment #1: 0002-ftrace-Add-extend-notrace-to-not-trace-sub-funcs-of-.patch --]
[-- Type: text/plain, Size: 12964 bytes --]
From: Steven Rostedt <srostedt@redhat.com>
When debugging the kernel with the function tracer, the trace can trace
lots of "noise" functions. That is, functions that you do not care about
tracing but end up filling the ring buffer and perhaps even causing you
to lose what you wanted to trace. The set_ftrace_notrace as well as
denoting specific functions in the kernel with 'notrace' is useful to
keep these functions down. But sometimes they call functions that you
do not want to trace when a notrace function calls them. For this the
trace option 'extend-notrace' has been added.
Because the function tracer only records the entering of the function
and function graph tracing may be too slow to trace what is needed,
being able to recognise notrace functions is a complex problem as there's
no good way to know when a function has returned. But to make a "best effort"
at this, the following is done.
The extend-notrace option uses the function_graph ret_stack to record a
call chain to each function. As the function tracer callbacks receive the
instruction pointer of both the traced function and the parent that called
that function, we can use the parent to help figure out if the function was
called by a notrace function or not. If the parent function is in the ret_stack
then we know that this function can be traced. If it is not, then it is a
candidate to not be traced.
When the parent is not in the ret_stack, care must be taken to know if the
current function should be ignored or not. Along with the call chain, the
irq context is also recorded. If the last traced function was not in irq
context and the current traced function is, then we trace this function.
If nothing is in the ret_stack then we just trace that as well. But because
we never know when this top function has exited, we have to have other tricks
to find out when we are at the top again.
One is to check the current stack (by referencing the address of a local
variable). If it is less deep than the current top function's stack (that is,
the local variable sits at a higher address), then we declare this function
as the new top function.
Or, if it lies within the top 64 words of the kernel stack, then it is assumed
to be a sys_call and we start tracing that as the top function as well.
Known issues: Things that are called from assembly are also not traced.
That is because assembly is just like a "notrace" statement. This includes
do_page_fault() as that is called from a trap, as well as preempt_schedule()
which may be called from returning from an interrupt.
Requested-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
include/linux/ftrace.h | 21 +++++-
include/linux/sched.h | 6 ++
kernel/fork.c | 1 -
kernel/trace/ftrace.c | 3 +
kernel/trace/trace.c | 4 +
kernel/trace/trace.h | 4 +
kernel/trace/trace_functions.c | 152 +++++++++++++++++++++++++++++++++++++++-
7 files changed, 186 insertions(+), 5 deletions(-)
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 60e38c0..9539e7a 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -406,8 +406,25 @@ typedef int (*trace_func_graph_ent_t)(struct ftrace_graph_ent *); /* entry */
struct ftrace_ret_stack {
unsigned long ret;
unsigned long func;
- unsigned long long calltime;
- unsigned long long subtime;
+ /*
+ * Function graph and function extend notrace
+ * share this structure. The ret and func above are
+ * sensitive to function graph and modifying them could
+ * cause a kernel panic. The calltime and subtime are
+ * just used for reporting. When function extend notrace
+ * is active, function graph can not be active, so these
+ * numbers are ignored anyway. If not, the worse that can
+ * happen is that we get funny numbers in the output trace, but
+ * this should never happen.
+ */
+ union {
+ unsigned long long calltime;
+ unsigned long start_ip;
+ };
+ union {
+ unsigned long long subtime;
+ unsigned long end_ip;
+ };
unsigned long fp;
};
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c61b5bd..7dd914e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1543,6 +1543,12 @@ struct task_struct {
#ifdef CONFIG_FUNCTION_TRACER
/* Index of current stored address in ret_stack */
int curr_ret_stack;
+ /*
+ * Index of current stored address in ret_stack.
+ * Must be separate from function graph curr_ret_stack.
+ */
+ int curr_trace_stack;
+ unsigned long trace_start_stack;
/* Stack of return addresses for return function tracing */
struct ftrace_ret_stack *ret_stack;
#endif
diff --git a/kernel/fork.c b/kernel/fork.c
index 8516893e..6d60ac3 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1199,7 +1199,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
p->curr_chain_key = 0;
p->lockdep_recursion = 0;
#endif
-
#ifdef CONFIG_DEBUG_MUTEXES
p->blocked_on = NULL; /* not blocked yet */
#endif
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 20bdbd6..2cbba7c 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3919,6 +3919,7 @@ trace_init_task(struct task_struct *t, struct ftrace_ret_stack *ret_stack)
void ftrace_init_idle_task(struct task_struct *t, int cpu)
{
t->curr_ret_stack = -1;
+ t->trace_start_stack = 0;
/*
* The idle task has no parent, it either has its own
* stack or no stack at all.
@@ -3954,6 +3955,7 @@ void ftrace_init_task(struct task_struct *t)
/* Make sure we do not use the parent ret_stack */
t->ret_stack = NULL;
t->curr_ret_stack = -1;
+ t->trace_start_stack = 0;
if (ret_stack_active) {
struct ftrace_ret_stack *ret_stack;
@@ -4008,6 +4010,7 @@ static int alloc_retstack_tasklist(struct ftrace_ret_stack **ret_stack_list)
if (t->ret_stack == NULL) {
t->curr_ret_stack = -1;
+ t->trace_start_stack = 0;
trace_init_task(t, ret_stack_list[start++]);
}
} while_each_thread(g, t);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ee9c921..dc8b945 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -424,6 +424,7 @@ static const char *trace_options[] = {
"graph-time",
"record-cmd",
"overwrite",
+ "extend-notrace",
NULL
};
@@ -2540,6 +2541,9 @@ static void set_tracer_flags(unsigned int mask, int enabled)
if (mask == TRACE_ITER_OVERWRITE)
ring_buffer_change_overwrite(global_trace.buffer, enabled);
+
+ if (mask == TRACE_ITER_EXTEND_NOTRACE)
+ trace_extend_notrace(enabled);
}
static ssize_t
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index fa439f0..bbef5a5 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -540,6 +540,7 @@ static inline int ftrace_trace_task(struct task_struct *task)
int trace_ret_stack_enable(void);
void trace_ret_stack_disable(void);
+void trace_extend_notrace(int enabled);
#else
static inline int ftrace_trace_task(struct task_struct *task)
@@ -548,6 +549,7 @@ static inline int ftrace_trace_task(struct task_struct *task)
}
static inline int trace_ret_stack_enable(void) { return -ENODEV; }
static inline void trace_ret_stack_disable(void) { }
+static inline void trace_extend_notrace(int enabled) { }
#endif
/*
@@ -615,6 +617,7 @@ enum trace_iterator_flags {
TRACE_ITER_GRAPH_TIME = 0x80000,
TRACE_ITER_RECORD_CMD = 0x100000,
TRACE_ITER_OVERWRITE = 0x200000,
+ TRACE_ITER_EXTEND_NOTRACE = 0x400000,
};
/*
@@ -800,6 +803,7 @@ extern const char *__stop___trace_bprintk_fmt[];
/* for function tracing recursion */
#define TRACE_INTERNAL_BIT (1<<11)
#define TRACE_GLOBAL_BIT (1<<12)
+#define TRACE_IN_IRQ (1<<13)
#define trace_recursion_set(bit) do { (current)->trace_recursion |= (bit); } while (0)
#define trace_recursion_clear(bit) do { (current)->trace_recursion &= ~(bit); } while (0)
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 8d0e1cc..73cf336 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -47,6 +47,148 @@ static void function_trace_start(struct trace_array *tr)
tracing_reset_online_cpus(tr);
}
+static int notrace_extended;
+
+void trace_extend_notrace(int enable)
+{
+ int ret = 0;
+
+ if (!ftrace_function_enabled)
+ return;
+
+ if (!!enable == notrace_extended)
+ return;
+
+ if (enable)
+ ret = trace_ret_stack_enable();
+ else
+ trace_ret_stack_disable();
+ if (!ret)
+ notrace_extended = !!enable;
+}
+
+/* 64 words depth to reset stack */
+#define STACK_OFFSET (THREAD_SIZE - (64 * BITS_PER_LONG/8))
+
+static inline int skip_notrace(unsigned long ip, unsigned long parent_ip)
+{
+ unsigned long offset;
+ unsigned long size;
+ int irq_stack;
+ int last_stack;
+ int curr_ret;
+ int i;
+
+ /* Optimize for this case to be off */
+ if (likely(!(trace_flags & TRACE_ITER_EXTEND_NOTRACE)))
+ return 0;
+
+ /*
+ * We always trace NMIs, and do not update anything.
+ * This code requires interrupts disabled.
+ */
+ if (in_nmi())
+ return 0;
+
+ if (!current->ret_stack)
+ return 0;
+
+ /*
+ * We must make sure the ret_stack is tested before we read
+ * anything else.
+ */
+ smp_rmb();
+
+ curr_ret = current->curr_trace_stack;
+
+ /* Test the stack if we are in irq or not. */
+ last_stack = !!trace_recursion_test(TRACE_IN_IRQ);
+ irq_stack = !!(!object_is_on_stack(&irq_stack) || in_irq());
+
+ /* Always update the irq info if it changes */
+ if (irq_stack != last_stack) {
+ if (irq_stack)
+ trace_recursion_set(TRACE_IN_IRQ);
+ else
+ trace_recursion_clear(TRACE_IN_IRQ);
+ }
+
+ /* If we just transitioned from normal to irq context then print */
+ if (!last_stack && irq_stack)
+ goto out_trace;
+
+ /* If the ret_stack is empty this is our first function, trace it */
+ if (curr_ret == -1)
+ goto out_trace;
+
+ /* If we just transitioned from irq to normal, find last normal stack */
+ if (last_stack && !irq_stack) {
+ for (i = curr_ret; i >= 0; i--) {
+ if (!(current->ret_stack[i].start_ip & 1))
+ break;
+ }
+ curr_ret = i;
+ if (curr_ret < 0)
+ goto out_trace;
+ }
+
+ /* Check if the parent function is in our saved stack */
+ for (i = curr_ret; i >= 0; i--) {
+ if (parent_ip >= current->ret_stack[i].start_ip &&
+ parent_ip <= current->ret_stack[i].end_ip)
+ break;
+ }
+ /* If found then reset curr_trace_stack to our parent and trace */
+ if (i >= 0) {
+ curr_ret = i;
+ goto out_trace;
+ }
+
+ /* If stack is full, just print everything */
+ if (curr_ret == FTRACE_RETFUNC_DEPTH - 1)
+ return 0;
+
+ /* If this stack is higher than the top stack, trace it */
+ if (object_is_on_stack(&irq_stack) &&
+ ((unsigned long)&irq_stack > current->trace_start_stack ||
+ (void *)&irq_stack > task_stack_page(current) + STACK_OFFSET)) {
+ /* Force recording of new start_stack */
+ curr_ret = -1;
+ goto out_trace;
+ }
+
+ /*
+ * Seems that this function was called by something that was
+ * marked, notrace. We don't want to trace this function either.
+ */
+ current->curr_trace_stack = curr_ret;
+ return 1;
+
+ out_trace:
+ /* If stack is full, just print everything */
+ if (curr_ret == FTRACE_RETFUNC_DEPTH - 1)
+ return 0;
+
+ i = ++curr_ret;
+ current->curr_trace_stack = curr_ret;
+ if (!irq_stack && (!i || !current->trace_start_stack))
+ current->trace_start_stack = (unsigned long)&irq_stack;
+
+ /* Find the actual function pointer */
+ kallsyms_lookup_size_offset(ip, &size, &offset);
+
+ /* Make offset point to the actual function */
+ offset = ip - offset;
+
+ /* We use the LSB of start_ip to denote irq stack or not */
+ offset = (~1UL & offset) | !!irq_stack;
+
+ current->ret_stack[i].start_ip = offset;
+ current->ret_stack[i].end_ip = offset + size;
+
+ return 0;
+}
+
static void
function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
{
@@ -96,7 +238,7 @@ function_trace_call(unsigned long ip, unsigned long parent_ip)
data = tr->data[cpu];
disabled = atomic_inc_return(&data->disabled);
- if (likely(disabled == 1)) {
+ if (likely(disabled == 1) && !skip_notrace(ip, parent_ip)) {
pc = preempt_count();
trace_function(tr, ip, parent_ip, flags, pc);
}
@@ -127,7 +269,7 @@ function_stack_trace_call(unsigned long ip, unsigned long parent_ip)
data = tr->data[cpu];
disabled = atomic_inc_return(&data->disabled);
- if (likely(disabled == 1)) {
+ if (likely(disabled == 1) && !skip_notrace(ip, parent_ip)) {
pc = preempt_count();
trace_function(tr, ip, parent_ip, flags, pc);
/*
@@ -189,6 +331,9 @@ static void tracing_start_function_trace(void)
else
register_ftrace_function(&trace_ops);
+ if (trace_flags & TRACE_ITER_EXTEND_NOTRACE)
+ trace_extend_notrace(1);
+
ftrace_function_enabled = 1;
}
@@ -196,6 +341,9 @@ static void tracing_stop_function_trace(void)
{
ftrace_function_enabled = 0;
+ if (trace_flags & TRACE_ITER_EXTEND_NOTRACE)
+ trace_extend_notrace(0);
+
if (func_flags.val & TRACE_FUNC_OPT_STACK)
unregister_ftrace_function(&trace_stack_ops);
else
--
1.7.5.4
^ permalink raw reply related [flat|nested] 6+ messages in thread* Re: [RFC][PATCH 2/3] ftrace: Add extend-notrace to not trace sub funcs of notrace funcs
2011-07-12 18:51 ` [RFC][PATCH 2/3] ftrace: Add extend-notrace to not trace sub funcs of notrace funcs Steven Rostedt
@ 2011-07-12 19:45 ` Peter Zijlstra
2011-07-12 20:30 ` Steven Rostedt
0 siblings, 1 reply; 6+ messages in thread
From: Peter Zijlstra @ 2011-07-12 19:45 UTC (permalink / raw)
To: Steven Rostedt
Cc: linux-kernel, Ingo Molnar, Andrew Morton, Thomas Gleixner,
Frederic Weisbecker
On Tue, 2011-07-12 at 14:51 -0400, Steven Rostedt wrote:
> Known issues: Things that are called from assembly are also not traced.
We could of course create a gasm CALL macro that adds the mcount hookery
and use that for those functions.
/me runs
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [RFC][PATCH 2/3] ftrace: Add extend-notrace to not trace sub funcs of notrace funcs
2011-07-12 19:45 ` Peter Zijlstra
@ 2011-07-12 20:30 ` Steven Rostedt
0 siblings, 0 replies; 6+ messages in thread
From: Steven Rostedt @ 2011-07-12 20:30 UTC (permalink / raw)
To: Peter Zijlstra
Cc: linux-kernel, Ingo Molnar, Andrew Morton, Thomas Gleixner,
Frederic Weisbecker
On Tue, 2011-07-12 at 21:45 +0200, Peter Zijlstra wrote:
> On Tue, 2011-07-12 at 14:51 -0400, Steven Rostedt wrote:
> > Known issues: Things that are called from assembly are also not traced.
>
> We could of course create a gasm CALL macro that adds the mcount hookery
> and use that for those functions.
Hmm, that wouldn't be too hard to do. I'll go write up some patches.
>
> /me runs
/me chases
^ permalink raw reply [flat|nested] 6+ messages in thread
* [RFC][PATCH 3/3] ftrace: Add extend-notrace-debug to see what is not traced
2011-07-12 18:51 [RFC][PATCH 0/3] ftrace: entend notrace to notrace called functions Steven Rostedt
2011-07-12 18:51 ` [RFC][PATCH 1/3] ftrace: Make ret_stack usable by other than function graph Steven Rostedt
2011-07-12 18:51 ` [RFC][PATCH 2/3] ftrace: Add extend-notrace to not trace sub funcs of notrace funcs Steven Rostedt
@ 2011-07-12 18:51 ` Steven Rostedt
2 siblings, 0 replies; 6+ messages in thread
From: Steven Rostedt @ 2011-07-12 18:51 UTC (permalink / raw)
To: linux-kernel
Cc: Ingo Molnar, Andrew Morton, Thomas Gleixner, Peter Zijlstra,
Frederic Weisbecker
[-- Attachment #1: 0003-ftrace-Add-extend-notrace-debug-to-see-what-is-not-t.patch --]
[-- Type: text/plain, Size: 3143 bytes --]
From: Steven Rostedt <srostedt@redhat.com>
Enabling extend-notrace causes functions not to be traced if it
is determined that its calling function is not traced. To be able
to debug this, the option extend-notrace-debug is added. It only
takes effect when extend-notrace is enabled.
When extend-notrace-debug is enabled, the only functions traced while
extend-notrace is enabled are the functions that would not have
been traced. In other words, extend-notrace-debug causes extend-notrace
to do the inverse of what it usually does. This allows the user
to see what functions may be missing when they enable extend-notrace.
It does not enable "notrace" functions themselves. It only traces
the functions that would have normally been traced if extend-notrace
was not enabled, but were skipped because of extend-notrace.
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
kernel/trace/trace.c | 1 +
kernel/trace/trace.h | 1 +
kernel/trace/trace_functions.c | 21 ++++++++++++++++-----
3 files changed, 18 insertions(+), 5 deletions(-)
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index dc8b945..3294549 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -425,6 +425,7 @@ static const char *trace_options[] = {
"record-cmd",
"overwrite",
"extend-notrace",
+ "extend-notrace-debug",
NULL
};
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index bbef5a5..739eaf2 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -618,6 +618,7 @@ enum trace_iterator_flags {
TRACE_ITER_RECORD_CMD = 0x100000,
TRACE_ITER_OVERWRITE = 0x200000,
TRACE_ITER_EXTEND_NOTRACE = 0x400000,
+ TRACE_ITER_DEBUG_EXTEND_NOTRACE = 0x800000,
};
/*
diff --git a/kernel/trace/trace_functions.c b/kernel/trace/trace_functions.c
index 73cf336..ff6c793 100644
--- a/kernel/trace/trace_functions.c
+++ b/kernel/trace/trace_functions.c
@@ -70,7 +70,7 @@ void trace_extend_notrace(int enable)
/* 64 words depth to reset stack */
#define STACK_OFFSET (THREAD_SIZE - (64 * BITS_PER_LONG/8))
-static inline int skip_notrace(unsigned long ip, unsigned long parent_ip)
+static int __skip_notrace(unsigned long ip, unsigned long parent_ip)
{
unsigned long offset;
unsigned long size;
@@ -79,10 +79,6 @@ static inline int skip_notrace(unsigned long ip, unsigned long parent_ip)
int curr_ret;
int i;
- /* Optimize for this case to be off */
- if (likely(!(trace_flags & TRACE_ITER_EXTEND_NOTRACE)))
- return 0;
-
/*
* We always trace NMIs, and do not update anything.
* This code requires interrupts disabled.
@@ -189,6 +185,21 @@ static inline int skip_notrace(unsigned long ip, unsigned long parent_ip)
return 0;
}
+static inline int skip_notrace(unsigned long ip, unsigned long parent_ip)
+{
+ int ret;
+
+ /* Optimize for this case to be off */
+ if (likely(!(trace_flags & TRACE_ITER_EXTEND_NOTRACE)))
+ return 0;
+
+ ret = __skip_notrace(ip, parent_ip);
+
+ if (trace_flags & TRACE_ITER_DEBUG_EXTEND_NOTRACE)
+ return !ret;
+ return ret;
+}
+
static void
function_trace_call_preempt_only(unsigned long ip, unsigned long parent_ip)
{
--
1.7.5.4
^ permalink raw reply related [flat|nested] 6+ messages in thread