* [PATCH 1/2 v2] tracing/function-graph-tracer: various fixes and features
From: Frederic Weisbecker @ 2009-01-23 1:04 UTC
To: Steven Rostedt
Cc: Ingo Molnar, Linux Kernel Mailing List, Frederic Weisbecker
This patch brings various bugfixes:
_ Drop the irrelevant first task switch at the very beginning of a trace.
_ Drop the OVERHEAD word from the headers; the DURATION word is sufficient and will not
overlap other columns.
_ Make the headers line up with their respective columns whatever options are selected.
For example, with the default options:
# tracer: function_graph
#
# CPU DURATION FUNCTION CALLS
# | | | | | | |
1) 0.646 us | }
1) | mem_cgroup_del_lru_list() {
1) 0.624 us | lookup_page_cgroup();
1) 1.970 us | }
echo funcgraph-proc > trace_options
# tracer: function_graph
#
# CPU TASK/PID DURATION FUNCTION CALLS
# | | | | | | | | |
0) bash-2937 | 0.895 us | }
0) bash-2937 | 0.888 us | __rcu_read_unlock();
0) bash-2937 | 0.864 us | conv_uni_to_pc();
0) bash-2937 | 1.015 us | __rcu_read_lock();
echo nofuncgraph-cpu > trace_options
echo nofuncgraph-proc > trace_options
# tracer: function_graph
#
# DURATION FUNCTION CALLS
# | | | | | |
3.752 us | native_pud_val();
0.616 us | native_pud_val();
0.624 us | native_pmd_val();
As for features, one can now disable the duration column (this also hides the
overhead markers, both for convenience and because an overhead marker is
meaningless without the duration it annotates):
echo nofuncgraph-duration > trace_options
# tracer: function_graph
#
# FUNCTION CALLS
# | | | |
cap_vm_enough_memory() {
__vm_enough_memory() {
vm_acct_memory();
}
}
}
Finally, there is a new option to print the absolute time:
# restart from the default options
echo funcgraph-abstime > trace_options
# tracer: function_graph
#
# TIME CPU DURATION FUNCTION CALLS
# | | | | | | | |
261.339774 | 1) + 42.823 us | }
261.339775 | 1) 1.045 us | _spin_lock_irq();
261.339777 | 1) 0.940 us | _spin_lock_irqsave();
261.339778 | 1) 0.752 us | _spin_unlock_irqrestore();
261.339780 | 1) 0.857 us | _spin_unlock_irq();
261.339782 | 1) | flush_to_ldisc() {
261.339783 | 1) | tty_ldisc_ref() {
261.339783 | 1) | tty_ldisc_try() {
261.339784 | 1) 1.075 us | _spin_lock_irqsave();
261.339786 | 1) 0.842 us | _spin_unlock_irqrestore();
261.339788 | 1) 4.211 us | }
261.339788 | 1) 5.662 us | }
The format is seconds.usecs. I guess no one needs nanosecond precision here;
the main goal is to see when events happened on each CPU when the trace
switches from one CPU to another, e.g.:
274.874760 | 1) 0.676 us | _spin_unlock();
274.874762 | 1) 0.609 us | native_load_sp0();
274.874763 | 1) 0.602 us | native_load_tls();
274.878739 | 0) 0.722 us | }
274.878740 | 0) 0.714 us | native_pmd_val();
274.878741 | 0) 0.730 us | native_pmd_val();
Here there is a ~4000 usec gap when the trace switches CPUs.
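For reference, the TIME column is just the raw nanosecond timestamp split into
seconds and microseconds. A minimal userspace sketch of the same split (purely
illustrative; the kernel side uses do_div() because plain 64-bit division isn't
always available on 32-bit):

#include <stdio.h>

static void print_abs_time(unsigned long long t_ns)
{
	/* split nanoseconds into seconds and a microsecond remainder */
	unsigned long long secs = t_ns / 1000000000ULL;
	unsigned long usecs_rem = (unsigned long)((t_ns % 1000000000ULL) / 1000);

	printf("%5llu.%06lu | ", secs, usecs_rem);
}

int main(void)
{
	print_abs_time(261339774042ULL);	/* prints "  261.339774 | " */
	return 0;
}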
Changes in V2:
_ Completely fix the first pointless task switch.
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
kernel/trace/trace_functions_graph.c | 298 ++++++++++++++++++++++------------
1 files changed, 195 insertions(+), 103 deletions(-)
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 3c54598..66fc7b8 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -1,7 +1,7 @@
/*
*
* Function graph tracer.
- * Copyright (c) 2008 Frederic Weisbecker <fweisbec@gmail.com>
+ * Copyright (c) 2008-2009 Frederic Weisbecker <fweisbec@gmail.com>
* Mostly borrowed from function tracer which
* is Copyright (c) Steven Rostedt <srostedt@redhat.com>
*
@@ -21,9 +21,11 @@
#define TRACE_GRAPH_PRINT_CPU 0x2
#define TRACE_GRAPH_PRINT_OVERHEAD 0x4
#define TRACE_GRAPH_PRINT_PROC 0x8
+#define TRACE_GRAPH_PRINT_DURATION 0x10
+#define TRACE_GRAPH_PRINT_ABS_TIME 0x20
static struct tracer_opt trace_opts[] = {
- /* Display overruns ? */
+ /* Display overruns? (for self-debugging purposes) */
{ TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) },
/* Display CPU ? */
{ TRACER_OPT(funcgraph-cpu, TRACE_GRAPH_PRINT_CPU) },
@@ -31,17 +33,22 @@ static struct tracer_opt trace_opts[] = {
{ TRACER_OPT(funcgraph-overhead, TRACE_GRAPH_PRINT_OVERHEAD) },
/* Display proc name/pid */
{ TRACER_OPT(funcgraph-proc, TRACE_GRAPH_PRINT_PROC) },
+ /* Display duration of execution */
+ { TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) },
+ /* Display absolute time of an entry */
+ { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) },
{ } /* Empty entry */
};
static struct tracer_flags tracer_flags = {
/* Don't display overruns and proc by default */
- .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD,
+ .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD |
+ TRACE_GRAPH_PRINT_DURATION,
.opts = trace_opts
};
/* pid on the last trace processed */
-static pid_t last_pid[NR_CPUS] = { [0 ... NR_CPUS-1] = -1 };
+
static int graph_trace_init(struct trace_array *tr)
{
@@ -154,17 +161,25 @@ print_graph_proc(struct trace_seq *s, pid_t pid)
/* If the pid changed since the last trace, output this event */
static enum print_line_t
-verif_pid(struct trace_seq *s, pid_t pid, int cpu)
+verif_pid(struct trace_seq *s, pid_t pid, int cpu, pid_t *last_pids_cpu)
{
pid_t prev_pid;
+ pid_t *last_pid;
int ret;
- if (last_pid[cpu] != -1 && last_pid[cpu] == pid)
+ if (!last_pids_cpu)
+ return TRACE_TYPE_HANDLED;
+
+ last_pid = per_cpu_ptr(last_pids_cpu, cpu);
+
+ if (*last_pid == pid)
return TRACE_TYPE_HANDLED;
- prev_pid = last_pid[cpu];
- last_pid[cpu] = pid;
+ prev_pid = *last_pid;
+ *last_pid = pid;
+ if (prev_pid == -1)
+ return TRACE_TYPE_HANDLED;
/*
* Context-switch trace line:
@@ -232,9 +247,34 @@ trace_branch_is_leaf(struct trace_iterator *iter,
return true;
}
+/* Signal an execution-time overhead to the output */
+static int
+print_graph_overhead(unsigned long long duration, struct trace_seq *s)
+{
+ /* If the duration is disabled, we don't need anything */
+ if (!(tracer_flags.val & TRACE_GRAPH_PRINT_DURATION))
+ return 1;
+
+ /* Non-nested entry or return */
+ if (duration == -1)
+ return trace_seq_printf(s, " ");
+
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
+ /* Duration exceeded 100 msecs */
+ if (duration > 100000ULL)
+ return trace_seq_printf(s, "! ");
+
+ /* Duration exceeded 10 msecs */
+ if (duration > 10000ULL)
+ return trace_seq_printf(s, "+ ");
+ }
+
+ return trace_seq_printf(s, " ");
+}
+
static enum print_line_t
print_graph_irq(struct trace_seq *s, unsigned long addr,
- enum trace_type type, int cpu, pid_t pid)
+ enum trace_type type, int cpu, pid_t pid)
{
int ret;
@@ -242,35 +282,40 @@ print_graph_irq(struct trace_seq *s, unsigned long addr,
addr >= (unsigned long)__irqentry_text_end)
return TRACE_TYPE_UNHANDLED;
- if (type == TRACE_GRAPH_ENT) {
- ret = trace_seq_printf(s, "==========> | ");
- } else {
- /* Cpu */
- if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
- ret = print_graph_cpu(s, cpu);
- if (ret == TRACE_TYPE_PARTIAL_LINE)
- return TRACE_TYPE_PARTIAL_LINE;
- }
- /* Proc */
- if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
- ret = print_graph_proc(s, pid);
- if (ret == TRACE_TYPE_PARTIAL_LINE)
- return TRACE_TYPE_PARTIAL_LINE;
+ /* Cpu */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
+ ret = print_graph_cpu(s, cpu);
+ if (ret == TRACE_TYPE_PARTIAL_LINE)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+ /* Proc */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) {
+ ret = print_graph_proc(s, pid);
+ if (ret == TRACE_TYPE_PARTIAL_LINE)
+ return TRACE_TYPE_PARTIAL_LINE;
+ ret = trace_seq_printf(s, " | ");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
- ret = trace_seq_printf(s, " | ");
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- }
+ /* No overhead */
+ ret = print_graph_overhead(-1, s);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
- /* No overhead */
- if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
- ret = trace_seq_printf(s, " ");
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- }
+ if (type == TRACE_GRAPH_ENT)
+ ret = trace_seq_printf(s, "==========>");
+ else
+ ret = trace_seq_printf(s, "<==========");
+
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ /* Don't close the duration column if there isn't one */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)
+ trace_seq_printf(s, " |");
+ ret = trace_seq_printf(s, "\n");
- ret = trace_seq_printf(s, "<========== |\n");
- }
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
return TRACE_TYPE_HANDLED;
@@ -289,7 +334,7 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s)
sprintf(msecs_str, "%lu", (unsigned long) duration);
/* Print msecs */
- ret = trace_seq_printf(s, msecs_str);
+ ret = trace_seq_printf(s, "%s", msecs_str);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
@@ -322,19 +367,15 @@ print_graph_duration(unsigned long long duration, struct trace_seq *s)
}
-/* Signal a overhead of time execution to the output */
-static int
-print_graph_overhead(unsigned long long duration, struct trace_seq *s)
+static int print_graph_abs_time(u64 t, struct trace_seq *s)
{
- /* Duration exceeded 100 msecs */
- if (duration > 100000ULL)
- return trace_seq_printf(s, "! ");
+ unsigned long usecs_rem;
- /* Duration exceeded 10 msecs */
- if (duration > 10000ULL)
- return trace_seq_printf(s, "+ ");
+ usecs_rem = do_div(t, 1000000000);
+ usecs_rem /= 1000;
- return trace_seq_printf(s, " ");
+ return trace_seq_printf(s, "%5lu.%06lu | ",
+ (unsigned long)t, usecs_rem);
}
/* Case of a leaf function on its call entry */
@@ -357,16 +398,16 @@ print_graph_entry_leaf(struct trace_iterator *iter,
duration = graph_ret->rettime - graph_ret->calltime;
/* Overhead */
- if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
- ret = print_graph_overhead(duration, s);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- }
+ ret = print_graph_overhead(duration, s);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
/* Duration */
- ret = print_graph_duration(duration, s);
- if (ret == TRACE_TYPE_PARTIAL_LINE)
- return TRACE_TYPE_PARTIAL_LINE;
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) {
+ ret = print_graph_duration(duration, s);
+ if (ret == TRACE_TYPE_PARTIAL_LINE)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
/* Function */
for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
@@ -395,25 +436,17 @@ print_graph_entry_nested(struct ftrace_graph_ent_entry *entry,
struct ftrace_graph_ent *call = &entry->graph_ent;
/* No overhead */
- if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
- ret = trace_seq_printf(s, " ");
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- }
+ ret = print_graph_overhead(-1, s);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
- /* Interrupt */
- ret = print_graph_irq(s, call->func, TRACE_GRAPH_ENT, cpu, pid);
- if (ret == TRACE_TYPE_UNHANDLED) {
- /* No time */
+ /* No time */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) {
ret = trace_seq_printf(s, " | ");
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
- } else {
- if (ret == TRACE_TYPE_PARTIAL_LINE)
- return TRACE_TYPE_PARTIAL_LINE;
}
-
/* Function */
for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
ret = trace_seq_printf(s, " ");
@@ -434,15 +467,30 @@ print_graph_entry_nested(struct ftrace_graph_ent_entry *entry,
static enum print_line_t
print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
- struct trace_iterator *iter, int cpu)
+ struct trace_iterator *iter)
{
int ret;
+ int cpu = iter->cpu;
+ pid_t *last_entry = iter->private;
struct trace_entry *ent = iter->ent;
+ struct ftrace_graph_ent *call = &field->graph_ent;
/* Pid */
- if (verif_pid(s, ent->pid, cpu) == TRACE_TYPE_PARTIAL_LINE)
+ if (verif_pid(s, ent->pid, cpu, last_entry) == TRACE_TYPE_PARTIAL_LINE)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ /* Interrupt */
+ ret = print_graph_irq(s, call->func, TRACE_GRAPH_ENT, cpu, ent->pid);
+ if (ret == TRACE_TYPE_PARTIAL_LINE)
return TRACE_TYPE_PARTIAL_LINE;
+ /* Absolute time */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) {
+ ret = print_graph_abs_time(iter->ts, s);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
/* Cpu */
if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
ret = print_graph_cpu(s, cpu);
@@ -470,16 +518,25 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
static enum print_line_t
print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
- struct trace_entry *ent, int cpu)
+ struct trace_entry *ent, struct trace_iterator *iter)
{
int i;
int ret;
+ int cpu = iter->cpu;
+ pid_t *last_pid = iter->private;
unsigned long long duration = trace->rettime - trace->calltime;
/* Pid */
- if (verif_pid(s, ent->pid, cpu) == TRACE_TYPE_PARTIAL_LINE)
+ if (verif_pid(s, ent->pid, cpu, last_pid) == TRACE_TYPE_PARTIAL_LINE)
return TRACE_TYPE_PARTIAL_LINE;
+ /* Absolute time */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) {
+ ret = print_graph_abs_time(iter->ts, s);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
/* Cpu */
if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
ret = print_graph_cpu(s, cpu);
@@ -499,16 +556,16 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
}
/* Overhead */
- if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
- ret = print_graph_overhead(duration, s);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
- }
+ ret = print_graph_overhead(duration, s);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
/* Duration */
- ret = print_graph_duration(duration, s);
- if (ret == TRACE_TYPE_PARTIAL_LINE)
- return TRACE_TYPE_PARTIAL_LINE;
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) {
+ ret = print_graph_duration(duration, s);
+ if (ret == TRACE_TYPE_PARTIAL_LINE)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
/* Closing brace */
for (i = 0; i < trace->depth * TRACE_GRAPH_INDENT; i++) {
@@ -542,14 +599,23 @@ print_graph_comment(struct print_entry *trace, struct trace_seq *s,
{
int i;
int ret;
+ int cpu = iter->cpu;
+ pid_t *last_pid = iter->private;
+
+ /* Absolute time */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME) {
+ ret = print_graph_abs_time(iter->ts, s);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
/* Pid */
- if (verif_pid(s, ent->pid, iter->cpu) == TRACE_TYPE_PARTIAL_LINE)
+ if (verif_pid(s, ent->pid, cpu, last_pid) == TRACE_TYPE_PARTIAL_LINE)
return TRACE_TYPE_PARTIAL_LINE;
/* Cpu */
if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
- ret = print_graph_cpu(s, iter->cpu);
+ ret = print_graph_cpu(s, cpu);
if (ret == TRACE_TYPE_PARTIAL_LINE)
return TRACE_TYPE_PARTIAL_LINE;
}
@@ -566,17 +632,17 @@ print_graph_comment(struct print_entry *trace, struct trace_seq *s,
}
/* No overhead */
- if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
- ret = trace_seq_printf(s, " ");
+ ret = print_graph_overhead(-1, s);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+
+ /* No time */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION) {
+ ret = trace_seq_printf(s, " | ");
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
}
- /* No time */
- ret = trace_seq_printf(s, " | ");
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
/* Indentation */
if (trace->depth > 0)
for (i = 0; i < (trace->depth + 1) * TRACE_GRAPH_INDENT; i++) {
@@ -614,13 +680,12 @@ print_graph_function(struct trace_iterator *iter)
case TRACE_GRAPH_ENT: {
struct ftrace_graph_ent_entry *field;
trace_assign_type(field, entry);
- return print_graph_entry(field, s, iter,
- iter->cpu);
+ return print_graph_entry(field, s, iter);
}
case TRACE_GRAPH_RET: {
struct ftrace_graph_ret_entry *field;
trace_assign_type(field, entry);
- return print_graph_return(&field->ret, s, entry, iter->cpu);
+ return print_graph_return(&field->ret, s, entry, iter);
}
case TRACE_PRINT: {
struct print_entry *field;
@@ -636,28 +701,55 @@ static void print_graph_headers(struct seq_file *s)
{
/* 1st line */
seq_printf(s, "# ");
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME)
+ seq_printf(s, " TIME ");
if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU)
- seq_printf(s, "CPU ");
+ seq_printf(s, "CPU");
if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC)
- seq_printf(s, "TASK/PID ");
- if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD)
- seq_printf(s, "OVERHEAD/");
- seq_printf(s, "DURATION FUNCTION CALLS\n");
+ seq_printf(s, " TASK/PID ");
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)
+ seq_printf(s, " DURATION ");
+ seq_printf(s, " FUNCTION CALLS\n");
/* 2nd line */
seq_printf(s, "# ");
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_ABS_TIME)
+ seq_printf(s, " | ");
if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU)
- seq_printf(s, "| ");
+ seq_printf(s, "| ");
if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC)
- seq_printf(s, "| | ");
- if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
- seq_printf(s, "| ");
- seq_printf(s, "| | | | |\n");
- } else
- seq_printf(s, " | | | | |\n");
+ seq_printf(s, " | | ");
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_DURATION)
+ seq_printf(s, " | | ");
+ seq_printf(s, " | | | |\n");
}
+
+static void graph_trace_open(struct trace_iterator *iter)
+{
+ /* pid on the last trace processed */
+ pid_t *last_pid = alloc_percpu(pid_t);
+ int cpu;
+
+ if (!last_pid)
+ pr_warning("function graph tracer: not enough memory\n");
+ else
+ for_each_possible_cpu(cpu) {
+ pid_t *pid = per_cpu_ptr(last_pid, cpu);
+ *pid = -1;
+ }
+
+ iter->private = last_pid;
+}
+
+static void graph_trace_close(struct trace_iterator *iter)
+{
+ percpu_free(iter->private);
+}
+
static struct tracer graph_trace __read_mostly = {
.name = "function_graph",
+ .open = graph_trace_open,
+ .close = graph_trace_close,
.init = graph_trace_init,
.reset = graph_trace_reset,
.print_line = print_graph_function,
--
1.6.0.4
* Re: [PATCH 1/2 v2] tracing/function-graph-tracer: various fixes and features
From: Ingo Molnar @ 2009-01-23 10:19 UTC
To: Frederic Weisbecker; +Cc: Steven Rostedt, Linux Kernel Mailing List
* Frederic Weisbecker <fweisbec@gmail.com> wrote:
>
> This patch brings various bugfixes:
>
> _ Drop the irrelevant first task switch at the very beginning of a trace.
> _ Drop the OVERHEAD word from the headers; the DURATION word is sufficient and will not
> overlap other columns.
> _ Make the headers line up with their respective columns whatever options are selected.
> As for features, one can now disable the duration column (this also hides the
> overhead markers, both for convenience and because an overhead marker is
> meaningless without the duration it annotates):
very nice! I've applied this to tip/tracing/function-graph-tracer, thanks
Frederic!
this:
> I guess no one needs nanosecond precision here; the main goal is to
> see when events happened on each CPU when the trace switches from one
> CPU to another.
>
> e.g.:
>
> 274.874760 | 1) 0.676 us | _spin_unlock();
> 274.874762 | 1) 0.609 us | native_load_sp0();
> 274.874763 | 1) 0.602 us | native_load_tls();
> 274.878739 | 0) 0.722 us | }
> 274.878740 | 0) 0.714 us | native_pmd_val();
> 274.878741 | 0) 0.730 us | native_pmd_val();
>
> Here there is a ~4000 usec gap when the trace switches CPUs.
Still needs a solution - if we do cross-CPU traces we want to have a
global trace clock with 'seamless' transitions between CPUs.
Ingo
* Re: [PATCH 1/2 v2] tracing/function-graph-tracer: various fixes and features
From: Frédéric Weisbecker @ 2009-01-23 10:36 UTC
To: Ingo Molnar; +Cc: Steven Rostedt, Linux Kernel Mailing List
2009/1/23 Ingo Molnar <mingo@elte.hu>:
>
> * Frederic Weisbecker <fweisbec@gmail.com> wrote:
>
>>
>> This patch brings various bugfixes:
>>
>> _ Drop the irrelevant first task switch at the very beginning of a trace.
>> _ Drop the OVERHEAD word from the headers; the DURATION word is sufficient and will not
>> overlap other columns.
>> _ Make the headers line up with their respective columns whatever options are selected.
>
>> As for features, one can now disable the duration column (this also hides the
>> overhead markers, both for convenience and because an overhead marker is
>> meaningless without the duration it annotates):
>
> very nice! I've applied this to tip/tracing/function-graph-tracer, thanks
> Frederic!
>
> this:
>
>> I guess no one needs nanosecond precision here; the main goal is to
>> see when events happened on each CPU when the trace switches from one
>> CPU to another.
>>
>> e.g.:
>>
>> 274.874760 | 1) 0.676 us | _spin_unlock();
>> 274.874762 | 1) 0.609 us | native_load_sp0();
>> 274.874763 | 1) 0.602 us | native_load_tls();
>> 274.878739 | 0) 0.722 us | }
>> 274.878740 | 0) 0.714 us | native_pmd_val();
>> 274.878741 | 0) 0.730 us | native_pmd_val();
>>
>> Here there is a ~4000 usec gap when the trace switches CPUs.
>
> Still needs a solution - if we do cross-CPU traces we want to have a
> global trace clock with 'seemless' transition between CPUs.
>
> Ingo
>
So it doesn't only need a monotonic clock, it needs a globally consistent
clock, like ktime for example? Unfortunately that one uses seqlocks, which
would add some drawbacks, like having to verify that the traced function
doesn't hold the write seqlock, and it would bring some more ftrace
recursion...
* Re: [PATCH 1/2 v2] tracing/function-graph-tracer: various fixes and features
From: Ingo Molnar @ 2009-01-23 11:00 UTC
To: Frédéric Weisbecker; +Cc: Steven Rostedt, Linux Kernel Mailing List
* Frédéric Weisbecker <fweisbec@gmail.com> wrote:
> > Still needs a solution - if we do cross-CPU traces we want to have a
> > global trace clock with 'seamless' transitions between CPUs.
>
> So it doesn't only need a monotonic clock, it needs a globally consistent
> clock, like ktime for example? Unfortunately that one uses seqlocks, which
> would add some drawbacks, like having to verify that the traced function
> doesn't hold the write seqlock, and it would bring some more ftrace recursion...
using ktime_get() is indeed out of the question - the GTOD callpaths are too
complex (and also too slow).
I'd not change anything in the current logic, but i was thinking of a new
trace_option, which can be set optionally. If that trace option is set
then this bit of ring_buffer_time_stamp():
time = sched_clock() << DEBUG_SHIFT;
gets turned into:
time = cpu_clock(cpu) << DEBUG_SHIFT;
This way we default to sched_clock(), but also gain some 'global'
properties if the trace_option is set.
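In code, the option switch might look roughly like this (a sketch only; the
TRACE_CLOCK_GLOBAL flag name is made up for illustration, only sched_clock()
and cpu_clock() are real):

	u64 ring_buffer_time_stamp(int cpu)
	{
		u64 time;

		/* hypothetical trace option selecting the global clock */
		if (trace_flags & TRACE_CLOCK_GLOBAL)
			time = cpu_clock(cpu);
		else
			time = sched_clock();

		return time << DEBUG_SHIFT;
	}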
Furthermore, another trace_option could introduce a third 'strongly
ordered' trace-clock variant, which would use cmpxchg and per cpu
timestamps, something like this:
atomic64_t curr_time;
DEFINE_PER_CPU(u64, prev_cpu_time);
...
retry:
prev_cpu_time = per_cpu(prev_cpu_time, cpu);
cpu_time = sched_clock();
old_time = atomic64_read(&curr_time);
delta = cpu_time - prev_cpu_time;
if (unlikely((s64)delta <= 0))
delta = 1;
new_time = old_time + delta;
if (atomic64_cmpxchg(&curr_time, old_time, new_time) != old_time)
goto retry;
time = new_time << DEBUG_SHIFT;
This would be a monotonic, global clock wrapped around sched_clock(). It
uses a cmpxchg to achieve it, but we have to use global ordering anyway.
It would still be _much_ faster than any GTOD clocksource we have.
Hm?
Ingo
* Re: [PATCH 1/2 v2] tracing/function-graph-tracer: various fixes and features
From: Steven Rostedt @ 2009-01-23 14:53 UTC
To: Ingo Molnar; +Cc: Frédéric Weisbecker, Linux Kernel Mailing List
On Fri, 23 Jan 2009, Ingo Molnar wrote:
>
> * Frédéric Weisbecker <fweisbec@gmail.com> wrote:
>
> > > Still needs a solution - if we do cross-CPU traces we want to have a
> > > global trace clock with 'seamless' transitions between CPUs.
> >
> > So it doesn't only need a monotonic clock, it needs a globally consistent
> > clock, like ktime for example? Unfortunately that one uses seqlocks, which
> > would add some drawbacks, like having to verify that the traced function
> > doesn't hold the write seqlock, and it would bring some more ftrace recursion...
>
> using ktime_get() is indeed out of the question - the GTOD callpaths are too
> complex (and also too slow).
>
> I'd not change anything in the current logic, but i was thinking of a new
> trace_option, which can be set optionally. If that trace option is set
> then this bit of ring_buffer_time_stamp():
>
> time = sched_clock() << DEBUG_SHIFT;
>
> gets turned into:
>
> time = cpu_clock(cpu) << DEBUG_SHIFT;
>
> This way we default to sched_clock(), but also gain some 'global'
> properties if the trace_option is set.
I would like to keep the ring buffer agnostic to ftrace, or any other
user. What we could do is add an interface to the ring buffer that will
allow the timekeeping functions to be set by the buffer's user.
typedef u64 (*ring_buffer_time_stamp_func)(int cpu);
typedef void (*ring_buffer_normalize_func)(int cpu, u64 *ts);
void ring_buffer_add_time_source(ring_buffer_time_stamp_func tsfunc,
                                 ring_buffer_normalize_func norfunc);
Then the ftrace tracer could pass in its own function to keep time.
This way we could also make several different tracers that use different
time sources, to test them out and compare, without needing to reboot.
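A tracer could then plug in its own clock, roughly like so (a sketch against
the proposed - not yet existing - interface; the graph_* names are made up):

	static u64 graph_time_stamp(int cpu)
	{
		/* e.g. a globally ordered clock, as Ingo suggested */
		return cpu_clock(cpu) << DEBUG_SHIFT;
	}

	static void graph_normalize_ts(int cpu, u64 *ts)
	{
		*ts >>= DEBUG_SHIFT;
	}

	/* from the tracer's init path: */
	ring_buffer_add_time_source(graph_time_stamp, graph_normalize_ts);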
-- Steve
* Re: [PATCH 1/2 v2] tracing/function-graph-tracer: various fixes and features
From: Frederic Weisbecker @ 2009-01-24 16:02 UTC
To: Ingo Molnar; +Cc: Steven Rostedt, Linux Kernel Mailing List
On Fri, Jan 23, 2009 at 12:00:37PM +0100, Ingo Molnar wrote:
>
> * Frédéric Weisbecker <fweisbec@gmail.com> wrote:
>
> > > Still needs a solution - if we do cross-CPU traces we want to have a
> > > global trace clock with 'seamless' transitions between CPUs.
> >
> > So it doesn't only need a monotonic clock, it needs a globally consistent
> > clock, like ktime for example? Unfortunately that one uses seqlocks, which
> > would add some drawbacks, like having to verify that the traced function
> > doesn't hold the write seqlock, and it would bring some more ftrace recursion...
>
> > using ktime_get() is indeed out of the question - the GTOD callpaths are too
> > complex (and also too slow).
>
> I'd not change anything in the current logic, but i was thinking of a new
> trace_option, which can be set optionally. If that trace option is set
> then this bit of ring_buffer_time_stamp():
>
> time = sched_clock() << DEBUG_SHIFT;
>
> gets turned into:
>
> time = cpu_clock(cpu) << DEBUG_SHIFT;
>
> This way we default to sched_clock(), but also gain some 'global'
> properties if the trace_option is set.
Ok, yeah that's a good idea.
> Furthermore, another trace_option could introduce a third 'strongly
> ordered' trace-clock variant, which would use cmpxchg and per cpu
> timestamps, something like this:
>
> atomic64_t curr_time;
>
> DEFINE_PER_CPU(u64, prev_cpu_time);
> ...
>
> retry:
> prev_cpu_time = per_cpu(prev_cpu_time, cpu);
> cpu_time = sched_clock();
> old_time = atomic64_read(&curr_time);
>
> delta = cpu_time - prev_cpu_time;
> if (unlikely((s64)delta <= 0))
> delta = 1;
>
> new_time = old_time + delta;
>
> if (atomic64_cmpxchg(&curr_time, old_time, new_time) != old_time)
> goto retry;
>
> time = new_time << DEBUG_SHIFT;
>
> This would be a monotonic, global clock wrapped around sched_clock(). It
> uses a cmpxchg to achieve it, but we have to use global ordering anyway.
>
> It would still be _much_ faster than any GTOD clocksource we have.
>
> Hm?
>
And that would be even faster than cpu_clock().
But why implement both? Wouldn't the above be faster while serving the same
purpose as cpu_clock()?
* Re: [PATCH 1/2 v2] tracing/function-graph-tracer: various fixes and features
From: Ingo Molnar @ 2009-01-26 14:55 UTC
To: Frederic Weisbecker; +Cc: Steven Rostedt, Linux Kernel Mailing List
* Frederic Weisbecker <fweisbec@gmail.com> wrote:
> On Fri, Jan 23, 2009 at 12:00:37PM +0100, Ingo Molnar wrote:
> >
> > * Frédéric Weisbecker <fweisbec@gmail.com> wrote:
> >
> > > > Still needs a solution - if we do cross-CPU traces we want to have a
> > > > global trace clock with 'seamless' transitions between CPUs.
> > >
> > > So it doesn't only need a monotonic clock, it needs a globally consistent
> > > clock, like ktime for example? Unfortunately that one uses seqlocks, which
> > > would add some drawbacks, like having to verify that the traced function
> > > doesn't hold the write seqlock, and it would bring some more ftrace recursion...
> >
> > using ktime_get() is indeed out of the question - the GTOD callpaths are too
> > complex (and also too slow).
> >
> > I'd not change anything in the current logic, but i was thinking of a new
> > trace_option, which can be set optionally. If that trace option is set
> > then this bit of ring_buffer_time_stamp():
> >
> > time = sched_clock() << DEBUG_SHIFT;
> >
> > gets turned into:
> >
> > time = cpu_clock(cpu) << DEBUG_SHIFT;
> >
> > This way we default to sched_clock(), but also gain some 'global'
> > properties if the trace_option is set.
>
>
> Ok, yeah that's a good idea.
>
>
> > Furthermore, another trace_option could introduce a third 'strongly
> > ordered' trace-clock variant, which would use cmpxchg and per cpu
> > timestamps, something like this:
> >
> > atomic64_t curr_time;
> >
> > DEFINE_PER_CPU(u64, prev_cpu_time);
> > ...
> >
> > retry:
> > prev_cpu_time = per_cpu(prev_cpu_time, cpu);
> > cpu_time = sched_clock();
> > old_time = atomic64_read(&curr_time);
> >
> > delta = cpu_time - prev_cpu_time;
> > if (unlikely((s64)delta <= 0))
> > delta = 1;
> >
> > new_time = old_time + delta;
> >
> > if (atomic64_cmpxchg(&curr_time, old_time, new_time) != old_time)
> > goto retry;
> >
> > time = new_time << DEBUG_SHIFT;
> >
> > This would be a monotonic, global clock wrapped around sched_clock(). It
> > uses a cmpxchg to achieve it, but we have to use global ordering anyway.
> >
> > It would still be _much_ faster than any GTOD clocksource we have.
> >
> > Hm?
> >
>
> And that would be even faster than cpu_clock().
>
> But why implement both? Wouldn't the above be faster while serving the
> same purpose as cpu_clock()?
cpu_clock() can be faster than a cmpxchg on a global variable - as
cpu_clock() does not serialize globally at all as long as each CPU
observes its own time only.
in any case, these are nuances - we need _some_ trace clock before we
worry about details like that ;-)
Ingo