* [PATCH] tracing/function-graph-tracer: adjustments of the trace informations
@ 2008-11-27 23:42 Frederic Weisbecker
2008-11-28 9:18 ` Ingo Molnar
0 siblings, 1 reply; 11+ messages in thread
From: Frederic Weisbecker @ 2008-11-27 23:42 UTC (permalink / raw)
To: Ingo Molnar; +Cc: Steven Rostedt, Tim Bird, Peter Zijlstra, Linux Kernel
Impact: increase visibility of the output
This patch applies various changes.
_CPU is now a decimal number, followed by a parenthesis.
_Overhead is now on the second column (gives a good visibility)
_Cost is now on the third column, can't exceed 9999.99 us. It is followed
by a virtual line based on a "|" character.
_Functions calls are now the last column on the right. This way, we haven't
dynamic column (which flow is harder to follow) on its right.
_CPU and Overhead have their own option flag. They are default-on but you can
disable them easily:
echo nofuncgraph-cpu > trace_options
echo nofuncgraph-overhead > trace_options
TODO:
_ Refactoring of the thread switch output.
_ Give a default-off option to output the thread and its pid on each row.
_ Provide headers
_ ....
Here is an example of the new trace style (a wider one is provided in attachment):
0) | mutex_unlock() {
0) 0.639 us | __mutex_unlock_slowpath();
0) 1.607 us | }
0) | remove_wait_queue() {
0) 0.616 us | _spin_lock_irqsave();
0) 0.616 us | _spin_unlock_irqrestore();
0) 2.779 us | }
0) 0.495 us | n_tty_set_room();
0) ! 9999.999 us | }
0) | tty_ldisc_deref() {
0) 0.615 us | _spin_lock_irqsave();
0) 0.616 us | _spin_unlock_irqrestore();
0) 2.793 us | }
0) | current_fs_time() {
0) 0.488 us | current_kernel_time();
0) 0.495 us | timespec_trunc();
0) 2.486 us | }
0) ! 9999.999 us | }
0) ! 9999.999 us | }
0) ! 9999.999 us | }
0) | sys_read() {
0) 0.796 us | fget_light();
0) | vfs_read() {
0) | rw_verify_area() {
0) | security_file_permission() {
0) 0.488 us | cap_file_permission();
0) 1.720 us | }
0) 3. 4 us | }
0) | tty_read() {
0) 0.488 us | tty_paranoia_check();
0) | tty_ldisc_ref_wait() {
0) | tty_ldisc_try() {
0) 0.615 us | _spin_lock_irqsave();
0) 0.615 us | _spin_unlock_irqrestore();
0) 5.436 us | }
0) 6.427 us | }
Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
kernel/trace/trace_functions_graph.c | 142 ++++++++++++++++++++++------------
1 files changed, 93 insertions(+), 49 deletions(-)
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index b958d60..596a3ee 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -14,20 +14,25 @@
#include "trace.h"
#define TRACE_GRAPH_INDENT 2
-/* Spaces between function call and time duration */
-#define TRACE_GRAPH_TIMESPACE_ENTRY " "
-/* Spaces between function call and closing braces */
-#define TRACE_GRAPH_TIMESPACE_RET " "
+/* Flag options */
#define TRACE_GRAPH_PRINT_OVERRUN 0x1
+#define TRACE_GRAPH_PRINT_CPU 0x2
+#define TRACE_GRAPH_PRINT_OVERHEAD 0x4
+
static struct tracer_opt trace_opts[] = {
- /* Display overruns or not */
- { TRACER_OPT(overrun, TRACE_GRAPH_PRINT_OVERRUN) },
+ /* Display overruns ? */
+ { TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) },
+ /* Display CPU ? */
+ { TRACER_OPT(funcgraph-cpu, TRACE_GRAPH_PRINT_CPU) },
+ /* Display Overhead ? */
+ { TRACER_OPT(funcgraph-overhead, TRACE_GRAPH_PRINT_OVERHEAD) },
{ } /* Empty entry */
};
static struct tracer_flags tracer_flags = {
- .val = 0, /* Don't display overruns by default */
+ /* Don't display overruns by default */
+ .val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD,
.opts = trace_opts
};
@@ -56,6 +61,36 @@ static void graph_trace_reset(struct trace_array *tr)
unregister_ftrace_graph();
}
+static inline int log10_cpu(int nb)
+{
+ if (nb / 100)
+ return 3;
+ if (nb / 10)
+ return 2;
+ return 1;
+}
+
+static enum print_line_t
+print_graph_cpu(struct trace_seq *s, int cpu)
+{
+ int i;
+ int ret;
+ int log10_this = log10_cpu(cpu);
+ int log10_all = log10_cpu(cpus_weight_nr(cpu_online_map));
+
+
+ for (i = 0; i < log10_all - log10_this; i++) {
+ ret = trace_seq_printf(s, " ");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+ ret = trace_seq_printf(s, "%d) ", cpu);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ return TRACE_TYPE_HANDLED;
+}
+
+
/* If the pid changed since the last trace, output this event */
static int verif_pid(struct trace_seq *s, pid_t pid, int cpu)
{
@@ -67,8 +102,7 @@ static int verif_pid(struct trace_seq *s, pid_t pid, int cpu)
last_pid[cpu] = pid;
comm = trace_find_cmdline(pid);
- return trace_seq_printf(s, "\nCPU[%03d] "
- " ------------8<---------- thread %s-%d"
+ return trace_seq_printf(s, "\n------------8<---------- thread %s-%d"
" ------------8<----------\n\n",
cpu, comm, pid);
}
@@ -86,7 +120,7 @@ trace_branch_is_leaf(struct trace_iterator *iter,
if (!ring_iter)
return false;
- event = ring_buffer_iter_peek(iter->buffer_iter[iter->cpu], NULL);
+ event = ring_buffer_iter_peek(ring_iter, NULL);
if (!event)
return false;
@@ -108,7 +142,7 @@ static inline int
print_graph_duration(unsigned long long duration, struct trace_seq *s)
{
unsigned long nsecs_rem = do_div(duration, 1000);
- return trace_seq_printf(s, "+ %llu.%lu us\n", duration, nsecs_rem);
+ return trace_seq_printf(s, "%4llu.%3lu us | ", duration, nsecs_rem);
}
/* Signal a overhead of time execution to the output */
@@ -136,8 +170,8 @@ print_graph_entry_leaf(struct trace_iterator *iter,
struct ring_buffer_event *event;
struct ftrace_graph_ent *call;
unsigned long long duration;
- int i;
int ret;
+ int i;
event = ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
ret_entry = ring_buffer_event_data(event);
@@ -145,8 +179,19 @@ print_graph_entry_leaf(struct trace_iterator *iter,
call = &entry->graph_ent;
duration = graph_ret->rettime - graph_ret->calltime;
+ /* Must not exceed 8 characters: 9999.999 us */
+ if (duration > 10000000ULL)
+ duration = 9999999ULL;
+
/* Overhead */
- ret = print_graph_overhead(duration, s);
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
+ ret = print_graph_overhead(duration, s);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ /* Duration */
+ ret = print_graph_duration(duration, s);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
@@ -161,16 +206,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
- ret = trace_seq_printf(s, "();");
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- /* Duration */
- ret = trace_seq_printf(s, TRACE_GRAPH_TIMESPACE_ENTRY);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- ret = print_graph_duration(duration, s);
+ ret = trace_seq_printf(s, "();\n");
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
@@ -186,9 +222,14 @@ print_graph_entry_nested(struct ftrace_graph_ent_entry *entry,
struct ftrace_graph_ent *call = &entry->graph_ent;
/* No overhead */
- ret = trace_seq_printf(s, " ");
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
+ ret = trace_seq_printf(s, " ");
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ /* No time */
+ ret = trace_seq_printf(s, " | ");
/* Function */
for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
@@ -201,12 +242,7 @@ print_graph_entry_nested(struct ftrace_graph_ent_entry *entry,
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
- ret = trace_seq_printf(s, "() {");
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- /* No duration to print at this state */
- ret = trace_seq_printf(s, TRACE_GRAPH_TIMESPACE_ENTRY "-\n");
+ ret = trace_seq_printf(s, "() {\n");
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
@@ -220,12 +256,16 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
int ret;
struct trace_entry *ent = iter->ent;
+ /* Pid */
if (!verif_pid(s, ent->pid, cpu))
return TRACE_TYPE_PARTIAL_LINE;
- ret = trace_seq_printf(s, "CPU[%03d] ", cpu);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
+ /* Cpu */
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
+ ret = print_graph_cpu(s, cpu);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
if (trace_branch_is_leaf(iter, field))
return print_graph_entry_leaf(iter, field, s);
@@ -242,17 +282,30 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
int ret;
unsigned long long duration = trace->rettime - trace->calltime;
+ /* Must not exceed 8 characters: xxxx.yyy us */
+ if (duration > 10000000ULL)
+ duration = 9999999ULL;
+
/* Pid */
if (!verif_pid(s, ent->pid, cpu))
return TRACE_TYPE_PARTIAL_LINE;
/* Cpu */
- ret = trace_seq_printf(s, "CPU[%03d] ", cpu);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
+ ret = print_graph_cpu(s, cpu);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
/* Overhead */
- ret = print_graph_overhead(duration, s);
+ if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
+ ret = print_graph_overhead(duration, s);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ }
+
+ /* Duration */
+ ret = print_graph_duration(duration, s);
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
@@ -263,16 +316,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
return TRACE_TYPE_PARTIAL_LINE;
}
- ret = trace_seq_printf(s, "} ");
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- /* Duration */
- ret = trace_seq_printf(s, TRACE_GRAPH_TIMESPACE_RET);
- if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
-
- ret = print_graph_duration(duration, s);
+ ret = trace_seq_printf(s, "}\n");
if (!ret)
return TRACE_TYPE_PARTIAL_LINE;
--
1.5.2.5
^ permalink raw reply related [flat|nested] 11+ messages in thread* Re: [PATCH] tracing/function-graph-tracer: adjustments of the trace informations
2008-11-27 23:42 [PATCH] tracing/function-graph-tracer: adjustments of the trace informations Frederic Weisbecker
@ 2008-11-28 9:18 ` Ingo Molnar
2008-11-28 9:25 ` Ingo Molnar
2008-11-28 13:55 ` Frédéric Weisbecker
0 siblings, 2 replies; 11+ messages in thread
From: Ingo Molnar @ 2008-11-28 9:18 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: Steven Rostedt, Tim Bird, Peter Zijlstra, Linux Kernel
* Frederic Weisbecker <fweisbec@gmail.com> wrote:
> Impact: increase visibility of the output
>
> This patch applies various changes.
>
> _CPU is now a decimal number, followed by a parenthesis.
>
> _Overhead is now on the second column (gives a good visibility)
>
> _Cost is now on the third column, can't exceed 9999.99 us. It is followed
> by a virtual line based on a "|" character.
>
> _Functions calls are now the last column on the right. This way, we haven't
> dynamic column (which flow is harder to follow) on its right.
>
> _CPU and Overhead have their own option flag. They are default-on but you can
> disable them easily:
>
> echo nofuncgraph-cpu > trace_options
> echo nofuncgraph-overhead > trace_options
>
> TODO:
>
> _ Refactoring of the thread switch output.
> _ Give a default-off option to output the thread and its pid on each row.
> _ Provide headers
> _ ....
>
> Here is an example of the new trace style (a wider one is provided in attachment):
>
> 0) | mutex_unlock() {
> 0) 0.639 us | __mutex_unlock_slowpath();
> 0) 1.607 us | }
> 0) | remove_wait_queue() {
> 0) 0.616 us | _spin_lock_irqsave();
> 0) 0.616 us | _spin_unlock_irqrestore();
> 0) 2.779 us | }
> 0) 0.495 us | n_tty_set_room();
> 0) ! 9999.999 us | }
> 0) | tty_ldisc_deref() {
> 0) 0.615 us | _spin_lock_irqsave();
> 0) 0.616 us | _spin_unlock_irqrestore();
> 0) 2.793 us | }
> 0) | current_fs_time() {
> 0) 0.488 us | current_kernel_time();
> 0) 0.495 us | timespec_trunc();
> 0) 2.486 us | }
> 0) ! 9999.999 us | }
> 0) ! 9999.999 us | }
> 0) ! 9999.999 us | }
> 0) | sys_read() {
> 0) 0.796 us | fget_light();
> 0) | vfs_read() {
> 0) | rw_verify_area() {
> 0) | security_file_permission() {
> 0) 0.488 us | cap_file_permission();
> 0) 1.720 us | }
> 0) 3. 4 us | }
> 0) | tty_read() {
> 0) 0.488 us | tty_paranoia_check();
> 0) | tty_ldisc_ref_wait() {
> 0) | tty_ldisc_try() {
> 0) 0.615 us | _spin_lock_irqsave();
> 0) 0.615 us | _spin_unlock_irqrestore();
> 0) 5.436 us | }
> 0) 6.427 us | }
very nice! Applied to tip/tracing/function-graph-tracer
a few comments.
Firstly, what do you think about the additional tweaks i did - see the
patch below?
Before:
0) | sys_read() {
0) 0.796 us | fget_light();
0) | vfs_read() {
0) | rw_verify_area() {
0) | security_file_permission() {
------------8<---------- thread sshd-1755 ------------8<----------
After:
0) | sys_read() {
0) 0.796 us | fget_light();
0) | vfs_read() {
0) | rw_verify_area() {
0) | security_file_permission() {
------------------------------------------
| 1) migration/0--1 => sshd-1755
------------------------------------------
Secondly:
> + /* Must not exceed 8 characters: xxxx.yyy us */
> + if (duration > 10000000ULL)
> + duration = 9999999ULL;
10 milliseconds isnt much or full system calls, etc.- so i believe the
rule should be what i outlined in an earlier mail. The relevant
transition points go like this:
0.000
xxxx.yyy
9999.999
10000.00
xxxxx.yy
99999.99
100000.0
xxxxxx.y
999999.9
1000000
9999999
10000000
xxxxxxxx
99999999 [ 100 seconds ]
100000000 ... up to infinity
this way we can get up to 99999999 usecs with the same fixed width -
or 100 seconds. _That_ is enough in practice.
But even beyond that we should print it all out: we should still not
clip actual information but instead shift the line to the right. The
slightly inconsistent line is not a big problem - we want a 100
seconds delay to stand out anyway ;-)
The moving decimal point above 10 milliseconds is not a big problem
with the '+' and '!' marker. Maybe add a '!!' marker to these lines?
Ingo
------------------->
>From d51090b34602a20984ab0312ef04e47069c0aec6 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 28 Nov 2008 09:55:16 +0100
Subject: [PATCH] tracing/function-graph-tracer: more output tweaks
Impact: prettify the output some more
Before:
0) | sys_read() {
0) 0.796 us | fget_light();
0) | vfs_read() {
0) | rw_verify_area() {
0) | security_file_permission() {
------------8<---------- thread sshd-1755 ------------8<----------
After:
0) | sys_read() {
0) 0.796 us | fget_light();
0) | vfs_read() {
0) | rw_verify_area() {
0) | security_file_permission() {
------------------------------------------
| 1) migration/0--1 => sshd-1755
------------------------------------------
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
kernel/trace/trace_functions_graph.c | 45 ++++++++++++++++++++++++++++-----
1 files changed, 38 insertions(+), 7 deletions(-)
diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 596a3ee..894b50b 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -79,6 +79,19 @@ print_graph_cpu(struct trace_seq *s, int cpu)
int log10_all = log10_cpu(cpus_weight_nr(cpu_online_map));
+ /*
+ * Start with a space character - to make it stand out
+ * to the right a bit when trace output is pasted into
+ * email:
+ */
+ ret = trace_seq_printf(s, " ");
+
+ /*
+ * Tricky - we space the CPU field according to the max
+ * number of online CPUs. On a 2-cpu system it would take
+ * a maximum of 1 digit - on a 128 cpu system it would
+ * take up to 3 digits:
+ */
for (i = 0; i < log10_all - log10_this; i++) {
ret = trace_seq_printf(s, " ");
if (!ret)
@@ -86,7 +99,8 @@ print_graph_cpu(struct trace_seq *s, int cpu)
}
ret = trace_seq_printf(s, "%d) ", cpu);
if (!ret)
- return TRACE_TYPE_PARTIAL_LINE;
+ return TRACE_TYPE_PARTIAL_LINE;
+
return TRACE_TYPE_HANDLED;
}
@@ -94,17 +108,34 @@ print_graph_cpu(struct trace_seq *s, int cpu)
/* If the pid changed since the last trace, output this event */
static int verif_pid(struct trace_seq *s, pid_t pid, int cpu)
{
- char *comm;
+ char *comm, *prev_comm;
+ pid_t prev_pid;
+ int ret;
if (last_pid[cpu] != -1 && last_pid[cpu] == pid)
return 1;
+ prev_pid = last_pid[cpu];
last_pid[cpu] = pid;
+
comm = trace_find_cmdline(pid);
+ prev_comm = trace_find_cmdline(prev_pid);
- return trace_seq_printf(s, "\n------------8<---------- thread %s-%d"
- " ------------8<----------\n\n",
- cpu, comm, pid);
+/*
+ * Context-switch trace line:
+
+ ------------------------------------------
+ | 1) migration/0--1 => sshd-1755
+ ------------------------------------------
+
+ */
+ ret = trace_seq_printf(s,
+ " ------------------------------------------\n");
+ ret += trace_seq_printf(s, " | %d) %s-%d => %s-%d\n",
+ cpu, prev_comm, prev_pid, comm, pid);
+ ret += trace_seq_printf(s,
+ " ------------------------------------------\n\n");
+ return ret;
}
static bool
@@ -142,7 +173,7 @@ static inline int
print_graph_duration(unsigned long long duration, struct trace_seq *s)
{
unsigned long nsecs_rem = do_div(duration, 1000);
- return trace_seq_printf(s, "%4llu.%3lu us | ", duration, nsecs_rem);
+ return trace_seq_printf(s, "%4llu.%3lu us | ", duration, nsecs_rem);
}
/* Signal a overhead of time execution to the output */
@@ -229,7 +260,7 @@ print_graph_entry_nested(struct ftrace_graph_ent_entry *entry,
}
/* No time */
- ret = trace_seq_printf(s, " | ");
+ ret = trace_seq_printf(s, " | ");
/* Function */
for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
^ permalink raw reply related [flat|nested] 11+ messages in thread* Re: [PATCH] tracing/function-graph-tracer: adjustments of the trace informations
2008-11-28 9:18 ` Ingo Molnar
@ 2008-11-28 9:25 ` Ingo Molnar
2008-11-28 13:58 ` Frédéric Weisbecker
2008-11-28 19:22 ` Ingo Molnar
2008-11-28 13:55 ` Frédéric Weisbecker
1 sibling, 2 replies; 11+ messages in thread
From: Ingo Molnar @ 2008-11-28 9:25 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: Steven Rostedt, Tim Bird, Peter Zijlstra, Linux Kernel
another thing which i'm sure you are aware of:
# tracer: function_graph
#
# TASK-PID CPU# TIMESTAMP FUNCTION
# | | | | |
------------------------------------------
| 1) S99local--1 => ssh-1724
------------------------------------------
1) 0.867 us | }
1) | mark_page_accessed() {
1) 0.293 us | constant_test_bit();
1) 0.294 us | constant_test_bit();
[...]
the header description is now out of of sync :-)
Also, we should probably not start the trace with a PID-switch event.
the last_pid[] markers are probably out of sync when we start printing
the trace.
So the output should probably be something like:
# tracer: function_graph
#
# CPU COST FUNCTION-GRAPH
# | | |
1) 0.867 us | }
1) | mark_page_accessed() {
1) 0.293 us | constant_test_bit();
1) 0.294 us | constant_test_bit();
[...]
Ingo
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH] tracing/function-graph-tracer: adjustments of the trace informations
2008-11-28 9:25 ` Ingo Molnar
@ 2008-11-28 13:58 ` Frédéric Weisbecker
2008-11-28 19:22 ` Ingo Molnar
1 sibling, 0 replies; 11+ messages in thread
From: Frédéric Weisbecker @ 2008-11-28 13:58 UTC (permalink / raw)
To: Ingo Molnar; +Cc: Steven Rostedt, Tim Bird, Peter Zijlstra, Linux Kernel
2008/11/28 Ingo Molnar <mingo@elte.hu>:
>
> another thing which i'm sure you are aware of:
>
> # tracer: function_graph
> #
> # TASK-PID CPU# TIMESTAMP FUNCTION
> # | | | | |
> ------------------------------------------
> | 1) S99local--1 => ssh-1724
> ------------------------------------------
>
> 1) 0.867 us | }
> 1) | mark_page_accessed() {
> 1) 0.293 us | constant_test_bit();
> 1) 0.294 us | constant_test_bit();
> [...]
>
> the header description is now out of of sync :-)
>
> Also, we should probably not start the trace with a PID-switch event.
> the last_pid[] markers are probably out of sync when we start printing
> the trace.
>
> So the output should probably be something like:
>
> # tracer: function_graph
> #
> # CPU COST FUNCTION-GRAPH
> # | | |
>
> 1) 0.867 us | }
> 1) | mark_page_accessed() {
> 1) 0.293 us | constant_test_bit();
> 1) 0.294 us | constant_test_bit();
> [...]
Ok I will just have to update something in the open callback....
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH] tracing/function-graph-tracer: adjustments of the trace informations
2008-11-28 9:25 ` Ingo Molnar
2008-11-28 13:58 ` Frédéric Weisbecker
@ 2008-11-28 19:22 ` Ingo Molnar
2008-11-29 3:40 ` Frédéric Weisbecker
1 sibling, 1 reply; 11+ messages in thread
From: Ingo Molnar @ 2008-11-28 19:22 UTC (permalink / raw)
To: Frederic Weisbecker
Cc: Steven Rostedt, Tim Bird, Peter Zijlstra, Linux Kernel
another detail: it would be nice to annotate IRQ entries some more.
right now if a syscall is interrupted by a hardware interrupt, it looks
like this:
0) | skb_dequeue() {
0) 0.365 us | _spin_lock_irqsave();
0) 0.422 us | _spin_unlock_irqrestore();
0) 1.912 us | }
0) | memcpy_toiovec() {
0) | copy_to_user() {
0) | smp_apic_timer_interrupt() {
0) 0.450 us | native_apic_mem_write();
0) | irq_enter() {
0) 0.410 us | idle_cpu();
0) 1.167 us | }
0) | hrtimer_interrupt() {
the natural nesting is nice, but easy to miss. It would be nice to have
something like:
0) | skb_dequeue() {
0) 0.365 us | _spin_lock_irqsave();
0) 0.422 us | _spin_unlock_irqrestore();
0) 1.912 us | }
0) | memcpy_toiovec() {
0) | copy_to_user() {
0) =====> smp_apic_timer_interrupt() {
0) 0.450 us | native_apic_mem_write();
0) | irq_enter() {
0) 0.410 us | idle_cpu();
0) 1.167 us | }
0) | hrtimer_interrupt() {
and perhaps something like this at the end of the irq frame:
0) 0.408 us | _local_bh_enable();
0) + 13.139 us | }
0) + 13.887 us | }
0) 0.406 us | idle_cpu();
0) + 15.373 us | }
0) + 74.564 us | }
0) | _cond_resched() {
0) | need_resched() {
0) 0.423 us | constant_test_bit();
0) 1.164 us | }
0) 1.915 us <=== } /* IRQ 1 end */
0) + 78.396 us | }
0) + 79.123 us | }
0) 0.387 us | skb_pull();
0) | kfree_skb() {
?
Ingo
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH] tracing/function-graph-tracer: adjustments of the trace informations
2008-11-28 19:22 ` Ingo Molnar
@ 2008-11-29 3:40 ` Frédéric Weisbecker
2008-11-29 10:59 ` Peter Zijlstra
0 siblings, 1 reply; 11+ messages in thread
From: Frédéric Weisbecker @ 2008-11-29 3:40 UTC (permalink / raw)
To: Ingo Molnar; +Cc: Steven Rostedt, Tim Bird, Peter Zijlstra, Linux Kernel
2008/11/28 Ingo Molnar <mingo@elte.hu>:
>
> another detail: it would be nice to annotate IRQ entries some more.
>
> right now if a syscall is interrupted by a hardware interrupt, it looks
> like this:
>
> 0) | skb_dequeue() {
> 0) 0.365 us | _spin_lock_irqsave();
> 0) 0.422 us | _spin_unlock_irqrestore();
> 0) 1.912 us | }
> 0) | memcpy_toiovec() {
> 0) | copy_to_user() {
> 0) | smp_apic_timer_interrupt() {
> 0) 0.450 us | native_apic_mem_write();
> 0) | irq_enter() {
> 0) 0.410 us | idle_cpu();
> 0) 1.167 us | }
> 0) | hrtimer_interrupt() {
>
> the natural nesting is nice, but easy to miss. It would be nice to have
> something like:
>
> 0) | skb_dequeue() {
> 0) 0.365 us | _spin_lock_irqsave();
> 0) 0.422 us | _spin_unlock_irqrestore();
> 0) 1.912 us | }
> 0) | memcpy_toiovec() {
> 0) | copy_to_user() {
> 0) =====> smp_apic_timer_interrupt() {
> 0) 0.450 us | native_apic_mem_write();
> 0) | irq_enter() {
> 0) 0.410 us | idle_cpu();
> 0) 1.167 us | }
> 0) | hrtimer_interrupt() {
>
> and perhaps something like this at the end of the irq frame:
>
> 0) 0.408 us | _local_bh_enable();
> 0) + 13.139 us | }
> 0) + 13.887 us | }
> 0) 0.406 us | idle_cpu();
> 0) + 15.373 us | }
> 0) + 74.564 us | }
> 0) | _cond_resched() {
> 0) | need_resched() {
> 0) 0.423 us | constant_test_bit();
> 0) 1.164 us | }
> 0) 1.915 us <=== } /* IRQ 1 end */
> 0) + 78.396 us | }
> 0) + 79.123 us | }
> 0) 0.387 us | skb_pull();
> 0) | kfree_skb() {
>
> ?
I tried to figure out a way to signal a hardirq in a trace, I like
this arrow but It
breaks the indentation.
I would suggest:
0) | skb_dequeue() {
0) 0.365 us | _spin_lock_irqsave();
0) 0.422 us | _spin_unlock_irqrestore();
0) 1.912 us | }
0) | memcpy_toiovec() {
0) | copy_to_user() {
=====>
0) | smp_apic_timer_interrupt() {
0) 0.450 us | native_apic_mem_write();
0) | irq_enter() {
0) 0.410 us | idle_cpu();
0) 1.167 us | }
0) | hrtimer_interrupt() {
Your idea solves a part of one of the features I wanted to add: why
not using a single character column to print
the current context: workqueue, syscall, kernel thread, tasklet, hardirq .....
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH] tracing/function-graph-tracer: adjustments of the trace informations
2008-11-29 3:40 ` Frédéric Weisbecker
@ 2008-11-29 10:59 ` Peter Zijlstra
0 siblings, 0 replies; 11+ messages in thread
From: Peter Zijlstra @ 2008-11-29 10:59 UTC (permalink / raw)
To: Frédéric Weisbecker
Cc: Ingo Molnar, Steven Rostedt, Tim Bird, Linux Kernel
On Sat, 2008-11-29 at 04:40 +0100, Frédéric Weisbecker wrote:
> 2008/11/28 Ingo Molnar <mingo@elte.hu>:
> I tried to figure out a way to signal a hardirq in a trace, I like
> this arrow but It
> breaks the indentation.
> I would suggest:
>
> 0) | skb_dequeue() {
> 0) 0.365 us | _spin_lock_irqsave();
> 0) 0.422 us | _spin_unlock_irqrestore();
> 0) 1.912 us | }
> 0) | memcpy_toiovec() {
> 0) | copy_to_user() {
> =====>
> 0) | smp_apic_timer_interrupt() {
> 0) 0.450 us | native_apic_mem_write();
> 0) | irq_enter() {
> 0) 0.410 us | idle_cpu();
> 0) 1.167 us | }
> 0) | hrtimer_interrupt() {
>
> Your idea solves a part of one of the features I wanted to add: why
> not using a single character column to print
> the current context: workqueue, syscall, kernel thread, tasklet, hardirq .....
That was my initial suggestion too, like the latency tracer has.
Then again, most of that would be obvious from the comm/pid column, that
will show preemption and all those things.
But showing Hard,Soft irq disabled and perhaps the preempt count would
be handy.
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] tracing/function-graph-tracer: adjustments of the trace informations
2008-11-28 9:18 ` Ingo Molnar
2008-11-28 9:25 ` Ingo Molnar
@ 2008-11-28 13:55 ` Frédéric Weisbecker
2008-11-28 14:17 ` Ingo Molnar
1 sibling, 1 reply; 11+ messages in thread
From: Frédéric Weisbecker @ 2008-11-28 13:55 UTC (permalink / raw)
To: Ingo Molnar; +Cc: Steven Rostedt, Tim Bird, Peter Zijlstra, Linux Kernel
2008/11/28 Ingo Molnar <mingo@elte.hu>:
> Firstly, what do you think about the additional tweaks i did - see the
> patch below?
>
> Before:
>
> 0) | sys_read() {
> 0) 0.796 us | fget_light();
> 0) | vfs_read() {
> 0) | rw_verify_area() {
> 0) | security_file_permission() {
> ------------8<---------- thread sshd-1755 ------------8<----------
>
> After:
>
> 0) | sys_read() {
> 0) 0.796 us | fget_light();
> 0) | vfs_read() {
> 0) | rw_verify_area() {
> 0) | security_file_permission() {
> ------------------------------------------
> | 1) migration/0--1 => sshd-1755
> ------------------------------------------
Thanks! That good, but I remember you wanted a limited number of
characters for thread name/pid couple?
> Secondly:
>
>> + /* Must not exceed 8 characters: xxxx.yyy us */
>> + if (duration > 10000000ULL)
>> + duration = 9999999ULL;
>
> 10 milliseconds isnt much or full system calls, etc.- so i believe the
> rule should be what i outlined in an earlier mail. The relevant
> transition points go like this:
>
> 0.000
> xxxx.yyy
> 9999.999
> 10000.00
> xxxxx.yy
> 99999.99
> 100000.0
> xxxxxx.y
> 999999.9
> 1000000
> 9999999
> 10000000
> xxxxxxxx
> 99999999 [ 100 seconds ]
> 100000000 ... up to infinity
>
> this way we can get up to 99999999 usecs with the same fixed width -
> or 100 seconds. _That_ is enough in practice.
>
> But even beyond that we should print it all out: we should still not
> clip actual information but instead shift the line to the right. The
> slightly inconsistent line is not a big problem - we want a 100
> seconds delay to stand out anyway ;-)
>
> The moving decimal point above 10 milliseconds is not a big problem
> with the '+' and '!' marker. Maybe add a '!!' marker to these lines?
Yeah, I was sure I misunderstood your idea about it in your last email :-)
Ok. I will do so, I felt a bit uncomfortable with the fixed width above a limit,
so I like it this way. Long sleeping functions will be rare enough in the trace
to let us move a bit the columns to the right in such cases...
Thanks.
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH] tracing/function-graph-tracer: adjustments of the trace informations
2008-11-28 13:55 ` Frédéric Weisbecker
@ 2008-11-28 14:17 ` Ingo Molnar
2008-11-29 3:24 ` Frédéric Weisbecker
0 siblings, 1 reply; 11+ messages in thread
From: Ingo Molnar @ 2008-11-28 14:17 UTC (permalink / raw)
To: Frédéric Weisbecker
Cc: Steven Rostedt, Tim Bird, Peter Zijlstra, Linux Kernel
* Frédéric Weisbecker <fweisbec@gmail.com> wrote:
> 2008/11/28 Ingo Molnar <mingo@elte.hu>:
> > Firstly, what do you think about the additional tweaks i did - see the
> > patch below?
> >
> > Before:
> >
> > 0) | sys_read() {
> > 0) 0.796 us | fget_light();
> > 0) | vfs_read() {
> > 0) | rw_verify_area() {
> > 0) | security_file_permission() {
> > ------------8<---------- thread sshd-1755 ------------8<----------
> >
> > After:
> >
> > 0) | sys_read() {
> > 0) 0.796 us | fget_light();
> > 0) | vfs_read() {
> > 0) | rw_verify_area() {
> > 0) | security_file_permission() {
> > ------------------------------------------
> > | 1) migration/0--1 => sshd-1755
> > ------------------------------------------
>
> Thanks! That good, but I remember you wanted a limited number of
> characters for thread name/pid couple?
Yeah, that needs updating. I supposed you'd be working on the task+pid
column anyway, and do a helper function that prints the task-pid couple.
Then that helper can be used in the context-switch case too to print out
a uniform ID.
Another thing that would be nice is to separate out the "cpu)" printing
bits into a helper function. Right now what exists cannot be used in a
seq-manner, so i couldnt reuse it.
Anyway, i wont change it (just wanted to get a final-ish output to look
at) so feel free to clean it all up thoroughly :)
Today i played with the graph-tracer on a testbox _way_ too much.
Combined with the wildcard filter it's _really_ addictive. Kernel
developers, beware!
One thing that came up: it would be nice to have an 'inverted' wildcard
to punch out certain functions from the filter list. For example i did in
the shell:
$ echo 'sys_*' >> set_ftrace_filter
$ echo '*socket*' >> set_ftrace_filter
$ echo '*timer*' >> set_ftrace_filter
$ echo '*skb*' >> set_ftrace_filter
and looked at the trace and found that certain functions are too verbose
and not really interesting - so i wanted to exclude them.
We've got set_ftrace_notrace but it's not really an inverse wildcard but
a complementary set of filter functions - which is not the same and not
as easy to think about as a single set of filter functions.
what i think would be more natural to do is via the filter file itself,
via an extension like:
$ echo '!timer_*' >> set_ftrace_filter
which would eliminate the functions matching that negative pattern. Such
negative wildcards would act on the current set of functions, while
set_ftrace_notrace is permanent and cannot be used to shape an arbitrary
set of functions iteratively.
> > Secondly:
> >
> >> + /* Must not exceed 8 characters: xxxx.yyy us */
> >> + if (duration > 10000000ULL)
> >> + duration = 9999999ULL;
> >
> > 10 milliseconds isnt much or full system calls, etc.- so i believe the
> > rule should be what i outlined in an earlier mail. The relevant
> > transition points go like this:
> >
> > 0.000
> > xxxx.yyy
> > 9999.999
> > 10000.00
> > xxxxx.yy
> > 99999.99
> > 100000.0
> > xxxxxx.y
> > 999999.9
> > 1000000
> > 9999999
> > 10000000
> > xxxxxxxx
> > 99999999 [ 100 seconds ]
> > 100000000 ... up to infinity
> >
> > this way we can get up to 99999999 usecs with the same fixed width -
> > or 100 seconds. _That_ is enough in practice.
> >
> > But even beyond that we should print it all out: we should still not
> > clip actual information but instead shift the line to the right. The
> > slightly inconsistent line is not a big problem - we want a 100
> > seconds delay to stand out anyway ;-)
> >
> > The moving decimal point above 10 milliseconds is not a big problem
> > with the '+' and '!' marker. Maybe add a '!!' marker to these lines?
>
> Yeah, I was sure I misunderstood your idea about it in your last email
> :-) Ok. I will do so, I felt a bit uncomfortable with the fixed width
> above a limit, so I like it this way. Long sleeping functions will be
> rare enough in the trace to let us move a bit the columns to the right
> in such cases...
yeah.
The other small detail in the scheme i suggested is that the loss of
nanosec precision and its gradual weakening to usec precision is
immaterial in practice as well: we still have at least 9 significant
digits, so the relative precision is around 1:1000000000 - more than
enough.
Ingo
^ permalink raw reply [flat|nested] 11+ messages in thread* Re: [PATCH] tracing/function-graph-tracer: adjustments of the trace informations
2008-11-28 14:17 ` Ingo Molnar
@ 2008-11-29 3:24 ` Frédéric Weisbecker
2008-11-29 14:19 ` Ingo Molnar
0 siblings, 1 reply; 11+ messages in thread
From: Frédéric Weisbecker @ 2008-11-29 3:24 UTC (permalink / raw)
To: Ingo Molnar; +Cc: Steven Rostedt, Tim Bird, Peter Zijlstra, Linux Kernel
2008/11/28 Ingo Molnar <mingo@elte.hu>:
> Yeah, that needs updating. I supposed you'd be working on the task+pid
> column anyway, and do a helper function that prints the task-pid couple.
> Then that helper can be used in the context-switch case too to print out
> a uniform ID.
Ok.
> Another thing that would be nice is to separate out the "cpu)" printing
> bits into a helper function. Right now what exists cannot be used in a
> seq-manner, so i couldnt reuse it.
>
Isn't it the case with print_graph_cpu() ?
> Anyway, i wont change it (just wanted to get a final-ish output to look
> at) so feel free to clean it all up thoroughly :)
>
> Today i played with the graph-tracer on a testbox _way_ too much.
> Combined with the wildcard filter it's _really_ addictive. Kernel
> developers, beware!
:-)
>
> One thing that came up: it would be nice to have an 'inverted' wildcard
> to punch out certain functions from the filter list. For example i did in
> the shell:
>
> $ echo 'sys_*' >> set_ftrace_filter
> $ echo '*socket*' >> set_ftrace_filter
> $ echo '*timer*' >> set_ftrace_filter
> $ echo '*skb*' >> set_ftrace_filter
>
> and looked at the trace and found that certain functions are too verbose
> and not really interesting - so i wanted to exclude them.
>
> We've got set_ftrace_notrace but it's not really an inverse wildcard but
> a complementary set of filter functions - which is not the same and not
> as easy to think about as a single set of filter functions.
>
> what i think would be more natural to do is via the filter file itself,
> via an extension like:
>
> $ echo '!timer_*' >> set_ftrace_filter
>
> which would eliminate the functions matching that negative pattern. Such
> negative wildcards would act on the current set of functions, while
> set_ftrace_notrace is permanent and cannot be used to shape an arbitrary
> set of functions iteratively.
Ok, I'll add it in my workqueue (unless Steven does it).
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH] tracing/function-graph-tracer: adjustments of the trace informations
2008-11-29 3:24 ` Frédéric Weisbecker
@ 2008-11-29 14:19 ` Ingo Molnar
0 siblings, 0 replies; 11+ messages in thread
From: Ingo Molnar @ 2008-11-29 14:19 UTC (permalink / raw)
To: Frédéric Weisbecker
Cc: Steven Rostedt, Tim Bird, Peter Zijlstra, Linux Kernel
* Frédéric Weisbecker <fweisbec@gmail.com> wrote:
> > Another thing that would be nice is to separate out the "cpu)"
> > printing bits into a helper function. Right now what exists cannot be
> > used in a seq-manner, so i couldnt reuse it.
> >
>
> Isn't it the case with print_graph_cpu() ?
yeah, that should be okay. I didnt use it in verif_pid() because it
returns print_line_t - but i guess it would be better to fix verif_pid()
to check for !ret instead of the ret += thing i added.
Ingo
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2008-11-29 14:20 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-11-27 23:42 [PATCH] tracing/function-graph-tracer: adjustments of the trace informations Frederic Weisbecker
2008-11-28 9:18 ` Ingo Molnar
2008-11-28 9:25 ` Ingo Molnar
2008-11-28 13:58 ` Frédéric Weisbecker
2008-11-28 19:22 ` Ingo Molnar
2008-11-29 3:40 ` Frédéric Weisbecker
2008-11-29 10:59 ` Peter Zijlstra
2008-11-28 13:55 ` Frédéric Weisbecker
2008-11-28 14:17 ` Ingo Molnar
2008-11-29 3:24 ` Frédéric Weisbecker
2008-11-29 14:19 ` Ingo Molnar
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.