All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH] tracing/function-graph-tracer: adjustments of the trace informations
@ 2008-11-27 23:42 Frederic Weisbecker
  2008-11-28  9:18 ` Ingo Molnar
  0 siblings, 1 reply; 11+ messages in thread
From: Frederic Weisbecker @ 2008-11-27 23:42 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Steven Rostedt, Tim Bird, Peter Zijlstra, Linux Kernel

Impact: increase visibility of the output

This patch applies various changes.

_CPU is now a decimal number, followed by a parenthesis.

_Overhead is now on the second column (gives a good visibility)

_Cost is now on the third column, can't exceed 9999.99 us. It is followed
 by a virtual line based on a "|" character.

_Functions calls are now the last column on the right. This way, we haven't
 dynamic column (which flow is harder to follow) on its right.

_CPU and Overhead have their own option flag. They are default-on but you can
 disable them easily:

 echo nofuncgraph-cpu > trace_options
 echo nofuncgraph-overhead > trace_options

TODO:

_ Refactoring of the thread switch output.
_ Give a default-off option to output the thread and its pid on each row.
_ Provide headers
_ ....

Here is an example of the new trace style (a wider one is provided in attachment):

0)           |             mutex_unlock() {
0)      0.639 us |           __mutex_unlock_slowpath();
0)      1.607 us |         }
0)           |             remove_wait_queue() {
0)      0.616 us |           _spin_lock_irqsave();
0)      0.616 us |           _spin_unlock_irqrestore();
0)      2.779 us |         }
0)      0.495 us |         n_tty_set_room();
0) ! 9999.999 us |       }
0)           |           tty_ldisc_deref() {
0)      0.615 us |         _spin_lock_irqsave();
0)      0.616 us |         _spin_unlock_irqrestore();
0)      2.793 us |       }
0)           |           current_fs_time() {
0)      0.488 us |         current_kernel_time();
0)      0.495 us |         timespec_trunc();
0)      2.486 us |       }
0) ! 9999.999 us |     }
0) ! 9999.999 us |   }
0) ! 9999.999 us | }
0)           |     sys_read() {
0)      0.796 us |   fget_light();
0)           |       vfs_read() {
0)           |         rw_verify_area() {
0)           |           security_file_permission() {
0)      0.488 us |         cap_file_permission();
0)      1.720 us |       }
0)      3.  4 us |     }
0)           |         tty_read() {
0)      0.488 us |       tty_paranoia_check();
0)           |           tty_ldisc_ref_wait() {
0)           |             tty_ldisc_try() {
0)      0.615 us |           _spin_lock_irqsave();
0)      0.615 us |           _spin_unlock_irqrestore();
0)      5.436 us |         }
0)      6.427 us |       }

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
---
 kernel/trace/trace_functions_graph.c |  142 ++++++++++++++++++++++------------
 1 files changed, 93 insertions(+), 49 deletions(-)

diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index b958d60..596a3ee 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -14,20 +14,25 @@
 #include "trace.h"
 
 #define TRACE_GRAPH_INDENT	2
-/* Spaces between function call and time duration */
-#define TRACE_GRAPH_TIMESPACE_ENTRY	"                    "
-/* Spaces between function call and closing braces */
-#define TRACE_GRAPH_TIMESPACE_RET	"                               "
 
+/* Flag options */
 #define TRACE_GRAPH_PRINT_OVERRUN	0x1
+#define TRACE_GRAPH_PRINT_CPU		0x2
+#define TRACE_GRAPH_PRINT_OVERHEAD	0x4
+
 static struct tracer_opt trace_opts[] = {
-	/* Display overruns or not */
-	{ TRACER_OPT(overrun, TRACE_GRAPH_PRINT_OVERRUN) },
+	/* Display overruns ? */
+	{ TRACER_OPT(funcgraph-overrun, TRACE_GRAPH_PRINT_OVERRUN) },
+	/* Display CPU ? */
+	{ TRACER_OPT(funcgraph-cpu, TRACE_GRAPH_PRINT_CPU) },
+	/* Display Overhead ? */
+	{ TRACER_OPT(funcgraph-overhead, TRACE_GRAPH_PRINT_OVERHEAD) },
 	{ } /* Empty entry */
 };
 
 static struct tracer_flags tracer_flags = {
-	.val = 0, /* Don't display overruns by default */
+	/* Don't display overruns by default */
+	.val = TRACE_GRAPH_PRINT_CPU | TRACE_GRAPH_PRINT_OVERHEAD,
 	.opts = trace_opts
 };
 
@@ -56,6 +61,36 @@ static void graph_trace_reset(struct trace_array *tr)
 	unregister_ftrace_graph();
 }
 
+static inline int log10_cpu(int nb)
+{
+	if (nb / 100)
+		return 3;
+	if (nb / 10)
+		return 2;
+	return 1;
+}
+
+static enum print_line_t
+print_graph_cpu(struct trace_seq *s, int cpu)
+{
+	int i;
+	int ret;
+	int log10_this = log10_cpu(cpu);
+	int log10_all = log10_cpu(cpus_weight_nr(cpu_online_map));
+
+
+	for (i = 0; i < log10_all - log10_this; i++) {
+		ret = trace_seq_printf(s, " ");
+		if (!ret)
+			return TRACE_TYPE_PARTIAL_LINE;
+	}
+	ret = trace_seq_printf(s, "%d) ", cpu);
+	if (!ret)
+			return TRACE_TYPE_PARTIAL_LINE;
+	return TRACE_TYPE_HANDLED;
+}
+
+
 /* If the pid changed since the last trace, output this event */
 static int verif_pid(struct trace_seq *s, pid_t pid, int cpu)
 {
@@ -67,8 +102,7 @@ static int verif_pid(struct trace_seq *s, pid_t pid, int cpu)
 	last_pid[cpu] = pid;
 	comm = trace_find_cmdline(pid);
 
-	return trace_seq_printf(s, "\nCPU[%03d] "
-				    " ------------8<---------- thread %s-%d"
+	return trace_seq_printf(s, "\n------------8<---------- thread %s-%d"
 				    " ------------8<----------\n\n",
 				    cpu, comm, pid);
 }
@@ -86,7 +120,7 @@ trace_branch_is_leaf(struct trace_iterator *iter,
 	if (!ring_iter)
 		return false;
 
-	event = ring_buffer_iter_peek(iter->buffer_iter[iter->cpu], NULL);
+	event = ring_buffer_iter_peek(ring_iter, NULL);
 
 	if (!event)
 		return false;
@@ -108,7 +142,7 @@ static inline int
 print_graph_duration(unsigned long long duration, struct trace_seq *s)
 {
 	unsigned long nsecs_rem = do_div(duration, 1000);
-	return trace_seq_printf(s, "+ %llu.%lu us\n", duration, nsecs_rem);
+	return trace_seq_printf(s, "%4llu.%3lu us | ", duration, nsecs_rem);
 }
 
 /* Signal a overhead of time execution to the output */
@@ -136,8 +170,8 @@ print_graph_entry_leaf(struct trace_iterator *iter,
 	struct ring_buffer_event *event;
 	struct ftrace_graph_ent *call;
 	unsigned long long duration;
-	int i;
 	int ret;
+	int i;
 
 	event = ring_buffer_read(iter->buffer_iter[iter->cpu], NULL);
 	ret_entry = ring_buffer_event_data(event);
@@ -145,8 +179,19 @@ print_graph_entry_leaf(struct trace_iterator *iter,
 	call = &entry->graph_ent;
 	duration = graph_ret->rettime - graph_ret->calltime;
 
+	/* Must not exceed 8 characters: 9999.999 us */
+	if (duration > 10000000ULL)
+		duration = 9999999ULL;
+
 	/* Overhead */
-	ret = print_graph_overhead(duration, s);
+	if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
+		ret = print_graph_overhead(duration, s);
+		if (!ret)
+			return TRACE_TYPE_PARTIAL_LINE;
+	}
+
+	/* Duration */
+	ret = print_graph_duration(duration, s);
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 
@@ -161,16 +206,7 @@ print_graph_entry_leaf(struct trace_iterator *iter,
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 
-	ret = trace_seq_printf(s, "();");
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	/* Duration */
-	ret = trace_seq_printf(s, TRACE_GRAPH_TIMESPACE_ENTRY);
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	ret = print_graph_duration(duration, s);
+	ret = trace_seq_printf(s, "();\n");
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 
@@ -186,9 +222,14 @@ print_graph_entry_nested(struct ftrace_graph_ent_entry *entry,
 	struct ftrace_graph_ent *call = &entry->graph_ent;
 
 	/* No overhead */
-	ret = trace_seq_printf(s, "  ");
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
+	if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
+		ret = trace_seq_printf(s, "  ");
+		if (!ret)
+			return TRACE_TYPE_PARTIAL_LINE;
+	}
+
+	/* No time */
+	ret = trace_seq_printf(s, "        |     ");
 
 	/* Function */
 	for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {
@@ -201,12 +242,7 @@ print_graph_entry_nested(struct ftrace_graph_ent_entry *entry,
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 
-	ret = trace_seq_printf(s, "() {");
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	/* No duration to print at this state */
-	ret = trace_seq_printf(s, TRACE_GRAPH_TIMESPACE_ENTRY "-\n");
+	ret = trace_seq_printf(s, "() {\n");
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 
@@ -220,12 +256,16 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s,
 	int ret;
 	struct trace_entry *ent = iter->ent;
 
+	/* Pid */
 	if (!verif_pid(s, ent->pid, cpu))
 		return TRACE_TYPE_PARTIAL_LINE;
 
-	ret = trace_seq_printf(s, "CPU[%03d] ", cpu);
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
+	/* Cpu */
+	if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
+		ret = print_graph_cpu(s, cpu);
+		if (!ret)
+			return TRACE_TYPE_PARTIAL_LINE;
+	}
 
 	if (trace_branch_is_leaf(iter, field))
 		return print_graph_entry_leaf(iter, field, s);
@@ -242,17 +282,30 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
 	int ret;
 	unsigned long long duration = trace->rettime - trace->calltime;
 
+	/* Must not exceed 8 characters: xxxx.yyy us */
+	if (duration > 10000000ULL)
+		duration = 9999999ULL;
+
 	/* Pid */
 	if (!verif_pid(s, ent->pid, cpu))
 		return TRACE_TYPE_PARTIAL_LINE;
 
 	/* Cpu */
-	ret = trace_seq_printf(s, "CPU[%03d] ", cpu);
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
+	if (tracer_flags.val & TRACE_GRAPH_PRINT_CPU) {
+		ret = print_graph_cpu(s, cpu);
+		if (!ret)
+			return TRACE_TYPE_PARTIAL_LINE;
+	}
 
 	/* Overhead */
-	ret = print_graph_overhead(duration, s);
+	if (tracer_flags.val & TRACE_GRAPH_PRINT_OVERHEAD) {
+		ret = print_graph_overhead(duration, s);
+		if (!ret)
+			return TRACE_TYPE_PARTIAL_LINE;
+	}
+
+	/* Duration */
+	ret = print_graph_duration(duration, s);
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 
@@ -263,16 +316,7 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s,
 			return TRACE_TYPE_PARTIAL_LINE;
 	}
 
-	ret = trace_seq_printf(s, "} ");
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	/* Duration */
-	ret = trace_seq_printf(s, TRACE_GRAPH_TIMESPACE_RET);
-	if (!ret)
-		return TRACE_TYPE_PARTIAL_LINE;
-
-	ret = print_graph_duration(duration, s);
+	ret = trace_seq_printf(s, "}\n");
 	if (!ret)
 		return TRACE_TYPE_PARTIAL_LINE;
 
-- 
1.5.2.5

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] tracing/function-graph-tracer: adjustments of the trace informations
  2008-11-27 23:42 [PATCH] tracing/function-graph-tracer: adjustments of the trace informations Frederic Weisbecker
@ 2008-11-28  9:18 ` Ingo Molnar
  2008-11-28  9:25   ` Ingo Molnar
  2008-11-28 13:55   ` Frédéric Weisbecker
  0 siblings, 2 replies; 11+ messages in thread
From: Ingo Molnar @ 2008-11-28  9:18 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Steven Rostedt, Tim Bird, Peter Zijlstra, Linux Kernel


* Frederic Weisbecker <fweisbec@gmail.com> wrote:

> Impact: increase visibility of the output
> 
> This patch applies various changes.
> 
> _CPU is now a decimal number, followed by a parenthesis.
> 
> _Overhead is now on the second column (gives a good visibility)
> 
> _Cost is now on the third column, can't exceed 9999.99 us. It is followed
>  by a virtual line based on a "|" character.
> 
> _Functions calls are now the last column on the right. This way, we haven't
>  dynamic column (which flow is harder to follow) on its right.
> 
> _CPU and Overhead have their own option flag. They are default-on but you can
>  disable them easily:
> 
>  echo nofuncgraph-cpu > trace_options
>  echo nofuncgraph-overhead > trace_options
> 
> TODO:
> 
> _ Refactoring of the thread switch output.
> _ Give a default-off option to output the thread and its pid on each row.
> _ Provide headers
> _ ....
> 
> Here is an example of the new trace style (a wider one is provided in attachment):
> 
> 0)           |             mutex_unlock() {
> 0)      0.639 us |           __mutex_unlock_slowpath();
> 0)      1.607 us |         }
> 0)           |             remove_wait_queue() {
> 0)      0.616 us |           _spin_lock_irqsave();
> 0)      0.616 us |           _spin_unlock_irqrestore();
> 0)      2.779 us |         }
> 0)      0.495 us |         n_tty_set_room();
> 0) ! 9999.999 us |       }
> 0)           |           tty_ldisc_deref() {
> 0)      0.615 us |         _spin_lock_irqsave();
> 0)      0.616 us |         _spin_unlock_irqrestore();
> 0)      2.793 us |       }
> 0)           |           current_fs_time() {
> 0)      0.488 us |         current_kernel_time();
> 0)      0.495 us |         timespec_trunc();
> 0)      2.486 us |       }
> 0) ! 9999.999 us |     }
> 0) ! 9999.999 us |   }
> 0) ! 9999.999 us | }
> 0)           |     sys_read() {
> 0)      0.796 us |   fget_light();
> 0)           |       vfs_read() {
> 0)           |         rw_verify_area() {
> 0)           |           security_file_permission() {
> 0)      0.488 us |         cap_file_permission();
> 0)      1.720 us |       }
> 0)      3.  4 us |     }
> 0)           |         tty_read() {
> 0)      0.488 us |       tty_paranoia_check();
> 0)           |           tty_ldisc_ref_wait() {
> 0)           |             tty_ldisc_try() {
> 0)      0.615 us |           _spin_lock_irqsave();
> 0)      0.615 us |           _spin_unlock_irqrestore();
> 0)      5.436 us |         }
> 0)      6.427 us |       }

very nice! Applied to tip/tracing/function-graph-tracer

a few comments. 

Firstly, what do you think about the additional tweaks i did - see the 
patch below?

Before:

0)           |     sys_read() {
0)      0.796 us |   fget_light();
0)           |       vfs_read() {
0)           |         rw_verify_area() {
0)           |           security_file_permission() {
------------8<---------- thread sshd-1755 ------------8<----------

After:

 0)               |  sys_read() {
 0)      0.796 us |    fget_light();
 0)               |    vfs_read() {
 0)               |      rw_verify_area() {
 0)               |        security_file_permission() {
 ------------------------------------------
 | 1)  migration/0--1  =>  sshd-1755
 ------------------------------------------

Secondly:

> +	/* Must not exceed 8 characters: xxxx.yyy us */
> +	if (duration > 10000000ULL)
> +		duration = 9999999ULL;

10 milliseconds isnt much or full system calls, etc.- so i believe the 
rule should be what i outlined in an earlier mail. The relevant 
transition points go like this:

     0.000 
  xxxx.yyy
  9999.999
  10000.00
  xxxxx.yy
  99999.99
  100000.0
  xxxxxx.y
  999999.9
   1000000
   9999999
  10000000
  xxxxxxxx
  99999999 [ 100 seconds ]
  100000000 ... up to infinity

this way we can get up to 99999999 usecs with the same fixed width - 
or 100 seconds. _That_ is enough in practice.

But even beyond that we should print it all out: we should still not 
clip actual information but instead shift the line to the right. The 
slightly inconsistent line is not a big problem - we want a 100 
seconds delay to stand out anyway ;-)

The moving decimal point above 10 milliseconds is not a big problem 
with the '+' and '!' marker. Maybe add a '!!' marker to these lines?

	Ingo

------------------->
>From d51090b34602a20984ab0312ef04e47069c0aec6 Mon Sep 17 00:00:00 2001
From: Ingo Molnar <mingo@elte.hu>
Date: Fri, 28 Nov 2008 09:55:16 +0100
Subject: [PATCH] tracing/function-graph-tracer: more output tweaks

Impact: prettify the output some more

Before:

0)           |     sys_read() {
0)      0.796 us |   fget_light();
0)           |       vfs_read() {
0)           |         rw_verify_area() {
0)           |           security_file_permission() {
------------8<---------- thread sshd-1755 ------------8<----------

After:

 0)               |  sys_read() {
 0)      0.796 us |    fget_light();
 0)               |    vfs_read() {
 0)               |      rw_verify_area() {
 0)               |        security_file_permission() {
 ------------------------------------------
 | 1)  migration/0--1  =>  sshd-1755
 ------------------------------------------

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/trace/trace_functions_graph.c |   45 ++++++++++++++++++++++++++++-----
 1 files changed, 38 insertions(+), 7 deletions(-)

diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c
index 596a3ee..894b50b 100644
--- a/kernel/trace/trace_functions_graph.c
+++ b/kernel/trace/trace_functions_graph.c
@@ -79,6 +79,19 @@ print_graph_cpu(struct trace_seq *s, int cpu)
 	int log10_all = log10_cpu(cpus_weight_nr(cpu_online_map));
 
 
+	/*
+	 * Start with a space character - to make it stand out
+	 * to the right a bit when trace output is pasted into
+	 * email:
+	 */
+	ret = trace_seq_printf(s, " ");
+
+	/*
+	 * Tricky - we space the CPU field according to the max
+	 * number of online CPUs. On a 2-cpu system it would take
+	 * a maximum of 1 digit - on a 128 cpu system it would
+	 * take up to 3 digits:
+	 */
 	for (i = 0; i < log10_all - log10_this; i++) {
 		ret = trace_seq_printf(s, " ");
 		if (!ret)
@@ -86,7 +99,8 @@ print_graph_cpu(struct trace_seq *s, int cpu)
 	}
 	ret = trace_seq_printf(s, "%d) ", cpu);
 	if (!ret)
-			return TRACE_TYPE_PARTIAL_LINE;
+		return TRACE_TYPE_PARTIAL_LINE;
+
 	return TRACE_TYPE_HANDLED;
 }
 
@@ -94,17 +108,34 @@ print_graph_cpu(struct trace_seq *s, int cpu)
 /* If the pid changed since the last trace, output this event */
 static int verif_pid(struct trace_seq *s, pid_t pid, int cpu)
 {
-	char *comm;
+	char *comm, *prev_comm;
+	pid_t prev_pid;
+	int ret;
 
 	if (last_pid[cpu] != -1 && last_pid[cpu] == pid)
 		return 1;
 
+	prev_pid = last_pid[cpu];
 	last_pid[cpu] = pid;
+
 	comm = trace_find_cmdline(pid);
+	prev_comm = trace_find_cmdline(prev_pid);
 
-	return trace_seq_printf(s, "\n------------8<---------- thread %s-%d"
-				    " ------------8<----------\n\n",
-				    cpu, comm, pid);
+/*
+ * Context-switch trace line:
+
+ ------------------------------------------
+ | 1)  migration/0--1  =>  sshd-1755
+ ------------------------------------------
+
+ */
+	ret = trace_seq_printf(s,
+		" ------------------------------------------\n");
+	ret += trace_seq_printf(s, " | %d)  %s-%d  =>  %s-%d\n",
+				  cpu, prev_comm, prev_pid, comm, pid);
+	ret += trace_seq_printf(s,
+		" ------------------------------------------\n\n");
+	return ret;
 }
 
 static bool
@@ -142,7 +173,7 @@ static inline int
 print_graph_duration(unsigned long long duration, struct trace_seq *s)
 {
 	unsigned long nsecs_rem = do_div(duration, 1000);
-	return trace_seq_printf(s, "%4llu.%3lu us | ", duration, nsecs_rem);
+	return trace_seq_printf(s, "%4llu.%3lu us |  ", duration, nsecs_rem);
 }
 
 /* Signal a overhead of time execution to the output */
@@ -229,7 +260,7 @@ print_graph_entry_nested(struct ftrace_graph_ent_entry *entry,
 	}
 
 	/* No time */
-	ret = trace_seq_printf(s, "        |     ");
+	ret = trace_seq_printf(s, "            |  ");
 
 	/* Function */
 	for (i = 0; i < call->depth * TRACE_GRAPH_INDENT; i++) {

^ permalink raw reply related	[flat|nested] 11+ messages in thread

* Re: [PATCH] tracing/function-graph-tracer: adjustments of the trace informations
  2008-11-28  9:18 ` Ingo Molnar
@ 2008-11-28  9:25   ` Ingo Molnar
  2008-11-28 13:58     ` Frédéric Weisbecker
  2008-11-28 19:22     ` Ingo Molnar
  2008-11-28 13:55   ` Frédéric Weisbecker
  1 sibling, 2 replies; 11+ messages in thread
From: Ingo Molnar @ 2008-11-28  9:25 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Steven Rostedt, Tim Bird, Peter Zijlstra, Linux Kernel


another thing which i'm sure you are aware of:

# tracer: function_graph
#
#           TASK-PID    CPU#    TIMESTAMP  FUNCTION
#              | |       |          |         |
 ------------------------------------------
 | 1)  S99local--1  =>  ssh-1724
 ------------------------------------------
 
 1)      0.867 us |        }
 1)               |        mark_page_accessed() {
 1)      0.293 us |          constant_test_bit();
 1)      0.294 us |          constant_test_bit();
[...]

the header description is now out of of sync :-)

Also, we should probably not start the trace with a PID-switch event. 
the last_pid[] markers are probably out of sync when we start printing 
the trace.

So the output should probably be something like:

# tracer: function_graph
#
# CPU      COST     FUNCTION-GRAPH
# |          |       |        

 1)      0.867 us |        }
 1)               |        mark_page_accessed() {
 1)      0.293 us |          constant_test_bit();
 1)      0.294 us |          constant_test_bit();
[...]

	Ingo

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] tracing/function-graph-tracer: adjustments of the trace informations
  2008-11-28  9:18 ` Ingo Molnar
  2008-11-28  9:25   ` Ingo Molnar
@ 2008-11-28 13:55   ` Frédéric Weisbecker
  2008-11-28 14:17     ` Ingo Molnar
  1 sibling, 1 reply; 11+ messages in thread
From: Frédéric Weisbecker @ 2008-11-28 13:55 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Steven Rostedt, Tim Bird, Peter Zijlstra, Linux Kernel

2008/11/28 Ingo Molnar <mingo@elte.hu>:
> Firstly, what do you think about the additional tweaks i did - see the
> patch below?
>
> Before:
>
> 0)           |     sys_read() {
> 0)      0.796 us |   fget_light();
> 0)           |       vfs_read() {
> 0)           |         rw_verify_area() {
> 0)           |           security_file_permission() {
> ------------8<---------- thread sshd-1755 ------------8<----------
>
> After:
>
>  0)               |  sys_read() {
>  0)      0.796 us |    fget_light();
>  0)               |    vfs_read() {
>  0)               |      rw_verify_area() {
>  0)               |        security_file_permission() {
>  ------------------------------------------
>  | 1)  migration/0--1  =>  sshd-1755
>  ------------------------------------------


Thanks! That good, but I remember you wanted a limited number of
characters for thread name/pid couple?


> Secondly:
>
>> +     /* Must not exceed 8 characters: xxxx.yyy us */
>> +     if (duration > 10000000ULL)
>> +             duration = 9999999ULL;
>
> 10 milliseconds isnt much or full system calls, etc.- so i believe the
> rule should be what i outlined in an earlier mail. The relevant
> transition points go like this:
>
>     0.000
>  xxxx.yyy
>  9999.999
>  10000.00
>  xxxxx.yy
>  99999.99
>  100000.0
>  xxxxxx.y
>  999999.9
>   1000000
>   9999999
>  10000000
>  xxxxxxxx
>  99999999 [ 100 seconds ]
>  100000000 ... up to infinity
>
> this way we can get up to 99999999 usecs with the same fixed width -
> or 100 seconds. _That_ is enough in practice.
>
> But even beyond that we should print it all out: we should still not
> clip actual information but instead shift the line to the right. The
> slightly inconsistent line is not a big problem - we want a 100
> seconds delay to stand out anyway ;-)
>
> The moving decimal point above 10 milliseconds is not a big problem
> with the '+' and '!' marker. Maybe add a '!!' marker to these lines?


Yeah, I was sure I misunderstood your idea about it in your last email :-)
Ok. I will do so, I felt a bit uncomfortable with the fixed width above a limit,
so I like it this way. Long sleeping functions will be rare enough in the trace
to let us move a bit the columns to the right in such cases...

Thanks.

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] tracing/function-graph-tracer: adjustments of the trace informations
  2008-11-28  9:25   ` Ingo Molnar
@ 2008-11-28 13:58     ` Frédéric Weisbecker
  2008-11-28 19:22     ` Ingo Molnar
  1 sibling, 0 replies; 11+ messages in thread
From: Frédéric Weisbecker @ 2008-11-28 13:58 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Steven Rostedt, Tim Bird, Peter Zijlstra, Linux Kernel

2008/11/28 Ingo Molnar <mingo@elte.hu>:
>
> another thing which i'm sure you are aware of:
>
> # tracer: function_graph
> #
> #           TASK-PID    CPU#    TIMESTAMP  FUNCTION
> #              | |       |          |         |
>  ------------------------------------------
>  | 1)  S99local--1  =>  ssh-1724
>  ------------------------------------------
>
>  1)      0.867 us |        }
>  1)               |        mark_page_accessed() {
>  1)      0.293 us |          constant_test_bit();
>  1)      0.294 us |          constant_test_bit();
> [...]
>
> the header description is now out of of sync :-)
>
> Also, we should probably not start the trace with a PID-switch event.
> the last_pid[] markers are probably out of sync when we start printing
> the trace.
>
> So the output should probably be something like:
>
> # tracer: function_graph
> #
> # CPU      COST     FUNCTION-GRAPH
> # |          |       |
>
>  1)      0.867 us |        }
>  1)               |        mark_page_accessed() {
>  1)      0.293 us |          constant_test_bit();
>  1)      0.294 us |          constant_test_bit();
> [...]


Ok I will just have to update something in the open callback....

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] tracing/function-graph-tracer: adjustments of the trace informations
  2008-11-28 13:55   ` Frédéric Weisbecker
@ 2008-11-28 14:17     ` Ingo Molnar
  2008-11-29  3:24       ` Frédéric Weisbecker
  0 siblings, 1 reply; 11+ messages in thread
From: Ingo Molnar @ 2008-11-28 14:17 UTC (permalink / raw)
  To: Frédéric Weisbecker
  Cc: Steven Rostedt, Tim Bird, Peter Zijlstra, Linux Kernel


* Frédéric Weisbecker <fweisbec@gmail.com> wrote:

> 2008/11/28 Ingo Molnar <mingo@elte.hu>:
> > Firstly, what do you think about the additional tweaks i did - see the
> > patch below?
> >
> > Before:
> >
> > 0)           |     sys_read() {
> > 0)      0.796 us |   fget_light();
> > 0)           |       vfs_read() {
> > 0)           |         rw_verify_area() {
> > 0)           |           security_file_permission() {
> > ------------8<---------- thread sshd-1755 ------------8<----------
> >
> > After:
> >
> >  0)               |  sys_read() {
> >  0)      0.796 us |    fget_light();
> >  0)               |    vfs_read() {
> >  0)               |      rw_verify_area() {
> >  0)               |        security_file_permission() {
> >  ------------------------------------------
> >  | 1)  migration/0--1  =>  sshd-1755
> >  ------------------------------------------
> 
> Thanks! That good, but I remember you wanted a limited number of 
> characters for thread name/pid couple?

Yeah, that needs updating. I supposed you'd be working on the task+pid 
column anyway, and do a helper function that prints the task-pid couple. 
Then that helper can be used in the context-switch case too to print out 
a uniform ID.

Another thing that would be nice is to separate out the "cpu)" printing 
bits into a helper function. Right now what exists cannot be used in a 
seq-manner, so i couldnt reuse it.

Anyway, i wont change it (just wanted to get a final-ish output to look 
at) so feel free to clean it all up thoroughly :)

Today i played with the graph-tracer on a testbox _way_ too much. 
Combined with the wildcard filter it's _really_ addictive. Kernel 
developers, beware!

One thing that came up: it would be nice to have an 'inverted' wildcard 
to punch out certain functions from the filter list. For example i did in 
the shell:

 $ echo 'sys_*'    >> set_ftrace_filter
 $ echo '*socket*' >> set_ftrace_filter
 $ echo '*timer*'  >> set_ftrace_filter
 $ echo '*skb*'    >> set_ftrace_filter

and looked at the trace and found that certain functions are too verbose 
and not really interesting - so i wanted to exclude them.

We've got set_ftrace_notrace but it's not really an inverse wildcard but 
a complementary set of filter functions - which is not the same and not 
as easy to think about as a single set of filter functions.

what i think would be more natural to do is via the filter file itself, 
via an extension like:

 $ echo '!timer_*' >> set_ftrace_filter

which would eliminate the functions matching that negative pattern. Such 
negative wildcards would act on the current set of functions, while 
set_ftrace_notrace is permanent and cannot be used to shape an arbitrary 
set of functions iteratively.

> > Secondly:
> >
> >> +     /* Must not exceed 8 characters: xxxx.yyy us */
> >> +     if (duration > 10000000ULL)
> >> +             duration = 9999999ULL;
> >
> > 10 milliseconds isnt much or full system calls, etc.- so i believe the
> > rule should be what i outlined in an earlier mail. The relevant
> > transition points go like this:
> >
> >     0.000
> >  xxxx.yyy
> >  9999.999
> >  10000.00
> >  xxxxx.yy
> >  99999.99
> >  100000.0
> >  xxxxxx.y
> >  999999.9
> >   1000000
> >   9999999
> >  10000000
> >  xxxxxxxx
> >  99999999 [ 100 seconds ]
> >  100000000 ... up to infinity
> >
> > this way we can get up to 99999999 usecs with the same fixed width -
> > or 100 seconds. _That_ is enough in practice.
> >
> > But even beyond that we should print it all out: we should still not
> > clip actual information but instead shift the line to the right. The
> > slightly inconsistent line is not a big problem - we want a 100
> > seconds delay to stand out anyway ;-)
> >
> > The moving decimal point above 10 milliseconds is not a big problem
> > with the '+' and '!' marker. Maybe add a '!!' marker to these lines?
> 
> Yeah, I was sure I misunderstood your idea about it in your last email 
> :-) Ok. I will do so, I felt a bit uncomfortable with the fixed width 
> above a limit, so I like it this way. Long sleeping functions will be 
> rare enough in the trace to let us move a bit the columns to the right 
> in such cases...

yeah.

The other small detail in the scheme i suggested is that the loss of 
nanosec precision and its gradual weakening to usec precision is 
immaterial in practice as well: we still have at least 9 significant 
digits, so the relative precision is around 1:1000000000 - more than 
enough.

	Ingo

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] tracing/function-graph-tracer: adjustments of the trace informations
  2008-11-28  9:25   ` Ingo Molnar
  2008-11-28 13:58     ` Frédéric Weisbecker
@ 2008-11-28 19:22     ` Ingo Molnar
  2008-11-29  3:40       ` Frédéric Weisbecker
  1 sibling, 1 reply; 11+ messages in thread
From: Ingo Molnar @ 2008-11-28 19:22 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Steven Rostedt, Tim Bird, Peter Zijlstra, Linux Kernel


another detail: it would be nice to annotate IRQ entries some more.

right now if a syscall is interrupted by a hardware interrupt, it looks 
like this:

 0)               |              skb_dequeue() {
 0)      0.365 us |                _spin_lock_irqsave();
 0)      0.422 us |                _spin_unlock_irqrestore();
 0)      1.912 us |              }
 0)               |              memcpy_toiovec() {
 0)               |                copy_to_user() {
 0)               |                  smp_apic_timer_interrupt() {
 0)      0.450 us |                    native_apic_mem_write();
 0)               |                    irq_enter() {
 0)      0.410 us |                      idle_cpu();
 0)      1.167 us |                    }
 0)               |                    hrtimer_interrupt() {

the natural nesting is nice, but easy to miss. It would be nice to have 
something like:

 0)               |              skb_dequeue() {
 0)      0.365 us |                _spin_lock_irqsave();
 0)      0.422 us |                _spin_unlock_irqrestore();
 0)      1.912 us |              }
 0)               |              memcpy_toiovec() {
 0)               |                copy_to_user() {
 0)             =====>               smp_apic_timer_interrupt() {
 0)      0.450 us |                    native_apic_mem_write();
 0)               |                    irq_enter() {
 0)      0.410 us |                      idle_cpu();
 0)      1.167 us |                    }
 0)               |                    hrtimer_interrupt() {

and perhaps something like this at the end of the irq frame:

 0)      0.408 us |                          _local_bh_enable();
 0) +   13.139 us |                        }
 0) +   13.887 us |                      }
 0)      0.406 us |                      idle_cpu();
 0) +   15.373 us |                    }
 0) +   74.564 us |                  }
 0)               |                  _cond_resched() {
 0)               |                    need_resched() {
 0)      0.423 us |                      constant_test_bit();
 0)      1.164 us |                    }
 0)      1.915 us <===               } /* IRQ 1 end */
 0) +   78.396 us |                }
 0) +   79.123 us |              }
 0)      0.387 us |              skb_pull();
 0)               |              kfree_skb() {

?

	Ingo

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] tracing/function-graph-tracer: adjustments of the trace informations
  2008-11-28 14:17     ` Ingo Molnar
@ 2008-11-29  3:24       ` Frédéric Weisbecker
  2008-11-29 14:19         ` Ingo Molnar
  0 siblings, 1 reply; 11+ messages in thread
From: Frédéric Weisbecker @ 2008-11-29  3:24 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Steven Rostedt, Tim Bird, Peter Zijlstra, Linux Kernel

2008/11/28 Ingo Molnar <mingo@elte.hu>:
> Yeah, that needs updating. I supposed you'd be working on the task+pid
> column anyway, and do a helper function that prints the task-pid couple.
> Then that helper can be used in the context-switch case too to print out
> a uniform ID.


Ok.



> Another thing that would be nice is to separate out the "cpu)" printing
> bits into a helper function. Right now what exists cannot be used in a
> seq-manner, so i couldnt reuse it.
>


Isn't it the case with print_graph_cpu() ?

> Anyway, i wont change it (just wanted to get a final-ish output to look
> at) so feel free to clean it all up thoroughly :)
>
> Today i played with the graph-tracer on a testbox _way_ too much.
> Combined with the wildcard filter it's _really_ addictive. Kernel
> developers, beware!


:-)


>
> One thing that came up: it would be nice to have an 'inverted' wildcard
> to punch out certain functions from the filter list. For example i did in
> the shell:
>
>  $ echo 'sys_*'    >> set_ftrace_filter
>  $ echo '*socket*' >> set_ftrace_filter
>  $ echo '*timer*'  >> set_ftrace_filter
>  $ echo '*skb*'    >> set_ftrace_filter
>
> and looked at the trace and found that certain functions are too verbose
> and not really interesting - so i wanted to exclude them.
>
> We've got set_ftrace_notrace but it's not really an inverse wildcard but
> a complementary set of filter functions - which is not the same and not
> as easy to think about as a single set of filter functions.
>
> what i think would be more natural to do is via the filter file itself,
> via an extension like:
>
>  $ echo '!timer_*' >> set_ftrace_filter
>
> which would eliminate the functions matching that negative pattern. Such
> negative wildcards would act on the current set of functions, while
> set_ftrace_notrace is permanent and cannot be used to shape an arbitrary
> set of functions iteratively.


Ok, I'll add it in my workqueue (unless Steven does it).

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] tracing/function-graph-tracer: adjustments of the trace informations
  2008-11-28 19:22     ` Ingo Molnar
@ 2008-11-29  3:40       ` Frédéric Weisbecker
  2008-11-29 10:59         ` Peter Zijlstra
  0 siblings, 1 reply; 11+ messages in thread
From: Frédéric Weisbecker @ 2008-11-29  3:40 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Steven Rostedt, Tim Bird, Peter Zijlstra, Linux Kernel

2008/11/28 Ingo Molnar <mingo@elte.hu>:
>
> another detail: it would be nice to annotate IRQ entries some more.
>
> right now if a syscall is interrupted by a hardware interrupt, it looks
> like this:
>
>  0)               |              skb_dequeue() {
>  0)      0.365 us |                _spin_lock_irqsave();
>  0)      0.422 us |                _spin_unlock_irqrestore();
>  0)      1.912 us |              }
>  0)               |              memcpy_toiovec() {
>  0)               |                copy_to_user() {
>  0)               |                  smp_apic_timer_interrupt() {
>  0)      0.450 us |                    native_apic_mem_write();
>  0)               |                    irq_enter() {
>  0)      0.410 us |                      idle_cpu();
>  0)      1.167 us |                    }
>  0)               |                    hrtimer_interrupt() {
>
> the natural nesting is nice, but easy to miss. It would be nice to have
> something like:
>
>  0)               |              skb_dequeue() {
>  0)      0.365 us |                _spin_lock_irqsave();
>  0)      0.422 us |                _spin_unlock_irqrestore();
>  0)      1.912 us |              }
>  0)               |              memcpy_toiovec() {
>  0)               |                copy_to_user() {
>  0)             =====>               smp_apic_timer_interrupt() {
>  0)      0.450 us |                    native_apic_mem_write();
>  0)               |                    irq_enter() {
>  0)      0.410 us |                      idle_cpu();
>  0)      1.167 us |                    }
>  0)               |                    hrtimer_interrupt() {
>
> and perhaps something like this at the end of the irq frame:
>
>  0)      0.408 us |                          _local_bh_enable();
>  0) +   13.139 us |                        }
>  0) +   13.887 us |                      }
>  0)      0.406 us |                      idle_cpu();
>  0) +   15.373 us |                    }
>  0) +   74.564 us |                  }
>  0)               |                  _cond_resched() {
>  0)               |                    need_resched() {
>  0)      0.423 us |                      constant_test_bit();
>  0)      1.164 us |                    }
>  0)      1.915 us <===               } /* IRQ 1 end */
>  0) +   78.396 us |                }
>  0) +   79.123 us |              }
>  0)      0.387 us |              skb_pull();
>  0)               |              kfree_skb() {
>
> ?


I tried to figure out a way to signal a hardirq in a trace, I like
this arrow but It
breaks the indentation.
I would suggest:

  0)               |              skb_dequeue() {
  0)      0.365 us |                _spin_lock_irqsave();
  0)      0.422 us |                _spin_unlock_irqrestore();
  0)      1.912 us |              }
  0)               |              memcpy_toiovec() {
  0)               |                copy_to_user() {
             =====>
  0)               |               smp_apic_timer_interrupt() {
  0)      0.450 us |                    native_apic_mem_write();
  0)               |                    irq_enter() {
  0)      0.410 us |                      idle_cpu();
  0)      1.167 us |                    }
  0)               |                    hrtimer_interrupt() {

Your idea solves a part of one of the features I wanted to add: why
not using a single character column to print
the current context: workqueue, syscall, kernel thread, tasklet, hardirq .....

^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] tracing/function-graph-tracer: adjustments of the trace informations
  2008-11-29  3:40       ` Frédéric Weisbecker
@ 2008-11-29 10:59         ` Peter Zijlstra
  0 siblings, 0 replies; 11+ messages in thread
From: Peter Zijlstra @ 2008-11-29 10:59 UTC (permalink / raw)
  To: Frédéric Weisbecker
  Cc: Ingo Molnar, Steven Rostedt, Tim Bird, Linux Kernel

On Sat, 2008-11-29 at 04:40 +0100, Frédéric Weisbecker wrote:
> 2008/11/28 Ingo Molnar <mingo@elte.hu>:

> I tried to figure out a way to signal a hardirq in a trace, I like
> this arrow but It
> breaks the indentation.
> I would suggest:
> 
>   0)               |              skb_dequeue() {
>   0)      0.365 us |                _spin_lock_irqsave();
>   0)      0.422 us |                _spin_unlock_irqrestore();
>   0)      1.912 us |              }
>   0)               |              memcpy_toiovec() {
>   0)               |                copy_to_user() {
>              =====>
>   0)               |               smp_apic_timer_interrupt() {
>   0)      0.450 us |                    native_apic_mem_write();
>   0)               |                    irq_enter() {
>   0)      0.410 us |                      idle_cpu();
>   0)      1.167 us |                    }
>   0)               |                    hrtimer_interrupt() {
> 
> Your idea solves a part of one of the features I wanted to add: why
> not using a single character column to print
> the current context: workqueue, syscall, kernel thread, tasklet, hardirq .....

That was my initial suggestion too, like the latency tracer has.

Then again, most of that would be obvious from the comm/pid column, that
will show preemption and all those things.

But showing Hard,Soft irq disabled and perhaps the preempt count would
be handy.


^ permalink raw reply	[flat|nested] 11+ messages in thread

* Re: [PATCH] tracing/function-graph-tracer: adjustments of the trace informations
  2008-11-29  3:24       ` Frédéric Weisbecker
@ 2008-11-29 14:19         ` Ingo Molnar
  0 siblings, 0 replies; 11+ messages in thread
From: Ingo Molnar @ 2008-11-29 14:19 UTC (permalink / raw)
  To: Frédéric Weisbecker
  Cc: Steven Rostedt, Tim Bird, Peter Zijlstra, Linux Kernel


* Frédéric Weisbecker <fweisbec@gmail.com> wrote:

> > Another thing that would be nice is to separate out the "cpu)" 
> > printing bits into a helper function. Right now what exists cannot be 
> > used in a seq-manner, so i couldnt reuse it.
> >
> 
> Isn't it the case with print_graph_cpu() ?

yeah, that should be okay. I didnt use it in verif_pid() because it 
returns print_line_t - but i guess it would be better to fix verif_pid() 
to check for !ret instead of the ret += thing i added.

	Ingo

^ permalink raw reply	[flat|nested] 11+ messages in thread

end of thread, other threads:[~2008-11-29 14:20 UTC | newest]

Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-11-27 23:42 [PATCH] tracing/function-graph-tracer: adjustments of the trace informations Frederic Weisbecker
2008-11-28  9:18 ` Ingo Molnar
2008-11-28  9:25   ` Ingo Molnar
2008-11-28 13:58     ` Frédéric Weisbecker
2008-11-28 19:22     ` Ingo Molnar
2008-11-29  3:40       ` Frédéric Weisbecker
2008-11-29 10:59         ` Peter Zijlstra
2008-11-28 13:55   ` Frédéric Weisbecker
2008-11-28 14:17     ` Ingo Molnar
2008-11-29  3:24       ` Frédéric Weisbecker
2008-11-29 14:19         ` Ingo Molnar

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.