* [PATCH] new irq tracer
@ 2009-02-18 19:53 Jason Baron
2009-02-18 20:08 ` Steven Rostedt
` (3 more replies)
0 siblings, 4 replies; 48+ messages in thread
From: Jason Baron @ 2009-02-18 19:53 UTC (permalink / raw)
To: mingo, rostedt; +Cc: linux-kernel, acme, fweisbec, fche, peterz, compudj
hi,
Using a copule of tracepoints i've implemented an 'irq tracer' which has
the following column format:
[time][irq][action][handled][cpu][length]
for example on my system:
[ 1038.927248381] 28 eth0 1 3 0.000002745
[ 1038.927544688] 28 eth0 1 3 0.000002520
[ 1038.927593609] 28 eth0 1 3 0.000002509
[ 1038.974087675] 29 ahci 1 1 0.000013135
[ 1038.974288475] 29 ahci 1 1 0.000005542
[ 1038.974414324] 29 ahci 1 1 0.000007953
I think this patch is useful in understanding irq system behavior, and
for tracking down irq driver handlers that are firing too often or
spending too much time handling an irq. This approach could be extended
to the timer irq and for softirqs...
thanks,
-Jason
Signed-off-by: Jason Baron <jbaron@redhat.com>
---
include/trace/irq.h | 24 +++++++
kernel/irq/handle.c | 6 ++
kernel/trace/Kconfig | 7 ++
kernel/trace/Makefile | 1
kernel/trace/trace.h | 8 ++
kernel/trace/trace_irq.c | 151 ++++++++++++++++++++++++++++++++++++++++++++++
6 files changed, 197 insertions(+), 0 deletions(-)
create mode 100644 include/trace/irq.h
create mode 100644 kernel/trace/trace_irq.c
diff --git a/include/trace/irq.h b/include/trace/irq.h
new file mode 100644
index 0000000..e4669c2
--- /dev/null
+++ b/include/trace/irq.h
@@ -0,0 +1,24 @@
+#ifndef _TRACE_IRQ_H
+#define _TRACE_IRQ_H
+
+#include <linux/interrupt.h>
+#include <linux/tracepoint.h>
+
+struct irq_trace {
+ int retval;
+ int irq;
+ const char *name;
+ int cpu;
+ int handled;
+ struct timespec duration;
+ struct timespec start_time;
+};
+
+DECLARE_TRACE(irq_entry,
+ TPPROTO(unsigned int id),
+ TPARGS(id));
+DECLARE_TRACE(irq_exit,
+ TPPROTO(unsigned int id, irqreturn_t retval),
+ TPARGS(id, retval));
+
+#endif /* _TRACE_IRQ_H */
diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index fdff380..34053b5 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -18,6 +18,7 @@
#include <linux/rculist.h>
#include <linux/hash.h>
#include <linux/bootmem.h>
+#include <trace/irq.h>
#include "internals.h"
@@ -338,6 +339,9 @@ irqreturn_t no_action(int cpl, void *dev_id)
return IRQ_NONE;
}
+DEFINE_TRACE(irq_entry);
+DEFINE_TRACE(irq_exit);
+
/**
* handle_IRQ_event - irq action chain handler
* @irq: the interrupt number
@@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
local_irq_enable_in_hardirq();
do {
+ trace_irq_entry(irq);
ret = action->handler(irq, action->dev_id);
+ trace_irq_exit(irq, ret);
if (ret == IRQ_HANDLED)
status |= action->flags;
retval |= ret;
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 620f9cd..f62165c 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -239,6 +239,13 @@ config POWER_TRACER
power management decisions, specifically the C-state and P-state
behavior.
+config IRQ_TRACER
+ bool "Trace irq latencies"
+ depends on DEBUG_KERNEL
+ depends on X86
+ select TRACING
+ help
+ This tracer helps developers anaylze irq latencies.
config STACK_TRACER
bool "Trace max stack"
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 627090b..58b9f7e 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -38,5 +38,6 @@ obj-$(CONFIG_POWER_TRACER) += trace_power.o
obj-$(CONFIG_KMEMTRACE) += kmemtrace.o
obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o
obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o
+obj-$(CONFIG_IRQ_TRACER) += trace_irq.o
libftrace-y := ftrace.o
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index dbff020..6a34aaa 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -11,6 +11,7 @@
#include <trace/boot.h>
#include <trace/kmemtrace.h>
#include <trace/power.h>
+#include <trace/irq.h>
enum trace_type {
__TRACE_FIRST_TYPE = 0,
@@ -33,6 +34,7 @@ enum trace_type {
TRACE_KMEM_ALLOC,
TRACE_KMEM_FREE,
TRACE_POWER,
+ TRACE_IRQ,
TRACE_BLK,
__TRACE_LAST_TYPE,
@@ -173,6 +175,11 @@ struct trace_power {
struct power_trace state_data;
};
+struct trace_irq {
+ struct trace_entry ent;
+ struct irq_trace irq_data;
+};
+
struct kmemtrace_alloc_entry {
struct trace_entry ent;
enum kmemtrace_type_id type_id;
@@ -298,6 +305,7 @@ extern void __ftrace_bad_type(void);
TRACE_GRAPH_RET); \
IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\
IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \
+ IF_ASSIGN(var, ent, struct trace_irq, TRACE_IRQ); \
IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \
TRACE_KMEM_ALLOC); \
IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \
diff --git a/kernel/trace/trace_irq.c b/kernel/trace/trace_irq.c
new file mode 100644
index 0000000..1808316
--- /dev/null
+++ b/kernel/trace/trace_irq.c
@@ -0,0 +1,151 @@
+/*
+ * Copyright (C) 2009 Red Hat, Inc., Jason Baron <jbaron@redhat.com>
+ */
+
+#include <linux/init.h>
+#include <linux/debugfs.h>
+#include <trace/irq.h>
+#include <linux/kallsyms.h>
+#include <linux/module.h>
+#include <linux/interrupt.h>
+#include <linux/ktime.h>
+
+#include "trace.h"
+#include "trace_output.h"
+
+static struct trace_array *irq_trace;
+static int __read_mostly trace_irq_enabled;
+static ktime_t start_times[NR_IRQS];
+
+static void probe_irq_entry(unsigned int irq)
+{
+ if (!trace_irq_enabled)
+ return;
+ start_times[irq] = ktime_get();
+}
+
+static void probe_irq_exit(unsigned int irq, irqreturn_t retval)
+{
+ struct ring_buffer_event *event;
+ struct trace_irq *entry;
+ struct trace_array *tr = irq_trace;
+ struct irq_desc *desc;
+ ktime_t end_time;
+
+ if (!trace_irq_enabled)
+ return;
+ end_time = ktime_get();
+ desc = irq_to_desc(irq);
+ event = trace_buffer_lock_reserve(tr, TRACE_IRQ,
+ sizeof(*entry), 0, 0);
+ if (!event)
+ goto out;
+ entry = ring_buffer_event_data(event);
+ entry->irq_data.retval = retval;
+ entry->irq_data.name = desc->action->name;
+ entry->irq_data.irq = irq;
+ entry->irq_data.cpu = raw_smp_processor_id();
+ if (retval == IRQ_HANDLED)
+ entry->irq_data.handled = 1;
+ else
+ entry->irq_data.handled = 0;
+ entry->irq_data.duration =
+ ktime_to_timespec(ktime_sub(end_time, start_times[irq]));
+ entry->irq_data.start_time = ktime_to_timespec(start_times[irq]);
+ trace_buffer_unlock_commit(tr, event, 0, 0);
+ out:
+ preempt_enable();
+}
+
+static int tracing_irq_register(void)
+{
+ int ret;
+
+ ret = register_trace_irq_entry(probe_irq_entry);
+ if (ret) {
+ pr_info("irq trace: irq entry tracepoint failed to register\n");
+ return ret;
+ }
+ ret = register_trace_irq_exit(probe_irq_exit);
+ if (ret) {
+ pr_info("irq trace: irq exit tracepoint failed to register\n");
+ goto fail_exit;
+ }
+ return ret;
+fail_exit:
+ unregister_trace_irq_entry(probe_irq_entry);
+ return ret;
+}
+
+static void start_irq_trace(struct trace_array *tr)
+{
+ trace_irq_enabled = 1;
+ tracing_irq_register();
+}
+
+static void stop_irq_trace(struct trace_array *tr)
+{
+ trace_irq_enabled = 0;
+ unregister_trace_irq_entry(probe_irq_entry);
+ unregister_trace_irq_exit(probe_irq_exit);
+}
+
+
+static int irq_trace_init(struct trace_array *tr)
+{
+ int cpu;
+ irq_trace = tr;
+
+ trace_irq_enabled = 1;
+ tracing_irq_register();
+ for_each_cpu(cpu, cpu_possible_mask)
+ tracing_reset(tr, cpu);
+ return 0;
+}
+
+static enum print_line_t irq_print_line(struct trace_iterator *iter)
+{
+ int ret = 0;
+ struct trace_entry *entry = iter->ent;
+ struct trace_irq *field;
+ struct irq_trace *it;
+ struct trace_seq *s = &iter->seq;
+
+ trace_assign_type(field, entry);
+ it = &field->irq_data;
+ if (entry->type == TRACE_IRQ) {
+ ret = trace_seq_printf(s, "[%5ld.%09ld] %d %s %d %d %ld.%09ld\n",
+ it->start_time.tv_sec,
+ it->start_time.tv_nsec,
+ it->irq, it->name, it->handled, it->cpu,
+ it->duration.tv_sec,
+ it->duration.tv_nsec);
+ if (!ret)
+ return TRACE_TYPE_PARTIAL_LINE;
+ return TRACE_TYPE_HANDLED;
+ }
+ return TRACE_TYPE_UNHANDLED;
+}
+
+static void irq_print_headers(struct seq_file *s)
+{
+ seq_printf(s, " [time] [irq] [action] [handled] [cpu] [length]\n");
+ seq_printf(s, " | | | | | | \n\n");
+}
+
+static struct tracer irq_tracer __read_mostly =
+{
+ .name = "irq",
+ .init = irq_trace_init,
+ .start = start_irq_trace,
+ .stop = stop_irq_trace,
+ .reset = stop_irq_trace,
+ .print_header = irq_print_headers,
+ .print_line = irq_print_line,
+};
+
+static int init_irq_trace(void)
+{
+ return register_tracer(&irq_tracer);
+}
+device_initcall(init_irq_trace);
^ permalink raw reply related [flat|nested] 48+ messages in thread* Re: [PATCH] new irq tracer 2009-02-18 19:53 [PATCH] new irq tracer Jason Baron @ 2009-02-18 20:08 ` Steven Rostedt 2009-02-18 20:26 ` Mathieu Desnoyers ` (2 subsequent siblings) 3 siblings, 0 replies; 48+ messages in thread From: Steven Rostedt @ 2009-02-18 20:08 UTC (permalink / raw) To: Jason Baron; +Cc: mingo, linux-kernel, acme, fweisbec, fche, peterz, compudj On Wed, 18 Feb 2009, Jason Baron wrote: > hi, > > Using a copule of tracepoints i've implemented an 'irq tracer' which has > the following column format: > > [time][irq][action][handled][cpu][length] > > for example on my system: > > [ 1038.927248381] 28 eth0 1 3 0.000002745 > [ 1038.927544688] 28 eth0 1 3 0.000002520 > [ 1038.927593609] 28 eth0 1 3 0.000002509 > [ 1038.974087675] 29 ahci 1 1 0.000013135 > [ 1038.974288475] 29 ahci 1 1 0.000005542 > [ 1038.974414324] 29 ahci 1 1 0.000007953 > > I think this patch is useful in understanding irq system behavior, and > for tracking down irq driver handlers that are firing too often or > spending too much time handling an irq. This approach could be extended > to the timer irq and for softirqs... > > thanks, > > -Jason > > Signed-off-by: Jason Baron <jbaron@redhat.com> > > > --- > > include/trace/irq.h | 24 +++++++ > kernel/irq/handle.c | 6 ++ > kernel/trace/Kconfig | 7 ++ > kernel/trace/Makefile | 1 > kernel/trace/trace.h | 8 ++ > kernel/trace/trace_irq.c | 151 ++++++++++++++++++++++++++++++++++++++++++++++ > 6 files changed, 197 insertions(+), 0 deletions(-) > create mode 100644 include/trace/irq.h > create mode 100644 kernel/trace/trace_irq.c > > > diff --git a/include/trace/irq.h b/include/trace/irq.h > new file mode 100644 > index 0000000..e4669c2 > --- /dev/null > +++ b/include/trace/irq.h > @@ -0,0 +1,24 @@ > +#ifndef _TRACE_IRQ_H > +#define _TRACE_IRQ_H > + > +#include <linux/interrupt.h> > +#include <linux/tracepoint.h> > + > +struct irq_trace { > + int retval; > + int irq; > + const char *name; > + int cpu; You can get rid of "cpu" and probably even name. > + int handled; > + struct timespec duration; > + struct timespec start_time; > +}; > + > +DECLARE_TRACE(irq_entry, > + TPPROTO(unsigned int id), > + TPARGS(id)); > +DECLARE_TRACE(irq_exit, > + TPPROTO(unsigned int id, irqreturn_t retval), > + TPARGS(id, retval)); > + > +#endif /* _TRACE_IRQ_H */ > diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c > index fdff380..34053b5 100644 > --- a/kernel/irq/handle.c > +++ b/kernel/irq/handle.c > @@ -18,6 +18,7 @@ > #include <linux/rculist.h> > #include <linux/hash.h> > #include <linux/bootmem.h> > +#include <trace/irq.h> > > #include "internals.h" > > @@ -338,6 +339,9 @@ irqreturn_t no_action(int cpl, void *dev_id) > return IRQ_NONE; > } > > +DEFINE_TRACE(irq_entry); > +DEFINE_TRACE(irq_exit); > + > /** > * handle_IRQ_event - irq action chain handler > * @irq: the interrupt number > @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) > local_irq_enable_in_hardirq(); > > do { > + trace_irq_entry(irq); > ret = action->handler(irq, action->dev_id); > + trace_irq_exit(irq, ret); > if (ret == IRQ_HANDLED) > status |= action->flags; > retval |= ret; > diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig > index 620f9cd..f62165c 100644 > --- a/kernel/trace/Kconfig > +++ b/kernel/trace/Kconfig > @@ -239,6 +239,13 @@ config POWER_TRACER > power management decisions, specifically the C-state and P-state > behavior. > > +config IRQ_TRACER > + bool "Trace irq latencies" > + depends on DEBUG_KERNEL > + depends on X86 > + select TRACING > + help > + This tracer helps developers anaylze irq latencies. > > config STACK_TRACER > bool "Trace max stack" > diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile > index 627090b..58b9f7e 100644 > --- a/kernel/trace/Makefile > +++ b/kernel/trace/Makefile > @@ -38,5 +38,6 @@ obj-$(CONFIG_POWER_TRACER) += trace_power.o > obj-$(CONFIG_KMEMTRACE) += kmemtrace.o > obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o > obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o > +obj-$(CONFIG_IRQ_TRACER) += trace_irq.o > > libftrace-y := ftrace.o > diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h > index dbff020..6a34aaa 100644 > --- a/kernel/trace/trace.h > +++ b/kernel/trace/trace.h > @@ -11,6 +11,7 @@ > #include <trace/boot.h> > #include <trace/kmemtrace.h> > #include <trace/power.h> > +#include <trace/irq.h> > > enum trace_type { > __TRACE_FIRST_TYPE = 0, > @@ -33,6 +34,7 @@ enum trace_type { > TRACE_KMEM_ALLOC, > TRACE_KMEM_FREE, > TRACE_POWER, > + TRACE_IRQ, > TRACE_BLK, > > __TRACE_LAST_TYPE, > @@ -173,6 +175,11 @@ struct trace_power { > struct power_trace state_data; > }; > > +struct trace_irq { > + struct trace_entry ent; > + struct irq_trace irq_data; > +}; > + > struct kmemtrace_alloc_entry { > struct trace_entry ent; > enum kmemtrace_type_id type_id; > @@ -298,6 +305,7 @@ extern void __ftrace_bad_type(void); > TRACE_GRAPH_RET); \ > IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\ > IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \ > + IF_ASSIGN(var, ent, struct trace_irq, TRACE_IRQ); \ > IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \ > TRACE_KMEM_ALLOC); \ > IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ > diff --git a/kernel/trace/trace_irq.c b/kernel/trace/trace_irq.c > new file mode 100644 > index 0000000..1808316 > --- /dev/null > +++ b/kernel/trace/trace_irq.c > @@ -0,0 +1,151 @@ > +/* > + * Copyright (C) 2009 Red Hat, Inc., Jason Baron <jbaron@redhat.com> > + */ > + > +#include <linux/init.h> > +#include <linux/debugfs.h> > +#include <trace/irq.h> > +#include <linux/kallsyms.h> > +#include <linux/module.h> > +#include <linux/interrupt.h> > +#include <linux/ktime.h> > + > +#include "trace.h" > +#include "trace_output.h" > + > +static struct trace_array *irq_trace; > +static int __read_mostly trace_irq_enabled; > +static ktime_t start_times[NR_IRQS]; > + > +static void probe_irq_entry(unsigned int irq) > +{ > + if (!trace_irq_enabled) > + return; > + start_times[irq] = ktime_get(); > +} > + > +static void probe_irq_exit(unsigned int irq, irqreturn_t retval) > +{ > + struct ring_buffer_event *event; > + struct trace_irq *entry; > + struct trace_array *tr = irq_trace; > + struct irq_desc *desc; > + ktime_t end_time; > + > + if (!trace_irq_enabled) > + return; > + end_time = ktime_get(); > + desc = irq_to_desc(irq); > + event = trace_buffer_lock_reserve(tr, TRACE_IRQ, > + sizeof(*entry), 0, 0); > + if (!event) > + goto out; > + entry = ring_buffer_event_data(event); > + entry->irq_data.retval = retval; > + entry->irq_data.name = desc->action->name; Can't you get the name information at output time? > + entry->irq_data.irq = irq; > + entry->irq_data.cpu = raw_smp_processor_id(); The CPU is held via which ring buffer is used. This information is passed to the tracer via iter->cpu. > + if (retval == IRQ_HANDLED) > + entry->irq_data.handled = 1; > + else > + entry->irq_data.handled = 0; What about: entry->irq_data.handled = retval == IRQ_HANDLED; > + entry->irq_data.duration = > + ktime_to_timespec(ktime_sub(end_time, start_times[irq])); > + entry->irq_data.start_time = ktime_to_timespec(start_times[irq]); > + trace_buffer_unlock_commit(tr, event, 0, 0); > + out: > + preempt_enable(); Where's the matching preempt_disable()? And since we are being called from an interrupt handler, I doubt it is even needed. > +} > + > +static int tracing_irq_register(void) > +{ > + int ret; > + > + ret = register_trace_irq_entry(probe_irq_entry); > + if (ret) { > + pr_info("irq trace: irq entry tracepoint failed to register\n"); > + return ret; > + } > + ret = register_trace_irq_exit(probe_irq_exit); > + if (ret) { > + pr_info("irq trace: irq exit tracepoint failed to register\n"); > + goto fail_exit; > + } > + return ret; > +fail_exit: > + unregister_trace_irq_entry(probe_irq_entry); > + return ret; > +} > + > +static void start_irq_trace(struct trace_array *tr) > +{ > + trace_irq_enabled = 1; > + tracing_irq_register(); > +} > + > +static void stop_irq_trace(struct trace_array *tr) > +{ > + trace_irq_enabled = 0; > + unregister_trace_irq_entry(probe_irq_entry); > + unregister_trace_irq_exit(probe_irq_exit); > +} > + > + > +static int irq_trace_init(struct trace_array *tr) > +{ > + int cpu; > + irq_trace = tr; > + > + trace_irq_enabled = 1; > + tracing_irq_register(); > + for_each_cpu(cpu, cpu_possible_mask) > + tracing_reset(tr, cpu); > + return 0; > +} > + > +static enum print_line_t irq_print_line(struct trace_iterator *iter) > +{ > + int ret = 0; > + struct trace_entry *entry = iter->ent; > + struct trace_irq *field; > + struct irq_trace *it; > + struct trace_seq *s = &iter->seq; > + > + trace_assign_type(field, entry); > + it = &field->irq_data; > + if (entry->type == TRACE_IRQ) { > + ret = trace_seq_printf(s, "[%5ld.%09ld] %d %s %d %d %ld.%09ld\n", > + it->start_time.tv_sec, > + it->start_time.tv_nsec, > + it->irq, it->name, it->handled, it->cpu, iter->cpu is the same as it->cpu. -- Steve > + it->duration.tv_sec, > + it->duration.tv_nsec); > + if (!ret) > + return TRACE_TYPE_PARTIAL_LINE; > + return TRACE_TYPE_HANDLED; > + } > + return TRACE_TYPE_UNHANDLED; > +} > + > +static void irq_print_headers(struct seq_file *s) > +{ > + seq_printf(s, " [time] [irq] [action] [handled] [cpu] [length]\n"); > + seq_printf(s, " | | | | | | \n\n"); > +} > + > +static struct tracer irq_tracer __read_mostly = > +{ > + .name = "irq", > + .init = irq_trace_init, > + .start = start_irq_trace, > + .stop = stop_irq_trace, > + .reset = stop_irq_trace, > + .print_header = irq_print_headers, > + .print_line = irq_print_line, > +}; > + > +static int init_irq_trace(void) > +{ > + return register_tracer(&irq_tracer); > +} > +device_initcall(init_irq_trace); > > > ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-18 19:53 [PATCH] new irq tracer Jason Baron 2009-02-18 20:08 ` Steven Rostedt @ 2009-02-18 20:26 ` Mathieu Desnoyers 2009-02-19 1:42 ` KOSAKI Motohiro 2009-02-18 20:30 ` Kyle McMartin 2009-02-18 21:15 ` Peter Zijlstra 3 siblings, 1 reply; 48+ messages in thread From: Mathieu Desnoyers @ 2009-02-18 20:26 UTC (permalink / raw) To: Jason Baron; +Cc: mingo, rostedt, linux-kernel, acme, fweisbec, fche, peterz * Jason Baron (jbaron@redhat.com) wrote: > hi, > > Using a copule of tracepoints i've implemented an 'irq tracer' which has > the following column format: > > [time][irq][action][handled][cpu][length] > > for example on my system: > > [ 1038.927248381] 28 eth0 1 3 0.000002745 > [ 1038.927544688] 28 eth0 1 3 0.000002520 > [ 1038.927593609] 28 eth0 1 3 0.000002509 > [ 1038.974087675] 29 ahci 1 1 0.000013135 > [ 1038.974288475] 29 ahci 1 1 0.000005542 > [ 1038.974414324] 29 ahci 1 1 0.000007953 > > I think this patch is useful in understanding irq system behavior, and > for tracking down irq driver handlers that are firing too often or > spending too much time handling an irq. This approach could be extended > to the timer irq and for softirqs... > > thanks, > Hi Jason, If you are in the process of rewriting the LTTng instrumentation from scratch, please have a look at the git tree first, at least as an inspiration source, otherwise we will end up tring to merge two subtily different sets of instrumentation into the mainline. Please see : http://git.kernel.org/?p=linux/kernel/git/compudj/linux-2.6-lttng.git Thanks, Mathieu > -Jason > > Signed-off-by: Jason Baron <jbaron@redhat.com> > > > --- > > include/trace/irq.h | 24 +++++++ > kernel/irq/handle.c | 6 ++ > kernel/trace/Kconfig | 7 ++ > kernel/trace/Makefile | 1 > kernel/trace/trace.h | 8 ++ > kernel/trace/trace_irq.c | 151 ++++++++++++++++++++++++++++++++++++++++++++++ > 6 files changed, 197 insertions(+), 0 deletions(-) > create mode 100644 include/trace/irq.h > create mode 100644 kernel/trace/trace_irq.c > > > diff --git a/include/trace/irq.h b/include/trace/irq.h > new file mode 100644 > index 0000000..e4669c2 > --- /dev/null > +++ b/include/trace/irq.h > @@ -0,0 +1,24 @@ > +#ifndef _TRACE_IRQ_H > +#define _TRACE_IRQ_H > + > +#include <linux/interrupt.h> > +#include <linux/tracepoint.h> > + > +struct irq_trace { > + int retval; > + int irq; > + const char *name; > + int cpu; > + int handled; > + struct timespec duration; > + struct timespec start_time; > +}; > + > +DECLARE_TRACE(irq_entry, > + TPPROTO(unsigned int id), > + TPARGS(id)); > +DECLARE_TRACE(irq_exit, > + TPPROTO(unsigned int id, irqreturn_t retval), > + TPARGS(id, retval)); > + > +#endif /* _TRACE_IRQ_H */ > diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c > index fdff380..34053b5 100644 > --- a/kernel/irq/handle.c > +++ b/kernel/irq/handle.c > @@ -18,6 +18,7 @@ > #include <linux/rculist.h> > #include <linux/hash.h> > #include <linux/bootmem.h> > +#include <trace/irq.h> > > #include "internals.h" > > @@ -338,6 +339,9 @@ irqreturn_t no_action(int cpl, void *dev_id) > return IRQ_NONE; > } > > +DEFINE_TRACE(irq_entry); > +DEFINE_TRACE(irq_exit); > + > /** > * handle_IRQ_event - irq action chain handler > * @irq: the interrupt number > @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) > local_irq_enable_in_hardirq(); > > do { > + trace_irq_entry(irq); > ret = action->handler(irq, action->dev_id); > + trace_irq_exit(irq, ret); > if (ret == IRQ_HANDLED) > status |= action->flags; > retval |= ret; > diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig > index 620f9cd..f62165c 100644 > --- a/kernel/trace/Kconfig > +++ b/kernel/trace/Kconfig > @@ -239,6 +239,13 @@ config POWER_TRACER > power management decisions, specifically the C-state and P-state > behavior. > > +config IRQ_TRACER > + bool "Trace irq latencies" > + depends on DEBUG_KERNEL > + depends on X86 > + select TRACING > + help > + This tracer helps developers anaylze irq latencies. > > config STACK_TRACER > bool "Trace max stack" > diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile > index 627090b..58b9f7e 100644 > --- a/kernel/trace/Makefile > +++ b/kernel/trace/Makefile > @@ -38,5 +38,6 @@ obj-$(CONFIG_POWER_TRACER) += trace_power.o > obj-$(CONFIG_KMEMTRACE) += kmemtrace.o > obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o > obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o > +obj-$(CONFIG_IRQ_TRACER) += trace_irq.o > > libftrace-y := ftrace.o > diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h > index dbff020..6a34aaa 100644 > --- a/kernel/trace/trace.h > +++ b/kernel/trace/trace.h > @@ -11,6 +11,7 @@ > #include <trace/boot.h> > #include <trace/kmemtrace.h> > #include <trace/power.h> > +#include <trace/irq.h> > > enum trace_type { > __TRACE_FIRST_TYPE = 0, > @@ -33,6 +34,7 @@ enum trace_type { > TRACE_KMEM_ALLOC, > TRACE_KMEM_FREE, > TRACE_POWER, > + TRACE_IRQ, > TRACE_BLK, > > __TRACE_LAST_TYPE, > @@ -173,6 +175,11 @@ struct trace_power { > struct power_trace state_data; > }; > > +struct trace_irq { > + struct trace_entry ent; > + struct irq_trace irq_data; > +}; > + > struct kmemtrace_alloc_entry { > struct trace_entry ent; > enum kmemtrace_type_id type_id; > @@ -298,6 +305,7 @@ extern void __ftrace_bad_type(void); > TRACE_GRAPH_RET); \ > IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\ > IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \ > + IF_ASSIGN(var, ent, struct trace_irq, TRACE_IRQ); \ > IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \ > TRACE_KMEM_ALLOC); \ > IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ > diff --git a/kernel/trace/trace_irq.c b/kernel/trace/trace_irq.c > new file mode 100644 > index 0000000..1808316 > --- /dev/null > +++ b/kernel/trace/trace_irq.c > @@ -0,0 +1,151 @@ > +/* > + * Copyright (C) 2009 Red Hat, Inc., Jason Baron <jbaron@redhat.com> > + */ > + > +#include <linux/init.h> > +#include <linux/debugfs.h> > +#include <trace/irq.h> > +#include <linux/kallsyms.h> > +#include <linux/module.h> > +#include <linux/interrupt.h> > +#include <linux/ktime.h> > + > +#include "trace.h" > +#include "trace_output.h" > + > +static struct trace_array *irq_trace; > +static int __read_mostly trace_irq_enabled; > +static ktime_t start_times[NR_IRQS]; > + > +static void probe_irq_entry(unsigned int irq) > +{ > + if (!trace_irq_enabled) > + return; > + start_times[irq] = ktime_get(); > +} > + > +static void probe_irq_exit(unsigned int irq, irqreturn_t retval) > +{ > + struct ring_buffer_event *event; > + struct trace_irq *entry; > + struct trace_array *tr = irq_trace; > + struct irq_desc *desc; > + ktime_t end_time; > + > + if (!trace_irq_enabled) > + return; > + end_time = ktime_get(); > + desc = irq_to_desc(irq); > + event = trace_buffer_lock_reserve(tr, TRACE_IRQ, > + sizeof(*entry), 0, 0); > + if (!event) > + goto out; > + entry = ring_buffer_event_data(event); > + entry->irq_data.retval = retval; > + entry->irq_data.name = desc->action->name; > + entry->irq_data.irq = irq; > + entry->irq_data.cpu = raw_smp_processor_id(); > + if (retval == IRQ_HANDLED) > + entry->irq_data.handled = 1; > + else > + entry->irq_data.handled = 0; > + entry->irq_data.duration = > + ktime_to_timespec(ktime_sub(end_time, start_times[irq])); > + entry->irq_data.start_time = ktime_to_timespec(start_times[irq]); > + trace_buffer_unlock_commit(tr, event, 0, 0); > + out: > + preempt_enable(); > +} > + > +static int tracing_irq_register(void) > +{ > + int ret; > + > + ret = register_trace_irq_entry(probe_irq_entry); > + if (ret) { > + pr_info("irq trace: irq entry tracepoint failed to register\n"); > + return ret; > + } > + ret = register_trace_irq_exit(probe_irq_exit); > + if (ret) { > + pr_info("irq trace: irq exit tracepoint failed to register\n"); > + goto fail_exit; > + } > + return ret; > +fail_exit: > + unregister_trace_irq_entry(probe_irq_entry); > + return ret; > +} > + > +static void start_irq_trace(struct trace_array *tr) > +{ > + trace_irq_enabled = 1; > + tracing_irq_register(); > +} > + > +static void stop_irq_trace(struct trace_array *tr) > +{ > + trace_irq_enabled = 0; > + unregister_trace_irq_entry(probe_irq_entry); > + unregister_trace_irq_exit(probe_irq_exit); > +} > + > + > +static int irq_trace_init(struct trace_array *tr) > +{ > + int cpu; > + irq_trace = tr; > + > + trace_irq_enabled = 1; > + tracing_irq_register(); > + for_each_cpu(cpu, cpu_possible_mask) > + tracing_reset(tr, cpu); > + return 0; > +} > + > +static enum print_line_t irq_print_line(struct trace_iterator *iter) > +{ > + int ret = 0; > + struct trace_entry *entry = iter->ent; > + struct trace_irq *field; > + struct irq_trace *it; > + struct trace_seq *s = &iter->seq; > + > + trace_assign_type(field, entry); > + it = &field->irq_data; > + if (entry->type == TRACE_IRQ) { > + ret = trace_seq_printf(s, "[%5ld.%09ld] %d %s %d %d %ld.%09ld\n", > + it->start_time.tv_sec, > + it->start_time.tv_nsec, > + it->irq, it->name, it->handled, it->cpu, > + it->duration.tv_sec, > + it->duration.tv_nsec); > + if (!ret) > + return TRACE_TYPE_PARTIAL_LINE; > + return TRACE_TYPE_HANDLED; > + } > + return TRACE_TYPE_UNHANDLED; > +} > + > +static void irq_print_headers(struct seq_file *s) > +{ > + seq_printf(s, " [time] [irq] [action] [handled] [cpu] [length]\n"); > + seq_printf(s, " | | | | | | \n\n"); > +} > + > +static struct tracer irq_tracer __read_mostly = > +{ > + .name = "irq", > + .init = irq_trace_init, > + .start = start_irq_trace, > + .stop = stop_irq_trace, > + .reset = stop_irq_trace, > + .print_header = irq_print_headers, > + .print_line = irq_print_line, > +}; > + > +static int init_irq_trace(void) > +{ > + return register_tracer(&irq_tracer); > +} > +device_initcall(init_irq_trace); > -- Mathieu Desnoyers OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68 ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-18 20:26 ` Mathieu Desnoyers @ 2009-02-19 1:42 ` KOSAKI Motohiro 0 siblings, 0 replies; 48+ messages in thread From: KOSAKI Motohiro @ 2009-02-19 1:42 UTC (permalink / raw) To: Mathieu Desnoyers, Jason Baron Cc: kosaki.motohiro, mingo, rostedt, linux-kernel, acme, fweisbec, fche, peterz Hi Jason, > * Jason Baron (jbaron@redhat.com) wrote: > > hi, > > > > Using a copule of tracepoints i've implemented an 'irq tracer' which has > > the following column format: > > > > [time][irq][action][handled][cpu][length] > > > > for example on my system: > > > > [ 1038.927248381] 28 eth0 1 3 0.000002745 > > [ 1038.927544688] 28 eth0 1 3 0.000002520 > > [ 1038.927593609] 28 eth0 1 3 0.000002509 > > [ 1038.974087675] 29 ahci 1 1 0.000013135 > > [ 1038.974288475] 29 ahci 1 1 0.000005542 > > [ 1038.974414324] 29 ahci 1 1 0.000007953 > > > > I think this patch is useful in understanding irq system behavior, and > > for tracking down irq driver handlers that are firing too often or > > spending too much time handling an irq. This approach could be extended > > to the timer irq and for softirqs... > > > > thanks, > > @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) > > local_irq_enable_in_hardirq(); > > > > do { > > + trace_irq_entry(irq); > > ret = action->handler(irq, action->dev_id); > > + trace_irq_exit(irq, ret); > > if (ret == IRQ_HANDLED) > > status |= action->flags; > > retval |= ret; May I ask why you select this line? IOW, I guess your probe point is interest to irq action and mathieu's probe point is interest to interrupt itself. I guess you also want to observe shared irq problem, right? ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-18 19:53 [PATCH] new irq tracer Jason Baron 2009-02-18 20:08 ` Steven Rostedt 2009-02-18 20:26 ` Mathieu Desnoyers @ 2009-02-18 20:30 ` Kyle McMartin 2009-02-18 21:15 ` Peter Zijlstra 3 siblings, 0 replies; 48+ messages in thread From: Kyle McMartin @ 2009-02-18 20:30 UTC (permalink / raw) To: Jason Baron Cc: mingo, rostedt, linux-kernel, acme, fweisbec, fche, peterz, compudj On Wed, Feb 18, 2009 at 02:53:28PM -0500, Jason Baron wrote: > hi, > > I think this patch is useful in understanding irq system behavior, and > for tracking down irq driver handlers that are firing too often or > spending too much time handling an irq. This approach could be extended > to the timer irq and for softirqs... > Very cool, I had written something similar a week or two ago to debug something but never had gotten around to posting it. :) Looks like it works just as well too. Glad to see it. cheers, Kyle ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-18 19:53 [PATCH] new irq tracer Jason Baron ` (2 preceding siblings ...) 2009-02-18 20:30 ` Kyle McMartin @ 2009-02-18 21:15 ` Peter Zijlstra 2009-02-18 21:35 ` Jason Baron 2009-02-19 1:46 ` Frederic Weisbecker 3 siblings, 2 replies; 48+ messages in thread From: Peter Zijlstra @ 2009-02-18 21:15 UTC (permalink / raw) To: Jason Baron; +Cc: mingo, rostedt, linux-kernel, acme, fweisbec, fche, compudj On Wed, 2009-02-18 at 14:53 -0500, Jason Baron wrote: > hi, > > Using a copule of tracepoints i've implemented an 'irq tracer' which has > the following column format: > > [time][irq][action][handled][cpu][length] > > for example on my system: > > [ 1038.927248381] 28 eth0 1 3 0.000002745 > [ 1038.927544688] 28 eth0 1 3 0.000002520 > [ 1038.927593609] 28 eth0 1 3 0.000002509 > [ 1038.974087675] 29 ahci 1 1 0.000013135 > [ 1038.974288475] 29 ahci 1 1 0.000005542 > [ 1038.974414324] 29 ahci 1 1 0.000007953 > > I think this patch is useful in understanding irq system behavior, and > for tracking down irq driver handlers that are firing too often or > spending too much time handling an irq. This approach could be extended > to the timer irq and for softirqs... The function graph tracer can already do this, it has a special section to recognize irq entry function, and already measures time spend. I guess with Steve's new triggers (probes or whatever they ended up being called) you could limit it to just IRQ entry points. ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-18 21:15 ` Peter Zijlstra @ 2009-02-18 21:35 ` Jason Baron 2009-02-18 21:46 ` Peter Zijlstra 2009-02-19 1:46 ` Frederic Weisbecker 1 sibling, 1 reply; 48+ messages in thread From: Jason Baron @ 2009-02-18 21:35 UTC (permalink / raw) To: Peter Zijlstra Cc: mingo, rostedt, linux-kernel, acme, fweisbec, fche, compudj On Wed, Feb 18, 2009 at 10:15:25PM +0100, Peter Zijlstra wrote: > On Wed, 2009-02-18 at 14:53 -0500, Jason Baron wrote: > > hi, > > > > Using a copule of tracepoints i've implemented an 'irq tracer' which has > > the following column format: > > > > [time][irq][action][handled][cpu][length] > > > > for example on my system: > > > > [ 1038.927248381] 28 eth0 1 3 0.000002745 > > [ 1038.927544688] 28 eth0 1 3 0.000002520 > > [ 1038.927593609] 28 eth0 1 3 0.000002509 > > [ 1038.974087675] 29 ahci 1 1 0.000013135 > > [ 1038.974288475] 29 ahci 1 1 0.000005542 > > [ 1038.974414324] 29 ahci 1 1 0.000007953 > > > > I think this patch is useful in understanding irq system behavior, and > > for tracking down irq driver handlers that are firing too often or > > spending too much time handling an irq. This approach could be extended > > to the timer irq and for softirqs... > > The function graph tracer can already do this, it has a special section > to recognize irq entry function, and already measures time spend. > > I guess with Steve's new triggers (probes or whatever they ended up > being called) you could limit it to just IRQ entry points. > i did look at the graph tracer first. While it does measure irq related functions, it does not give this level of detail concerning which irq #, which irq handler is involved, and whether or not the irq was handled successfully or not. So I believe this tracepoints add a level of detail that the graph tracer does not have. Furthermore, this patch requires 2 tracepoints, not instrumentation for all kernel functions. thanks, -Jason ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-18 21:35 ` Jason Baron @ 2009-02-18 21:46 ` Peter Zijlstra 2009-02-18 22:02 ` Frank Ch. Eigler ` (2 more replies) 0 siblings, 3 replies; 48+ messages in thread From: Peter Zijlstra @ 2009-02-18 21:46 UTC (permalink / raw) To: Jason Baron; +Cc: mingo, rostedt, linux-kernel, acme, fweisbec, fche, compudj On Wed, 2009-02-18 at 16:35 -0500, Jason Baron wrote: > On Wed, Feb 18, 2009 at 10:15:25PM +0100, Peter Zijlstra wrote: > > On Wed, 2009-02-18 at 14:53 -0500, Jason Baron wrote: > > > hi, > > > > > > Using a copule of tracepoints i've implemented an 'irq tracer' which has > > > the following column format: > > > > > > [time][irq][action][handled][cpu][length] > > > > > > for example on my system: > > > > > > [ 1038.927248381] 28 eth0 1 3 0.000002745 > > > [ 1038.927544688] 28 eth0 1 3 0.000002520 > > > [ 1038.927593609] 28 eth0 1 3 0.000002509 > > > [ 1038.974087675] 29 ahci 1 1 0.000013135 > > > [ 1038.974288475] 29 ahci 1 1 0.000005542 > > > [ 1038.974414324] 29 ahci 1 1 0.000007953 > > > > > > I think this patch is useful in understanding irq system behavior, and > > > for tracking down irq driver handlers that are firing too often or > > > spending too much time handling an irq. This approach could be extended > > > to the timer irq and for softirqs... > > > > The function graph tracer can already do this, it has a special section > > to recognize irq entry function, and already measures time spend. > > > > I guess with Steve's new triggers (probes or whatever they ended up > > being called) you could limit it to just IRQ entry points. > > > > i did look at the graph tracer first. While it does measure irq related > functions, it does not give this level of detail concerning which irq #, > which irq handler is involved, and whether or not the irq was handled > successfully or not. So I believe this tracepoints add a level of detail > that the graph tracer does not have. Furthermore, this patch requires 2 > tracepoints, not instrumentation for all kernel functions. How useful is that return value? Much of the other data is already available, /proc/interrupts will happily tell you the source of your interrupt storm. The irq-off latency tracer will tell you if stuff takes too much time, the graph tracer can tell you what is taking how much time. I really am having a difficult time seeing the use in such narrow tracers. ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-18 21:46 ` Peter Zijlstra @ 2009-02-18 22:02 ` Frank Ch. Eigler 2009-02-18 22:10 ` Peter Zijlstra 2009-02-18 23:34 ` Kyle McMartin 2009-02-19 2:13 ` Frederic Weisbecker 2 siblings, 1 reply; 48+ messages in thread From: Frank Ch. Eigler @ 2009-02-18 22:02 UTC (permalink / raw) To: Peter Zijlstra Cc: Jason Baron, mingo, rostedt, linux-kernel, acme, fweisbec, compudj Hi - On Wed, Feb 18, 2009 at 10:46:20PM +0100, Peter Zijlstra wrote: > [...] > How useful is that return value? "somewhat"? > Much of the other data is already available, /proc/interrupts will > happily tell you the source of your interrupt storm. The irq-off > latency tracer will tell you if stuff takes too much time, the graph > tracer can tell you what is taking how much time. Doesn't it seem like this is too much work to have answer a simple if particular question? > I really am having a difficult time seeing the use in such narrow > tracers. Part of the problem may come from defining "tracers" as something limited to ftrace engines. Once such tracepoints are in the kernel, more powerful analytical tools may be attached to them. - FChE ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-18 22:02 ` Frank Ch. Eigler @ 2009-02-18 22:10 ` Peter Zijlstra 2009-02-18 22:23 ` Frank Ch. Eigler 2009-02-20 19:52 ` Jason Baron 0 siblings, 2 replies; 48+ messages in thread From: Peter Zijlstra @ 2009-02-18 22:10 UTC (permalink / raw) To: Frank Ch. Eigler Cc: Jason Baron, mingo, rostedt, linux-kernel, acme, fweisbec, compudj On Wed, 2009-02-18 at 17:02 -0500, Frank Ch. Eigler wrote: > > I really am having a difficult time seeing the use in such narrow > > tracers. > > Part of the problem may come from defining "tracers" as something > limited to ftrace engines. Once such tracepoints are in the kernel, > more powerful analytical tools may be attached to them. ftrace graph tracer is by far the most powerful thing I've seen, there's nothing limiting about ftrace. What is limiting are these puny little tracers that have no real value. A much better purpose for these tracepoints would be augmenting data in existing tracers like the graph/function/sched tracer. ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-18 22:10 ` Peter Zijlstra @ 2009-02-18 22:23 ` Frank Ch. Eigler 2009-02-18 23:21 ` Peter Zijlstra 2009-02-20 19:52 ` Jason Baron 1 sibling, 1 reply; 48+ messages in thread From: Frank Ch. Eigler @ 2009-02-18 22:23 UTC (permalink / raw) To: Peter Zijlstra Cc: Jason Baron, mingo, rostedt, linux-kernel, acme, fweisbec, compudj Hi - On Wed, Feb 18, 2009 at 11:10:35PM +0100, Peter Zijlstra wrote: > > > I really am having a difficult time seeing the use in such narrow > > > tracers. > > > > Part of the problem may come from defining "tracers" as something > > limited to ftrace engines. Once such tracepoints are in the kernel, > > more powerful analytical tools may be attached to them. > > ftrace graph tracer is by far the most powerful thing I've seen [...] Be that as it may, what you suggested required separate correlation of data with /proc/interrupts contents. > What is limiting are these puny little tracers that have no real value. Which limited resource would even puny tracers exhaust? > A much better purpose for these tracepoints would be augmenting data in > existing tracers like the graph/function/sched tracer. Be more specific. How would you augment those tracers with e.g. individual irq numbers, their disposition status (HANDLED etc.). - FChE ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-18 22:23 ` Frank Ch. Eigler @ 2009-02-18 23:21 ` Peter Zijlstra 0 siblings, 0 replies; 48+ messages in thread From: Peter Zijlstra @ 2009-02-18 23:21 UTC (permalink / raw) To: Frank Ch. Eigler Cc: Jason Baron, mingo, rostedt, linux-kernel, acme, fweisbec, compudj On Wed, 2009-02-18 at 17:23 -0500, Frank Ch. Eigler wrote: > Hi - > > On Wed, Feb 18, 2009 at 11:10:35PM +0100, Peter Zijlstra wrote: > > > > I really am having a difficult time seeing the use in such narrow > > > > tracers. > > > > > > Part of the problem may come from defining "tracers" as something > > > limited to ftrace engines. Once such tracepoints are in the kernel, > > > more powerful analytical tools may be attached to them. > > > > ftrace graph tracer is by far the most powerful thing I've seen [...] > > Be that as it may, what you suggested required separate correlation of > data with /proc/interrupts contents. That was to illustrate that the proposed tracer doesn't add much value. Its almost 200 lines of code that needs maintenance to provide information that's mostly already available, seems like the wrong tradeoff to me. > > What is limiting are these puny little tracers that have no real value. > > Which limited resource would even puny tracers exhaust? Usability and maintenance. But loss of the bigger picture is the main complaint. > > A much better purpose for these tracepoints would be augmenting data in > > existing tracers like the graph/function/sched tracer. > > Be more specific. How would you augment those tracers with e.g. > individual irq numbers, their disposition status (HANDLED etc.). With a mixture of creativity and code ;-) Perhaps by creating a way to provide argument and return values to the function call data, and registering tracepoints to obtain these. ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-18 22:10 ` Peter Zijlstra 2009-02-18 22:23 ` Frank Ch. Eigler @ 2009-02-20 19:52 ` Jason Baron 2009-02-21 3:39 ` Frederic Weisbecker 2009-02-22 3:38 ` KOSAKI Motohiro 1 sibling, 2 replies; 48+ messages in thread From: Jason Baron @ 2009-02-20 19:52 UTC (permalink / raw) To: Peter Zijlstra Cc: Frank Ch. Eigler, mingo, rostedt, linux-kernel, acme, fweisbec, compudj On Wed, Feb 18, 2009 at 11:10:35PM +0100, Peter Zijlstra wrote: > On Wed, 2009-02-18 at 17:02 -0500, Frank Ch. Eigler wrote: > > > > I really am having a difficult time seeing the use in such narrow > > > tracers. > > > > Part of the problem may come from defining "tracers" as something > > limited to ftrace engines. Once such tracepoints are in the kernel, > > more powerful analytical tools may be attached to them. > > ftrace graph tracer is by far the most powerful thing I've seen, there's > nothing limiting about ftrace. > > What is limiting are these puny little tracers that have no real value. > > A much better purpose for these tracepoints would be augmenting data in > existing tracers like the graph/function/sched tracer. > ok...using these two tracepoints i've augmented the graph tracer to add a new column that prints the irq # and handler we're in, or -1 otherwise. its a new 'funcgraph-irq' in 'trace_options', sample output: 1) #-1:none 2.323 us | native_apic_mem_write(); 1) #-1:none + 21.891 us | } 1) #-1:none | handle_IRQ_event() { 1) #29:ahci 1.938 us | irq_to_desc(); 1) #29:ahci 2.626 us | runqueue_is_locked(); 1) #29:ahci + 21.173 us | } . . . 1) #29:ahci + 32.047 us | } 1) #29:ahci + 43.746 us | } 1) #29:ahci + 51.634 us | } 1) #29:ahci + 61.954 us | } 1) #-1:none 1.014 us | runqueue_is_locked(); 1) #-1:none | __wake_up() { 1) #-1:none 0.920 us | _spin_lock_irqsave(); 1) #-1:none 1.028 us | __wake_up_common(); This seems to work pretty well - although it does get confused by certain preemption events which it can't handle...I think this can make the logs more readable. I can probably get it working if ppl thinks its valuable...or have better ways of implementing it. Code is below. thanks, -Jason diff --git a/include/trace/irq.h b/include/trace/irq.h new file mode 100644 index 0000000..ecec94a --- /dev/null +++ b/include/trace/irq.h @@ -0,0 +1,25 @@ +#ifndef _TRACE_IRQ_H +#define _TRACE_IRQ_H + +#include <linux/interrupt.h> +#include <linux/tracepoint.h> + +enum { + IRQ_ENTRY = 0, + IRQ_EXIT = 1, +}; + +struct irq_trace { + int type; + int irq; + char irq_name[10]; +}; + +DECLARE_TRACE(irq_entry, + TPPROTO(unsigned int id), + TPARGS(id)); +DECLARE_TRACE(irq_exit, + TPPROTO(unsigned int id, irqreturn_t retval), + TPARGS(id, retval)); + +#endif /* _TRACE_IRQ_H */ diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index fdff380..34053b5 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -18,6 +18,7 @@ #include <linux/rculist.h> #include <linux/hash.h> #include <linux/bootmem.h> +#include <trace/irq.h> #include "internals.h" @@ -338,6 +339,9 @@ irqreturn_t no_action(int cpl, void *dev_id) return IRQ_NONE; } +DEFINE_TRACE(irq_entry); +DEFINE_TRACE(irq_exit); + /** * handle_IRQ_event - irq action chain handler * @irq: the interrupt number @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) local_irq_enable_in_hardirq(); do { + trace_irq_entry(irq); ret = action->handler(irq, action->dev_id); + trace_irq_exit(irq, ret); if (ret == IRQ_HANDLED) status |= action->flags; retval |= ret; diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile index 627090b..58b9f7e 100644 --- a/kernel/trace/Makefile +++ b/kernel/trace/Makefile @@ -38,5 +38,6 @@ obj-$(CONFIG_POWER_TRACER) += trace_power.o obj-$(CONFIG_KMEMTRACE) += kmemtrace.o obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o +obj-$(CONFIG_IRQ_TRACER) += trace_irq.o libftrace-y := ftrace.o diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h index dbff020..6a34aaa 100644 --- a/kernel/trace/trace.h +++ b/kernel/trace/trace.h @@ -11,6 +11,7 @@ #include <trace/boot.h> #include <trace/kmemtrace.h> #include <trace/power.h> +#include <trace/irq.h> enum trace_type { __TRACE_FIRST_TYPE = 0, @@ -33,6 +34,7 @@ enum trace_type { TRACE_KMEM_ALLOC, TRACE_KMEM_FREE, TRACE_POWER, + TRACE_IRQ, TRACE_BLK, __TRACE_LAST_TYPE, @@ -173,6 +175,11 @@ struct trace_power { struct power_trace state_data; }; +struct trace_irq { + struct trace_entry ent; + struct irq_trace irq_data; +}; + struct kmemtrace_alloc_entry { struct trace_entry ent; enum kmemtrace_type_id type_id; @@ -298,6 +305,7 @@ extern void __ftrace_bad_type(void); TRACE_GRAPH_RET); \ IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\ IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \ + IF_ASSIGN(var, ent, struct trace_irq, TRACE_IRQ); \ IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \ TRACE_KMEM_ALLOC); \ IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c index 519a0ca..4c4c777 100644 --- a/kernel/trace/trace_functions_graph.c +++ b/kernel/trace/trace_functions_graph.c @@ -10,10 +10,19 @@ #include <linux/uaccess.h> #include <linux/ftrace.h> #include <linux/fs.h> +#include <trace/irq.h> #include "trace.h" #include "trace_output.h" + +struct per_cpu_irq_data { + int irq_num; + char action_str[10]; +}; +static struct per_cpu_irq_data cpu_irq_data[NR_CPUS]; +static struct trace_array *graph_trace_tr; + #define TRACE_GRAPH_INDENT 2 /* Flag options */ @@ -23,6 +32,7 @@ #define TRACE_GRAPH_PRINT_PROC 0x8 #define TRACE_GRAPH_PRINT_DURATION 0x10 #define TRACE_GRAPH_PRINT_ABS_TIME 0X20 +#define TRACE_GRAPH_PRINT_IRQ 0X40 static struct tracer_opt trace_opts[] = { /* Display overruns? (for self-debug purpose) */ @@ -37,6 +47,8 @@ static struct tracer_opt trace_opts[] = { { TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) }, /* Display absolute time of an entry */ { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) }, + /* Display irq number, -1 otherwise */ + { TRACER_OPT(funcgraph-irq, TRACE_GRAPH_PRINT_IRQ) }, { } /* Empty entry */ }; @@ -52,10 +64,17 @@ static struct tracer_flags tracer_flags = { static int graph_trace_init(struct trace_array *tr) { + int i; int ret = register_ftrace_graph(&trace_graph_return, &trace_graph_entry); if (ret) return ret; + graph_trace_tr = tr; + for (i=0;i<NR_CPUS;i++) { + cpu_irq_data[i].irq_num = -1; + sprintf(cpu_irq_data[i].action_str, "none"); + cpu_irq_data[i].action_str[4] = '\0'; + } tracing_start_cmdline_record(); return 0; @@ -63,6 +82,7 @@ static int graph_trace_init(struct trace_array *tr) static void graph_trace_reset(struct trace_array *tr) { + graph_trace_tr = tr; tracing_stop_cmdline_record(); unregister_ftrace_graph(); } @@ -110,6 +130,27 @@ print_graph_cpu(struct trace_seq *s, int cpu) return TRACE_TYPE_HANDLED; } +#define TRACE_GRAPH_IRQ_LENGTH 14 + +static enum print_line_t +print_graph_irqnum(struct trace_seq *s, int cpu) +{ + char irq_str[TRACE_GRAPH_IRQ_LENGTH]; + int len, ret; + + len = snprintf(irq_str, TRACE_GRAPH_IRQ_LENGTH, "#%d:%s", cpu_irq_data[cpu].irq_num, cpu_irq_data[cpu].action_str); + ret = trace_seq_printf(s, "%s", irq_str); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + while (len < TRACE_GRAPH_IRQ_LENGTH) { + ret = trace_seq_printf(s, " "); + if (!ret) + return TRACE_TYPE_PARTIAL_LINE; + len++; + } + return TRACE_TYPE_HANDLED; +} + #define TRACE_GRAPH_PROCINFO_LENGTH 14 static enum print_line_t @@ -504,6 +545,13 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, return TRACE_TYPE_PARTIAL_LINE; } + /* irq */ + if (tracer_flags.val & TRACE_GRAPH_PRINT_IRQ) { + ret = print_graph_irqnum(s, cpu); + if (ret == TRACE_TYPE_PARTIAL_LINE) + return TRACE_TYPE_PARTIAL_LINE; + } + /* Proc */ if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { ret = print_graph_proc(s, ent->pid); @@ -551,6 +599,13 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, return TRACE_TYPE_PARTIAL_LINE; } + /* irq */ + if (tracer_flags.val & TRACE_GRAPH_PRINT_IRQ) { + ret = print_graph_irqnum(s, cpu); + if (ret == TRACE_TYPE_PARTIAL_LINE) + return TRACE_TYPE_PARTIAL_LINE; + } + /* Proc */ if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { ret = print_graph_proc(s, ent->pid); @@ -627,6 +682,13 @@ print_graph_comment(struct print_entry *trace, struct trace_seq *s, return TRACE_TYPE_PARTIAL_LINE; } + /* irq */ + if (tracer_flags.val & TRACE_GRAPH_PRINT_IRQ) { + ret = print_graph_irqnum(s, cpu); + if (ret == TRACE_TYPE_PARTIAL_LINE) + return TRACE_TYPE_PARTIAL_LINE; + } + /* Proc */ if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { ret = print_graph_proc(s, ent->pid); @@ -699,6 +761,22 @@ print_graph_function(struct trace_iterator *iter) trace_assign_type(field, entry); return print_graph_comment(field, s, entry, iter); } + case TRACE_IRQ: { + struct trace_irq *field; + int cpu = iter->cpu; + + trace_assign_type(field, entry); + if (field->irq_data.type == IRQ_ENTRY) { + cpu_irq_data[cpu].irq_num = field->irq_data.irq; + strncpy(cpu_irq_data[cpu].action_str,field->irq_data.irq_name, 9); + cpu_irq_data[cpu].action_str[9] = '\0'; + } else { + cpu_irq_data[cpu].irq_num = -1; + sprintf(cpu_irq_data[cpu].action_str, "none"); + cpu_irq_data[cpu].action_str[4] = '\0'; + } + return TRACE_TYPE_HANDLED; + } default: return TRACE_TYPE_UNHANDLED; } @@ -753,6 +831,67 @@ static void graph_trace_close(struct trace_iterator *iter) percpu_free(iter->private); } +static void probe_irq_exit(unsigned int irq, irqreturn_t retval) +{ + struct ring_buffer_event *event; + struct trace_irq *entry; + + event = trace_buffer_lock_reserve(graph_trace_tr, TRACE_IRQ, + sizeof(*entry), 0, 0); + if (!event) + return; + entry = ring_buffer_event_data(event); + entry->irq_data.type = IRQ_EXIT; + trace_buffer_unlock_commit(graph_trace_tr, event, 0, 0); +} + +static void probe_irq_entry(unsigned int id) +{ + struct ring_buffer_event *event; + struct trace_irq *entry; + struct irq_desc *desc; + + event = trace_buffer_lock_reserve(graph_trace_tr, TRACE_IRQ, + sizeof(*entry), 0, 0); + if (!event) + return; + entry = ring_buffer_event_data(event); + entry->irq_data.irq = id; + entry->irq_data.type = IRQ_ENTRY; + desc = irq_to_desc(id); + strncpy(entry->irq_data.irq_name, desc->action->name, 9); + entry->irq_data.irq_name[9] = '\0'; + trace_buffer_unlock_commit(graph_trace_tr, event, 0, 0); +} + +static int graph_trace_set_flag(u32 old_flags, u32 bit, int set) +{ + int ret = 0; + + if (bit == TRACE_GRAPH_PRINT_IRQ) { + /* do nothing if already set */ + //if (!!set == !!(func_flags.val & TRACE_GRAPH_PRINT_IRQ)) + // return 0; + if (set) { + ret = register_trace_irq_entry(probe_irq_entry); + if (ret) { + pr_info("irq trace: irq entry tracepoint failed to register\n"); + return ret; + } + ret = register_trace_irq_exit(probe_irq_exit); + if (ret) { + pr_info("irq trace: irq exit tracepoint failed to register\n"); + unregister_trace_irq_entry(probe_irq_entry); + return ret; + } + } else { + unregister_trace_irq_entry(probe_irq_entry); + unregister_trace_irq_exit(probe_irq_exit); + } + } + return ret; +} + static struct tracer graph_trace __read_mostly = { .name = "function_graph", .open = graph_trace_open, @@ -762,6 +901,7 @@ static struct tracer graph_trace __read_mostly = { .print_line = print_graph_function, .print_header = print_graph_headers, .flags = &tracer_flags, + .set_flag = graph_trace_set_flag, #ifdef CONFIG_FTRACE_SELFTEST .selftest = trace_selftest_startup_function_graph, #endif ^ permalink raw reply related [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-20 19:52 ` Jason Baron @ 2009-02-21 3:39 ` Frederic Weisbecker 2009-02-22 3:38 ` KOSAKI Motohiro 1 sibling, 0 replies; 48+ messages in thread From: Frederic Weisbecker @ 2009-02-21 3:39 UTC (permalink / raw) To: Jason Baron Cc: Peter Zijlstra, Frank Ch. Eigler, mingo, rostedt, linux-kernel, acme, compudj On Fri, Feb 20, 2009 at 02:52:36PM -0500, Jason Baron wrote: > On Wed, Feb 18, 2009 at 11:10:35PM +0100, Peter Zijlstra wrote: > > On Wed, 2009-02-18 at 17:02 -0500, Frank Ch. Eigler wrote: > > > > > > I really am having a difficult time seeing the use in such narrow > > > > tracers. > > > > > > Part of the problem may come from defining "tracers" as something > > > limited to ftrace engines. Once such tracepoints are in the kernel, > > > more powerful analytical tools may be attached to them. > > > > ftrace graph tracer is by far the most powerful thing I've seen, there's > > nothing limiting about ftrace. > > > > What is limiting are these puny little tracers that have no real value. > > > > A much better purpose for these tracepoints would be augmenting data in > > existing tracers like the graph/function/sched tracer. > > > > ok...using these two tracepoints i've augmented the graph tracer to add > a new column that prints the irq # and handler we're in, or -1 > otherwise. its a new 'funcgraph-irq' in 'trace_options', sample output: > > > 1) #-1:none 2.323 us | native_apic_mem_write(); > 1) #-1:none + 21.891 us | } > 1) #-1:none | handle_IRQ_event() { My dream would be to see: 1) | handle_IRQ_event(29:ahci) { But it's not possible for now, not until we have the hashtable. Anyway, instead of having #-1:none, it would be better to have nothing in case of user context (or softirq). Or actually why not just an ftrace_printk when the probe is on? Since it immediately follows the call to handle_IRQ_event, we would have the following displayed: 1) | handle_IRQ_event() { 1) | /* 29 : ahci */ Because ftrace_printk send TRACE_PRINT entries which are displayed like this by the function graph tracer. It would be much more light than a whole column. > 1) #29:ahci 1.938 us | irq_to_desc(); > 1) #29:ahci 2.626 us | runqueue_is_locked(); > 1) #29:ahci + 21.173 us | } > . > . > . > 1) #29:ahci + 32.047 us | } > 1) #29:ahci + 43.746 us | } > 1) #29:ahci + 51.634 us | } > 1) #29:ahci + 61.954 us | } > 1) #-1:none 1.014 us | runqueue_is_locked(); > 1) #-1:none | __wake_up() { > 1) #-1:none 0.920 us | _spin_lock_irqsave(); > 1) #-1:none 1.028 us | __wake_up_common(); > > > This seems to work pretty well - although it does get confused by > certain preemption events which it can't handle...I think this can make > the logs more readable. I can probably get it working if ppl thinks its > valuable...or have better ways of implementing it. Code is below. > > thanks, > > -Jason > > > diff --git a/include/trace/irq.h b/include/trace/irq.h > new file mode 100644 > index 0000000..ecec94a > --- /dev/null > +++ b/include/trace/irq.h > @@ -0,0 +1,25 @@ > +#ifndef _TRACE_IRQ_H > +#define _TRACE_IRQ_H > + > +#include <linux/interrupt.h> > +#include <linux/tracepoint.h> > + > +enum { > + IRQ_ENTRY = 0, > + IRQ_EXIT = 1, > +}; > + > +struct irq_trace { > + int type; > + int irq; > + char irq_name[10]; > +}; > + > +DECLARE_TRACE(irq_entry, > + TPPROTO(unsigned int id), > + TPARGS(id)); > +DECLARE_TRACE(irq_exit, > + TPPROTO(unsigned int id, irqreturn_t retval), > + TPARGS(id, retval)); > + > +#endif /* _TRACE_IRQ_H */ > diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c > index fdff380..34053b5 100644 > --- a/kernel/irq/handle.c > +++ b/kernel/irq/handle.c > @@ -18,6 +18,7 @@ > #include <linux/rculist.h> > #include <linux/hash.h> > #include <linux/bootmem.h> > +#include <trace/irq.h> > > #include "internals.h" > > @@ -338,6 +339,9 @@ irqreturn_t no_action(int cpl, void *dev_id) > return IRQ_NONE; > } > > +DEFINE_TRACE(irq_entry); > +DEFINE_TRACE(irq_exit); > + > /** > * handle_IRQ_event - irq action chain handler > * @irq: the interrupt number > @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) > local_irq_enable_in_hardirq(); > > do { > + trace_irq_entry(irq); > ret = action->handler(irq, action->dev_id); > + trace_irq_exit(irq, ret); > if (ret == IRQ_HANDLED) > status |= action->flags; > retval |= ret; > diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile > index 627090b..58b9f7e 100644 > --- a/kernel/trace/Makefile > +++ b/kernel/trace/Makefile > @@ -38,5 +38,6 @@ obj-$(CONFIG_POWER_TRACER) += trace_power.o > obj-$(CONFIG_KMEMTRACE) += kmemtrace.o > obj-$(CONFIG_WORKQUEUE_TRACER) += trace_workqueue.o > obj-$(CONFIG_BLK_DEV_IO_TRACE) += blktrace.o > +obj-$(CONFIG_IRQ_TRACER) += trace_irq.o > > libftrace-y := ftrace.o > diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h > index dbff020..6a34aaa 100644 > --- a/kernel/trace/trace.h > +++ b/kernel/trace/trace.h > @@ -11,6 +11,7 @@ > #include <trace/boot.h> > #include <trace/kmemtrace.h> > #include <trace/power.h> > +#include <trace/irq.h> > > enum trace_type { > __TRACE_FIRST_TYPE = 0, > @@ -33,6 +34,7 @@ enum trace_type { > TRACE_KMEM_ALLOC, > TRACE_KMEM_FREE, > TRACE_POWER, > + TRACE_IRQ, > TRACE_BLK, > > __TRACE_LAST_TYPE, > @@ -173,6 +175,11 @@ struct trace_power { > struct power_trace state_data; > }; > > +struct trace_irq { > + struct trace_entry ent; > + struct irq_trace irq_data; > +}; > + > struct kmemtrace_alloc_entry { > struct trace_entry ent; > enum kmemtrace_type_id type_id; > @@ -298,6 +305,7 @@ extern void __ftrace_bad_type(void); > TRACE_GRAPH_RET); \ > IF_ASSIGN(var, ent, struct hw_branch_entry, TRACE_HW_BRANCHES);\ > IF_ASSIGN(var, ent, struct trace_power, TRACE_POWER); \ > + IF_ASSIGN(var, ent, struct trace_irq, TRACE_IRQ); \ > IF_ASSIGN(var, ent, struct kmemtrace_alloc_entry, \ > TRACE_KMEM_ALLOC); \ > IF_ASSIGN(var, ent, struct kmemtrace_free_entry, \ > diff --git a/kernel/trace/trace_functions_graph.c b/kernel/trace/trace_functions_graph.c > index 519a0ca..4c4c777 100644 > --- a/kernel/trace/trace_functions_graph.c > +++ b/kernel/trace/trace_functions_graph.c > @@ -10,10 +10,19 @@ > #include <linux/uaccess.h> > #include <linux/ftrace.h> > #include <linux/fs.h> > +#include <trace/irq.h> > > #include "trace.h" > #include "trace_output.h" > > + > +struct per_cpu_irq_data { > + int irq_num; > + char action_str[10]; > +}; > +static struct per_cpu_irq_data cpu_irq_data[NR_CPUS]; > +static struct trace_array *graph_trace_tr; > + > #define TRACE_GRAPH_INDENT 2 > > /* Flag options */ > @@ -23,6 +32,7 @@ > #define TRACE_GRAPH_PRINT_PROC 0x8 > #define TRACE_GRAPH_PRINT_DURATION 0x10 > #define TRACE_GRAPH_PRINT_ABS_TIME 0X20 > +#define TRACE_GRAPH_PRINT_IRQ 0X40 > > static struct tracer_opt trace_opts[] = { > /* Display overruns? (for self-debug purpose) */ > @@ -37,6 +47,8 @@ static struct tracer_opt trace_opts[] = { > { TRACER_OPT(funcgraph-duration, TRACE_GRAPH_PRINT_DURATION) }, > /* Display absolute time of an entry */ > { TRACER_OPT(funcgraph-abstime, TRACE_GRAPH_PRINT_ABS_TIME) }, > + /* Display irq number, -1 otherwise */ > + { TRACER_OPT(funcgraph-irq, TRACE_GRAPH_PRINT_IRQ) }, > { } /* Empty entry */ > }; > > @@ -52,10 +64,17 @@ static struct tracer_flags tracer_flags = { > > static int graph_trace_init(struct trace_array *tr) > { > + int i; > int ret = register_ftrace_graph(&trace_graph_return, > &trace_graph_entry); > if (ret) > return ret; > + graph_trace_tr = tr; > + for (i=0;i<NR_CPUS;i++) { > + cpu_irq_data[i].irq_num = -1; > + sprintf(cpu_irq_data[i].action_str, "none"); > + cpu_irq_data[i].action_str[4] = '\0'; > + } > tracing_start_cmdline_record(); > > return 0; > @@ -63,6 +82,7 @@ static int graph_trace_init(struct trace_array *tr) > > static void graph_trace_reset(struct trace_array *tr) > { > + graph_trace_tr = tr; > tracing_stop_cmdline_record(); > unregister_ftrace_graph(); > } > @@ -110,6 +130,27 @@ print_graph_cpu(struct trace_seq *s, int cpu) > return TRACE_TYPE_HANDLED; > } > > +#define TRACE_GRAPH_IRQ_LENGTH 14 > + > +static enum print_line_t > +print_graph_irqnum(struct trace_seq *s, int cpu) > +{ > + char irq_str[TRACE_GRAPH_IRQ_LENGTH]; > + int len, ret; > + > + len = snprintf(irq_str, TRACE_GRAPH_IRQ_LENGTH, "#%d:%s", cpu_irq_data[cpu].irq_num, cpu_irq_data[cpu].action_str); > + ret = trace_seq_printf(s, "%s", irq_str); > + if (!ret) > + return TRACE_TYPE_PARTIAL_LINE; > + while (len < TRACE_GRAPH_IRQ_LENGTH) { > + ret = trace_seq_printf(s, " "); > + if (!ret) > + return TRACE_TYPE_PARTIAL_LINE; > + len++; > + } > + return TRACE_TYPE_HANDLED; > +} > + > #define TRACE_GRAPH_PROCINFO_LENGTH 14 > > static enum print_line_t > @@ -504,6 +545,13 @@ print_graph_entry(struct ftrace_graph_ent_entry *field, struct trace_seq *s, > return TRACE_TYPE_PARTIAL_LINE; > } > > + /* irq */ > + if (tracer_flags.val & TRACE_GRAPH_PRINT_IRQ) { > + ret = print_graph_irqnum(s, cpu); > + if (ret == TRACE_TYPE_PARTIAL_LINE) > + return TRACE_TYPE_PARTIAL_LINE; > + } > + > /* Proc */ > if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { > ret = print_graph_proc(s, ent->pid); > @@ -551,6 +599,13 @@ print_graph_return(struct ftrace_graph_ret *trace, struct trace_seq *s, > return TRACE_TYPE_PARTIAL_LINE; > } > > + /* irq */ > + if (tracer_flags.val & TRACE_GRAPH_PRINT_IRQ) { > + ret = print_graph_irqnum(s, cpu); > + if (ret == TRACE_TYPE_PARTIAL_LINE) > + return TRACE_TYPE_PARTIAL_LINE; > + } > + > /* Proc */ > if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { > ret = print_graph_proc(s, ent->pid); > @@ -627,6 +682,13 @@ print_graph_comment(struct print_entry *trace, struct trace_seq *s, > return TRACE_TYPE_PARTIAL_LINE; > } > > + /* irq */ > + if (tracer_flags.val & TRACE_GRAPH_PRINT_IRQ) { > + ret = print_graph_irqnum(s, cpu); > + if (ret == TRACE_TYPE_PARTIAL_LINE) > + return TRACE_TYPE_PARTIAL_LINE; > + } > + > /* Proc */ > if (tracer_flags.val & TRACE_GRAPH_PRINT_PROC) { > ret = print_graph_proc(s, ent->pid); > @@ -699,6 +761,22 @@ print_graph_function(struct trace_iterator *iter) > trace_assign_type(field, entry); > return print_graph_comment(field, s, entry, iter); > } > + case TRACE_IRQ: { > + struct trace_irq *field; > + int cpu = iter->cpu; > + > + trace_assign_type(field, entry); > + if (field->irq_data.type == IRQ_ENTRY) { > + cpu_irq_data[cpu].irq_num = field->irq_data.irq; > + strncpy(cpu_irq_data[cpu].action_str,field->irq_data.irq_name, 9); > + cpu_irq_data[cpu].action_str[9] = '\0'; > + } else { > + cpu_irq_data[cpu].irq_num = -1; > + sprintf(cpu_irq_data[cpu].action_str, "none"); > + cpu_irq_data[cpu].action_str[4] = '\0'; > + } > + return TRACE_TYPE_HANDLED; > + } > default: > return TRACE_TYPE_UNHANDLED; > } > @@ -753,6 +831,67 @@ static void graph_trace_close(struct trace_iterator *iter) > percpu_free(iter->private); > } > > +static void probe_irq_exit(unsigned int irq, irqreturn_t retval) > +{ > + struct ring_buffer_event *event; > + struct trace_irq *entry; > + > + event = trace_buffer_lock_reserve(graph_trace_tr, TRACE_IRQ, > + sizeof(*entry), 0, 0); > + if (!event) > + return; > + entry = ring_buffer_event_data(event); > + entry->irq_data.type = IRQ_EXIT; > + trace_buffer_unlock_commit(graph_trace_tr, event, 0, 0); > +} > + > +static void probe_irq_entry(unsigned int id) > +{ > + struct ring_buffer_event *event; > + struct trace_irq *entry; > + struct irq_desc *desc; > + > + event = trace_buffer_lock_reserve(graph_trace_tr, TRACE_IRQ, > + sizeof(*entry), 0, 0); > + if (!event) > + return; > + entry = ring_buffer_event_data(event); > + entry->irq_data.irq = id; > + entry->irq_data.type = IRQ_ENTRY; > + desc = irq_to_desc(id); > + strncpy(entry->irq_data.irq_name, desc->action->name, 9); > + entry->irq_data.irq_name[9] = '\0'; > + trace_buffer_unlock_commit(graph_trace_tr, event, 0, 0); > +} > + > +static int graph_trace_set_flag(u32 old_flags, u32 bit, int set) > +{ > + int ret = 0; > + > + if (bit == TRACE_GRAPH_PRINT_IRQ) { > + /* do nothing if already set */ > + //if (!!set == !!(func_flags.val & TRACE_GRAPH_PRINT_IRQ)) > + // return 0; > + if (set) { > + ret = register_trace_irq_entry(probe_irq_entry); > + if (ret) { > + pr_info("irq trace: irq entry tracepoint failed to register\n"); > + return ret; > + } > + ret = register_trace_irq_exit(probe_irq_exit); > + if (ret) { > + pr_info("irq trace: irq exit tracepoint failed to register\n"); > + unregister_trace_irq_entry(probe_irq_entry); > + return ret; > + } > + } else { > + unregister_trace_irq_entry(probe_irq_entry); > + unregister_trace_irq_exit(probe_irq_exit); > + } > + } > + return ret; > +} > + > static struct tracer graph_trace __read_mostly = { > .name = "function_graph", > .open = graph_trace_open, > @@ -762,6 +901,7 @@ static struct tracer graph_trace __read_mostly = { > .print_line = print_graph_function, > .print_header = print_graph_headers, > .flags = &tracer_flags, > + .set_flag = graph_trace_set_flag, > #ifdef CONFIG_FTRACE_SELFTEST > .selftest = trace_selftest_startup_function_graph, > #endif > ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-20 19:52 ` Jason Baron 2009-02-21 3:39 ` Frederic Weisbecker @ 2009-02-22 3:38 ` KOSAKI Motohiro 2009-02-25 16:48 ` Masami Hiramatsu 1 sibling, 1 reply; 48+ messages in thread From: KOSAKI Motohiro @ 2009-02-22 3:38 UTC (permalink / raw) To: Jason Baron Cc: kosaki.motohiro, Peter Zijlstra, Frank Ch. Eigler, mingo, rostedt, linux-kernel, acme, fweisbec, compudj, Mathieu Desnoyers > /** > * handle_IRQ_event - irq action chain handler > * @irq: the interrupt number > @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) > local_irq_enable_in_hardirq(); > > do { > + trace_irq_entry(irq); > ret = action->handler(irq, action->dev_id); > + trace_irq_exit(irq, ret); > if (ret == IRQ_HANDLED) > status |= action->flags; > retval |= ret; Nobdy want unnecessary redundant tracepoint. Please discuss with mathieu, and merge his tracepoint. ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-22 3:38 ` KOSAKI Motohiro @ 2009-02-25 16:48 ` Masami Hiramatsu 2009-02-25 16:57 ` Jason Baron 2009-02-25 16:58 ` Mathieu Desnoyers 0 siblings, 2 replies; 48+ messages in thread From: Masami Hiramatsu @ 2009-02-25 16:48 UTC (permalink / raw) To: KOSAKI Motohiro, Mathieu Desnoyers Cc: Jason Baron, Peter Zijlstra, Frank Ch. Eigler, mingo, rostedt, linux-kernel, acme, fweisbec, compudj KOSAKI Motohiro wrote: >> /** >> * handle_IRQ_event - irq action chain handler >> * @irq: the interrupt number >> @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) >> local_irq_enable_in_hardirq(); >> >> do { >> + trace_irq_entry(irq); >> ret = action->handler(irq, action->dev_id); >> + trace_irq_exit(irq, ret); >> if (ret == IRQ_HANDLED) >> status |= action->flags; >> retval |= ret; > > Nobdy want unnecessary redundant tracepoint. > Please discuss with mathieu, and merge his tracepoint. Hmm, from the viewpoint of trouble shooting, the place of LTTng's tracepoint is enough. However, from the same viewpoint, it should pass irq-number to irq-exit event too, because we may lost some previous events by buffer-overflow etc. trace_irq_entry(irq, NULL); ret = _handle_IRQ_event(irq, action); trace_irq_exit(irq, ret); ^^^^ Thank you, -- Masami Hiramatsu Software Engineer Hitachi Computer Products (America) Inc. Software Solutions Division e-mail: mhiramat@redhat.com ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-25 16:48 ` Masami Hiramatsu @ 2009-02-25 16:57 ` Jason Baron 2009-02-25 17:34 ` Mathieu Desnoyers 2009-02-25 16:58 ` Mathieu Desnoyers 1 sibling, 1 reply; 48+ messages in thread From: Jason Baron @ 2009-02-25 16:57 UTC (permalink / raw) To: Masami Hiramatsu Cc: KOSAKI Motohiro, Mathieu Desnoyers, Peter Zijlstra, Frank Ch. Eigler, mingo, rostedt, linux-kernel, acme, fweisbec, compudj On Wed, Feb 25, 2009 at 11:48:28AM -0500, Masami Hiramatsu wrote: > KOSAKI Motohiro wrote: > >> /** > >> * handle_IRQ_event - irq action chain handler > >> * @irq: the interrupt number > >> @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) > >> local_irq_enable_in_hardirq(); > >> > >> do { > >> + trace_irq_entry(irq); > >> ret = action->handler(irq, action->dev_id); > >> + trace_irq_exit(irq, ret); > >> if (ret == IRQ_HANDLED) > >> status |= action->flags; > >> retval |= ret; > > > > Nobdy want unnecessary redundant tracepoint. > > Please discuss with mathieu, and merge his tracepoint. > > Hmm, from the viewpoint of trouble shooting, the place of LTTng's tracepoint > is enough. However, from the same viewpoint, it should pass irq-number > to irq-exit event too, because we may lost some previous events by buffer-overflow > etc. > > trace_irq_entry(irq, NULL); > ret = _handle_IRQ_event(irq, action); > trace_irq_exit(irq, ret); > ^^^^ > the lttng tracepoints wrap the calls to _handle_IRQ_event in 3 different places. So the above suggested irq tracepoint provides the same information with 4 less tracepoints in the code. So I believe its simpler - plus we can understand which action handlers are handling the interrupt. thanks, -Jason ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-25 16:57 ` Jason Baron @ 2009-02-25 17:34 ` Mathieu Desnoyers 2009-02-25 18:05 ` Steven Rostedt ` (2 more replies) 0 siblings, 3 replies; 48+ messages in thread From: Mathieu Desnoyers @ 2009-02-25 17:34 UTC (permalink / raw) To: Jason Baron Cc: Masami Hiramatsu, KOSAKI Motohiro, Peter Zijlstra, Frank Ch. Eigler, mingo, rostedt, linux-kernel, acme, fweisbec * Jason Baron (jbaron@redhat.com) wrote: > On Wed, Feb 25, 2009 at 11:48:28AM -0500, Masami Hiramatsu wrote: > > KOSAKI Motohiro wrote: > > >> /** > > >> * handle_IRQ_event - irq action chain handler > > >> * @irq: the interrupt number > > >> @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) > > >> local_irq_enable_in_hardirq(); > > >> > > >> do { > > >> + trace_irq_entry(irq); > > >> ret = action->handler(irq, action->dev_id); > > >> + trace_irq_exit(irq, ret); > > >> if (ret == IRQ_HANDLED) > > >> status |= action->flags; > > >> retval |= ret; > > > > > > Nobdy want unnecessary redundant tracepoint. > > > Please discuss with mathieu, and merge his tracepoint. > > > > Hmm, from the viewpoint of trouble shooting, the place of LTTng's tracepoint > > is enough. However, from the same viewpoint, it should pass irq-number > > to irq-exit event too, because we may lost some previous events by buffer-overflow > > etc. > > > > trace_irq_entry(irq, NULL); > > ret = _handle_IRQ_event(irq, action); > > trace_irq_exit(irq, ret); > > ^^^^ > > > > the lttng tracepoints wrap the calls to _handle_IRQ_event in 3 > different places. So the above suggested irq tracepoint provides the > same information with 4 less tracepoints in the code. So I believe its > simpler - plus we can understand which action handlers are handling the > interrupt. > The main thing I dislike about only tracing action->handler() calls is that you are not tracing an IRQ per se, but rather the invocation of a given handler within the interrupt. For instance, it would be difficult to calculate the maximum interrupt latency for a given interrupt line, because you don't have the "real" irq entry/exit events, just the individual handler() calls. But I agree that knowing which handler is called is important. How about this compromise : trace_irq_entry(irq, action) _handle_IRQ_event() for each action { trace_irq_handler(action, ret); ret = action->handler(irq, action->dev_id); ... } trace_irq_exit(action_ret); Would that give you the information you need ? Here trace_irq_handler would be passed the _current_ action invoked and the _previous_ action return value. Note that we should initialize irqreturn_t ret to some initial value if we do this. That should keep the tracing overhead minimal. Mathieu > thanks, > > -Jason > -- Mathieu Desnoyers OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68 ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-25 17:34 ` Mathieu Desnoyers @ 2009-02-25 18:05 ` Steven Rostedt 2009-02-25 22:12 ` Mathieu Desnoyers 2009-02-26 15:11 ` Jason Baron 2009-02-27 3:33 ` KOSAKI Motohiro 2 siblings, 1 reply; 48+ messages in thread From: Steven Rostedt @ 2009-02-25 18:05 UTC (permalink / raw) To: Mathieu Desnoyers Cc: Jason Baron, Masami Hiramatsu, KOSAKI Motohiro, Peter Zijlstra, Frank Ch. Eigler, mingo, linux-kernel, acme, fweisbec On Wed, 25 Feb 2009, Mathieu Desnoyers wrote: > * Jason Baron (jbaron@redhat.com) wrote: > > On Wed, Feb 25, 2009 at 11:48:28AM -0500, Masami Hiramatsu wrote: > > > KOSAKI Motohiro wrote: > > > >> /** > > > >> * handle_IRQ_event - irq action chain handler > > > >> * @irq: the interrupt number > > > >> @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) > > > >> local_irq_enable_in_hardirq(); > > > >> > > > >> do { > > > >> + trace_irq_entry(irq); > > > >> ret = action->handler(irq, action->dev_id); > > > >> + trace_irq_exit(irq, ret); > > > >> if (ret == IRQ_HANDLED) > > > >> status |= action->flags; > > > >> retval |= ret; > > > > > > > > Nobdy want unnecessary redundant tracepoint. > > > > Please discuss with mathieu, and merge his tracepoint. > > > > > > Hmm, from the viewpoint of trouble shooting, the place of LTTng's tracepoint > > > is enough. However, from the same viewpoint, it should pass irq-number > > > to irq-exit event too, because we may lost some previous events by buffer-overflow > > > etc. > > > > > > trace_irq_entry(irq, NULL); > > > ret = _handle_IRQ_event(irq, action); > > > trace_irq_exit(irq, ret); > > > ^^^^ > > > > > > > the lttng tracepoints wrap the calls to _handle_IRQ_event in 3 > > different places. So the above suggested irq tracepoint provides the > > same information with 4 less tracepoints in the code. So I believe its > > simpler - plus we can understand which action handlers are handling the > > interrupt. > > > > The main thing I dislike about only tracing action->handler() calls is > that you are not tracing an IRQ per se, but rather the invocation of a > given handler within the interrupt. For instance, it would be difficult > to calculate the maximum interrupt latency for a given interrupt line, > because you don't have the "real" irq entry/exit events, just the > individual handler() calls. Then use the function_graph tracer. # echo smp_apic_timer_interrupt > /debug/tracing/set_ftrace_filter # echo __irqentry_text_start >> /debug/tracing/set_ftrace_filter # echo function_graph > /debug/tracing/current_tracer # cat /debug/tracing/trace # tracer: function_graph # # CPU DURATION FUNCTION CALLS # | | | | | | | ------------------------------------------ 1) kblockd-4012 => <idle>-0 ------------------------------------------ 1) ==========> | 1) + 35.783 us | smp_apic_timer_interrupt(); ------------------------------------------ 2) kstop/2-4013 => <idle>-0 ------------------------------------------ 2) ==========> | 2) + 13.819 us | smp_apic_timer_interrupt(); ------------------------------------------ 3) kstop/3-4014 => <idle>-0 ------------------------------------------ 3) ==========> | 3) + 24.919 us | __irqentry_text_start(); ------------------------------------------ 0) kstop/0-4011 => <idle>-0 ------------------------------------------ 0) ==========> | 0) + 24.444 us | smp_apic_timer_interrupt(); ------------------------------------------ 1) <idle>-0 => bash-3899 ------------------------------------------ 1) ==========> | 1) + 13.642 us | smp_apic_timer_interrupt(); 2) ==========> | 2) + 15.014 us | smp_apic_timer_interrupt(); 3) ==========> | 3) + 31.004 us | smp_apic_timer_interrupt(); ------------------------------------------ 0) <idle>-0 => sshd-3892 ------------------------------------------ 0) ==========> | 0) + 15.655 us | __irqentry_text_start(); 0) ==========> | 0) + 37.722 us | __irqentry_text_start(); ------------------------------------------ 0) sshd-3892 => <idle>-0 ------------------------------------------ 0) ==========> | 0) + 26.139 us | __irqentry_text_start(); 3) ==========> | 3) + 27.240 us | __irqentry_text_start(); 3) ==========> | 3) + 21.987 us | __irqentry_text_start(); And there you have your latencies ;-) With Jasons added trace points, we could add the to the event tracer and those would show up as comments here. -- Steve > > But I agree that knowing which handler is called is important. > > How about this compromise : > > trace_irq_entry(irq, action) > _handle_IRQ_event() > for each action { > trace_irq_handler(action, ret); > ret = action->handler(irq, action->dev_id); > ... > } > trace_irq_exit(action_ret); > > Would that give you the information you need ? > > Here trace_irq_handler would be passed the _current_ action invoked and > the _previous_ action return value. Note that we should initialize > irqreturn_t ret to some initial value if we do this. That should keep > the tracing overhead minimal. > ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-25 18:05 ` Steven Rostedt @ 2009-02-25 22:12 ` Mathieu Desnoyers 2009-02-25 22:20 ` Frederic Weisbecker 2009-02-25 22:21 ` Steven Rostedt 0 siblings, 2 replies; 48+ messages in thread From: Mathieu Desnoyers @ 2009-02-25 22:12 UTC (permalink / raw) To: Steven Rostedt Cc: Jason Baron, Masami Hiramatsu, KOSAKI Motohiro, Peter Zijlstra, Frank Ch. Eigler, mingo, linux-kernel, acme, fweisbec * Steven Rostedt (rostedt@goodmis.org) wrote: > > > On Wed, 25 Feb 2009, Mathieu Desnoyers wrote: > > > * Jason Baron (jbaron@redhat.com) wrote: > > > On Wed, Feb 25, 2009 at 11:48:28AM -0500, Masami Hiramatsu wrote: > > > > KOSAKI Motohiro wrote: > > > > >> /** > > > > >> * handle_IRQ_event - irq action chain handler > > > > >> * @irq: the interrupt number > > > > >> @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) > > > > >> local_irq_enable_in_hardirq(); > > > > >> > > > > >> do { > > > > >> + trace_irq_entry(irq); > > > > >> ret = action->handler(irq, action->dev_id); > > > > >> + trace_irq_exit(irq, ret); > > > > >> if (ret == IRQ_HANDLED) > > > > >> status |= action->flags; > > > > >> retval |= ret; > > > > > > > > > > Nobdy want unnecessary redundant tracepoint. > > > > > Please discuss with mathieu, and merge his tracepoint. > > > > > > > > Hmm, from the viewpoint of trouble shooting, the place of LTTng's tracepoint > > > > is enough. However, from the same viewpoint, it should pass irq-number > > > > to irq-exit event too, because we may lost some previous events by buffer-overflow > > > > etc. > > > > > > > > trace_irq_entry(irq, NULL); > > > > ret = _handle_IRQ_event(irq, action); > > > > trace_irq_exit(irq, ret); > > > > ^^^^ > > > > > > > > > > the lttng tracepoints wrap the calls to _handle_IRQ_event in 3 > > > different places. So the above suggested irq tracepoint provides the > > > same information with 4 less tracepoints in the code. So I believe its > > > simpler - plus we can understand which action handlers are handling the > > > interrupt. > > > > > > > The main thing I dislike about only tracing action->handler() calls is > > that you are not tracing an IRQ per se, but rather the invocation of a > > given handler within the interrupt. For instance, it would be difficult > > to calculate the maximum interrupt latency for a given interrupt line, > > because you don't have the "real" irq entry/exit events, just the > > individual handler() calls. > > Then use the function_graph tracer. > Sadly, the function tracer cannot be enabled on production systems. Therefore it's not a usable solution in the context where I need this. Mathieu > # echo smp_apic_timer_interrupt > /debug/tracing/set_ftrace_filter > # echo __irqentry_text_start >> /debug/tracing/set_ftrace_filter > # echo function_graph > /debug/tracing/current_tracer > # cat /debug/tracing/trace > > # tracer: function_graph > # > # CPU DURATION FUNCTION CALLS > # | | | | | | | > ------------------------------------------ > 1) kblockd-4012 => <idle>-0 > ------------------------------------------ > > 1) ==========> | > 1) + 35.783 us | smp_apic_timer_interrupt(); > ------------------------------------------ > 2) kstop/2-4013 => <idle>-0 > ------------------------------------------ > > 2) ==========> | > 2) + 13.819 us | smp_apic_timer_interrupt(); > ------------------------------------------ > 3) kstop/3-4014 => <idle>-0 > ------------------------------------------ > > 3) ==========> | > 3) + 24.919 us | __irqentry_text_start(); > ------------------------------------------ > 0) kstop/0-4011 => <idle>-0 > ------------------------------------------ > > 0) ==========> | > 0) + 24.444 us | smp_apic_timer_interrupt(); > ------------------------------------------ > 1) <idle>-0 => bash-3899 > ------------------------------------------ > > 1) ==========> | > 1) + 13.642 us | smp_apic_timer_interrupt(); > 2) ==========> | > 2) + 15.014 us | smp_apic_timer_interrupt(); > 3) ==========> | > 3) + 31.004 us | smp_apic_timer_interrupt(); > ------------------------------------------ > 0) <idle>-0 => sshd-3892 > ------------------------------------------ > > 0) ==========> | > 0) + 15.655 us | __irqentry_text_start(); > 0) ==========> | > 0) + 37.722 us | __irqentry_text_start(); > ------------------------------------------ > 0) sshd-3892 => <idle>-0 > ------------------------------------------ > > 0) ==========> | > 0) + 26.139 us | __irqentry_text_start(); > 3) ==========> | > 3) + 27.240 us | __irqentry_text_start(); > 3) ==========> | > 3) + 21.987 us | __irqentry_text_start(); > > > And there you have your latencies ;-) > > With Jasons added trace points, we could add the to the event tracer > and those would show up as comments here. > > -- Steve > > > > > > > But I agree that knowing which handler is called is important. > > > > How about this compromise : > > > > trace_irq_entry(irq, action) > > _handle_IRQ_event() > > for each action { > > trace_irq_handler(action, ret); > > ret = action->handler(irq, action->dev_id); > > ... > > } > > trace_irq_exit(action_ret); > > > > Would that give you the information you need ? > > > > Here trace_irq_handler would be passed the _current_ action invoked and > > the _previous_ action return value. Note that we should initialize > > irqreturn_t ret to some initial value if we do this. That should keep > > the tracing overhead minimal. > > > -- Mathieu Desnoyers OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68 ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-25 22:12 ` Mathieu Desnoyers @ 2009-02-25 22:20 ` Frederic Weisbecker 2009-02-25 23:13 ` Mathieu Desnoyers 2009-02-25 22:21 ` Steven Rostedt 1 sibling, 1 reply; 48+ messages in thread From: Frederic Weisbecker @ 2009-02-25 22:20 UTC (permalink / raw) To: Mathieu Desnoyers Cc: Steven Rostedt, Jason Baron, Masami Hiramatsu, KOSAKI Motohiro, Peter Zijlstra, Frank Ch. Eigler, mingo, linux-kernel, acme On Wed, Feb 25, 2009 at 05:12:10PM -0500, Mathieu Desnoyers wrote: > * Steven Rostedt (rostedt@goodmis.org) wrote: > > > > > > On Wed, 25 Feb 2009, Mathieu Desnoyers wrote: > > > > > * Jason Baron (jbaron@redhat.com) wrote: > > > > On Wed, Feb 25, 2009 at 11:48:28AM -0500, Masami Hiramatsu wrote: > > > > > KOSAKI Motohiro wrote: > > > > > >> /** > > > > > >> * handle_IRQ_event - irq action chain handler > > > > > >> * @irq: the interrupt number > > > > > >> @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) > > > > > >> local_irq_enable_in_hardirq(); > > > > > >> > > > > > >> do { > > > > > >> + trace_irq_entry(irq); > > > > > >> ret = action->handler(irq, action->dev_id); > > > > > >> + trace_irq_exit(irq, ret); > > > > > >> if (ret == IRQ_HANDLED) > > > > > >> status |= action->flags; > > > > > >> retval |= ret; > > > > > > > > > > > > Nobdy want unnecessary redundant tracepoint. > > > > > > Please discuss with mathieu, and merge his tracepoint. > > > > > > > > > > Hmm, from the viewpoint of trouble shooting, the place of LTTng's tracepoint > > > > > is enough. However, from the same viewpoint, it should pass irq-number > > > > > to irq-exit event too, because we may lost some previous events by buffer-overflow > > > > > etc. > > > > > > > > > > trace_irq_entry(irq, NULL); > > > > > ret = _handle_IRQ_event(irq, action); > > > > > trace_irq_exit(irq, ret); > > > > > ^^^^ > > > > > > > > > > > > > the lttng tracepoints wrap the calls to _handle_IRQ_event in 3 > > > > different places. So the above suggested irq tracepoint provides the > > > > same information with 4 less tracepoints in the code. So I believe its > > > > simpler - plus we can understand which action handlers are handling the > > > > interrupt. > > > > > > > > > > The main thing I dislike about only tracing action->handler() calls is > > > that you are not tracing an IRQ per se, but rather the invocation of a > > > given handler within the interrupt. For instance, it would be difficult > > > to calculate the maximum interrupt latency for a given interrupt line, > > > because you don't have the "real" irq entry/exit events, just the > > > individual handler() calls. > > > > Then use the function_graph tracer. > > > > Sadly, the function tracer cannot be enabled on production systems. > Therefore it's not a usable solution in the context where I need this. > > Mathieu Why? If this is about the interrupt latency caused by stop_machine, I still don't understand. This latency happens before the tracing (function filter which use patching), not during tracing. > > # echo smp_apic_timer_interrupt > /debug/tracing/set_ftrace_filter > > # echo __irqentry_text_start >> /debug/tracing/set_ftrace_filter > > # echo function_graph > /debug/tracing/current_tracer > > # cat /debug/tracing/trace > > > > # tracer: function_graph > > # > > # CPU DURATION FUNCTION CALLS > > # | | | | | | | > > ------------------------------------------ > > 1) kblockd-4012 => <idle>-0 > > ------------------------------------------ > > > > 1) ==========> | > > 1) + 35.783 us | smp_apic_timer_interrupt(); > > ------------------------------------------ > > 2) kstop/2-4013 => <idle>-0 > > ------------------------------------------ > > > > 2) ==========> | > > 2) + 13.819 us | smp_apic_timer_interrupt(); > > ------------------------------------------ > > 3) kstop/3-4014 => <idle>-0 > > ------------------------------------------ > > > > 3) ==========> | > > 3) + 24.919 us | __irqentry_text_start(); > > ------------------------------------------ > > 0) kstop/0-4011 => <idle>-0 > > ------------------------------------------ > > > > 0) ==========> | > > 0) + 24.444 us | smp_apic_timer_interrupt(); > > ------------------------------------------ > > 1) <idle>-0 => bash-3899 > > ------------------------------------------ > > > > 1) ==========> | > > 1) + 13.642 us | smp_apic_timer_interrupt(); > > 2) ==========> | > > 2) + 15.014 us | smp_apic_timer_interrupt(); > > 3) ==========> | > > 3) + 31.004 us | smp_apic_timer_interrupt(); > > ------------------------------------------ > > 0) <idle>-0 => sshd-3892 > > ------------------------------------------ > > > > 0) ==========> | > > 0) + 15.655 us | __irqentry_text_start(); > > 0) ==========> | > > 0) + 37.722 us | __irqentry_text_start(); > > ------------------------------------------ > > 0) sshd-3892 => <idle>-0 > > ------------------------------------------ > > > > 0) ==========> | > > 0) + 26.139 us | __irqentry_text_start(); > > 3) ==========> | > > 3) + 27.240 us | __irqentry_text_start(); > > 3) ==========> | > > 3) + 21.987 us | __irqentry_text_start(); > > > > > > And there you have your latencies ;-) > > > > With Jasons added trace points, we could add the to the event tracer > > and those would show up as comments here. > > > > -- Steve > > > > > > > > > > > > But I agree that knowing which handler is called is important. > > > > > > How about this compromise : > > > > > > trace_irq_entry(irq, action) > > > _handle_IRQ_event() > > > for each action { > > > trace_irq_handler(action, ret); > > > ret = action->handler(irq, action->dev_id); > > > ... > > > } > > > trace_irq_exit(action_ret); > > > > > > Would that give you the information you need ? > > > > > > Here trace_irq_handler would be passed the _current_ action invoked and > > > the _previous_ action return value. Note that we should initialize > > > irqreturn_t ret to some initial value if we do this. That should keep > > > the tracing overhead minimal. > > > > > > > -- > Mathieu Desnoyers > OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68 ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-25 22:20 ` Frederic Weisbecker @ 2009-02-25 23:13 ` Mathieu Desnoyers 2009-02-26 1:41 ` Steven Rostedt 0 siblings, 1 reply; 48+ messages in thread From: Mathieu Desnoyers @ 2009-02-25 23:13 UTC (permalink / raw) To: Frederic Weisbecker Cc: Steven Rostedt, Jason Baron, Masami Hiramatsu, KOSAKI Motohiro, Peter Zijlstra, Frank Ch. Eigler, mingo, linux-kernel, acme, Dominique Toupin * Frederic Weisbecker (fweisbec@gmail.com) wrote: > On Wed, Feb 25, 2009 at 05:12:10PM -0500, Mathieu Desnoyers wrote: > > * Steven Rostedt (rostedt@goodmis.org) wrote: > > > > > > > > > On Wed, 25 Feb 2009, Mathieu Desnoyers wrote: > > > > > > > * Jason Baron (jbaron@redhat.com) wrote: > > > > > On Wed, Feb 25, 2009 at 11:48:28AM -0500, Masami Hiramatsu wrote: > > > > > > KOSAKI Motohiro wrote: > > > > > > >> /** > > > > > > >> * handle_IRQ_event - irq action chain handler > > > > > > >> * @irq: the interrupt number > > > > > > >> @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) > > > > > > >> local_irq_enable_in_hardirq(); > > > > > > >> > > > > > > >> do { > > > > > > >> + trace_irq_entry(irq); > > > > > > >> ret = action->handler(irq, action->dev_id); > > > > > > >> + trace_irq_exit(irq, ret); > > > > > > >> if (ret == IRQ_HANDLED) > > > > > > >> status |= action->flags; > > > > > > >> retval |= ret; > > > > > > > > > > > > > > Nobdy want unnecessary redundant tracepoint. > > > > > > > Please discuss with mathieu, and merge his tracepoint. > > > > > > > > > > > > Hmm, from the viewpoint of trouble shooting, the place of LTTng's tracepoint > > > > > > is enough. However, from the same viewpoint, it should pass irq-number > > > > > > to irq-exit event too, because we may lost some previous events by buffer-overflow > > > > > > etc. > > > > > > > > > > > > trace_irq_entry(irq, NULL); > > > > > > ret = _handle_IRQ_event(irq, action); > > > > > > trace_irq_exit(irq, ret); > > > > > > ^^^^ > > > > > > > > > > > > > > > > the lttng tracepoints wrap the calls to _handle_IRQ_event in 3 > > > > > different places. So the above suggested irq tracepoint provides the > > > > > same information with 4 less tracepoints in the code. So I believe its > > > > > simpler - plus we can understand which action handlers are handling the > > > > > interrupt. > > > > > > > > > > > > > The main thing I dislike about only tracing action->handler() calls is > > > > that you are not tracing an IRQ per se, but rather the invocation of a > > > > given handler within the interrupt. For instance, it would be difficult > > > > to calculate the maximum interrupt latency for a given interrupt line, > > > > because you don't have the "real" irq entry/exit events, just the > > > > individual handler() calls. > > > > > > Then use the function_graph tracer. > > > > > > > Sadly, the function tracer cannot be enabled on production systems. > > Therefore it's not a usable solution in the context where I need this. > > > > Mathieu > > > Why? > If this is about the interrupt latency caused by stop_machine, I still don't understand. > This latency happens before the tracing (function filter which use patching), not during > tracing. > Given this scenario : A telecommunication system runs, but the client notices something wrong. They call their service provider. The provider enables tracing _remotely_ on the _production system_ while it's _active in the field_. Bam, those few milliseconds interrupt latencies become unacceptable. Hopefully this scenario makes the use-case clearer. The problem is not that interrupt latencies would occur while tracing is on, but rather that it would happen on a running production system when switching tracing on. This is what is totally unacceptable for this use-case. For more details about such requirements, I'm CCing Dominique Toupin from Ericsson who I'm sure would be happy to give more details about this if needed. Mathieu > > > > # echo smp_apic_timer_interrupt > /debug/tracing/set_ftrace_filter > > > # echo __irqentry_text_start >> /debug/tracing/set_ftrace_filter > > > # echo function_graph > /debug/tracing/current_tracer > > > # cat /debug/tracing/trace > > > > > > # tracer: function_graph > > > # > > > # CPU DURATION FUNCTION CALLS > > > # | | | | | | | > > > ------------------------------------------ > > > 1) kblockd-4012 => <idle>-0 > > > ------------------------------------------ > > > > > > 1) ==========> | > > > 1) + 35.783 us | smp_apic_timer_interrupt(); > > > ------------------------------------------ > > > 2) kstop/2-4013 => <idle>-0 > > > ------------------------------------------ > > > > > > 2) ==========> | > > > 2) + 13.819 us | smp_apic_timer_interrupt(); > > > ------------------------------------------ > > > 3) kstop/3-4014 => <idle>-0 > > > ------------------------------------------ > > > > > > 3) ==========> | > > > 3) + 24.919 us | __irqentry_text_start(); > > > ------------------------------------------ > > > 0) kstop/0-4011 => <idle>-0 > > > ------------------------------------------ > > > > > > 0) ==========> | > > > 0) + 24.444 us | smp_apic_timer_interrupt(); > > > ------------------------------------------ > > > 1) <idle>-0 => bash-3899 > > > ------------------------------------------ > > > > > > 1) ==========> | > > > 1) + 13.642 us | smp_apic_timer_interrupt(); > > > 2) ==========> | > > > 2) + 15.014 us | smp_apic_timer_interrupt(); > > > 3) ==========> | > > > 3) + 31.004 us | smp_apic_timer_interrupt(); > > > ------------------------------------------ > > > 0) <idle>-0 => sshd-3892 > > > ------------------------------------------ > > > > > > 0) ==========> | > > > 0) + 15.655 us | __irqentry_text_start(); > > > 0) ==========> | > > > 0) + 37.722 us | __irqentry_text_start(); > > > ------------------------------------------ > > > 0) sshd-3892 => <idle>-0 > > > ------------------------------------------ > > > > > > 0) ==========> | > > > 0) + 26.139 us | __irqentry_text_start(); > > > 3) ==========> | > > > 3) + 27.240 us | __irqentry_text_start(); > > > 3) ==========> | > > > 3) + 21.987 us | __irqentry_text_start(); > > > > > > > > > And there you have your latencies ;-) > > > > > > With Jasons added trace points, we could add the to the event tracer > > > and those would show up as comments here. > > > > > > -- Steve > > > > > > > > > > > > > > > > > But I agree that knowing which handler is called is important. > > > > > > > > How about this compromise : > > > > > > > > trace_irq_entry(irq, action) > > > > _handle_IRQ_event() > > > > for each action { > > > > trace_irq_handler(action, ret); > > > > ret = action->handler(irq, action->dev_id); > > > > ... > > > > } > > > > trace_irq_exit(action_ret); > > > > > > > > Would that give you the information you need ? > > > > > > > > Here trace_irq_handler would be passed the _current_ action invoked and > > > > the _previous_ action return value. Note that we should initialize > > > > irqreturn_t ret to some initial value if we do this. That should keep > > > > the tracing overhead minimal. > > > > > > > > > > > -- > > Mathieu Desnoyers > > OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68 > -- Mathieu Desnoyers OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68 ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-25 23:13 ` Mathieu Desnoyers @ 2009-02-26 1:41 ` Steven Rostedt 2009-02-26 12:37 ` Dominique Toupin 0 siblings, 1 reply; 48+ messages in thread From: Steven Rostedt @ 2009-02-26 1:41 UTC (permalink / raw) To: Mathieu Desnoyers Cc: Frederic Weisbecker, Jason Baron, Masami Hiramatsu, KOSAKI Motohiro, Peter Zijlstra, Frank Ch. Eigler, mingo, linux-kernel, acme, Dominique Toupin On Wed, 25 Feb 2009, Mathieu Desnoyers wrote: > > > > > > > > Then use the function_graph tracer. > > > > > > > > > > Sadly, the function tracer cannot be enabled on production systems. > > > Therefore it's not a usable solution in the context where I need this. > > > > > > Mathieu > > > > > > Why? > > If this is about the interrupt latency caused by stop_machine, I still don't understand. > > This latency happens before the tracing (function filter which use patching), not during > > tracing. > > > > Given this scenario : > > A telecommunication system runs, but the client notices something wrong. > They call their service provider. The provider enables tracing > _remotely_ on the _production system_ while it's _active in the field_. > > Bam, those few milliseconds interrupt latencies become unacceptable. > > Hopefully this scenario makes the use-case clearer. The problem is not > that interrupt latencies would occur while tracing is on, but rather > that it would happen on a running production system when switching > tracing on. This is what is totally unacceptable for this use-case. > > For more details about such requirements, I'm CCing Dominique Toupin > from Ericsson who I'm sure would be happy to give more details about > this if needed. Hmm, so this system in the field is running Linux with the Real-Time Patch? Because if it isn't it will suffer from millisecond latencies in normal operation. -- Steve ^ permalink raw reply [flat|nested] 48+ messages in thread
* RE: [PATCH] new irq tracer 2009-02-26 1:41 ` Steven Rostedt @ 2009-02-26 12:37 ` Dominique Toupin 2009-02-27 3:14 ` KOSAKI Motohiro 0 siblings, 1 reply; 48+ messages in thread From: Dominique Toupin @ 2009-02-26 12:37 UTC (permalink / raw) To: Steven Rostedt, Mathieu Desnoyers Cc: Frederic Weisbecker, Jason Baron, Masami Hiramatsu, KOSAKI Motohiro, Peter Zijlstra, Frank Ch. Eigler, mingo, linux-kernel, acme In many cases we don't use Linux real-time, we have many systems that are soft-real-time an non real-time Linux is good enough. > -----Original Message----- > From: Steven Rostedt [mailto:rostedt@goodmis.org] > Sent: 25-Feb-09 20:42 > To: Mathieu Desnoyers > Cc: Frederic Weisbecker; Jason Baron; Masami Hiramatsu; > KOSAKI Motohiro; Peter Zijlstra; Frank Ch. Eigler; > mingo@elte.hu; linux-kernel@vger.kernel.org; > acme@ghostprotocols.net; Dominique Toupin > Subject: Re: [PATCH] new irq tracer > > > On Wed, 25 Feb 2009, Mathieu Desnoyers wrote: > > > > > > > > > > Then use the function_graph tracer. > > > > > > > > > > > > > Sadly, the function tracer cannot be enabled on > production systems. > > > > Therefore it's not a usable solution in the context > where I need this. > > > > > > > > Mathieu > > > > > > > > > Why? > > > If this is about the interrupt latency caused by > stop_machine, I still don't understand. > > > This latency happens before the tracing (function filter > which use > > > patching), not during tracing. > > > > > > > Given this scenario : > > > > A telecommunication system runs, but the client notices > something wrong. > > They call their service provider. The provider enables tracing > > _remotely_ on the _production system_ while it's _active in > the field_. > > > > Bam, those few milliseconds interrupt latencies become unacceptable. > > > > Hopefully this scenario makes the use-case clearer. The > problem is not > > that interrupt latencies would occur while tracing is on, > but rather > > that it would happen on a running production system when switching > > tracing on. This is what is totally unacceptable for this use-case. > > > > For more details about such requirements, I'm CCing > Dominique Toupin > > from Ericsson who I'm sure would be happy to give more > details about > > this if needed. > > Hmm, so this system in the field is running Linux with the > Real-Time Patch? Because if it isn't it will suffer from > millisecond latencies in normal operation. > > -- Steve > > ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-26 12:37 ` Dominique Toupin @ 2009-02-27 3:14 ` KOSAKI Motohiro 2009-02-27 3:29 ` Steven Rostedt ` (2 more replies) 0 siblings, 3 replies; 48+ messages in thread From: KOSAKI Motohiro @ 2009-02-27 3:14 UTC (permalink / raw) To: Dominique Toupin Cc: kosaki.motohiro, Steven Rostedt, Mathieu Desnoyers, Frederic Weisbecker, Jason Baron, Masami Hiramatsu, Peter Zijlstra, Frank Ch. Eigler, mingo, linux-kernel, acme > > > Given this scenario : > > > > > > A telecommunication system runs, but the client notices > > something wrong. > > > They call their service provider. The provider enables tracing > > > _remotely_ on the _production system_ while it's _active in > > the field_. > > > > > > Bam, those few milliseconds interrupt latencies become unacceptable. > > > > > > Hopefully this scenario makes the use-case clearer. The > > problem is not > > > that interrupt latencies would occur while tracing is on, > > but rather > > > that it would happen on a running production system when switching > > > tracing on. This is what is totally unacceptable for this use-case. > > > > > > For more details about such requirements, I'm CCing > > Dominique Toupin > > > from Ericsson who I'm sure would be happy to give more > > details about > > > this if needed. > > > > Hmm, so this system in the field is running Linux with the > > Real-Time Patch? Because if it isn't it will suffer from > > millisecond latencies in normal operation. > > In many cases we don't use Linux real-time, we have many systems that > are soft-real-time an non real-time Linux is good enough. > Agreed, rt-patch seems off topics. we discuss to mainline kernel. Steven, I sitll think nobody use ftrace on production system now yet. Do you know actual production user? ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-27 3:14 ` KOSAKI Motohiro @ 2009-02-27 3:29 ` Steven Rostedt 2009-02-27 3:36 ` Steven Rostedt 2009-02-27 7:23 ` Peter Zijlstra 2 siblings, 0 replies; 48+ messages in thread From: Steven Rostedt @ 2009-02-27 3:29 UTC (permalink / raw) To: KOSAKI Motohiro Cc: Dominique Toupin, Mathieu Desnoyers, Frederic Weisbecker, Jason Baron, Masami Hiramatsu, Peter Zijlstra, Frank Ch. Eigler, mingo, linux-kernel, acme On Fri, 27 Feb 2009, KOSAKI Motohiro wrote: > > > > Given this scenario : > > > > > > > > A telecommunication system runs, but the client notices > > > something wrong. > > > > They call their service provider. The provider enables tracing > > > > _remotely_ on the _production system_ while it's _active in > > > the field_. > > > > > > > > Bam, those few milliseconds interrupt latencies become unacceptable. > > > > > > > > Hopefully this scenario makes the use-case clearer. The > > > problem is not > > > > that interrupt latencies would occur while tracing is on, > > > but rather > > > > that it would happen on a running production system when switching > > > > tracing on. This is what is totally unacceptable for this use-case. > > > > > > > > For more details about such requirements, I'm CCing > > > Dominique Toupin > > > > from Ericsson who I'm sure would be happy to give more > > > details about > > > > this if needed. > > > > > > Hmm, so this system in the field is running Linux with the > > > Real-Time Patch? Because if it isn't it will suffer from > > > millisecond latencies in normal operation. > > > > In many cases we don't use Linux real-time, we have many systems that > > are soft-real-time an non real-time Linux is good enough. > > > > Agreed, rt-patch seems off topics. we discuss to mainline kernel. > > Steven, I sitll think nobody use ftrace on production system now yet. > Do you know actual production user? First lets separate out "ftrace" and the function tracer. ftrace is the infrastructure that has schedule tracing, event tracing and lots and lots of other kinds of tracing. The "stop_machine" talked about in this thread is specific for dynamic_ftrace (which is used for the function and graph tracers). Your question about "production user" is a little unfair, since function tracing did not appear till 2.6.28. (I'm ignoring 2.6.27, since the version of the function tracer there had a notorious bug). No distribution is shipping a production 2.6.28 AFIAK. Which means, I do not know of any production use of the function tracer. I have already recommended that people enable the function tracer if they are using 2.6.28 in a production environment. Given some time, when 2.6.28 is more in use, my answer would probably be "Yes, I do know it is used in a production environment". -- Steve ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-27 3:14 ` KOSAKI Motohiro 2009-02-27 3:29 ` Steven Rostedt @ 2009-02-27 3:36 ` Steven Rostedt 2009-02-27 7:48 ` KOSAKI Motohiro 2009-02-27 7:23 ` Peter Zijlstra 2 siblings, 1 reply; 48+ messages in thread From: Steven Rostedt @ 2009-02-27 3:36 UTC (permalink / raw) To: KOSAKI Motohiro Cc: Dominique Toupin, Mathieu Desnoyers, Frederic Weisbecker, Jason Baron, Masami Hiramatsu, Peter Zijlstra, Frank Ch. Eigler, mingo, linux-kernel, acme On Fri, 27 Feb 2009, KOSAKI Motohiro wrote: > > > > In many cases we don't use Linux real-time, we have many systems that > > are soft-real-time an non real-time Linux is good enough. > > > > Agreed, rt-patch seems off topics. we discuss to mainline kernel. The reason I brought up the rt-patch is that Mathieu is concerned about the small latency that happens when we run stop_machine to switch the nops to pointers to trace functions. This latency can be a couple of millisecs, but I can easily make a normal kernel suffer a couple of millisec latency by simply running hackbench (as a normal user). The point being, I think Mathieu's point about the latency of enabling the function tracer is frivolous. It is a one time shot that happens when the sysadmin purposely enables the function tracing. If this is unacceptable, then the system had better be running the rt patch because it will be hitting this unacceptable latency in normal operation. -- Steve ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-27 3:36 ` Steven Rostedt @ 2009-02-27 7:48 ` KOSAKI Motohiro 2009-02-27 8:06 ` Peter Zijlstra 2009-02-27 14:43 ` Steven Rostedt 0 siblings, 2 replies; 48+ messages in thread From: KOSAKI Motohiro @ 2009-02-27 7:48 UTC (permalink / raw) To: Steven Rostedt Cc: kosaki.motohiro, Dominique Toupin, Mathieu Desnoyers, Frederic Weisbecker, Jason Baron, Masami Hiramatsu, Peter Zijlstra, Frank Ch. Eigler, mingo, linux-kernel, acme > > On Fri, 27 Feb 2009, KOSAKI Motohiro wrote: > > > > > > In many cases we don't use Linux real-time, we have many systems that > > > are soft-real-time an non real-time Linux is good enough. > > > > > > > Agreed, rt-patch seems off topics. we discuss to mainline kernel. > > The reason I brought up the rt-patch is that Mathieu is concerned about > the small latency that happens when we run stop_machine to switch the nops > to pointers to trace functions. This latency can be a couple of millisecs, > but I can easily make a normal kernel suffer a couple of millisec latency > by simply running hackbench (as a normal user). > > The point being, I think Mathieu's point about the latency of enabling the > function tracer is frivolous. It is a one time shot that happens when the > sysadmin purposely enables the function tracing. If this is unacceptable, > then the system had better be running the rt patch because it will be > hitting this unacceptable latency in normal operation. ok, I missed last thread. thanks. I think you talk about "ftrace, x86: make kernel text writable only for conversions" thread, right? hm, it seems very good discussion. My concern is, both your and Mathieu's point is fairly reasonable and good direction. However, I would bring up another viewpoint. so, - the latency issue can happen actual production server? Mathieu and Dominique explained ACTUAL production server problem. but I believe Dominique's server never run hackbench. In other word, I agree mainline kernel has many latency corner case problems. but I don't agree corner case existing indicate no need latency improvement. Generally, actual experience indicate it's valuable to hear them reqirement. HOWEVER, I still also agree with your disliking messiness. Then, I would suggest we go into next step. I mean make the patch of mathieu's plan and mesure it. Then we can mesure number by number. my number mean, (1) How much increasing line and messiness (2) How much improving latency that's fairly comparision and typical lkml discussion style, imho. What do you think? ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-27 7:48 ` KOSAKI Motohiro @ 2009-02-27 8:06 ` Peter Zijlstra 2009-02-27 8:13 ` KOSAKI Motohiro 2009-02-27 14:43 ` Steven Rostedt 1 sibling, 1 reply; 48+ messages in thread From: Peter Zijlstra @ 2009-02-27 8:06 UTC (permalink / raw) To: KOSAKI Motohiro Cc: Steven Rostedt, Dominique Toupin, Mathieu Desnoyers, Frederic Weisbecker, Jason Baron, Masami Hiramatsu, Frank Ch. Eigler, mingo, linux-kernel, acme On Fri, 2009-02-27 at 16:48 +0900, KOSAKI Motohiro wrote: > I mean make the patch of mathieu's plan and mesure it. Then > we can mesure number by number. my number mean, > > (1) How much increasing line and messiness > (2) How much improving latency > > that's fairly comparision and typical lkml discussion style, imho. > > What do you think? Patches are welcome ;-) ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-27 8:06 ` Peter Zijlstra @ 2009-02-27 8:13 ` KOSAKI Motohiro 2009-02-27 13:10 ` Arnaldo Carvalho de Melo 0 siblings, 1 reply; 48+ messages in thread From: KOSAKI Motohiro @ 2009-02-27 8:13 UTC (permalink / raw) To: Peter Zijlstra Cc: kosaki.motohiro, Steven Rostedt, Dominique Toupin, Mathieu Desnoyers, Frederic Weisbecker, Jason Baron, Masami Hiramatsu, Frank Ch. Eigler, mingo, linux-kernel, acme > On Fri, 2009-02-27 at 16:48 +0900, KOSAKI Motohiro wrote: > > > I mean make the patch of mathieu's plan and mesure it. Then > > we can mesure number by number. my number mean, > > > > (1) How much increasing line and messiness > > (2) How much improving latency > > > > that's fairly comparision and typical lkml discussion style, imho. > > > > What do you think? > > Patches are welcome ;-) Why sad face? Am I talking about funny thing? ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-27 8:13 ` KOSAKI Motohiro @ 2009-02-27 13:10 ` Arnaldo Carvalho de Melo 0 siblings, 0 replies; 48+ messages in thread From: Arnaldo Carvalho de Melo @ 2009-02-27 13:10 UTC (permalink / raw) To: KOSAKI Motohiro Cc: Peter Zijlstra, Steven Rostedt, Dominique Toupin, Mathieu Desnoyers, Frederic Weisbecker, Jason Baron, Masami Hiramatsu, Frank Ch. Eigler, mingo, linux-kernel On Fri, Feb 27, 2009 at 5:13 AM, KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> wrote: >> On Fri, 2009-02-27 at 16:48 +0900, KOSAKI Motohiro wrote: >> >> > I mean make the patch of mathieu's plan and mesure it. Then >> > we can mesure number by number. my number mean, >> > >> > (1) How much increasing line and messiness >> > (2) How much improving latency >> > >> > that's fairly comparision and typical lkml discussion style, imho. >> > >> > What do you think? >> >> Patches are welcome ;-) > > Why sad face? Am I talking about funny thing? Nope, its just that the way you're talking is looking too managerial. IOW, instead of proposing measurements, do them and show the results, how does that sound? And ;-) is not a sad face in my dictionary, but an ironic one :-) - Arnaldo ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-27 7:48 ` KOSAKI Motohiro 2009-02-27 8:06 ` Peter Zijlstra @ 2009-02-27 14:43 ` Steven Rostedt 1 sibling, 0 replies; 48+ messages in thread From: Steven Rostedt @ 2009-02-27 14:43 UTC (permalink / raw) To: KOSAKI Motohiro Cc: Dominique Toupin, Mathieu Desnoyers, Frederic Weisbecker, Jason Baron, Masami Hiramatsu, Peter Zijlstra, Frank Ch. Eigler, Ingo Molnar, LKML, Arnaldo Carvalho de Melo On Fri, 27 Feb 2009, KOSAKI Motohiro wrote: > > > > On Fri, 27 Feb 2009, KOSAKI Motohiro wrote: > > > > > > > > In many cases we don't use Linux real-time, we have many systems that > > > > are soft-real-time an non real-time Linux is good enough. > > > > > > > > > > Agreed, rt-patch seems off topics. we discuss to mainline kernel. > > > > The reason I brought up the rt-patch is that Mathieu is concerned about > > the small latency that happens when we run stop_machine to switch the nops > > to pointers to trace functions. This latency can be a couple of millisecs, > > but I can easily make a normal kernel suffer a couple of millisec latency > > by simply running hackbench (as a normal user). > > > > The point being, I think Mathieu's point about the latency of enabling the > > function tracer is frivolous. It is a one time shot that happens when the > > sysadmin purposely enables the function tracing. If this is unacceptable, > > then the system had better be running the rt patch because it will be > > hitting this unacceptable latency in normal operation. > > ok, I missed last thread. thanks. > > I think you talk about "ftrace, x86: make kernel text writable only for > conversions" thread, right? > hm, it seems very good discussion. Yes, and as Mathieu and I finally convinced each other that there are two different issues there (we both were trying to convince the other about the same point. Sorta like the two blind man explaining what an Elephant looks like ;-) text_poke vs touching kernel mappings break_points vs stop_machine The real issue came down to the latter. > > My concern is, both your and Mathieu's point is fairly reasonable and > good direction. > > However, I would bring up another viewpoint. so, > > - the latency issue can happen actual production server? > > > Mathieu and Dominique explained ACTUAL production server problem. > but I believe Dominique's server never run hackbench. > > In other word, I agree mainline kernel has many latency corner case problems. > but I don't agree corner case existing indicate no need latency improvement. > Generally, actual experience indicate it's valuable to hear them requirement. Then the production system had better not do any load at all. hackbench is a trivial way to produce the problem, and I used it only as an example that one can see. But anytime you have a large quantity of requests coming in, and waking up many threads, you run the risk of taking a 3 or 4ms interrupts off latency. I've measured this running database applications. Using 2.6.28, I did some measurements of the function tracer stop_machine. Simply by using irqsoff tracer: # wc -l available_filter_functions 19043 That is 19 thousand available trace points in the kernel. All of them are nops when tracing is disabled. But to activate all of them by running stop_machine can take a bit of time. It will also add a bit of overhead to the system when tracing is active simply because you have 19 thousand trace points! # cd /debug/tracing # echo irqsoff > current_tracer # sysctl kernel.ftrace_enabled kernel.ftrace_enabled = 1 # echo 0 > tracing_max_latency # sysctl kernel.ftrace_enabled=0 kernel.ftrace_enabled = 0 # cat latency_trace > full-disable.txt # echo 0 > tracing_max_latency # sysctl kernel.ftrace_enabled=1 kernel.ftrace_enabled = 1 # cat latency_trace > full-enable.txt # echo do_fork > set_ftrace_filter # echo 0 > tracing_max_latency # sysctl kernel.ftrace_enabled=0 kernel.ftrace_enabled = 0 # cat latency_trace > one-func-disable.txt # echo 0 > tracing_max_latency # sysctl kernel.ftrace_enabled=1 kernel.ftrace_enabled = 1 # cat latency_trace > one-func-enable.txt # head available_filter_functions > set_ftrace_filter # echo 0 > tracing_max_latency # sysctl kernel.ftrace_enabled=0 kernel.ftrace_enabled = 0 # cat latency_trace > ten-func-disable.txt # echo 0 > tracing_max_latency # sysctl kernel.ftrace_enabled=1 kernel.ftrace_enabled = 1 # cat latency_trace > ten-func-enable.txt # head -100 available_filter_functions > set_ftrace_filter # echo 0 > tracing_max_latency # sysctl kernel.ftrace_enabled=0 kernel.ftrace_enabled = 0 # cat latency_trace > hundred-func-disable.txt # echo 0 > tracing_max_latency # sysctl kernel.ftrace_enabled=1 kernel.ftrace_enabled = 1 # cat latency_trace > hundred-func-enable.txt # sysctrl kernel.ftrace_enabled=0 kernel.ftrace_enabled = 0 # echo 0 > tracing_max_latency # /usr/local/bin/hackbench 50 Time: 18.693 # cat latency_trace > hackbench.txt The above output traces and the available_filter_functions can be found at http://rostedt.homelinux.com/traces/ The actual measured latencies are up on the top left corner. The trace itself might be a little off because it is still recording the trace after it recorded the latency, and you might see a few more microsecs in the trace output. Here's the latencies: one-disable: 564us one-enable: 567us ten-disable: 547us ten-enable: 549us Funny how enabling 10 took less than one? Perhaps cache issues. hundred-disable: 580us hundred-enable: 581us full-disable: (19043) 9352us full-enable: (19043) 19081us hackbench: 3859us For some reason, full enabling always seems to take longer than full disabling. Not sure why. Now lets look at this data. You can enable 100 different call sites in just over a half a millisec. The latency that Mathieu is complaining about is just a half a millisec. That I can guarantee you, a production system doing any kind of work, will hit that latency, with any non RT Linux kernel. Yes, you could say, "19ms! That is a lot!" Well, do not enable all 19 thousand trace points at once. Just having them enabled will cause a bit of overhead that is noticeable. Mathieu pointed out that I said I would not have this done on an active production system. He is right. Although if something is going wrong with the production system, it may be worth taking this hit to analyze the situation. Or ever a periodical thing on off hours with a notice to customers that analysis of the system is being performed from (2am to 3am EST), expect some degradation in performance. You can do this. It is a controlled environment. > > HOWEVER, I still also agree with your disliking messiness. > Then, I would suggest we go into next step. > > I mean make the patch of Mathieu's plan and measure it. Then > we can mesure number by number. my number mean, > > (1) How much increasing line and messiness > (2) How much improving latency > > that's fairly comparision and typical lkml discussion style, imho. > > What do you think? The problem with the above is that to replace stop_machine, I would need to implement the breakpoint method. This is by no means a trivial change. It may take weeks to get right. And for what? To get rid of a half a millisec latency that can happen at any time during normal operation? Note, the stop_machine method can be ported to other archs trivially. David Miller ported it in 20 minutes to Sparc64. A break point scheme would take a lot more development by each arch maintainer, and risk adding many more bugs. -- Steve ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-27 3:14 ` KOSAKI Motohiro 2009-02-27 3:29 ` Steven Rostedt 2009-02-27 3:36 ` Steven Rostedt @ 2009-02-27 7:23 ` Peter Zijlstra 2 siblings, 0 replies; 48+ messages in thread From: Peter Zijlstra @ 2009-02-27 7:23 UTC (permalink / raw) To: KOSAKI Motohiro Cc: Dominique Toupin, Steven Rostedt, Mathieu Desnoyers, Frederic Weisbecker, Jason Baron, Masami Hiramatsu, Frank Ch. Eigler, mingo, linux-kernel, acme On Fri, 2009-02-27 at 12:14 +0900, KOSAKI Motohiro wrote: > > > > Given this scenario : > > > > > > > > A telecommunication system runs, but the client notices > > > something wrong. > > > > They call their service provider. The provider enables tracing > > > > _remotely_ on the _production system_ while it's _active in > > > the field_. > > > > > > > > Bam, those few milliseconds interrupt latencies become unacceptable. > > > > > > > > Hopefully this scenario makes the use-case clearer. The > > > problem is not > > > > that interrupt latencies would occur while tracing is on, > > > but rather > > > > that it would happen on a running production system when switching > > > > tracing on. This is what is totally unacceptable for this use-case. > > > > > > > > For more details about such requirements, I'm CCing > > > Dominique Toupin > > > > from Ericsson who I'm sure would be happy to give more > > > details about > > > > this if needed. > > > > > > Hmm, so this system in the field is running Linux with the > > > Real-Time Patch? Because if it isn't it will suffer from > > > millisecond latencies in normal operation. > > > > In many cases we don't use Linux real-time, we have many systems that > > are soft-real-time an non real-time Linux is good enough. > > > > Agreed, rt-patch seems off topics. we discuss to mainline kernel. Exactly, hence you should not worry about ms irq-off latencies, since mainline is happy to generate those for you, regardless of function-trace. The only way to complain about those is if your base system is better than that, and currently only preempt-rt provides that. So unless you're on preempt-rt, complaining about ms-order irq latencies just isn't a valid argument. ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-25 22:12 ` Mathieu Desnoyers 2009-02-25 22:20 ` Frederic Weisbecker @ 2009-02-25 22:21 ` Steven Rostedt 1 sibling, 0 replies; 48+ messages in thread From: Steven Rostedt @ 2009-02-25 22:21 UTC (permalink / raw) To: Mathieu Desnoyers Cc: Jason Baron, Masami Hiramatsu, KOSAKI Motohiro, Peter Zijlstra, Frank Ch. Eigler, mingo, linux-kernel, acme, fweisbec On Wed, 25 Feb 2009, Mathieu Desnoyers wrote: > > > The main thing I dislike about only tracing action->handler() calls is > > > that you are not tracing an IRQ per se, but rather the invocation of a > > > given handler within the interrupt. For instance, it would be difficult > > > to calculate the maximum interrupt latency for a given interrupt line, > > > because you don't have the "real" irq entry/exit events, just the > > > individual handler() calls. > > > > Then use the function_graph tracer. > > > > Sadly, the function tracer cannot be enabled on production systems. > Therefore it's not a usable solution in the context where I need this. And why not? It has zero overhead when disabled. -- Steve ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-25 17:34 ` Mathieu Desnoyers 2009-02-25 18:05 ` Steven Rostedt @ 2009-02-26 15:11 ` Jason Baron 2009-02-26 15:32 ` Steven Rostedt ` (3 more replies) 2009-02-27 3:33 ` KOSAKI Motohiro 2 siblings, 4 replies; 48+ messages in thread From: Jason Baron @ 2009-02-26 15:11 UTC (permalink / raw) To: Mathieu Desnoyers, mingo Cc: Masami Hiramatsu, KOSAKI Motohiro, Peter Zijlstra, Frank Ch. Eigler, mingo, rostedt, linux-kernel, acme, fweisbec On Wed, Feb 25, 2009 at 12:34:12PM -0500, Mathieu Desnoyers wrote: > * Jason Baron (jbaron@redhat.com) wrote: > > On Wed, Feb 25, 2009 at 11:48:28AM -0500, Masami Hiramatsu wrote: > > > KOSAKI Motohiro wrote: > > > >> /** > > > >> * handle_IRQ_event - irq action chain handler > > > >> * @irq: the interrupt number > > > >> @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) > > > >> local_irq_enable_in_hardirq(); > > > >> > > > >> do { > > > >> + trace_irq_entry(irq); > > > >> ret = action->handler(irq, action->dev_id); > > > >> + trace_irq_exit(irq, ret); > > > >> if (ret == IRQ_HANDLED) > > > >> status |= action->flags; > > > >> retval |= ret; > > > > > > > > Nobdy want unnecessary redundant tracepoint. > > > > Please discuss with mathieu, and merge his tracepoint. > > > > > > Hmm, from the viewpoint of trouble shooting, the place of LTTng's tracepoint > > > is enough. However, from the same viewpoint, it should pass irq-number > > > to irq-exit event too, because we may lost some previous events by buffer-overflow > > > etc. > > > > > > trace_irq_entry(irq, NULL); > > > ret = _handle_IRQ_event(irq, action); > > > trace_irq_exit(irq, ret); > > > ^^^^ > > > > > > > the lttng tracepoints wrap the calls to _handle_IRQ_event in 3 > > different places. So the above suggested irq tracepoint provides the > > same information with 4 less tracepoints in the code. So I believe its > > simpler - plus we can understand which action handlers are handling the > > interrupt. > > > > The main thing I dislike about only tracing action->handler() calls is > that you are not tracing an IRQ per se, but rather the invocation of a > given handler within the interrupt. For instance, it would be difficult > to calculate the maximum interrupt latency for a given interrupt line, > because you don't have the "real" irq entry/exit events, just the > individual handler() calls. > > But I agree that knowing which handler is called is important. > > How about this compromise : > > trace_irq_entry(irq, action) > _handle_IRQ_event() > for each action { > trace_irq_handler(action, ret); > ret = action->handler(irq, action->dev_id); > ... > } > trace_irq_exit(action_ret); > > Would that give you the information you need ? > > Here trace_irq_handler would be passed the _current_ action invoked and > the _previous_ action return value. Note that we should initialize > irqreturn_t ret to some initial value if we do this. That should keep > the tracing overhead minimal. > maybe...although that would require re-arranging the 'while' loop in 'handle_IRQ_event' from a do..while loop to a 'while' loop, which will require an extra branch check, and then we still have to record the last 'ret' value. I'm not that keen on re-arranging this for trace data... Using Steve's new 'DEFINE_TRACE_FMT', I can get function graph trace as follows using the original two tracepoints (patch below): 3) | handle_IRQ_event() { 3) | /* (irq_handler_entry) irq=28 handler=eth0 */ 3) | e1000_intr_msi() { 3) 2.460 us | __napi_schedule(); 3) 9.416 us | } 3) | /* (irq_handler_exit) irq=28 handler=eth0 return=handled */ 3) + 22.935 us | } thanks, -Jason Signed-off-by: Jason Baron <jbaron@redhat.com> --- include/trace/irq.h | 9 +++++++++ include/trace/irq_event_types.h | 17 +++++++++++++++++ kernel/irq/handle.c | 6 ++++++ kernel/trace/events.c | 2 ++ 4 files changed, 34 insertions(+), 0 deletions(-) create mode 100644 include/trace/irq.h create mode 100644 include/trace/irq_event_types.h diff --git a/include/trace/irq.h b/include/trace/irq.h new file mode 100644 index 0000000..ff5d449 --- /dev/null +++ b/include/trace/irq.h @@ -0,0 +1,9 @@ +#ifndef _TRACE_IRQ_H +#define _TRACE_IRQ_H + +#include <linux/interrupt.h> +#include <linux/tracepoint.h> + +#include <trace/irq_event_types.h> + +#endif diff --git a/include/trace/irq_event_types.h b/include/trace/irq_event_types.h new file mode 100644 index 0000000..5889bfd --- /dev/null +++ b/include/trace/irq_event_types.h @@ -0,0 +1,17 @@ + +/* use <trace/irq.h> instead */ +#ifndef DEFINE_TRACE_FMT +# error Do not include this file directly. +# error Unless you know what you are doing. +#endif + +DEFINE_TRACE_FMT(irq_handler_entry, + TPPROTO(int irq, struct irqaction *action), + TPARGS(irq, action), + TPFMT("irq=%d handler=%s", irq, action->name)); + +DEFINE_TRACE_FMT(irq_handler_exit, + TPPROTO(int irq, struct irqaction *action, int ret), + TPARGS(irq, action, ret), + TPFMT("irq=%d handler=%s return=%s", + irq, action->name, ret ? "handled" : "unhandled")); diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c index fdff380..bcebc2e 100644 --- a/kernel/irq/handle.c +++ b/kernel/irq/handle.c @@ -18,6 +18,7 @@ #include <linux/rculist.h> #include <linux/hash.h> #include <linux/bootmem.h> +#include <trace/irq.h> #include "internals.h" @@ -338,6 +339,9 @@ irqreturn_t no_action(int cpl, void *dev_id) return IRQ_NONE; } +DEFINE_TRACE(irq_handler_entry); +DEFINE_TRACE(irq_handler_exit); + /** * handle_IRQ_event - irq action chain handler * @irq: the interrupt number @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) local_irq_enable_in_hardirq(); do { + trace_irq_handler_entry(irq, action); ret = action->handler(irq, action->dev_id); + trace_irq_handler_exit(irq, action, ret); if (ret == IRQ_HANDLED) status |= action->flags; retval |= ret; diff --git a/kernel/trace/events.c b/kernel/trace/events.c index 38c89ee..3c75623 100644 --- a/kernel/trace/events.c +++ b/kernel/trace/events.c @@ -6,8 +6,10 @@ /* trace/<type>.h here */ #include <trace/sched.h> +#include <trace/irq.h> #include "trace_events.h" /* trace/<type>_event_types.h here */ #include <trace/sched_event_types.h> +#include <trace/irq_event_types.h> ^ permalink raw reply related [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-26 15:11 ` Jason Baron @ 2009-02-26 15:32 ` Steven Rostedt 2009-02-26 15:35 ` Ingo Molnar 2009-02-26 15:40 ` Peter Zijlstra ` (2 subsequent siblings) 3 siblings, 1 reply; 48+ messages in thread From: Steven Rostedt @ 2009-02-26 15:32 UTC (permalink / raw) To: Jason Baron Cc: Mathieu Desnoyers, mingo, Masami Hiramatsu, KOSAKI Motohiro, Peter Zijlstra, Frank Ch. Eigler, linux-kernel, acme, fweisbec On Thu, 26 Feb 2009, Jason Baron wrote: > > maybe...although that would require re-arranging the 'while' loop in > 'handle_IRQ_event' from a do..while loop to a 'while' loop, which will > require an extra branch check, and then we still have to record the last 'ret' > value. I'm not that keen on re-arranging this for trace data... > > Using Steve's new 'DEFINE_TRACE_FMT', I can get function graph trace > as follows using the original two tracepoints (patch below): > > 3) | handle_IRQ_event() { > 3) | /* (irq_handler_entry) irq=28 handler=eth0 */ > 3) | e1000_intr_msi() { > 3) 2.460 us | __napi_schedule(); > 3) 9.416 us | } > 3) | /* (irq_handler_exit) irq=28 handler=eth0 return=handled */ > 3) + 22.935 us | } > > thanks, > > -Jason Looks good, one minor note: > > > Signed-off-by: Jason Baron <jbaron@redhat.com> > --- > > include/trace/irq.h | 9 +++++++++ > include/trace/irq_event_types.h | 17 +++++++++++++++++ > kernel/irq/handle.c | 6 ++++++ > kernel/trace/events.c | 2 ++ > 4 files changed, 34 insertions(+), 0 deletions(-) > create mode 100644 include/trace/irq.h > create mode 100644 include/trace/irq_event_types.h > > > diff --git a/include/trace/irq.h b/include/trace/irq.h > new file mode 100644 > index 0000000..ff5d449 > --- /dev/null > +++ b/include/trace/irq.h > @@ -0,0 +1,9 @@ > +#ifndef _TRACE_IRQ_H > +#define _TRACE_IRQ_H > + > +#include <linux/interrupt.h> > +#include <linux/tracepoint.h> > + > +#include <trace/irq_event_types.h> > + > +#endif > diff --git a/include/trace/irq_event_types.h b/include/trace/irq_event_types.h > new file mode 100644 > index 0000000..5889bfd > --- /dev/null > +++ b/include/trace/irq_event_types.h > @@ -0,0 +1,17 @@ > + > +/* use <trace/irq.h> instead */ > +#ifndef DEFINE_TRACE_FMT > +# error Do not include this file directly. > +# error Unless you know what you are doing. > +#endif > + > +DEFINE_TRACE_FMT(irq_handler_entry, I've renamed DEFINE_TRACE_FMT to TRACE_FORMAT. I believe that is already in Ingo's tree. Other than that: Acked-by: Steven Rostedt <srostedt@redhat.com> Thanks, -- Steve > + TPPROTO(int irq, struct irqaction *action), > + TPARGS(irq, action), > + TPFMT("irq=%d handler=%s", irq, action->name)); > + > +DEFINE_TRACE_FMT(irq_handler_exit, > + TPPROTO(int irq, struct irqaction *action, int ret), > + TPARGS(irq, action, ret), > + TPFMT("irq=%d handler=%s return=%s", > + irq, action->name, ret ? "handled" : "unhandled")); > diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c > index fdff380..bcebc2e 100644 > --- a/kernel/irq/handle.c > +++ b/kernel/irq/handle.c > @@ -18,6 +18,7 @@ > #include <linux/rculist.h> > #include <linux/hash.h> > #include <linux/bootmem.h> > +#include <trace/irq.h> > > #include "internals.h" > > @@ -338,6 +339,9 @@ irqreturn_t no_action(int cpl, void *dev_id) > return IRQ_NONE; > } > > +DEFINE_TRACE(irq_handler_entry); > +DEFINE_TRACE(irq_handler_exit); > + > /** > * handle_IRQ_event - irq action chain handler > * @irq: the interrupt number > @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) > local_irq_enable_in_hardirq(); > > do { > + trace_irq_handler_entry(irq, action); > ret = action->handler(irq, action->dev_id); > + trace_irq_handler_exit(irq, action, ret); > if (ret == IRQ_HANDLED) > status |= action->flags; > retval |= ret; > diff --git a/kernel/trace/events.c b/kernel/trace/events.c > index 38c89ee..3c75623 100644 > --- a/kernel/trace/events.c > +++ b/kernel/trace/events.c > @@ -6,8 +6,10 @@ > > /* trace/<type>.h here */ > #include <trace/sched.h> > +#include <trace/irq.h> > > #include "trace_events.h" > > /* trace/<type>_event_types.h here */ > #include <trace/sched_event_types.h> > +#include <trace/irq_event_types.h> > > > > > > > > ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-26 15:32 ` Steven Rostedt @ 2009-02-26 15:35 ` Ingo Molnar 0 siblings, 0 replies; 48+ messages in thread From: Ingo Molnar @ 2009-02-26 15:35 UTC (permalink / raw) To: Steven Rostedt Cc: Jason Baron, Mathieu Desnoyers, Masami Hiramatsu, KOSAKI Motohiro, Peter Zijlstra, Frank Ch. Eigler, linux-kernel, acme, fweisbec * Steven Rostedt <rostedt@goodmis.org> wrote: > On Thu, 26 Feb 2009, Jason Baron wrote: > > > > maybe...although that would require re-arranging the 'while' loop in > > 'handle_IRQ_event' from a do..while loop to a 'while' loop, which will > > require an extra branch check, and then we still have to record the last 'ret' > > value. I'm not that keen on re-arranging this for trace data... > > > > Using Steve's new 'DEFINE_TRACE_FMT', I can get function graph trace > > as follows using the original two tracepoints (patch below): > > > > 3) | handle_IRQ_event() { > > 3) | /* (irq_handler_entry) irq=28 handler=eth0 */ > > 3) | e1000_intr_msi() { > > 3) 2.460 us | __napi_schedule(); > > 3) 9.416 us | } > > 3) | /* (irq_handler_exit) irq=28 handler=eth0 return=handled */ > > 3) + 22.935 us | } > > > > thanks, > > > > -Jason > > Looks good, one minor note: [...] > > +DEFINE_TRACE_FMT(irq_handler_entry, > > I've renamed DEFINE_TRACE_FMT to TRACE_FORMAT. I believe that > is already in Ingo's tree. > > Other than that: > > Acked-by: Steven Rostedt <srostedt@redhat.com> i've changed that in the patch and have applied Jason's patch to tip:tracing/ftrace. Thanks guys! Ingo ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-26 15:11 ` Jason Baron 2009-02-26 15:32 ` Steven Rostedt @ 2009-02-26 15:40 ` Peter Zijlstra 2009-02-26 16:20 ` Frederic Weisbecker 2009-02-27 3:35 ` KOSAKI Motohiro 3 siblings, 0 replies; 48+ messages in thread From: Peter Zijlstra @ 2009-02-26 15:40 UTC (permalink / raw) To: Jason Baron Cc: Mathieu Desnoyers, mingo, Masami Hiramatsu, KOSAKI Motohiro, Frank Ch. Eigler, rostedt, linux-kernel, acme, fweisbec On Thu, 2009-02-26 at 10:11 -0500, Jason Baron wrote: > > Using Steve's new 'DEFINE_TRACE_FMT', I can get function graph trace > as follows using the original two tracepoints (patch below): > > 3) | handle_IRQ_event() { > 3) | /* (irq_handler_entry) irq=28 handler=eth0 */ > 3) | e1000_intr_msi() { > 3) 2.460 us | __napi_schedule(); > 3) 9.416 us | } > 3) | /* (irq_handler_exit) irq=28 handler=eth0 return=handled */ > 3) + 22.935 us | } > > thanks, > > -Jason > > > Signed-off-by: Jason Baron <jbaron@redhat.com> Looks good, thanks Jason! Acked-by: Peter Zijlstra <a.p.zijlstra@chello.nl> ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-26 15:11 ` Jason Baron 2009-02-26 15:32 ` Steven Rostedt 2009-02-26 15:40 ` Peter Zijlstra @ 2009-02-26 16:20 ` Frederic Weisbecker 2009-02-27 3:35 ` KOSAKI Motohiro 3 siblings, 0 replies; 48+ messages in thread From: Frederic Weisbecker @ 2009-02-26 16:20 UTC (permalink / raw) To: Jason Baron Cc: Mathieu Desnoyers, mingo, Masami Hiramatsu, KOSAKI Motohiro, Peter Zijlstra, Frank Ch. Eigler, rostedt, linux-kernel, acme On Thu, Feb 26, 2009 at 10:11:05AM -0500, Jason Baron wrote: > On Wed, Feb 25, 2009 at 12:34:12PM -0500, Mathieu Desnoyers wrote: > > * Jason Baron (jbaron@redhat.com) wrote: > > > On Wed, Feb 25, 2009 at 11:48:28AM -0500, Masami Hiramatsu wrote: > > > > KOSAKI Motohiro wrote: > > > > >> /** > > > > >> * handle_IRQ_event - irq action chain handler > > > > >> * @irq: the interrupt number > > > > >> @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) > > > > >> local_irq_enable_in_hardirq(); > > > > >> > > > > >> do { > > > > >> + trace_irq_entry(irq); > > > > >> ret = action->handler(irq, action->dev_id); > > > > >> + trace_irq_exit(irq, ret); > > > > >> if (ret == IRQ_HANDLED) > > > > >> status |= action->flags; > > > > >> retval |= ret; > > > > > > > > > > Nobdy want unnecessary redundant tracepoint. > > > > > Please discuss with mathieu, and merge his tracepoint. > > > > > > > > Hmm, from the viewpoint of trouble shooting, the place of LTTng's tracepoint > > > > is enough. However, from the same viewpoint, it should pass irq-number > > > > to irq-exit event too, because we may lost some previous events by buffer-overflow > > > > etc. > > > > > > > > trace_irq_entry(irq, NULL); > > > > ret = _handle_IRQ_event(irq, action); > > > > trace_irq_exit(irq, ret); > > > > ^^^^ > > > > > > > > > > the lttng tracepoints wrap the calls to _handle_IRQ_event in 3 > > > different places. So the above suggested irq tracepoint provides the > > > same information with 4 less tracepoints in the code. So I believe its > > > simpler - plus we can understand which action handlers are handling the > > > interrupt. > > > > > > > The main thing I dislike about only tracing action->handler() calls is > > that you are not tracing an IRQ per se, but rather the invocation of a > > given handler within the interrupt. For instance, it would be difficult > > to calculate the maximum interrupt latency for a given interrupt line, > > because you don't have the "real" irq entry/exit events, just the > > individual handler() calls. > > > > But I agree that knowing which handler is called is important. > > > > How about this compromise : > > > > trace_irq_entry(irq, action) > > _handle_IRQ_event() > > for each action { > > trace_irq_handler(action, ret); > > ret = action->handler(irq, action->dev_id); > > ... > > } > > trace_irq_exit(action_ret); > > > > Would that give you the information you need ? > > > > Here trace_irq_handler would be passed the _current_ action invoked and > > the _previous_ action return value. Note that we should initialize > > irqreturn_t ret to some initial value if we do this. That should keep > > the tracing overhead minimal. > > > > maybe...although that would require re-arranging the 'while' loop in > 'handle_IRQ_event' from a do..while loop to a 'while' loop, which will > require an extra branch check, and then we still have to record the last 'ret' > value. I'm not that keen on re-arranging this for trace data... > > Using Steve's new 'DEFINE_TRACE_FMT', I can get function graph trace > as follows using the original two tracepoints (patch below): > > 3) | handle_IRQ_event() { > 3) | /* (irq_handler_entry) irq=28 handler=eth0 */ > 3) | e1000_intr_msi() { > 3) 2.460 us | __napi_schedule(); > 3) 9.416 us | } > 3) | /* (irq_handler_exit) irq=28 handler=eth0 return=handled */ > 3) + 22.935 us | } > > thanks, > > -Jason > I'm impressed by this new TRACE_FMT system. It means that I will just need to toggle a value on a /debug/trace/events/irq_stuff/enable to have the useful informations as comments inside a trace, or in a whole dedicated traces. I've played with it a part of the night to test the bprintk patch, this is awesome! ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-26 15:11 ` Jason Baron ` (2 preceding siblings ...) 2009-02-26 16:20 ` Frederic Weisbecker @ 2009-02-27 3:35 ` KOSAKI Motohiro 3 siblings, 0 replies; 48+ messages in thread From: KOSAKI Motohiro @ 2009-02-27 3:35 UTC (permalink / raw) To: Jason Baron Cc: kosaki.motohiro, Mathieu Desnoyers, mingo, Masami Hiramatsu, Peter Zijlstra, Frank Ch. Eigler, rostedt, linux-kernel, acme, fweisbec > Using Steve's new 'DEFINE_TRACE_FMT', I can get function graph trace > as follows using the original two tracepoints (patch below): > > 3) | handle_IRQ_event() { > 3) | /* (irq_handler_entry) irq=28 handler=eth0 */ > 3) | e1000_intr_msi() { > 3) 2.460 us | __napi_schedule(); > 3) 9.416 us | } > 3) | /* (irq_handler_exit) irq=28 handler=eth0 return=handled */ > 3) + 22.935 us | } > > thanks, > > -Jason OK. I think you prove this patch is valuable enough. very thanks! Acked-by: KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> > > > Signed-off-by: Jason Baron <jbaron@redhat.com> > --- > > include/trace/irq.h | 9 +++++++++ > include/trace/irq_event_types.h | 17 +++++++++++++++++ > kernel/irq/handle.c | 6 ++++++ > kernel/trace/events.c | 2 ++ > 4 files changed, 34 insertions(+), 0 deletions(-) > create mode 100644 include/trace/irq.h > create mode 100644 include/trace/irq_event_types.h > > > diff --git a/include/trace/irq.h b/include/trace/irq.h > new file mode 100644 > index 0000000..ff5d449 > --- /dev/null > +++ b/include/trace/irq.h > @@ -0,0 +1,9 @@ > +#ifndef _TRACE_IRQ_H > +#define _TRACE_IRQ_H > + > +#include <linux/interrupt.h> > +#include <linux/tracepoint.h> > + > +#include <trace/irq_event_types.h> > + > +#endif > diff --git a/include/trace/irq_event_types.h b/include/trace/irq_event_types.h > new file mode 100644 > index 0000000..5889bfd > --- /dev/null > +++ b/include/trace/irq_event_types.h > @@ -0,0 +1,17 @@ > + > +/* use <trace/irq.h> instead */ > +#ifndef DEFINE_TRACE_FMT > +# error Do not include this file directly. > +# error Unless you know what you are doing. > +#endif > + > +DEFINE_TRACE_FMT(irq_handler_entry, > + TPPROTO(int irq, struct irqaction *action), > + TPARGS(irq, action), > + TPFMT("irq=%d handler=%s", irq, action->name)); > + > +DEFINE_TRACE_FMT(irq_handler_exit, > + TPPROTO(int irq, struct irqaction *action, int ret), > + TPARGS(irq, action, ret), > + TPFMT("irq=%d handler=%s return=%s", > + irq, action->name, ret ? "handled" : "unhandled")); > diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c > index fdff380..bcebc2e 100644 > --- a/kernel/irq/handle.c > +++ b/kernel/irq/handle.c > @@ -18,6 +18,7 @@ > #include <linux/rculist.h> > #include <linux/hash.h> > #include <linux/bootmem.h> > +#include <trace/irq.h> > > #include "internals.h" > > @@ -338,6 +339,9 @@ irqreturn_t no_action(int cpl, void *dev_id) > return IRQ_NONE; > } > > +DEFINE_TRACE(irq_handler_entry); > +DEFINE_TRACE(irq_handler_exit); > + > /** > * handle_IRQ_event - irq action chain handler > * @irq: the interrupt number > @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) > local_irq_enable_in_hardirq(); > > do { > + trace_irq_handler_entry(irq, action); > ret = action->handler(irq, action->dev_id); > + trace_irq_handler_exit(irq, action, ret); > if (ret == IRQ_HANDLED) > status |= action->flags; > retval |= ret; > diff --git a/kernel/trace/events.c b/kernel/trace/events.c > index 38c89ee..3c75623 100644 > --- a/kernel/trace/events.c > +++ b/kernel/trace/events.c > @@ -6,8 +6,10 @@ > > /* trace/<type>.h here */ > #include <trace/sched.h> > +#include <trace/irq.h> > > #include "trace_events.h" > > /* trace/<type>_event_types.h here */ > #include <trace/sched_event_types.h> > +#include <trace/irq_event_types.h> > > > > > > ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-25 17:34 ` Mathieu Desnoyers 2009-02-25 18:05 ` Steven Rostedt 2009-02-26 15:11 ` Jason Baron @ 2009-02-27 3:33 ` KOSAKI Motohiro 2009-02-27 7:25 ` Peter Zijlstra 2 siblings, 1 reply; 48+ messages in thread From: KOSAKI Motohiro @ 2009-02-27 3:33 UTC (permalink / raw) To: Mathieu Desnoyers Cc: kosaki.motohiro, Jason Baron, Masami Hiramatsu, Peter Zijlstra, Frank Ch. Eigler, mingo, rostedt, linux-kernel, acme, fweisbec > > the lttng tracepoints wrap the calls to _handle_IRQ_event in 3 > > different places. So the above suggested irq tracepoint provides the > > same information with 4 less tracepoints in the code. So I believe its > > simpler - plus we can understand which action handlers are handling the > > interrupt. > > > > The main thing I dislike about only tracing action->handler() calls is > that you are not tracing an IRQ per se, but rather the invocation of a > given handler within the interrupt. For instance, it would be difficult > to calculate the maximum interrupt latency for a given interrupt line, > because you don't have the "real" irq entry/exit events, just the > individual handler() calls. I agree with IRQ latency tracing is very important. So now, We understand we have another two good requirement. Therefore, I can agree to Mathieu's separete trace point idea and Jason's current patch. Thanks! good discussion. > > But I agree that knowing which handler is called is important. > > How about this compromise : > > trace_irq_entry(irq, action) > _handle_IRQ_event() > for each action { > trace_irq_handler(action, ret); > ret = action->handler(irq, action->dev_id); > ... > } > trace_irq_exit(action_ret); > > Would that give you the information you need ? > > Here trace_irq_handler would be passed the _current_ action invoked and > the _previous_ action return value. Note that we should initialize > irqreturn_t ret to some initial value if we do this. That should keep > the tracing overhead minimal. ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-27 3:33 ` KOSAKI Motohiro @ 2009-02-27 7:25 ` Peter Zijlstra 0 siblings, 0 replies; 48+ messages in thread From: Peter Zijlstra @ 2009-02-27 7:25 UTC (permalink / raw) To: KOSAKI Motohiro Cc: Mathieu Desnoyers, Jason Baron, Masami Hiramatsu, Frank Ch. Eigler, mingo, rostedt, linux-kernel, acme, fweisbec On Fri, 2009-02-27 at 12:33 +0900, KOSAKI Motohiro wrote: > > I agree with IRQ latency tracing is very important. Then use the irq-latency tracer, that thing is specifically created to measure and locate irq-off latency sources. We've been using it for years in the preempt-rt patch-set. ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-25 16:48 ` Masami Hiramatsu 2009-02-25 16:57 ` Jason Baron @ 2009-02-25 16:58 ` Mathieu Desnoyers 2009-02-25 17:19 ` Masami Hiramatsu 1 sibling, 1 reply; 48+ messages in thread From: Mathieu Desnoyers @ 2009-02-25 16:58 UTC (permalink / raw) To: Masami Hiramatsu Cc: KOSAKI Motohiro, Jason Baron, Peter Zijlstra, Frank Ch. Eigler, mingo, rostedt, linux-kernel, acme, fweisbec * Masami Hiramatsu (mhiramat@redhat.com) wrote: > > > KOSAKI Motohiro wrote: > >> /** > >> * handle_IRQ_event - irq action chain handler > >> * @irq: the interrupt number > >> @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) > >> local_irq_enable_in_hardirq(); > >> > >> do { > >> + trace_irq_entry(irq); > >> ret = action->handler(irq, action->dev_id); > >> + trace_irq_exit(irq, ret); > >> if (ret == IRQ_HANDLED) > >> status |= action->flags; > >> retval |= ret; > > > > Nobdy want unnecessary redundant tracepoint. > > Please discuss with mathieu, and merge his tracepoint. > > Hmm, from the viewpoint of trouble shooting, the place of LTTng's tracepoint > is enough. However, from the same viewpoint, it should pass irq-number > to irq-exit event too, because we may lost some previous events by buffer-overflow > etc. > > trace_irq_entry(irq, NULL); > ret = _handle_IRQ_event(irq, action); > trace_irq_exit(irq, ret); > ^^^^ > I seriously doubt we should consider a trace with missing events as "reliable". If your only argument is that when the buffers are not large enough we could lose events, then I think we should just hint people at doing the right thing, which is to tweak the tracer parameters (e.g. larger buffers) so they stop losing events. A trace with events lost is really a scenario close to a corrupted trace because we don't know which event has been lost, nor where. I don't think we should increase the event size to support that kind of broken scenario. Mathieu > Thank you, > > -- > Masami Hiramatsu > > Software Engineer > Hitachi Computer Products (America) Inc. > Software Solutions Division > > e-mail: mhiramat@redhat.com > -- Mathieu Desnoyers OpenPGP key fingerprint: 8CD5 52C3 8E3C 4140 715F BA06 3F25 A8FE 3BAE 9A68 ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-25 16:58 ` Mathieu Desnoyers @ 2009-02-25 17:19 ` Masami Hiramatsu 2009-02-27 3:08 ` KOSAKI Motohiro 0 siblings, 1 reply; 48+ messages in thread From: Masami Hiramatsu @ 2009-02-25 17:19 UTC (permalink / raw) To: Mathieu Desnoyers Cc: KOSAKI Motohiro, Jason Baron, Peter Zijlstra, Frank Ch. Eigler, mingo, rostedt, linux-kernel, acme, fweisbec Mathieu Desnoyers wrote: > * Masami Hiramatsu (mhiramat@redhat.com) wrote: >> >> KOSAKI Motohiro wrote: >>>> /** >>>> * handle_IRQ_event - irq action chain handler >>>> * @irq: the interrupt number >>>> @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) >>>> local_irq_enable_in_hardirq(); >>>> >>>> do { >>>> + trace_irq_entry(irq); >>>> ret = action->handler(irq, action->dev_id); >>>> + trace_irq_exit(irq, ret); >>>> if (ret == IRQ_HANDLED) >>>> status |= action->flags; >>>> retval |= ret; >>> Nobdy want unnecessary redundant tracepoint. >>> Please discuss with mathieu, and merge his tracepoint. >> Hmm, from the viewpoint of trouble shooting, the place of LTTng's tracepoint >> is enough. However, from the same viewpoint, it should pass irq-number >> to irq-exit event too, because we may lost some previous events by buffer-overflow >> etc. >> >> trace_irq_entry(irq, NULL); >> ret = _handle_IRQ_event(irq, action); >> trace_irq_exit(irq, ret); >> ^^^^ >> > > I seriously doubt we should consider a trace with missing events as > "reliable". If your only argument is that when the buffers are not large > enough we could lose events, then I think we should just hint people at > doing the right thing, which is to tweak the tracer parameters (e.g. > larger buffers) so they stop losing events. Hi Mathieu, I think it depends on what events we'd like to trace on what environment (not have so much memories). Someone(like me) may want "reliability" for those events, someone not. > A trace with events lost is really a scenario close to a corrupted > trace because we don't know which event has been lost, nor where. I > don't think we should increase the event size to support that kind of > broken scenario. Why LTTng doesn't filter those unused arguments inside their handlers? Is there no room for compromise on sharing tracepoints with other tracers? IMHO, tracepoints in the kernel should be a common infrastructure for many users. Thank you, > > Mathieu > >> Thank you, >> >> -- >> Masami Hiramatsu >> >> Software Engineer >> Hitachi Computer Products (America) Inc. >> Software Solutions Division >> >> e-mail: mhiramat@redhat.com >> > -- Masami Hiramatsu Software Engineer Hitachi Computer Products (America) Inc. Software Solutions Division e-mail: mhiramat@redhat.com ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-25 17:19 ` Masami Hiramatsu @ 2009-02-27 3:08 ` KOSAKI Motohiro 0 siblings, 0 replies; 48+ messages in thread From: KOSAKI Motohiro @ 2009-02-27 3:08 UTC (permalink / raw) To: Masami Hiramatsu Cc: kosaki.motohiro, Mathieu Desnoyers, Jason Baron, Peter Zijlstra, Frank Ch. Eigler, mingo, rostedt, linux-kernel, acme, fweisbec > >>>> /** > >>>> * handle_IRQ_event - irq action chain handler > >>>> * @irq: the interrupt number > >>>> @@ -354,7 +358,9 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action) > >>>> local_irq_enable_in_hardirq(); > >>>> > >>>> do { > >>>> + trace_irq_entry(irq); > >>>> ret = action->handler(irq, action->dev_id); > >>>> + trace_irq_exit(irq, ret); > >>>> if (ret == IRQ_HANDLED) > >>>> status |= action->flags; > >>>> retval |= ret; > >>> Nobdy want unnecessary redundant tracepoint. > >>> Please discuss with mathieu, and merge his tracepoint. > >> Hmm, from the viewpoint of trouble shooting, the place of LTTng's tracepoint > >> is enough. However, from the same viewpoint, it should pass irq-number > >> to irq-exit event too, because we may lost some previous events by buffer-overflow > >> etc. > >> > >> trace_irq_entry(irq, NULL); > >> ret = _handle_IRQ_event(irq, action); > >> trace_irq_exit(irq, ret); > >> ^^^^ > >> > > > > I seriously doubt we should consider a trace with missing events as > > "reliable". If your only argument is that when the buffers are not large > > enough we could lose events, then I think we should just hint people at > > doing the right thing, which is to tweak the tracer parameters (e.g. > > larger buffers) so they stop losing events. > > Hi Mathieu, > > I think it depends on what events we'd like to trace on what environment > (not have so much memories). > Someone(like me) may want "reliability" for those events, someone not. I don't think this viewpoint is valueable. buffer overflow problem depend on the number of AVERAGE output from tracer. typically, one irq has only one irq handler. if this change makes buffer overflow, the tracer buffer management is too silly. Just change it. > > A trace with events lost is really a scenario close to a corrupted > > trace because we don't know which event has been lost, nor where. I > > don't think we should increase the event size to support that kind of > > broken scenario. > > Why LTTng doesn't filter those unused arguments inside their handlers? > Is there no room for compromise on sharing tracepoints with other > tracers? IMHO, tracepoints in the kernel should be a common > infrastructure for many users. Agreed. last mathieu's mail only talk about lttng issue. I can imazine systemtap folks need explicit irq number argument. ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-18 21:46 ` Peter Zijlstra 2009-02-18 22:02 ` Frank Ch. Eigler @ 2009-02-18 23:34 ` Kyle McMartin 2009-02-19 2:13 ` Frederic Weisbecker 2 siblings, 0 replies; 48+ messages in thread From: Kyle McMartin @ 2009-02-18 23:34 UTC (permalink / raw) To: Peter Zijlstra Cc: Jason Baron, mingo, rostedt, linux-kernel, acme, fweisbec, fche, compudj On Wed, Feb 18, 2009 at 10:46:20PM +0100, Peter Zijlstra wrote: > Much of the other data is already available, /proc/interrupts will > happily tell you the source of your interrupt storm. The irq-off latency > tracer will tell you if stuff takes too much time, the graph tracer can > tell you what is taking how much time. > How so? It will only tell you which irq line you need to be looking at, not which handlers device is spewing the IRQs... Perhaps I'm missing something obvious, but that's what I've used a similar instrument for. regards, Kyle ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-18 21:46 ` Peter Zijlstra 2009-02-18 22:02 ` Frank Ch. Eigler 2009-02-18 23:34 ` Kyle McMartin @ 2009-02-19 2:13 ` Frederic Weisbecker 2 siblings, 0 replies; 48+ messages in thread From: Frederic Weisbecker @ 2009-02-19 2:13 UTC (permalink / raw) To: Peter Zijlstra Cc: Jason Baron, mingo, rostedt, linux-kernel, acme, fche, compudj On Wed, Feb 18, 2009 at 10:46:20PM +0100, Peter Zijlstra wrote: > On Wed, 2009-02-18 at 16:35 -0500, Jason Baron wrote: > > On Wed, Feb 18, 2009 at 10:15:25PM +0100, Peter Zijlstra wrote: > > > On Wed, 2009-02-18 at 14:53 -0500, Jason Baron wrote: > > > > hi, > > > > > > > > Using a copule of tracepoints i've implemented an 'irq tracer' which has > > > > the following column format: > > > > > > > > [time][irq][action][handled][cpu][length] > > > > > > > > for example on my system: > > > > > > > > [ 1038.927248381] 28 eth0 1 3 0.000002745 > > > > [ 1038.927544688] 28 eth0 1 3 0.000002520 > > > > [ 1038.927593609] 28 eth0 1 3 0.000002509 > > > > [ 1038.974087675] 29 ahci 1 1 0.000013135 > > > > [ 1038.974288475] 29 ahci 1 1 0.000005542 > > > > [ 1038.974414324] 29 ahci 1 1 0.000007953 > > > > > > > > I think this patch is useful in understanding irq system behavior, and > > > > for tracking down irq driver handlers that are firing too often or > > > > spending too much time handling an irq. This approach could be extended > > > > to the timer irq and for softirqs... > > > > > > The function graph tracer can already do this, it has a special section > > > to recognize irq entry function, and already measures time spend. > > > > > > I guess with Steve's new triggers (probes or whatever they ended up > > > being called) you could limit it to just IRQ entry points. > > > > > > > i did look at the graph tracer first. While it does measure irq related > > functions, it does not give this level of detail concerning which irq #, > > which irq handler is involved, and whether or not the irq was handled > > successfully or not. So I believe this tracepoints add a level of detail > > that the graph tracer does not have. Furthermore, this patch requires 2 > > tracepoints, not instrumentation for all kernel functions. > > How useful is that return value? > > Much of the other data is already available, /proc/interrupts will > happily tell you the source of your interrupt storm. The irq-off latency > tracer will tell you if stuff takes too much time, the graph tracer can > tell you what is taking how much time. > > I really am having a difficult time seeing the use in such narrow > tracers. > Well, something that could be done would be to use this irq tracing framework to produce the average of time spent in each irq handler. kernel/trace/trace_stat.c gives some infrastructure for that. BTW, I have the project to build a function hashlist, based on the function list provided by dynamic ftrace, to store the average of time spent for each traced function, using the function graph tracer. ^ permalink raw reply [flat|nested] 48+ messages in thread
* Re: [PATCH] new irq tracer 2009-02-18 21:15 ` Peter Zijlstra 2009-02-18 21:35 ` Jason Baron @ 2009-02-19 1:46 ` Frederic Weisbecker 1 sibling, 0 replies; 48+ messages in thread From: Frederic Weisbecker @ 2009-02-19 1:46 UTC (permalink / raw) To: Peter Zijlstra Cc: Jason Baron, mingo, rostedt, linux-kernel, acme, fche, compudj On Wed, Feb 18, 2009 at 10:15:25PM +0100, Peter Zijlstra wrote: > On Wed, 2009-02-18 at 14:53 -0500, Jason Baron wrote: > > hi, > > > > Using a copule of tracepoints i've implemented an 'irq tracer' which has > > the following column format: > > > > [time][irq][action][handled][cpu][length] > > > > for example on my system: > > > > [ 1038.927248381] 28 eth0 1 3 0.000002745 > > [ 1038.927544688] 28 eth0 1 3 0.000002520 > > [ 1038.927593609] 28 eth0 1 3 0.000002509 > > [ 1038.974087675] 29 ahci 1 1 0.000013135 > > [ 1038.974288475] 29 ahci 1 1 0.000005542 > > [ 1038.974414324] 29 ahci 1 1 0.000007953 > > > > I think this patch is useful in understanding irq system behavior, and > > for tracking down irq driver handlers that are firing too often or > > spending too much time handling an irq. This approach could be extended > > to the timer irq and for softirqs... > > The function graph tracer can already do this, it has a special section > to recognize irq entry function, and already measures time spend. Right, except that it doesn't trace the irq number, action and the return value, though the latter will be handled one day (probably soon). > I guess with Steve's new triggers (probes or whatever they ended up > being called) you could limit it to just IRQ entry points. We already could trace only irq function call tree through set_graph_function: echo smp_apic_timer_interrupt > set_graph_function echo do_IRQ >> set_graph_function But you will see the whole call graph: 321.913057 | 1) | do_IRQ() { 321.913058 | 1) 0.571 us | exit_idle(); 321.913059 | 1) | irq_enter() { 321.913059 | 1) 0.548 us | rcu_irq_enter(); 321.913060 | 1) 0.533 us | idle_cpu(); 321.913064 | 1) 2.696 us | } 321.913065 | 1) | handle_irq() { 321.913065 | 1) 0.533 us | irq_to_desc(); 321.913067 | 1) | handle_edge_irq() { 321.913067 | 1) 0.653 us | _spin_lock(); 321.913068 | 1) | ack_apic_edge() { 321.913069 | 1) 0.533 us | irq_to_desc(); 321.913070 | 1) | move_native_irq() { 321.913070 | 1) 0.525 us | irq_to_desc(); 321.913072 | 1) 1.577 us | } 321.913072 | 1) 0.563 us | native_apic_mem_write(); 321.913073 | 1) 4.813 us | } 321.913074 | 1) 0.601 us | _spin_unlock(); 321.913075 | 1) | handle_IRQ_event() { 321.913075 | 1) | timer_interrupt() { 321.913076 | 1) | tick_handle_oneshot_broadcast() { 321.913077 | 1) 0.631 us | _spin_lock(); 321.913078 | 1) | ktime_get() { 321.913078 | 1) | ktime_get_ts() { 321.913079 | 1) | getnstimeofday() { 321.913079 | 1) 1.735 us | read_hpet(); 321.913082 | 1) 2.854 us | } 321.913082 | 1) 0.534 us | set_normalized_timespec(); 321.913083 | 1) 4.963 us | } 321.913084 | 1) 6.015 us | } 321.913084 | 1) | tick_do_broadcast() { 321.913085 | 1) | lapic_timer_broadcast() { 321.913086 | 1) 0.624 us | flat_send_IPI_mask(); 321.913087 | 1) 1.697 us | } 321.913087 | 1) 2.944 us | } 321.913088 | 1) 0.585 us | _spin_unlock(); 321.913089 | 1) + 12.892 us | } 321.913089 | 1) + 14.079 us | } 321.913090 | 1) + 15.176 us | } 321.913091 | 1) 0.540 us | note_interrupt(); 321.913092 | 1) 0.653 us | _spin_lock(); 321.913093 | 1) 0.586 us | _spin_unlock(); 321.913094 | 1) + 27.354 us | } 321.913094 | 1) + 29.547 us | } 321.913095 | 1) | irq_exit() { 321.913096 | 1) 0.526 us | rcu_irq_exit(); 321.913097 | 1) 0.526 us | idle_cpu(); 321.913098 | 1) 2.674 us | } 321.913098 | 1) + 41.096 us | } 321.913098 | 1) <========== | Or you can only trace do_IRQ and smp_apic_timer_interrupt alone without tracing their call tree: echo smp_apic_timer_interrupt > set_ftrace_filter echo do_IRQ >> set_ftrace_filter ^ permalink raw reply [flat|nested] 48+ messages in thread
end of thread, other threads:[~2009-02-27 14:43 UTC | newest] Thread overview: 48+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2009-02-18 19:53 [PATCH] new irq tracer Jason Baron 2009-02-18 20:08 ` Steven Rostedt 2009-02-18 20:26 ` Mathieu Desnoyers 2009-02-19 1:42 ` KOSAKI Motohiro 2009-02-18 20:30 ` Kyle McMartin 2009-02-18 21:15 ` Peter Zijlstra 2009-02-18 21:35 ` Jason Baron 2009-02-18 21:46 ` Peter Zijlstra 2009-02-18 22:02 ` Frank Ch. Eigler 2009-02-18 22:10 ` Peter Zijlstra 2009-02-18 22:23 ` Frank Ch. Eigler 2009-02-18 23:21 ` Peter Zijlstra 2009-02-20 19:52 ` Jason Baron 2009-02-21 3:39 ` Frederic Weisbecker 2009-02-22 3:38 ` KOSAKI Motohiro 2009-02-25 16:48 ` Masami Hiramatsu 2009-02-25 16:57 ` Jason Baron 2009-02-25 17:34 ` Mathieu Desnoyers 2009-02-25 18:05 ` Steven Rostedt 2009-02-25 22:12 ` Mathieu Desnoyers 2009-02-25 22:20 ` Frederic Weisbecker 2009-02-25 23:13 ` Mathieu Desnoyers 2009-02-26 1:41 ` Steven Rostedt 2009-02-26 12:37 ` Dominique Toupin 2009-02-27 3:14 ` KOSAKI Motohiro 2009-02-27 3:29 ` Steven Rostedt 2009-02-27 3:36 ` Steven Rostedt 2009-02-27 7:48 ` KOSAKI Motohiro 2009-02-27 8:06 ` Peter Zijlstra 2009-02-27 8:13 ` KOSAKI Motohiro 2009-02-27 13:10 ` Arnaldo Carvalho de Melo 2009-02-27 14:43 ` Steven Rostedt 2009-02-27 7:23 ` Peter Zijlstra 2009-02-25 22:21 ` Steven Rostedt 2009-02-26 15:11 ` Jason Baron 2009-02-26 15:32 ` Steven Rostedt 2009-02-26 15:35 ` Ingo Molnar 2009-02-26 15:40 ` Peter Zijlstra 2009-02-26 16:20 ` Frederic Weisbecker 2009-02-27 3:35 ` KOSAKI Motohiro 2009-02-27 3:33 ` KOSAKI Motohiro 2009-02-27 7:25 ` Peter Zijlstra 2009-02-25 16:58 ` Mathieu Desnoyers 2009-02-25 17:19 ` Masami Hiramatsu 2009-02-27 3:08 ` KOSAKI Motohiro 2009-02-18 23:34 ` Kyle McMartin 2009-02-19 2:13 ` Frederic Weisbecker 2009-02-19 1:46 ` Frederic Weisbecker
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox