* [Xenomai-core] [PATCH, RFC] Instrumentation to detect delayed timers
@ 2010-06-08 14:21 Wolfgang Mauerer
2010-06-08 15:11 ` Philippe Gerum
0 siblings, 1 reply; 4+ messages in thread
From: Wolfgang Mauerer @ 2010-06-08 14:21 UTC (permalink / raw)
To: xenomai@xenomai.org
Hi,
the attached patch augments the timer expiration handler with some checks
for excessive timer latencies. Depending on the configuration options, it
freezes the ipipe tracer and creates a list of delayed timers.
This code was useful for us in debugging some problems with faulty hardware.
Would such a thing be suitable for mainline?
Regards, Wolfgang
---
commit 9c2a3908ac4aff21022a577552002fc2622d3670
Author: Wolfgang Mauerer <wolfgang.mauerer@domain.hid>
Date: Mon Jun 7 15:34:39 2010 +0200
This patch provides the possibility to check upon timer expiration
how much a timer has been delayed. When the delay surpasses a configurable
threshold, the ipipe tracer is frozen. Additionally, the patch allows
for the creation of a list of timers that have suffered from excessive
delays.
The mechanism can be disabled at both, compile- and runtime. There is no
overhead in the former case.
Signed-off-by: Wolfgang Mauerer <wolfgang.mauerer@domain.hid>
diff --git a/ksrc/nucleus/Kconfig b/ksrc/nucleus/Kconfig
index 211a4ad..ffed3f8 100644
--- a/ksrc/nucleus/Kconfig
+++ b/ksrc/nucleus/Kconfig
@@ -293,6 +293,24 @@ config XENO_OPT_DEBUG_TIMERS
This option activates debugging output for critical
timer-related operations performed by the Xenomai core.
+config XENO_OPT_DEBUG_TIMER_LATENCY
+ bool "Log excessive timer latencies"
+ depends on XENO_OPT_DEBUG && (XENO_OPT_STATS || IPIPE_TRACE)
+ help
+
+ This option activates recording of excessive timer latencies.
+ In contrast to the userland latency checker which is statistical
+ in nature, this test checks every expiring timer and is thus
+ more precise. However, it adds some runtime overhead when
+ activated.
+
+ When the ipipe tracer is enabled, the trace is frozen
+ when a latency maximum above the threshold set in
+ the debugfs file timer_lattest/lat_threshold_us is detected.
+ When Xenomai statistics collection is enabled, a list of
+ excessive latencies together with the names of the timers
+ is kept in the debugfs file timer_lattest/exc_timer_list.
+
config XENO_OPT_DEBUG_SYNCH_RELAX
bool "Detect mutexes held in relaxed sections"
depends on XENO_OPT_PERVASIVE && XENO_OPT_DEBUG
diff --git a/ksrc/nucleus/timer.c b/ksrc/nucleus/timer.c
index d813c4f..01dad44 100644
--- a/ksrc/nucleus/timer.c
+++ b/ksrc/nucleus/timer.c
@@ -36,11 +36,37 @@
*
*@{*/
+#include <linux/debugfs.h>
#include <nucleus/pod.h>
#include <nucleus/thread.h>
#include <nucleus/timer.h>
+#include <nucleus/trace.h>
#include <asm/xenomai/bits/timer.h>
+#ifdef CONFIG_XENO_OPT_DEBUG_TIMER_LATENCY
+#define NANO 1000000000ll
+#define MU 1000000ll
+#define NANO_PER_MU (NANO/MU)
+
+static u32 lat_thresh = 0; /* in \mu s, set to zero to disable */
+
+#ifdef CONFIG_XENO_OPT_STATS
+#include <linux/seq_file.h>
+
+#define NUM_REC_LATENCIES 500
+static int num_lat_exc_timers = -1;
+static bool lat_rec_overflow = false; /* Set when more than NUM_REC_LATENCIES
+ * excessive latencies have been
+ * observed */
+struct lat_rec {
+ char name[XNOBJECT_NAME_LEN];
+ xnsticks_t delta;
+};
+
+static struct lat_rec delayed_timers[NUM_REC_LATENCIES];
+#endif // CONFIG_XENO_OPT_STATS
+#endif // XENO_OPT_DEBUG_TIMER_LATENCY
+
static inline void xntimer_enqueue_aperiodic(xntimer_t *timer)
{
xntimerq_t *q = &timer->sched->timerqueue;
@@ -368,6 +394,32 @@ void xntimer_tick_aperiodic(void)
if (delta > (xnsticks_t)nklatency)
break;
+#ifdef CONFIG_XENO_OPT_DEBUG_TIMER_LATENCY
+ /*
+ * Check for severly delayed timers, but omit the host tick
+ * timer as it can be delayed intentionally, thus causing
+ * spurious excessive latencies (see
+ * xntimer_next_local_shot()).
+ */
+ if (lat_thresh && (timer != &sched->htimer)) {
+ delta = xnarch_tsc_to_ns(-delta);
+ if (delta > ((xnsticks_t)lat_thresh)*NANO_PER_MU) {
+ xnarch_trace_user_freeze(delta, 1);
+#ifdef CONFIG_XENO_OPT_STATS
+ if (num_lat_exc_timers < NUM_REC_LATENCIES - 1) {
+ num_lat_exc_timers++;
+ } else {
+ lat_rec_overflow = true;
+ num_lat_exc_timers = 0;
+ }
+ xnobject_copy_name(delayed_timers[num_lat_exc_timers].name,
+ timer->name);
+ delayed_timers[num_lat_exc_timers].delta = delta;
+#endif // XENO_OPT_STATS
+ }
+ }
+#endif // XENO_OPT_DEBUG_TIMER_LATENCY
+
trace_mark(xn_nucleus, timer_expire, "timer %p", timer);
xntimer_dequeue_aperiodic(timer);
@@ -1147,10 +1199,70 @@ static int timer_read_proc(char *page,
return len;
}
+#ifdef CONFIG_XENO_OPT_STATS
+static int lat_exc_seq_show(struct seq_file *f, void *v)
+{
+ int count, max;
+
+ if (num_lat_exc_timers < 0) {
+ seq_printf(f, "(No excessive timer latencies detected)\n");
+ return 0;
+ }
+
+ if (lat_rec_overflow)
+ max = NUM_REC_LATENCIES;
+ else
+ max = num_lat_exc_timers;
+
+ for (count = 0; count < max; count++) {
+ seq_printf(f, "Timer '%s', delay %lld ns\n",
+ delayed_timers[count].name,
+ delayed_timers[count].delta);
+ }
+
+ if (lat_rec_overflow) {
+ seq_printf(f, "WARNING: Latency tracking array overflow detected.\n");
+ seq_printf(f, " Not all excessive latencies are shown.\n");
+
+ }
+ return 0;
+}
+
+static int lat_exc_seq_open(struct inode *inode, struct file *file)
+{
+ return single_open(file, lat_exc_seq_show, NULL);
+}
+
+static struct file_operations lat_exc_fops = {
+ .owner = THIS_MODULE,
+ .open = lat_exc_seq_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+};
+#endif // CONFIG_XENO_OPT_STATS
+
void xntimer_init_proc(void)
{
rthal_add_proc_leaf("timer", &timer_read_proc, NULL, NULL,
rthal_proc_root);
+
+#ifdef CONFIG_XENO_OPT_DEBUG_TIMER_LATENCY
+ struct dentry *debugfs_dir;
+
+ debugfs_dir = debugfs_create_dir("timer_lattest", NULL);
+
+ if (debugfs_dir) {
+ if (!debugfs_create_u32("lat_threshold_us", 0666,
+ debugfs_dir, &lat_thresh))
+ panic("Could not create lat_threshold_us");
+
+#ifdef CONFIG_XENO_OPT_STATS
+ if (!debugfs_create_file("exc_timer_list", 0600,
+ debugfs_dir, NULL, &lat_exc_fops))
+ panic("Could not create exc_timer_list");
+#endif // XENO_OPT_STATS
+ }
+#endif // XENO_OPT_DEBUG_TIMER_LATENCY
}
void xntimer_cleanup_proc(void)
^ permalink raw reply related [flat|nested] 4+ messages in thread* Re: [Xenomai-core] [PATCH, RFC] Instrumentation to detect delayed timers 2010-06-08 14:21 [Xenomai-core] [PATCH, RFC] Instrumentation to detect delayed timers Wolfgang Mauerer @ 2010-06-08 15:11 ` Philippe Gerum 2010-06-08 16:22 ` Wolfgang Mauerer 0 siblings, 1 reply; 4+ messages in thread From: Philippe Gerum @ 2010-06-08 15:11 UTC (permalink / raw) To: Wolfgang Mauerer; +Cc: xenomai@xenomai.org On Tue, 2010-06-08 at 16:21 +0200, Wolfgang Mauerer wrote: > Hi, > > the attached patch augments the timer expiration handler with some checks > for excessive timer latencies. Depending on the configuration options, it > freezes the ipipe tracer and creates a list of delayed timers. > > This code was useful for us in debugging some problems with faulty hardware. > Would such a thing be suitable for mainline? No, I don't think so, albeit the debug feature it brings in makes sense. - the code, as is, adds significant cruft to the timer code, which is per se a problem. Debug is ok, but I really would like that code to have a much lower impact source-wise. - dependency on debugfs is wrong; we still have to take care of legacy 2.4 kernels. I know, this is a real PITA, and I'm more than willing to get rid of that "heritage" in 3.x, but for now, 2.x still requires to be 2.4-compatible. - but, beyond this, this is something that should go into the I-pipe tracer instead, exporting a new dedicated interface to log a high latency event. There, you could use whatever fits to implement it, since there is by definition no consideration for compat with legacy kernels. I'm thinking of having ipipe_trace_latency(unsigned long delta, const char *source). That would be reusable in other, non-Xenomai contexts as well. Additionally, the latency spots could appear as events in the tracer log as well, which may help debugging them a lot, since you would know about the actual context. > > Regards, Wolfgang > > --- > commit 9c2a3908ac4aff21022a577552002fc2622d3670 > Author: Wolfgang Mauerer <wolfgang.mauerer@domain.hid> > Date: Mon Jun 7 15:34:39 2010 +0200 > > This patch provides the possibility to check upon timer expiration > how much a timer has been delayed. When the delay surpasses a configurable > threshold, the ipipe tracer is frozen. Additionally, the patch allows > for the creation of a list of timers that have suffered from excessive > delays. > > The mechanism can be disabled at both, compile- and runtime. There is no > overhead in the former case. > > Signed-off-by: Wolfgang Mauerer <wolfgang.mauerer@domain.hid> > > diff --git a/ksrc/nucleus/Kconfig b/ksrc/nucleus/Kconfig > index 211a4ad..ffed3f8 100644 > --- a/ksrc/nucleus/Kconfig > +++ b/ksrc/nucleus/Kconfig > @@ -293,6 +293,24 @@ config XENO_OPT_DEBUG_TIMERS > This option activates debugging output for critical > timer-related operations performed by the Xenomai core. > > +config XENO_OPT_DEBUG_TIMER_LATENCY > + bool "Log excessive timer latencies" > + depends on XENO_OPT_DEBUG && (XENO_OPT_STATS || IPIPE_TRACE) > + help > + > + This option activates recording of excessive timer latencies. > + In contrast to the userland latency checker which is statistical > + in nature, this test checks every expiring timer and is thus > + more precise. However, it adds some runtime overhead when > + activated. > + > + When the ipipe tracer is enabled, the trace is frozen > + when a latency maximum above the threshold set in > + the debugfs file timer_lattest/lat_threshold_us is detected. > + When Xenomai statistics collection is enabled, a list of > + excessive latencies together with the names of the timers > + is kept in the debugfs file timer_lattest/exc_timer_list. > + > config XENO_OPT_DEBUG_SYNCH_RELAX > bool "Detect mutexes held in relaxed sections" > depends on XENO_OPT_PERVASIVE && XENO_OPT_DEBUG > diff --git a/ksrc/nucleus/timer.c b/ksrc/nucleus/timer.c > index d813c4f..01dad44 100644 > --- a/ksrc/nucleus/timer.c > +++ b/ksrc/nucleus/timer.c > @@ -36,11 +36,37 @@ > * > *@{*/ > > +#include <linux/debugfs.h> > #include <nucleus/pod.h> > #include <nucleus/thread.h> > #include <nucleus/timer.h> > +#include <nucleus/trace.h> > #include <asm/xenomai/bits/timer.h> > > +#ifdef CONFIG_XENO_OPT_DEBUG_TIMER_LATENCY > +#define NANO 1000000000ll > +#define MU 1000000ll > +#define NANO_PER_MU (NANO/MU) > + > +static u32 lat_thresh = 0; /* in \mu s, set to zero to disable */ > + > +#ifdef CONFIG_XENO_OPT_STATS > +#include <linux/seq_file.h> > + > +#define NUM_REC_LATENCIES 500 > +static int num_lat_exc_timers = -1; > +static bool lat_rec_overflow = false; /* Set when more than NUM_REC_LATENCIES > + * excessive latencies have been > + * observed */ > +struct lat_rec { > + char name[XNOBJECT_NAME_LEN]; > + xnsticks_t delta; > +}; > + > +static struct lat_rec delayed_timers[NUM_REC_LATENCIES]; > +#endif // CONFIG_XENO_OPT_STATS > +#endif // XENO_OPT_DEBUG_TIMER_LATENCY > + > static inline void xntimer_enqueue_aperiodic(xntimer_t *timer) > { > xntimerq_t *q = &timer->sched->timerqueue; > @@ -368,6 +394,32 @@ void xntimer_tick_aperiodic(void) > if (delta > (xnsticks_t)nklatency) > break; > > +#ifdef CONFIG_XENO_OPT_DEBUG_TIMER_LATENCY > + /* > + * Check for severly delayed timers, but omit the host tick > + * timer as it can be delayed intentionally, thus causing > + * spurious excessive latencies (see > + * xntimer_next_local_shot()). > + */ > + if (lat_thresh && (timer != &sched->htimer)) { > + delta = xnarch_tsc_to_ns(-delta); > + if (delta > ((xnsticks_t)lat_thresh)*NANO_PER_MU) { > + xnarch_trace_user_freeze(delta, 1); > +#ifdef CONFIG_XENO_OPT_STATS > + if (num_lat_exc_timers < NUM_REC_LATENCIES - 1) { > + num_lat_exc_timers++; > + } else { > + lat_rec_overflow = true; > + num_lat_exc_timers = 0; > + } > + xnobject_copy_name(delayed_timers[num_lat_exc_timers].name, > + timer->name); > + delayed_timers[num_lat_exc_timers].delta = delta; > +#endif // XENO_OPT_STATS > + } > + } > +#endif // XENO_OPT_DEBUG_TIMER_LATENCY > + > trace_mark(xn_nucleus, timer_expire, "timer %p", timer); > > xntimer_dequeue_aperiodic(timer); > @@ -1147,10 +1199,70 @@ static int timer_read_proc(char *page, > return len; > } > > +#ifdef CONFIG_XENO_OPT_STATS > +static int lat_exc_seq_show(struct seq_file *f, void *v) > +{ > + int count, max; > + > + if (num_lat_exc_timers < 0) { > + seq_printf(f, "(No excessive timer latencies detected)\n"); > + return 0; > + } > + > + if (lat_rec_overflow) > + max = NUM_REC_LATENCIES; > + else > + max = num_lat_exc_timers; > + > + for (count = 0; count < max; count++) { > + seq_printf(f, "Timer '%s', delay %lld ns\n", > + delayed_timers[count].name, > + delayed_timers[count].delta); > + } > + > + if (lat_rec_overflow) { > + seq_printf(f, "WARNING: Latency tracking array overflow detected.\n"); > + seq_printf(f, " Not all excessive latencies are shown.\n"); > + > + } > + return 0; > +} > + > +static int lat_exc_seq_open(struct inode *inode, struct file *file) > +{ > + return single_open(file, lat_exc_seq_show, NULL); > +} > + > +static struct file_operations lat_exc_fops = { > + .owner = THIS_MODULE, > + .open = lat_exc_seq_open, > + .read = seq_read, > + .llseek = seq_lseek, > +}; > +#endif // CONFIG_XENO_OPT_STATS > + > void xntimer_init_proc(void) > { > rthal_add_proc_leaf("timer", &timer_read_proc, NULL, NULL, > rthal_proc_root); > + > +#ifdef CONFIG_XENO_OPT_DEBUG_TIMER_LATENCY > + struct dentry *debugfs_dir; > + > + debugfs_dir = debugfs_create_dir("timer_lattest", NULL); > + > + if (debugfs_dir) { > + if (!debugfs_create_u32("lat_threshold_us", 0666, > + debugfs_dir, &lat_thresh)) > + panic("Could not create lat_threshold_us"); > + > +#ifdef CONFIG_XENO_OPT_STATS > + if (!debugfs_create_file("exc_timer_list", 0600, > + debugfs_dir, NULL, &lat_exc_fops)) > + panic("Could not create exc_timer_list"); > +#endif // XENO_OPT_STATS > + } > +#endif // XENO_OPT_DEBUG_TIMER_LATENCY > } > > void xntimer_cleanup_proc(void) > > _______________________________________________ > Xenomai-core mailing list > Xenomai-core@domain.hid > https://mail.gna.org/listinfo/xenomai-core -- Philippe. ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [Xenomai-core] [PATCH, RFC] Instrumentation to detect delayed timers 2010-06-08 15:11 ` Philippe Gerum @ 2010-06-08 16:22 ` Wolfgang Mauerer 2010-06-08 16:48 ` Philippe Gerum 0 siblings, 1 reply; 4+ messages in thread From: Wolfgang Mauerer @ 2010-06-08 16:22 UTC (permalink / raw) To: Philippe Gerum; +Cc: xenomai@xenomai.org Philippe Gerum wrote: > On Tue, 2010-06-08 at 16:21 +0200, Wolfgang Mauerer wrote: >> Hi, >> >> the attached patch augments the timer expiration handler with some checks >> for excessive timer latencies. Depending on the configuration options, it >> freezes the ipipe tracer and creates a list of delayed timers. >> >> This code was useful for us in debugging some problems with faulty hardware. >> Would such a thing be suitable for mainline? > > No, I don't think so, albeit the debug feature it brings in makes sense. > > - the code, as is, adds significant cruft to the timer code, which is > per se a problem. Debug is ok, but I really would like that code to have > a much lower impact source-wise. so something along the lines of xntimer_enqueue_aperiodic(...) { ... large_latency_hook_whatever(args); ... } where large_latency_hook_whatever() is #ifdef'd to nop if not compile-time configured would be acceptable I suppose? > - dependency on debugfs is wrong; we still have to take care of legacy > 2.4 kernels. I know, this is a real PITA, and I'm more than willing to > get rid of that "heritage" in 3.x, but for now, 2.x still requires to be > 2.4-compatible. oh, yeah. procfs would be a replacement, but using debugfs via ipipe is much nicer since this is definitely debugging stuff. > - but, beyond this, this is something that should go into the I-pipe > tracer instead, exporting a new dedicated interface to log a high > latency event. There, you could use whatever fits to implement it, since > there is by definition no consideration for compat with legacy kernels. > I'm thinking of having ipipe_trace_latency(unsigned long delta, const > char *source). That would be reusable in other, non-Xenomai contexts as > well. Sounds reasonable. Would it make sense to augment ipipe_trace_latency() with an additional type argument to distinguish different latency sources, or is this overkill? Best, Wolfgang > > Additionally, the latency spots could appear as events in the tracer log > as well, which may help debugging them a lot, since you would know about > the actual context. > >> Regards, Wolfgang >> >> --- >> commit 9c2a3908ac4aff21022a577552002fc2622d3670 >> Author: Wolfgang Mauerer <wolfgang.mauerer@domain.hid> >> Date: Mon Jun 7 15:34:39 2010 +0200 >> >> This patch provides the possibility to check upon timer expiration >> how much a timer has been delayed. When the delay surpasses a configurable >> threshold, the ipipe tracer is frozen. Additionally, the patch allows >> for the creation of a list of timers that have suffered from excessive >> delays. >> >> The mechanism can be disabled at both, compile- and runtime. There is no >> overhead in the former case. >> >> Signed-off-by: Wolfgang Mauerer <wolfgang.mauerer@domain.hid> >> >> diff --git a/ksrc/nucleus/Kconfig b/ksrc/nucleus/Kconfig >> index 211a4ad..ffed3f8 100644 >> --- a/ksrc/nucleus/Kconfig >> +++ b/ksrc/nucleus/Kconfig >> @@ -293,6 +293,24 @@ config XENO_OPT_DEBUG_TIMERS >> This option activates debugging output for critical >> timer-related operations performed by the Xenomai core. >> >> +config XENO_OPT_DEBUG_TIMER_LATENCY >> + bool "Log excessive timer latencies" >> + depends on XENO_OPT_DEBUG && (XENO_OPT_STATS || IPIPE_TRACE) >> + help >> + >> + This option activates recording of excessive timer latencies. >> + In contrast to the userland latency checker which is statistical >> + in nature, this test checks every expiring timer and is thus >> + more precise. However, it adds some runtime overhead when >> + activated. >> + >> + When the ipipe tracer is enabled, the trace is frozen >> + when a latency maximum above the threshold set in >> + the debugfs file timer_lattest/lat_threshold_us is detected. >> + When Xenomai statistics collection is enabled, a list of >> + excessive latencies together with the names of the timers >> + is kept in the debugfs file timer_lattest/exc_timer_list. >> + >> config XENO_OPT_DEBUG_SYNCH_RELAX >> bool "Detect mutexes held in relaxed sections" >> depends on XENO_OPT_PERVASIVE && XENO_OPT_DEBUG >> diff --git a/ksrc/nucleus/timer.c b/ksrc/nucleus/timer.c >> index d813c4f..01dad44 100644 >> --- a/ksrc/nucleus/timer.c >> +++ b/ksrc/nucleus/timer.c >> @@ -36,11 +36,37 @@ >> * >> *@{*/ >> >> +#include <linux/debugfs.h> >> #include <nucleus/pod.h> >> #include <nucleus/thread.h> >> #include <nucleus/timer.h> >> +#include <nucleus/trace.h> >> #include <asm/xenomai/bits/timer.h> >> >> +#ifdef CONFIG_XENO_OPT_DEBUG_TIMER_LATENCY >> +#define NANO 1000000000ll >> +#define MU 1000000ll >> +#define NANO_PER_MU (NANO/MU) >> + >> +static u32 lat_thresh = 0; /* in \mu s, set to zero to disable */ >> + >> +#ifdef CONFIG_XENO_OPT_STATS >> +#include <linux/seq_file.h> >> + >> +#define NUM_REC_LATENCIES 500 >> +static int num_lat_exc_timers = -1; >> +static bool lat_rec_overflow = false; /* Set when more than NUM_REC_LATENCIES >> + * excessive latencies have been >> + * observed */ >> +struct lat_rec { >> + char name[XNOBJECT_NAME_LEN]; >> + xnsticks_t delta; >> +}; >> + >> +static struct lat_rec delayed_timers[NUM_REC_LATENCIES]; >> +#endif // CONFIG_XENO_OPT_STATS >> +#endif // XENO_OPT_DEBUG_TIMER_LATENCY >> + >> static inline void xntimer_enqueue_aperiodic(xntimer_t *timer) >> { >> xntimerq_t *q = &timer->sched->timerqueue; >> @@ -368,6 +394,32 @@ void xntimer_tick_aperiodic(void) >> if (delta > (xnsticks_t)nklatency) >> break; >> >> +#ifdef CONFIG_XENO_OPT_DEBUG_TIMER_LATENCY >> + /* >> + * Check for severly delayed timers, but omit the host tick >> + * timer as it can be delayed intentionally, thus causing >> + * spurious excessive latencies (see >> + * xntimer_next_local_shot()). >> + */ >> + if (lat_thresh && (timer != &sched->htimer)) { >> + delta = xnarch_tsc_to_ns(-delta); >> + if (delta > ((xnsticks_t)lat_thresh)*NANO_PER_MU) { >> + xnarch_trace_user_freeze(delta, 1); >> +#ifdef CONFIG_XENO_OPT_STATS >> + if (num_lat_exc_timers < NUM_REC_LATENCIES - 1) { >> + num_lat_exc_timers++; >> + } else { >> + lat_rec_overflow = true; >> + num_lat_exc_timers = 0; >> + } >> + xnobject_copy_name(delayed_timers[num_lat_exc_timers].name, >> + timer->name); >> + delayed_timers[num_lat_exc_timers].delta = delta; >> +#endif // XENO_OPT_STATS >> + } >> + } >> +#endif // XENO_OPT_DEBUG_TIMER_LATENCY >> + >> trace_mark(xn_nucleus, timer_expire, "timer %p", timer); >> >> xntimer_dequeue_aperiodic(timer); >> @@ -1147,10 +1199,70 @@ static int timer_read_proc(char *page, >> return len; >> } >> >> +#ifdef CONFIG_XENO_OPT_STATS >> +static int lat_exc_seq_show(struct seq_file *f, void *v) >> +{ >> + int count, max; >> + >> + if (num_lat_exc_timers < 0) { >> + seq_printf(f, "(No excessive timer latencies detected)\n"); >> + return 0; >> + } >> + >> + if (lat_rec_overflow) >> + max = NUM_REC_LATENCIES; >> + else >> + max = num_lat_exc_timers; >> + >> + for (count = 0; count < max; count++) { >> + seq_printf(f, "Timer '%s', delay %lld ns\n", >> + delayed_timers[count].name, >> + delayed_timers[count].delta); >> + } >> + >> + if (lat_rec_overflow) { >> + seq_printf(f, "WARNING: Latency tracking array overflow detected.\n"); >> + seq_printf(f, " Not all excessive latencies are shown.\n"); >> + >> + } >> + return 0; >> +} >> + >> +static int lat_exc_seq_open(struct inode *inode, struct file *file) >> +{ >> + return single_open(file, lat_exc_seq_show, NULL); >> +} >> + >> +static struct file_operations lat_exc_fops = { >> + .owner = THIS_MODULE, >> + .open = lat_exc_seq_open, >> + .read = seq_read, >> + .llseek = seq_lseek, >> +}; >> +#endif // CONFIG_XENO_OPT_STATS >> + >> void xntimer_init_proc(void) >> { >> rthal_add_proc_leaf("timer", &timer_read_proc, NULL, NULL, >> rthal_proc_root); >> + >> +#ifdef CONFIG_XENO_OPT_DEBUG_TIMER_LATENCY >> + struct dentry *debugfs_dir; >> + >> + debugfs_dir = debugfs_create_dir("timer_lattest", NULL); >> + >> + if (debugfs_dir) { >> + if (!debugfs_create_u32("lat_threshold_us", 0666, >> + debugfs_dir, &lat_thresh)) >> + panic("Could not create lat_threshold_us"); >> + >> +#ifdef CONFIG_XENO_OPT_STATS >> + if (!debugfs_create_file("exc_timer_list", 0600, >> + debugfs_dir, NULL, &lat_exc_fops)) >> + panic("Could not create exc_timer_list"); >> +#endif // XENO_OPT_STATS >> + } >> +#endif // XENO_OPT_DEBUG_TIMER_LATENCY >> } >> >> void xntimer_cleanup_proc(void) >> >> _______________________________________________ >> Xenomai-core mailing list >> Xenomai-core@domain.hid >> https://mail.gna.org/listinfo/xenomai-core > > ^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [Xenomai-core] [PATCH, RFC] Instrumentation to detect delayed timers 2010-06-08 16:22 ` Wolfgang Mauerer @ 2010-06-08 16:48 ` Philippe Gerum 0 siblings, 0 replies; 4+ messages in thread From: Philippe Gerum @ 2010-06-08 16:48 UTC (permalink / raw) To: Wolfgang Mauerer; +Cc: xenomai@xenomai.org On Tue, 2010-06-08 at 18:22 +0200, Wolfgang Mauerer wrote: > Philippe Gerum wrote: > > On Tue, 2010-06-08 at 16:21 +0200, Wolfgang Mauerer wrote: > >> Hi, > >> > >> the attached patch augments the timer expiration handler with some checks > >> for excessive timer latencies. Depending on the configuration options, it > >> freezes the ipipe tracer and creates a list of delayed timers. > >> > >> This code was useful for us in debugging some problems with faulty hardware. > >> Would such a thing be suitable for mainline? > > > > No, I don't think so, albeit the debug feature it brings in makes sense. > > > > - the code, as is, adds significant cruft to the timer code, which is > > per se a problem. Debug is ok, but I really would like that code to have > > a much lower impact source-wise. > > so something along the lines of > > xntimer_enqueue_aperiodic(...) > { > ... > large_latency_hook_whatever(args); > ... > } > > where large_latency_hook_whatever() is #ifdef'd to nop if not > compile-time configured would be acceptable I suppose? Yes. > > > - dependency on debugfs is wrong; we still have to take care of legacy > > 2.4 kernels. I know, this is a real PITA, and I'm more than willing to > > get rid of that "heritage" in 3.x, but for now, 2.x still requires to be > > 2.4-compatible. > > oh, yeah. procfs would be a replacement, but using debugfs via ipipe > is much nicer since this is definitely debugging stuff. > > > - but, beyond this, this is something that should go into the I-pipe > > tracer instead, exporting a new dedicated interface to log a high > > latency event. There, you could use whatever fits to implement it, since > > there is by definition no consideration for compat with legacy kernels. > > I'm thinking of having ipipe_trace_latency(unsigned long delta, const > > char *source). That would be reusable in other, non-Xenomai contexts as > > well. > > Sounds reasonable. > > Would it make sense to augment ipipe_trace_latency() with an additional > type argument to distinguish different latency sources, or is this > overkill? > It would even be required to store a latency event as a tracepoint in the log, I guess, since we don't have room for any source label (which would be overkill to have in every tracepoint anyway), but we do have a type field there. > Best, Wolfgang > > > > > Additionally, the latency spots could appear as events in the tracer log > > as well, which may help debugging them a lot, since you would know about > > the actual context. > > > >> Regards, Wolfgang > >> > >> --- > >> commit 9c2a3908ac4aff21022a577552002fc2622d3670 > >> Author: Wolfgang Mauerer <wolfgang.mauerer@domain.hid> > >> Date: Mon Jun 7 15:34:39 2010 +0200 > >> > >> This patch provides the possibility to check upon timer expiration > >> how much a timer has been delayed. When the delay surpasses a configurable > >> threshold, the ipipe tracer is frozen. Additionally, the patch allows > >> for the creation of a list of timers that have suffered from excessive > >> delays. > >> > >> The mechanism can be disabled at both, compile- and runtime. There is no > >> overhead in the former case. > >> > >> Signed-off-by: Wolfgang Mauerer <wolfgang.mauerer@domain.hid> > >> > >> diff --git a/ksrc/nucleus/Kconfig b/ksrc/nucleus/Kconfig > >> index 211a4ad..ffed3f8 100644 > >> --- a/ksrc/nucleus/Kconfig > >> +++ b/ksrc/nucleus/Kconfig > >> @@ -293,6 +293,24 @@ config XENO_OPT_DEBUG_TIMERS > >> This option activates debugging output for critical > >> timer-related operations performed by the Xenomai core. > >> > >> +config XENO_OPT_DEBUG_TIMER_LATENCY > >> + bool "Log excessive timer latencies" > >> + depends on XENO_OPT_DEBUG && (XENO_OPT_STATS || IPIPE_TRACE) > >> + help > >> + > >> + This option activates recording of excessive timer latencies. > >> + In contrast to the userland latency checker which is statistical > >> + in nature, this test checks every expiring timer and is thus > >> + more precise. However, it adds some runtime overhead when > >> + activated. > >> + > >> + When the ipipe tracer is enabled, the trace is frozen > >> + when a latency maximum above the threshold set in > >> + the debugfs file timer_lattest/lat_threshold_us is detected. > >> + When Xenomai statistics collection is enabled, a list of > >> + excessive latencies together with the names of the timers > >> + is kept in the debugfs file timer_lattest/exc_timer_list. > >> + > >> config XENO_OPT_DEBUG_SYNCH_RELAX > >> bool "Detect mutexes held in relaxed sections" > >> depends on XENO_OPT_PERVASIVE && XENO_OPT_DEBUG > >> diff --git a/ksrc/nucleus/timer.c b/ksrc/nucleus/timer.c > >> index d813c4f..01dad44 100644 > >> --- a/ksrc/nucleus/timer.c > >> +++ b/ksrc/nucleus/timer.c > >> @@ -36,11 +36,37 @@ > >> * > >> *@{*/ > >> > >> +#include <linux/debugfs.h> > >> #include <nucleus/pod.h> > >> #include <nucleus/thread.h> > >> #include <nucleus/timer.h> > >> +#include <nucleus/trace.h> > >> #include <asm/xenomai/bits/timer.h> > >> > >> +#ifdef CONFIG_XENO_OPT_DEBUG_TIMER_LATENCY > >> +#define NANO 1000000000ll > >> +#define MU 1000000ll > >> +#define NANO_PER_MU (NANO/MU) > >> + > >> +static u32 lat_thresh = 0; /* in \mu s, set to zero to disable */ > >> + > >> +#ifdef CONFIG_XENO_OPT_STATS > >> +#include <linux/seq_file.h> > >> + > >> +#define NUM_REC_LATENCIES 500 > >> +static int num_lat_exc_timers = -1; > >> +static bool lat_rec_overflow = false; /* Set when more than NUM_REC_LATENCIES > >> + * excessive latencies have been > >> + * observed */ > >> +struct lat_rec { > >> + char name[XNOBJECT_NAME_LEN]; > >> + xnsticks_t delta; > >> +}; > >> + > >> +static struct lat_rec delayed_timers[NUM_REC_LATENCIES]; > >> +#endif // CONFIG_XENO_OPT_STATS > >> +#endif // XENO_OPT_DEBUG_TIMER_LATENCY > >> + > >> static inline void xntimer_enqueue_aperiodic(xntimer_t *timer) > >> { > >> xntimerq_t *q = &timer->sched->timerqueue; > >> @@ -368,6 +394,32 @@ void xntimer_tick_aperiodic(void) > >> if (delta > (xnsticks_t)nklatency) > >> break; > >> > >> +#ifdef CONFIG_XENO_OPT_DEBUG_TIMER_LATENCY > >> + /* > >> + * Check for severly delayed timers, but omit the host tick > >> + * timer as it can be delayed intentionally, thus causing > >> + * spurious excessive latencies (see > >> + * xntimer_next_local_shot()). > >> + */ > >> + if (lat_thresh && (timer != &sched->htimer)) { > >> + delta = xnarch_tsc_to_ns(-delta); > >> + if (delta > ((xnsticks_t)lat_thresh)*NANO_PER_MU) { > >> + xnarch_trace_user_freeze(delta, 1); > >> +#ifdef CONFIG_XENO_OPT_STATS > >> + if (num_lat_exc_timers < NUM_REC_LATENCIES - 1) { > >> + num_lat_exc_timers++; > >> + } else { > >> + lat_rec_overflow = true; > >> + num_lat_exc_timers = 0; > >> + } > >> + xnobject_copy_name(delayed_timers[num_lat_exc_timers].name, > >> + timer->name); > >> + delayed_timers[num_lat_exc_timers].delta = delta; > >> +#endif // XENO_OPT_STATS > >> + } > >> + } > >> +#endif // XENO_OPT_DEBUG_TIMER_LATENCY > >> + > >> trace_mark(xn_nucleus, timer_expire, "timer %p", timer); > >> > >> xntimer_dequeue_aperiodic(timer); > >> @@ -1147,10 +1199,70 @@ static int timer_read_proc(char *page, > >> return len; > >> } > >> > >> +#ifdef CONFIG_XENO_OPT_STATS > >> +static int lat_exc_seq_show(struct seq_file *f, void *v) > >> +{ > >> + int count, max; > >> + > >> + if (num_lat_exc_timers < 0) { > >> + seq_printf(f, "(No excessive timer latencies detected)\n"); > >> + return 0; > >> + } > >> + > >> + if (lat_rec_overflow) > >> + max = NUM_REC_LATENCIES; > >> + else > >> + max = num_lat_exc_timers; > >> + > >> + for (count = 0; count < max; count++) { > >> + seq_printf(f, "Timer '%s', delay %lld ns\n", > >> + delayed_timers[count].name, > >> + delayed_timers[count].delta); > >> + } > >> + > >> + if (lat_rec_overflow) { > >> + seq_printf(f, "WARNING: Latency tracking array overflow detected.\n"); > >> + seq_printf(f, " Not all excessive latencies are shown.\n"); > >> + > >> + } > >> + return 0; > >> +} > >> + > >> +static int lat_exc_seq_open(struct inode *inode, struct file *file) > >> +{ > >> + return single_open(file, lat_exc_seq_show, NULL); > >> +} > >> + > >> +static struct file_operations lat_exc_fops = { > >> + .owner = THIS_MODULE, > >> + .open = lat_exc_seq_open, > >> + .read = seq_read, > >> + .llseek = seq_lseek, > >> +}; > >> +#endif // CONFIG_XENO_OPT_STATS > >> + > >> void xntimer_init_proc(void) > >> { > >> rthal_add_proc_leaf("timer", &timer_read_proc, NULL, NULL, > >> rthal_proc_root); > >> + > >> +#ifdef CONFIG_XENO_OPT_DEBUG_TIMER_LATENCY > >> + struct dentry *debugfs_dir; > >> + > >> + debugfs_dir = debugfs_create_dir("timer_lattest", NULL); > >> + > >> + if (debugfs_dir) { > >> + if (!debugfs_create_u32("lat_threshold_us", 0666, > >> + debugfs_dir, &lat_thresh)) > >> + panic("Could not create lat_threshold_us"); > >> + > >> +#ifdef CONFIG_XENO_OPT_STATS > >> + if (!debugfs_create_file("exc_timer_list", 0600, > >> + debugfs_dir, NULL, &lat_exc_fops)) > >> + panic("Could not create exc_timer_list"); > >> +#endif // XENO_OPT_STATS > >> + } > >> +#endif // XENO_OPT_DEBUG_TIMER_LATENCY > >> } > >> > >> void xntimer_cleanup_proc(void) > >> > >> _______________________________________________ > >> Xenomai-core mailing list > >> Xenomai-core@domain.hid > >> https://mail.gna.org/listinfo/xenomai-core > > > > > -- Philippe. ^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2010-06-08 16:48 UTC | newest] Thread overview: 4+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2010-06-08 14:21 [Xenomai-core] [PATCH, RFC] Instrumentation to detect delayed timers Wolfgang Mauerer 2010-06-08 15:11 ` Philippe Gerum 2010-06-08 16:22 ` Wolfgang Mauerer 2010-06-08 16:48 ` Philippe Gerum
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.