* [PATCH v9 1/2] Track hard and soft "short lockups" or "stalls."
@ 2011-08-17 20:36 Alex Neronskiy
2011-08-17 20:36 ` [PATCH/RFC v9 2/2] Use trace events to report stalls Alex Neronskiy
0 siblings, 1 reply; 3+ messages in thread
From: Alex Neronskiy @ 2011-08-17 20:36 UTC (permalink / raw)
To: linux-kernel
Cc: peterz, Ingo Molnar, Don Zickus, Mandeep Singh Baines,
Alex Neronskiy
Enable the lockup watchdog to not only respond to lockups beyond a
certain threshold, but also keep track of the worst sub-threshold
lockup (called a "stall") seen so far. The stalls are communicated
to userspace via the kernel log, where a stack trace is also
included. Hard lockups are checked for more frequently to collect
additional data. Soft stalls are described by the length of time
between successive instances of scheduling of the watchdog thread,
and hard stalls are described by the number of times a hard lockup
check found that no interrupts had been recorded. Some thresholds
on these are set, to attempt to prevent floods of useless data.
Signed-off-by: Alex Neronskiy <zakmagnus@chromium.org>
---
The motivation here is to be able to gather more data about just how
"extreme" latency conditions get inside the kernel, without
necessarily triggering a threshold and potentially setting off a
panic.
The way the hard lockup watchdog works has been changed to make it
more compatible with this purpose. Previously it was hard to
collect any data from it other than "lockup" or "not lockup."
Now it has to fail 5 consecutive checks to be considered a lockup,
somewhat similarly to how soft lockups work.
kernel/watchdog.c | 124 +++++++++++++++++++++++++++++++++++++++++++++-------
1 files changed, 107 insertions(+), 17 deletions(-)
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 36491cd..d4c51cc 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -29,6 +29,8 @@
int watchdog_enabled = 1;
int __read_mostly watchdog_thresh = 10;
+static unsigned long worst_softstall;
+static DEFINE_SPINLOCK(softstall_lock);
static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
@@ -40,8 +42,29 @@ static DEFINE_PER_CPU(bool, hard_watchdog_warn);
static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
+static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_missed);
static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
-#endif
+
+static unsigned long worst_hardstall;
+static DEFINE_SPINLOCK(hardstall_lock);
+
+/* The number of consecutive hard check failures before it's a lockup */
+#define hardlockup_thresh (5)
+
+/*
+ * The minimum expected number of missed interrupts in a normal hard
+ * lockup check.
+ */
+#define hardstall_thresh (2)
+
+#endif /* CONFIG_HARDLOCKUP_DETECTOR */
+
+/*
+ * The number of milliseconds by which a stall must be worse than the
+ * current worst in order to be recorded. This prevents bursts of near-
+ * identical stalls.
+ */
+#define softstall_diff_thresh (100)
/* boot commands */
/*
@@ -98,19 +121,28 @@ __setup("nosoftlockup", nosoftlockup_setup);
* the thresholds with a factor: we make the soft threshold twice the amount of
* time the hard threshold is.
*/
-static int get_softlockup_thresh(void)
+static inline int get_softlockup_thresh(void)
{
return watchdog_thresh * 2;
}
/*
- * Returns seconds, approximately. We don't need nanosecond
+ * This is just the period of the timer function. A "stall" shorter than
+ * this is the expected behavior and is therefore not noteworthy.
+ */
+static inline int get_softstall_thresh(void)
+{
+ return 1000 / 5 * get_softlockup_thresh();
+}
+
+/*
+ * Returns milliseconds, approximately. We don't need nanosecond
* resolution, and we don't need to waste time with a big divide when
- * 2^30ns == 1.074s.
+ * 2^20ns == 1.049ms.
*/
static unsigned long get_timestamp(int this_cpu)
{
- return cpu_clock(this_cpu) >> 30LL; /* 2^30 ~= 10^9 */
+ return cpu_clock(this_cpu) >> 20LL; /* 2^20 ~= 10^6 */
}
static unsigned long get_sample_period(void)
@@ -176,25 +208,79 @@ void touch_softlockup_watchdog_sync(void)
#ifdef CONFIG_HARDLOCKUP_DETECTOR
/* watchdog detector functions */
-static int is_hardlockup(void)
+static void update_hardstall(unsigned long stall, int this_cpu)
+{
+ int update_stall = 0;
+
+ if (stall > hardstall_thresh && stall > worst_hardstall) {
+ unsigned long flags;
+ spin_lock_irqsave(&hardstall_lock, flags);
+ update_stall = stall > worst_hardstall;
+ if (update_stall)
+ worst_hardstall = stall;
+ spin_unlock_irqrestore(&hardstall_lock, flags);
+ }
+
+ if (update_stall) {
+ printk(KERN_WARNING "LOCKUP may be in progress!"
+ "Worst hard stall seen on CPU#%d: %lu interrupts missed\n",
+ this_cpu, stall);
+ dump_stack();
+ }
+}
+
+static int is_hardlockup(int this_cpu)
{
unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
+ unsigned long hrint_saved = __this_cpu_read(hrtimer_interrupts_saved);
+ unsigned long ints_missed = 0;
+
+ __this_cpu_write(hrtimer_interrupts_saved, hrint);
- if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
+ if (hrint_saved == hrint)
+ ints_missed = per_cpu(hrtimer_interrupts_missed, this_cpu)++;
+ else
+ __this_cpu_write(hrtimer_interrupts_missed, 0);
+
+ if (ints_missed >= hardlockup_thresh)
return 1;
- __this_cpu_write(hrtimer_interrupts_saved, hrint);
+ update_hardstall(ints_missed, this_cpu);
return 0;
}
#endif
-static int is_softlockup(unsigned long touch_ts)
+static void update_softstall(unsigned long stall, int this_cpu)
{
- unsigned long now = get_timestamp(smp_processor_id());
+ int update_stall = 0;
+ if (stall > get_softstall_thresh() &&
+ stall > worst_softstall + softstall_diff_thresh) {
+ unsigned long flags;
+ spin_lock_irqsave(&softstall_lock, flags);
+ update_stall = stall > worst_softstall + softstall_diff_thresh;
+ if (update_stall)
+ worst_softstall = stall;
+ spin_unlock_irqrestore(&softstall_lock, flags);
+ }
+
+ if (update_stall) {
+ printk(KERN_WARNING "LOCKUP may be in progress!"
+ "Worst soft stall seen on CPU#%d: %lums\n",
+ this_cpu, stall);
+ dump_stack();
+ }
+}
+
+static int is_softlockup(unsigned long touch_ts, int this_cpu)
+{
+ unsigned long now = get_timestamp(this_cpu);
+ unsigned long stall = now - touch_ts;
/* Warn about unreasonable delays: */
- if (time_after(now, touch_ts + get_softlockup_thresh()))
- return now - touch_ts;
+ if (time_after(now, touch_ts + 1000 * get_softlockup_thresh()))
+ return stall;
+
+ update_softstall(stall, this_cpu);
return 0;
}
@@ -214,6 +300,9 @@ static void watchdog_overflow_callback(struct perf_event *event,
struct perf_sample_data *data,
struct pt_regs *regs)
{
+ int lockup;
+ int this_cpu = smp_processor_id();
+
/* Ensure the watchdog never gets throttled */
event->hw.interrupts = 0;
@@ -228,9 +317,8 @@ static void watchdog_overflow_callback(struct perf_event *event,
* fired multiple times before we overflow'd. If it hasn't
* then this is a good indication the cpu is stuck
*/
- if (is_hardlockup()) {
- int this_cpu = smp_processor_id();
-
+ lockup = is_hardlockup(this_cpu);
+ if (lockup) {
/* only print hardlockups once */
if (__this_cpu_read(hard_watchdog_warn) == true)
return;
@@ -261,6 +349,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
struct pt_regs *regs = get_irq_regs();
int duration;
+ int this_cpu = smp_processor_id();
/* kick the hardlockup detector */
watchdog_interrupt_count();
@@ -290,14 +379,14 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
* indicate it is getting cpu time. If it hasn't then
* this is a good indication some task is hogging the cpu
*/
- duration = is_softlockup(touch_ts);
+ duration = is_softlockup(touch_ts, this_cpu);
if (unlikely(duration)) {
/* only warn once */
if (__this_cpu_read(soft_watchdog_warn) == true)
return HRTIMER_RESTART;
printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
- smp_processor_id(), duration,
+ this_cpu, duration >> 10L,
current->comm, task_pid_nr(current));
print_modules();
print_irqtrace_events(current);
@@ -371,6 +460,7 @@ static int watchdog_nmi_enable(int cpu)
wd_attr = &wd_hw_attr;
wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
+ do_div(wd_attr->sample_period, hardlockup_thresh);
/* Try to register using hardware perf events */
event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
--
1.7.3.1
^ permalink raw reply related [flat|nested] 3+ messages in thread

* [PATCH/RFC v9 2/2] Use trace events to report stalls
2011-08-17 20:36 [PATCH v9 1/2] Track hard and soft "short lockups" or "stalls." Alex Neronskiy
@ 2011-08-17 20:36 ` Alex Neronskiy
2011-08-19 19:05 ` Jason Baron
0 siblings, 1 reply; 3+ messages in thread
From: Alex Neronskiy @ 2011-08-17 20:36 UTC (permalink / raw)
To: linux-kernel
Cc: peterz, Ingo Molnar, Don Zickus, Mandeep Singh Baines,
Alex Neronskiy
Signed-off-by: Alex Neronskiy <zakmagnus@chromium.org>
---
This outputs both the numerical representation of the stall as well
as a stack trace. The trace, unlike the number, is always output,
regardless of what debug/tracing/events/watchdog/enable says. I
don't know how to hook into that.
kernel/trace_watchdog.h | 43 +++++++++++++++++++++++++++++++++++++++++++
kernel/watchdog.c | 23 +++++++++++------------
2 files changed, 54 insertions(+), 12 deletions(-)
create mode 100644 kernel/trace_watchdog.h
diff --git a/kernel/trace_watchdog.h b/kernel/trace_watchdog.h
new file mode 100644
index 0000000..1401d27f
--- /dev/null
+++ b/kernel/trace_watchdog.h
@@ -0,0 +1,43 @@
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM watchdog
+
+#if !defined(_TRACE_WATCHDOG_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_WATCHDOG_H
+
+#include <linux/tracepoint.h>
+#include <linux/stacktrace.h>
+
+TRACE_EVENT(soft_stall,
+ TP_PROTO(unsigned long stall),
+ TP_ARGS(stall),
+ TP_STRUCT__entry(
+ __field(unsigned long, stall)
+ ),
+ TP_fast_assign(
+ __entry->stall = stall;
+ ),
+ TP_printk("Soft stall: %lums", __entry->stall)
+);
+
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
+TRACE_EVENT(hard_stall,
+ TP_PROTO(unsigned long stall),
+ TP_ARGS(stall),
+ TP_STRUCT__entry(
+ __field(unsigned long, stall)
+ ),
+ TP_fast_assign(
+ __entry->stall = stall;
+ ),
+ TP_printk("Hard stall: %lu", __entry->stall)
+);
+#endif /* CONFIG_HARDLOCKUP_DETECTOR */
+
+#endif /* _TRACE_WATCHDOG_H */
+
+#undef TRACE_INCLUDE_PATH
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_PATH .
+
+#define TRACE_INCLUDE_FILE trace_watchdog
+#include <trace/define_trace.h>
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index d4c51cc..18385ed 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -27,6 +27,9 @@
#include <asm/irq_regs.h>
#include <linux/perf_event.h>
+#define CREATE_TRACE_POINTS
+#include "trace_watchdog.h"
+
int watchdog_enabled = 1;
int __read_mostly watchdog_thresh = 10;
static unsigned long worst_softstall;
@@ -208,7 +211,7 @@ void touch_softlockup_watchdog_sync(void)
#ifdef CONFIG_HARDLOCKUP_DETECTOR
/* watchdog detector functions */
-static void update_hardstall(unsigned long stall, int this_cpu)
+static void update_hardstall(unsigned long stall)
{
int update_stall = 0;
@@ -222,10 +225,8 @@ static void update_hardstall(unsigned long stall, int this_cpu)
}
if (update_stall) {
- printk(KERN_WARNING "LOCKUP may be in progress!"
- "Worst hard stall seen on CPU#%d: %lu interrupts missed\n",
- this_cpu, stall);
- dump_stack();
+ trace_hard_stall(stall);
+ trace_dump_stack();
}
}
@@ -245,12 +246,12 @@ static int is_hardlockup(int this_cpu)
if (ints_missed >= hardlockup_thresh)
return 1;
- update_hardstall(ints_missed, this_cpu);
+ update_hardstall(ints_missed);
return 0;
}
#endif
-static void update_softstall(unsigned long stall, int this_cpu)
+static void update_softstall(unsigned long stall)
{
int update_stall = 0;
if (stall > get_softstall_thresh() &&
@@ -264,10 +265,8 @@ static void update_softstall(unsigned long stall, int this_cpu)
}
if (update_stall) {
- printk(KERN_WARNING "LOCKUP may be in progress!"
- "Worst soft stall seen on CPU#%d: %lums\n",
- this_cpu, stall);
- dump_stack();
+ trace_soft_stall(stall);
+ trace_dump_stack();
}
}
@@ -280,7 +279,7 @@ static int is_softlockup(unsigned long touch_ts, int this_cpu)
if (time_after(now, touch_ts + 1000 * get_softlockup_thresh()))
return stall;
- update_softstall(stall, this_cpu);
+ update_softstall(stall);
return 0;
}
--
1.7.3.1
^ permalink raw reply related [flat|nested] 3+ messages in thread

* Re: [PATCH/RFC v9 2/2] Use trace events to report stalls
2011-08-17 20:36 ` [PATCH/RFC v9 2/2] Use trace events to report stalls Alex Neronskiy
@ 2011-08-19 19:05 ` Jason Baron
0 siblings, 0 replies; 3+ messages in thread
From: Jason Baron @ 2011-08-19 19:05 UTC (permalink / raw)
To: Alex Neronskiy
Cc: linux-kernel, peterz, Ingo Molnar, Don Zickus,
Mandeep Singh Baines, rostedt
On Wed, Aug 17, 2011 at 01:36:09PM -0700, Alex Neronskiy wrote:
> Signed-off-by: Alex Neronskiy <zakmagnus@chromium.org>
> ---
> This outputs both the numerical representation of the stall as well
> as a stack trace. The trace, unlike the number, is always output,
> regardless of what debug/tracing/events/watchdog/enable says. I
> don't know how to hook into that.
>
(adding Steve to 'cc)
Yes, it would be nice to make the dump stack contigent on the
tracepoints...that's what you want?
If so, maybe we could have TP_fast_assign fill a buffer which is output
via TP_printk, or else TP_printk could take an optional callback
function...
Thanks,
-Jason
> kernel/trace_watchdog.h | 43 +++++++++++++++++++++++++++++++++++++++++++
> kernel/watchdog.c | 23 +++++++++++------------
> 2 files changed, 54 insertions(+), 12 deletions(-)
> create mode 100644 kernel/trace_watchdog.h
>
> diff --git a/kernel/trace_watchdog.h b/kernel/trace_watchdog.h
> new file mode 100644
> index 0000000..1401d27f
> --- /dev/null
> +++ b/kernel/trace_watchdog.h
> @@ -0,0 +1,43 @@
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM watchdog
> +
> +#if !defined(_TRACE_WATCHDOG_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _TRACE_WATCHDOG_H
> +
> +#include <linux/tracepoint.h>
> +#include <linux/stacktrace.h>
> +
> +TRACE_EVENT(soft_stall,
> + TP_PROTO(unsigned long stall),
> + TP_ARGS(stall),
> + TP_STRUCT__entry(
> + __field(unsigned long, stall)
> + ),
> + TP_fast_assign(
> + __entry->stall = stall;
> + ),
> + TP_printk("Soft stall: %lums", __entry->stall)
> +);
> +
> +#ifdef CONFIG_HARDLOCKUP_DETECTOR
> +TRACE_EVENT(hard_stall,
> + TP_PROTO(unsigned long stall),
> + TP_ARGS(stall),
> + TP_STRUCT__entry(
> + __field(unsigned long, stall)
> + ),
> + TP_fast_assign(
> + __entry->stall = stall;
> + ),
> + TP_printk("Hard stall: %lu", __entry->stall)
> +);
> +#endif /* CONFIG_HARDLOCKUP_DETECTOR */
> +
> +#endif /* _TRACE_WATCHDOG_H */
> +
> +#undef TRACE_INCLUDE_PATH
> +#undef TRACE_INCLUDE_FILE
> +#define TRACE_INCLUDE_PATH .
> +
> +#define TRACE_INCLUDE_FILE trace_watchdog
> +#include <trace/define_trace.h>
> diff --git a/kernel/watchdog.c b/kernel/watchdog.c
> index d4c51cc..18385ed 100644
> --- a/kernel/watchdog.c
> +++ b/kernel/watchdog.c
> @@ -27,6 +27,9 @@
> #include <asm/irq_regs.h>
> #include <linux/perf_event.h>
>
> +#define CREATE_TRACE_POINTS
> +#include "trace_watchdog.h"
> +
> int watchdog_enabled = 1;
> int __read_mostly watchdog_thresh = 10;
> static unsigned long worst_softstall;
> @@ -208,7 +211,7 @@ void touch_softlockup_watchdog_sync(void)
>
> #ifdef CONFIG_HARDLOCKUP_DETECTOR
> /* watchdog detector functions */
> -static void update_hardstall(unsigned long stall, int this_cpu)
> +static void update_hardstall(unsigned long stall)
> {
> int update_stall = 0;
>
> @@ -222,10 +225,8 @@ static void update_hardstall(unsigned long stall, int this_cpu)
> }
>
> if (update_stall) {
> - printk(KERN_WARNING "LOCKUP may be in progress!"
> - "Worst hard stall seen on CPU#%d: %lu interrupts missed\n",
> - this_cpu, stall);
> - dump_stack();
> + trace_hard_stall(stall);
> + trace_dump_stack();
> }
> }
>
> @@ -245,12 +246,12 @@ static int is_hardlockup(int this_cpu)
> if (ints_missed >= hardlockup_thresh)
> return 1;
>
> - update_hardstall(ints_missed, this_cpu);
> + update_hardstall(ints_missed);
> return 0;
> }
> #endif
>
> -static void update_softstall(unsigned long stall, int this_cpu)
> +static void update_softstall(unsigned long stall)
> {
> int update_stall = 0;
> if (stall > get_softstall_thresh() &&
> @@ -264,10 +265,8 @@ static void update_softstall(unsigned long stall, int this_cpu)
> }
>
> if (update_stall) {
> - printk(KERN_WARNING "LOCKUP may be in progress!"
> - "Worst soft stall seen on CPU#%d: %lums\n",
> - this_cpu, stall);
> - dump_stack();
> + trace_soft_stall(stall);
> + trace_dump_stack();
> }
> }
>
> @@ -280,7 +279,7 @@ static int is_softlockup(unsigned long touch_ts, int this_cpu)
> if (time_after(now, touch_ts + 1000 * get_softlockup_thresh()))
> return stall;
>
> - update_softstall(stall, this_cpu);
> + update_softstall(stall);
>
> return 0;
> }
> --
> 1.7.3.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at http://www.tux.org/lkml/
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2011-08-19 19:06 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-08-17 20:36 [PATCH v9 1/2] Track hard and soft "short lockups" or "stalls." Alex Neronskiy
2011-08-17 20:36 ` [PATCH/RFC v9 2/2] Use trace events to report stalls Alex Neronskiy
2011-08-19 19:05 ` Jason Baron
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox