[PATCH v5 1/2] Track hard and soft "short lockups" or "stalls."

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

* [PATCH v5 1/2] Track hard and soft "short lockups" or "stalls."
@ 2011-08-10  0:22 Alex Neronskiy
  2011-08-10  0:22 ` [PATCH v5 2/2] Output stall data in debugfs Alex Neronskiy
  0 siblings, 1 reply; 3+ messages in thread
From: Alex Neronskiy @ 2011-08-10  0:22 UTC (permalink / raw)
  To: linux-kernel
  Cc: peterz, Ingo Molnar, Don Zickus, Mandeep Singh Baines,
	Alex Neronskiy

From: Alex Neronskiy <zakmagnus@chromium.com>

Enable the lockup watchdog to not only respond to lockups beyond a
certain threshold, but also keep track of the worst sub-threshold
lockup (called a "stall") seen so far. The stalls are communicated
to userspace via /proc/sys/kernel/softstall_worst
and hardstall_worst, as well as through the kernel log, where a
stack trace is also included. Hard lockups are checked for more
frequently to collect additional data. Soft stalls are described
by the length of time between successive instances of scheduling
of the watchdog thread, and hard stalls are described by the
number of times a hard lockup check found that no interrupts had
been recorded. Some thresholds on these are set, to attempt to
prevent floods of useless data.

Signed-off-by: Alex Neronskiy <zakmagnus@chromium.com>
---
 include/linux/nmi.h |    6 ++-
 kernel/sysctl.c     |   18 +++++++-
 kernel/watchdog.c   |  124 ++++++++++++++++++++++++++++++++++++++++++++-------
 3 files changed, 129 insertions(+), 19 deletions(-)

diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 2d304ef..57cda21 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -48,9 +48,13 @@ int hw_nmi_is_cpu_stuck(struct pt_regs *);
 u64 hw_nmi_get_sample_period(int watchdog_thresh);
 extern int watchdog_enabled;
 extern int watchdog_thresh;
+extern unsigned long worst_softstall;
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
+extern unsigned long worst_hardstall;
+#endif /* CONFIG_HARDLOCKUP_DETECTOR */
 struct ctl_table;
 extern int proc_dowatchdog(struct ctl_table *, int ,
 			   void __user *, size_t *, loff_t *);
-#endif
+#endif /* CONFIG_LOCKUP_DETECTOR */
 
 #endif
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 11d65b5..a392952 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -767,7 +767,23 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
-#endif
+	{
+		.procname       = "softstall_worst",
+		.data           = &worst_softstall,
+		.maxlen         = sizeof(unsigned long),
+		.mode           = 0644,
+		.proc_handler   = proc_doulongvec_minmax,
+	},
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
+	{
+		.procname       = "hardstall_worst",
+		.data           = &worst_hardstall,
+		.maxlen         = sizeof(unsigned long),
+		.mode           = 0644,
+		.proc_handler   = proc_doulongvec_minmax,
+	},
+#endif /* CONFIG_HARDLOCKUP_DETECTOR */
+#endif /* CONFIG_LOCKUP_DETECTOR */
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
 	{
 		.procname       = "unknown_nmi_panic",
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 36491cd..8dd9ed4 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -29,6 +29,8 @@
 
 int watchdog_enabled = 1;
 int __read_mostly watchdog_thresh = 10;
+unsigned long worst_softstall;
+static DEFINE_SPINLOCK(softstall_lock);
 
 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
 static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
@@ -40,8 +42,29 @@ static DEFINE_PER_CPU(bool, hard_watchdog_warn);
 static DEFINE_PER_CPU(bool, watchdog_nmi_touch);
 static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts);
 static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_saved);
+static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_missed);
 static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
-#endif
+
+unsigned long worst_hardstall;
+static DEFINE_SPINLOCK(hardstall_lock);
+
+/* The number of consecutive hard check failures before it's a lockup */
+#define hardlockup_thresh (5)
+
+/*
+ * The minimum expected number of missed interrupts in a normal hard
+ * lockup check.
+ */
+#define hardstall_thresh (2)
+
+#endif /* CONFIG_HARDLOCKUP_DETECTOR */
+
+/*
+ * The number of milliseconds by which a stall must be worse than the
+ * current worst in order to be recorded. This prevents bursts of near-
+ * identical stalls.
+ */
+#define softstall_diff_thresh (100)
 
 /* boot commands */
 /*
@@ -98,19 +121,28 @@ __setup("nosoftlockup", nosoftlockup_setup);
  * the thresholds with a factor: we make the soft threshold twice the amount of
  * time the hard threshold is.
  */
-static int get_softlockup_thresh(void)
+static inline int get_softlockup_thresh(void)
 {
 	return watchdog_thresh * 2;
 }
 
 /*
- * Returns seconds, approximately.  We don't need nanosecond
+ * This is just the period of the timer function. A "stall" shorter than
+ * this is the expected behavior and is therefore not noteworthy.
+ */
+static inline int get_softstall_thresh(void)
+{
+	return 1000 / 5 * get_softlockup_thresh();
+}
+
+/*
+ * Returns milliseconds, approximately.  We don't need nanosecond
  * resolution, and we don't need to waste time with a big divide when
- * 2^30ns == 1.074s.
+ * 2^20ns == 1.049ms.
  */
 static unsigned long get_timestamp(int this_cpu)
 {
-	return cpu_clock(this_cpu) >> 30LL;  /* 2^30 ~= 10^9 */
+	return cpu_clock(this_cpu) >> 20LL;  /* 2^20 ~= 10^6 */
 }
 
 static unsigned long get_sample_period(void)
@@ -176,25 +208,79 @@ void touch_softlockup_watchdog_sync(void)
 
 #ifdef CONFIG_HARDLOCKUP_DETECTOR
 /* watchdog detector functions */
-static int is_hardlockup(void)
+static void update_hardstall(unsigned long stall, int this_cpu)
+{
+	int update_stall = 0;
+
+	if (stall > hardstall_thresh && stall > worst_hardstall) {
+		unsigned long flags;
+		spin_lock_irqsave(&hardstall_lock, flags);
+		update_stall = stall > worst_hardstall;
+		if (update_stall)
+			worst_hardstall = stall;
+		spin_unlock_irqrestore(&hardstall_lock, flags);
+	}
+
+	if (update_stall) {
+		printk(KERN_WARNING "LOCKUP may be in progress!"
+			"Worst hard stall seen on CPU#%d: %lu interrupts missed\n",
+			this_cpu, stall);
+		dump_stack();
+	}
+}
+
+static int is_hardlockup(int this_cpu)
 {
 	unsigned long hrint = __this_cpu_read(hrtimer_interrupts);
+	unsigned long hrint_saved = __this_cpu_read(hrtimer_interrupts_saved);
+	unsigned long ints_missed = 0;
+
+	__this_cpu_write(hrtimer_interrupts_saved, hrint);
 
-	if (__this_cpu_read(hrtimer_interrupts_saved) == hrint)
+	if (hrint_saved == hrint)
+		ints_missed = per_cpu(hrtimer_interrupts_missed, this_cpu)++;
+	else
+		__this_cpu_write(hrtimer_interrupts_missed, 0);
+
+	if (ints_missed >= hardlockup_thresh)
 		return 1;
 
-	__this_cpu_write(hrtimer_interrupts_saved, hrint);
+	update_hardstall(ints_missed, this_cpu);
 	return 0;
 }
 #endif
 
-static int is_softlockup(unsigned long touch_ts)
+static void update_softstall(unsigned long stall, int this_cpu)
 {
-	unsigned long now = get_timestamp(smp_processor_id());
+	int update_stall = 0;
+	if (stall > get_softstall_thresh() &&
+			stall > worst_softstall + softstall_diff_thresh) {
+		unsigned long flags;
+		spin_lock_irqsave(&softstall_lock, flags);
+		update_stall = stall > worst_softstall + softstall_diff_thresh;
+		if (update_stall)
+			worst_softstall = stall;
+		spin_unlock_irqrestore(&softstall_lock, flags);
+	}
+
+	if (update_stall) {
+		printk(KERN_WARNING "LOCKUP may be in progress!"
+				"Worst soft stall seen on CPU#%d: %lums\n",
+				this_cpu, stall);
+		dump_stack();
+	}
+}
+
+static int is_softlockup(unsigned long touch_ts, int this_cpu)
+{
+	unsigned long now = get_timestamp(this_cpu);
+	unsigned long stall = now - touch_ts;
 
 	/* Warn about unreasonable delays: */
-	if (time_after(now, touch_ts + get_softlockup_thresh()))
-		return now - touch_ts;
+	if (time_after(now, touch_ts + 1000 * get_softlockup_thresh()))
+		return stall;
+
+	update_softstall(stall, this_cpu);
 
 	return 0;
 }
@@ -214,6 +300,9 @@ static void watchdog_overflow_callback(struct perf_event *event,
 		 struct perf_sample_data *data,
 		 struct pt_regs *regs)
 {
+	int lockup;
+	int this_cpu = smp_processor_id();
+
 	/* Ensure the watchdog never gets throttled */
 	event->hw.interrupts = 0;
 
@@ -228,9 +317,8 @@ static void watchdog_overflow_callback(struct perf_event *event,
 	 * fired multiple times before we overflow'd.  If it hasn't
 	 * then this is a good indication the cpu is stuck
 	 */
-	if (is_hardlockup()) {
-		int this_cpu = smp_processor_id();
-
+	lockup = is_hardlockup(this_cpu);
+	if (lockup) {
 		/* only print hardlockups once */
 		if (__this_cpu_read(hard_watchdog_warn) == true)
 			return;
@@ -261,6 +349,7 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	unsigned long touch_ts = __this_cpu_read(watchdog_touch_ts);
 	struct pt_regs *regs = get_irq_regs();
 	int duration;
+	int this_cpu = smp_processor_id();
 
 	/* kick the hardlockup detector */
 	watchdog_interrupt_count();
@@ -290,14 +379,14 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)
 	 * indicate it is getting cpu time.  If it hasn't then
 	 * this is a good indication some task is hogging the cpu
 	 */
-	duration = is_softlockup(touch_ts);
+	duration = is_softlockup(touch_ts, this_cpu);
 	if (unlikely(duration)) {
 		/* only warn once */
 		if (__this_cpu_read(soft_watchdog_warn) == true)
 			return HRTIMER_RESTART;
 
 		printk(KERN_ERR "BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",
-			smp_processor_id(), duration,
+			this_cpu, duration >> 10L,
 			current->comm, task_pid_nr(current));
 		print_modules();
 		print_irqtrace_events(current);
@@ -371,6 +460,7 @@ static int watchdog_nmi_enable(int cpu)
 
 	wd_attr = &wd_hw_attr;
 	wd_attr->sample_period = hw_nmi_get_sample_period(watchdog_thresh);
+	do_div(wd_attr->sample_period, hardlockup_thresh);
 
 	/* Try to register using hardware perf events */
 	event = perf_event_create_kernel_counter(wd_attr, cpu, NULL, watchdog_overflow_callback, NULL);
-- 
1.7.3.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* [PATCH v5 2/2] Output stall data in debugfs
  2011-08-10  0:22 [PATCH v5 1/2] Track hard and soft "short lockups" or "stalls." Alex Neronskiy
@ 2011-08-10  0:22 ` Alex Neronskiy
  2011-08-10 13:06   ` Don Zickus
  0 siblings, 1 reply; 3+ messages in thread
From: Alex Neronskiy @ 2011-08-10  0:22 UTC (permalink / raw)
  To: linux-kernel
  Cc: peterz, Ingo Molnar, Don Zickus, Mandeep Singh Baines,
	Alex Neronskiy, Alex Neronskiy

From: Alex Neronskiy <zakmagnus@chromium.com>

Instead of using the log, use debugfs for output of both stall
lengths and stack traces. Printing to the log can result in
watchdog touches, distorting the very events being measured.
Additionally, the information will not distract from lockups
when users view the log.

A two-buffer system is used to ensure that the trace information
can always be recorded without contention.

Signed-off-by: Alex Neronskiy <zakmagnus@chromium.org>
---
This locking system is a bit hairy and I'm not sure it will
always work properly...

 kernel/sysctl.c   |   16 ----
 kernel/watchdog.c |  210 ++++++++++++++++++++++++++++++++++++++++++++++------
 2 files changed, 185 insertions(+), 41 deletions(-)

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index a392952..b8620a2 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -767,22 +767,6 @@ static struct ctl_table kern_table[] = {
 		.extra1		= &zero,
 		.extra2		= &one,
 	},
-	{
-		.procname       = "softstall_worst",
-		.data           = &worst_softstall,
-		.maxlen         = sizeof(unsigned long),
-		.mode           = 0644,
-		.proc_handler   = proc_doulongvec_minmax,
-	},
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-	{
-		.procname       = "hardstall_worst",
-		.data           = &worst_hardstall,
-		.maxlen         = sizeof(unsigned long),
-		.mode           = 0644,
-		.proc_handler   = proc_doulongvec_minmax,
-	},
-#endif /* CONFIG_HARDLOCKUP_DETECTOR */
 #endif /* CONFIG_LOCKUP_DETECTOR */
 #if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
 	{
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index 8dd9ed4..abc101c 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -23,14 +23,27 @@
 #include <linux/notifier.h>
 #include <linux/module.h>
 #include <linux/sysctl.h>
+#include <linux/stacktrace.h>
+#include <linux/seq_file.h>
+#include <linux/debugfs.h>
 
 #include <asm/irq_regs.h>
 #include <linux/perf_event.h>
 
+#define STALL_SOFT (0)
+#define STALL_HARD (1)
+
+#define STALL_MAX_TRACE_DEPTH (50)
 int watchdog_enabled = 1;
 int __read_mostly watchdog_thresh = 10;
 unsigned long worst_softstall;
-static DEFINE_SPINLOCK(softstall_lock);
+static unsigned long softstall_trace_entries[STALL_MAX_TRACE_DEPTH * 2];
+static struct stack_trace softstall_traces[2];
+static spinlock_t softstall_locks[2];
+static DEFINE_SPINLOCK(softstall_read_lock);
+static DEFINE_SPINLOCK(softstall_write_lock);
+static volatile int soft_read_ind;
+static int soft_write_ind;
 
 static DEFINE_PER_CPU(unsigned long, watchdog_touch_ts);
 static DEFINE_PER_CPU(struct task_struct *, softlockup_watchdog);
@@ -46,7 +59,13 @@ static DEFINE_PER_CPU(unsigned long, hrtimer_interrupts_missed);
 static DEFINE_PER_CPU(struct perf_event *, watchdog_ev);
 
 unsigned long worst_hardstall;
-static DEFINE_SPINLOCK(hardstall_lock);
+static unsigned long hardstall_trace_entries[STALL_MAX_TRACE_DEPTH * 2];
+static struct stack_trace hardstall_traces[2];
+static spinlock_t hardstall_locks[2];
+static DEFINE_SPINLOCK(hardstall_read_lock);
+static DEFINE_SPINLOCK(hardstall_write_lock);
+static volatile int hard_read_ind;
+static int hard_write_ind;
 
 /* The number of consecutive hard check failures before it's a lockup */
 #define hardlockup_thresh (5)
@@ -210,22 +229,28 @@ void touch_softlockup_watchdog_sync(void)
 /* watchdog detector functions */
 static void update_hardstall(unsigned long stall, int this_cpu)
 {
-	int update_stall = 0;
-
 	if (stall > hardstall_thresh && stall > worst_hardstall) {
 		unsigned long flags;
-		spin_lock_irqsave(&hardstall_lock, flags);
-		update_stall = stall > worst_hardstall;
-		if (update_stall)
+		spin_lock_irqsave(&hardstall_write_lock, flags);
+		if (stall > worst_hardstall) {
+			int locked = spin_trylock(
+					&hardstall_locks[hard_write_ind]);
+			if (!locked) {
+				/* we may have interrupted the holder, so
+				 * switch buffers now */
+				hard_write_ind = !hard_write_ind;
+
+				/* no one has this lock right now */
+				spin_lock(&hardstall_locks[hard_write_ind]);
+			}
 			worst_hardstall = stall;
-		spin_unlock_irqrestore(&hardstall_lock, flags);
-	}
+			hardstall_traces[hard_write_ind].nr_entries = 0;
+			save_stack_trace(&hardstall_traces[hard_write_ind]);
 
-	if (update_stall) {
-		printk(KERN_WARNING "LOCKUP may be in progress!"
-			"Worst hard stall seen on CPU#%d: %lu interrupts missed\n",
-			this_cpu, stall);
-		dump_stack();
+			hard_read_ind = hard_write_ind;
+			spin_unlock(&hardstall_locks[hard_write_ind]);
+		}
+		spin_unlock_irqrestore(&hardstall_write_lock, flags);
 	}
 }
 
@@ -252,22 +277,29 @@ static int is_hardlockup(int this_cpu)
 
 static void update_softstall(unsigned long stall, int this_cpu)
 {
-	int update_stall = 0;
 	if (stall > get_softstall_thresh() &&
 			stall > worst_softstall + softstall_diff_thresh) {
 		unsigned long flags;
-		spin_lock_irqsave(&softstall_lock, flags);
-		update_stall = stall > worst_softstall + softstall_diff_thresh;
-		if (update_stall)
+		spin_lock_irqsave(&softstall_write_lock, flags);
+		if (stall > worst_softstall + softstall_diff_thresh) {
+			int locked = spin_trylock(
+					&softstall_locks[soft_write_ind]);
+			if (!locked) {
+				/* we may have interrupted the holder, so
+				 * switch buffers now */
+				soft_write_ind = !soft_write_ind;
+
+				/* no one has this lock right now */
+				spin_lock(&softstall_locks[soft_write_ind]);
+			}
 			worst_softstall = stall;
-		spin_unlock_irqrestore(&softstall_lock, flags);
-	}
+			softstall_traces[soft_write_ind].nr_entries = 0;
+			save_stack_trace(&softstall_traces[soft_write_ind]);
 
-	if (update_stall) {
-		printk(KERN_WARNING "LOCKUP may be in progress!"
-				"Worst soft stall seen on CPU#%d: %lums\n",
-				this_cpu, stall);
-		dump_stack();
+			soft_read_ind = soft_write_ind;
+			spin_unlock(&softstall_locks[soft_write_ind]);
+		}
+		spin_unlock_irqrestore(&softstall_write_lock, flags);
 	}
 }
 
@@ -667,6 +699,76 @@ static struct notifier_block __cpuinitdata cpu_nfb = {
 	.notifier_call = cpu_callback
 };
 
+static int show_stall_trace(struct seq_file *f, void *v)
+{
+	int i, end, buf_ind;
+	int type = (int) f->private;
+	spinlock_t *lock;
+	struct stack_trace *trace;
+	unsigned long flags;
+
+	if (type == STALL_SOFT) {
+		spin_lock_irqsave(&softstall_read_lock, flags);
+
+		buf_ind = soft_read_ind;
+		lock = &softstall_locks[buf_ind];
+		trace = &softstall_traces[buf_ind];
+	}
+	else {
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
+		spin_lock_irqsave(&hardstall_read_lock, flags);
+
+		buf_ind = hard_read_ind;
+		lock = &hardstall_locks[buf_ind];
+		trace = &hardstall_traces[buf_ind];
+#endif
+	}
+
+	end = trace->nr_entries;
+	/* don't bother printing the trailing value */
+	if (end < trace->max_entries)
+		end--;
+
+	spin_lock(lock);
+	for (i = 0; i < end; i++) {
+		seq_printf(f, "[<%pK>] %pS\n", (void *)trace->entries[i],
+				(void *)trace->entries[i]);
+	}
+
+	if (type == STALL_SOFT)
+		spin_unlock_irqrestore(&softstall_read_lock, flags);
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
+	else
+		spin_unlock_irqrestore(&hardstall_read_lock, flags);
+#endif
+	spin_unlock(lock);
+
+	return 0;
+}
+
+static int softstall_trace_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, show_stall_trace, (void *)STALL_SOFT);
+}
+
+static int hardstall_trace_open(struct inode *inode, struct file *file)
+{
+	return single_open(file, show_stall_trace, (void *)STALL_HARD);
+}
+
+static struct file_operations softstall_trace_ops = {
+	.open = softstall_trace_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release
+};
+static struct file_operations hardstall_trace_ops = {
+	.open = hardstall_trace_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release
+};
+
 void __init lockup_detector_init(void)
 {
 	void *cpu = (void *)(long)smp_processor_id();
@@ -678,5 +780,63 @@ void __init lockup_detector_init(void)
 	cpu_callback(&cpu_nfb, CPU_ONLINE, cpu);
 	register_cpu_notifier(&cpu_nfb);
 
+	softstall_traces[0].nr_entries = 0;
+	softstall_traces[0].max_entries = STALL_MAX_TRACE_DEPTH;
+	softstall_traces[0].skip = 0;
+	softstall_traces[0].entries = softstall_trace_entries;
+	softstall_traces[1].nr_entries = 0;
+	softstall_traces[1].max_entries = STALL_MAX_TRACE_DEPTH;
+	softstall_traces[1].skip = 0;
+	softstall_traces[1].entries = softstall_trace_entries
+		+ STALL_MAX_TRACE_DEPTH;
+
+	spin_lock_init(&softstall_locks[0]);
+	spin_lock_init(&softstall_locks[1]);
+
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
+	hardstall_traces[0].nr_entries = 0;
+	hardstall_traces[0].max_entries = STALL_MAX_TRACE_DEPTH;
+	hardstall_traces[0].skip = 0;
+	hardstall_traces[0].entries = hardstall_trace_entries;
+	hardstall_traces[1].nr_entries = 0;
+	hardstall_traces[1].max_entries = STALL_MAX_TRACE_DEPTH;
+	hardstall_traces[1].skip = 0;
+	hardstall_traces[1].entries = hardstall_trace_entries
+		+ STALL_MAX_TRACE_DEPTH;
+
+	spin_lock_init(&hardstall_locks[0]);
+	spin_lock_init(&hardstall_locks[1]);
+#endif
+
 	return;
 }
+
+/* Must be called after debugfs_init() */
+static int __init lockup_detector_debugfs_init(void)
+{
+	struct dentry *parent = debugfs_create_dir("lockup_watchdog", NULL);
+	if (parent) {
+		debugfs_create_file("softstall_trace", 0444, parent, NULL,
+				&softstall_trace_ops);
+		if (sizeof(worst_softstall) == sizeof(u32))
+			debugfs_create_u32("softstall_worst", 0644, parent,
+					(u32 *) &worst_softstall);
+		else if (sizeof(worst_softstall) == sizeof(u64))
+			debugfs_create_u64("softstall_worst", 0644, parent,
+					(u64 *) &worst_softstall);
+
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
+		debugfs_create_file("hardstall_trace", 0444, parent, NULL,
+				&hardstall_trace_ops);
+		if (sizeof(worst_hardstall) == sizeof(u32))
+			debugfs_create_u32("hardstall_worst", 0644, parent,
+					(u32 *) &worst_hardstall);
+		else if (sizeof(worst_hardstall) == sizeof(u64))
+			debugfs_create_u64("hardstall_worst", 0644, parent,
+					(u64 *) &worst_hardstall);
+#endif
+	}
+
+	return 0;
+}
+postcore_initcall(lockup_detector_debugfs_init);
-- 
1.7.3.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH v5 2/2] Output stall data in debugfs
  2011-08-10  0:22 ` [PATCH v5 2/2] Output stall data in debugfs Alex Neronskiy
@ 2011-08-10 13:06   ` Don Zickus
  0 siblings, 0 replies; 3+ messages in thread
From: Don Zickus @ 2011-08-10 13:06 UTC (permalink / raw)
  To: Alex Neronskiy
  Cc: linux-kernel, peterz, Ingo Molnar, Mandeep Singh Baines,
	Alex Neronskiy

On Tue, Aug 09, 2011 at 05:22:43PM -0700, Alex Neronskiy wrote:
> From: Alex Neronskiy <zakmagnus@chromium.com>
> 
> Instead of using the log, use debugfs for output of both stall
> lengths and stack traces. Printing to the log can result in
> watchdog touches, distorting the very events being measured.
> Additionally, the information will not distract from lockups
> when users view the log.
> 
> A two-buffer system is used to ensure that the trace information
> can always be recorded without contention.

This looks like it can work.  I'll try to play with it today.  Can I ask
that you add some comments to document the way the buffers work, so others
know what is going on too?

Cheers,
Don

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2011-08-10 13:07 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2011-08-10  0:22 [PATCH v5 1/2] Track hard and soft "short lockups" or "stalls." Alex Neronskiy
2011-08-10  0:22 ` [PATCH v5 2/2] Output stall data in debugfs Alex Neronskiy
2011-08-10 13:06   ` Don Zickus

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox