From: Steven Rostedt <rostedt@goodmis.org>
To: linux-kernel@vger.kernel.org
Cc: Linus Torvalds <torvalds@linux-foundation.org>,
Ingo Molnar <mingo@kernel.org>,
Andrew Morton <akpm@linux-foundation.org>,
Jiri Kosina <jkosina@suse.cz>, Petr Mladek <pmladek@suse.cz>,
"Paul E. McKenney" <paulmck@linux.vnet.ibm.com>
Subject: [RFC][PATCH 3/3] x86/nmi: Perform a safe NMI stack trace on all CPUs
Date: Tue, 18 Nov 2014 23:39:20 -0500 [thread overview]
Message-ID: <20141119045556.084787418@goodmis.org> (raw)
In-Reply-To: 20141119043917.578498291@goodmis.org
[-- Attachment #1: 0003-x86-nmi-Perform-a-safe-NMI-stack-trace-on-all-CPUs.patch --]
[-- Type: text/plain, Size: 5451 bytes --]
From: "Steven Rostedt (Red Hat)" <rostedt@goodmis.org>
When trigger_all_cpu_backtrace() is called on x86, it will trigger an
NMI on each CPU and call show_regs(). But this can lead to a hard lock
up if the NMI comes in on another printk().
In order to avoid this, when the NMI triggers, it switches the printk
routine for that CPU to call a NMI safe printk function that records the
printk in a per_cpu seq_buf descriptor. After all NMIs have finished
recording its data, the seq_bufs are printed in a safe context.
Link: http://lkml.kernel.org/p/20140619213952.360076309@goodmis.org
Link: http://lkml.kernel.org/r/20141115050605.055232587@goodmis.org
Tested-by: Jiri Kosina <jkosina@suse.cz>
Acked-by: Jiri Kosina <jkosina@suse.cz>
Acked-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Reviewed-by: Petr Mladek <pmladek@suse.cz>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
arch/x86/kernel/apic/hw_nmi.c | 91 ++++++++++++++++++++++++++++++++++++++++---
1 file changed, 86 insertions(+), 5 deletions(-)
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 6a1e71bde323..c95c3e9ce196 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -18,6 +18,7 @@
#include <linux/nmi.h>
#include <linux/module.h>
#include <linux/delay.h>
+#include <linux/seq_buf.h>
#ifdef CONFIG_HARDLOCKUP_DETECTOR
u64 hw_nmi_get_sample_period(int watchdog_thresh)
@@ -29,14 +30,35 @@ u64 hw_nmi_get_sample_period(int watchdog_thresh)
#ifdef arch_trigger_all_cpu_backtrace
/* For reliability, we're prepared to waste bits here. */
static DECLARE_BITMAP(backtrace_mask, NR_CPUS) __read_mostly;
+static cpumask_var_t printtrace_mask;
+
+#define NMI_BUF_SIZE 4096
+
+struct nmi_seq_buf {
+ unsigned char buffer[NMI_BUF_SIZE];
+ struct seq_buf seq;
+};
+
+/* Safe printing in NMI context */
+static DEFINE_PER_CPU(struct nmi_seq_buf, nmi_print_seq);
/* "in progress" flag of arch_trigger_all_cpu_backtrace */
static unsigned long backtrace_flag;
+static void print_seq_line(struct nmi_seq_buf *s, int start, int end)
+{
+ const char *buf = s->buffer + start;
+
+ printk("%.*s", (end - start) + 1, buf);
+}
+
void arch_trigger_all_cpu_backtrace(bool include_self)
{
+ struct nmi_seq_buf *s;
+ int len;
+ int cpu;
int i;
- int cpu = get_cpu();
+ int this_cpu = get_cpu();
if (test_and_set_bit(0, &backtrace_flag)) {
/*
@@ -49,7 +71,17 @@ void arch_trigger_all_cpu_backtrace(bool include_self)
cpumask_copy(to_cpumask(backtrace_mask), cpu_online_mask);
if (!include_self)
- cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
+ cpumask_clear_cpu(this_cpu, to_cpumask(backtrace_mask));
+
+ cpumask_copy(printtrace_mask, to_cpumask(backtrace_mask));
+ /*
+ * Set up per_cpu seq_buf buffers that the NMIs running on the other
+ * CPUs will write to.
+ */
+ for_each_cpu(cpu, to_cpumask(backtrace_mask)) {
+ s = &per_cpu(nmi_print_seq, cpu);
+ seq_buf_init(&s->seq, s->buffer, NMI_BUF_SIZE);
+ }
if (!cpumask_empty(to_cpumask(backtrace_mask))) {
pr_info("sending NMI to %s CPUs:\n",
@@ -65,11 +97,58 @@ void arch_trigger_all_cpu_backtrace(bool include_self)
touch_softlockup_watchdog();
}
+ /*
+ * Now that all the NMIs have triggered, we can dump out their
+ * back traces safely to the console.
+ */
+ for_each_cpu(cpu, printtrace_mask) {
+ int last_i = 0;
+
+ s = &per_cpu(nmi_print_seq, cpu);
+ len = seq_buf_used(&s->seq);
+ if (!len)
+ continue;
+
+ /* Print line by line. */
+ for (i = 0; i < len; i++) {
+ if (s->buffer[i] == '\n') {
+ print_seq_line(s, last_i, i);
+ last_i = i + 1;
+ }
+ }
+ /* Check if there was a partial line. */
+ if (last_i < len) {
+ print_seq_line(s, last_i, len - 1);
+ pr_cont("\n");
+ }
+ }
+
clear_bit(0, &backtrace_flag);
smp_mb__after_atomic();
put_cpu();
}
+/*
+ * It is not safe to call printk() directly from NMI handlers.
+ * It may be fine if the NMI detected a lock up and we have no choice
+ * but to do so, but doing a NMI on all other CPUs to get a back trace
+ * can be done with a sysrq-l. We don't want that to lock up, which
+ * can happen if the NMI interrupts a printk in progress.
+ *
+ * Instead, we redirect the vprintk() to this nmi_vprintk() that writes
+ * the content into a per cpu seq_buf buffer. Then when the NMIs are
+ * all done, we can safely dump the contents of the seq_buf to a printk()
+ * from a non NMI context.
+ */
+static int nmi_vprintk(const char *fmt, va_list args)
+{
+ struct nmi_seq_buf *s = this_cpu_ptr(&nmi_print_seq);
+ unsigned int len = seq_buf_used(&s->seq);
+
+ seq_buf_vprintf(&s->seq, fmt, args);
+ return seq_buf_used(&s->seq) - len;
+}
+
static int
arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs)
{
@@ -78,12 +157,14 @@ arch_trigger_all_cpu_backtrace_handler(unsigned int cmd, struct pt_regs *regs)
cpu = smp_processor_id();
if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
- static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;
+ printk_func_t printk_func_save = this_cpu_read(printk_func);
- arch_spin_lock(&lock);
+ /* Replace printk to write into the NMI seq */
+ this_cpu_write(printk_func, nmi_vprintk);
printk(KERN_WARNING "NMI backtrace for cpu %d\n", cpu);
show_regs(regs);
- arch_spin_unlock(&lock);
+ this_cpu_write(printk_func, printk_func_save);
+
cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask));
return NMI_HANDLED;
}
--
2.1.1
next prev parent reply other threads:[~2014-11-19 4:56 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-11-19 4:39 [RFC][PATCH 0/3] printk/seq-buf/NMI: Revisit of safe NMI printing with seq_buf code Steven Rostedt
2014-11-19 4:39 ` [RFC][PATCH 1/3] seq_buf: Move the seq_buf code to lib/ Steven Rostedt
2014-11-19 5:04 ` Steven Rostedt
2014-11-19 4:39 ` [RFC][PATCH 2/3] printk: Add per_cpu printk func to allow printk to be diverted Steven Rostedt
2014-11-19 4:39 ` Steven Rostedt [this message]
2014-11-19 10:41 ` [RFC][PATCH 3/3] x86/nmi: Perform a safe NMI stack trace on all CPUs Borislav Petkov
2014-11-19 10:44 ` Jiri Kosina
2014-11-19 10:53 ` Borislav Petkov
2014-11-19 13:02 ` Petr Mladek
-- strict thread matches above, loose matches on Subject: below --
2014-06-19 21:33 [RFC][PATCH 0/3] x86/nmi: Print all cpu stacks from NMI safely Steven Rostedt
2014-06-19 21:33 ` [RFC][PATCH 3/3] x86/nmi: Perform a safe NMI stack trace on all CPUs Steven Rostedt
2014-06-20 13:58 ` Don Zickus
2014-06-20 14:21 ` Steven Rostedt
2014-06-20 14:55 ` Petr Mládek
2014-06-20 15:17 ` Steven Rostedt
2014-06-23 16:12 ` Paul E. McKenney
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20141119045556.084787418@goodmis.org \
--to=rostedt@goodmis.org \
--cc=akpm@linux-foundation.org \
--cc=jkosina@suse.cz \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=paulmck@linux.vnet.ibm.com \
--cc=pmladek@suse.cz \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.