* [PATCH -v3 1/6] x86, NMI, Add NMI symbol constants and rename memory parity to PCI SERR
@ 2010-10-09 6:49 Huang Ying
2010-10-09 6:49 ` [PATCH -v3 2/6] x86, NMI, Add touch_nmi_watchdog to io_check_error delay Huang Ying
` (4 more replies)
0 siblings, 5 replies; 44+ messages in thread
From: Huang Ying @ 2010-10-09 6:49 UTC (permalink / raw)
To: Don Zickus, Ingo Molnar, H. Peter Anvin
Cc: linux-kernel, Andi Kleen, Robert Richter, Huang Ying
Replace the NMI related magic numbers with symbol constants.
memory parity error is only valid for IBM PC-AT, newer machine use 7
bit (0x80) of 0x61 port for PCI SERR. While memory error is usually
reported via MCE. So corresponding function name and kernel log string
is changed.
But on some machines, PCI SERR line is still used to report memory
errors. This is used by EDAC, so corresponding EDAC call is reserved.
v3:
- Merge adding symbol constants and renaming patches
v2:
- EDAC call in pci_serr_error is reserved.
Signed-off-by: Huang Ying <ying.huang@intel.com>
---
arch/x86/include/asm/mach_traps.h | 12 ++++++++
arch/x86/kernel/traps.c | 51 +++++++++++++++++++-------------------
2 files changed, 37 insertions(+), 26 deletions(-)
--- a/arch/x86/include/asm/mach_traps.h
+++ b/arch/x86/include/asm/mach_traps.h
@@ -7,9 +7,19 @@
#include <asm/mc146818rtc.h>
+#define NMI_REASON_PORT 0x61
+
+#define NMI_REASON_SERR 0x80
+#define NMI_REASON_IOCHK 0x40
+#define NMI_REASON_MASK (NMI_REASON_SERR | NMI_REASON_IOCHK)
+
+#define NMI_REASON_CLEAR_SERR 0x04
+#define NMI_REASON_CLEAR_IOCHK 0x08
+#define NMI_REASON_CLEAR_MASK 0x0f
+
static inline unsigned char get_nmi_reason(void)
{
- return inb(0x61);
+ return inb(NMI_REASON_PORT);
}
static inline void reassert_nmi(void)
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -301,15 +301,15 @@ gp_in_kernel:
}
static notrace __kprobes void
-mem_parity_error(unsigned char reason, struct pt_regs *regs)
+pci_serr_error(unsigned char reason, struct pt_regs *regs)
{
- printk(KERN_EMERG
- "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
- reason, smp_processor_id());
-
- printk(KERN_EMERG
- "You have some hardware problem, likely on the PCI bus.\n");
+ pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
+ reason, smp_processor_id());
+ /*
+ * On some machines, PCI SERR line is used to report memory
+ * errors. EDAC makes use of it.
+ */
#if defined(CONFIG_EDAC)
if (edac_handler_set()) {
edac_atomic_assert_error();
@@ -320,11 +320,11 @@ mem_parity_error(unsigned char reason, s
if (panic_on_unrecovered_nmi)
panic("NMI: Not continuing");
- printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
+ pr_emerg("Dazed and confused, but trying to continue\n");
- /* Clear and disable the memory parity error line. */
- reason = (reason & 0xf) | 4;
- outb(reason, 0x61);
+ /* Clear and disable the PCI SERR error line. */
+ reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
+ outb(reason, NMI_REASON_PORT);
}
static notrace __kprobes void
@@ -332,22 +332,24 @@ io_check_error(unsigned char reason, str
{
unsigned long i;
- printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
+ pr_emerg(
+ "NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
+ reason, smp_processor_id());
show_registers(regs);
if (panic_on_io_nmi)
panic("NMI IOCK error: Not continuing");
/* Re-enable the IOCK line, wait for a few seconds */
- reason = (reason & 0xf) | 8;
- outb(reason, 0x61);
+ reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
+ outb(reason, NMI_REASON_PORT);
i = 2000;
while (--i)
udelay(1000);
- reason &= ~8;
- outb(reason, 0x61);
+ reason &= ~NMI_REASON_CLEAR_IOCHK;
+ outb(reason, NMI_REASON_PORT);
}
static notrace __kprobes void
@@ -366,15 +368,14 @@ unknown_nmi_error(unsigned char reason,
return;
}
#endif
- printk(KERN_EMERG
- "Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
- reason, smp_processor_id());
+ pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
+ reason, smp_processor_id());
- printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
+ pr_emerg("Do you have a strange power saving mode enabled?\n");
if (panic_on_unrecovered_nmi)
panic("NMI: Not continuing");
- printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
+ pr_emerg("Dazed and confused, but trying to continue\n");
}
static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
@@ -388,7 +389,7 @@ static notrace __kprobes void default_do
if (!cpu)
reason = get_nmi_reason();
- if (!(reason & 0xc0)) {
+ if (!(reason & NMI_REASON_MASK)) {
if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
== NOTIFY_STOP)
return;
@@ -418,9 +419,9 @@ static notrace __kprobes void default_do
return;
/* AK: following checks seem to be broken on modern chipsets. FIXME */
- if (reason & 0x80)
- mem_parity_error(reason, regs);
- if (reason & 0x40)
+ if (reason & NMI_REASON_SERR)
+ pci_serr_error(reason, regs);
+ if (reason & NMI_REASON_IOCHK)
io_check_error(reason, regs);
#ifdef CONFIG_X86_32
/*
^ permalink raw reply [flat|nested] 44+ messages in thread* [PATCH -v3 2/6] x86, NMI, Add touch_nmi_watchdog to io_check_error delay 2010-10-09 6:49 [PATCH -v3 1/6] x86, NMI, Add NMI symbol constants and rename memory parity to PCI SERR Huang Ying @ 2010-10-09 6:49 ` Huang Ying 2010-10-09 6:49 ` [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler Huang Ying ` (3 subsequent siblings) 4 siblings, 0 replies; 44+ messages in thread From: Huang Ying @ 2010-10-09 6:49 UTC (permalink / raw) To: Don Zickus, Ingo Molnar, H. Peter Anvin Cc: linux-kernel, Andi Kleen, Robert Richter, Huang Ying Prevent the long delay in io_check_error make NMI watchdog timeout. Signed-off-by: Huang Ying <ying.huang@intel.com> --- arch/x86/kernel/traps.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -344,9 +344,11 @@ io_check_error(unsigned char reason, str reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK; outb(reason, NMI_REASON_PORT); - i = 2000; - while (--i) - udelay(1000); + i = 20000; + while (--i) { + touch_nmi_watchdog(); + udelay(100); + } reason &= ~NMI_REASON_CLEAR_IOCHK; outb(reason, NMI_REASON_PORT); ^ permalink raw reply [flat|nested] 44+ messages in thread
* [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler 2010-10-09 6:49 [PATCH -v3 1/6] x86, NMI, Add NMI symbol constants and rename memory parity to PCI SERR Huang Ying 2010-10-09 6:49 ` [PATCH -v3 2/6] x86, NMI, Add touch_nmi_watchdog to io_check_error delay Huang Ying @ 2010-10-09 6:49 ` Huang Ying 2010-10-11 16:13 ` Peter Zijlstra 2010-10-09 6:49 ` [PATCH -v3 4/6] Make NMI reason io port (0x61) can be processed on any CPU Huang Ying ` (2 subsequent siblings) 4 siblings, 1 reply; 44+ messages in thread From: Huang Ying @ 2010-10-09 6:49 UTC (permalink / raw) To: Don Zickus, Ingo Molnar, H. Peter Anvin Cc: linux-kernel, Andi Kleen, Robert Richter, Huang Ying The original NMI handler is quite outdated in many aspects. This patch try to fix it. The order to process the NMI sources are changed as follow: notify_die(DIE_NMI_IPI); notify_die(DIE_NMI); /* process io port 0x61 */ nmi_watchdog_touch(); unknown_nmi(); DIE_NMI_IPI is used to process CPU specific NMI sources, such as perf event, oprofile, crash IPI, etc. While DIE_NMI is used to process non-CPU-specific NMI sources, such as APEI (ACPI Platform Error Interface) GHES (Generic Hardware Error Source), etc. Non-CPU-specific NMI sources can be processed on any CPU, DIE_NMI_IPI must be processed before DIE_NMI. For example, perf event trigger a NMI on CPU 1, at the same time, APEI GHES trigger another NMI on CPU 0. If DIE_NMI is processed before DIE_NMI_IPI, it is possible that APEI GHES is processed on CPU 1, while unknown NMI is gotten on CPU 0. In this new order of processing, performance sensitive NMI sources such as oprofile or perf event will have better performance because the time consuming IO port reading is done after them. Only one NMI is eaten for each NMI handler call, even for PCI SERR and IOCHK NMIs. Because one NMI should be raised for each of them, eating too many NMI will cause unnecessary unknown NMI. The die value used in NMI sources are fixed accordingly. The NMI handler in the patch is designed by Andi Kleen. v3: - Make DIE_NMI and DIE_NMI_UNKNOWN work in more traditional way. v2: - Split process NMI reason (0x61) on non-BSP into another patch Signed-off-by: Huang Ying <ying.huang@intel.com> --- arch/x86/kernel/cpu/perf_event.c | 1 arch/x86/kernel/traps.c | 78 +++++++++++++++++++------------------- arch/x86/oprofile/nmi_int.c | 1 arch/x86/oprofile/nmi_timer_int.c | 2 drivers/char/ipmi/ipmi_watchdog.c | 2 drivers/watchdog/hpwdt.c | 2 6 files changed, 43 insertions(+), 43 deletions(-) --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1245,7 +1245,6 @@ perf_event_nmi_handler(struct notifier_b return NOTIFY_DONE; switch (cmd) { - case DIE_NMI: case DIE_NMI_IPI: break; case DIE_NMIUNKNOWN: --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -385,53 +385,55 @@ static notrace __kprobes void default_do unsigned char reason = 0; int cpu; - cpu = smp_processor_id(); + /* + * CPU-specific NMI must be processed before non-CPU-specific + * NMI, otherwise we may lose it, because the CPU-specific + * NMI can not be detected/processed on other CPUs. + */ + + /* + * CPU-specific NMI: send to specific CPU or NMI sources must + * be processed on specific CPU + */ + if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, 0, 2, SIGINT) + == NOTIFY_STOP) + return; + /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */ + cpu = smp_processor_id(); /* Only the BSP gets external NMIs from the system. */ - if (!cpu) + if (!cpu) { reason = get_nmi_reason(); - - if (!(reason & NMI_REASON_MASK)) { - if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT) - == NOTIFY_STOP) - return; - -#ifdef CONFIG_X86_LOCAL_APIC - if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) - == NOTIFY_STOP) + if (reason & NMI_REASON_MASK) { + if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) + == NOTIFY_STOP) + return; + if (reason & NMI_REASON_SERR) + pci_serr_error(reason, regs); + else if (reason & NMI_REASON_IOCHK) + io_check_error(reason, regs); +#ifdef CONFIG_X86_32 + /* + * Reassert NMI in case it became active + * meanwhile as it's edge-triggered: + */ + reassert_nmi(); +#endif return; + } + } -#ifndef CONFIG_LOCKUP_DETECTOR - /* - * Ok, so this is none of the documented NMI sources, - * so it must be the NMI watchdog. - */ - if (nmi_watchdog_tick(regs, reason)) - return; - if (!do_nmi_callback(regs, cpu)) -#endif /* !CONFIG_LOCKUP_DETECTOR */ - unknown_nmi_error(reason, regs); -#else - unknown_nmi_error(reason, regs); -#endif + if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP) + return; +#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR) + if (nmi_watchdog_tick(regs, reason)) return; - } - if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) + if (do_nmi_callback(regs, cpu)) return; - - /* AK: following checks seem to be broken on modern chipsets. FIXME */ - if (reason & NMI_REASON_SERR) - pci_serr_error(reason, regs); - if (reason & NMI_REASON_IOCHK) - io_check_error(reason, regs); -#ifdef CONFIG_X86_32 - /* - * Reassert NMI in case it became active meanwhile - * as it's edge-triggered: - */ - reassert_nmi(); #endif + + unknown_nmi_error(reason, regs); } dotraplinkage notrace __kprobes void --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -64,7 +64,6 @@ static int profile_exceptions_notify(str int ret = NOTIFY_DONE; switch (val) { - case DIE_NMI: case DIE_NMI_IPI: if (ctr_running) model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs)); --- a/arch/x86/oprofile/nmi_timer_int.c +++ b/arch/x86/oprofile/nmi_timer_int.c @@ -25,7 +25,7 @@ static int profile_timer_exceptions_noti int ret = NOTIFY_DONE; switch (val) { - case DIE_NMI: + case DIE_NMI_IPI: oprofile_add_sample(args->regs, 0); ret = NOTIFY_STOP; break; --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c @@ -1080,7 +1080,7 @@ ipmi_nmi(struct notifier_block *self, un { struct die_args *args = data; - if (val != DIE_NMI) + if (val != DIE_NMIUNKNOWN) return NOTIFY_OK; /* Hack, if it's a memory or I/O error, ignore it. */ --- a/drivers/watchdog/hpwdt.c +++ b/drivers/watchdog/hpwdt.c @@ -469,7 +469,7 @@ static int hpwdt_pretimeout(struct notif unsigned long rom_pl; static int die_nmi_called; - if (ulReason != DIE_NMI && ulReason != DIE_NMI_IPI) + if (ulReason != DIE_NMIUNKNOWN) goto out; if (!hpwdt_nmi_decoding) ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler 2010-10-09 6:49 ` [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler Huang Ying @ 2010-10-11 16:13 ` Peter Zijlstra 2010-10-11 20:35 ` Don Zickus 2010-10-12 0:50 ` Huang Ying 0 siblings, 2 replies; 44+ messages in thread From: Peter Zijlstra @ 2010-10-11 16:13 UTC (permalink / raw) To: Huang Ying Cc: Don Zickus, Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen, Robert Richter On Sat, 2010-10-09 at 14:49 +0800, Huang Ying wrote: > notify_die(DIE_NMI_IPI); > notify_die(DIE_NMI); > /* process io port 0x61 */ > nmi_watchdog_touch(); > unknown_nmi(); Why keep NMI_IPI? What the heck is it for? I'd like to see: DIE_NMI DIE_NMI_UNKNOWN In DIE_NMI we walk the chain and deal with NMIs, in DIE_NMI_UNKNOWN we try to consume extra NMIs where possible (like the much discussed extra PMU interrupts). ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler 2010-10-11 16:13 ` Peter Zijlstra @ 2010-10-11 20:35 ` Don Zickus 2010-10-12 0:50 ` Huang Ying 1 sibling, 0 replies; 44+ messages in thread From: Don Zickus @ 2010-10-11 20:35 UTC (permalink / raw) To: Peter Zijlstra Cc: Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen, Robert Richter On Mon, Oct 11, 2010 at 06:13:55PM +0200, Peter Zijlstra wrote: > On Sat, 2010-10-09 at 14:49 +0800, Huang Ying wrote: > > notify_die(DIE_NMI_IPI); > > notify_die(DIE_NMI); > > /* process io port 0x61 */ > > nmi_watchdog_touch(); > > unknown_nmi(); > > Why keep NMI_IPI? What the heck is it for? > > I'd like to see: > > DIE_NMI > DIE_NMI_UNKNOWN > > In DIE_NMI we walk the chain and deal with NMIs, in DIE_NMI_UNKNOWN we > try to consume extra NMIs where possible (like the much discussed extra > PMU interrupts). Yeah, there was another much discussed thread about creating an NMI notifier chain (instead of using the die_chain). And then re-arrange handlers according to a priority scheme. I wouldn't mind have Ingo commit most of this patchset to a temporary work-in-progress branch and have us build more changes on top. A lot of these changes are a good stepping stone towards simplifying things. Cheers, Don ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler 2010-10-11 16:13 ` Peter Zijlstra 2010-10-11 20:35 ` Don Zickus @ 2010-10-12 0:50 ` Huang Ying 2010-10-12 6:04 ` Peter Zijlstra 1 sibling, 1 reply; 44+ messages in thread From: Huang Ying @ 2010-10-12 0:50 UTC (permalink / raw) To: Peter Zijlstra Cc: Don Zickus, Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org, Andi Kleen, Robert Richter On Tue, 2010-10-12 at 00:13 +0800, Peter Zijlstra wrote: > On Sat, 2010-10-09 at 14:49 +0800, Huang Ying wrote: > > notify_die(DIE_NMI_IPI); > > notify_die(DIE_NMI); > > /* process io port 0x61 */ > > nmi_watchdog_touch(); > > unknown_nmi(); > > Why keep NMI_IPI? What the heck is it for? DIE_NMI_IPI is used for CPU-specific or CPU-local NMIs, such as perf NMI. While DIE_NMI is used for non-CPU-specific or global NMIs, such as NMI notification from source bridge. The order between these two is important. So we use two die value to enforce the order. > I'd like to see: > > DIE_NMI > DIE_NMI_UNKNOWN > > In DIE_NMI we walk the chain and deal with NMIs, in DIE_NMI_UNKNOWN we > try to consume extra NMIs where possible (like the much discussed extra > PMU interrupts). Best Regards, Huang Ying ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler 2010-10-12 0:50 ` Huang Ying @ 2010-10-12 6:04 ` Peter Zijlstra 2010-10-12 6:14 ` Huang Ying 0 siblings, 1 reply; 44+ messages in thread From: Peter Zijlstra @ 2010-10-12 6:04 UTC (permalink / raw) To: Huang Ying Cc: Don Zickus, Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org, Andi Kleen, Robert Richter On Tue, 2010-10-12 at 08:50 +0800, Huang Ying wrote: > On Tue, 2010-10-12 at 00:13 +0800, Peter Zijlstra wrote: > > On Sat, 2010-10-09 at 14:49 +0800, Huang Ying wrote: > > > notify_die(DIE_NMI_IPI); > > > notify_die(DIE_NMI); > > > /* process io port 0x61 */ > > > nmi_watchdog_touch(); > > > unknown_nmi(); > > > > Why keep NMI_IPI? What the heck is it for? > > DIE_NMI_IPI is used for CPU-specific or CPU-local NMIs, such as perf > NMI. While DIE_NMI is used for non-CPU-specific or global NMIs, such as > NMI notification from source bridge. > > The order between these two is important. So we use two die value to > enforce the order. But you can't know about that, there is no reason field to distinguish between these cases, so you might as well fold it into a single notifier chain and be done with it. There is no good reason to call two chains when one is enough. Also, the IPI name is terrible, its not IPI related at all. Please kill the thing. ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler 2010-10-12 6:04 ` Peter Zijlstra @ 2010-10-12 6:14 ` Huang Ying 2010-10-12 6:31 ` Peter Zijlstra 0 siblings, 1 reply; 44+ messages in thread From: Huang Ying @ 2010-10-12 6:14 UTC (permalink / raw) To: Peter Zijlstra Cc: Don Zickus, Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org, Andi Kleen, Robert Richter On Tue, 2010-10-12 at 14:04 +0800, Peter Zijlstra wrote: > On Tue, 2010-10-12 at 08:50 +0800, Huang Ying wrote: > > On Tue, 2010-10-12 at 00:13 +0800, Peter Zijlstra wrote: > > > On Sat, 2010-10-09 at 14:49 +0800, Huang Ying wrote: > > > > notify_die(DIE_NMI_IPI); > > > > notify_die(DIE_NMI); > > > > /* process io port 0x61 */ > > > > nmi_watchdog_touch(); > > > > unknown_nmi(); > > > > > > Why keep NMI_IPI? What the heck is it for? > > > > DIE_NMI_IPI is used for CPU-specific or CPU-local NMIs, such as perf > > NMI. While DIE_NMI is used for non-CPU-specific or global NMIs, such as > > NMI notification from source bridge. > > > > The order between these two is important. So we use two die value to > > enforce the order. > > But you can't know about that, there is no reason field to distinguish > between these cases, so you might as well fold it into a single notifier > chain and be done with it. NMI users know that. Such as perf uses CPU-specific NMI, while APEI GHES uses non-CPU-specific NMI. Different users expect different die values, such as perf expects DIE_NMI_IPI, while APEI GHES expects DIE_NMI, so that perf can be checked before APEI GHES. > There is no good reason to call two chains when one is enough. > > Also, the IPI name is terrible, its not IPI related at all. Please kill > the thing. I agree with you that IPI is not a good name. Can you suggest a better name? Best Regards, Huang Ying ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler 2010-10-12 6:14 ` Huang Ying @ 2010-10-12 6:31 ` Peter Zijlstra 2010-10-12 6:37 ` Huang Ying 0 siblings, 1 reply; 44+ messages in thread From: Peter Zijlstra @ 2010-10-12 6:31 UTC (permalink / raw) To: Huang Ying Cc: Don Zickus, Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org, Andi Kleen, Robert Richter On Tue, 2010-10-12 at 14:14 +0800, Huang Ying wrote: > On Tue, 2010-10-12 at 14:04 +0800, Peter Zijlstra wrote: > > On Tue, 2010-10-12 at 08:50 +0800, Huang Ying wrote: > > > On Tue, 2010-10-12 at 00:13 +0800, Peter Zijlstra wrote: > > > > On Sat, 2010-10-09 at 14:49 +0800, Huang Ying wrote: > > > > > notify_die(DIE_NMI_IPI); > > > > > notify_die(DIE_NMI); > > > > > /* process io port 0x61 */ > > > > > nmi_watchdog_touch(); > > > > > unknown_nmi(); > > > > > > > > Why keep NMI_IPI? What the heck is it for? > > > > > > DIE_NMI_IPI is used for CPU-specific or CPU-local NMIs, such as perf > > > NMI. While DIE_NMI is used for non-CPU-specific or global NMIs, such as > > > NMI notification from source bridge. > > > > > > The order between these two is important. So we use two die value to > > > enforce the order. > > > > But you can't know about that, there is no reason field to distinguish > > between these cases, so you might as well fold it into a single notifier > > chain and be done with it. > > NMI users know that. Such as perf uses CPU-specific NMI, while APEI GHES > uses non-CPU-specific NMI. Different users expect different die values, > such as perf expects DIE_NMI_IPI, while APEI GHES expects DIE_NMI, so > that perf can be checked before APEI GHES. GAAHHH, so why can't they live on a single notifier list? Its got priorities to deal with that? ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler 2010-10-12 6:31 ` Peter Zijlstra @ 2010-10-12 6:37 ` Huang Ying 2010-10-12 6:40 ` Peter Zijlstra 0 siblings, 1 reply; 44+ messages in thread From: Huang Ying @ 2010-10-12 6:37 UTC (permalink / raw) To: Peter Zijlstra Cc: Don Zickus, Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org, Andi Kleen, Robert Richter On Tue, 2010-10-12 at 14:31 +0800, Peter Zijlstra wrote: > On Tue, 2010-10-12 at 14:14 +0800, Huang Ying wrote: > > On Tue, 2010-10-12 at 14:04 +0800, Peter Zijlstra wrote: > > > On Tue, 2010-10-12 at 08:50 +0800, Huang Ying wrote: > > > > On Tue, 2010-10-12 at 00:13 +0800, Peter Zijlstra wrote: > > > > > On Sat, 2010-10-09 at 14:49 +0800, Huang Ying wrote: > > > > > > notify_die(DIE_NMI_IPI); > > > > > > notify_die(DIE_NMI); > > > > > > /* process io port 0x61 */ > > > > > > nmi_watchdog_touch(); > > > > > > unknown_nmi(); > > > > > > > > > > Why keep NMI_IPI? What the heck is it for? > > > > > > > > DIE_NMI_IPI is used for CPU-specific or CPU-local NMIs, such as perf > > > > NMI. While DIE_NMI is used for non-CPU-specific or global NMIs, such as > > > > NMI notification from source bridge. > > > > > > > > The order between these two is important. So we use two die value to > > > > enforce the order. > > > > > > But you can't know about that, there is no reason field to distinguish > > > between these cases, so you might as well fold it into a single notifier > > > chain and be done with it. > > > > NMI users know that. Such as perf uses CPU-specific NMI, while APEI GHES > > uses non-CPU-specific NMI. Different users expect different die values, > > such as perf expects DIE_NMI_IPI, while APEI GHES expects DIE_NMI, so > > that perf can be checked before APEI GHES. > > GAAHHH, so why can't they live on a single notifier list? Its got > priorities to deal with that? It is possible to use priority to deal with the order. I just think two die_value make order more explicit. Best Regards, Huang Ying ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler 2010-10-12 6:37 ` Huang Ying @ 2010-10-12 6:40 ` Peter Zijlstra 2010-10-12 6:45 ` Huang Ying 0 siblings, 1 reply; 44+ messages in thread From: Peter Zijlstra @ 2010-10-12 6:40 UTC (permalink / raw) To: Huang Ying Cc: Don Zickus, Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org, Andi Kleen, Robert Richter On Tue, 2010-10-12 at 14:37 +0800, Huang Ying wrote: > I just think two die_value make order more explicit. I think it sucks, you get to iterate the list twice for no gain. ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler 2010-10-12 6:40 ` Peter Zijlstra @ 2010-10-12 6:45 ` Huang Ying 2010-10-12 6:49 ` Peter Zijlstra 0 siblings, 1 reply; 44+ messages in thread From: Huang Ying @ 2010-10-12 6:45 UTC (permalink / raw) To: Peter Zijlstra Cc: Don Zickus, Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org, Andi Kleen, Robert Richter On Tue, 2010-10-12 at 14:40 +0800, Peter Zijlstra wrote: > On Tue, 2010-10-12 at 14:37 +0800, Huang Ying wrote: > > I just think two die_value make order more explicit. > > I think it sucks, you get to iterate the list twice for no gain. The gain is the code readability. And maybe we can split the list into 2 in the future. Best Regards, Huang Ying ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler 2010-10-12 6:45 ` Huang Ying @ 2010-10-12 6:49 ` Peter Zijlstra 2010-10-12 6:54 ` Huang Ying 2010-10-12 13:51 ` Andi Kleen 0 siblings, 2 replies; 44+ messages in thread From: Peter Zijlstra @ 2010-10-12 6:49 UTC (permalink / raw) To: Huang Ying Cc: Don Zickus, Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org, Andi Kleen, Robert Richter On Tue, 2010-10-12 at 14:45 +0800, Huang Ying wrote: > On Tue, 2010-10-12 at 14:40 +0800, Peter Zijlstra wrote: > > On Tue, 2010-10-12 at 14:37 +0800, Huang Ying wrote: > > > I just think two die_value make order more explicit. > > > > I think it sucks, you get to iterate the list twice for no gain. > > The gain is the code readability. And maybe we can split the list into 2 > in the future. No, its not making sense, there's only one event source - the NMI, it doesn't make any sense what so ever to then artificially split it in two. ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler 2010-10-12 6:49 ` Peter Zijlstra @ 2010-10-12 6:54 ` Huang Ying 2010-10-12 13:51 ` Andi Kleen 1 sibling, 0 replies; 44+ messages in thread From: Huang Ying @ 2010-10-12 6:54 UTC (permalink / raw) To: Peter Zijlstra Cc: Don Zickus, Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org, Andi Kleen, Robert Richter On Tue, 2010-10-12 at 14:49 +0800, Peter Zijlstra wrote: > On Tue, 2010-10-12 at 14:45 +0800, Huang Ying wrote: > > On Tue, 2010-10-12 at 14:40 +0800, Peter Zijlstra wrote: > > > On Tue, 2010-10-12 at 14:37 +0800, Huang Ying wrote: > > > > I just think two die_value make order more explicit. > > > > > > I think it sucks, you get to iterate the list twice for no gain. > > > > The gain is the code readability. And maybe we can split the list into 2 > > in the future. > > No, its not making sense, there's only one event source - the NMI, it > doesn't make any sense what so ever to then artificially split it in > two. They are two steps to process NMI. Best Regards, Huang Ying ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler 2010-10-12 6:49 ` Peter Zijlstra 2010-10-12 6:54 ` Huang Ying @ 2010-10-12 13:51 ` Andi Kleen 2010-10-12 14:15 ` Peter Zijlstra 1 sibling, 1 reply; 44+ messages in thread From: Andi Kleen @ 2010-10-12 13:51 UTC (permalink / raw) To: Peter Zijlstra Cc: Huang Ying, Don Zickus, Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org, Andi Kleen, Robert Richter > No, its not making sense, there's only one event source - the NMI, it > doesn't make any sense what so ever to then artificially split it in > two. I can see that "unknown NMI" is a different kind of event than "NMI event triggered" e.g. the debugger would only hook into "unknown NMI" -Andi -- ak@linux.intel.com -- Speaking for myself only. ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler 2010-10-12 13:51 ` Andi Kleen @ 2010-10-12 14:15 ` Peter Zijlstra 2010-10-27 16:45 ` Don Zickus 0 siblings, 1 reply; 44+ messages in thread From: Peter Zijlstra @ 2010-10-12 14:15 UTC (permalink / raw) To: Andi Kleen Cc: Huang Ying, Don Zickus, Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org, Robert Richter On Tue, 2010-10-12 at 15:51 +0200, Andi Kleen wrote: > > No, its not making sense, there's only one event source - the NMI, it > > doesn't make any sense what so ever to then artificially split it in > > two. > > I can see that "unknown NMI" is a different kind of event than > "NMI event triggered" > > e.g. the debugger would only hook into "unknown NMI" Sure, I'm not arguing about DIE_NMI vs DIE_NMI_UNKNOWN. I simply don't see the point of DIE_NMI_IPI. ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler 2010-10-12 14:15 ` Peter Zijlstra @ 2010-10-27 16:45 ` Don Zickus 2010-10-27 17:08 ` Peter Zijlstra 0 siblings, 1 reply; 44+ messages in thread From: Don Zickus @ 2010-10-27 16:45 UTC (permalink / raw) To: Peter Zijlstra Cc: Andi Kleen, Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org, Robert Richter On Tue, Oct 12, 2010 at 04:15:27PM +0200, Peter Zijlstra wrote: > On Tue, 2010-10-12 at 15:51 +0200, Andi Kleen wrote: > > > No, its not making sense, there's only one event source - the NMI, it > > > doesn't make any sense what so ever to then artificially split it in > > > two. > > > > I can see that "unknown NMI" is a different kind of event than > > "NMI event triggered" > > > > e.g. the debugger would only hook into "unknown NMI" > > Sure, I'm not arguing about DIE_NMI vs DIE_NMI_UNKNOWN. I simply don't > see the point of DIE_NMI_IPI. I threw together a patch last week and did some panic and perf testing on it. Things still seemed to work correctly. This patch goes on top of the 3/6 patch in this thread. I assume this is sorta of what Peter was looking for. Cheers, Don diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index 5bdfca8..e28ec43 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h @@ -18,7 +18,6 @@ enum die_val { DIE_TRAP, DIE_GPF, DIE_CALL, - DIE_NMI_IPI, DIE_PAGE_FAULT, DIE_NMIUNKNOWN, }; diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index cefd694..7f9e3ac 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -53,7 +53,6 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, switch (cmd) { case DIE_NMI: - case DIE_NMI_IPI: break; default: diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index f744f54..1f657a2 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -602,7 +602,7 @@ void __cpuinit uv_cpu_init(void) */ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) { - if (reason != DIE_NMI_IPI) + if (reason != DIE_NMIUNKNOWN) return NOTIFY_OK; if (in_crash_kexec) diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index e7dbde7..7e0dba6 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -83,7 +83,7 @@ static int mce_raise_notify(struct notifier_block *self, struct die_args *args = (struct die_args *)data; int cpu = smp_processor_id(); struct mce *m = &__get_cpu_var(injectm); - if (val != DIE_NMI_IPI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) + if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) return NOTIFY_DONE; cpumask_clear_cpu(cpu, mce_inject_cpumask); if (m->inject_flags & MCJ_EXCEPTION) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index da98b6d..ccd3b6d 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1226,7 +1226,7 @@ perf_event_nmi_handler(struct notifier_block *self, return NOTIFY_DONE; switch (cmd) { - case DIE_NMI_IPI: + case DIE_NMI: break; case DIE_NMIUNKNOWN: this_nmi = percpu_read(irq_stat.__nmi_count); diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 852b819..805c5ab 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -523,10 +523,6 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) } return NOTIFY_DONE; - case DIE_NMI_IPI: - /* Just ignore, we will handle the roundup on DIE_NMI. */ - return NOTIFY_DONE; - case DIE_NMIUNKNOWN: if (was_in_debug_nmi[raw_smp_processor_id()]) { was_in_debug_nmi[raw_smp_processor_id()] = 0; diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index e3af342..fd7dc85 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -753,7 +753,7 @@ static int crash_nmi_callback(struct notifier_block *self, { int cpu; - if (val != DIE_NMI_IPI) + if (val != DIE_NMI) return NOTIFY_OK; cpu = raw_smp_processor_id(); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index d8acab3..9e56e3d 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -395,8 +395,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) * CPU-specific NMI: send to specific CPU or NMI sources must * be processed on specific CPU */ - if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, 0, 2, SIGINT) - == NOTIFY_STOP) + if (notify_die(DIE_NMI, "nmi_ipi", regs, 0, 2, SIGINT) == NOTIFY_STOP) return; /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */ @@ -405,9 +404,6 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) if (!cpu) { reason = get_nmi_reason(); if (reason & NMI_REASON_MASK) { - if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) - == NOTIFY_STOP) - return; if (reason & NMI_REASON_SERR) pci_serr_error(reason, regs); else if (reason & NMI_REASON_IOCHK) @@ -423,9 +419,6 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) } } - if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP) - return; - #if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR) if (nmi_watchdog_tick(regs, reason)) return; diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 57f01bb..dd0039c 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -64,7 +64,7 @@ static int profile_exceptions_notify(struct notifier_block *self, int ret = NOTIFY_DONE; switch (val) { - case DIE_NMI_IPI: + case DIE_NMI: if (ctr_running) model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs)); else if (!nmi_enabled) diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c index ab72a21..e3ecb71 100644 --- a/arch/x86/oprofile/nmi_timer_int.c +++ b/arch/x86/oprofile/nmi_timer_int.c @@ -25,7 +25,7 @@ static int profile_timer_exceptions_notify(struct notifier_block *self, int ret = NOTIFY_DONE; switch (val) { - case DIE_NMI_IPI: + case DIE_NMI: oprofile_add_sample(args->regs, 0); ret = NOTIFY_STOP; break; ^ permalink raw reply related [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler 2010-10-27 16:45 ` Don Zickus @ 2010-10-27 17:08 ` Peter Zijlstra 2010-10-27 18:07 ` Don Zickus 2010-11-02 17:50 ` Don Zickus 0 siblings, 2 replies; 44+ messages in thread From: Peter Zijlstra @ 2010-10-27 17:08 UTC (permalink / raw) To: Don Zickus Cc: Andi Kleen, Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org, Robert Richter On Wed, 2010-10-27 at 12:45 -0400, Don Zickus wrote: > I assume this is sorta of what Peter was looking for. Yeah close, except the prio field of the various notification blocks need to get adjusted to preserve semantics. ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler 2010-10-27 17:08 ` Peter Zijlstra @ 2010-10-27 18:07 ` Don Zickus 2010-11-02 17:50 ` Don Zickus 1 sibling, 0 replies; 44+ messages in thread From: Don Zickus @ 2010-10-27 18:07 UTC (permalink / raw) To: Peter Zijlstra Cc: Andi Kleen, Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org, Robert Richter On Wed, Oct 27, 2010 at 07:08:44PM +0200, Peter Zijlstra wrote: > On Wed, 2010-10-27 at 12:45 -0400, Don Zickus wrote: > > I assume this is sorta of what Peter was looking for. > > Yeah close, except the prio field of the various notification blocks > need to get adjusted to preserve semantics. Heh. Of course. Let me add that to. Thanks, Don ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler 2010-10-27 17:08 ` Peter Zijlstra 2010-10-27 18:07 ` Don Zickus @ 2010-11-02 17:50 ` Don Zickus 2010-11-02 18:16 ` Huang Ying 1 sibling, 1 reply; 44+ messages in thread From: Don Zickus @ 2010-11-02 17:50 UTC (permalink / raw) To: Peter Zijlstra Cc: Andi Kleen, Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org, Robert Richter On Wed, Oct 27, 2010 at 07:08:44PM +0200, Peter Zijlstra wrote: > On Wed, 2010-10-27 at 12:45 -0400, Don Zickus wrote: > > I assume this is sorta of what Peter was looking for. > > Yeah close, except the prio field of the various notification blocks > need to get adjusted to preserve semantics. Here is my next crack at it. I added some global defines to make it more clear what the priorities are. Again the intent was to roll something like this patch into Huang's original patch. This would help simplify the notifier chain and add priorities to maintain the original relationships. Cheers, Don diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index 5bdfca8..e28ec43 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h @@ -18,7 +18,6 @@ enum die_val { DIE_TRAP, DIE_GPF, DIE_CALL, - DIE_NMI_IPI, DIE_PAGE_FAULT, DIE_NMIUNKNOWN, }; diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 932f0f8..cfb6156 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -68,6 +68,26 @@ static inline int nmi_watchdog_active(void) } #endif +/* + * Define some priorities for the nmi notifier call chain. + * + * Create a local nmi bit that has a higher priority than + * external nmis, because the local ones are more frequent. + * + * Also setup some default high/normal/low settings for + * subsystems to registers with. Using 4 bits to seperate + * the priorities. This can go alot higher if needed be. + */ + +#define NMI_LOCAL_SHIFT 16 /* randomly picked */ +#define NMI_LOCAL_BIT (1ULL << NMI_LOCAL_SHIFT) +#define NMI_HIGH_PRIOR (1ULL << 8) +#define NMI_NORMAL_PRIOR (1ULL << 4) +#define NMI_LOW_PRIOR (1ULL << 0) +#define NMI_LOCAL_HIGH_PRIOR (NMI_LOCAL_BIT | NMI_HIGH_PRIOR) +#define NMI_LOCAL_NORMAL_PRIOR (NMI_LOCAL_BIT | NMI_NORMAL_PRIOR) +#define NMI_LOCAL_LOW_PRIOR (NMI_LOCAL_BIT | NMI_LOW_PRIOR) + void lapic_watchdog_stop(void); int lapic_watchdog_init(unsigned nmi_hz); int lapic_wd_event(unsigned nmi_hz); diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index 07a837d..44ff8c9 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -67,7 +67,6 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, switch (cmd) { case DIE_NMI: - case DIE_NMI_IPI: break; default: @@ -95,7 +94,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, static __read_mostly struct notifier_block backtrace_notifier = { .notifier_call = arch_trigger_all_cpu_backtrace_handler, .next = NULL, - .priority = 1 + .priority = NMI_LOCAL_LOW_PRIOR, }; static int __init register_trigger_all_cpu_backtrace(void) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index be6f9c4..e6c6294 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -605,7 +605,7 @@ void __cpuinit uv_cpu_init(void) */ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) { - if (reason != DIE_NMI_IPI) + if (reason != DIE_NMIUNKNOWN) return NOTIFY_OK; if (in_crash_kexec) diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index e7dbde7..a779719 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -25,6 +25,7 @@ #include <linux/gfp.h> #include <asm/mce.h> #include <asm/apic.h> +#include <asm/nmi.h> /* Update fake mce registers on current CPU. */ static void inject_mce(struct mce *m) @@ -83,7 +84,7 @@ static int mce_raise_notify(struct notifier_block *self, struct die_args *args = (struct die_args *)data; int cpu = smp_processor_id(); struct mce *m = &__get_cpu_var(injectm); - if (val != DIE_NMI_IPI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) + if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) return NOTIFY_DONE; cpumask_clear_cpu(cpu, mce_inject_cpumask); if (m->inject_flags & MCJ_EXCEPTION) @@ -95,7 +96,7 @@ static int mce_raise_notify(struct notifier_block *self, static struct notifier_block mce_raise_nb = { .notifier_call = mce_raise_notify, - .priority = 1000, + .priority = NMI_LOCAL_NORMAL_PRIOR, }; /* Inject mce on current CPU */ diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index da98b6d..55a3797 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1226,7 +1226,7 @@ perf_event_nmi_handler(struct notifier_block *self, return NOTIFY_DONE; switch (cmd) { - case DIE_NMI_IPI: + case DIE_NMI: break; case DIE_NMIUNKNOWN: this_nmi = percpu_read(irq_stat.__nmi_count); @@ -1276,7 +1276,7 @@ perf_event_nmi_handler(struct notifier_block *self, static __read_mostly struct notifier_block perf_event_nmi_notifier = { .notifier_call = perf_event_nmi_handler, .next = NULL, - .priority = 1 + .priority = NMI_LOCAL_LOW_PRIOR, }; static struct event_constraint unconstrained; diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 852b819..020d052 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -523,10 +523,6 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) } return NOTIFY_DONE; - case DIE_NMI_IPI: - /* Just ignore, we will handle the roundup on DIE_NMI. */ - return NOTIFY_DONE; - case DIE_NMIUNKNOWN: if (was_in_debug_nmi[raw_smp_processor_id()]) { was_in_debug_nmi[raw_smp_processor_id()] = 0; @@ -604,7 +600,7 @@ static struct notifier_block kgdb_notifier = { /* * Lowest-prio notifier priority, we want to be notified last: */ - .priority = -INT_MAX, + .priority = NMI_LOCAL_LOW_PRIOR, }; /** diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index e3af342..eabbde6 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -18,6 +18,7 @@ #include <asm/pci_x86.h> #include <asm/virtext.h> #include <asm/cpu.h> +#include <asm/nmi.h> #ifdef CONFIG_X86_32 # include <linux/ctype.h> @@ -753,7 +754,7 @@ static int crash_nmi_callback(struct notifier_block *self, { int cpu; - if (val != DIE_NMI_IPI) + if (val != DIE_NMI) return NOTIFY_OK; cpu = raw_smp_processor_id(); @@ -784,6 +785,8 @@ static void smp_send_nmi_allbutself(void) static struct notifier_block crash_nmi_nb = { .notifier_call = crash_nmi_callback, + /* we want to be the first one called */ + .priority = NMI_LOCAL_HIGH_PRIOR+1, }; /* Halt all other CPUs, calling the specified function on each of them diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index d8acab3..9e56e3d 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -395,8 +395,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) * CPU-specific NMI: send to specific CPU or NMI sources must * be processed on specific CPU */ - if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, 0, 2, SIGINT) - == NOTIFY_STOP) + if (notify_die(DIE_NMI, "nmi_ipi", regs, 0, 2, SIGINT) == NOTIFY_STOP) return; /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */ @@ -405,9 +404,6 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) if (!cpu) { reason = get_nmi_reason(); if (reason & NMI_REASON_MASK) { - if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) - == NOTIFY_STOP) - return; if (reason & NMI_REASON_SERR) pci_serr_error(reason, regs); else if (reason & NMI_REASON_IOCHK) @@ -423,9 +419,6 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) } } - if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP) - return; - #if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR) if (nmi_watchdog_tick(regs, reason)) return; diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 57f01bb..43b4f35 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -64,7 +64,7 @@ static int profile_exceptions_notify(struct notifier_block *self, int ret = NOTIFY_DONE; switch (val) { - case DIE_NMI_IPI: + case DIE_NMI: if (ctr_running) model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs)); else if (!nmi_enabled) @@ -360,7 +360,7 @@ static void nmi_cpu_setup(void *dummy) static struct notifier_block profile_exceptions_nb = { .notifier_call = profile_exceptions_notify, .next = NULL, - .priority = 2 + .priority = NMI_LOCAL_LOW_PRIOR, }; static void nmi_cpu_restore_registers(struct op_msrs *msrs) diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c index ab72a21..0197aa1 100644 --- a/arch/x86/oprofile/nmi_timer_int.c +++ b/arch/x86/oprofile/nmi_timer_int.c @@ -25,7 +25,7 @@ static int profile_timer_exceptions_notify(struct notifier_block *self, int ret = NOTIFY_DONE; switch (val) { - case DIE_NMI_IPI: + case DIE_NMI: oprofile_add_sample(args->regs, 0); ret = NOTIFY_STOP; break; @@ -38,7 +38,7 @@ static int profile_timer_exceptions_notify(struct notifier_block *self, static struct notifier_block profile_timer_exceptions_nb = { .notifier_call = profile_timer_exceptions_notify, .next = NULL, - .priority = 0 + .priority = NMI_EXT_LOW_PRIOR, }; static int timer_start(void) ^ permalink raw reply related [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler 2010-11-02 17:50 ` Don Zickus @ 2010-11-02 18:16 ` Huang Ying 2010-11-02 19:11 ` Don Zickus 2010-11-02 20:47 ` Don Zickus 0 siblings, 2 replies; 44+ messages in thread From: Huang Ying @ 2010-11-02 18:16 UTC (permalink / raw) To: Don Zickus Cc: Peter Zijlstra, Andi Kleen, Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org, Robert Richter [-- Attachment #1: Type: text/plain, Size: 10108 bytes --] Hi, Don, On Tue, 2010-11-02 at 10:50 -0700, Don Zickus wrote: > On Wed, Oct 27, 2010 at 07:08:44PM +0200, Peter Zijlstra wrote: > > On Wed, 2010-10-27 at 12:45 -0400, Don Zickus wrote: > > > I assume this is sorta of what Peter was looking for. > > > > Yeah close, except the prio field of the various notification blocks > > need to get adjusted to preserve semantics. > > Here is my next crack at it. I added some global defines to make it more > clear what the priorities are. > > Again the intent was to roll something like this patch into Huang's > original patch. This would help simplify the notifier chain and add > priorities to maintain the original relationships. > > Cheers, > Don > > > diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h > index 5bdfca8..e28ec43 100644 > --- a/arch/x86/include/asm/kdebug.h > +++ b/arch/x86/include/asm/kdebug.h > @@ -18,7 +18,6 @@ enum die_val { > DIE_TRAP, > DIE_GPF, > DIE_CALL, > - DIE_NMI_IPI, > DIE_PAGE_FAULT, > DIE_NMIUNKNOWN, > }; > diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h > index 932f0f8..cfb6156 100644 > --- a/arch/x86/include/asm/nmi.h > +++ b/arch/x86/include/asm/nmi.h > @@ -68,6 +68,26 @@ static inline int nmi_watchdog_active(void) > } > #endif > > +/* > + * Define some priorities for the nmi notifier call chain. > + * > + * Create a local nmi bit that has a higher priority than > + * external nmis, because the local ones are more frequent. > + * > + * Also setup some default high/normal/low settings for > + * subsystems to registers with. Using 4 bits to seperate > + * the priorities. This can go alot higher if needed be. > + */ > + > +#define NMI_LOCAL_SHIFT 16 /* randomly picked */ > +#define NMI_LOCAL_BIT (1ULL << NMI_LOCAL_SHIFT) > +#define NMI_HIGH_PRIOR (1ULL << 8) > +#define NMI_NORMAL_PRIOR (1ULL << 4) > +#define NMI_LOW_PRIOR (1ULL << 0) > +#define NMI_LOCAL_HIGH_PRIOR (NMI_LOCAL_BIT | NMI_HIGH_PRIOR) > +#define NMI_LOCAL_NORMAL_PRIOR (NMI_LOCAL_BIT | NMI_NORMAL_PRIOR) > +#define NMI_LOCAL_LOW_PRIOR (NMI_LOCAL_BIT | NMI_LOW_PRIOR) > + > void lapic_watchdog_stop(void); > int lapic_watchdog_init(unsigned nmi_hz); > int lapic_wd_event(unsigned nmi_hz); > diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c > index 07a837d..44ff8c9 100644 > --- a/arch/x86/kernel/apic/hw_nmi.c > +++ b/arch/x86/kernel/apic/hw_nmi.c > @@ -67,7 +67,6 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, > > switch (cmd) { > case DIE_NMI: > - case DIE_NMI_IPI: > break; > > default: > @@ -95,7 +94,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, > static __read_mostly struct notifier_block backtrace_notifier = { > .notifier_call = arch_trigger_all_cpu_backtrace_handler, > .next = NULL, > - .priority = 1 > + .priority = NMI_LOCAL_LOW_PRIOR, > }; > > static int __init register_trigger_all_cpu_backtrace(void) > diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c > index be6f9c4..e6c6294 100644 > --- a/arch/x86/kernel/apic/x2apic_uv_x.c > +++ b/arch/x86/kernel/apic/x2apic_uv_x.c > @@ -605,7 +605,7 @@ void __cpuinit uv_cpu_init(void) > */ > int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) > { > - if (reason != DIE_NMI_IPI) > + if (reason != DIE_NMIUNKNOWN) > return NOTIFY_OK; > > if (in_crash_kexec) > diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c > index e7dbde7..a779719 100644 > --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c > +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c > @@ -25,6 +25,7 @@ > #include <linux/gfp.h> > #include <asm/mce.h> > #include <asm/apic.h> > +#include <asm/nmi.h> > > /* Update fake mce registers on current CPU. */ > static void inject_mce(struct mce *m) > @@ -83,7 +84,7 @@ static int mce_raise_notify(struct notifier_block *self, > struct die_args *args = (struct die_args *)data; > int cpu = smp_processor_id(); > struct mce *m = &__get_cpu_var(injectm); > - if (val != DIE_NMI_IPI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) > + if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) > return NOTIFY_DONE; > cpumask_clear_cpu(cpu, mce_inject_cpumask); > if (m->inject_flags & MCJ_EXCEPTION) > @@ -95,7 +96,7 @@ static int mce_raise_notify(struct notifier_block *self, > > static struct notifier_block mce_raise_nb = { > .notifier_call = mce_raise_notify, > - .priority = 1000, > + .priority = NMI_LOCAL_NORMAL_PRIOR, > }; > > /* Inject mce on current CPU */ > diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c > index da98b6d..55a3797 100644 > --- a/arch/x86/kernel/cpu/perf_event.c > +++ b/arch/x86/kernel/cpu/perf_event.c > @@ -1226,7 +1226,7 @@ perf_event_nmi_handler(struct notifier_block *self, > return NOTIFY_DONE; > > switch (cmd) { > - case DIE_NMI_IPI: > + case DIE_NMI: > break; > case DIE_NMIUNKNOWN: > this_nmi = percpu_read(irq_stat.__nmi_count); > @@ -1276,7 +1276,7 @@ perf_event_nmi_handler(struct notifier_block *self, > static __read_mostly struct notifier_block perf_event_nmi_notifier = { > .notifier_call = perf_event_nmi_handler, > .next = NULL, > - .priority = 1 > + .priority = NMI_LOCAL_LOW_PRIOR, > }; > > static struct event_constraint unconstrained; > diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c > index 852b819..020d052 100644 > --- a/arch/x86/kernel/kgdb.c > +++ b/arch/x86/kernel/kgdb.c > @@ -523,10 +523,6 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) > } > return NOTIFY_DONE; > > - case DIE_NMI_IPI: > - /* Just ignore, we will handle the roundup on DIE_NMI. */ > - return NOTIFY_DONE; > - > case DIE_NMIUNKNOWN: > if (was_in_debug_nmi[raw_smp_processor_id()]) { > was_in_debug_nmi[raw_smp_processor_id()] = 0; > @@ -604,7 +600,7 @@ static struct notifier_block kgdb_notifier = { > /* > * Lowest-prio notifier priority, we want to be notified last: > */ > - .priority = -INT_MAX, > + .priority = NMI_LOCAL_LOW_PRIOR, > }; > > /** > diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c > index e3af342..eabbde6 100644 > --- a/arch/x86/kernel/reboot.c > +++ b/arch/x86/kernel/reboot.c > @@ -18,6 +18,7 @@ > #include <asm/pci_x86.h> > #include <asm/virtext.h> > #include <asm/cpu.h> > +#include <asm/nmi.h> > > #ifdef CONFIG_X86_32 > # include <linux/ctype.h> > @@ -753,7 +754,7 @@ static int crash_nmi_callback(struct notifier_block *self, > { > int cpu; > > - if (val != DIE_NMI_IPI) > + if (val != DIE_NMI) > return NOTIFY_OK; > > cpu = raw_smp_processor_id(); > @@ -784,6 +785,8 @@ static void smp_send_nmi_allbutself(void) > > static struct notifier_block crash_nmi_nb = { > .notifier_call = crash_nmi_callback, > + /* we want to be the first one called */ > + .priority = NMI_LOCAL_HIGH_PRIOR+1, > }; > > /* Halt all other CPUs, calling the specified function on each of them > diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c > index d8acab3..9e56e3d 100644 > --- a/arch/x86/kernel/traps.c > +++ b/arch/x86/kernel/traps.c > @@ -395,8 +395,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) > * CPU-specific NMI: send to specific CPU or NMI sources must > * be processed on specific CPU > */ > - if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, 0, 2, SIGINT) > - == NOTIFY_STOP) > + if (notify_die(DIE_NMI, "nmi_ipi", regs, 0, 2, SIGINT) == NOTIFY_STOP) > return; > > /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */ > @@ -405,9 +404,6 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) > if (!cpu) { > reason = get_nmi_reason(); > if (reason & NMI_REASON_MASK) { > - if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) > - == NOTIFY_STOP) > - return; > if (reason & NMI_REASON_SERR) > pci_serr_error(reason, regs); > else if (reason & NMI_REASON_IOCHK) > @@ -423,9 +419,6 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) > } > } > > - if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP) > - return; > - > #if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR) > if (nmi_watchdog_tick(regs, reason)) > return; > diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c > index 57f01bb..43b4f35 100644 > --- a/arch/x86/oprofile/nmi_int.c > +++ b/arch/x86/oprofile/nmi_int.c > @@ -64,7 +64,7 @@ static int profile_exceptions_notify(struct notifier_block *self, > int ret = NOTIFY_DONE; > > switch (val) { > - case DIE_NMI_IPI: > + case DIE_NMI: > if (ctr_running) > model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs)); > else if (!nmi_enabled) > @@ -360,7 +360,7 @@ static void nmi_cpu_setup(void *dummy) > static struct notifier_block profile_exceptions_nb = { > .notifier_call = profile_exceptions_notify, > .next = NULL, > - .priority = 2 > + .priority = NMI_LOCAL_LOW_PRIOR, > }; > > static void nmi_cpu_restore_registers(struct op_msrs *msrs) > diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c > index ab72a21..0197aa1 100644 > --- a/arch/x86/oprofile/nmi_timer_int.c > +++ b/arch/x86/oprofile/nmi_timer_int.c > @@ -25,7 +25,7 @@ static int profile_timer_exceptions_notify(struct notifier_block *self, > int ret = NOTIFY_DONE; > > switch (val) { > - case DIE_NMI_IPI: > + case DIE_NMI: > oprofile_add_sample(args->regs, 0); > ret = NOTIFY_STOP; > break; > @@ -38,7 +38,7 @@ static int profile_timer_exceptions_notify(struct notifier_block *self, > static struct notifier_block profile_timer_exceptions_nb = { > .notifier_call = profile_timer_exceptions_notify, > .next = NULL, > - .priority = 0 > + .priority = NMI_EXT_LOW_PRIOR, Do not find definition of NMI_EXT_LOW_PRIOR, forget to add it? BTW: Attach a patch I used to test external NMI. Hope that is useful to you. Best Regards, Huang Ying [-- Attachment #2: 0008-x86-NMI-NMI-injecting-support.patch --] [-- Type: text/x-patch, Size: 7733 bytes --] >From 99a4accf4ccc36ac79c8663bd08e81884aedddaf Mon Sep 17 00:00:00 2001 From: Huang Ying <ying.huang@intel.com> Date: Thu, 9 Sep 2010 14:28:44 +0800 Subject: [PATCH 08/38] x86, NMI, NMI injecting support This patch implements trigger NMI on specified CPUs. At the same time, the NMI reason (contents of port 0x61) can be faked too. This can be used to debug and test the NMI handler. Signed-off-by: Huang Ying <ying.huang@intel.com> --- arch/x86/Kconfig.debug | 10 +++ arch/x86/include/asm/mach_traps.h | 9 +++- arch/x86/kernel/Makefile | 1 + arch/x86/kernel/nmi_inject.c | 117 +++++++++++++++++++++++++++++++++++++ arch/x86/kernel/traps.c | 34 ++++++++++- 5 files changed, 167 insertions(+), 4 deletions(-) create mode 100644 arch/x86/kernel/nmi_inject.c diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 7508508..d6bb833 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -299,4 +299,14 @@ config DEBUG_STRICT_USER_COPY_CHECKS If unsure, or if you run an older (pre 4.4) gcc, say N. +config NMI_INJECT + tristate "NMI injecting support" + depends on DEBUG_KERNEL + ---help--- + This can be used to trigger NMI on specified CPUs. And the + reason of NMI (contents of port 0x61) can be faked + too. This can be used to debug and test the NMI handler. + + If unsure, say N. + endmenu diff --git a/arch/x86/include/asm/mach_traps.h b/arch/x86/include/asm/mach_traps.h index 72a8b52..4235bb3 100644 --- a/arch/x86/include/asm/mach_traps.h +++ b/arch/x86/include/asm/mach_traps.h @@ -17,7 +17,7 @@ #define NMI_REASON_CLEAR_IOCHK 0x08 #define NMI_REASON_CLEAR_MASK 0x0f -static inline unsigned char get_nmi_reason(void) +static inline unsigned char __get_nmi_reason(void) { return inb(NMI_REASON_PORT); } @@ -40,4 +40,11 @@ static inline void reassert_nmi(void) unlock_cmos(); } +struct nmi_reason_inject_data { + unsigned char reason; + unsigned char valid : 1; +}; + +extern struct nmi_reason_inject_data nmi_reason_inject_data; + #endif /* _ASM_X86_MACH_DEFAULT_MACH_TRAPS_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 2dde3c7..5f98f33 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -119,6 +119,7 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o obj-y += hwerr.o +obj-$(CONFIG_NMI_INJECT) += nmi_inject.o ### # 64 bit specific files diff --git a/arch/x86/kernel/nmi_inject.c b/arch/x86/kernel/nmi_inject.c new file mode 100644 index 0000000..2b61148 --- /dev/null +++ b/arch/x86/kernel/nmi_inject.c @@ -0,0 +1,117 @@ +/* + * NMI injector, for NMI handler testing + * + * Copyright 2010 Intel Corp. + * Author: Huang Ying <ying.huang@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/debugfs.h> +#include <linux/cpu.h> +#include <asm/mach_traps.h> +#include <asm/apic.h> + +static int nmi_reason_inject_get(void *data, u64 *val) +{ + if (nmi_reason_inject_data.valid) + *val = nmi_reason_inject_data.reason; + else + *val = ~0ULL; + return 0; +} + +static int nmi_reason_inject_set(void *data, u64 val) +{ + nmi_reason_inject_data.reason = val; + nmi_reason_inject_data.valid = 1; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(nmi_reason_inject_fops, nmi_reason_inject_get, + nmi_reason_inject_set, "0x%llx\n"); + +static int nmi_reason_uninject_set(void *data, u64 val) +{ + nmi_reason_inject_data.valid = 0; + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(nmi_reason_uninject_fops, NULL, + nmi_reason_uninject_set, "%llu\n"); + +static int nmi_inject_set(void *data, u64 val) +{ + int cpu; + cpumask_var_t cpu_mask; + + alloc_cpumask_var(&cpu_mask, GFP_KERNEL); + cpumask_clear(cpu_mask); + for_each_online_cpu(cpu) { + if (cpu >= sizeof(val)) + continue; + if (val & (1ULL << cpu)) + cpumask_set_cpu(cpu, cpu_mask); + } + if (!cpumask_empty(cpu_mask)) + apic->send_IPI_mask(cpu_mask, NMI_VECTOR); + free_cpumask_var(cpu_mask); + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(nmi_inject_fops, NULL, nmi_inject_set, "0x%llx\n"); + +static struct dentry *nmi_debug_dir; + +static int __init nmi_inject_init(void) +{ + int rc; + struct dentry *de; + + rc = -ENOMEM; + nmi_debug_dir = debugfs_create_dir("nmi", NULL); + if (!nmi_debug_dir) + return rc; + de = debugfs_create_file("inject", S_IWUSR, nmi_debug_dir, + NULL, &nmi_inject_fops); + if (!de) + goto err; + de = debugfs_create_file("reason_inject", S_IRUSR | S_IWUSR, + nmi_debug_dir, NULL, &nmi_reason_inject_fops); + if (!de) + goto err; + de = debugfs_create_file("reason_uninject", S_IWUSR, + nmi_debug_dir, NULL, &nmi_reason_uninject_fops); + if (!de) + goto err; + + return 0; +err: + debugfs_remove_recursive(nmi_debug_dir); + return rc; +} + +static void __exit nmi_inject_exit(void) +{ + debugfs_remove_recursive(nmi_debug_dir); +} + +module_init(nmi_inject_init); +module_exit(nmi_inject_exit); + +MODULE_AUTHOR("Huang Ying"); +MODULE_DESCRIPTION("NMI injecting support"); +MODULE_LICENSE("GPL"); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 42f16f7..acfb1a9 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -316,6 +316,34 @@ static int __init setup_unknown_nmi_panic(char *str) } __setup("unknown_nmi_panic", setup_unknown_nmi_panic); +struct nmi_reason_inject_data nmi_reason_inject_data; +EXPORT_SYMBOL_GPL(nmi_reason_inject_data); + +static inline unsigned char get_nmi_reason(void) +{ + if (nmi_reason_inject_data.valid) + return nmi_reason_inject_data.reason; + else + return __get_nmi_reason(); +} + +static inline void outb_nmi_reason(unsigned char reason) +{ + static unsigned char prev_reason; + + if (nmi_reason_inject_data.valid) { + if (reason & NMI_REASON_CLEAR_SERR) + nmi_reason_inject_data.reason &= ~NMI_REASON_SERR; + if (prev_reason == (reason | NMI_REASON_CLEAR_IOCHK) && + !(reason & NMI_REASON_CLEAR_IOCHK)) + nmi_reason_inject_data.reason &= ~NMI_REASON_IOCHK; + if (!nmi_reason_inject_data.reason) + nmi_reason_inject_data.valid = 0; + prev_reason = reason; + } else + outb(reason, NMI_REASON_PORT); +} + static notrace __kprobes void pci_serr_error(unsigned char reason, struct pt_regs *regs) { @@ -340,7 +368,7 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs) /* Clear and disable the PCI SERR error line. */ reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR; - outb(reason, NMI_REASON_PORT); + outb_nmi_reason(reason); } static notrace __kprobes void @@ -358,7 +386,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs) /* Re-enable the IOCK line, wait for a few seconds */ reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK; - outb(reason, NMI_REASON_PORT); + outb_nmi_reason(reason); i = 20000; while (--i) { @@ -367,7 +395,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs) } reason &= ~NMI_REASON_CLEAR_IOCHK; - outb(reason, NMI_REASON_PORT); + outb_nmi_reason(reason); } static notrace __kprobes void -- 1.7.1 ^ permalink raw reply related [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler 2010-11-02 18:16 ` Huang Ying @ 2010-11-02 19:11 ` Don Zickus 2010-11-02 20:47 ` Don Zickus 1 sibling, 0 replies; 44+ messages in thread From: Don Zickus @ 2010-11-02 19:11 UTC (permalink / raw) To: Huang Ying Cc: Peter Zijlstra, Andi Kleen, Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org, Robert Richter On Wed, Nov 03, 2010 at 02:16:03AM +0800, Huang Ying wrote: > Hi, Don, > > @@ -38,7 +38,7 @@ static int profile_timer_exceptions_notify(struct notifier_block *self, > > static struct notifier_block profile_timer_exceptions_nb = { > > .notifier_call = profile_timer_exceptions_notify, > > .next = NULL, > > - .priority = 0 > > + .priority = NMI_EXT_LOW_PRIOR, > > Do not find definition of NMI_EXT_LOW_PRIOR, forget to add it? Gah. I meant to use NMI_LOW_PRIOR there. Thanks for finding that. Guess I should any randomness to my compiles. > > BTW: Attach a patch I used to test external NMI. Hope that is useful to > you. It looks useful. I'll try to play with later this week. Thanks! Cheers, Don > > Best Regards, > Huang Ying > > From 99a4accf4ccc36ac79c8663bd08e81884aedddaf Mon Sep 17 00:00:00 2001 > From: Huang Ying <ying.huang@intel.com> > Date: Thu, 9 Sep 2010 14:28:44 +0800 > Subject: [PATCH 08/38] x86, NMI, NMI injecting support > > This patch implements trigger NMI on specified CPUs. At the same time, > the NMI reason (contents of port 0x61) can be faked too. This can be > used to debug and test the NMI handler. > > Signed-off-by: Huang Ying <ying.huang@intel.com> > --- > arch/x86/Kconfig.debug | 10 +++ > arch/x86/include/asm/mach_traps.h | 9 +++- > arch/x86/kernel/Makefile | 1 + > arch/x86/kernel/nmi_inject.c | 117 +++++++++++++++++++++++++++++++++++++ > arch/x86/kernel/traps.c | 34 ++++++++++- > 5 files changed, 167 insertions(+), 4 deletions(-) > create mode 100644 arch/x86/kernel/nmi_inject.c > > diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug > index 7508508..d6bb833 100644 > --- a/arch/x86/Kconfig.debug > +++ b/arch/x86/Kconfig.debug > @@ -299,4 +299,14 @@ config DEBUG_STRICT_USER_COPY_CHECKS > > If unsure, or if you run an older (pre 4.4) gcc, say N. > > +config NMI_INJECT > + tristate "NMI injecting support" > + depends on DEBUG_KERNEL > + ---help--- > + This can be used to trigger NMI on specified CPUs. And the > + reason of NMI (contents of port 0x61) can be faked > + too. This can be used to debug and test the NMI handler. > + > + If unsure, say N. > + > endmenu > diff --git a/arch/x86/include/asm/mach_traps.h b/arch/x86/include/asm/mach_traps.h > index 72a8b52..4235bb3 100644 > --- a/arch/x86/include/asm/mach_traps.h > +++ b/arch/x86/include/asm/mach_traps.h > @@ -17,7 +17,7 @@ > #define NMI_REASON_CLEAR_IOCHK 0x08 > #define NMI_REASON_CLEAR_MASK 0x0f > > -static inline unsigned char get_nmi_reason(void) > +static inline unsigned char __get_nmi_reason(void) > { > return inb(NMI_REASON_PORT); > } > @@ -40,4 +40,11 @@ static inline void reassert_nmi(void) > unlock_cmos(); > } > > +struct nmi_reason_inject_data { > + unsigned char reason; > + unsigned char valid : 1; > +}; > + > +extern struct nmi_reason_inject_data nmi_reason_inject_data; > + > #endif /* _ASM_X86_MACH_DEFAULT_MACH_TRAPS_H */ > diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile > index 2dde3c7..5f98f33 100644 > --- a/arch/x86/kernel/Makefile > +++ b/arch/x86/kernel/Makefile > @@ -119,6 +119,7 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o > obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o > > obj-y += hwerr.o > +obj-$(CONFIG_NMI_INJECT) += nmi_inject.o > > ### > # 64 bit specific files > diff --git a/arch/x86/kernel/nmi_inject.c b/arch/x86/kernel/nmi_inject.c > new file mode 100644 > index 0000000..2b61148 > --- /dev/null > +++ b/arch/x86/kernel/nmi_inject.c > @@ -0,0 +1,117 @@ > +/* > + * NMI injector, for NMI handler testing > + * > + * Copyright 2010 Intel Corp. > + * Author: Huang Ying <ying.huang@intel.com> > + * > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License version > + * 2 as published by the Free Software Foundation. > + * > + * This program is distributed in the hope that it will be useful, > + * but WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + * GNU General Public License for more details. > + * > + * You should have received a copy of the GNU General Public License > + * along with this program; if not, write to the Free Software > + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA > + */ > + > +#include <linux/kernel.h> > +#include <linux/module.h> > +#include <linux/debugfs.h> > +#include <linux/cpu.h> > +#include <asm/mach_traps.h> > +#include <asm/apic.h> > + > +static int nmi_reason_inject_get(void *data, u64 *val) > +{ > + if (nmi_reason_inject_data.valid) > + *val = nmi_reason_inject_data.reason; > + else > + *val = ~0ULL; > + return 0; > +} > + > +static int nmi_reason_inject_set(void *data, u64 val) > +{ > + nmi_reason_inject_data.reason = val; > + nmi_reason_inject_data.valid = 1; > + return 0; > +} > + > +DEFINE_SIMPLE_ATTRIBUTE(nmi_reason_inject_fops, nmi_reason_inject_get, > + nmi_reason_inject_set, "0x%llx\n"); > + > +static int nmi_reason_uninject_set(void *data, u64 val) > +{ > + nmi_reason_inject_data.valid = 0; > + return 0; > +} > + > +DEFINE_SIMPLE_ATTRIBUTE(nmi_reason_uninject_fops, NULL, > + nmi_reason_uninject_set, "%llu\n"); > + > +static int nmi_inject_set(void *data, u64 val) > +{ > + int cpu; > + cpumask_var_t cpu_mask; > + > + alloc_cpumask_var(&cpu_mask, GFP_KERNEL); > + cpumask_clear(cpu_mask); > + for_each_online_cpu(cpu) { > + if (cpu >= sizeof(val)) > + continue; > + if (val & (1ULL << cpu)) > + cpumask_set_cpu(cpu, cpu_mask); > + } > + if (!cpumask_empty(cpu_mask)) > + apic->send_IPI_mask(cpu_mask, NMI_VECTOR); > + free_cpumask_var(cpu_mask); > + return 0; > +} > + > +DEFINE_SIMPLE_ATTRIBUTE(nmi_inject_fops, NULL, nmi_inject_set, "0x%llx\n"); > + > +static struct dentry *nmi_debug_dir; > + > +static int __init nmi_inject_init(void) > +{ > + int rc; > + struct dentry *de; > + > + rc = -ENOMEM; > + nmi_debug_dir = debugfs_create_dir("nmi", NULL); > + if (!nmi_debug_dir) > + return rc; > + de = debugfs_create_file("inject", S_IWUSR, nmi_debug_dir, > + NULL, &nmi_inject_fops); > + if (!de) > + goto err; > + de = debugfs_create_file("reason_inject", S_IRUSR | S_IWUSR, > + nmi_debug_dir, NULL, &nmi_reason_inject_fops); > + if (!de) > + goto err; > + de = debugfs_create_file("reason_uninject", S_IWUSR, > + nmi_debug_dir, NULL, &nmi_reason_uninject_fops); > + if (!de) > + goto err; > + > + return 0; > +err: > + debugfs_remove_recursive(nmi_debug_dir); > + return rc; > +} > + > +static void __exit nmi_inject_exit(void) > +{ > + debugfs_remove_recursive(nmi_debug_dir); > +} > + > +module_init(nmi_inject_init); > +module_exit(nmi_inject_exit); > + > +MODULE_AUTHOR("Huang Ying"); > +MODULE_DESCRIPTION("NMI injecting support"); > +MODULE_LICENSE("GPL"); > diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c > index 42f16f7..acfb1a9 100644 > --- a/arch/x86/kernel/traps.c > +++ b/arch/x86/kernel/traps.c > @@ -316,6 +316,34 @@ static int __init setup_unknown_nmi_panic(char *str) > } > __setup("unknown_nmi_panic", setup_unknown_nmi_panic); > > +struct nmi_reason_inject_data nmi_reason_inject_data; > +EXPORT_SYMBOL_GPL(nmi_reason_inject_data); > + > +static inline unsigned char get_nmi_reason(void) > +{ > + if (nmi_reason_inject_data.valid) > + return nmi_reason_inject_data.reason; > + else > + return __get_nmi_reason(); > +} > + > +static inline void outb_nmi_reason(unsigned char reason) > +{ > + static unsigned char prev_reason; > + > + if (nmi_reason_inject_data.valid) { > + if (reason & NMI_REASON_CLEAR_SERR) > + nmi_reason_inject_data.reason &= ~NMI_REASON_SERR; > + if (prev_reason == (reason | NMI_REASON_CLEAR_IOCHK) && > + !(reason & NMI_REASON_CLEAR_IOCHK)) > + nmi_reason_inject_data.reason &= ~NMI_REASON_IOCHK; > + if (!nmi_reason_inject_data.reason) > + nmi_reason_inject_data.valid = 0; > + prev_reason = reason; > + } else > + outb(reason, NMI_REASON_PORT); > +} > + > static notrace __kprobes void > pci_serr_error(unsigned char reason, struct pt_regs *regs) > { > @@ -340,7 +368,7 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs) > > /* Clear and disable the PCI SERR error line. */ > reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR; > - outb(reason, NMI_REASON_PORT); > + outb_nmi_reason(reason); > } > > static notrace __kprobes void > @@ -358,7 +386,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs) > > /* Re-enable the IOCK line, wait for a few seconds */ > reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK; > - outb(reason, NMI_REASON_PORT); > + outb_nmi_reason(reason); > > i = 20000; > while (--i) { > @@ -367,7 +395,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs) > } > > reason &= ~NMI_REASON_CLEAR_IOCHK; > - outb(reason, NMI_REASON_PORT); > + outb_nmi_reason(reason); > } > > static notrace __kprobes void > -- > 1.7.1 > ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler 2010-11-02 18:16 ` Huang Ying 2010-11-02 19:11 ` Don Zickus @ 2010-11-02 20:47 ` Don Zickus 1 sibling, 0 replies; 44+ messages in thread From: Don Zickus @ 2010-11-02 20:47 UTC (permalink / raw) To: Huang Ying Cc: Peter Zijlstra, Andi Kleen, Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org, Robert Richter On Wed, Nov 03, 2010 at 02:16:03AM +0800, Huang Ying wrote: > > break; > > @@ -38,7 +38,7 @@ static int profile_timer_exceptions_notify(struct notifier_block *self, > > static struct notifier_block profile_timer_exceptions_nb = { > > .notifier_call = profile_timer_exceptions_notify, > > .next = NULL, > > - .priority = 0 > > + .priority = NMI_EXT_LOW_PRIOR, > > Do not find definition of NMI_EXT_LOW_PRIOR, forget to add it? I attached an updated patch with that change. Cheers, Don --- arch/x86/include/asm/kdebug.h | 1 - arch/x86/include/asm/nmi.h | 20 ++++++++++++++++++++ arch/x86/kernel/apic/hw_nmi.c | 3 +-- arch/x86/kernel/apic/x2apic_uv_x.c | 2 +- arch/x86/kernel/cpu/mcheck/mce-inject.c | 5 +++-- arch/x86/kernel/cpu/perf_event.c | 4 ++-- arch/x86/kernel/kgdb.c | 6 +----- arch/x86/kernel/reboot.c | 5 ++++- arch/x86/kernel/traps.c | 9 +-------- arch/x86/oprofile/nmi_int.c | 4 ++-- arch/x86/oprofile/nmi_timer_int.c | 4 ++-- 11 files changed, 37 insertions(+), 26 deletions(-) diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h index 5bdfca8..e28ec43 100644 --- a/arch/x86/include/asm/kdebug.h +++ b/arch/x86/include/asm/kdebug.h @@ -18,7 +18,6 @@ enum die_val { DIE_TRAP, DIE_GPF, DIE_CALL, - DIE_NMI_IPI, DIE_PAGE_FAULT, DIE_NMIUNKNOWN, }; diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 932f0f8..cfb6156 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -68,6 +68,26 @@ static inline int nmi_watchdog_active(void) } #endif +/* + * Define some priorities for the nmi notifier call chain. + * + * Create a local nmi bit that has a higher priority than + * external nmis, because the local ones are more frequent. + * + * Also setup some default high/normal/low settings for + * subsystems to registers with. Using 4 bits to seperate + * the priorities. This can go alot higher if needed be. + */ + +#define NMI_LOCAL_SHIFT 16 /* randomly picked */ +#define NMI_LOCAL_BIT (1ULL << NMI_LOCAL_SHIFT) +#define NMI_HIGH_PRIOR (1ULL << 8) +#define NMI_NORMAL_PRIOR (1ULL << 4) +#define NMI_LOW_PRIOR (1ULL << 0) +#define NMI_LOCAL_HIGH_PRIOR (NMI_LOCAL_BIT | NMI_HIGH_PRIOR) +#define NMI_LOCAL_NORMAL_PRIOR (NMI_LOCAL_BIT | NMI_NORMAL_PRIOR) +#define NMI_LOCAL_LOW_PRIOR (NMI_LOCAL_BIT | NMI_LOW_PRIOR) + void lapic_watchdog_stop(void); int lapic_watchdog_init(unsigned nmi_hz); int lapic_wd_event(unsigned nmi_hz); diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c index 07a837d..44ff8c9 100644 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -67,7 +67,6 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, switch (cmd) { case DIE_NMI: - case DIE_NMI_IPI: break; default: @@ -95,7 +94,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self, static __read_mostly struct notifier_block backtrace_notifier = { .notifier_call = arch_trigger_all_cpu_backtrace_handler, .next = NULL, - .priority = 1 + .priority = NMI_LOCAL_LOW_PRIOR, }; static int __init register_trigger_all_cpu_backtrace(void) diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index be6f9c4..e6c6294 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -605,7 +605,7 @@ void __cpuinit uv_cpu_init(void) */ int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data) { - if (reason != DIE_NMI_IPI) + if (reason != DIE_NMIUNKNOWN) return NOTIFY_OK; if (in_crash_kexec) diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index e7dbde7..a779719 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -25,6 +25,7 @@ #include <linux/gfp.h> #include <asm/mce.h> #include <asm/apic.h> +#include <asm/nmi.h> /* Update fake mce registers on current CPU. */ static void inject_mce(struct mce *m) @@ -83,7 +84,7 @@ static int mce_raise_notify(struct notifier_block *self, struct die_args *args = (struct die_args *)data; int cpu = smp_processor_id(); struct mce *m = &__get_cpu_var(injectm); - if (val != DIE_NMI_IPI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) + if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask)) return NOTIFY_DONE; cpumask_clear_cpu(cpu, mce_inject_cpumask); if (m->inject_flags & MCJ_EXCEPTION) @@ -95,7 +96,7 @@ static int mce_raise_notify(struct notifier_block *self, static struct notifier_block mce_raise_nb = { .notifier_call = mce_raise_notify, - .priority = 1000, + .priority = NMI_LOCAL_NORMAL_PRIOR, }; /* Inject mce on current CPU */ diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index da98b6d..55a3797 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1226,7 +1226,7 @@ perf_event_nmi_handler(struct notifier_block *self, return NOTIFY_DONE; switch (cmd) { - case DIE_NMI_IPI: + case DIE_NMI: break; case DIE_NMIUNKNOWN: this_nmi = percpu_read(irq_stat.__nmi_count); @@ -1276,7 +1276,7 @@ perf_event_nmi_handler(struct notifier_block *self, static __read_mostly struct notifier_block perf_event_nmi_notifier = { .notifier_call = perf_event_nmi_handler, .next = NULL, - .priority = 1 + .priority = NMI_LOCAL_LOW_PRIOR, }; static struct event_constraint unconstrained; diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 852b819..020d052 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -523,10 +523,6 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd) } return NOTIFY_DONE; - case DIE_NMI_IPI: - /* Just ignore, we will handle the roundup on DIE_NMI. */ - return NOTIFY_DONE; - case DIE_NMIUNKNOWN: if (was_in_debug_nmi[raw_smp_processor_id()]) { was_in_debug_nmi[raw_smp_processor_id()] = 0; @@ -604,7 +600,7 @@ static struct notifier_block kgdb_notifier = { /* * Lowest-prio notifier priority, we want to be notified last: */ - .priority = -INT_MAX, + .priority = NMI_LOCAL_LOW_PRIOR, }; /** diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index e3af342..eabbde6 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -18,6 +18,7 @@ #include <asm/pci_x86.h> #include <asm/virtext.h> #include <asm/cpu.h> +#include <asm/nmi.h> #ifdef CONFIG_X86_32 # include <linux/ctype.h> @@ -753,7 +754,7 @@ static int crash_nmi_callback(struct notifier_block *self, { int cpu; - if (val != DIE_NMI_IPI) + if (val != DIE_NMI) return NOTIFY_OK; cpu = raw_smp_processor_id(); @@ -784,6 +785,8 @@ static void smp_send_nmi_allbutself(void) static struct notifier_block crash_nmi_nb = { .notifier_call = crash_nmi_callback, + /* we want to be the first one called */ + .priority = NMI_LOCAL_HIGH_PRIOR+1, }; /* Halt all other CPUs, calling the specified function on each of them diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index d8acab3..9e56e3d 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -395,8 +395,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) * CPU-specific NMI: send to specific CPU or NMI sources must * be processed on specific CPU */ - if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, 0, 2, SIGINT) - == NOTIFY_STOP) + if (notify_die(DIE_NMI, "nmi_ipi", regs, 0, 2, SIGINT) == NOTIFY_STOP) return; /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */ @@ -405,9 +404,6 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) if (!cpu) { reason = get_nmi_reason(); if (reason & NMI_REASON_MASK) { - if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) - == NOTIFY_STOP) - return; if (reason & NMI_REASON_SERR) pci_serr_error(reason, regs); else if (reason & NMI_REASON_IOCHK) @@ -423,9 +419,6 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs) } } - if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP) - return; - #if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR) if (nmi_watchdog_tick(regs, reason)) return; diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 57f01bb..43b4f35 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -64,7 +64,7 @@ static int profile_exceptions_notify(struct notifier_block *self, int ret = NOTIFY_DONE; switch (val) { - case DIE_NMI_IPI: + case DIE_NMI: if (ctr_running) model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs)); else if (!nmi_enabled) @@ -360,7 +360,7 @@ static void nmi_cpu_setup(void *dummy) static struct notifier_block profile_exceptions_nb = { .notifier_call = profile_exceptions_notify, .next = NULL, - .priority = 2 + .priority = NMI_LOCAL_LOW_PRIOR, }; static void nmi_cpu_restore_registers(struct op_msrs *msrs) diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c index ab72a21..5f14fcb 100644 --- a/arch/x86/oprofile/nmi_timer_int.c +++ b/arch/x86/oprofile/nmi_timer_int.c @@ -25,7 +25,7 @@ static int profile_timer_exceptions_notify(struct notifier_block *self, int ret = NOTIFY_DONE; switch (val) { - case DIE_NMI_IPI: + case DIE_NMI: oprofile_add_sample(args->regs, 0); ret = NOTIFY_STOP; break; @@ -38,7 +38,7 @@ static int profile_timer_exceptions_notify(struct notifier_block *self, static struct notifier_block profile_timer_exceptions_nb = { .notifier_call = profile_timer_exceptions_notify, .next = NULL, - .priority = 0 + .priority = NMI_LOW_PRIOR, }; static int timer_start(void) -- 1.7.2.3 ^ permalink raw reply related [flat|nested] 44+ messages in thread
* [PATCH -v3 4/6] Make NMI reason io port (0x61) can be processed on any CPU 2010-10-09 6:49 [PATCH -v3 1/6] x86, NMI, Add NMI symbol constants and rename memory parity to PCI SERR Huang Ying 2010-10-09 6:49 ` [PATCH -v3 2/6] x86, NMI, Add touch_nmi_watchdog to io_check_error delay Huang Ying 2010-10-09 6:49 ` [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler Huang Ying @ 2010-10-09 6:49 ` Huang Ying 2010-10-09 6:49 ` [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error Huang Ying 2010-10-09 6:49 ` [PATCH -v3 6/6] x86, NMI, Remove do_nmi_callback logic Huang Ying 4 siblings, 0 replies; 44+ messages in thread From: Huang Ying @ 2010-10-09 6:49 UTC (permalink / raw) To: Don Zickus, Ingo Molnar, H. Peter Anvin Cc: linux-kernel, Andi Kleen, Robert Richter, Huang Ying In original NMI handler, NMI reason io port (0x61) is only processed on BSP. This makes it impossible to hot-remove BSP. To solve the issue, a raw spinlock is used to make the port can be processed on any CPU. Signed-off-by: Huang Ying <ying.huang@intel.com> --- arch/x86/kernel/traps.c | 45 +++++++++++++++++++++++++-------------------- 1 file changed, 25 insertions(+), 20 deletions(-) --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -83,6 +83,12 @@ EXPORT_SYMBOL_GPL(used_vectors); static int ignore_nmis; +/* + * Prevent NMI reason port (0x61) being accessed simultaneously, can + * only be used in NMI handler. + */ +static DEFINE_RAW_SPINLOCK(nmi_reason_lock); + static inline void conditional_sti(struct pt_regs *regs) { if (regs->flags & X86_EFLAGS_IF) @@ -383,7 +389,6 @@ unknown_nmi_error(unsigned char reason, static notrace __kprobes void default_do_nmi(struct pt_regs *regs) { unsigned char reason = 0; - int cpu; /* * CPU-specific NMI must be processed before non-CPU-specific @@ -400,28 +405,28 @@ static notrace __kprobes void default_do return; /* Non-CPU-specific NMI: NMI sources can be processed on any CPU */ - cpu = smp_processor_id(); - /* Only the BSP gets external NMIs from the system. */ - if (!cpu) { - reason = get_nmi_reason(); - if (reason & NMI_REASON_MASK) { - if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) - == NOTIFY_STOP) - return; - if (reason & NMI_REASON_SERR) - pci_serr_error(reason, regs); - else if (reason & NMI_REASON_IOCHK) - io_check_error(reason, regs); + raw_spin_lock(&nmi_reason_lock); + reason = get_nmi_reason(); + if (reason & NMI_REASON_MASK) { + if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) + == NOTIFY_STOP) + goto unlock_return; + if (reason & NMI_REASON_SERR) + pci_serr_error(reason, regs); + else if (reason & NMI_REASON_IOCHK) + io_check_error(reason, regs); #ifdef CONFIG_X86_32 - /* - * Reassert NMI in case it became active - * meanwhile as it's edge-triggered: - */ - reassert_nmi(); + /* + * Reassert NMI in case it became active + * meanwhile as it's edge-triggered: + */ + reassert_nmi(); #endif - return; - } +unlock_return: + raw_spin_unlock(&nmi_reason_lock); + return; } + raw_spin_unlock(&nmi_reason_lock); if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP) return; ^ permalink raw reply [flat|nested] 44+ messages in thread
* [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error 2010-10-09 6:49 [PATCH -v3 1/6] x86, NMI, Add NMI symbol constants and rename memory parity to PCI SERR Huang Ying ` (2 preceding siblings ...) 2010-10-09 6:49 ` [PATCH -v3 4/6] Make NMI reason io port (0x61) can be processed on any CPU Huang Ying @ 2010-10-09 6:49 ` Huang Ying 2010-10-10 14:07 ` Alan Cox 2010-10-11 21:20 ` Don Zickus 2010-10-09 6:49 ` [PATCH -v3 6/6] x86, NMI, Remove do_nmi_callback logic Huang Ying 4 siblings, 2 replies; 44+ messages in thread From: Huang Ying @ 2010-10-09 6:49 UTC (permalink / raw) To: Don Zickus, Ingo Molnar, H. Peter Anvin Cc: linux-kernel, Andi Kleen, Robert Richter, Huang Ying In general, unknown NMI is used by hardware and firmware to notify fatal hardware errors to OS. So the Linux should treat unknown NMI as hardware error and go panic upon unknown NMI for better error containment. But there are some broken hardware, which will generate unknown NMI not for hardware error. To support these machines, a white list mechanism is provided to treat unknown NMI as hardware error only on some known working system. These systems are identified via the presentation of APEI HEST or some PCI ID of the host bridge. The PCI ID of host bridge instead of DMI ID is used, so that the checking can be done based on the platform type instead of motherboard. This should be simpler and sufficient. The method to identify the platforms is designed by Andi Kleen. v3: - Change document and comments Signed-off-by: Huang Ying <ying.huang@intel.com> --- arch/x86/include/asm/nmi.h | 1 arch/x86/kernel/Makefile | 2 + arch/x86/kernel/hwerr.c | 62 +++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/traps.c | 11 +++++++ drivers/acpi/apei/hest.c | 8 +++++ 5 files changed, 84 insertions(+) create mode 100644 arch/x86/kernel/hwerr.c --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -44,6 +44,7 @@ struct ctl_table; extern int proc_nmi_enabled(struct ctl_table *, int , void __user *, size_t *, loff_t *); extern int unknown_nmi_panic; +extern int unknown_nmi_as_hwerr; void arch_trigger_all_cpu_backtrace(void); #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -119,6 +119,8 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o +obj-y += hwerr.o + ### # 64 bit specific files ifeq ($(CONFIG_X86_64),y) --- /dev/null +++ b/arch/x86/kernel/hwerr.c @@ -0,0 +1,62 @@ +/* + * Hardware error architecture dependent processing + * + * Copyright 2010 Intel Corp. + * Author: Huang Ying <ying.huang@intel.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version + * 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include <linux/kernel.h> +#include <linux/pci.h> +#include <linux/init.h> +#include <linux/nmi.h> + +/* + * In general, unknown NMI is used by hardware and firmware to notify + * fatal hardware errors to OS. So the Linux should treat unknown NMI + * as hardware error and go panic upon unknown NMI for better error + * containment. + * + * But there are some broken hardware, which will generate unknown NMI + * not for hardware error. To support these systems, a white list + * mechanism is used to treat unknown NMI as hardware error only on + * some known working system. + * + * The PCI ID of host bridge instead of DMI ID is used, so that the + * checking can be done based on the platform instead of motherboard. + * This should be simpler and sufficient. + */ +static const +struct pci_device_id unknown_nmi_as_hwerr_platform[] __initdata = { + { PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3406) }, + { 0, } +}; + +int __init check_unknown_nmi_as_hwerr(void) +{ + struct pci_dev *dev = NULL; + + for_each_pci_dev(dev) { + if (pci_match_id(unknown_nmi_as_hwerr_platform, dev)) { + pr_info( +"Host bridge is identified, will treat unknown NMI as hardware error!\n"); + unknown_nmi_as_hwerr = 1; + break; + } + } + + return 0; +} +late_initcall(check_unknown_nmi_as_hwerr); --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -83,6 +83,8 @@ EXPORT_SYMBOL_GPL(used_vectors); static int ignore_nmis; +int unknown_nmi_as_hwerr; + /* * Prevent NMI reason port (0x61) being accessed simultaneously, can * only be used in NMI handler. @@ -366,6 +368,15 @@ unknown_nmi_error(unsigned char reason, if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) return; + /* + * On some platforms, hardware errors may be notified via + * unknown NMI + */ + if (unknown_nmi_as_hwerr) + panic( + "NMI for hardware error without error record: Not continuing\n" + "Please check BIOS/BMC log for further information."); + #ifdef CONFIG_MCA /* * Might actually be able to figure out what the guilty party --- a/drivers/acpi/apei/hest.c +++ b/drivers/acpi/apei/hest.c @@ -35,6 +35,7 @@ #include <linux/highmem.h> #include <linux/io.h> #include <linux/platform_device.h> +#include <linux/nmi.h> #include <acpi/apei.h> #include "apei-internal.h" @@ -225,6 +226,13 @@ static int __init hest_init(void) if (rc) goto err; + /* + * System has proper HEST should treat unknown NMI as fatal + * hardware error notification + */ + pr_info("HEST is valid, will treat unknown NMI as hardware error!\n"); + unknown_nmi_as_hwerr = 1; + rc = hest_ghes_dev_register(ghes_count); if (rc) goto err; ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error 2010-10-09 6:49 ` [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error Huang Ying @ 2010-10-10 14:07 ` Alan Cox 2010-10-10 14:13 ` Andi Kleen 2010-10-11 21:20 ` Don Zickus 1 sibling, 1 reply; 44+ messages in thread From: Alan Cox @ 2010-10-10 14:07 UTC (permalink / raw) To: Huang Ying Cc: Don Zickus, Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen, Robert Richter On Sat, 9 Oct 2010 14:49:46 +0800 Huang Ying <ying.huang@intel.com> wrote: > In general, unknown NMI is used by hardware and firmware to notify > fatal hardware errors to OS. So the Linux should treat unknown NMI as > hardware error and go panic upon unknown NMI for better error > containment. Not entirely true. Older machines use NMI for all sorts of interesting purposes. In particular many 486 laptops trigger NMI as part of power manaagement, (Hence the choice of the dazed and confused message) > But there are some broken hardware, which will generate unknown NMI > not for hardware error. To support these machines, a white list "Broken" is not the right term. There is no formal documentation about such uses of NMI on older PC platforms that forbids such use. They may not agree with your personal preferred behaviour. > These systems are identified via the presentation of APEI HEST or > some PCI ID of the host bridge. The PCI ID of host bridge instead of > DMI ID is used, so that the checking can be done based on the platform > type instead of motherboard. This should be simpler and sufficient. > > The method to identify the platforms is designed by Andi Kleen. Why not make the new flag also a boot option so you can force it on for platforms where we don't auto whitelist it. Alan ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error 2010-10-10 14:07 ` Alan Cox @ 2010-10-10 14:13 ` Andi Kleen 2010-10-11 21:08 ` Don Zickus 0 siblings, 1 reply; 44+ messages in thread From: Andi Kleen @ 2010-10-10 14:13 UTC (permalink / raw) To: Alan Cox Cc: Huang Ying, Don Zickus, Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen, Robert Richter On Sun, Oct 10, 2010 at 03:07:13PM +0100, Alan Cox wrote: > On Sat, 9 Oct 2010 14:49:46 +0800 > Huang Ying <ying.huang@intel.com> wrote: > > > In general, unknown NMI is used by hardware and firmware to notify > > fatal hardware errors to OS. So the Linux should treat unknown NMI as > > hardware error and go panic upon unknown NMI for better error > > containment. > > Not entirely true. Older machines use NMI for all sorts of interesting > purposes. In particular many 486 laptops trigger NMI as part of power > manaagement, (Hence the choice of the dazed and confused message) In general, on any post stone age x86 system, ... > > These systems are identified via the presentation of APEI HEST or > > some PCI ID of the host bridge. The PCI ID of host bridge instead of > > DMI ID is used, so that the checking can be done based on the platform > > type instead of motherboard. This should be simpler and sufficient. > > > > The method to identify the platforms is designed by Andi Kleen. > > Why not make the new flag also a boot option so you can force it on for > platforms where we don't auto whitelist it. You can already set it at run time using sysctl. echo 1 > /proc/sys/kernel/panic_on_unrecovered_nmi echo 1 > /proc/sys/kernel/panic_on_io_nmi -Andi -- ak@linux.intel.com -- Speaking for myself only. ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error 2010-10-10 14:13 ` Andi Kleen @ 2010-10-11 21:08 ` Don Zickus 2010-10-11 21:12 ` Don Zickus 0 siblings, 1 reply; 44+ messages in thread From: Don Zickus @ 2010-10-11 21:08 UTC (permalink / raw) To: Andi Kleen Cc: Alan Cox, Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel, Robert Richter On Sun, Oct 10, 2010 at 04:13:11PM +0200, Andi Kleen wrote: > > > These systems are identified via the presentation of APEI HEST or > > > some PCI ID of the host bridge. The PCI ID of host bridge instead of > > > DMI ID is used, so that the checking can be done based on the platform > > > type instead of motherboard. This should be simpler and sufficient. > > > > > > The method to identify the platforms is designed by Andi Kleen. > > > > Why not make the new flag also a boot option so you can force it on for > > platforms where we don't auto whitelist it. > > You can already set it at run time using sysctl. > > echo 1 > /proc/sys/kernel/panic_on_unrecovered_nmi > echo 1 > /proc/sys/kernel/panic_on_io_nmi I get the impression Alan was talking about the option unknown_nmi_as_hwerr which isn't hooked into a sysctl but will cause machines to panic. Cheers, Don ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error 2010-10-11 21:08 ` Don Zickus @ 2010-10-11 21:12 ` Don Zickus 0 siblings, 0 replies; 44+ messages in thread From: Don Zickus @ 2010-10-11 21:12 UTC (permalink / raw) To: Andi Kleen Cc: Alan Cox, Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel, Robert Richter On Mon, Oct 11, 2010 at 05:08:29PM -0400, Don Zickus wrote: > On Sun, Oct 10, 2010 at 04:13:11PM +0200, Andi Kleen wrote: > > > > These systems are identified via the presentation of APEI HEST or > > > > some PCI ID of the host bridge. The PCI ID of host bridge instead of > > > > DMI ID is used, so that the checking can be done based on the platform > > > > type instead of motherboard. This should be simpler and sufficient. > > > > > > > > The method to identify the platforms is designed by Andi Kleen. > > > > > > Why not make the new flag also a boot option so you can force it on for > > > platforms where we don't auto whitelist it. > > > > You can already set it at run time using sysctl. > > > > echo 1 > /proc/sys/kernel/panic_on_unrecovered_nmi > > echo 1 > /proc/sys/kernel/panic_on_io_nmi > > I get the impression Alan was talking about the option > > unknown_nmi_as_hwerr > > which isn't hooked into a sysctl but will cause machines to panic. Gah. disregard this, my brain is tired from a long day. Cheers, Don ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error 2010-10-09 6:49 ` [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error Huang Ying 2010-10-10 14:07 ` Alan Cox @ 2010-10-11 21:20 ` Don Zickus 2010-10-12 1:10 ` Huang Ying 2010-10-20 6:12 ` Huang Ying 1 sibling, 2 replies; 44+ messages in thread From: Don Zickus @ 2010-10-11 21:20 UTC (permalink / raw) To: Huang Ying Cc: Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen, Robert Richter On Sat, Oct 09, 2010 at 02:49:46PM +0800, Huang Ying wrote: > In general, unknown NMI is used by hardware and firmware to notify > fatal hardware errors to OS. So the Linux should treat unknown NMI as > hardware error and go panic upon unknown NMI for better error > containment. > > But there are some broken hardware, which will generate unknown NMI > not for hardware error. To support these machines, a white list > mechanism is provided to treat unknown NMI as hardware error only on > some known working system. > > These systems are identified via the presentation of APEI HEST or > some PCI ID of the host bridge. The PCI ID of host bridge instead of > DMI ID is used, so that the checking can be done based on the platform > type instead of motherboard. This should be simpler and sufficient. > > The method to identify the platforms is designed by Andi Kleen. I don't have any major problems with the other patches in the patch series. In fact I would like to get them committed somewhere, so we can continue building on them. > @@ -366,6 +368,15 @@ unknown_nmi_error(unsigned char reason, > if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == > NOTIFY_STOP) > return; > + /* > + * On some platforms, hardware errors may be notified via > + * unknown NMI > + */ > + if (unknown_nmi_as_hwerr) > + panic( > + "NMI for hardware error without error record: Not continuing\n" > + "Please check BIOS/BMC log for further information."); > + > #ifdef CONFIG_MCA > /* > * Might actually be able to figure out what the guilty party The only quirk I have left is the above piece, which is basically a philosophy difference with Robert and myself. Where we believe it should be on the die_chain and Andi and yourself would like to see it explicitly called out. If we move to a new notifier chain, like we discussed in another thread, would you guys be willing to move this into that new notifier chain or is your argument still going to stand? Cheers, Don ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error 2010-10-11 21:20 ` Don Zickus @ 2010-10-12 1:10 ` Huang Ying 2010-10-20 6:12 ` Huang Ying 1 sibling, 0 replies; 44+ messages in thread From: Huang Ying @ 2010-10-12 1:10 UTC (permalink / raw) To: Don Zickus Cc: Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org, Andi Kleen, Robert Richter On Tue, 2010-10-12 at 05:20 +0800, Don Zickus wrote: > On Sat, Oct 09, 2010 at 02:49:46PM +0800, Huang Ying wrote: > > In general, unknown NMI is used by hardware and firmware to notify > > fatal hardware errors to OS. So the Linux should treat unknown NMI as > > hardware error and go panic upon unknown NMI for better error > > containment. > > > > But there are some broken hardware, which will generate unknown NMI > > not for hardware error. To support these machines, a white list > > mechanism is provided to treat unknown NMI as hardware error only on > > some known working system. > > > > These systems are identified via the presentation of APEI HEST or > > some PCI ID of the host bridge. The PCI ID of host bridge instead of > > DMI ID is used, so that the checking can be done based on the platform > > type instead of motherboard. This should be simpler and sufficient. > > > > The method to identify the platforms is designed by Andi Kleen. > > I don't have any major problems with the other patches in the patch > series. In fact I would like to get them committed somewhere, so we can > continue building on them. Thanks. > > @@ -366,6 +368,15 @@ unknown_nmi_error(unsigned char reason, > > if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == > > NOTIFY_STOP) > > return; > > + /* > > + * On some platforms, hardware errors may be notified via > > + * unknown NMI > > + */ > > + if (unknown_nmi_as_hwerr) > > + panic( > > + "NMI for hardware error without error record: Not continuing\n" > > + "Please check BIOS/BMC log for further information."); > > + > > #ifdef CONFIG_MCA > > /* > > * Might actually be able to figure out what the guilty party > > The only quirk I have left is the above piece, which is basically a > philosophy difference with Robert and myself. Where we believe it should > be on the die_chain and Andi and yourself would like to see it explicitly > called out. > > If we move to a new notifier chain, like we discussed in another thread, > would you guys be willing to move this into that new notifier chain or is > your argument still going to stand? Perhaps I will not move this into that new notifier chain. If you want to do that, feel free to pick it up and change it as you will. Best Regards, Huang Ying ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error 2010-10-11 21:20 ` Don Zickus 2010-10-12 1:10 ` Huang Ying @ 2010-10-20 6:12 ` Huang Ying 2010-10-20 14:15 ` Don Zickus 1 sibling, 1 reply; 44+ messages in thread From: Huang Ying @ 2010-10-20 6:12 UTC (permalink / raw) To: Don Zickus Cc: Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org, Andi Kleen, Robert Richter Hi, Don, On Tue, 2010-10-12 at 05:20 +0800, Don Zickus wrote: > > @@ -366,6 +368,15 @@ unknown_nmi_error(unsigned char reason, > > if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == > > NOTIFY_STOP) > > return; > > + /* > > + * On some platforms, hardware errors may be notified via > > + * unknown NMI > > + */ > > + if (unknown_nmi_as_hwerr) > > + panic( > > + "NMI for hardware error without error record: Not continuing\n" > > + "Please check BIOS/BMC log for further information."); > > + > > #ifdef CONFIG_MCA > > /* > > * Might actually be able to figure out what the guilty party > > The only quirk I have left is the above piece, which is basically a > philosophy difference with Robert and myself. Where we believe it should > be on the die_chain and Andi and yourself would like to see it explicitly > called out. After some more thought, I found this is different from DIE_NMI and DIE_NMI_IPI case. I think the code added is for general unknown NMI processing instead of a device driver. What we do is not to add special processing for some devices, but treat unknown NMI as hardware error notification in general and use a white list to deal with broken hardware and stone age machine. Do you agree? If so, it should not be turned into a notifier block unless you want to turn all general unknown NMI processing code into a notifier block. Best Regards, Huang Ying ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error 2010-10-20 6:12 ` Huang Ying @ 2010-10-20 14:15 ` Don Zickus 2010-10-21 1:14 ` Huang Ying 0 siblings, 1 reply; 44+ messages in thread From: Don Zickus @ 2010-10-20 14:15 UTC (permalink / raw) To: Huang Ying Cc: Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org, Andi Kleen, Robert Richter, peterz On Wed, Oct 20, 2010 at 02:12:37PM +0800, Huang Ying wrote: > Hi, Don, > > On Tue, 2010-10-12 at 05:20 +0800, Don Zickus wrote: > > > @@ -366,6 +368,15 @@ unknown_nmi_error(unsigned char reason, > > > if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == > > > NOTIFY_STOP) > > > return; > > > + /* > > > + * On some platforms, hardware errors may be notified via > > > + * unknown NMI > > > + */ > > > + if (unknown_nmi_as_hwerr) > > > + panic( > > > + "NMI for hardware error without error record: Not continuing\n" > > > + "Please check BIOS/BMC log for further information."); > > > + > > > #ifdef CONFIG_MCA > > > /* > > > * Might actually be able to figure out what the guilty party > > > > The only quirk I have left is the above piece, which is basically a > > philosophy difference with Robert and myself. Where we believe it should > > be on the die_chain and Andi and yourself would like to see it explicitly > > called out. > > After some more thought, I found this is different from DIE_NMI and > DIE_NMI_IPI case. I think the code added is for general unknown NMI > processing instead of a device driver. What we do is not to add special > processing for some devices, but treat unknown NMI as hardware error > notification in general and use a white list to deal with broken > hardware and stone age machine. Do you agree? > > If so, it should not be turned into a notifier block unless you want to > turn all general unknown NMI processing code into a notifier block. Well, yes I actually do, mainly to keep the code simpler. But also, after having a conversation with someone yesterday, realized that unknown NMIs are dealt with on a platform level and not a chipset level. The reason I say that is some companies, like HP, have a special driver hpwdt that they want to run in the case of an unknown NMI. They don't care about HEST or the other stuff, they want their BIOS call to take care of it. So now that hack has to be put into notifier somewhere. I can only imagine Dell trying to do something similar as a value add. To me it just makes sense to setup all the HEST stuff as default notifier blocks and then have platform specific drivers register on top of them (using the priority scheme). This to me gives everyone flexibility on how to handle the unknown NMIs. Thoughts? Cheers, Don ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error 2010-10-20 14:15 ` Don Zickus @ 2010-10-21 1:14 ` Huang Ying 2010-10-21 2:31 ` Don Zickus 0 siblings, 1 reply; 44+ messages in thread From: Huang Ying @ 2010-10-21 1:14 UTC (permalink / raw) To: Don Zickus Cc: Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org, Andi Kleen, Robert Richter, peterz@infradead.org On Wed, 2010-10-20 at 22:15 +0800, Don Zickus wrote: > On Wed, Oct 20, 2010 at 02:12:37PM +0800, Huang Ying wrote: > > Hi, Don, > > > > On Tue, 2010-10-12 at 05:20 +0800, Don Zickus wrote: > > > > @@ -366,6 +368,15 @@ unknown_nmi_error(unsigned char reason, > > > > if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == > > > > NOTIFY_STOP) > > > > return; > > > > + /* > > > > + * On some platforms, hardware errors may be notified via > > > > + * unknown NMI > > > > + */ > > > > + if (unknown_nmi_as_hwerr) > > > > + panic( > > > > + "NMI for hardware error without error record: Not continuing\n" > > > > + "Please check BIOS/BMC log for further information."); > > > > + > > > > #ifdef CONFIG_MCA > > > > /* > > > > * Might actually be able to figure out what the guilty party > > > > > > The only quirk I have left is the above piece, which is basically a > > > philosophy difference with Robert and myself. Where we believe it should > > > be on the die_chain and Andi and yourself would like to see it explicitly > > > called out. > > > > After some more thought, I found this is different from DIE_NMI and > > DIE_NMI_IPI case. I think the code added is for general unknown NMI > > processing instead of a device driver. What we do is not to add special > > processing for some devices, but treat unknown NMI as hardware error > > notification in general and use a white list to deal with broken > > hardware and stone age machine. Do you agree? > > > > If so, it should not be turned into a notifier block unless you want to > > turn all general unknown NMI processing code into a notifier block. > > Well, yes I actually do, mainly to keep the code simpler. But also, after > having a conversation with someone yesterday, realized that unknown NMIs > are dealt with on a platform level and not a chipset level. But there is some general rules for unknown NMI. We think unknown NMI is hardware error notification on all systems except systems with broken hardware or software bugs, stone age machines. Do you agree with that? > The reason I say that is some companies, like HP, have a special driver > hpwdt that they want to run in the case of an unknown NMI. They don't > care about HEST or the other stuff, they want their BIOS call to take care > of it. So now that hack has to be put into notifier somewhere. Yes. I found that during NMI handler development. It sits in a notifier chain and in a driver. hpwdt uses unknown NMI for watchdog timeout notification, it is a platform feature and should be implemented in a driver. But we want to implement a general default unknown NMI processing logic, not do that for some specific platform or chipset. > I can only imagine Dell trying to do something similar as a value add. > > To me it just makes sense to setup all the HEST stuff as default notifier > blocks and then have platform specific drivers register on top of them > (using the priority scheme). This to me gives everyone flexibility on how > to handle the unknown NMIs. Yes. HEST code will be in a driver and will register a notifier block to do its work. > Thoughts? But the code in this patch is not for HEST. (HEST is only used to implement the white list). I think the code is for a general standard feature. I don't want to add HEST processing here. Do you think it should be a general rule to treat all unknown NMI as hardware error notification except some broken hardware and stone age machines? If so, this patch add that general logic to the general NMI handling code. It is not for specific hardware. The HEST and PCI ID table are used to implement a white list to deal with the broken hardware and stone age machines. Best Regards, Huang Ying ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error 2010-10-21 1:14 ` Huang Ying @ 2010-10-21 2:31 ` Don Zickus 2010-10-21 5:17 ` Huang Ying 0 siblings, 1 reply; 44+ messages in thread From: Don Zickus @ 2010-10-21 2:31 UTC (permalink / raw) To: Huang Ying Cc: Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org, Andi Kleen, Robert Richter, peterz@infradead.org On Thu, Oct 21, 2010 at 09:14:03AM +0800, Huang Ying wrote: > > > DIE_NMI_IPI case. I think the code added is for general unknown NMI > > > processing instead of a device driver. What we do is not to add special > > > processing for some devices, but treat unknown NMI as hardware error > > > notification in general and use a white list to deal with broken > > > hardware and stone age machine. Do you agree? > > > > > > If so, it should not be turned into a notifier block unless you want to > > > turn all general unknown NMI processing code into a notifier block. > > > > Well, yes I actually do, mainly to keep the code simpler. But also, after > > having a conversation with someone yesterday, realized that unknown NMIs > > are dealt with on a platform level and not a chipset level. > > But there is some general rules for unknown NMI. We think unknown NMI is > hardware error notification on all systems except systems with broken > hardware or software bugs, stone age machines. Do you agree with that? Nope. In my experiences, most of our customers are still running pre-Nehalem boxes, therefore most unknown NMIs are from broken hardware or bad firmware (at least in the bugzillas I deal with). I would be excited if I was getting some sort of hardware error notification, because then I would know where the NMI might be coming from. Instead, I have customers pull out cards out of their machine or instrument their kernel to see which device is causing the problem. Slow and painful. > > > The reason I say that is some companies, like HP, have a special driver > > hpwdt that they want to run in the case of an unknown NMI. They don't > > care about HEST or the other stuff, they want their BIOS call to take care > > of it. So now that hack has to be put into notifier somewhere. > > Yes. I found that during NMI handler development. It sits in a notifier > chain and in a driver. hpwdt uses unknown NMI for watchdog timeout > notification, it is a platform feature and should be implemented in a Actually no it doesn't, the name HP watchdog is deceiving. The intent HP has with that handler is any unknown NMI needs to be trapped by that driver so it can do an SMI call, which tries to source the NMI and save its result in NVRAM. Then it jumps back to the kernel for a reboot. I have been dealing with HP for 3 years with that driver, I have gotten quite familiar with the NMI part of it. :-) > driver. But we want to implement a general default unknown NMI > processing logic, not do that for some specific platform or chipset. > > > I can only imagine Dell trying to do something similar as a value add. > > > > To me it just makes sense to setup all the HEST stuff as default notifier > > blocks and then have platform specific drivers register on top of them > > (using the priority scheme). This to me gives everyone flexibility on how > > to handle the unknown NMIs. > > Yes. HEST code will be in a driver and will register a notifier block to > do its work. > > > Thoughts? > > But the code in this patch is not for HEST. (HEST is only used to > implement the white list). I think the code is for a general standard > feature. I don't want to add HEST processing here. > > Do you think it should be a general rule to treat all unknown NMI as > hardware error notification except some broken hardware and stone age > machines? I guess my impression of what unknown NMIs should do might be a little different than yours (not saying my view is a correct one, just the view I have when I answer your questions). (after spending more time thinking about this while looking at nmi priorities) I thought anything that registers with a notifier and cases off of DIE_NMI, should be a driver/subsystem that expects and _can properly handle_ an NMI. The expectation is that it can successfully detect the NMI is its own and return a NOTIFY_STOP if it is (after processing it). [I excluded DIE_NMI_IPI because of PeterZ's comments] Whereas DIE_NMIUNKNOWN would be for drivers/subsystem that can probably detect the NMI is its own but can't do anything but panic or drivers that don't know but want to handle the panic in their own special way (ie hpwdt, or sgi's x2apic_uv_x.c where they like to use nmi_buttons to debug stalls or hangs but don't want to panic). And if noone wants to attempt to handle it after that, then call unknown_nmi_error() (minus the notify_die(DIE_NMIUNKNOWN)). So to me hardware error notification, would just detect what chipset it is on and if it is something that matches its whitelist, register and use DIE_NMIUNKNOWN. unknown_nmi_error() would just continue to be this general and vague thing that on more modern systems will likely never be called. Anyway that is how I viewed everything or how I wouldn't mind seeing it implemented. Then again, my view could be completely wrong. :-) I'll just rely on majority concensus on somebody's view. Cheers, Don ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error 2010-10-21 2:31 ` Don Zickus @ 2010-10-21 5:17 ` Huang Ying 2010-10-21 14:10 ` Don Zickus 0 siblings, 1 reply; 44+ messages in thread From: Huang Ying @ 2010-10-21 5:17 UTC (permalink / raw) To: Don Zickus Cc: Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org, Andi Kleen, Robert Richter, peterz@infradead.org On Thu, 2010-10-21 at 10:31 +0800, Don Zickus wrote: > On Thu, Oct 21, 2010 at 09:14:03AM +0800, Huang Ying wrote: > > > > DIE_NMI_IPI case. I think the code added is for general unknown NMI > > > > processing instead of a device driver. What we do is not to add special > > > > processing for some devices, but treat unknown NMI as hardware error > > > > notification in general and use a white list to deal with broken > > > > hardware and stone age machine. Do you agree? > > > > > > > > If so, it should not be turned into a notifier block unless you want to > > > > turn all general unknown NMI processing code into a notifier block. > > > > > > Well, yes I actually do, mainly to keep the code simpler. But also, after > > > having a conversation with someone yesterday, realized that unknown NMIs > > > are dealt with on a platform level and not a chipset level. > > > > But there is some general rules for unknown NMI. We think unknown NMI is > > hardware error notification on all systems except systems with broken > > hardware or software bugs, stone age machines. Do you agree with that? > > Nope. In my experiences, most of our customers are still running > pre-Nehalem boxes, therefore most unknown NMIs are from broken hardware or > bad firmware (at least in the bugzillas I deal with). It seems that we have different point of view for reason of unknown NMI. Should broken hardware be seen as hardware error? As far as I know, Windows treat unknown NMI as hardware errors. Although we are programming for Linux not Windows. Many hardware are built for Windows. > I would be excited if I was getting some sort of hardware error > notification, because then I would know where the NMI might be coming > from. Instead, I have customers pull out cards out of their machine or > instrument their kernel to see which device is causing the problem. Slow > and painful. Hope new machine will have better hardware error reporting. :) > > > The reason I say that is some companies, like HP, have a special driver > > > hpwdt that they want to run in the case of an unknown NMI. They don't > > > care about HEST or the other stuff, they want their BIOS call to take care > > > of it. So now that hack has to be put into notifier somewhere. > > > > Yes. I found that during NMI handler development. It sits in a notifier > > chain and in a driver. hpwdt uses unknown NMI for watchdog timeout > > notification, it is a platform feature and should be implemented in a > > Actually no it doesn't, the name HP watchdog is deceiving. The intent HP > has with that handler is any unknown NMI needs to be trapped by that > driver so it can do an SMI call, which tries to source the NMI and save > its result in NVRAM. Then it jumps back to the kernel for a reboot. > > I have been dealing with HP for 3 years with that driver, I have gotten > quite familiar with the NMI part of it. :-) It seems that I am fooled by the naming :). Originally I had thought that if someone does not write the misc file (notify firmware) in time, NMI will be triggered as a watchdog. > > driver. But we want to implement a general default unknown NMI > > processing logic, not do that for some specific platform or chipset. > > > > > I can only imagine Dell trying to do something similar as a value add. > > > > > > To me it just makes sense to setup all the HEST stuff as default notifier > > > blocks and then have platform specific drivers register on top of them > > > (using the priority scheme). This to me gives everyone flexibility on how > > > to handle the unknown NMIs. > > > > Yes. HEST code will be in a driver and will register a notifier block to > > do its work. > > > > > Thoughts? > > > > But the code in this patch is not for HEST. (HEST is only used to > > implement the white list). I think the code is for a general standard > > feature. I don't want to add HEST processing here. > > > > Do you think it should be a general rule to treat all unknown NMI as > > hardware error notification except some broken hardware and stone age > > machines? > > I guess my impression of what unknown NMIs should do might be a little > different than yours (not saying my view is a correct one, just the view I > have when I answer your questions). Yes. I think so too. The reply following is my understanding for that. My understanding may be not correct too. :) > (after spending more time thinking about this while looking at nmi > priorities) > > I thought anything that registers with a notifier and cases off of > DIE_NMI, should be a driver/subsystem that expects and _can properly > handle_ an NMI. The expectation is that it can successfully detect the > NMI is its own and return a NOTIFY_STOP if it is (after processing it). > [I excluded DIE_NMI_IPI because of PeterZ's comments] I think notifier registered on DIE_NMI can panic too. Why prevent it? > Whereas DIE_NMIUNKNOWN would be for drivers/subsystem that can probably > detect the NMI is its own but can't do anything but panic or drivers that > don't know but want to handle the panic in their own special way (ie > hpwdt, or sgi's x2apic_uv_x.c where they like to use nmi_buttons to debug > stalls or hangs but don't want to panic). I think drivers want to handle the unknown NMI in their own special way are the expected users of DIE_NMIUNKNOWN. While drivers that can detect the NMI is its own and will go panic should be registered on DIE_NMI. > And if noone wants to attempt to handle it after that, then call > unknown_nmi_error() (minus the notify_die(DIE_NMIUNKNOWN)). I think unknown_nmi_error() (minus the notify_die(DIE_NMIUNKNOWN) is the general default operation for unknown NMI. DIE_NMIUNKNOWN is for drivers processing after determining the NMI is unknown and before the general default operation. > So to me hardware error notification, would just detect what chipset it is > on and if it is something that matches its whitelist, register and use > DIE_NMIUNKNOWN. unknown_nmi_error() would just continue to be this > general and vague thing that on more modern systems will likely never be > called. The difference between us is that I think it should be a general rule to treat unknown NMI as hardware error notification, while you think it should be in a driver for some special hardware. That is, it is general or special? > Anyway that is how I viewed everything or how I wouldn't mind seeing it > implemented. Then again, my view could be completely wrong. :-) > > I'll just rely on majority concensus on somebody's view. That is fair. Thanks. Best Regards, Huang Ying ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error 2010-10-21 5:17 ` Huang Ying @ 2010-10-21 14:10 ` Don Zickus 2010-10-21 15:45 ` Andi Kleen 0 siblings, 1 reply; 44+ messages in thread From: Don Zickus @ 2010-10-21 14:10 UTC (permalink / raw) To: Huang Ying Cc: Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org, Andi Kleen, Robert Richter, peterz@infradead.org On Thu, Oct 21, 2010 at 01:17:31PM +0800, Huang Ying wrote: > > > But there is some general rules for unknown NMI. We think unknown NMI is > > > hardware error notification on all systems except systems with broken > > > hardware or software bugs, stone age machines. Do you agree with that? > > > > Nope. In my experiences, most of our customers are still running > > pre-Nehalem boxes, therefore most unknown NMIs are from broken hardware or > > bad firmware (at least in the bugzillas I deal with). > > It seems that we have different point of view for reason of unknown NMI. > Should broken hardware be seen as hardware error? Well, do you have an alternative way to handle broken hardware? Broken hardware has generated NMIs, sometimes if I am lucky SERRs. The ones that generate SERRs can be filtered through a different path, but what about the ones that don't? I understand you are trying to make a distinction between the two, but I don't understand how you plan on handling the different scenarios. That's probably part of my confusion. > > As far as I know, Windows treat unknown NMI as hardware errors. Although > we are programming for Linux not Windows. Many hardware are built for > Windows. I was told Windows treats _any_ NMI as hardware errors, not just unknown ones. :-) > > > I would be excited if I was getting some sort of hardware error > > notification, because then I would know where the NMI might be coming > > from. Instead, I have customers pull out cards out of their machine or > > instrument their kernel to see which device is causing the problem. Slow > > and painful. > > Hope new machine will have better hardware error reporting. :) Me too. <snip> > > > > > > But the code in this patch is not for HEST. (HEST is only used to > > > implement the white list). I think the code is for a general standard > > > feature. I don't want to add HEST processing here. > > > > > > Do you think it should be a general rule to treat all unknown NMI as > > > hardware error notification except some broken hardware and stone age > > > machines? > > > > I guess my impression of what unknown NMIs should do might be a little > > different than yours (not saying my view is a correct one, just the view I > > have when I answer your questions). > > Yes. I think so too. The reply following is my understanding for that. > My understanding may be not correct too. :) > > > (after spending more time thinking about this while looking at nmi > > priorities) > > > > I thought anything that registers with a notifier and cases off of > > DIE_NMI, should be a driver/subsystem that expects and _can properly > > handle_ an NMI. The expectation is that it can successfully detect the > > NMI is its own and return a NOTIFY_STOP if it is (after processing it). > > [I excluded DIE_NMI_IPI because of PeterZ's comments] > > I think notifier registered on DIE_NMI can panic too. Why prevent it? True, I guess as long as the handler can determine the NMI is its own, I can't see why not (/me realizes that is what the nmi watchdog does :-) ). > > > Whereas DIE_NMIUNKNOWN would be for drivers/subsystem that can probably > > detect the NMI is its own but can't do anything but panic or drivers that > > don't know but want to handle the panic in their own special way (ie > > hpwdt, or sgi's x2apic_uv_x.c where they like to use nmi_buttons to debug > > stalls or hangs but don't want to panic). > > I think drivers want to handle the unknown NMI in their own special way > are the expected users of DIE_NMIUNKNOWN. While drivers that can detect > the NMI is its own and will go panic should be registered on DIE_NMI. Ok. I can agree with this. > > > And if noone wants to attempt to handle it after that, then call > > unknown_nmi_error() (minus the notify_die(DIE_NMIUNKNOWN)). > > I think unknown_nmi_error() (minus the notify_die(DIE_NMIUNKNOWN) is the > general default operation for unknown NMI. DIE_NMIUNKNOWN is for drivers > processing after determining the NMI is unknown and before the general > default operation. Yes. > > > So to me hardware error notification, would just detect what chipset it is > > on and if it is something that matches its whitelist, register and use > > DIE_NMIUNKNOWN. unknown_nmi_error() would just continue to be this > > general and vague thing that on more modern systems will likely never be > > called. > > The difference between us is that I think it should be a general rule to > treat unknown NMI as hardware error notification, while you think it > should be in a driver for some special hardware. That is, it is general > or special? Probably. I guess I don't fully understand your definition of hardware error notification so I can't tell if we are arguing or agreeing (but using different words). How do you envision the code looking like with hardware error notification? I just wanted to keep the code in traps.c simple and clean and not constantly add new #ifdefs every time Intel came up with an interesting way to determine a hardware error condition. For example, I am not the biggest fan of seeing stuff like edac or mce inside the code and would prefer them to use notifiers. But that is just my opinion. If you have a framework that you wanted to propose that could encapsulate an ever growing class of hardware error notifications, I would be interested. Anyway, perhaps providing some examples about what you had in mind and how it would scale going forward might help me understand what you are looking to do. Cheers, Don ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error 2010-10-21 14:10 ` Don Zickus @ 2010-10-21 15:45 ` Andi Kleen 2010-10-22 1:49 ` Don Zickus 0 siblings, 1 reply; 44+ messages in thread From: Andi Kleen @ 2010-10-21 15:45 UTC (permalink / raw) To: Don Zickus Cc: Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org, Andi Kleen, Robert Richter, peterz@infradead.org On Thu, Oct 21, 2010 at 10:10:02AM -0400, Don Zickus wrote: > On Thu, Oct 21, 2010 at 01:17:31PM +0800, Huang Ying wrote: > > > > But there is some general rules for unknown NMI. We think unknown NMI is > > > > hardware error notification on all systems except systems with broken > > > > hardware or software bugs, stone age machines. Do you agree with that? > > > > > > Nope. In my experiences, most of our customers are still running > > > pre-Nehalem boxes, therefore most unknown NMIs are from broken hardware or > > > bad firmware (at least in the bugzillas I deal with). > > > > It seems that we have different point of view for reason of unknown NMI. > > Should broken hardware be seen as hardware error? > > Well, do you have an alternative way to handle broken hardware? Broken > hardware has generated NMIs, sometimes if I am lucky SERRs. The ones that > generate SERRs can be filtered through a different path, but what about > the ones that don't? > Don, AFAIK you're saying the same thing as Ying: an unknown NMI is a hardware error. The reason the hardware does that is that it wants to tell us: "I lost track of an error. There is corrupted data somewhere in the system. Please stop, don't do anything that could consume that data. S.O.S." The correct answer for that is panic. The only problem we're trying to solve here is to distingush other cases, like when software uses NMIs for something else or there may be other cases > > > > As far as I know, Windows treat unknown NMI as hardware errors. Although > > we are programming for Linux not Windows. Many hardware are built for > > Windows. > > I was told Windows treats _any_ NMI as hardware errors, not just unknown > ones. :-) I don't know details about Windows' code here, but I assume that they have a way to hook into the NMI too, otherwise drivers like the HP watchdog wouldn't work on Windows. But yes we know they shut down. Essentially the hardware (and the BIOS) is designed under the assumption that the OS behaves like Windows for this. In my long experience in making Linux work on all kinds of hardware I learned very firmly that trying to drive PC hardware in a different way than Windows does is usually a bad idea. The approach of Ying's patch kit was really to make the behaviour more like Windows. > Probably. I guess I don't fully understand your definition of hardware > error notification so I can't tell if we are arguing or agreeing (but > using different words). > > How do you envision the code looking like with hardware error > notification? > > I just wanted to keep the code in traps.c simple and clean and not > constantly add new #ifdefs every time Intel came up with an interesting > way to determine a hardware error condition. > > For example, I am not the biggest fan of seeing stuff like edac or mce > inside the code and would prefer them to use notifiers. But that is just > my opinion. mce is only for testing anyways, "real" mce doesn't need vector 0. So basically it's just a fancy "smp_call_nmi()" thingy. EDAC shouldn't need it either, if it's does it's some probably broken legacy behaviour. > If you have a framework that you wanted to propose that could encapsulate > an ever growing class of hardware error notifications, I would be > interested The trend in error reporting is away from NMIs and towards machine checks. My bunch is that NMI reporting is more or less a legacy problem, so we have to deal with what is there today. I don't think you need to worry about a lot more hardware NMI sources. The only thing that we need to worry about is more software NMI sources, these unfortunately like to multiply. -Andi -- ak@linux.intel.com -- Speaking for myself only. ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error 2010-10-21 15:45 ` Andi Kleen @ 2010-10-22 1:49 ` Don Zickus 2010-10-22 2:05 ` Huang Ying 2010-10-22 9:24 ` Andi Kleen 0 siblings, 2 replies; 44+ messages in thread From: Don Zickus @ 2010-10-22 1:49 UTC (permalink / raw) To: Andi Kleen Cc: Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org, Robert Richter, peterz@infradead.org On Thu, Oct 21, 2010 at 05:45:44PM +0200, Andi Kleen wrote: > On Thu, Oct 21, 2010 at 10:10:02AM -0400, Don Zickus wrote: > > On Thu, Oct 21, 2010 at 01:17:31PM +0800, Huang Ying wrote: > > > > > But there is some general rules for unknown NMI. We think unknown NMI is > > > > > hardware error notification on all systems except systems with broken > > > > > hardware or software bugs, stone age machines. Do you agree with that? > > > > > > > > Nope. In my experiences, most of our customers are still running > > > > pre-Nehalem boxes, therefore most unknown NMIs are from broken hardware or > > > > bad firmware (at least in the bugzillas I deal with). > > > > > > It seems that we have different point of view for reason of unknown NMI. > > > Should broken hardware be seen as hardware error? > > > > Well, do you have an alternative way to handle broken hardware? Broken > > hardware has generated NMIs, sometimes if I am lucky SERRs. The ones that > > generate SERRs can be filtered through a different path, but what about > > the ones that don't? > > > > Don, AFAIK you're saying the same thing as Ying: an unknown NMI is > a hardware error. > > The reason the hardware does that is that it wants to tell us: > > "I lost track of an error. There is corrupted data somewhere in the system. > Please stop, don't do anything that could consume that data. S.O.S." > > The correct answer for that is panic. After re-reading Huang's patch, I am starting to understand what you mean by broken hardware. Basically you are trying to distinguish between legacy systems that were 'broken' in the sense they would randomly send uknown NMIs for no good reason, hence the 'Dazed and confused' messages and hardware errors on more modern systems that say, 'Hardware error, panicing check your BIOS for more info' (or whatever). So Huang's patch was sort of acting like a switch. On legacy systems use 'Dazed and confused' for unknown NMIs. Whereas on whitelisted modern systems use a more relavant 'Check BIOS for error' message. Is that right? That's why you guys are complaining that registering a die_notifier would be silly? > > The only problem we're trying to solve here is to distingush > other cases, like when software uses NMIs for something else > or there may be other cases Ok. > > > > > > > As far as I know, Windows treat unknown NMI as hardware errors. Although > > > we are programming for Linux not Windows. Many hardware are built for > > > Windows. > > > > I was told Windows treats _any_ NMI as hardware errors, not just unknown > > ones. :-) > > I don't know details about Windows' code here, but I assume that > they have a way to hook into the NMI too, otherwise drivers like the HP > watchdog wouldn't work on Windows. But yes we know they shut down. > > Essentially the hardware (and the BIOS) is designed > under the assumption that the OS behaves like Windows for this. > > In my long experience in making Linux work on all kinds > of hardware I learned very firmly that trying to drive PC hardware > in a different way than Windows does is usually a bad idea. Of course. > > The approach of Ying's patch kit was really to make > the behaviour more like Windows. Ok. > > > > Probably. I guess I don't fully understand your definition of hardware > > error notification so I can't tell if we are arguing or agreeing (but > > using different words). > > > > How do you envision the code looking like with hardware error > > notification? > > > > I just wanted to keep the code in traps.c simple and clean and not > > constantly add new #ifdefs every time Intel came up with an interesting > > way to determine a hardware error condition. > > > > For example, I am not the biggest fan of seeing stuff like edac or mce > > inside the code and would prefer them to use notifiers. But that is just > > my opinion. > > mce is only for testing anyways, "real" mce doesn't need vector 0. > So basically it's just a fancy "smp_call_nmi()" thingy. > > EDAC shouldn't need it either, if it's does it's some probably > broken legacy behaviour. > > > If you have a framework that you wanted to propose that could encapsulate > > an ever growing class of hardware error notifications, I would be > > interested > > The trend in error reporting is away from NMIs and towards machine checks. > My bunch is that NMI reporting is more or less a legacy problem, so we have > to deal with what is there today. That's good. > > I don't think you need to worry about a lot more hardware NMI sources. Well until those machines dominate the marketplace, I'm stuck supporting those pre-Nahelam boxes with customers that committed to 10 years with last year's technology. ;-) > > The only thing that we need to worry about is more software NMI sources, > these unfortunately like to multiply. Sure. Thanks, Don ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error 2010-10-22 1:49 ` Don Zickus @ 2010-10-22 2:05 ` Huang Ying 2010-10-22 2:56 ` Don Zickus 2010-10-22 9:24 ` Andi Kleen 1 sibling, 1 reply; 44+ messages in thread From: Huang Ying @ 2010-10-22 2:05 UTC (permalink / raw) To: Don Zickus Cc: Andi Kleen, Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org, Robert Richter, peterz@infradead.org On Fri, 2010-10-22 at 09:49 +0800, Don Zickus wrote: > On Thu, Oct 21, 2010 at 05:45:44PM +0200, Andi Kleen wrote: > > On Thu, Oct 21, 2010 at 10:10:02AM -0400, Don Zickus wrote: > > > On Thu, Oct 21, 2010 at 01:17:31PM +0800, Huang Ying wrote: > > > > > > But there is some general rules for unknown NMI. We think unknown NMI is > > > > > > hardware error notification on all systems except systems with broken > > > > > > hardware or software bugs, stone age machines. Do you agree with that? > > > > > > > > > > Nope. In my experiences, most of our customers are still running > > > > > pre-Nehalem boxes, therefore most unknown NMIs are from broken hardware or > > > > > bad firmware (at least in the bugzillas I deal with). > > > > > > > > It seems that we have different point of view for reason of unknown NMI. > > > > Should broken hardware be seen as hardware error? > > > > > > Well, do you have an alternative way to handle broken hardware? Broken > > > hardware has generated NMIs, sometimes if I am lucky SERRs. The ones that > > > generate SERRs can be filtered through a different path, but what about > > > the ones that don't? > > > > > > > Don, AFAIK you're saying the same thing as Ying: an unknown NMI is > > a hardware error. > > > > The reason the hardware does that is that it wants to tell us: > > > > "I lost track of an error. There is corrupted data somewhere in the system. > > Please stop, don't do anything that could consume that data. S.O.S." > > > > The correct answer for that is panic. > > After re-reading Huang's patch, I am starting to understand what you mean > by broken hardware. Basically you are trying to distinguish between > legacy systems that were 'broken' in the sense they would randomly send > uknown NMIs for no good reason, hence the 'Dazed and confused' messages > and hardware errors on more modern systems that say, 'Hardware error, > panicing check your BIOS for more info' (or whatever). Yes. > So Huang's patch was sort of acting like a switch. On legacy systems use > 'Dazed and confused' for unknown NMIs. Whereas on whitelisted modern > systems use a more relavant 'Check BIOS for error' message. Is that > right? In fact we want to go panic and 'check BIOS for error, contact your hardware vendor' for all systems. But as you said, there are some 'broken hardware' randomly send unknown NMIs for no good reason. So a white list is used for them. And not all pre-Nehalem machines are 'broken' in fact. > That's why you guys are complaining that registering a die_notifier would > be silly? I think whether going die_notifier or unknown_nmi_error() depends on it is general or specific for some hardware. Do you agree with that? Best Regards, Huang Ying ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error 2010-10-22 2:05 ` Huang Ying @ 2010-10-22 2:56 ` Don Zickus 2010-10-22 5:23 ` Huang Ying 0 siblings, 1 reply; 44+ messages in thread From: Don Zickus @ 2010-10-22 2:56 UTC (permalink / raw) To: Huang Ying Cc: Andi Kleen, Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org, Robert Richter, peterz@infradead.org On Fri, Oct 22, 2010 at 10:05:10AM +0800, Huang Ying wrote: > > > > Well, do you have an alternative way to handle broken hardware? Broken > > > > hardware has generated NMIs, sometimes if I am lucky SERRs. The ones that > > > > generate SERRs can be filtered through a different path, but what about > > > > the ones that don't? > > > > > > > > > > Don, AFAIK you're saying the same thing as Ying: an unknown NMI is > > > a hardware error. > > > > > > The reason the hardware does that is that it wants to tell us: > > > > > > "I lost track of an error. There is corrupted data somewhere in the system. > > > Please stop, don't do anything that could consume that data. S.O.S." > > > > > > The correct answer for that is panic. > > > > After re-reading Huang's patch, I am starting to understand what you mean > > by broken hardware. Basically you are trying to distinguish between > > legacy systems that were 'broken' in the sense they would randomly send > > uknown NMIs for no good reason, hence the 'Dazed and confused' messages > > and hardware errors on more modern systems that say, 'Hardware error, > > panicing check your BIOS for more info' (or whatever). > > Yes. > > > So Huang's patch was sort of acting like a switch. On legacy systems use > > 'Dazed and confused' for unknown NMIs. Whereas on whitelisted modern > > systems use a more relavant 'Check BIOS for error' message. Is that > > right? > > In fact we want to go panic and 'check BIOS for error, contact your > hardware vendor' for all systems. But as you said, there are some > 'broken hardware' randomly send unknown NMIs for no good reason. So a > white list is used for them. And not all pre-Nehalem machines are > 'broken' in fact. Ok, I think I finally understand what you guys are trying to do. I also can't see a problem with it. Though I think the patch could probably use some clean up to make it more clear. Off the top of my head perhaps a function call that sets the variable unknown_nmi_as_hwerr instead of setting it explicitly and maybe structuring unknown_nmi() with an if-then modern-message; else legacy-message; to possibly make it obvious what the code is trying to acheive. And yeah I know not all pre-Nehalem machines are broken. I am usually sarcastic when I mention that just because being at IDF last year, I got the impression that pre-Nehalem machines were considered the dark ages. :-) I am actually curious to know how many x86_64 machines would be considered broken? > > > That's why you guys are complaining that registering a die_notifier would > > be silly? > > I think whether going die_notifier or unknown_nmi_error() depends on it > is general or specific for some hardware. Do you agree with that? Well I am hoping the only general case would be the one you want to use now. Everything else would be specific and require a die_notifier. I mean how many different ways do we want to have a printk/panic in unknown_nmi()? Cheers, Don ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error 2010-10-22 2:56 ` Don Zickus @ 2010-10-22 5:23 ` Huang Ying 0 siblings, 0 replies; 44+ messages in thread From: Huang Ying @ 2010-10-22 5:23 UTC (permalink / raw) To: Don Zickus Cc: Andi Kleen, Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org, Robert Richter, peterz@infradead.org On Fri, 2010-10-22 at 10:56 +0800, Don Zickus wrote: > On Fri, Oct 22, 2010 at 10:05:10AM +0800, Huang Ying wrote: > > > > > Well, do you have an alternative way to handle broken hardware? Broken > > > > > hardware has generated NMIs, sometimes if I am lucky SERRs. The ones that > > > > > generate SERRs can be filtered through a different path, but what about > > > > > the ones that don't? > > > > > > > > > > > > > Don, AFAIK you're saying the same thing as Ying: an unknown NMI is > > > > a hardware error. > > > > > > > > The reason the hardware does that is that it wants to tell us: > > > > > > > > "I lost track of an error. There is corrupted data somewhere in the system. > > > > Please stop, don't do anything that could consume that data. S.O.S." > > > > > > > > The correct answer for that is panic. > > > > > > After re-reading Huang's patch, I am starting to understand what you mean > > > by broken hardware. Basically you are trying to distinguish between > > > legacy systems that were 'broken' in the sense they would randomly send > > > uknown NMIs for no good reason, hence the 'Dazed and confused' messages > > > and hardware errors on more modern systems that say, 'Hardware error, > > > panicing check your BIOS for more info' (or whatever). > > > > Yes. > > > > > So Huang's patch was sort of acting like a switch. On legacy systems use > > > 'Dazed and confused' for unknown NMIs. Whereas on whitelisted modern > > > systems use a more relavant 'Check BIOS for error' message. Is that > > > right? > > > > In fact we want to go panic and 'check BIOS for error, contact your > > hardware vendor' for all systems. But as you said, there are some > > 'broken hardware' randomly send unknown NMIs for no good reason. So a > > white list is used for them. And not all pre-Nehalem machines are > > 'broken' in fact. > > Ok, I think I finally understand what you guys are trying to do. I also > can't see a problem with it. Thanks. > Though I think the patch could probably use > some clean up to make it more clear. Off the top of my head perhaps a > function call that sets the variable unknown_nmi_as_hwerr instead of > setting it explicitly and maybe structuring unknown_nmi() with an if-then > modern-message; else legacy-message; to possibly make it obvious what the > code is trying to acheive. OK. Will do it. > And yeah I know not all pre-Nehalem machines are broken. I am usually > sarcastic when I mention that just because being at IDF last year, I got > the impression that pre-Nehalem machines were considered the dark ages. > :-) Haha > I am actually curious to know how many x86_64 machines would be considered > broken? Don't know either. > > > That's why you guys are complaining that registering a die_notifier would > > > be silly? > > > > I think whether going die_notifier or unknown_nmi_error() depends on it > > is general or specific for some hardware. Do you agree with that? > > Well I am hoping the only general case would be the one you want to use > now. Everything else would be specific and require a die_notifier. I > mean how many different ways do we want to have a printk/panic in > unknown_nmi()? I think this one should be the only one for general unknown NMI processing. Best Regards, Huang Ying ^ permalink raw reply [flat|nested] 44+ messages in thread
* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error 2010-10-22 1:49 ` Don Zickus 2010-10-22 2:05 ` Huang Ying @ 2010-10-22 9:24 ` Andi Kleen 1 sibling, 0 replies; 44+ messages in thread From: Andi Kleen @ 2010-10-22 9:24 UTC (permalink / raw) To: Don Zickus Cc: Andi Kleen, Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org, Robert Richter, peterz@infradead.org On Thu, Oct 21, 2010 at 09:49:55PM -0400, Don Zickus wrote: > After re-reading Huang's patch, I am starting to understand what you mean > by broken hardware. Basically you are trying to distinguish between > legacy systems that were 'broken' in the sense they would randomly send > uknown NMIs for no good reason, hence the 'Dazed and confused' messages > and hardware errors on more modern systems that say, 'Hardware error, > panicing check your BIOS for more info' (or whatever). Yes that's it. Unfortunately there are some cases where the BIOS lost it either, so the fallback has to be panic (at least for the modern boxes) > > So Huang's patch was sort of acting like a switch. On legacy systems use > 'Dazed and confused' for unknown NMIs. Whereas on whitelisted modern > systems use a more relavant 'Check BIOS for error' message. Is that > right? Yes. > > I don't think you need to worry about a lot more hardware NMI sources. > > Well until those machines dominate the marketplace, I'm stuck supporting > those pre-Nahelam boxes with customers that committed to 10 years with > last year's technology. ;-) I should clarify that the NMI model I described long predates Nehalem. If you assume 3-5 years deprecation cycles on servers it should be pretty much universal in this space. The HEDT detection was a proposed way to detect that, because most of these systems should have HEDT. The older machines still need to be supported, but it's ok to just behave the same as today on them, no need for great improvements here. -Andi -- ak@linux.intel.com -- Speaking for myself only. ^ permalink raw reply [flat|nested] 44+ messages in thread
* [PATCH -v3 6/6] x86, NMI, Remove do_nmi_callback logic 2010-10-09 6:49 [PATCH -v3 1/6] x86, NMI, Add NMI symbol constants and rename memory parity to PCI SERR Huang Ying ` (3 preceding siblings ...) 2010-10-09 6:49 ` [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error Huang Ying @ 2010-10-09 6:49 ` Huang Ying 4 siblings, 0 replies; 44+ messages in thread From: Huang Ying @ 2010-10-09 6:49 UTC (permalink / raw) To: Don Zickus, Ingo Molnar, H. Peter Anvin Cc: linux-kernel, Andi Kleen, Robert Richter, Huang Ying do_nmi_callback related logic is removed, because it is useless, just adds code complexity. unknown_nmi_panic sysctl is reserved to keep kernel ABI unchanged. Signed-off-by: Huang Ying <ying.huang@intel.com> --- arch/x86/include/asm/nmi.h | 10 +++++++++- arch/x86/kernel/apic/hw_nmi.c | 1 - arch/x86/kernel/apic/nmi.c | 29 +---------------------------- arch/x86/kernel/traps.c | 16 ++++++++++------ 4 files changed, 20 insertions(+), 36 deletions(-) --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -30,9 +30,17 @@ extern void setup_apic_nmi_watchdog(void extern void stop_apic_nmi_watchdog(void *); extern void disable_timer_nmi_watchdog(void); extern void enable_timer_nmi_watchdog(void); -extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason); extern void cpu_nmi_set_wd_enabled(void); +#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR) +extern int nmi_watchdog_tick(struct pt_regs *regs); +#else +static inline int nmi_watchdog_tick(struct pt_regs *regs) +{ + return 0; +} +#endif + extern atomic_t nmi_active; extern unsigned int nmi_watchdog; #define NMI_NONE 0 --- a/arch/x86/kernel/apic/hw_nmi.c +++ b/arch/x86/kernel/apic/hw_nmi.c @@ -100,7 +100,6 @@ void acpi_nmi_disable(void) { return; } #endif atomic_t nmi_active = ATOMIC_INIT(0); /* oprofile uses this */ EXPORT_SYMBOL(nmi_active); -int unknown_nmi_panic; void cpu_nmi_set_wd_enabled(void) { return; } void stop_apic_nmi_watchdog(void *unused) { return; } void setup_apic_nmi_watchdog(void *unused) { return; } --- a/arch/x86/kernel/apic/nmi.c +++ b/arch/x86/kernel/apic/nmi.c @@ -37,7 +37,6 @@ #include <asm/mach_traps.h> -int unknown_nmi_panic; int nmi_watchdog_enabled; /* For reliability, we're prepared to waste bits here. */ @@ -389,7 +388,7 @@ void touch_nmi_watchdog(void) EXPORT_SYMBOL(touch_nmi_watchdog); notrace __kprobes int -nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) +nmi_watchdog_tick(struct pt_regs *regs) { /* * Since current_thread_info()-> is always on the stack, and we @@ -483,23 +482,6 @@ static void disable_ioapic_nmi_watchdog( on_each_cpu(stop_apic_nmi_watchdog, NULL, 1); } -static int __init setup_unknown_nmi_panic(char *str) -{ - unknown_nmi_panic = 1; - return 1; -} -__setup("unknown_nmi_panic", setup_unknown_nmi_panic); - -static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu) -{ - unsigned char reason = get_nmi_reason(); - char buf[64]; - - sprintf(buf, "NMI received for unknown reason %02x\n", reason); - die_nmi(buf, regs, 1); /* Always panic here */ - return 0; -} - /* * proc handler for /proc/sys/kernel/nmi */ @@ -540,15 +522,6 @@ int proc_nmi_enabled(struct ctl_table *t #endif /* CONFIG_SYSCTL */ -int do_nmi_callback(struct pt_regs *regs, int cpu) -{ -#ifdef CONFIG_SYSCTL - if (unknown_nmi_panic) - return unknown_nmi_panic_callback(regs, cpu); -#endif - return 0; -} - void arch_trigger_all_cpu_backtrace(void) { int i; --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -84,6 +84,7 @@ EXPORT_SYMBOL_GPL(used_vectors); static int ignore_nmis; int unknown_nmi_as_hwerr; +int unknown_nmi_panic; /* * Prevent NMI reason port (0x61) being accessed simultaneously, can @@ -308,6 +309,13 @@ gp_in_kernel: die("general protection fault", regs, error_code); } +static int __init setup_unknown_nmi_panic(char *str) +{ + unknown_nmi_panic = 1; + return 1; +} +__setup("unknown_nmi_panic", setup_unknown_nmi_panic); + static notrace __kprobes void pci_serr_error(unsigned char reason, struct pt_regs *regs) { @@ -391,7 +399,7 @@ unknown_nmi_error(unsigned char reason, reason, smp_processor_id()); pr_emerg("Do you have a strange power saving mode enabled?\n"); - if (panic_on_unrecovered_nmi) + if (unknown_nmi_panic || panic_on_unrecovered_nmi) panic("NMI: Not continuing"); pr_emerg("Dazed and confused, but trying to continue\n"); @@ -442,12 +450,8 @@ unlock_return: if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP) return; -#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR) - if (nmi_watchdog_tick(regs, reason)) - return; - if (do_nmi_callback(regs, cpu)) + if (nmi_watchdog_tick(regs)) return; -#endif unknown_nmi_error(reason, regs); } ^ permalink raw reply [flat|nested] 44+ messages in thread
end of thread, other threads:[~2010-11-02 20:47 UTC | newest] Thread overview: 44+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2010-10-09 6:49 [PATCH -v3 1/6] x86, NMI, Add NMI symbol constants and rename memory parity to PCI SERR Huang Ying 2010-10-09 6:49 ` [PATCH -v3 2/6] x86, NMI, Add touch_nmi_watchdog to io_check_error delay Huang Ying 2010-10-09 6:49 ` [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler Huang Ying 2010-10-11 16:13 ` Peter Zijlstra 2010-10-11 20:35 ` Don Zickus 2010-10-12 0:50 ` Huang Ying 2010-10-12 6:04 ` Peter Zijlstra 2010-10-12 6:14 ` Huang Ying 2010-10-12 6:31 ` Peter Zijlstra 2010-10-12 6:37 ` Huang Ying 2010-10-12 6:40 ` Peter Zijlstra 2010-10-12 6:45 ` Huang Ying 2010-10-12 6:49 ` Peter Zijlstra 2010-10-12 6:54 ` Huang Ying 2010-10-12 13:51 ` Andi Kleen 2010-10-12 14:15 ` Peter Zijlstra 2010-10-27 16:45 ` Don Zickus 2010-10-27 17:08 ` Peter Zijlstra 2010-10-27 18:07 ` Don Zickus 2010-11-02 17:50 ` Don Zickus 2010-11-02 18:16 ` Huang Ying 2010-11-02 19:11 ` Don Zickus 2010-11-02 20:47 ` Don Zickus 2010-10-09 6:49 ` [PATCH -v3 4/6] Make NMI reason io port (0x61) can be processed on any CPU Huang Ying 2010-10-09 6:49 ` [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error Huang Ying 2010-10-10 14:07 ` Alan Cox 2010-10-10 14:13 ` Andi Kleen 2010-10-11 21:08 ` Don Zickus 2010-10-11 21:12 ` Don Zickus 2010-10-11 21:20 ` Don Zickus 2010-10-12 1:10 ` Huang Ying 2010-10-20 6:12 ` Huang Ying 2010-10-20 14:15 ` Don Zickus 2010-10-21 1:14 ` Huang Ying 2010-10-21 2:31 ` Don Zickus 2010-10-21 5:17 ` Huang Ying 2010-10-21 14:10 ` Don Zickus 2010-10-21 15:45 ` Andi Kleen 2010-10-22 1:49 ` Don Zickus 2010-10-22 2:05 ` Huang Ying 2010-10-22 2:56 ` Don Zickus 2010-10-22 5:23 ` Huang Ying 2010-10-22 9:24 ` Andi Kleen 2010-10-09 6:49 ` [PATCH -v3 6/6] x86, NMI, Remove do_nmi_callback logic Huang Ying
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox