[PATCH -v3 1/6] x86, NMI, Add NMI symbol constants and rename memory parity to PCI SERR

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

* [PATCH -v3 1/6] x86, NMI, Add NMI symbol constants and rename memory parity to PCI SERR
@ 2010-10-09  6:49 Huang Ying
  2010-10-09  6:49 ` [PATCH -v3 2/6] x86, NMI, Add touch_nmi_watchdog to io_check_error delay Huang Ying
                   ` (4 more replies)
  0 siblings, 5 replies; 44+ messages in thread
From: Huang Ying @ 2010-10-09  6:49 UTC (permalink / raw)
  To: Don Zickus, Ingo Molnar, H. Peter Anvin
  Cc: linux-kernel, Andi Kleen, Robert Richter, Huang Ying

Replace the NMI related magic numbers with symbol constants.

memory parity error is only valid for IBM PC-AT, newer machine use 7
bit (0x80) of 0x61 port for PCI SERR. While memory error is usually
reported via MCE. So corresponding function name and kernel log string
is changed.

But on some machines, PCI SERR line is still used to report memory
errors. This is used by EDAC, so corresponding EDAC call is reserved.


v3:

- Merge adding symbol constants and renaming patches

v2:

- EDAC call in pci_serr_error is reserved.

Signed-off-by: Huang Ying <ying.huang@intel.com>
---
 arch/x86/include/asm/mach_traps.h |   12 ++++++++
 arch/x86/kernel/traps.c           |   51 +++++++++++++++++++-------------------
 2 files changed, 37 insertions(+), 26 deletions(-)

--- a/arch/x86/include/asm/mach_traps.h
+++ b/arch/x86/include/asm/mach_traps.h
@@ -7,9 +7,19 @@
 
 #include <asm/mc146818rtc.h>
 
+#define NMI_REASON_PORT		0x61
+
+#define NMI_REASON_SERR		0x80
+#define NMI_REASON_IOCHK	0x40
+#define NMI_REASON_MASK		(NMI_REASON_SERR | NMI_REASON_IOCHK)
+
+#define NMI_REASON_CLEAR_SERR	0x04
+#define NMI_REASON_CLEAR_IOCHK	0x08
+#define NMI_REASON_CLEAR_MASK	0x0f
+
 static inline unsigned char get_nmi_reason(void)
 {
-	return inb(0x61);
+	return inb(NMI_REASON_PORT);
 }
 
 static inline void reassert_nmi(void)
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -301,15 +301,15 @@ gp_in_kernel:
 }
 
 static notrace __kprobes void
-mem_parity_error(unsigned char reason, struct pt_regs *regs)
+pci_serr_error(unsigned char reason, struct pt_regs *regs)
 {
-	printk(KERN_EMERG
-		"Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
-			reason, smp_processor_id());
-
-	printk(KERN_EMERG
-		"You have some hardware problem, likely on the PCI bus.\n");
+	pr_emerg("NMI: PCI system error (SERR) for reason %02x on CPU %d.\n",
+		 reason, smp_processor_id());
 
+	/*
+	 * On some machines, PCI SERR line is used to report memory
+	 * errors. EDAC makes use of it.
+	 */
 #if defined(CONFIG_EDAC)
 	if (edac_handler_set()) {
 		edac_atomic_assert_error();
@@ -320,11 +320,11 @@ mem_parity_error(unsigned char reason, s
 	if (panic_on_unrecovered_nmi)
 		panic("NMI: Not continuing");
 
-	printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
+	pr_emerg("Dazed and confused, but trying to continue\n");
 
-	/* Clear and disable the memory parity error line. */
-	reason = (reason & 0xf) | 4;
-	outb(reason, 0x61);
+	/* Clear and disable the PCI SERR error line. */
+	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
+	outb(reason, NMI_REASON_PORT);
 }
 
 static notrace __kprobes void
@@ -332,22 +332,24 @@ io_check_error(unsigned char reason, str
 {
 	unsigned long i;
 
-	printk(KERN_EMERG "NMI: IOCK error (debug interrupt?)\n");
+	pr_emerg(
+	"NMI: IOCK error (debug interrupt?) for reason %02x on CPU %d.\n",
+		 reason, smp_processor_id());
 	show_registers(regs);
 
 	if (panic_on_io_nmi)
 		panic("NMI IOCK error: Not continuing");
 
 	/* Re-enable the IOCK line, wait for a few seconds */
-	reason = (reason & 0xf) | 8;
-	outb(reason, 0x61);
+	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
+	outb(reason, NMI_REASON_PORT);
 
 	i = 2000;
 	while (--i)
 		udelay(1000);
 
-	reason &= ~8;
-	outb(reason, 0x61);
+	reason &= ~NMI_REASON_CLEAR_IOCHK;
+	outb(reason, NMI_REASON_PORT);
 }
 
 static notrace __kprobes void
@@ -366,15 +368,14 @@ unknown_nmi_error(unsigned char reason,
 		return;
 	}
 #endif
-	printk(KERN_EMERG
-		"Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
-			reason, smp_processor_id());
+	pr_emerg("Uhhuh. NMI received for unknown reason %02x on CPU %d.\n",
+		 reason, smp_processor_id());
 
-	printk(KERN_EMERG "Do you have a strange power saving mode enabled?\n");
+	pr_emerg("Do you have a strange power saving mode enabled?\n");
 	if (panic_on_unrecovered_nmi)
 		panic("NMI: Not continuing");
 
-	printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
+	pr_emerg("Dazed and confused, but trying to continue\n");
 }
 
 static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
@@ -388,7 +389,7 @@ static notrace __kprobes void default_do
 	if (!cpu)
 		reason = get_nmi_reason();
 
-	if (!(reason & 0xc0)) {
+	if (!(reason & NMI_REASON_MASK)) {
 		if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
 								== NOTIFY_STOP)
 			return;
@@ -418,9 +419,9 @@ static notrace __kprobes void default_do
 		return;
 
 	/* AK: following checks seem to be broken on modern chipsets. FIXME */
-	if (reason & 0x80)
-		mem_parity_error(reason, regs);
-	if (reason & 0x40)
+	if (reason & NMI_REASON_SERR)
+		pci_serr_error(reason, regs);
+	if (reason & NMI_REASON_IOCHK)
 		io_check_error(reason, regs);
 #ifdef CONFIG_X86_32
 	/*

^ permalink raw reply	[flat|nested] 44+ messages in thread

* [PATCH -v3 2/6] x86, NMI, Add touch_nmi_watchdog to io_check_error delay
  2010-10-09  6:49 [PATCH -v3 1/6] x86, NMI, Add NMI symbol constants and rename memory parity to PCI SERR Huang Ying
@ 2010-10-09  6:49 ` Huang Ying
  2010-10-09  6:49 ` [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler Huang Ying
                   ` (3 subsequent siblings)
  4 siblings, 0 replies; 44+ messages in thread
From: Huang Ying @ 2010-10-09  6:49 UTC (permalink / raw)
  To: Don Zickus, Ingo Molnar, H. Peter Anvin
  Cc: linux-kernel, Andi Kleen, Robert Richter, Huang Ying

Prevent the long delay in io_check_error make NMI watchdog timeout.

Signed-off-by: Huang Ying <ying.huang@intel.com>
---
 arch/x86/kernel/traps.c |    8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -344,9 +344,11 @@ io_check_error(unsigned char reason, str
 	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
 	outb(reason, NMI_REASON_PORT);
 
-	i = 2000;
-	while (--i)
-		udelay(1000);
+	i = 20000;
+	while (--i) {
+		touch_nmi_watchdog();
+		udelay(100);
+	}
 
 	reason &= ~NMI_REASON_CLEAR_IOCHK;
 	outb(reason, NMI_REASON_PORT);

^ permalink raw reply	[flat|nested] 44+ messages in thread

* [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler
  2010-10-09  6:49 [PATCH -v3 1/6] x86, NMI, Add NMI symbol constants and rename memory parity to PCI SERR Huang Ying
  2010-10-09  6:49 ` [PATCH -v3 2/6] x86, NMI, Add touch_nmi_watchdog to io_check_error delay Huang Ying
@ 2010-10-09  6:49 ` Huang Ying
  2010-10-11 16:13   ` Peter Zijlstra
  2010-10-09  6:49 ` [PATCH -v3 4/6] Make NMI reason io port (0x61) can be processed on any CPU Huang Ying
                   ` (2 subsequent siblings)
  4 siblings, 1 reply; 44+ messages in thread
From: Huang Ying @ 2010-10-09  6:49 UTC (permalink / raw)
  To: Don Zickus, Ingo Molnar, H. Peter Anvin
  Cc: linux-kernel, Andi Kleen, Robert Richter, Huang Ying

The original NMI handler is quite outdated in many aspects. This patch
try to fix it.

The order to process the NMI sources are changed as follow:

notify_die(DIE_NMI_IPI);
notify_die(DIE_NMI);
/* process io port 0x61 */
nmi_watchdog_touch();
unknown_nmi();

DIE_NMI_IPI is used to process CPU specific NMI sources, such as perf
event, oprofile, crash IPI, etc. While DIE_NMI is used to process
non-CPU-specific NMI sources, such as APEI (ACPI Platform Error
Interface) GHES (Generic Hardware Error Source), etc. Non-CPU-specific
NMI sources can be processed on any CPU,

DIE_NMI_IPI must be processed before DIE_NMI. For example, perf event
trigger a NMI on CPU 1, at the same time, APEI GHES trigger another
NMI on CPU 0. If DIE_NMI is processed before DIE_NMI_IPI, it is
possible that APEI GHES is processed on CPU 1, while unknown NMI is
gotten on CPU 0.

In this new order of processing, performance sensitive NMI sources
such as oprofile or perf event will have better performance because
the time consuming IO port reading is done after them.

Only one NMI is eaten for each NMI handler call, even for PCI SERR and
IOCHK NMIs. Because one NMI should be raised for each of them, eating
too many NMI will cause unnecessary unknown NMI.

The die value used in NMI sources are fixed accordingly.

The NMI handler in the patch is designed by Andi Kleen.

v3:

- Make DIE_NMI and DIE_NMI_UNKNOWN work in more traditional way.

v2:

- Split process NMI reason (0x61) on non-BSP into another patch

Signed-off-by: Huang Ying <ying.huang@intel.com>
---
 arch/x86/kernel/cpu/perf_event.c  |    1 
 arch/x86/kernel/traps.c           |   78 +++++++++++++++++++-------------------
 arch/x86/oprofile/nmi_int.c       |    1 
 arch/x86/oprofile/nmi_timer_int.c |    2 
 drivers/char/ipmi/ipmi_watchdog.c |    2 
 drivers/watchdog/hpwdt.c          |    2 
 6 files changed, 43 insertions(+), 43 deletions(-)

--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1245,7 +1245,6 @@ perf_event_nmi_handler(struct notifier_b
 		return NOTIFY_DONE;
 
 	switch (cmd) {
-	case DIE_NMI:
 	case DIE_NMI_IPI:
 		break;
 	case DIE_NMIUNKNOWN:
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -385,53 +385,55 @@ static notrace __kprobes void default_do
 	unsigned char reason = 0;
 	int cpu;
 
-	cpu = smp_processor_id();
+	/*
+	 * CPU-specific NMI must be processed before non-CPU-specific
+	 * NMI, otherwise we may lose it, because the CPU-specific
+	 * NMI can not be detected/processed on other CPUs.
+	 */
+
+	/*
+	 * CPU-specific NMI: send to specific CPU or NMI sources must
+	 * be processed on specific CPU
+	 */
+	if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, 0, 2, SIGINT)
+	    == NOTIFY_STOP)
+		return;
 
+	/* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
+	cpu = smp_processor_id();
 	/* Only the BSP gets external NMIs from the system. */
-	if (!cpu)
+	if (!cpu) {
 		reason = get_nmi_reason();
-
-	if (!(reason & NMI_REASON_MASK)) {
-		if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, reason, 2, SIGINT)
-								== NOTIFY_STOP)
-			return;
-
-#ifdef CONFIG_X86_LOCAL_APIC
-		if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
-							== NOTIFY_STOP)
+		if (reason & NMI_REASON_MASK) {
+			if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
+			    == NOTIFY_STOP)
+				return;
+			if (reason & NMI_REASON_SERR)
+				pci_serr_error(reason, regs);
+			else if (reason & NMI_REASON_IOCHK)
+				io_check_error(reason, regs);
+#ifdef CONFIG_X86_32
+			/*
+			 * Reassert NMI in case it became active
+			 * meanwhile as it's edge-triggered:
+			 */
+			reassert_nmi();
+#endif
 			return;
+		}
+	}
 
-#ifndef CONFIG_LOCKUP_DETECTOR
-		/*
-		 * Ok, so this is none of the documented NMI sources,
-		 * so it must be the NMI watchdog.
-		 */
-		if (nmi_watchdog_tick(regs, reason))
-			return;
-		if (!do_nmi_callback(regs, cpu))
-#endif /* !CONFIG_LOCKUP_DETECTOR */
-			unknown_nmi_error(reason, regs);
-#else
-		unknown_nmi_error(reason, regs);
-#endif
+	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
+		return;
 
+#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR)
+	if (nmi_watchdog_tick(regs, reason))
 		return;
-	}
-	if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP)
+	if (do_nmi_callback(regs, cpu))
 		return;
-
-	/* AK: following checks seem to be broken on modern chipsets. FIXME */
-	if (reason & NMI_REASON_SERR)
-		pci_serr_error(reason, regs);
-	if (reason & NMI_REASON_IOCHK)
-		io_check_error(reason, regs);
-#ifdef CONFIG_X86_32
-	/*
-	 * Reassert NMI in case it became active meanwhile
-	 * as it's edge-triggered:
-	 */
-	reassert_nmi();
 #endif
+
+	unknown_nmi_error(reason, regs);
 }
 
 dotraplinkage notrace __kprobes void
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -64,7 +64,6 @@ static int profile_exceptions_notify(str
 	int ret = NOTIFY_DONE;
 
 	switch (val) {
-	case DIE_NMI:
 	case DIE_NMI_IPI:
 		if (ctr_running)
 			model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs));
--- a/arch/x86/oprofile/nmi_timer_int.c
+++ b/arch/x86/oprofile/nmi_timer_int.c
@@ -25,7 +25,7 @@ static int profile_timer_exceptions_noti
 	int ret = NOTIFY_DONE;
 
 	switch (val) {
-	case DIE_NMI:
+	case DIE_NMI_IPI:
 		oprofile_add_sample(args->regs, 0);
 		ret = NOTIFY_STOP;
 		break;
--- a/drivers/char/ipmi/ipmi_watchdog.c
+++ b/drivers/char/ipmi/ipmi_watchdog.c
@@ -1080,7 +1080,7 @@ ipmi_nmi(struct notifier_block *self, un
 {
 	struct die_args *args = data;
 
-	if (val != DIE_NMI)
+	if (val != DIE_NMIUNKNOWN)
 		return NOTIFY_OK;
 
 	/* Hack, if it's a memory or I/O error, ignore it. */
--- a/drivers/watchdog/hpwdt.c
+++ b/drivers/watchdog/hpwdt.c
@@ -469,7 +469,7 @@ static int hpwdt_pretimeout(struct notif
 	unsigned long rom_pl;
 	static int die_nmi_called;
 
-	if (ulReason != DIE_NMI && ulReason != DIE_NMI_IPI)
+	if (ulReason != DIE_NMIUNKNOWN)
 		goto out;
 
 	if (!hpwdt_nmi_decoding)

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler
  2010-10-09  6:49 ` [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler Huang Ying
@ 2010-10-11 16:13   ` Peter Zijlstra
  2010-10-11 20:35     ` Don Zickus
  2010-10-12  0:50     ` Huang Ying
  0 siblings, 2 replies; 44+ messages in thread
From: Peter Zijlstra @ 2010-10-11 16:13 UTC (permalink / raw)
  To: Huang Ying
  Cc: Don Zickus, Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen,
	Robert Richter

On Sat, 2010-10-09 at 14:49 +0800, Huang Ying wrote:
> notify_die(DIE_NMI_IPI);
> notify_die(DIE_NMI);
> /* process io port 0x61 */
> nmi_watchdog_touch();
> unknown_nmi(); 

Why keep NMI_IPI? What the heck is it for?

I'd like to see:

 DIE_NMI
 DIE_NMI_UNKNOWN

In DIE_NMI we walk the chain and deal with NMIs, in DIE_NMI_UNKNOWN we
try to consume extra NMIs where possible (like the much discussed extra
PMU interrupts).



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler
  2010-10-11 16:13   ` Peter Zijlstra
@ 2010-10-11 20:35     ` Don Zickus
  2010-10-12  0:50     ` Huang Ying
  1 sibling, 0 replies; 44+ messages in thread
From: Don Zickus @ 2010-10-11 20:35 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen,
	Robert Richter

On Mon, Oct 11, 2010 at 06:13:55PM +0200, Peter Zijlstra wrote:
> On Sat, 2010-10-09 at 14:49 +0800, Huang Ying wrote:
> > notify_die(DIE_NMI_IPI);
> > notify_die(DIE_NMI);
> > /* process io port 0x61 */
> > nmi_watchdog_touch();
> > unknown_nmi(); 
> 
> Why keep NMI_IPI? What the heck is it for?
> 
> I'd like to see:
> 
>  DIE_NMI
>  DIE_NMI_UNKNOWN
> 
> In DIE_NMI we walk the chain and deal with NMIs, in DIE_NMI_UNKNOWN we
> try to consume extra NMIs where possible (like the much discussed extra
> PMU interrupts).

Yeah, there was another much discussed thread about creating an NMI
notifier chain (instead of using the die_chain).  And then re-arrange
handlers according to a priority scheme.

I wouldn't mind have Ingo commit most of this patchset to a temporary
work-in-progress branch and have us build more changes on top.  A lot of
these changes are a good stepping stone towards simplifying things.

Cheers,
Don

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler
  2010-10-11 16:13   ` Peter Zijlstra
  2010-10-11 20:35     ` Don Zickus
@ 2010-10-12  0:50     ` Huang Ying
  2010-10-12  6:04       ` Peter Zijlstra
  1 sibling, 1 reply; 44+ messages in thread
From: Huang Ying @ 2010-10-12  0:50 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Don Zickus, Ingo Molnar, H. Peter Anvin,
	linux-kernel@vger.kernel.org, Andi Kleen, Robert Richter

On Tue, 2010-10-12 at 00:13 +0800, Peter Zijlstra wrote:
> On Sat, 2010-10-09 at 14:49 +0800, Huang Ying wrote:
> > notify_die(DIE_NMI_IPI);
> > notify_die(DIE_NMI);
> > /* process io port 0x61 */
> > nmi_watchdog_touch();
> > unknown_nmi(); 
> 
> Why keep NMI_IPI? What the heck is it for?

DIE_NMI_IPI is used for CPU-specific or CPU-local NMIs, such as perf
NMI. While DIE_NMI is used for non-CPU-specific or global NMIs, such as
NMI notification from source bridge.

The order between these two is important. So we use two die value to
enforce the order.

> I'd like to see:
> 
>  DIE_NMI
>  DIE_NMI_UNKNOWN
> 
> In DIE_NMI we walk the chain and deal with NMIs, in DIE_NMI_UNKNOWN we
> try to consume extra NMIs where possible (like the much discussed extra
> PMU interrupts).

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler
  2010-10-12  0:50     ` Huang Ying
@ 2010-10-12  6:04       ` Peter Zijlstra
  2010-10-12  6:14         ` Huang Ying
  0 siblings, 1 reply; 44+ messages in thread
From: Peter Zijlstra @ 2010-10-12  6:04 UTC (permalink / raw)
  To: Huang Ying
  Cc: Don Zickus, Ingo Molnar, H. Peter Anvin,
	linux-kernel@vger.kernel.org, Andi Kleen, Robert Richter

On Tue, 2010-10-12 at 08:50 +0800, Huang Ying wrote:
> On Tue, 2010-10-12 at 00:13 +0800, Peter Zijlstra wrote:
> > On Sat, 2010-10-09 at 14:49 +0800, Huang Ying wrote:
> > > notify_die(DIE_NMI_IPI);
> > > notify_die(DIE_NMI);
> > > /* process io port 0x61 */
> > > nmi_watchdog_touch();
> > > unknown_nmi(); 
> > 
> > Why keep NMI_IPI? What the heck is it for?
> 
> DIE_NMI_IPI is used for CPU-specific or CPU-local NMIs, such as perf
> NMI. While DIE_NMI is used for non-CPU-specific or global NMIs, such as
> NMI notification from source bridge.
> 
> The order between these two is important. So we use two die value to
> enforce the order.

But you can't know about that, there is no reason field to distinguish
between these cases, so you might as well fold it into a single notifier
chain and be done with it.

There is no good reason to call two chains when one is enough.

Also, the IPI name is terrible, its not IPI related at all. Please kill
the thing.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler
  2010-10-12  6:04       ` Peter Zijlstra
@ 2010-10-12  6:14         ` Huang Ying
  2010-10-12  6:31           ` Peter Zijlstra
  0 siblings, 1 reply; 44+ messages in thread
From: Huang Ying @ 2010-10-12  6:14 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Don Zickus, Ingo Molnar, H. Peter Anvin,
	linux-kernel@vger.kernel.org, Andi Kleen, Robert Richter

On Tue, 2010-10-12 at 14:04 +0800, Peter Zijlstra wrote:
> On Tue, 2010-10-12 at 08:50 +0800, Huang Ying wrote:
> > On Tue, 2010-10-12 at 00:13 +0800, Peter Zijlstra wrote:
> > > On Sat, 2010-10-09 at 14:49 +0800, Huang Ying wrote:
> > > > notify_die(DIE_NMI_IPI);
> > > > notify_die(DIE_NMI);
> > > > /* process io port 0x61 */
> > > > nmi_watchdog_touch();
> > > > unknown_nmi(); 
> > > 
> > > Why keep NMI_IPI? What the heck is it for?
> > 
> > DIE_NMI_IPI is used for CPU-specific or CPU-local NMIs, such as perf
> > NMI. While DIE_NMI is used for non-CPU-specific or global NMIs, such as
> > NMI notification from source bridge.
> > 
> > The order between these two is important. So we use two die value to
> > enforce the order.
> 
> But you can't know about that, there is no reason field to distinguish
> between these cases, so you might as well fold it into a single notifier
> chain and be done with it.

NMI users know that. Such as perf uses CPU-specific NMI, while APEI GHES
uses non-CPU-specific NMI. Different users expect different die values,
such as perf expects DIE_NMI_IPI, while APEI GHES expects DIE_NMI, so
that perf can be checked before APEI GHES.

> There is no good reason to call two chains when one is enough.
> 
> Also, the IPI name is terrible, its not IPI related at all. Please kill
> the thing.

I agree with you that IPI is not a good name. Can you suggest a better
name?

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler
  2010-10-12  6:14         ` Huang Ying
@ 2010-10-12  6:31           ` Peter Zijlstra
  2010-10-12  6:37             ` Huang Ying
  0 siblings, 1 reply; 44+ messages in thread
From: Peter Zijlstra @ 2010-10-12  6:31 UTC (permalink / raw)
  To: Huang Ying
  Cc: Don Zickus, Ingo Molnar, H. Peter Anvin,
	linux-kernel@vger.kernel.org, Andi Kleen, Robert Richter

On Tue, 2010-10-12 at 14:14 +0800, Huang Ying wrote:
> On Tue, 2010-10-12 at 14:04 +0800, Peter Zijlstra wrote:
> > On Tue, 2010-10-12 at 08:50 +0800, Huang Ying wrote:
> > > On Tue, 2010-10-12 at 00:13 +0800, Peter Zijlstra wrote:
> > > > On Sat, 2010-10-09 at 14:49 +0800, Huang Ying wrote:
> > > > > notify_die(DIE_NMI_IPI);
> > > > > notify_die(DIE_NMI);
> > > > > /* process io port 0x61 */
> > > > > nmi_watchdog_touch();
> > > > > unknown_nmi(); 
> > > > 
> > > > Why keep NMI_IPI? What the heck is it for?
> > > 
> > > DIE_NMI_IPI is used for CPU-specific or CPU-local NMIs, such as perf
> > > NMI. While DIE_NMI is used for non-CPU-specific or global NMIs, such as
> > > NMI notification from source bridge.
> > > 
> > > The order between these two is important. So we use two die value to
> > > enforce the order.
> > 
> > But you can't know about that, there is no reason field to distinguish
> > between these cases, so you might as well fold it into a single notifier
> > chain and be done with it.
> 
> NMI users know that. Such as perf uses CPU-specific NMI, while APEI GHES
> uses non-CPU-specific NMI. Different users expect different die values,
> such as perf expects DIE_NMI_IPI, while APEI GHES expects DIE_NMI, so
> that perf can be checked before APEI GHES.

GAAHHH, so why can't they live on a single notifier list? Its got
priorities to deal with that?

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler
  2010-10-12  6:31           ` Peter Zijlstra
@ 2010-10-12  6:37             ` Huang Ying
  2010-10-12  6:40               ` Peter Zijlstra
  0 siblings, 1 reply; 44+ messages in thread
From: Huang Ying @ 2010-10-12  6:37 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Don Zickus, Ingo Molnar, H. Peter Anvin,
	linux-kernel@vger.kernel.org, Andi Kleen, Robert Richter

On Tue, 2010-10-12 at 14:31 +0800, Peter Zijlstra wrote:
> On Tue, 2010-10-12 at 14:14 +0800, Huang Ying wrote:
> > On Tue, 2010-10-12 at 14:04 +0800, Peter Zijlstra wrote:
> > > On Tue, 2010-10-12 at 08:50 +0800, Huang Ying wrote:
> > > > On Tue, 2010-10-12 at 00:13 +0800, Peter Zijlstra wrote:
> > > > > On Sat, 2010-10-09 at 14:49 +0800, Huang Ying wrote:
> > > > > > notify_die(DIE_NMI_IPI);
> > > > > > notify_die(DIE_NMI);
> > > > > > /* process io port 0x61 */
> > > > > > nmi_watchdog_touch();
> > > > > > unknown_nmi(); 
> > > > > 
> > > > > Why keep NMI_IPI? What the heck is it for?
> > > > 
> > > > DIE_NMI_IPI is used for CPU-specific or CPU-local NMIs, such as perf
> > > > NMI. While DIE_NMI is used for non-CPU-specific or global NMIs, such as
> > > > NMI notification from source bridge.
> > > > 
> > > > The order between these two is important. So we use two die value to
> > > > enforce the order.
> > > 
> > > But you can't know about that, there is no reason field to distinguish
> > > between these cases, so you might as well fold it into a single notifier
> > > chain and be done with it.
> > 
> > NMI users know that. Such as perf uses CPU-specific NMI, while APEI GHES
> > uses non-CPU-specific NMI. Different users expect different die values,
> > such as perf expects DIE_NMI_IPI, while APEI GHES expects DIE_NMI, so
> > that perf can be checked before APEI GHES.
> 
> GAAHHH, so why can't they live on a single notifier list? Its got
> priorities to deal with that?

It is possible to use priority to deal with the order. I just think two
die_value make order more explicit.

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler
  2010-10-12  6:37             ` Huang Ying
@ 2010-10-12  6:40               ` Peter Zijlstra
  2010-10-12  6:45                 ` Huang Ying
  0 siblings, 1 reply; 44+ messages in thread
From: Peter Zijlstra @ 2010-10-12  6:40 UTC (permalink / raw)
  To: Huang Ying
  Cc: Don Zickus, Ingo Molnar, H. Peter Anvin,
	linux-kernel@vger.kernel.org, Andi Kleen, Robert Richter

On Tue, 2010-10-12 at 14:37 +0800, Huang Ying wrote:
> I just think two die_value make order more explicit. 

I think it sucks, you get to iterate the list twice for no gain.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler
  2010-10-12  6:40               ` Peter Zijlstra
@ 2010-10-12  6:45                 ` Huang Ying
  2010-10-12  6:49                   ` Peter Zijlstra
  0 siblings, 1 reply; 44+ messages in thread
From: Huang Ying @ 2010-10-12  6:45 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Don Zickus, Ingo Molnar, H. Peter Anvin,
	linux-kernel@vger.kernel.org, Andi Kleen, Robert Richter

On Tue, 2010-10-12 at 14:40 +0800, Peter Zijlstra wrote:
> On Tue, 2010-10-12 at 14:37 +0800, Huang Ying wrote:
> > I just think two die_value make order more explicit. 
> 
> I think it sucks, you get to iterate the list twice for no gain.

The gain is the code readability. And maybe we can split the list into 2
in the future.

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler
  2010-10-12  6:45                 ` Huang Ying
@ 2010-10-12  6:49                   ` Peter Zijlstra
  2010-10-12  6:54                     ` Huang Ying
  2010-10-12 13:51                     ` Andi Kleen
  0 siblings, 2 replies; 44+ messages in thread
From: Peter Zijlstra @ 2010-10-12  6:49 UTC (permalink / raw)
  To: Huang Ying
  Cc: Don Zickus, Ingo Molnar, H. Peter Anvin,
	linux-kernel@vger.kernel.org, Andi Kleen, Robert Richter

On Tue, 2010-10-12 at 14:45 +0800, Huang Ying wrote:
> On Tue, 2010-10-12 at 14:40 +0800, Peter Zijlstra wrote:
> > On Tue, 2010-10-12 at 14:37 +0800, Huang Ying wrote:
> > > I just think two die_value make order more explicit. 
> > 
> > I think it sucks, you get to iterate the list twice for no gain.
> 
> The gain is the code readability. And maybe we can split the list into 2
> in the future.

No, its not making sense, there's only one event source - the NMI, it
doesn't make any sense what so ever to then artificially split it in
two.



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler
  2010-10-12  6:49                   ` Peter Zijlstra
@ 2010-10-12  6:54                     ` Huang Ying
  2010-10-12 13:51                     ` Andi Kleen
  1 sibling, 0 replies; 44+ messages in thread
From: Huang Ying @ 2010-10-12  6:54 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Don Zickus, Ingo Molnar, H. Peter Anvin,
	linux-kernel@vger.kernel.org, Andi Kleen, Robert Richter

On Tue, 2010-10-12 at 14:49 +0800, Peter Zijlstra wrote:
> On Tue, 2010-10-12 at 14:45 +0800, Huang Ying wrote:
> > On Tue, 2010-10-12 at 14:40 +0800, Peter Zijlstra wrote:
> > > On Tue, 2010-10-12 at 14:37 +0800, Huang Ying wrote:
> > > > I just think two die_value make order more explicit. 
> > > 
> > > I think it sucks, you get to iterate the list twice for no gain.
> > 
> > The gain is the code readability. And maybe we can split the list into 2
> > in the future.
> 
> No, its not making sense, there's only one event source - the NMI, it
> doesn't make any sense what so ever to then artificially split it in
> two.

They are two steps to process NMI.

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler
  2010-10-12  6:49                   ` Peter Zijlstra
  2010-10-12  6:54                     ` Huang Ying
@ 2010-10-12 13:51                     ` Andi Kleen
  2010-10-12 14:15                       ` Peter Zijlstra
  1 sibling, 1 reply; 44+ messages in thread
From: Andi Kleen @ 2010-10-12 13:51 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Huang Ying, Don Zickus, Ingo Molnar, H. Peter Anvin,
	linux-kernel@vger.kernel.org, Andi Kleen, Robert Richter

> No, its not making sense, there's only one event source - the NMI, it
> doesn't make any sense what so ever to then artificially split it in
> two.

I can see that "unknown NMI" is a different kind of event than
"NMI event triggered"

e.g. the debugger would only hook into "unknown NMI"

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler
  2010-10-12 13:51                     ` Andi Kleen
@ 2010-10-12 14:15                       ` Peter Zijlstra
  2010-10-27 16:45                         ` Don Zickus
  0 siblings, 1 reply; 44+ messages in thread
From: Peter Zijlstra @ 2010-10-12 14:15 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Huang Ying, Don Zickus, Ingo Molnar, H. Peter Anvin,
	linux-kernel@vger.kernel.org, Robert Richter

On Tue, 2010-10-12 at 15:51 +0200, Andi Kleen wrote:
> > No, its not making sense, there's only one event source - the NMI, it
> > doesn't make any sense what so ever to then artificially split it in
> > two.
> 
> I can see that "unknown NMI" is a different kind of event than
> "NMI event triggered"
> 
> e.g. the debugger would only hook into "unknown NMI"

Sure, I'm not arguing about DIE_NMI vs DIE_NMI_UNKNOWN. I simply don't
see the point of DIE_NMI_IPI.



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler
  2010-10-12 14:15                       ` Peter Zijlstra
@ 2010-10-27 16:45                         ` Don Zickus
  2010-10-27 17:08                           ` Peter Zijlstra
  0 siblings, 1 reply; 44+ messages in thread
From: Don Zickus @ 2010-10-27 16:45 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Andi Kleen, Huang Ying, Ingo Molnar, H. Peter Anvin,
	linux-kernel@vger.kernel.org, Robert Richter

On Tue, Oct 12, 2010 at 04:15:27PM +0200, Peter Zijlstra wrote:
> On Tue, 2010-10-12 at 15:51 +0200, Andi Kleen wrote:
> > > No, its not making sense, there's only one event source - the NMI, it
> > > doesn't make any sense what so ever to then artificially split it in
> > > two.
> > 
> > I can see that "unknown NMI" is a different kind of event than
> > "NMI event triggered"
> > 
> > e.g. the debugger would only hook into "unknown NMI"
> 
> Sure, I'm not arguing about DIE_NMI vs DIE_NMI_UNKNOWN. I simply don't
> see the point of DIE_NMI_IPI.

I threw together a patch last week and did some panic and perf testing on
it.  Things still seemed to work correctly.  This patch goes on top of the
3/6 patch in this thread.

I assume this is sorta of what Peter was looking for.

Cheers,
Don


diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index 5bdfca8..e28ec43 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -18,7 +18,6 @@ enum die_val {
 	DIE_TRAP,
 	DIE_GPF,
 	DIE_CALL,
-	DIE_NMI_IPI,
 	DIE_PAGE_FAULT,
 	DIE_NMIUNKNOWN,
 };
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index cefd694..7f9e3ac 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -53,7 +53,6 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
 
 	switch (cmd) {
 	case DIE_NMI:
-	case DIE_NMI_IPI:
 		break;
 
 	default:
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index f744f54..1f657a2 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -602,7 +602,7 @@ void __cpuinit uv_cpu_init(void)
  */
 int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
 {
-	if (reason != DIE_NMI_IPI)
+	if (reason != DIE_NMIUNKNOWN)
 		return NOTIFY_OK;
 
 	if (in_crash_kexec)
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index e7dbde7..7e0dba6 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -83,7 +83,7 @@ static int mce_raise_notify(struct notifier_block *self,
 	struct die_args *args = (struct die_args *)data;
 	int cpu = smp_processor_id();
 	struct mce *m = &__get_cpu_var(injectm);
-	if (val != DIE_NMI_IPI || !cpumask_test_cpu(cpu, mce_inject_cpumask))
+	if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask))
 		return NOTIFY_DONE;
 	cpumask_clear_cpu(cpu, mce_inject_cpumask);
 	if (m->inject_flags & MCJ_EXCEPTION)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index da98b6d..ccd3b6d 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1226,7 +1226,7 @@ perf_event_nmi_handler(struct notifier_block *self,
 		return NOTIFY_DONE;
 
 	switch (cmd) {
-	case DIE_NMI_IPI:
+	case DIE_NMI:
 		break;
 	case DIE_NMIUNKNOWN:
 		this_nmi = percpu_read(irq_stat.__nmi_count);
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 852b819..805c5ab 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -523,10 +523,6 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd)
 		}
 		return NOTIFY_DONE;
 
-	case DIE_NMI_IPI:
-		/* Just ignore, we will handle the roundup on DIE_NMI. */
-		return NOTIFY_DONE;
-
 	case DIE_NMIUNKNOWN:
 		if (was_in_debug_nmi[raw_smp_processor_id()]) {
 			was_in_debug_nmi[raw_smp_processor_id()] = 0;
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index e3af342..fd7dc85 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -753,7 +753,7 @@ static int crash_nmi_callback(struct notifier_block *self,
 {
 	int cpu;
 
-	if (val != DIE_NMI_IPI)
+	if (val != DIE_NMI)
 		return NOTIFY_OK;
 
 	cpu = raw_smp_processor_id();
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index d8acab3..9e56e3d 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -395,8 +395,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 	 * CPU-specific NMI: send to specific CPU or NMI sources must
 	 * be processed on specific CPU
 	 */
-	if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, 0, 2, SIGINT)
-	    == NOTIFY_STOP)
+	if (notify_die(DIE_NMI, "nmi_ipi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
 		return;
 
 	/* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
@@ -405,9 +404,6 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 	if (!cpu) {
 		reason = get_nmi_reason();
 		if (reason & NMI_REASON_MASK) {
-			if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
-			    == NOTIFY_STOP)
-				return;
 			if (reason & NMI_REASON_SERR)
 				pci_serr_error(reason, regs);
 			else if (reason & NMI_REASON_IOCHK)
@@ -423,9 +419,6 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 		}
 	}
 
-	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
-		return;
-
 #if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR)
 	if (nmi_watchdog_tick(regs, reason))
 		return;
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 57f01bb..dd0039c 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -64,7 +64,7 @@ static int profile_exceptions_notify(struct notifier_block *self,
 	int ret = NOTIFY_DONE;
 
 	switch (val) {
-	case DIE_NMI_IPI:
+	case DIE_NMI:
 		if (ctr_running)
 			model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs));
 		else if (!nmi_enabled)
diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c
index ab72a21..e3ecb71 100644
--- a/arch/x86/oprofile/nmi_timer_int.c
+++ b/arch/x86/oprofile/nmi_timer_int.c
@@ -25,7 +25,7 @@ static int profile_timer_exceptions_notify(struct notifier_block *self,
 	int ret = NOTIFY_DONE;
 
 	switch (val) {
-	case DIE_NMI_IPI:
+	case DIE_NMI:
 		oprofile_add_sample(args->regs, 0);
 		ret = NOTIFY_STOP;
 		break;

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler
  2010-10-27 16:45                         ` Don Zickus
@ 2010-10-27 17:08                           ` Peter Zijlstra
  2010-10-27 18:07                             ` Don Zickus
  2010-11-02 17:50                             ` Don Zickus
  0 siblings, 2 replies; 44+ messages in thread
From: Peter Zijlstra @ 2010-10-27 17:08 UTC (permalink / raw)
  To: Don Zickus
  Cc: Andi Kleen, Huang Ying, Ingo Molnar, H. Peter Anvin,
	linux-kernel@vger.kernel.org, Robert Richter

On Wed, 2010-10-27 at 12:45 -0400, Don Zickus wrote:
> I assume this is sorta of what Peter was looking for.

Yeah close, except the prio field of the various notification blocks
need to get adjusted to preserve semantics.



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler
  2010-10-27 17:08                           ` Peter Zijlstra
@ 2010-10-27 18:07                             ` Don Zickus
  2010-11-02 17:50                             ` Don Zickus
  1 sibling, 0 replies; 44+ messages in thread
From: Don Zickus @ 2010-10-27 18:07 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Andi Kleen, Huang Ying, Ingo Molnar, H. Peter Anvin,
	linux-kernel@vger.kernel.org, Robert Richter

On Wed, Oct 27, 2010 at 07:08:44PM +0200, Peter Zijlstra wrote:
> On Wed, 2010-10-27 at 12:45 -0400, Don Zickus wrote:
> > I assume this is sorta of what Peter was looking for.
> 
> Yeah close, except the prio field of the various notification blocks
> need to get adjusted to preserve semantics.

Heh.  Of course.  Let me add that to.

Thanks,
Don

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler
  2010-10-27 17:08                           ` Peter Zijlstra
  2010-10-27 18:07                             ` Don Zickus
@ 2010-11-02 17:50                             ` Don Zickus
  2010-11-02 18:16                               ` Huang Ying
  1 sibling, 1 reply; 44+ messages in thread
From: Don Zickus @ 2010-11-02 17:50 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Andi Kleen, Huang Ying, Ingo Molnar, H. Peter Anvin,
	linux-kernel@vger.kernel.org, Robert Richter

On Wed, Oct 27, 2010 at 07:08:44PM +0200, Peter Zijlstra wrote:
> On Wed, 2010-10-27 at 12:45 -0400, Don Zickus wrote:
> > I assume this is sorta of what Peter was looking for.
> 
> Yeah close, except the prio field of the various notification blocks
> need to get adjusted to preserve semantics.

Here is my next crack at it.  I added some global defines to make it more
clear what the priorities are.

Again the intent was to roll something like this patch into Huang's
original patch.  This would help simplify the notifier chain and add
priorities to maintain the original relationships.

Cheers,
Don


diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index 5bdfca8..e28ec43 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -18,7 +18,6 @@ enum die_val {
 	DIE_TRAP,
 	DIE_GPF,
 	DIE_CALL,
-	DIE_NMI_IPI,
 	DIE_PAGE_FAULT,
 	DIE_NMIUNKNOWN,
 };
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 932f0f8..cfb6156 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -68,6 +68,26 @@ static inline int nmi_watchdog_active(void)
 }
 #endif
 
+/*
+ * Define some priorities for the nmi notifier call chain.
+ *
+ * Create a local nmi bit that has a higher priority than
+ * external nmis, because the local ones are more frequent.
+ * 
+ * Also setup some default high/normal/low settings for
+ * subsystems to registers with.  Using 4 bits to seperate
+ * the priorities.  This can go alot higher if needed be.
+ */
+
+#define NMI_LOCAL_SHIFT		16	/* randomly picked */
+#define NMI_LOCAL_BIT		(1ULL << NMI_LOCAL_SHIFT)
+#define NMI_HIGH_PRIOR		(1ULL << 8)
+#define NMI_NORMAL_PRIOR	(1ULL << 4)
+#define NMI_LOW_PRIOR		(1ULL << 0)
+#define NMI_LOCAL_HIGH_PRIOR	(NMI_LOCAL_BIT | NMI_HIGH_PRIOR)
+#define NMI_LOCAL_NORMAL_PRIOR	(NMI_LOCAL_BIT | NMI_NORMAL_PRIOR)
+#define NMI_LOCAL_LOW_PRIOR	(NMI_LOCAL_BIT | NMI_LOW_PRIOR)
+
 void lapic_watchdog_stop(void);
 int lapic_watchdog_init(unsigned nmi_hz);
 int lapic_wd_event(unsigned nmi_hz);
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 07a837d..44ff8c9 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -67,7 +67,6 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
 
 	switch (cmd) {
 	case DIE_NMI:
-	case DIE_NMI_IPI:
 		break;
 
 	default:
@@ -95,7 +94,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
 static __read_mostly struct notifier_block backtrace_notifier = {
 	.notifier_call          = arch_trigger_all_cpu_backtrace_handler,
 	.next                   = NULL,
-	.priority               = 1
+	.priority               = NMI_LOCAL_LOW_PRIOR,
 };
 
 static int __init register_trigger_all_cpu_backtrace(void)
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index be6f9c4..e6c6294 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -605,7 +605,7 @@ void __cpuinit uv_cpu_init(void)
  */
 int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
 {
-	if (reason != DIE_NMI_IPI)
+	if (reason != DIE_NMIUNKNOWN)
 		return NOTIFY_OK;
 
 	if (in_crash_kexec)
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index e7dbde7..a779719 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -25,6 +25,7 @@
 #include <linux/gfp.h>
 #include <asm/mce.h>
 #include <asm/apic.h>
+#include <asm/nmi.h>
 
 /* Update fake mce registers on current CPU. */
 static void inject_mce(struct mce *m)
@@ -83,7 +84,7 @@ static int mce_raise_notify(struct notifier_block *self,
 	struct die_args *args = (struct die_args *)data;
 	int cpu = smp_processor_id();
 	struct mce *m = &__get_cpu_var(injectm);
-	if (val != DIE_NMI_IPI || !cpumask_test_cpu(cpu, mce_inject_cpumask))
+	if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask))
 		return NOTIFY_DONE;
 	cpumask_clear_cpu(cpu, mce_inject_cpumask);
 	if (m->inject_flags & MCJ_EXCEPTION)
@@ -95,7 +96,7 @@ static int mce_raise_notify(struct notifier_block *self,
 
 static struct notifier_block mce_raise_nb = {
 	.notifier_call = mce_raise_notify,
-	.priority = 1000,
+	.priority = NMI_LOCAL_NORMAL_PRIOR,
 };
 
 /* Inject mce on current CPU */
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index da98b6d..55a3797 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1226,7 +1226,7 @@ perf_event_nmi_handler(struct notifier_block *self,
 		return NOTIFY_DONE;
 
 	switch (cmd) {
-	case DIE_NMI_IPI:
+	case DIE_NMI:
 		break;
 	case DIE_NMIUNKNOWN:
 		this_nmi = percpu_read(irq_stat.__nmi_count);
@@ -1276,7 +1276,7 @@ perf_event_nmi_handler(struct notifier_block *self,
 static __read_mostly struct notifier_block perf_event_nmi_notifier = {
 	.notifier_call		= perf_event_nmi_handler,
 	.next			= NULL,
-	.priority		= 1
+	.priority		= NMI_LOCAL_LOW_PRIOR,
 };
 
 static struct event_constraint unconstrained;
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 852b819..020d052 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -523,10 +523,6 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd)
 		}
 		return NOTIFY_DONE;
 
-	case DIE_NMI_IPI:
-		/* Just ignore, we will handle the roundup on DIE_NMI. */
-		return NOTIFY_DONE;
-
 	case DIE_NMIUNKNOWN:
 		if (was_in_debug_nmi[raw_smp_processor_id()]) {
 			was_in_debug_nmi[raw_smp_processor_id()] = 0;
@@ -604,7 +600,7 @@ static struct notifier_block kgdb_notifier = {
 	/*
 	 * Lowest-prio notifier priority, we want to be notified last:
 	 */
-	.priority	= -INT_MAX,
+	.priority	= NMI_LOCAL_LOW_PRIOR,
 };
 
 /**
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index e3af342..eabbde6 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -18,6 +18,7 @@
 #include <asm/pci_x86.h>
 #include <asm/virtext.h>
 #include <asm/cpu.h>
+#include <asm/nmi.h>
 
 #ifdef CONFIG_X86_32
 # include <linux/ctype.h>
@@ -753,7 +754,7 @@ static int crash_nmi_callback(struct notifier_block *self,
 {
 	int cpu;
 
-	if (val != DIE_NMI_IPI)
+	if (val != DIE_NMI)
 		return NOTIFY_OK;
 
 	cpu = raw_smp_processor_id();
@@ -784,6 +785,8 @@ static void smp_send_nmi_allbutself(void)
 
 static struct notifier_block crash_nmi_nb = {
 	.notifier_call = crash_nmi_callback,
+	/* we want to be the first one called */
+	.priority = NMI_LOCAL_HIGH_PRIOR+1,
 };
 
 /* Halt all other CPUs, calling the specified function on each of them
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index d8acab3..9e56e3d 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -395,8 +395,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 	 * CPU-specific NMI: send to specific CPU or NMI sources must
 	 * be processed on specific CPU
 	 */
-	if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, 0, 2, SIGINT)
-	    == NOTIFY_STOP)
+	if (notify_die(DIE_NMI, "nmi_ipi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
 		return;
 
 	/* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
@@ -405,9 +404,6 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 	if (!cpu) {
 		reason = get_nmi_reason();
 		if (reason & NMI_REASON_MASK) {
-			if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
-			    == NOTIFY_STOP)
-				return;
 			if (reason & NMI_REASON_SERR)
 				pci_serr_error(reason, regs);
 			else if (reason & NMI_REASON_IOCHK)
@@ -423,9 +419,6 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 		}
 	}
 
-	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
-		return;
-
 #if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR)
 	if (nmi_watchdog_tick(regs, reason))
 		return;
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 57f01bb..43b4f35 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -64,7 +64,7 @@ static int profile_exceptions_notify(struct notifier_block *self,
 	int ret = NOTIFY_DONE;
 
 	switch (val) {
-	case DIE_NMI_IPI:
+	case DIE_NMI:
 		if (ctr_running)
 			model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs));
 		else if (!nmi_enabled)
@@ -360,7 +360,7 @@ static void nmi_cpu_setup(void *dummy)
 static struct notifier_block profile_exceptions_nb = {
 	.notifier_call = profile_exceptions_notify,
 	.next = NULL,
-	.priority = 2
+	.priority = NMI_LOCAL_LOW_PRIOR,
 };
 
 static void nmi_cpu_restore_registers(struct op_msrs *msrs)
diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c
index ab72a21..0197aa1 100644
--- a/arch/x86/oprofile/nmi_timer_int.c
+++ b/arch/x86/oprofile/nmi_timer_int.c
@@ -25,7 +25,7 @@ static int profile_timer_exceptions_notify(struct notifier_block *self,
 	int ret = NOTIFY_DONE;
 
 	switch (val) {
-	case DIE_NMI_IPI:
+	case DIE_NMI:
 		oprofile_add_sample(args->regs, 0);
 		ret = NOTIFY_STOP;
 		break;
@@ -38,7 +38,7 @@ static int profile_timer_exceptions_notify(struct notifier_block *self,
 static struct notifier_block profile_timer_exceptions_nb = {
 	.notifier_call = profile_timer_exceptions_notify,
 	.next = NULL,
-	.priority = 0
+	.priority = NMI_EXT_LOW_PRIOR,
 };
 
 static int timer_start(void)

^ permalink raw reply related	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler
  2010-11-02 17:50                             ` Don Zickus
@ 2010-11-02 18:16                               ` Huang Ying
  2010-11-02 19:11                                 ` Don Zickus
  2010-11-02 20:47                                 ` Don Zickus
  0 siblings, 2 replies; 44+ messages in thread
From: Huang Ying @ 2010-11-02 18:16 UTC (permalink / raw)
  To: Don Zickus
  Cc: Peter Zijlstra, Andi Kleen, Ingo Molnar, H. Peter Anvin,
	linux-kernel@vger.kernel.org, Robert Richter

[-- Attachment #1: Type: text/plain, Size: 10108 bytes --]

Hi, Don,

On Tue, 2010-11-02 at 10:50 -0700, Don Zickus wrote:
> On Wed, Oct 27, 2010 at 07:08:44PM +0200, Peter Zijlstra wrote:
> > On Wed, 2010-10-27 at 12:45 -0400, Don Zickus wrote:
> > > I assume this is sorta of what Peter was looking for.
> > 
> > Yeah close, except the prio field of the various notification blocks
> > need to get adjusted to preserve semantics.
> 
> Here is my next crack at it.  I added some global defines to make it more
> clear what the priorities are.
> 
> Again the intent was to roll something like this patch into Huang's
> original patch.  This would help simplify the notifier chain and add
> priorities to maintain the original relationships.
> 
> Cheers,
> Don
> 
> 
> diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
> index 5bdfca8..e28ec43 100644
> --- a/arch/x86/include/asm/kdebug.h
> +++ b/arch/x86/include/asm/kdebug.h
> @@ -18,7 +18,6 @@ enum die_val {
>  	DIE_TRAP,
>  	DIE_GPF,
>  	DIE_CALL,
> -	DIE_NMI_IPI,
>  	DIE_PAGE_FAULT,
>  	DIE_NMIUNKNOWN,
>  };
> diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
> index 932f0f8..cfb6156 100644
> --- a/arch/x86/include/asm/nmi.h
> +++ b/arch/x86/include/asm/nmi.h
> @@ -68,6 +68,26 @@ static inline int nmi_watchdog_active(void)
>  }
>  #endif
>  
> +/*
> + * Define some priorities for the nmi notifier call chain.
> + *
> + * Create a local nmi bit that has a higher priority than
> + * external nmis, because the local ones are more frequent.
> + * 
> + * Also setup some default high/normal/low settings for
> + * subsystems to registers with.  Using 4 bits to seperate
> + * the priorities.  This can go alot higher if needed be.
> + */
> +
> +#define NMI_LOCAL_SHIFT		16	/* randomly picked */
> +#define NMI_LOCAL_BIT		(1ULL << NMI_LOCAL_SHIFT)
> +#define NMI_HIGH_PRIOR		(1ULL << 8)
> +#define NMI_NORMAL_PRIOR	(1ULL << 4)
> +#define NMI_LOW_PRIOR		(1ULL << 0)
> +#define NMI_LOCAL_HIGH_PRIOR	(NMI_LOCAL_BIT | NMI_HIGH_PRIOR)
> +#define NMI_LOCAL_NORMAL_PRIOR	(NMI_LOCAL_BIT | NMI_NORMAL_PRIOR)
> +#define NMI_LOCAL_LOW_PRIOR	(NMI_LOCAL_BIT | NMI_LOW_PRIOR)
> +
>  void lapic_watchdog_stop(void);
>  int lapic_watchdog_init(unsigned nmi_hz);
>  int lapic_wd_event(unsigned nmi_hz);
> diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
> index 07a837d..44ff8c9 100644
> --- a/arch/x86/kernel/apic/hw_nmi.c
> +++ b/arch/x86/kernel/apic/hw_nmi.c
> @@ -67,7 +67,6 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
>  
>  	switch (cmd) {
>  	case DIE_NMI:
> -	case DIE_NMI_IPI:
>  		break;
>  
>  	default:
> @@ -95,7 +94,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
>  static __read_mostly struct notifier_block backtrace_notifier = {
>  	.notifier_call          = arch_trigger_all_cpu_backtrace_handler,
>  	.next                   = NULL,
> -	.priority               = 1
> +	.priority               = NMI_LOCAL_LOW_PRIOR,
>  };
>  
>  static int __init register_trigger_all_cpu_backtrace(void)
> diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
> index be6f9c4..e6c6294 100644
> --- a/arch/x86/kernel/apic/x2apic_uv_x.c
> +++ b/arch/x86/kernel/apic/x2apic_uv_x.c
> @@ -605,7 +605,7 @@ void __cpuinit uv_cpu_init(void)
>   */
>  int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
>  {
> -	if (reason != DIE_NMI_IPI)
> +	if (reason != DIE_NMIUNKNOWN)
>  		return NOTIFY_OK;
>  
>  	if (in_crash_kexec)
> diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
> index e7dbde7..a779719 100644
> --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
> +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
> @@ -25,6 +25,7 @@
>  #include <linux/gfp.h>
>  #include <asm/mce.h>
>  #include <asm/apic.h>
> +#include <asm/nmi.h>
>  
>  /* Update fake mce registers on current CPU. */
>  static void inject_mce(struct mce *m)
> @@ -83,7 +84,7 @@ static int mce_raise_notify(struct notifier_block *self,
>  	struct die_args *args = (struct die_args *)data;
>  	int cpu = smp_processor_id();
>  	struct mce *m = &__get_cpu_var(injectm);
> -	if (val != DIE_NMI_IPI || !cpumask_test_cpu(cpu, mce_inject_cpumask))
> +	if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask))
>  		return NOTIFY_DONE;
>  	cpumask_clear_cpu(cpu, mce_inject_cpumask);
>  	if (m->inject_flags & MCJ_EXCEPTION)
> @@ -95,7 +96,7 @@ static int mce_raise_notify(struct notifier_block *self,
>  
>  static struct notifier_block mce_raise_nb = {
>  	.notifier_call = mce_raise_notify,
> -	.priority = 1000,
> +	.priority = NMI_LOCAL_NORMAL_PRIOR,
>  };
>  
>  /* Inject mce on current CPU */
> diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> index da98b6d..55a3797 100644
> --- a/arch/x86/kernel/cpu/perf_event.c
> +++ b/arch/x86/kernel/cpu/perf_event.c
> @@ -1226,7 +1226,7 @@ perf_event_nmi_handler(struct notifier_block *self,
>  		return NOTIFY_DONE;
>  
>  	switch (cmd) {
> -	case DIE_NMI_IPI:
> +	case DIE_NMI:
>  		break;
>  	case DIE_NMIUNKNOWN:
>  		this_nmi = percpu_read(irq_stat.__nmi_count);
> @@ -1276,7 +1276,7 @@ perf_event_nmi_handler(struct notifier_block *self,
>  static __read_mostly struct notifier_block perf_event_nmi_notifier = {
>  	.notifier_call		= perf_event_nmi_handler,
>  	.next			= NULL,
> -	.priority		= 1
> +	.priority		= NMI_LOCAL_LOW_PRIOR,
>  };
>  
>  static struct event_constraint unconstrained;
> diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
> index 852b819..020d052 100644
> --- a/arch/x86/kernel/kgdb.c
> +++ b/arch/x86/kernel/kgdb.c
> @@ -523,10 +523,6 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd)
>  		}
>  		return NOTIFY_DONE;
>  
> -	case DIE_NMI_IPI:
> -		/* Just ignore, we will handle the roundup on DIE_NMI. */
> -		return NOTIFY_DONE;
> -
>  	case DIE_NMIUNKNOWN:
>  		if (was_in_debug_nmi[raw_smp_processor_id()]) {
>  			was_in_debug_nmi[raw_smp_processor_id()] = 0;
> @@ -604,7 +600,7 @@ static struct notifier_block kgdb_notifier = {
>  	/*
>  	 * Lowest-prio notifier priority, we want to be notified last:
>  	 */
> -	.priority	= -INT_MAX,
> +	.priority	= NMI_LOCAL_LOW_PRIOR,
>  };
>  
>  /**
> diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
> index e3af342..eabbde6 100644
> --- a/arch/x86/kernel/reboot.c
> +++ b/arch/x86/kernel/reboot.c
> @@ -18,6 +18,7 @@
>  #include <asm/pci_x86.h>
>  #include <asm/virtext.h>
>  #include <asm/cpu.h>
> +#include <asm/nmi.h>
>  
>  #ifdef CONFIG_X86_32
>  # include <linux/ctype.h>
> @@ -753,7 +754,7 @@ static int crash_nmi_callback(struct notifier_block *self,
>  {
>  	int cpu;
>  
> -	if (val != DIE_NMI_IPI)
> +	if (val != DIE_NMI)
>  		return NOTIFY_OK;
>  
>  	cpu = raw_smp_processor_id();
> @@ -784,6 +785,8 @@ static void smp_send_nmi_allbutself(void)
>  
>  static struct notifier_block crash_nmi_nb = {
>  	.notifier_call = crash_nmi_callback,
> +	/* we want to be the first one called */
> +	.priority = NMI_LOCAL_HIGH_PRIOR+1,
>  };
>  
>  /* Halt all other CPUs, calling the specified function on each of them
> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
> index d8acab3..9e56e3d 100644
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -395,8 +395,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
>  	 * CPU-specific NMI: send to specific CPU or NMI sources must
>  	 * be processed on specific CPU
>  	 */
> -	if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, 0, 2, SIGINT)
> -	    == NOTIFY_STOP)
> +	if (notify_die(DIE_NMI, "nmi_ipi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
>  		return;
>  
>  	/* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
> @@ -405,9 +404,6 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
>  	if (!cpu) {
>  		reason = get_nmi_reason();
>  		if (reason & NMI_REASON_MASK) {
> -			if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
> -			    == NOTIFY_STOP)
> -				return;
>  			if (reason & NMI_REASON_SERR)
>  				pci_serr_error(reason, regs);
>  			else if (reason & NMI_REASON_IOCHK)
> @@ -423,9 +419,6 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
>  		}
>  	}
>  
> -	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
> -		return;
> -
>  #if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR)
>  	if (nmi_watchdog_tick(regs, reason))
>  		return;
> diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
> index 57f01bb..43b4f35 100644
> --- a/arch/x86/oprofile/nmi_int.c
> +++ b/arch/x86/oprofile/nmi_int.c
> @@ -64,7 +64,7 @@ static int profile_exceptions_notify(struct notifier_block *self,
>  	int ret = NOTIFY_DONE;
>  
>  	switch (val) {
> -	case DIE_NMI_IPI:
> +	case DIE_NMI:
>  		if (ctr_running)
>  			model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs));
>  		else if (!nmi_enabled)
> @@ -360,7 +360,7 @@ static void nmi_cpu_setup(void *dummy)
>  static struct notifier_block profile_exceptions_nb = {
>  	.notifier_call = profile_exceptions_notify,
>  	.next = NULL,
> -	.priority = 2
> +	.priority = NMI_LOCAL_LOW_PRIOR,
>  };
>  
>  static void nmi_cpu_restore_registers(struct op_msrs *msrs)
> diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c
> index ab72a21..0197aa1 100644
> --- a/arch/x86/oprofile/nmi_timer_int.c
> +++ b/arch/x86/oprofile/nmi_timer_int.c
> @@ -25,7 +25,7 @@ static int profile_timer_exceptions_notify(struct notifier_block *self,
>  	int ret = NOTIFY_DONE;
>  
>  	switch (val) {
> -	case DIE_NMI_IPI:
> +	case DIE_NMI:
>  		oprofile_add_sample(args->regs, 0);
>  		ret = NOTIFY_STOP;
>  		break;
> @@ -38,7 +38,7 @@ static int profile_timer_exceptions_notify(struct notifier_block *self,
>  static struct notifier_block profile_timer_exceptions_nb = {
>  	.notifier_call = profile_timer_exceptions_notify,
>  	.next = NULL,
> -	.priority = 0
> +	.priority = NMI_EXT_LOW_PRIOR,

Do not find definition of NMI_EXT_LOW_PRIOR, forget to add it?

BTW: Attach a patch I used to test external NMI. Hope that is useful to
you.

Best Regards,
Huang Ying


[-- Attachment #2: 0008-x86-NMI-NMI-injecting-support.patch --]
[-- Type: text/x-patch, Size: 7733 bytes --]

>From 99a4accf4ccc36ac79c8663bd08e81884aedddaf Mon Sep 17 00:00:00 2001
From: Huang Ying <ying.huang@intel.com>
Date: Thu, 9 Sep 2010 14:28:44 +0800
Subject: [PATCH 08/38] x86, NMI, NMI injecting support

This patch implements trigger NMI on specified CPUs. At the same time,
the NMI reason (contents of port 0x61) can be faked too. This can be
used to debug and test the NMI handler.

Signed-off-by: Huang Ying <ying.huang@intel.com>
---
 arch/x86/Kconfig.debug            |   10 +++
 arch/x86/include/asm/mach_traps.h |    9 +++-
 arch/x86/kernel/Makefile          |    1 +
 arch/x86/kernel/nmi_inject.c      |  117 +++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/traps.c           |   34 ++++++++++-
 5 files changed, 167 insertions(+), 4 deletions(-)
 create mode 100644 arch/x86/kernel/nmi_inject.c

diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index 7508508..d6bb833 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -299,4 +299,14 @@ config DEBUG_STRICT_USER_COPY_CHECKS
 
 	  If unsure, or if you run an older (pre 4.4) gcc, say N.
 
+config NMI_INJECT
+	tristate "NMI injecting support"
+	depends on DEBUG_KERNEL
+	---help---
+	  This can be used to trigger NMI on specified CPUs. And the
+	  reason of NMI (contents of port 0x61) can be faked
+	  too. This can be used to debug and test the NMI handler.
+
+	  If unsure, say N.
+
 endmenu
diff --git a/arch/x86/include/asm/mach_traps.h b/arch/x86/include/asm/mach_traps.h
index 72a8b52..4235bb3 100644
--- a/arch/x86/include/asm/mach_traps.h
+++ b/arch/x86/include/asm/mach_traps.h
@@ -17,7 +17,7 @@
 #define NMI_REASON_CLEAR_IOCHK	0x08
 #define NMI_REASON_CLEAR_MASK	0x0f
 
-static inline unsigned char get_nmi_reason(void)
+static inline unsigned char __get_nmi_reason(void)
 {
 	return inb(NMI_REASON_PORT);
 }
@@ -40,4 +40,11 @@ static inline void reassert_nmi(void)
 		unlock_cmos();
 }
 
+struct nmi_reason_inject_data {
+	unsigned char reason;
+	unsigned char valid : 1;
+};
+
+extern struct nmi_reason_inject_data nmi_reason_inject_data;
+
 #endif /* _ASM_X86_MACH_DEFAULT_MACH_TRAPS_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 2dde3c7..5f98f33 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -119,6 +119,7 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
 obj-$(CONFIG_SWIOTLB)			+= pci-swiotlb.o
 
 obj-y					+= hwerr.o
+obj-$(CONFIG_NMI_INJECT)		+= nmi_inject.o
 
 ###
 # 64 bit specific files
diff --git a/arch/x86/kernel/nmi_inject.c b/arch/x86/kernel/nmi_inject.c
new file mode 100644
index 0000000..2b61148
--- /dev/null
+++ b/arch/x86/kernel/nmi_inject.c
@@ -0,0 +1,117 @@
+/*
+ * NMI injector, for NMI handler testing
+ *
+ * Copyright 2010 Intel Corp.
+ *   Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/debugfs.h>
+#include <linux/cpu.h>
+#include <asm/mach_traps.h>
+#include <asm/apic.h>
+
+static int nmi_reason_inject_get(void *data, u64 *val)
+{
+	if (nmi_reason_inject_data.valid)
+		*val = nmi_reason_inject_data.reason;
+	else
+		*val = ~0ULL;
+	return 0;
+}
+
+static int nmi_reason_inject_set(void *data, u64 val)
+{
+	nmi_reason_inject_data.reason = val;
+	nmi_reason_inject_data.valid = 1;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(nmi_reason_inject_fops, nmi_reason_inject_get,
+			nmi_reason_inject_set, "0x%llx\n");
+
+static int nmi_reason_uninject_set(void *data, u64 val)
+{
+	nmi_reason_inject_data.valid = 0;
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(nmi_reason_uninject_fops, NULL,
+			nmi_reason_uninject_set, "%llu\n");
+
+static int nmi_inject_set(void *data, u64 val)
+{
+	int cpu;
+	cpumask_var_t cpu_mask;
+
+	alloc_cpumask_var(&cpu_mask, GFP_KERNEL);
+	cpumask_clear(cpu_mask);
+	for_each_online_cpu(cpu) {
+		if (cpu >= sizeof(val))
+			continue;
+		if (val & (1ULL << cpu))
+			cpumask_set_cpu(cpu, cpu_mask);
+	}
+	if (!cpumask_empty(cpu_mask))
+		apic->send_IPI_mask(cpu_mask, NMI_VECTOR);
+	free_cpumask_var(cpu_mask);
+	return 0;
+}
+
+DEFINE_SIMPLE_ATTRIBUTE(nmi_inject_fops, NULL, nmi_inject_set, "0x%llx\n");
+
+static struct dentry *nmi_debug_dir;
+
+static int __init nmi_inject_init(void)
+{
+	int rc;
+	struct dentry *de;
+
+	rc = -ENOMEM;
+	nmi_debug_dir = debugfs_create_dir("nmi", NULL);
+	if (!nmi_debug_dir)
+		return rc;
+	de = debugfs_create_file("inject", S_IWUSR, nmi_debug_dir,
+				 NULL, &nmi_inject_fops);
+	if (!de)
+		goto err;
+	de = debugfs_create_file("reason_inject", S_IRUSR | S_IWUSR,
+				 nmi_debug_dir, NULL, &nmi_reason_inject_fops);
+	if (!de)
+		goto err;
+	de = debugfs_create_file("reason_uninject", S_IWUSR,
+				 nmi_debug_dir, NULL, &nmi_reason_uninject_fops);
+	if (!de)
+		goto err;
+
+	return 0;
+err:
+	debugfs_remove_recursive(nmi_debug_dir);
+	return rc;
+}
+
+static void __exit nmi_inject_exit(void)
+{
+	debugfs_remove_recursive(nmi_debug_dir);
+}
+
+module_init(nmi_inject_init);
+module_exit(nmi_inject_exit);
+
+MODULE_AUTHOR("Huang Ying");
+MODULE_DESCRIPTION("NMI injecting support");
+MODULE_LICENSE("GPL");
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index 42f16f7..acfb1a9 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -316,6 +316,34 @@ static int __init setup_unknown_nmi_panic(char *str)
 }
 __setup("unknown_nmi_panic", setup_unknown_nmi_panic);
 
+struct nmi_reason_inject_data nmi_reason_inject_data;
+EXPORT_SYMBOL_GPL(nmi_reason_inject_data);
+
+static inline unsigned char get_nmi_reason(void)
+{
+	if (nmi_reason_inject_data.valid)
+		return nmi_reason_inject_data.reason;
+	else
+		return __get_nmi_reason();
+}
+
+static inline void outb_nmi_reason(unsigned char reason)
+{
+	static unsigned char prev_reason;
+
+	if (nmi_reason_inject_data.valid) {
+		if (reason & NMI_REASON_CLEAR_SERR)
+			nmi_reason_inject_data.reason &= ~NMI_REASON_SERR;
+		if (prev_reason == (reason | NMI_REASON_CLEAR_IOCHK) &&
+		    !(reason & NMI_REASON_CLEAR_IOCHK))
+			nmi_reason_inject_data.reason &= ~NMI_REASON_IOCHK;
+		if (!nmi_reason_inject_data.reason)
+			nmi_reason_inject_data.valid = 0;
+		prev_reason = reason;
+	} else
+		outb(reason, NMI_REASON_PORT);
+}
+
 static notrace __kprobes void
 pci_serr_error(unsigned char reason, struct pt_regs *regs)
 {
@@ -340,7 +368,7 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs)
 
 	/* Clear and disable the PCI SERR error line. */
 	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
-	outb(reason, NMI_REASON_PORT);
+	outb_nmi_reason(reason);
 }
 
 static notrace __kprobes void
@@ -358,7 +386,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
 
 	/* Re-enable the IOCK line, wait for a few seconds */
 	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
-	outb(reason, NMI_REASON_PORT);
+	outb_nmi_reason(reason);
 
 	i = 20000;
 	while (--i) {
@@ -367,7 +395,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
 	}
 
 	reason &= ~NMI_REASON_CLEAR_IOCHK;
-	outb(reason, NMI_REASON_PORT);
+	outb_nmi_reason(reason);
 }
 
 static notrace __kprobes void
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler
  2010-11-02 18:16                               ` Huang Ying
@ 2010-11-02 19:11                                 ` Don Zickus
  2010-11-02 20:47                                 ` Don Zickus
  1 sibling, 0 replies; 44+ messages in thread
From: Don Zickus @ 2010-11-02 19:11 UTC (permalink / raw)
  To: Huang Ying
  Cc: Peter Zijlstra, Andi Kleen, Ingo Molnar, H. Peter Anvin,
	linux-kernel@vger.kernel.org, Robert Richter

On Wed, Nov 03, 2010 at 02:16:03AM +0800, Huang Ying wrote:
> Hi, Don,
> > @@ -38,7 +38,7 @@ static int profile_timer_exceptions_notify(struct notifier_block *self,
> >  static struct notifier_block profile_timer_exceptions_nb = {
> >  	.notifier_call = profile_timer_exceptions_notify,
> >  	.next = NULL,
> > -	.priority = 0
> > +	.priority = NMI_EXT_LOW_PRIOR,
> 
> Do not find definition of NMI_EXT_LOW_PRIOR, forget to add it?

Gah.  I meant to use NMI_LOW_PRIOR there.  Thanks for finding that.  Guess
I should any randomness to my compiles.

> 
> BTW: Attach a patch I used to test external NMI. Hope that is useful to
> you.

It looks useful.  I'll try to play with later this week.

Thanks!

Cheers,
Don

> 
> Best Regards,
> Huang Ying
> 

> From 99a4accf4ccc36ac79c8663bd08e81884aedddaf Mon Sep 17 00:00:00 2001
> From: Huang Ying <ying.huang@intel.com>
> Date: Thu, 9 Sep 2010 14:28:44 +0800
> Subject: [PATCH 08/38] x86, NMI, NMI injecting support
> 
> This patch implements trigger NMI on specified CPUs. At the same time,
> the NMI reason (contents of port 0x61) can be faked too. This can be
> used to debug and test the NMI handler.
> 
> Signed-off-by: Huang Ying <ying.huang@intel.com>
> ---
>  arch/x86/Kconfig.debug            |   10 +++
>  arch/x86/include/asm/mach_traps.h |    9 +++-
>  arch/x86/kernel/Makefile          |    1 +
>  arch/x86/kernel/nmi_inject.c      |  117 +++++++++++++++++++++++++++++++++++++
>  arch/x86/kernel/traps.c           |   34 ++++++++++-
>  5 files changed, 167 insertions(+), 4 deletions(-)
>  create mode 100644 arch/x86/kernel/nmi_inject.c
> 
> diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
> index 7508508..d6bb833 100644
> --- a/arch/x86/Kconfig.debug
> +++ b/arch/x86/Kconfig.debug
> @@ -299,4 +299,14 @@ config DEBUG_STRICT_USER_COPY_CHECKS
>  
>  	  If unsure, or if you run an older (pre 4.4) gcc, say N.
>  
> +config NMI_INJECT
> +	tristate "NMI injecting support"
> +	depends on DEBUG_KERNEL
> +	---help---
> +	  This can be used to trigger NMI on specified CPUs. And the
> +	  reason of NMI (contents of port 0x61) can be faked
> +	  too. This can be used to debug and test the NMI handler.
> +
> +	  If unsure, say N.
> +
>  endmenu
> diff --git a/arch/x86/include/asm/mach_traps.h b/arch/x86/include/asm/mach_traps.h
> index 72a8b52..4235bb3 100644
> --- a/arch/x86/include/asm/mach_traps.h
> +++ b/arch/x86/include/asm/mach_traps.h
> @@ -17,7 +17,7 @@
>  #define NMI_REASON_CLEAR_IOCHK	0x08
>  #define NMI_REASON_CLEAR_MASK	0x0f
>  
> -static inline unsigned char get_nmi_reason(void)
> +static inline unsigned char __get_nmi_reason(void)
>  {
>  	return inb(NMI_REASON_PORT);
>  }
> @@ -40,4 +40,11 @@ static inline void reassert_nmi(void)
>  		unlock_cmos();
>  }
>  
> +struct nmi_reason_inject_data {
> +	unsigned char reason;
> +	unsigned char valid : 1;
> +};
> +
> +extern struct nmi_reason_inject_data nmi_reason_inject_data;
> +
>  #endif /* _ASM_X86_MACH_DEFAULT_MACH_TRAPS_H */
> diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
> index 2dde3c7..5f98f33 100644
> --- a/arch/x86/kernel/Makefile
> +++ b/arch/x86/kernel/Makefile
> @@ -119,6 +119,7 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION) += check.o
>  obj-$(CONFIG_SWIOTLB)			+= pci-swiotlb.o
>  
>  obj-y					+= hwerr.o
> +obj-$(CONFIG_NMI_INJECT)		+= nmi_inject.o
>  
>  ###
>  # 64 bit specific files
> diff --git a/arch/x86/kernel/nmi_inject.c b/arch/x86/kernel/nmi_inject.c
> new file mode 100644
> index 0000000..2b61148
> --- /dev/null
> +++ b/arch/x86/kernel/nmi_inject.c
> @@ -0,0 +1,117 @@
> +/*
> + * NMI injector, for NMI handler testing
> + *
> + * Copyright 2010 Intel Corp.
> + *   Author: Huang Ying <ying.huang@intel.com>
> + *
> + * This program is free software; you can redistribute it and/or
> + * modify it under the terms of the GNU General Public License version
> + * 2 as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful,
> + * but WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
> + * GNU General Public License for more details.
> + *
> + * You should have received a copy of the GNU General Public License
> + * along with this program; if not, write to the Free Software
> + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/module.h>
> +#include <linux/debugfs.h>
> +#include <linux/cpu.h>
> +#include <asm/mach_traps.h>
> +#include <asm/apic.h>
> +
> +static int nmi_reason_inject_get(void *data, u64 *val)
> +{
> +	if (nmi_reason_inject_data.valid)
> +		*val = nmi_reason_inject_data.reason;
> +	else
> +		*val = ~0ULL;
> +	return 0;
> +}
> +
> +static int nmi_reason_inject_set(void *data, u64 val)
> +{
> +	nmi_reason_inject_data.reason = val;
> +	nmi_reason_inject_data.valid = 1;
> +	return 0;
> +}
> +
> +DEFINE_SIMPLE_ATTRIBUTE(nmi_reason_inject_fops, nmi_reason_inject_get,
> +			nmi_reason_inject_set, "0x%llx\n");
> +
> +static int nmi_reason_uninject_set(void *data, u64 val)
> +{
> +	nmi_reason_inject_data.valid = 0;
> +	return 0;
> +}
> +
> +DEFINE_SIMPLE_ATTRIBUTE(nmi_reason_uninject_fops, NULL,
> +			nmi_reason_uninject_set, "%llu\n");
> +
> +static int nmi_inject_set(void *data, u64 val)
> +{
> +	int cpu;
> +	cpumask_var_t cpu_mask;
> +
> +	alloc_cpumask_var(&cpu_mask, GFP_KERNEL);
> +	cpumask_clear(cpu_mask);
> +	for_each_online_cpu(cpu) {
> +		if (cpu >= sizeof(val))
> +			continue;
> +		if (val & (1ULL << cpu))
> +			cpumask_set_cpu(cpu, cpu_mask);
> +	}
> +	if (!cpumask_empty(cpu_mask))
> +		apic->send_IPI_mask(cpu_mask, NMI_VECTOR);
> +	free_cpumask_var(cpu_mask);
> +	return 0;
> +}
> +
> +DEFINE_SIMPLE_ATTRIBUTE(nmi_inject_fops, NULL, nmi_inject_set, "0x%llx\n");
> +
> +static struct dentry *nmi_debug_dir;
> +
> +static int __init nmi_inject_init(void)
> +{
> +	int rc;
> +	struct dentry *de;
> +
> +	rc = -ENOMEM;
> +	nmi_debug_dir = debugfs_create_dir("nmi", NULL);
> +	if (!nmi_debug_dir)
> +		return rc;
> +	de = debugfs_create_file("inject", S_IWUSR, nmi_debug_dir,
> +				 NULL, &nmi_inject_fops);
> +	if (!de)
> +		goto err;
> +	de = debugfs_create_file("reason_inject", S_IRUSR | S_IWUSR,
> +				 nmi_debug_dir, NULL, &nmi_reason_inject_fops);
> +	if (!de)
> +		goto err;
> +	de = debugfs_create_file("reason_uninject", S_IWUSR,
> +				 nmi_debug_dir, NULL, &nmi_reason_uninject_fops);
> +	if (!de)
> +		goto err;
> +
> +	return 0;
> +err:
> +	debugfs_remove_recursive(nmi_debug_dir);
> +	return rc;
> +}
> +
> +static void __exit nmi_inject_exit(void)
> +{
> +	debugfs_remove_recursive(nmi_debug_dir);
> +}
> +
> +module_init(nmi_inject_init);
> +module_exit(nmi_inject_exit);
> +
> +MODULE_AUTHOR("Huang Ying");
> +MODULE_DESCRIPTION("NMI injecting support");
> +MODULE_LICENSE("GPL");
> diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
> index 42f16f7..acfb1a9 100644
> --- a/arch/x86/kernel/traps.c
> +++ b/arch/x86/kernel/traps.c
> @@ -316,6 +316,34 @@ static int __init setup_unknown_nmi_panic(char *str)
>  }
>  __setup("unknown_nmi_panic", setup_unknown_nmi_panic);
>  
> +struct nmi_reason_inject_data nmi_reason_inject_data;
> +EXPORT_SYMBOL_GPL(nmi_reason_inject_data);
> +
> +static inline unsigned char get_nmi_reason(void)
> +{
> +	if (nmi_reason_inject_data.valid)
> +		return nmi_reason_inject_data.reason;
> +	else
> +		return __get_nmi_reason();
> +}
> +
> +static inline void outb_nmi_reason(unsigned char reason)
> +{
> +	static unsigned char prev_reason;
> +
> +	if (nmi_reason_inject_data.valid) {
> +		if (reason & NMI_REASON_CLEAR_SERR)
> +			nmi_reason_inject_data.reason &= ~NMI_REASON_SERR;
> +		if (prev_reason == (reason | NMI_REASON_CLEAR_IOCHK) &&
> +		    !(reason & NMI_REASON_CLEAR_IOCHK))
> +			nmi_reason_inject_data.reason &= ~NMI_REASON_IOCHK;
> +		if (!nmi_reason_inject_data.reason)
> +			nmi_reason_inject_data.valid = 0;
> +		prev_reason = reason;
> +	} else
> +		outb(reason, NMI_REASON_PORT);
> +}
> +
>  static notrace __kprobes void
>  pci_serr_error(unsigned char reason, struct pt_regs *regs)
>  {
> @@ -340,7 +368,7 @@ pci_serr_error(unsigned char reason, struct pt_regs *regs)
>  
>  	/* Clear and disable the PCI SERR error line. */
>  	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_SERR;
> -	outb(reason, NMI_REASON_PORT);
> +	outb_nmi_reason(reason);
>  }
>  
>  static notrace __kprobes void
> @@ -358,7 +386,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
>  
>  	/* Re-enable the IOCK line, wait for a few seconds */
>  	reason = (reason & NMI_REASON_CLEAR_MASK) | NMI_REASON_CLEAR_IOCHK;
> -	outb(reason, NMI_REASON_PORT);
> +	outb_nmi_reason(reason);
>  
>  	i = 20000;
>  	while (--i) {
> @@ -367,7 +395,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs)
>  	}
>  
>  	reason &= ~NMI_REASON_CLEAR_IOCHK;
> -	outb(reason, NMI_REASON_PORT);
> +	outb_nmi_reason(reason);
>  }
>  
>  static notrace __kprobes void
> -- 
> 1.7.1
> 


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler
  2010-11-02 18:16                               ` Huang Ying
  2010-11-02 19:11                                 ` Don Zickus
@ 2010-11-02 20:47                                 ` Don Zickus
  1 sibling, 0 replies; 44+ messages in thread
From: Don Zickus @ 2010-11-02 20:47 UTC (permalink / raw)
  To: Huang Ying
  Cc: Peter Zijlstra, Andi Kleen, Ingo Molnar, H. Peter Anvin,
	linux-kernel@vger.kernel.org, Robert Richter

On Wed, Nov 03, 2010 at 02:16:03AM +0800, Huang Ying wrote:
> >  		break;
> > @@ -38,7 +38,7 @@ static int profile_timer_exceptions_notify(struct notifier_block *self,
> >  static struct notifier_block profile_timer_exceptions_nb = {
> >  	.notifier_call = profile_timer_exceptions_notify,
> >  	.next = NULL,
> > -	.priority = 0
> > +	.priority = NMI_EXT_LOW_PRIOR,
> 
> Do not find definition of NMI_EXT_LOW_PRIOR, forget to add it?

I attached an updated patch with that change.

Cheers,
Don

---
 arch/x86/include/asm/kdebug.h           |    1 -
 arch/x86/include/asm/nmi.h              |   20 ++++++++++++++++++++
 arch/x86/kernel/apic/hw_nmi.c           |    3 +--
 arch/x86/kernel/apic/x2apic_uv_x.c      |    2 +-
 arch/x86/kernel/cpu/mcheck/mce-inject.c |    5 +++--
 arch/x86/kernel/cpu/perf_event.c        |    4 ++--
 arch/x86/kernel/kgdb.c                  |    6 +-----
 arch/x86/kernel/reboot.c                |    5 ++++-
 arch/x86/kernel/traps.c                 |    9 +--------
 arch/x86/oprofile/nmi_int.c             |    4 ++--
 arch/x86/oprofile/nmi_timer_int.c       |    4 ++--
 11 files changed, 37 insertions(+), 26 deletions(-)

diff --git a/arch/x86/include/asm/kdebug.h b/arch/x86/include/asm/kdebug.h
index 5bdfca8..e28ec43 100644
--- a/arch/x86/include/asm/kdebug.h
+++ b/arch/x86/include/asm/kdebug.h
@@ -18,7 +18,6 @@ enum die_val {
 	DIE_TRAP,
 	DIE_GPF,
 	DIE_CALL,
-	DIE_NMI_IPI,
 	DIE_PAGE_FAULT,
 	DIE_NMIUNKNOWN,
 };
diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
index 932f0f8..cfb6156 100644
--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -68,6 +68,26 @@ static inline int nmi_watchdog_active(void)
 }
 #endif
 
+/*
+ * Define some priorities for the nmi notifier call chain.
+ *
+ * Create a local nmi bit that has a higher priority than
+ * external nmis, because the local ones are more frequent.
+ * 
+ * Also setup some default high/normal/low settings for
+ * subsystems to registers with.  Using 4 bits to seperate
+ * the priorities.  This can go alot higher if needed be.
+ */
+
+#define NMI_LOCAL_SHIFT		16	/* randomly picked */
+#define NMI_LOCAL_BIT		(1ULL << NMI_LOCAL_SHIFT)
+#define NMI_HIGH_PRIOR		(1ULL << 8)
+#define NMI_NORMAL_PRIOR	(1ULL << 4)
+#define NMI_LOW_PRIOR		(1ULL << 0)
+#define NMI_LOCAL_HIGH_PRIOR	(NMI_LOCAL_BIT | NMI_HIGH_PRIOR)
+#define NMI_LOCAL_NORMAL_PRIOR	(NMI_LOCAL_BIT | NMI_NORMAL_PRIOR)
+#define NMI_LOCAL_LOW_PRIOR	(NMI_LOCAL_BIT | NMI_LOW_PRIOR)
+
 void lapic_watchdog_stop(void);
 int lapic_watchdog_init(unsigned nmi_hz);
 int lapic_wd_event(unsigned nmi_hz);
diff --git a/arch/x86/kernel/apic/hw_nmi.c b/arch/x86/kernel/apic/hw_nmi.c
index 07a837d..44ff8c9 100644
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -67,7 +67,6 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
 
 	switch (cmd) {
 	case DIE_NMI:
-	case DIE_NMI_IPI:
 		break;
 
 	default:
@@ -95,7 +94,7 @@ arch_trigger_all_cpu_backtrace_handler(struct notifier_block *self,
 static __read_mostly struct notifier_block backtrace_notifier = {
 	.notifier_call          = arch_trigger_all_cpu_backtrace_handler,
 	.next                   = NULL,
-	.priority               = 1
+	.priority               = NMI_LOCAL_LOW_PRIOR,
 };
 
 static int __init register_trigger_all_cpu_backtrace(void)
diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c
index be6f9c4..e6c6294 100644
--- a/arch/x86/kernel/apic/x2apic_uv_x.c
+++ b/arch/x86/kernel/apic/x2apic_uv_x.c
@@ -605,7 +605,7 @@ void __cpuinit uv_cpu_init(void)
  */
 int uv_handle_nmi(struct notifier_block *self, unsigned long reason, void *data)
 {
-	if (reason != DIE_NMI_IPI)
+	if (reason != DIE_NMIUNKNOWN)
 		return NOTIFY_OK;
 
 	if (in_crash_kexec)
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index e7dbde7..a779719 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -25,6 +25,7 @@
 #include <linux/gfp.h>
 #include <asm/mce.h>
 #include <asm/apic.h>
+#include <asm/nmi.h>
 
 /* Update fake mce registers on current CPU. */
 static void inject_mce(struct mce *m)
@@ -83,7 +84,7 @@ static int mce_raise_notify(struct notifier_block *self,
 	struct die_args *args = (struct die_args *)data;
 	int cpu = smp_processor_id();
 	struct mce *m = &__get_cpu_var(injectm);
-	if (val != DIE_NMI_IPI || !cpumask_test_cpu(cpu, mce_inject_cpumask))
+	if (val != DIE_NMI || !cpumask_test_cpu(cpu, mce_inject_cpumask))
 		return NOTIFY_DONE;
 	cpumask_clear_cpu(cpu, mce_inject_cpumask);
 	if (m->inject_flags & MCJ_EXCEPTION)
@@ -95,7 +96,7 @@ static int mce_raise_notify(struct notifier_block *self,
 
 static struct notifier_block mce_raise_nb = {
 	.notifier_call = mce_raise_notify,
-	.priority = 1000,
+	.priority = NMI_LOCAL_NORMAL_PRIOR,
 };
 
 /* Inject mce on current CPU */
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index da98b6d..55a3797 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1226,7 +1226,7 @@ perf_event_nmi_handler(struct notifier_block *self,
 		return NOTIFY_DONE;
 
 	switch (cmd) {
-	case DIE_NMI_IPI:
+	case DIE_NMI:
 		break;
 	case DIE_NMIUNKNOWN:
 		this_nmi = percpu_read(irq_stat.__nmi_count);
@@ -1276,7 +1276,7 @@ perf_event_nmi_handler(struct notifier_block *self,
 static __read_mostly struct notifier_block perf_event_nmi_notifier = {
 	.notifier_call		= perf_event_nmi_handler,
 	.next			= NULL,
-	.priority		= 1
+	.priority		= NMI_LOCAL_LOW_PRIOR,
 };
 
 static struct event_constraint unconstrained;
diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c
index 852b819..020d052 100644
--- a/arch/x86/kernel/kgdb.c
+++ b/arch/x86/kernel/kgdb.c
@@ -523,10 +523,6 @@ static int __kgdb_notify(struct die_args *args, unsigned long cmd)
 		}
 		return NOTIFY_DONE;
 
-	case DIE_NMI_IPI:
-		/* Just ignore, we will handle the roundup on DIE_NMI. */
-		return NOTIFY_DONE;
-
 	case DIE_NMIUNKNOWN:
 		if (was_in_debug_nmi[raw_smp_processor_id()]) {
 			was_in_debug_nmi[raw_smp_processor_id()] = 0;
@@ -604,7 +600,7 @@ static struct notifier_block kgdb_notifier = {
 	/*
 	 * Lowest-prio notifier priority, we want to be notified last:
 	 */
-	.priority	= -INT_MAX,
+	.priority	= NMI_LOCAL_LOW_PRIOR,
 };
 
 /**
diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
index e3af342..eabbde6 100644
--- a/arch/x86/kernel/reboot.c
+++ b/arch/x86/kernel/reboot.c
@@ -18,6 +18,7 @@
 #include <asm/pci_x86.h>
 #include <asm/virtext.h>
 #include <asm/cpu.h>
+#include <asm/nmi.h>
 
 #ifdef CONFIG_X86_32
 # include <linux/ctype.h>
@@ -753,7 +754,7 @@ static int crash_nmi_callback(struct notifier_block *self,
 {
 	int cpu;
 
-	if (val != DIE_NMI_IPI)
+	if (val != DIE_NMI)
 		return NOTIFY_OK;
 
 	cpu = raw_smp_processor_id();
@@ -784,6 +785,8 @@ static void smp_send_nmi_allbutself(void)
 
 static struct notifier_block crash_nmi_nb = {
 	.notifier_call = crash_nmi_callback,
+	/* we want to be the first one called */
+	.priority = NMI_LOCAL_HIGH_PRIOR+1,
 };
 
 /* Halt all other CPUs, calling the specified function on each of them
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
index d8acab3..9e56e3d 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -395,8 +395,7 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 	 * CPU-specific NMI: send to specific CPU or NMI sources must
 	 * be processed on specific CPU
 	 */
-	if (notify_die(DIE_NMI_IPI, "nmi_ipi", regs, 0, 2, SIGINT)
-	    == NOTIFY_STOP)
+	if (notify_die(DIE_NMI, "nmi_ipi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
 		return;
 
 	/* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
@@ -405,9 +404,6 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 	if (!cpu) {
 		reason = get_nmi_reason();
 		if (reason & NMI_REASON_MASK) {
-			if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
-			    == NOTIFY_STOP)
-				return;
 			if (reason & NMI_REASON_SERR)
 				pci_serr_error(reason, regs);
 			else if (reason & NMI_REASON_IOCHK)
@@ -423,9 +419,6 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 		}
 	}
 
-	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
-		return;
-
 #if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR)
 	if (nmi_watchdog_tick(regs, reason))
 		return;
diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
index 57f01bb..43b4f35 100644
--- a/arch/x86/oprofile/nmi_int.c
+++ b/arch/x86/oprofile/nmi_int.c
@@ -64,7 +64,7 @@ static int profile_exceptions_notify(struct notifier_block *self,
 	int ret = NOTIFY_DONE;
 
 	switch (val) {
-	case DIE_NMI_IPI:
+	case DIE_NMI:
 		if (ctr_running)
 			model->check_ctrs(args->regs, &__get_cpu_var(cpu_msrs));
 		else if (!nmi_enabled)
@@ -360,7 +360,7 @@ static void nmi_cpu_setup(void *dummy)
 static struct notifier_block profile_exceptions_nb = {
 	.notifier_call = profile_exceptions_notify,
 	.next = NULL,
-	.priority = 2
+	.priority = NMI_LOCAL_LOW_PRIOR,
 };
 
 static void nmi_cpu_restore_registers(struct op_msrs *msrs)
diff --git a/arch/x86/oprofile/nmi_timer_int.c b/arch/x86/oprofile/nmi_timer_int.c
index ab72a21..5f14fcb 100644
--- a/arch/x86/oprofile/nmi_timer_int.c
+++ b/arch/x86/oprofile/nmi_timer_int.c
@@ -25,7 +25,7 @@ static int profile_timer_exceptions_notify(struct notifier_block *self,
 	int ret = NOTIFY_DONE;
 
 	switch (val) {
-	case DIE_NMI_IPI:
+	case DIE_NMI:
 		oprofile_add_sample(args->regs, 0);
 		ret = NOTIFY_STOP;
 		break;
@@ -38,7 +38,7 @@ static int profile_timer_exceptions_notify(struct notifier_block *self,
 static struct notifier_block profile_timer_exceptions_nb = {
 	.notifier_call = profile_timer_exceptions_notify,
 	.next = NULL,
-	.priority = 0
+	.priority = NMI_LOW_PRIOR,
 };
 
 static int timer_start(void)
-- 
1.7.2.3


^ permalink raw reply related	[flat|nested] 44+ messages in thread

* [PATCH -v3 4/6] Make NMI reason io port (0x61) can be processed on any CPU
  2010-10-09  6:49 [PATCH -v3 1/6] x86, NMI, Add NMI symbol constants and rename memory parity to PCI SERR Huang Ying
  2010-10-09  6:49 ` [PATCH -v3 2/6] x86, NMI, Add touch_nmi_watchdog to io_check_error delay Huang Ying
  2010-10-09  6:49 ` [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler Huang Ying
@ 2010-10-09  6:49 ` Huang Ying
  2010-10-09  6:49 ` [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error Huang Ying
  2010-10-09  6:49 ` [PATCH -v3 6/6] x86, NMI, Remove do_nmi_callback logic Huang Ying
  4 siblings, 0 replies; 44+ messages in thread
From: Huang Ying @ 2010-10-09  6:49 UTC (permalink / raw)
  To: Don Zickus, Ingo Molnar, H. Peter Anvin
  Cc: linux-kernel, Andi Kleen, Robert Richter, Huang Ying

In original NMI handler, NMI reason io port (0x61) is only processed
on BSP. This makes it impossible to hot-remove BSP. To solve the
issue, a raw spinlock is used to make the port can be processed on any
CPU.

Signed-off-by: Huang Ying <ying.huang@intel.com>
---
 arch/x86/kernel/traps.c |   45 +++++++++++++++++++++++++--------------------
 1 file changed, 25 insertions(+), 20 deletions(-)

--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -83,6 +83,12 @@ EXPORT_SYMBOL_GPL(used_vectors);
 
 static int ignore_nmis;
 
+/*
+ * Prevent NMI reason port (0x61) being accessed simultaneously, can
+ * only be used in NMI handler.
+ */
+static DEFINE_RAW_SPINLOCK(nmi_reason_lock);
+
 static inline void conditional_sti(struct pt_regs *regs)
 {
 	if (regs->flags & X86_EFLAGS_IF)
@@ -383,7 +389,6 @@ unknown_nmi_error(unsigned char reason,
 static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
 {
 	unsigned char reason = 0;
-	int cpu;
 
 	/*
 	 * CPU-specific NMI must be processed before non-CPU-specific
@@ -400,28 +405,28 @@ static notrace __kprobes void default_do
 		return;
 
 	/* Non-CPU-specific NMI: NMI sources can be processed on any CPU */
-	cpu = smp_processor_id();
-	/* Only the BSP gets external NMIs from the system. */
-	if (!cpu) {
-		reason = get_nmi_reason();
-		if (reason & NMI_REASON_MASK) {
-			if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
-			    == NOTIFY_STOP)
-				return;
-			if (reason & NMI_REASON_SERR)
-				pci_serr_error(reason, regs);
-			else if (reason & NMI_REASON_IOCHK)
-				io_check_error(reason, regs);
+	raw_spin_lock(&nmi_reason_lock);
+	reason = get_nmi_reason();
+	if (reason & NMI_REASON_MASK) {
+		if (notify_die(DIE_NMI, "nmi", regs, reason, 2, SIGINT)
+		    == NOTIFY_STOP)
+			goto unlock_return;
+		if (reason & NMI_REASON_SERR)
+			pci_serr_error(reason, regs);
+		else if (reason & NMI_REASON_IOCHK)
+			io_check_error(reason, regs);
 #ifdef CONFIG_X86_32
-			/*
-			 * Reassert NMI in case it became active
-			 * meanwhile as it's edge-triggered:
-			 */
-			reassert_nmi();
+		/*
+		 * Reassert NMI in case it became active
+		 * meanwhile as it's edge-triggered:
+		 */
+		reassert_nmi();
 #endif
-			return;
-		}
+unlock_return:
+		raw_spin_unlock(&nmi_reason_lock);
+		return;
 	}
+	raw_spin_unlock(&nmi_reason_lock);
 
 	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
 		return;

^ permalink raw reply	[flat|nested] 44+ messages in thread

* [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error
  2010-10-09  6:49 [PATCH -v3 1/6] x86, NMI, Add NMI symbol constants and rename memory parity to PCI SERR Huang Ying
                   ` (2 preceding siblings ...)
  2010-10-09  6:49 ` [PATCH -v3 4/6] Make NMI reason io port (0x61) can be processed on any CPU Huang Ying
@ 2010-10-09  6:49 ` Huang Ying
  2010-10-10 14:07   ` Alan Cox
  2010-10-11 21:20   ` Don Zickus
  2010-10-09  6:49 ` [PATCH -v3 6/6] x86, NMI, Remove do_nmi_callback logic Huang Ying
  4 siblings, 2 replies; 44+ messages in thread
From: Huang Ying @ 2010-10-09  6:49 UTC (permalink / raw)
  To: Don Zickus, Ingo Molnar, H. Peter Anvin
  Cc: linux-kernel, Andi Kleen, Robert Richter, Huang Ying

In general, unknown NMI is used by hardware and firmware to notify
fatal hardware errors to OS. So the Linux should treat unknown NMI as
hardware error and go panic upon unknown NMI for better error
containment.

But there are some broken hardware, which will generate unknown NMI
not for hardware error. To support these machines, a white list
mechanism is provided to treat unknown NMI as hardware error only on
some known working system.

These systems are identified via the presentation of APEI HEST or
some PCI ID of the host bridge. The PCI ID of host bridge instead of
DMI ID is used, so that the checking can be done based on the platform
type instead of motherboard. This should be simpler and sufficient.

The method to identify the platforms is designed by Andi Kleen.


v3:

- Change document and comments

Signed-off-by: Huang Ying <ying.huang@intel.com>
---
 arch/x86/include/asm/nmi.h |    1 
 arch/x86/kernel/Makefile   |    2 +
 arch/x86/kernel/hwerr.c    |   62 +++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/traps.c    |   11 +++++++
 drivers/acpi/apei/hest.c   |    8 +++++
 5 files changed, 84 insertions(+)
 create mode 100644 arch/x86/kernel/hwerr.c

--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -44,6 +44,7 @@ struct ctl_table;
 extern int proc_nmi_enabled(struct ctl_table *, int ,
 			void __user *, size_t *, loff_t *);
 extern int unknown_nmi_panic;
+extern int unknown_nmi_as_hwerr;
 
 void arch_trigger_all_cpu_backtrace(void);
 #define arch_trigger_all_cpu_backtrace arch_trigger_all_cpu_backtrace
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -119,6 +119,8 @@ obj-$(CONFIG_X86_CHECK_BIOS_CORRUPTION)
 
 obj-$(CONFIG_SWIOTLB)			+= pci-swiotlb.o
 
+obj-y					+= hwerr.o
+
 ###
 # 64 bit specific files
 ifeq ($(CONFIG_X86_64),y)
--- /dev/null
+++ b/arch/x86/kernel/hwerr.c
@@ -0,0 +1,62 @@
+/*
+ * Hardware error architecture dependent processing
+ *
+ * Copyright 2010 Intel Corp.
+ *   Author: Huang Ying <ying.huang@intel.com>
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License version
+ * 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ */
+
+#include <linux/kernel.h>
+#include <linux/pci.h>
+#include <linux/init.h>
+#include <linux/nmi.h>
+
+/*
+ * In general, unknown NMI is used by hardware and firmware to notify
+ * fatal hardware errors to OS. So the Linux should treat unknown NMI
+ * as hardware error and go panic upon unknown NMI for better error
+ * containment.
+ *
+ * But there are some broken hardware, which will generate unknown NMI
+ * not for hardware error. To support these systems, a white list
+ * mechanism is used to treat unknown NMI as hardware error only on
+ * some known working system.
+ *
+ * The PCI ID of host bridge instead of DMI ID is used, so that the
+ * checking can be done based on the platform instead of motherboard.
+ * This should be simpler and sufficient.
+ */
+static const
+struct pci_device_id unknown_nmi_as_hwerr_platform[] __initdata = {
+	{ PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x3406) },
+	{ 0, }
+};
+
+int __init check_unknown_nmi_as_hwerr(void)
+{
+	struct pci_dev *dev = NULL;
+
+	for_each_pci_dev(dev) {
+		if (pci_match_id(unknown_nmi_as_hwerr_platform, dev)) {
+			pr_info(
+"Host bridge is identified, will treat unknown NMI as hardware error!\n");
+			unknown_nmi_as_hwerr = 1;
+			break;
+		}
+	}
+
+	return 0;
+}
+late_initcall(check_unknown_nmi_as_hwerr);
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -83,6 +83,8 @@ EXPORT_SYMBOL_GPL(used_vectors);
 
 static int ignore_nmis;
 
+int unknown_nmi_as_hwerr;
+
 /*
  * Prevent NMI reason port (0x61) being accessed simultaneously, can
  * only be used in NMI handler.
@@ -366,6 +368,15 @@ unknown_nmi_error(unsigned char reason,
 	if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
 			NOTIFY_STOP)
 		return;
+	/*
+	 * On some platforms, hardware errors may be notified via
+	 * unknown NMI
+	 */
+	if (unknown_nmi_as_hwerr)
+		panic(
+		"NMI for hardware error without error record: Not continuing\n"
+		"Please check BIOS/BMC log for further information.");
+
 #ifdef CONFIG_MCA
 	/*
 	 * Might actually be able to figure out what the guilty party
--- a/drivers/acpi/apei/hest.c
+++ b/drivers/acpi/apei/hest.c
@@ -35,6 +35,7 @@
 #include <linux/highmem.h>
 #include <linux/io.h>
 #include <linux/platform_device.h>
+#include <linux/nmi.h>
 #include <acpi/apei.h>
 
 #include "apei-internal.h"
@@ -225,6 +226,13 @@ static int __init hest_init(void)
 	if (rc)
 		goto err;
 
+	/*
+	 * System has proper HEST should treat unknown NMI as fatal
+	 * hardware error notification
+	 */
+	pr_info("HEST is valid, will treat unknown NMI as hardware error!\n");
+	unknown_nmi_as_hwerr = 1;
+
 	rc = hest_ghes_dev_register(ghes_count);
 	if (rc)
 		goto err;

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error
  2010-10-09  6:49 ` [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error Huang Ying
@ 2010-10-10 14:07   ` Alan Cox
  2010-10-10 14:13     ` Andi Kleen
  2010-10-11 21:20   ` Don Zickus
  1 sibling, 1 reply; 44+ messages in thread
From: Alan Cox @ 2010-10-10 14:07 UTC (permalink / raw)
  To: Huang Ying
  Cc: Don Zickus, Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen,
	Robert Richter

On Sat,  9 Oct 2010 14:49:46 +0800
Huang Ying <ying.huang@intel.com> wrote:

> In general, unknown NMI is used by hardware and firmware to notify
> fatal hardware errors to OS. So the Linux should treat unknown NMI as
> hardware error and go panic upon unknown NMI for better error
> containment.

Not entirely true. Older machines use NMI for all sorts of interesting
purposes. In particular many 486 laptops trigger NMI as part of power
manaagement, (Hence the choice of the dazed and confused message)

> But there are some broken hardware, which will generate unknown NMI
> not for hardware error. To support these machines, a white list

"Broken" is not the right term. There is no formal documentation about
such uses of NMI on older PC platforms that forbids such use. They may not
agree with your personal preferred behaviour.

> These systems are identified via the presentation of APEI HEST or
> some PCI ID of the host bridge. The PCI ID of host bridge instead of
> DMI ID is used, so that the checking can be done based on the platform
> type instead of motherboard. This should be simpler and sufficient.
> 
> The method to identify the platforms is designed by Andi Kleen.

Why not make the new flag also a boot option so you can force it on for
platforms where we don't auto whitelist it.

Alan



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error
  2010-10-10 14:07   ` Alan Cox
@ 2010-10-10 14:13     ` Andi Kleen
  2010-10-11 21:08       ` Don Zickus
  0 siblings, 1 reply; 44+ messages in thread
From: Andi Kleen @ 2010-10-10 14:13 UTC (permalink / raw)
  To: Alan Cox
  Cc: Huang Ying, Don Zickus, Ingo Molnar, H. Peter Anvin, linux-kernel,
	Andi Kleen, Robert Richter

On Sun, Oct 10, 2010 at 03:07:13PM +0100, Alan Cox wrote:
> On Sat,  9 Oct 2010 14:49:46 +0800
> Huang Ying <ying.huang@intel.com> wrote:
> 
> > In general, unknown NMI is used by hardware and firmware to notify
> > fatal hardware errors to OS. So the Linux should treat unknown NMI as
> > hardware error and go panic upon unknown NMI for better error
> > containment.
> 
> Not entirely true. Older machines use NMI for all sorts of interesting
> purposes. In particular many 486 laptops trigger NMI as part of power
> manaagement, (Hence the choice of the dazed and confused message)

In general, on any post stone age x86 system, ...

> > These systems are identified via the presentation of APEI HEST or
> > some PCI ID of the host bridge. The PCI ID of host bridge instead of
> > DMI ID is used, so that the checking can be done based on the platform
> > type instead of motherboard. This should be simpler and sufficient.
> > 
> > The method to identify the platforms is designed by Andi Kleen.
> 
> Why not make the new flag also a boot option so you can force it on for
> platforms where we don't auto whitelist it.

You can already set it at run time using sysctl.

echo 1 > /proc/sys/kernel/panic_on_unrecovered_nmi
echo 1 > /proc/sys/kernel/panic_on_io_nmi

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error
  2010-10-10 14:13     ` Andi Kleen
@ 2010-10-11 21:08       ` Don Zickus
  2010-10-11 21:12         ` Don Zickus
  0 siblings, 1 reply; 44+ messages in thread
From: Don Zickus @ 2010-10-11 21:08 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Alan Cox, Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel,
	Robert Richter

On Sun, Oct 10, 2010 at 04:13:11PM +0200, Andi Kleen wrote:
> > > These systems are identified via the presentation of APEI HEST or
> > > some PCI ID of the host bridge. The PCI ID of host bridge instead of
> > > DMI ID is used, so that the checking can be done based on the platform
> > > type instead of motherboard. This should be simpler and sufficient.
> > > 
> > > The method to identify the platforms is designed by Andi Kleen.
> > 
> > Why not make the new flag also a boot option so you can force it on for
> > platforms where we don't auto whitelist it.
> 
> You can already set it at run time using sysctl.
> 
> echo 1 > /proc/sys/kernel/panic_on_unrecovered_nmi
> echo 1 > /proc/sys/kernel/panic_on_io_nmi

I get the impression Alan was talking about the option

unknown_nmi_as_hwerr

which isn't hooked into a sysctl but will cause machines to panic.

Cheers,
Don

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error
  2010-10-11 21:08       ` Don Zickus
@ 2010-10-11 21:12         ` Don Zickus
  0 siblings, 0 replies; 44+ messages in thread
From: Don Zickus @ 2010-10-11 21:12 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Alan Cox, Huang Ying, Ingo Molnar, H. Peter Anvin, linux-kernel,
	Robert Richter

On Mon, Oct 11, 2010 at 05:08:29PM -0400, Don Zickus wrote:
> On Sun, Oct 10, 2010 at 04:13:11PM +0200, Andi Kleen wrote:
> > > > These systems are identified via the presentation of APEI HEST or
> > > > some PCI ID of the host bridge. The PCI ID of host bridge instead of
> > > > DMI ID is used, so that the checking can be done based on the platform
> > > > type instead of motherboard. This should be simpler and sufficient.
> > > > 
> > > > The method to identify the platforms is designed by Andi Kleen.
> > > 
> > > Why not make the new flag also a boot option so you can force it on for
> > > platforms where we don't auto whitelist it.
> > 
> > You can already set it at run time using sysctl.
> > 
> > echo 1 > /proc/sys/kernel/panic_on_unrecovered_nmi
> > echo 1 > /proc/sys/kernel/panic_on_io_nmi
> 
> I get the impression Alan was talking about the option
> 
> unknown_nmi_as_hwerr
> 
> which isn't hooked into a sysctl but will cause machines to panic.

Gah.  disregard this, my brain is tired from a long day.

Cheers,
Don

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error
  2010-10-09  6:49 ` [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error Huang Ying
  2010-10-10 14:07   ` Alan Cox
@ 2010-10-11 21:20   ` Don Zickus
  2010-10-12  1:10     ` Huang Ying
  2010-10-20  6:12     ` Huang Ying
  1 sibling, 2 replies; 44+ messages in thread
From: Don Zickus @ 2010-10-11 21:20 UTC (permalink / raw)
  To: Huang Ying
  Cc: Ingo Molnar, H. Peter Anvin, linux-kernel, Andi Kleen,
	Robert Richter

On Sat, Oct 09, 2010 at 02:49:46PM +0800, Huang Ying wrote:
> In general, unknown NMI is used by hardware and firmware to notify
> fatal hardware errors to OS. So the Linux should treat unknown NMI as
> hardware error and go panic upon unknown NMI for better error
> containment.
> 
> But there are some broken hardware, which will generate unknown NMI
> not for hardware error. To support these machines, a white list
> mechanism is provided to treat unknown NMI as hardware error only on
> some known working system.
> 
> These systems are identified via the presentation of APEI HEST or
> some PCI ID of the host bridge. The PCI ID of host bridge instead of
> DMI ID is used, so that the checking can be done based on the platform
> type instead of motherboard. This should be simpler and sufficient.
> 
> The method to identify the platforms is designed by Andi Kleen.

I don't have any major problems with the other patches in the patch
series.  In fact I would like to get them committed somewhere, so we can
continue building on them.

> @@ -366,6 +368,15 @@ unknown_nmi_error(unsigned char reason,
>  	if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
>  			NOTIFY_STOP)
>  		return;
> +	/*
> +	 * On some platforms, hardware errors may be notified via
> +	 * unknown NMI
> +	 */
> +	if (unknown_nmi_as_hwerr)
> +		panic(
> +		"NMI for hardware error without error record: Not continuing\n"
> +		"Please check BIOS/BMC log for further information.");
> +
>  #ifdef CONFIG_MCA
>  	/*
>  	 * Might actually be able to figure out what the guilty party

The only quirk I have left is the above piece, which is basically a
philosophy difference with Robert and myself.  Where we believe it should
be on the die_chain and Andi and yourself would like to see it explicitly
called out.

If we move to a new notifier chain, like we discussed in another thread,
would you guys be willing to move this into that new notifier chain or is
your argument still going to stand?

Cheers,
Don

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error
  2010-10-11 21:20   ` Don Zickus
@ 2010-10-12  1:10     ` Huang Ying
  2010-10-20  6:12     ` Huang Ying
  1 sibling, 0 replies; 44+ messages in thread
From: Huang Ying @ 2010-10-12  1:10 UTC (permalink / raw)
  To: Don Zickus
  Cc: Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org,
	Andi Kleen, Robert Richter

On Tue, 2010-10-12 at 05:20 +0800, Don Zickus wrote:
> On Sat, Oct 09, 2010 at 02:49:46PM +0800, Huang Ying wrote:
> > In general, unknown NMI is used by hardware and firmware to notify
> > fatal hardware errors to OS. So the Linux should treat unknown NMI as
> > hardware error and go panic upon unknown NMI for better error
> > containment.
> > 
> > But there are some broken hardware, which will generate unknown NMI
> > not for hardware error. To support these machines, a white list
> > mechanism is provided to treat unknown NMI as hardware error only on
> > some known working system.
> > 
> > These systems are identified via the presentation of APEI HEST or
> > some PCI ID of the host bridge. The PCI ID of host bridge instead of
> > DMI ID is used, so that the checking can be done based on the platform
> > type instead of motherboard. This should be simpler and sufficient.
> > 
> > The method to identify the platforms is designed by Andi Kleen.
> 
> I don't have any major problems with the other patches in the patch
> series.  In fact I would like to get them committed somewhere, so we can
> continue building on them.

Thanks.

> > @@ -366,6 +368,15 @@ unknown_nmi_error(unsigned char reason,
> >  	if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
> >  			NOTIFY_STOP)
> >  		return;
> > +	/*
> > +	 * On some platforms, hardware errors may be notified via
> > +	 * unknown NMI
> > +	 */
> > +	if (unknown_nmi_as_hwerr)
> > +		panic(
> > +		"NMI for hardware error without error record: Not continuing\n"
> > +		"Please check BIOS/BMC log for further information.");
> > +
> >  #ifdef CONFIG_MCA
> >  	/*
> >  	 * Might actually be able to figure out what the guilty party
> 
> The only quirk I have left is the above piece, which is basically a
> philosophy difference with Robert and myself.  Where we believe it should
> be on the die_chain and Andi and yourself would like to see it explicitly
> called out.
> 
> If we move to a new notifier chain, like we discussed in another thread,
> would you guys be willing to move this into that new notifier chain or is
> your argument still going to stand?

Perhaps I will not move this into that new notifier chain. If you want
to do that, feel free to pick it up and change it as you will.

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error
  2010-10-11 21:20   ` Don Zickus
  2010-10-12  1:10     ` Huang Ying
@ 2010-10-20  6:12     ` Huang Ying
  2010-10-20 14:15       ` Don Zickus
  1 sibling, 1 reply; 44+ messages in thread
From: Huang Ying @ 2010-10-20  6:12 UTC (permalink / raw)
  To: Don Zickus
  Cc: Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org,
	Andi Kleen, Robert Richter

Hi, Don,

On Tue, 2010-10-12 at 05:20 +0800, Don Zickus wrote:
> > @@ -366,6 +368,15 @@ unknown_nmi_error(unsigned char reason,
> >  	if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
> >  			NOTIFY_STOP)
> >  		return;
> > +	/*
> > +	 * On some platforms, hardware errors may be notified via
> > +	 * unknown NMI
> > +	 */
> > +	if (unknown_nmi_as_hwerr)
> > +		panic(
> > +		"NMI for hardware error without error record: Not continuing\n"
> > +		"Please check BIOS/BMC log for further information.");
> > +
> >  #ifdef CONFIG_MCA
> >  	/*
> >  	 * Might actually be able to figure out what the guilty party
> 
> The only quirk I have left is the above piece, which is basically a
> philosophy difference with Robert and myself.  Where we believe it should
> be on the die_chain and Andi and yourself would like to see it explicitly
> called out.

After some more thought, I found this is different from DIE_NMI and
DIE_NMI_IPI case. I think the code added is for general unknown NMI
processing instead of a device driver. What we do is not to add special
processing for some devices, but treat unknown NMI as hardware error
notification in general and use a white list to deal with broken
hardware and stone age machine. Do you agree?

If so, it should not be turned into a notifier block unless you want to
turn all general unknown NMI processing code into a notifier block.

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error
  2010-10-20  6:12     ` Huang Ying
@ 2010-10-20 14:15       ` Don Zickus
  2010-10-21  1:14         ` Huang Ying
  0 siblings, 1 reply; 44+ messages in thread
From: Don Zickus @ 2010-10-20 14:15 UTC (permalink / raw)
  To: Huang Ying
  Cc: Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org,
	Andi Kleen, Robert Richter, peterz

On Wed, Oct 20, 2010 at 02:12:37PM +0800, Huang Ying wrote:
> Hi, Don,
> 
> On Tue, 2010-10-12 at 05:20 +0800, Don Zickus wrote:
> > > @@ -366,6 +368,15 @@ unknown_nmi_error(unsigned char reason,
> > >  	if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
> > >  			NOTIFY_STOP)
> > >  		return;
> > > +	/*
> > > +	 * On some platforms, hardware errors may be notified via
> > > +	 * unknown NMI
> > > +	 */
> > > +	if (unknown_nmi_as_hwerr)
> > > +		panic(
> > > +		"NMI for hardware error without error record: Not continuing\n"
> > > +		"Please check BIOS/BMC log for further information.");
> > > +
> > >  #ifdef CONFIG_MCA
> > >  	/*
> > >  	 * Might actually be able to figure out what the guilty party
> > 
> > The only quirk I have left is the above piece, which is basically a
> > philosophy difference with Robert and myself.  Where we believe it should
> > be on the die_chain and Andi and yourself would like to see it explicitly
> > called out.
> 
> After some more thought, I found this is different from DIE_NMI and
> DIE_NMI_IPI case. I think the code added is for general unknown NMI
> processing instead of a device driver. What we do is not to add special
> processing for some devices, but treat unknown NMI as hardware error
> notification in general and use a white list to deal with broken
> hardware and stone age machine. Do you agree?
> 
> If so, it should not be turned into a notifier block unless you want to
> turn all general unknown NMI processing code into a notifier block.

Well, yes I actually do, mainly to keep the code simpler.  But also, after
having a conversation with someone yesterday, realized that unknown NMIs
are dealt with on a platform level and not a chipset level.

The reason I say that is some companies, like HP, have a special driver
hpwdt that they want to run in the case of an unknown NMI.  They don't
care about HEST or the other stuff, they want their BIOS call to take care
of it.  So now that hack has to be put into notifier somewhere.

I can only imagine Dell trying to do something similar as a value add.

To me it just makes sense to setup all the HEST stuff as default notifier
blocks and then have platform specific drivers register on top of them
(using the priority scheme).  This to me gives everyone flexibility on how
to handle the unknown NMIs.

Thoughts?

Cheers,
Don

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error
  2010-10-20 14:15       ` Don Zickus
@ 2010-10-21  1:14         ` Huang Ying
  2010-10-21  2:31           ` Don Zickus
  0 siblings, 1 reply; 44+ messages in thread
From: Huang Ying @ 2010-10-21  1:14 UTC (permalink / raw)
  To: Don Zickus
  Cc: Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org,
	Andi Kleen, Robert Richter, peterz@infradead.org

On Wed, 2010-10-20 at 22:15 +0800, Don Zickus wrote:
> On Wed, Oct 20, 2010 at 02:12:37PM +0800, Huang Ying wrote:
> > Hi, Don,
> > 
> > On Tue, 2010-10-12 at 05:20 +0800, Don Zickus wrote:
> > > > @@ -366,6 +368,15 @@ unknown_nmi_error(unsigned char reason,
> > > >  	if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) ==
> > > >  			NOTIFY_STOP)
> > > >  		return;
> > > > +	/*
> > > > +	 * On some platforms, hardware errors may be notified via
> > > > +	 * unknown NMI
> > > > +	 */
> > > > +	if (unknown_nmi_as_hwerr)
> > > > +		panic(
> > > > +		"NMI for hardware error without error record: Not continuing\n"
> > > > +		"Please check BIOS/BMC log for further information.");
> > > > +
> > > >  #ifdef CONFIG_MCA
> > > >  	/*
> > > >  	 * Might actually be able to figure out what the guilty party
> > > 
> > > The only quirk I have left is the above piece, which is basically a
> > > philosophy difference with Robert and myself.  Where we believe it should
> > > be on the die_chain and Andi and yourself would like to see it explicitly
> > > called out.
> > 
> > After some more thought, I found this is different from DIE_NMI and
> > DIE_NMI_IPI case. I think the code added is for general unknown NMI
> > processing instead of a device driver. What we do is not to add special
> > processing for some devices, but treat unknown NMI as hardware error
> > notification in general and use a white list to deal with broken
> > hardware and stone age machine. Do you agree?
> > 
> > If so, it should not be turned into a notifier block unless you want to
> > turn all general unknown NMI processing code into a notifier block.
> 
> Well, yes I actually do, mainly to keep the code simpler.  But also, after
> having a conversation with someone yesterday, realized that unknown NMIs
> are dealt with on a platform level and not a chipset level.

But there is some general rules for unknown NMI. We think unknown NMI is
hardware error notification on all systems except systems with broken
hardware or software bugs, stone age machines. Do you agree with that?

> The reason I say that is some companies, like HP, have a special driver
> hpwdt that they want to run in the case of an unknown NMI.  They don't
> care about HEST or the other stuff, they want their BIOS call to take care
> of it.  So now that hack has to be put into notifier somewhere.

Yes. I found that during NMI handler development. It sits in a notifier
chain and in a driver. hpwdt uses unknown NMI for watchdog timeout
notification, it is a platform feature and should be implemented in a
driver. But we want to implement a general default unknown NMI
processing logic, not do that for some specific platform or chipset.

> I can only imagine Dell trying to do something similar as a value add.
> 
> To me it just makes sense to setup all the HEST stuff as default notifier
> blocks and then have platform specific drivers register on top of them
> (using the priority scheme).  This to me gives everyone flexibility on how
> to handle the unknown NMIs.

Yes. HEST code will be in a driver and will register a notifier block to
do its work.

> Thoughts?

But the code in this patch is not for HEST. (HEST is only used to
implement the white list). I think the code is for a general standard
feature. I don't want to add HEST processing here.

Do you think it should be a general rule to treat all unknown NMI as
hardware error notification except some broken hardware and stone age
machines?

If so, this patch add that general logic to the general NMI handling
code. It is not for specific hardware. The HEST and PCI ID table are
used to implement a white list to deal with the broken hardware and
stone age machines.

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error
  2010-10-21  1:14         ` Huang Ying
@ 2010-10-21  2:31           ` Don Zickus
  2010-10-21  5:17             ` Huang Ying
  0 siblings, 1 reply; 44+ messages in thread
From: Don Zickus @ 2010-10-21  2:31 UTC (permalink / raw)
  To: Huang Ying
  Cc: Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org,
	Andi Kleen, Robert Richter, peterz@infradead.org

On Thu, Oct 21, 2010 at 09:14:03AM +0800, Huang Ying wrote:
> > > DIE_NMI_IPI case. I think the code added is for general unknown NMI
> > > processing instead of a device driver. What we do is not to add special
> > > processing for some devices, but treat unknown NMI as hardware error
> > > notification in general and use a white list to deal with broken
> > > hardware and stone age machine. Do you agree?
> > > 
> > > If so, it should not be turned into a notifier block unless you want to
> > > turn all general unknown NMI processing code into a notifier block.
> > 
> > Well, yes I actually do, mainly to keep the code simpler.  But also, after
> > having a conversation with someone yesterday, realized that unknown NMIs
> > are dealt with on a platform level and not a chipset level.
> 
> But there is some general rules for unknown NMI. We think unknown NMI is
> hardware error notification on all systems except systems with broken
> hardware or software bugs, stone age machines. Do you agree with that?

Nope.  In my experiences, most of our customers are still running
pre-Nehalem boxes, therefore most unknown NMIs are from broken hardware or
bad firmware (at least in the bugzillas I deal with).

I would be excited if I was getting some sort of hardware error
notification, because then I would know where the NMI might be coming
from.  Instead, I have customers pull out cards out of their machine or
instrument their kernel to see which device is causing the problem.  Slow
and painful.

> 
> > The reason I say that is some companies, like HP, have a special driver
> > hpwdt that they want to run in the case of an unknown NMI.  They don't
> > care about HEST or the other stuff, they want their BIOS call to take care
> > of it.  So now that hack has to be put into notifier somewhere.
> 
> Yes. I found that during NMI handler development. It sits in a notifier
> chain and in a driver. hpwdt uses unknown NMI for watchdog timeout
> notification, it is a platform feature and should be implemented in a

Actually no it doesn't, the name HP watchdog is deceiving.  The intent HP
has with that handler is any unknown NMI needs to be trapped by that
driver so it can do an SMI call, which tries to source the NMI and save
its result in NVRAM.  Then it jumps back to the kernel for a reboot.

I have been dealing with HP for 3 years with that driver, I have gotten
quite familiar with the NMI part of it. :-)

> driver. But we want to implement a general default unknown NMI
> processing logic, not do that for some specific platform or chipset.
> 
> > I can only imagine Dell trying to do something similar as a value add.
> > 
> > To me it just makes sense to setup all the HEST stuff as default notifier
> > blocks and then have platform specific drivers register on top of them
> > (using the priority scheme).  This to me gives everyone flexibility on how
> > to handle the unknown NMIs.
> 
> Yes. HEST code will be in a driver and will register a notifier block to
> do its work.
> 
> > Thoughts?
> 
> But the code in this patch is not for HEST. (HEST is only used to
> implement the white list). I think the code is for a general standard
> feature. I don't want to add HEST processing here.
> 
> Do you think it should be a general rule to treat all unknown NMI as
> hardware error notification except some broken hardware and stone age
> machines?

I guess my impression of what unknown NMIs should do might be a little
different than yours (not saying my view is a correct one, just the view I
have when I answer your questions).

(after spending more time thinking about this while looking at nmi
priorities)

I thought anything that registers with a notifier and cases off of
DIE_NMI, should be a driver/subsystem that expects and _can properly
handle_ an NMI.  The expectation is that it can successfully detect the
NMI is its own and return a NOTIFY_STOP if it is (after processing it).
[I excluded DIE_NMI_IPI because of PeterZ's comments]

Whereas DIE_NMIUNKNOWN would be for drivers/subsystem that can probably
detect the NMI is its own but can't do anything but panic or drivers that
don't know but want to handle the panic in their own special way (ie
hpwdt, or sgi's x2apic_uv_x.c where they like to use nmi_buttons to debug
stalls or hangs but don't want to panic).

And if noone wants to attempt to handle it after that, then call
unknown_nmi_error() (minus the notify_die(DIE_NMIUNKNOWN)).

So to me hardware error notification, would just detect what chipset it is
on and if it is something that matches its whitelist, register and use
DIE_NMIUNKNOWN.  unknown_nmi_error() would just continue to be this
general and vague thing that on more modern systems will likely never be
called.

Anyway that is how I viewed everything or how I wouldn't mind seeing it
implemented.  Then again, my view could be completely wrong.  :-)

I'll just rely on majority concensus on somebody's view.

Cheers,
Don

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error
  2010-10-21  2:31           ` Don Zickus
@ 2010-10-21  5:17             ` Huang Ying
  2010-10-21 14:10               ` Don Zickus
  0 siblings, 1 reply; 44+ messages in thread
From: Huang Ying @ 2010-10-21  5:17 UTC (permalink / raw)
  To: Don Zickus
  Cc: Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org,
	Andi Kleen, Robert Richter, peterz@infradead.org

On Thu, 2010-10-21 at 10:31 +0800, Don Zickus wrote:
> On Thu, Oct 21, 2010 at 09:14:03AM +0800, Huang Ying wrote:
> > > > DIE_NMI_IPI case. I think the code added is for general unknown NMI
> > > > processing instead of a device driver. What we do is not to add special
> > > > processing for some devices, but treat unknown NMI as hardware error
> > > > notification in general and use a white list to deal with broken
> > > > hardware and stone age machine. Do you agree?
> > > > 
> > > > If so, it should not be turned into a notifier block unless you want to
> > > > turn all general unknown NMI processing code into a notifier block.
> > > 
> > > Well, yes I actually do, mainly to keep the code simpler.  But also, after
> > > having a conversation with someone yesterday, realized that unknown NMIs
> > > are dealt with on a platform level and not a chipset level.
> > 
> > But there is some general rules for unknown NMI. We think unknown NMI is
> > hardware error notification on all systems except systems with broken
> > hardware or software bugs, stone age machines. Do you agree with that?
> 
> Nope.  In my experiences, most of our customers are still running
> pre-Nehalem boxes, therefore most unknown NMIs are from broken hardware or
> bad firmware (at least in the bugzillas I deal with).

It seems that we have different point of view for reason of unknown NMI.
Should broken hardware be seen as hardware error?

As far as I know, Windows treat unknown NMI as hardware errors. Although
we are programming for Linux not Windows. Many hardware are built for
Windows.

> I would be excited if I was getting some sort of hardware error
> notification, because then I would know where the NMI might be coming
> from.  Instead, I have customers pull out cards out of their machine or
> instrument their kernel to see which device is causing the problem.  Slow
> and painful.

Hope new machine will have better hardware error reporting. :)

> > > The reason I say that is some companies, like HP, have a special driver
> > > hpwdt that they want to run in the case of an unknown NMI.  They don't
> > > care about HEST or the other stuff, they want their BIOS call to take care
> > > of it.  So now that hack has to be put into notifier somewhere.
> > 
> > Yes. I found that during NMI handler development. It sits in a notifier
> > chain and in a driver. hpwdt uses unknown NMI for watchdog timeout
> > notification, it is a platform feature and should be implemented in a
> 
> Actually no it doesn't, the name HP watchdog is deceiving.  The intent HP
> has with that handler is any unknown NMI needs to be trapped by that
> driver so it can do an SMI call, which tries to source the NMI and save
> its result in NVRAM.  Then it jumps back to the kernel for a reboot.
> 
> I have been dealing with HP for 3 years with that driver, I have gotten
> quite familiar with the NMI part of it. :-)

It seems that I am fooled by the naming :). Originally I had thought
that if someone does not write the misc file (notify firmware) in time,
NMI will be triggered as a watchdog.

> > driver. But we want to implement a general default unknown NMI
> > processing logic, not do that for some specific platform or chipset.
> > 
> > > I can only imagine Dell trying to do something similar as a value add.
> > > 
> > > To me it just makes sense to setup all the HEST stuff as default notifier
> > > blocks and then have platform specific drivers register on top of them
> > > (using the priority scheme).  This to me gives everyone flexibility on how
> > > to handle the unknown NMIs.
> > 
> > Yes. HEST code will be in a driver and will register a notifier block to
> > do its work.
> > 
> > > Thoughts?
> > 
> > But the code in this patch is not for HEST. (HEST is only used to
> > implement the white list). I think the code is for a general standard
> > feature. I don't want to add HEST processing here.
> > 
> > Do you think it should be a general rule to treat all unknown NMI as
> > hardware error notification except some broken hardware and stone age
> > machines?
> 
> I guess my impression of what unknown NMIs should do might be a little
> different than yours (not saying my view is a correct one, just the view I
> have when I answer your questions).

Yes. I think so too. The reply following is my understanding for that.
My understanding may be not correct too. :)

> (after spending more time thinking about this while looking at nmi
> priorities)
> 
> I thought anything that registers with a notifier and cases off of
> DIE_NMI, should be a driver/subsystem that expects and _can properly
> handle_ an NMI.  The expectation is that it can successfully detect the
> NMI is its own and return a NOTIFY_STOP if it is (after processing it).
> [I excluded DIE_NMI_IPI because of PeterZ's comments]

I think notifier registered on DIE_NMI can panic too. Why prevent it?

> Whereas DIE_NMIUNKNOWN would be for drivers/subsystem that can probably
> detect the NMI is its own but can't do anything but panic or drivers that
> don't know but want to handle the panic in their own special way (ie
> hpwdt, or sgi's x2apic_uv_x.c where they like to use nmi_buttons to debug
> stalls or hangs but don't want to panic).

I think drivers want to handle the unknown NMI in their own special way
are the expected users of DIE_NMIUNKNOWN. While drivers that can detect
the NMI is its own and will go panic should be registered on DIE_NMI.

> And if noone wants to attempt to handle it after that, then call
> unknown_nmi_error() (minus the notify_die(DIE_NMIUNKNOWN)).

I think unknown_nmi_error() (minus the notify_die(DIE_NMIUNKNOWN) is the
general default operation for unknown NMI. DIE_NMIUNKNOWN is for drivers
processing after determining the NMI is unknown and before the general
default operation.

> So to me hardware error notification, would just detect what chipset it is
> on and if it is something that matches its whitelist, register and use
> DIE_NMIUNKNOWN.  unknown_nmi_error() would just continue to be this
> general and vague thing that on more modern systems will likely never be
> called.

The difference between us is that I think it should be a general rule to
treat unknown NMI as hardware error notification, while you think it
should be in a driver for some special hardware. That is, it is general
or special?

> Anyway that is how I viewed everything or how I wouldn't mind seeing it
> implemented.  Then again, my view could be completely wrong.  :-)
> 
> I'll just rely on majority concensus on somebody's view.

That is fair. Thanks.

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error
  2010-10-21  5:17             ` Huang Ying
@ 2010-10-21 14:10               ` Don Zickus
  2010-10-21 15:45                 ` Andi Kleen
  0 siblings, 1 reply; 44+ messages in thread
From: Don Zickus @ 2010-10-21 14:10 UTC (permalink / raw)
  To: Huang Ying
  Cc: Ingo Molnar, H. Peter Anvin, linux-kernel@vger.kernel.org,
	Andi Kleen, Robert Richter, peterz@infradead.org

On Thu, Oct 21, 2010 at 01:17:31PM +0800, Huang Ying wrote:
> > > But there is some general rules for unknown NMI. We think unknown NMI is
> > > hardware error notification on all systems except systems with broken
> > > hardware or software bugs, stone age machines. Do you agree with that?
> > 
> > Nope.  In my experiences, most of our customers are still running
> > pre-Nehalem boxes, therefore most unknown NMIs are from broken hardware or
> > bad firmware (at least in the bugzillas I deal with).
> 
> It seems that we have different point of view for reason of unknown NMI.
> Should broken hardware be seen as hardware error?

Well, do you have an alternative way to handle broken hardware?  Broken
hardware has generated NMIs, sometimes if I am lucky SERRs.  The ones that
generate SERRs can be filtered through a different path, but what about
the ones that don't?

I understand you are trying to make a distinction between the two, but I
don't understand how you plan on handling the different scenarios.  That's
probably part of my confusion.

> 
> As far as I know, Windows treat unknown NMI as hardware errors. Although
> we are programming for Linux not Windows. Many hardware are built for
> Windows.

I was told Windows treats _any_ NMI as hardware errors, not just unknown
ones. :-)

> 
> > I would be excited if I was getting some sort of hardware error
> > notification, because then I would know where the NMI might be coming
> > from.  Instead, I have customers pull out cards out of their machine or
> > instrument their kernel to see which device is causing the problem.  Slow
> > and painful.
> 
> Hope new machine will have better hardware error reporting. :)

Me too.

<snip>

> > > 
> > > But the code in this patch is not for HEST. (HEST is only used to
> > > implement the white list). I think the code is for a general standard
> > > feature. I don't want to add HEST processing here.
> > > 
> > > Do you think it should be a general rule to treat all unknown NMI as
> > > hardware error notification except some broken hardware and stone age
> > > machines?
> > 
> > I guess my impression of what unknown NMIs should do might be a little
> > different than yours (not saying my view is a correct one, just the view I
> > have when I answer your questions).
> 
> Yes. I think so too. The reply following is my understanding for that.
> My understanding may be not correct too. :)
> 
> > (after spending more time thinking about this while looking at nmi
> > priorities)
> > 
> > I thought anything that registers with a notifier and cases off of
> > DIE_NMI, should be a driver/subsystem that expects and _can properly
> > handle_ an NMI.  The expectation is that it can successfully detect the
> > NMI is its own and return a NOTIFY_STOP if it is (after processing it).
> > [I excluded DIE_NMI_IPI because of PeterZ's comments]
> 
> I think notifier registered on DIE_NMI can panic too. Why prevent it?

True, I guess as long as the handler can determine the NMI is its own, I
can't see why not (/me realizes that is what the nmi watchdog does :-) ).

> 
> > Whereas DIE_NMIUNKNOWN would be for drivers/subsystem that can probably
> > detect the NMI is its own but can't do anything but panic or drivers that
> > don't know but want to handle the panic in their own special way (ie
> > hpwdt, or sgi's x2apic_uv_x.c where they like to use nmi_buttons to debug
> > stalls or hangs but don't want to panic).
> 
> I think drivers want to handle the unknown NMI in their own special way
> are the expected users of DIE_NMIUNKNOWN. While drivers that can detect
> the NMI is its own and will go panic should be registered on DIE_NMI.

Ok.  I can agree with this.

> 
> > And if noone wants to attempt to handle it after that, then call
> > unknown_nmi_error() (minus the notify_die(DIE_NMIUNKNOWN)).
> 
> I think unknown_nmi_error() (minus the notify_die(DIE_NMIUNKNOWN) is the
> general default operation for unknown NMI. DIE_NMIUNKNOWN is for drivers
> processing after determining the NMI is unknown and before the general
> default operation.

Yes.

> 
> > So to me hardware error notification, would just detect what chipset it is
> > on and if it is something that matches its whitelist, register and use
> > DIE_NMIUNKNOWN.  unknown_nmi_error() would just continue to be this
> > general and vague thing that on more modern systems will likely never be
> > called.
> 
> The difference between us is that I think it should be a general rule to
> treat unknown NMI as hardware error notification, while you think it
> should be in a driver for some special hardware. That is, it is general
> or special?

Probably.  I guess I don't fully understand your definition of hardware
error notification so I can't tell if we are arguing or agreeing (but
using different words).

How do you envision the code looking like with hardware error
notification?

I just wanted to keep the code in traps.c simple and clean and not
constantly add new #ifdefs every time Intel came up with an interesting
way to determine a hardware error condition.

For example, I am not the biggest fan of seeing stuff like edac or mce
inside the code and would prefer them to use notifiers.  But that is just
my opinion.

If you have a framework that you wanted to propose that could encapsulate
an ever growing class of hardware error notifications, I would be
interested.

Anyway, perhaps providing some examples about what you had in mind and how
it would scale going forward might help me understand what you are looking
to do.

Cheers,
Don


^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error
  2010-10-21 14:10               ` Don Zickus
@ 2010-10-21 15:45                 ` Andi Kleen
  2010-10-22  1:49                   ` Don Zickus
  0 siblings, 1 reply; 44+ messages in thread
From: Andi Kleen @ 2010-10-21 15:45 UTC (permalink / raw)
  To: Don Zickus
  Cc: Huang Ying, Ingo Molnar, H. Peter Anvin,
	linux-kernel@vger.kernel.org, Andi Kleen, Robert Richter,
	peterz@infradead.org

On Thu, Oct 21, 2010 at 10:10:02AM -0400, Don Zickus wrote:
> On Thu, Oct 21, 2010 at 01:17:31PM +0800, Huang Ying wrote:
> > > > But there is some general rules for unknown NMI. We think unknown NMI is
> > > > hardware error notification on all systems except systems with broken
> > > > hardware or software bugs, stone age machines. Do you agree with that?
> > > 
> > > Nope.  In my experiences, most of our customers are still running
> > > pre-Nehalem boxes, therefore most unknown NMIs are from broken hardware or
> > > bad firmware (at least in the bugzillas I deal with).
> > 
> > It seems that we have different point of view for reason of unknown NMI.
> > Should broken hardware be seen as hardware error?
> 
> Well, do you have an alternative way to handle broken hardware?  Broken
> hardware has generated NMIs, sometimes if I am lucky SERRs.  The ones that
> generate SERRs can be filtered through a different path, but what about
> the ones that don't?
> 

Don, AFAIK you're saying the same thing as Ying: an unknown NMI is 
a hardware error.

The reason the hardware does that is that it wants to tell us:

"I lost track of an error. There is corrupted data somewhere in the system.
Please stop, don't do anything that could consume that data. S.O.S."

The correct answer for that is panic.

The only problem we're trying to solve here is to distingush
other cases, like when software uses NMIs for something else
or there may be other cases

> > 
> > As far as I know, Windows treat unknown NMI as hardware errors. Although
> > we are programming for Linux not Windows. Many hardware are built for
> > Windows.
> 
> I was told Windows treats _any_ NMI as hardware errors, not just unknown
> ones. :-)

I don't know details about Windows' code here, but I assume that 
they have a way to hook into the NMI too, otherwise drivers like the HP
watchdog wouldn't work on Windows. But yes we know they shut down.

Essentially the hardware (and the BIOS) is designed 
under the assumption that the OS behaves like Windows for this.

In my long experience in making Linux work on all kinds
of hardware I learned very firmly that trying to drive PC hardware 
in a different way than Windows does is usually a bad idea.

The approach of Ying's patch kit was really to make
the behaviour more like Windows.

> Probably.  I guess I don't fully understand your definition of hardware
> error notification so I can't tell if we are arguing or agreeing (but
> using different words).
> 
> How do you envision the code looking like with hardware error
> notification?
> 
> I just wanted to keep the code in traps.c simple and clean and not
> constantly add new #ifdefs every time Intel came up with an interesting
> way to determine a hardware error condition.
> 
> For example, I am not the biggest fan of seeing stuff like edac or mce
> inside the code and would prefer them to use notifiers.  But that is just
> my opinion.

mce is only for testing anyways, "real" mce doesn't need vector 0.
So basically it's just a fancy "smp_call_nmi()" thingy.

EDAC shouldn't need it either, if it's does it's some probably
broken legacy behaviour.

> If you have a framework that you wanted to propose that could encapsulate
> an ever growing class of hardware error notifications, I would be
> interested

The trend in error reporting is away from NMIs and towards machine checks.
My bunch is that NMI reporting is more or less a legacy problem, so we have
to deal with what is there today.

I don't think you need to worry about a lot more hardware NMI sources.

The only thing that we need to worry about is more software NMI sources,
these unfortunately like to multiply.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error
  2010-10-21 15:45                 ` Andi Kleen
@ 2010-10-22  1:49                   ` Don Zickus
  2010-10-22  2:05                     ` Huang Ying
  2010-10-22  9:24                     ` Andi Kleen
  0 siblings, 2 replies; 44+ messages in thread
From: Don Zickus @ 2010-10-22  1:49 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Huang Ying, Ingo Molnar, H. Peter Anvin,
	linux-kernel@vger.kernel.org, Robert Richter,
	peterz@infradead.org

On Thu, Oct 21, 2010 at 05:45:44PM +0200, Andi Kleen wrote:
> On Thu, Oct 21, 2010 at 10:10:02AM -0400, Don Zickus wrote:
> > On Thu, Oct 21, 2010 at 01:17:31PM +0800, Huang Ying wrote:
> > > > > But there is some general rules for unknown NMI. We think unknown NMI is
> > > > > hardware error notification on all systems except systems with broken
> > > > > hardware or software bugs, stone age machines. Do you agree with that?
> > > > 
> > > > Nope.  In my experiences, most of our customers are still running
> > > > pre-Nehalem boxes, therefore most unknown NMIs are from broken hardware or
> > > > bad firmware (at least in the bugzillas I deal with).
> > > 
> > > It seems that we have different point of view for reason of unknown NMI.
> > > Should broken hardware be seen as hardware error?
> > 
> > Well, do you have an alternative way to handle broken hardware?  Broken
> > hardware has generated NMIs, sometimes if I am lucky SERRs.  The ones that
> > generate SERRs can be filtered through a different path, but what about
> > the ones that don't?
> > 
> 
> Don, AFAIK you're saying the same thing as Ying: an unknown NMI is 
> a hardware error.
> 
> The reason the hardware does that is that it wants to tell us:
> 
> "I lost track of an error. There is corrupted data somewhere in the system.
> Please stop, don't do anything that could consume that data. S.O.S."
> 
> The correct answer for that is panic.

After re-reading Huang's patch, I am starting to understand what you mean
by broken hardware.  Basically you are trying to distinguish between
legacy systems that were 'broken' in the sense they would randomly send
uknown NMIs for no good reason, hence the 'Dazed and confused' messages
and hardware errors on more modern systems that say, 'Hardware error,
panicing check your BIOS for more info' (or whatever).

So Huang's patch was sort of acting like a switch.  On legacy systems use
'Dazed and confused' for unknown NMIs.  Whereas on whitelisted modern
systems use a more relavant 'Check BIOS for error' message.  Is that
right?

That's why you guys are complaining that registering a die_notifier would
be silly?

> 
> The only problem we're trying to solve here is to distingush
> other cases, like when software uses NMIs for something else
> or there may be other cases

Ok.

> 
> > > 
> > > As far as I know, Windows treat unknown NMI as hardware errors. Although
> > > we are programming for Linux not Windows. Many hardware are built for
> > > Windows.
> > 
> > I was told Windows treats _any_ NMI as hardware errors, not just unknown
> > ones. :-)
> 
> I don't know details about Windows' code here, but I assume that 
> they have a way to hook into the NMI too, otherwise drivers like the HP
> watchdog wouldn't work on Windows. But yes we know they shut down.
> 
> Essentially the hardware (and the BIOS) is designed 
> under the assumption that the OS behaves like Windows for this.
> 
> In my long experience in making Linux work on all kinds
> of hardware I learned very firmly that trying to drive PC hardware 
> in a different way than Windows does is usually a bad idea.

Of course.

> 
> The approach of Ying's patch kit was really to make
> the behaviour more like Windows.

Ok.

> 
> 
> > Probably.  I guess I don't fully understand your definition of hardware
> > error notification so I can't tell if we are arguing or agreeing (but
> > using different words).
> > 
> > How do you envision the code looking like with hardware error
> > notification?
> > 
> > I just wanted to keep the code in traps.c simple and clean and not
> > constantly add new #ifdefs every time Intel came up with an interesting
> > way to determine a hardware error condition.
> > 
> > For example, I am not the biggest fan of seeing stuff like edac or mce
> > inside the code and would prefer them to use notifiers.  But that is just
> > my opinion.
> 
> mce is only for testing anyways, "real" mce doesn't need vector 0.
> So basically it's just a fancy "smp_call_nmi()" thingy.
> 
> EDAC shouldn't need it either, if it's does it's some probably
> broken legacy behaviour.
> 
> > If you have a framework that you wanted to propose that could encapsulate
> > an ever growing class of hardware error notifications, I would be
> > interested
> 
> The trend in error reporting is away from NMIs and towards machine checks.
> My bunch is that NMI reporting is more or less a legacy problem, so we have
> to deal with what is there today.

That's good.

> 
> I don't think you need to worry about a lot more hardware NMI sources.

Well until those machines dominate the marketplace, I'm stuck supporting
those pre-Nahelam boxes with customers that committed to 10 years with
last year's technology.  ;-)

> 
> The only thing that we need to worry about is more software NMI sources,
> these unfortunately like to multiply.

Sure.

Thanks,
Don

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error
  2010-10-22  1:49                   ` Don Zickus
@ 2010-10-22  2:05                     ` Huang Ying
  2010-10-22  2:56                       ` Don Zickus
  2010-10-22  9:24                     ` Andi Kleen
  1 sibling, 1 reply; 44+ messages in thread
From: Huang Ying @ 2010-10-22  2:05 UTC (permalink / raw)
  To: Don Zickus
  Cc: Andi Kleen, Ingo Molnar, H. Peter Anvin,
	linux-kernel@vger.kernel.org, Robert Richter,
	peterz@infradead.org

On Fri, 2010-10-22 at 09:49 +0800, Don Zickus wrote:
> On Thu, Oct 21, 2010 at 05:45:44PM +0200, Andi Kleen wrote:
> > On Thu, Oct 21, 2010 at 10:10:02AM -0400, Don Zickus wrote:
> > > On Thu, Oct 21, 2010 at 01:17:31PM +0800, Huang Ying wrote:
> > > > > > But there is some general rules for unknown NMI. We think unknown NMI is
> > > > > > hardware error notification on all systems except systems with broken
> > > > > > hardware or software bugs, stone age machines. Do you agree with that?
> > > > > 
> > > > > Nope.  In my experiences, most of our customers are still running
> > > > > pre-Nehalem boxes, therefore most unknown NMIs are from broken hardware or
> > > > > bad firmware (at least in the bugzillas I deal with).
> > > > 
> > > > It seems that we have different point of view for reason of unknown NMI.
> > > > Should broken hardware be seen as hardware error?
> > > 
> > > Well, do you have an alternative way to handle broken hardware?  Broken
> > > hardware has generated NMIs, sometimes if I am lucky SERRs.  The ones that
> > > generate SERRs can be filtered through a different path, but what about
> > > the ones that don't?
> > > 
> > 
> > Don, AFAIK you're saying the same thing as Ying: an unknown NMI is 
> > a hardware error.
> > 
> > The reason the hardware does that is that it wants to tell us:
> > 
> > "I lost track of an error. There is corrupted data somewhere in the system.
> > Please stop, don't do anything that could consume that data. S.O.S."
> > 
> > The correct answer for that is panic.
> 
> After re-reading Huang's patch, I am starting to understand what you mean
> by broken hardware.  Basically you are trying to distinguish between
> legacy systems that were 'broken' in the sense they would randomly send
> uknown NMIs for no good reason, hence the 'Dazed and confused' messages
> and hardware errors on more modern systems that say, 'Hardware error,
> panicing check your BIOS for more info' (or whatever).

Yes.

> So Huang's patch was sort of acting like a switch.  On legacy systems use
> 'Dazed and confused' for unknown NMIs.  Whereas on whitelisted modern
> systems use a more relavant 'Check BIOS for error' message.  Is that
> right?

In fact we want to go panic and 'check BIOS for error, contact your
hardware vendor' for all systems. But as you said, there are some
'broken hardware' randomly send unknown NMIs for no good reason. So a
white list is used for them. And not all pre-Nehalem machines are
'broken' in fact.

> That's why you guys are complaining that registering a die_notifier would
> be silly?

I think whether going die_notifier or unknown_nmi_error() depends on it
is general or specific for some hardware. Do you agree with that?

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error
  2010-10-22  2:05                     ` Huang Ying
@ 2010-10-22  2:56                       ` Don Zickus
  2010-10-22  5:23                         ` Huang Ying
  0 siblings, 1 reply; 44+ messages in thread
From: Don Zickus @ 2010-10-22  2:56 UTC (permalink / raw)
  To: Huang Ying
  Cc: Andi Kleen, Ingo Molnar, H. Peter Anvin,
	linux-kernel@vger.kernel.org, Robert Richter,
	peterz@infradead.org

On Fri, Oct 22, 2010 at 10:05:10AM +0800, Huang Ying wrote:
> > > > Well, do you have an alternative way to handle broken hardware?  Broken
> > > > hardware has generated NMIs, sometimes if I am lucky SERRs.  The ones that
> > > > generate SERRs can be filtered through a different path, but what about
> > > > the ones that don't?
> > > > 
> > > 
> > > Don, AFAIK you're saying the same thing as Ying: an unknown NMI is 
> > > a hardware error.
> > > 
> > > The reason the hardware does that is that it wants to tell us:
> > > 
> > > "I lost track of an error. There is corrupted data somewhere in the system.
> > > Please stop, don't do anything that could consume that data. S.O.S."
> > > 
> > > The correct answer for that is panic.
> > 
> > After re-reading Huang's patch, I am starting to understand what you mean
> > by broken hardware.  Basically you are trying to distinguish between
> > legacy systems that were 'broken' in the sense they would randomly send
> > uknown NMIs for no good reason, hence the 'Dazed and confused' messages
> > and hardware errors on more modern systems that say, 'Hardware error,
> > panicing check your BIOS for more info' (or whatever).
> 
> Yes.
> 
> > So Huang's patch was sort of acting like a switch.  On legacy systems use
> > 'Dazed and confused' for unknown NMIs.  Whereas on whitelisted modern
> > systems use a more relavant 'Check BIOS for error' message.  Is that
> > right?
> 
> In fact we want to go panic and 'check BIOS for error, contact your
> hardware vendor' for all systems. But as you said, there are some
> 'broken hardware' randomly send unknown NMIs for no good reason. So a
> white list is used for them. And not all pre-Nehalem machines are
> 'broken' in fact.

Ok, I think I finally understand what you guys are trying to do.  I also
can't see a problem with it.  Though I think the patch could probably use
some clean up to make it more clear.  Off the top of my head perhaps a
function call that sets the variable unknown_nmi_as_hwerr instead of
setting it explicitly and maybe structuring unknown_nmi() with an if-then
modern-message; else legacy-message; to possibly make it obvious what the
code is trying to acheive.

And yeah I know not all pre-Nehalem machines are broken.  I am usually
sarcastic when I mention that just because being at IDF last year, I got
the impression that pre-Nehalem machines were considered the dark ages.
:-)

I am actually curious to know how many x86_64 machines would be considered
broken?

> 
> > That's why you guys are complaining that registering a die_notifier would
> > be silly?
> 
> I think whether going die_notifier or unknown_nmi_error() depends on it
> is general or specific for some hardware. Do you agree with that?

Well I am hoping the only general case would be the one you want to use
now.  Everything else would be specific and require a die_notifier.  I
mean how many different ways do we want to have a printk/panic in
unknown_nmi()?

Cheers,
Don

^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error
  2010-10-22  2:56                       ` Don Zickus
@ 2010-10-22  5:23                         ` Huang Ying
  0 siblings, 0 replies; 44+ messages in thread
From: Huang Ying @ 2010-10-22  5:23 UTC (permalink / raw)
  To: Don Zickus
  Cc: Andi Kleen, Ingo Molnar, H. Peter Anvin,
	linux-kernel@vger.kernel.org, Robert Richter,
	peterz@infradead.org

On Fri, 2010-10-22 at 10:56 +0800, Don Zickus wrote:
> On Fri, Oct 22, 2010 at 10:05:10AM +0800, Huang Ying wrote:
> > > > > Well, do you have an alternative way to handle broken hardware?  Broken
> > > > > hardware has generated NMIs, sometimes if I am lucky SERRs.  The ones that
> > > > > generate SERRs can be filtered through a different path, but what about
> > > > > the ones that don't?
> > > > > 
> > > > 
> > > > Don, AFAIK you're saying the same thing as Ying: an unknown NMI is 
> > > > a hardware error.
> > > > 
> > > > The reason the hardware does that is that it wants to tell us:
> > > > 
> > > > "I lost track of an error. There is corrupted data somewhere in the system.
> > > > Please stop, don't do anything that could consume that data. S.O.S."
> > > > 
> > > > The correct answer for that is panic.
> > > 
> > > After re-reading Huang's patch, I am starting to understand what you mean
> > > by broken hardware.  Basically you are trying to distinguish between
> > > legacy systems that were 'broken' in the sense they would randomly send
> > > uknown NMIs for no good reason, hence the 'Dazed and confused' messages
> > > and hardware errors on more modern systems that say, 'Hardware error,
> > > panicing check your BIOS for more info' (or whatever).
> > 
> > Yes.
> > 
> > > So Huang's patch was sort of acting like a switch.  On legacy systems use
> > > 'Dazed and confused' for unknown NMIs.  Whereas on whitelisted modern
> > > systems use a more relavant 'Check BIOS for error' message.  Is that
> > > right?
> > 
> > In fact we want to go panic and 'check BIOS for error, contact your
> > hardware vendor' for all systems. But as you said, there are some
> > 'broken hardware' randomly send unknown NMIs for no good reason. So a
> > white list is used for them. And not all pre-Nehalem machines are
> > 'broken' in fact.
> 
> Ok, I think I finally understand what you guys are trying to do.  I also
> can't see a problem with it.  

Thanks.

> Though I think the patch could probably use
> some clean up to make it more clear.  Off the top of my head perhaps a
> function call that sets the variable unknown_nmi_as_hwerr instead of
> setting it explicitly and maybe structuring unknown_nmi() with an if-then
> modern-message; else legacy-message; to possibly make it obvious what the
> code is trying to acheive.

OK. Will do it.

> And yeah I know not all pre-Nehalem machines are broken.  I am usually
> sarcastic when I mention that just because being at IDF last year, I got
> the impression that pre-Nehalem machines were considered the dark ages.
> :-)

Haha

> I am actually curious to know how many x86_64 machines would be considered
> broken?

Don't know either.

> > > That's why you guys are complaining that registering a die_notifier would
> > > be silly?
> > 
> > I think whether going die_notifier or unknown_nmi_error() depends on it
> > is general or specific for some hardware. Do you agree with that?
> 
> Well I am hoping the only general case would be the one you want to use
> now.  Everything else would be specific and require a die_notifier.  I
> mean how many different ways do we want to have a printk/panic in
> unknown_nmi()?

I think this one should be the only one for general unknown NMI
processing.

Best Regards,
Huang Ying



^ permalink raw reply	[flat|nested] 44+ messages in thread

* Re: [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error
  2010-10-22  1:49                   ` Don Zickus
  2010-10-22  2:05                     ` Huang Ying
@ 2010-10-22  9:24                     ` Andi Kleen
  1 sibling, 0 replies; 44+ messages in thread
From: Andi Kleen @ 2010-10-22  9:24 UTC (permalink / raw)
  To: Don Zickus
  Cc: Andi Kleen, Huang Ying, Ingo Molnar, H. Peter Anvin,
	linux-kernel@vger.kernel.org, Robert Richter,
	peterz@infradead.org

On Thu, Oct 21, 2010 at 09:49:55PM -0400, Don Zickus wrote:
> After re-reading Huang's patch, I am starting to understand what you mean
> by broken hardware.  Basically you are trying to distinguish between
> legacy systems that were 'broken' in the sense they would randomly send
> uknown NMIs for no good reason, hence the 'Dazed and confused' messages
> and hardware errors on more modern systems that say, 'Hardware error,
> panicing check your BIOS for more info' (or whatever).

Yes that's it.

Unfortunately there are some cases where the BIOS lost it either,
so the fallback has to be panic (at least for the modern boxes)

> 
> So Huang's patch was sort of acting like a switch.  On legacy systems use
> 'Dazed and confused' for unknown NMIs.  Whereas on whitelisted modern
> systems use a more relavant 'Check BIOS for error' message.  Is that
> right?

Yes.

> > I don't think you need to worry about a lot more hardware NMI sources.
> 
> Well until those machines dominate the marketplace, I'm stuck supporting
> those pre-Nahelam boxes with customers that committed to 10 years with
> last year's technology.  ;-)

I should clarify that the NMI model I described long predates Nehalem.
If you assume 3-5 years deprecation cycles on servers it should be pretty
much universal in this space. 

The HEDT detection was a proposed way to detect that, because most
of these systems should have HEDT.

The older machines still need to be supported, but it's ok to
just behave the same as today on them, no need for great improvements
here.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 44+ messages in thread

* [PATCH -v3 6/6] x86, NMI, Remove do_nmi_callback logic
  2010-10-09  6:49 [PATCH -v3 1/6] x86, NMI, Add NMI symbol constants and rename memory parity to PCI SERR Huang Ying
                   ` (3 preceding siblings ...)
  2010-10-09  6:49 ` [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error Huang Ying
@ 2010-10-09  6:49 ` Huang Ying
  4 siblings, 0 replies; 44+ messages in thread
From: Huang Ying @ 2010-10-09  6:49 UTC (permalink / raw)
  To: Don Zickus, Ingo Molnar, H. Peter Anvin
  Cc: linux-kernel, Andi Kleen, Robert Richter, Huang Ying

do_nmi_callback related logic is removed, because it is useless, just
adds code complexity.

unknown_nmi_panic sysctl is reserved to keep kernel ABI unchanged.

Signed-off-by: Huang Ying <ying.huang@intel.com>
---
 arch/x86/include/asm/nmi.h    |   10 +++++++++-
 arch/x86/kernel/apic/hw_nmi.c |    1 -
 arch/x86/kernel/apic/nmi.c    |   29 +----------------------------
 arch/x86/kernel/traps.c       |   16 ++++++++++------
 4 files changed, 20 insertions(+), 36 deletions(-)

--- a/arch/x86/include/asm/nmi.h
+++ b/arch/x86/include/asm/nmi.h
@@ -30,9 +30,17 @@ extern void setup_apic_nmi_watchdog(void
 extern void stop_apic_nmi_watchdog(void *);
 extern void disable_timer_nmi_watchdog(void);
 extern void enable_timer_nmi_watchdog(void);
-extern int nmi_watchdog_tick(struct pt_regs *regs, unsigned reason);
 extern void cpu_nmi_set_wd_enabled(void);
 
+#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR)
+extern int nmi_watchdog_tick(struct pt_regs *regs);
+#else
+static inline int nmi_watchdog_tick(struct pt_regs *regs)
+{
+	return 0;
+}
+#endif
+
 extern atomic_t nmi_active;
 extern unsigned int nmi_watchdog;
 #define NMI_NONE	0
--- a/arch/x86/kernel/apic/hw_nmi.c
+++ b/arch/x86/kernel/apic/hw_nmi.c
@@ -100,7 +100,6 @@ void acpi_nmi_disable(void) { return; }
 #endif
 atomic_t nmi_active = ATOMIC_INIT(0);           /* oprofile uses this */
 EXPORT_SYMBOL(nmi_active);
-int unknown_nmi_panic;
 void cpu_nmi_set_wd_enabled(void) { return; }
 void stop_apic_nmi_watchdog(void *unused) { return; }
 void setup_apic_nmi_watchdog(void *unused) { return; }
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
@@ -37,7 +37,6 @@
 
 #include <asm/mach_traps.h>
 
-int unknown_nmi_panic;
 int nmi_watchdog_enabled;
 
 /* For reliability, we're prepared to waste bits here. */
@@ -389,7 +388,7 @@ void touch_nmi_watchdog(void)
 EXPORT_SYMBOL(touch_nmi_watchdog);
 
 notrace __kprobes int
-nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
+nmi_watchdog_tick(struct pt_regs *regs)
 {
 	/*
 	 * Since current_thread_info()-> is always on the stack, and we
@@ -483,23 +482,6 @@ static void disable_ioapic_nmi_watchdog(
 	on_each_cpu(stop_apic_nmi_watchdog, NULL, 1);
 }
 
-static int __init setup_unknown_nmi_panic(char *str)
-{
-	unknown_nmi_panic = 1;
-	return 1;
-}
-__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
-
-static int unknown_nmi_panic_callback(struct pt_regs *regs, int cpu)
-{
-	unsigned char reason = get_nmi_reason();
-	char buf[64];
-
-	sprintf(buf, "NMI received for unknown reason %02x\n", reason);
-	die_nmi(buf, regs, 1); /* Always panic here */
-	return 0;
-}
-
 /*
  * proc handler for /proc/sys/kernel/nmi
  */
@@ -540,15 +522,6 @@ int proc_nmi_enabled(struct ctl_table *t
 
 #endif /* CONFIG_SYSCTL */
 
-int do_nmi_callback(struct pt_regs *regs, int cpu)
-{
-#ifdef CONFIG_SYSCTL
-	if (unknown_nmi_panic)
-		return unknown_nmi_panic_callback(regs, cpu);
-#endif
-	return 0;
-}
-
 void arch_trigger_all_cpu_backtrace(void)
 {
 	int i;
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
@@ -84,6 +84,7 @@ EXPORT_SYMBOL_GPL(used_vectors);
 static int ignore_nmis;
 
 int unknown_nmi_as_hwerr;
+int unknown_nmi_panic;
 
 /*
  * Prevent NMI reason port (0x61) being accessed simultaneously, can
@@ -308,6 +309,13 @@ gp_in_kernel:
 	die("general protection fault", regs, error_code);
 }
 
+static int __init setup_unknown_nmi_panic(char *str)
+{
+	unknown_nmi_panic = 1;
+	return 1;
+}
+__setup("unknown_nmi_panic", setup_unknown_nmi_panic);
+
 static notrace __kprobes void
 pci_serr_error(unsigned char reason, struct pt_regs *regs)
 {
@@ -391,7 +399,7 @@ unknown_nmi_error(unsigned char reason,
 		 reason, smp_processor_id());
 
 	pr_emerg("Do you have a strange power saving mode enabled?\n");
-	if (panic_on_unrecovered_nmi)
+	if (unknown_nmi_panic || panic_on_unrecovered_nmi)
 		panic("NMI: Not continuing");
 
 	pr_emerg("Dazed and confused, but trying to continue\n");
@@ -442,12 +450,8 @@ unlock_return:
 	if (notify_die(DIE_NMI, "nmi", regs, 0, 2, SIGINT) == NOTIFY_STOP)
 		return;
 
-#if defined(CONFIG_X86_LOCAL_APIC) && !defined(CONFIG_LOCKUP_DETECTOR)
-	if (nmi_watchdog_tick(regs, reason))
-		return;
-	if (do_nmi_callback(regs, cpu))
+	if (nmi_watchdog_tick(regs))
 		return;
-#endif
 
 	unknown_nmi_error(reason, regs);
 }

^ permalink raw reply	[flat|nested] 44+ messages in thread

end of thread, other threads:[~2010-11-02 20:47 UTC | newest]

Thread overview: 44+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-10-09  6:49 [PATCH -v3 1/6] x86, NMI, Add NMI symbol constants and rename memory parity to PCI SERR Huang Ying
2010-10-09  6:49 ` [PATCH -v3 2/6] x86, NMI, Add touch_nmi_watchdog to io_check_error delay Huang Ying
2010-10-09  6:49 ` [PATCH -v3 3/6] x86, NMI, Rewrite NMI handler Huang Ying
2010-10-11 16:13   ` Peter Zijlstra
2010-10-11 20:35     ` Don Zickus
2010-10-12  0:50     ` Huang Ying
2010-10-12  6:04       ` Peter Zijlstra
2010-10-12  6:14         ` Huang Ying
2010-10-12  6:31           ` Peter Zijlstra
2010-10-12  6:37             ` Huang Ying
2010-10-12  6:40               ` Peter Zijlstra
2010-10-12  6:45                 ` Huang Ying
2010-10-12  6:49                   ` Peter Zijlstra
2010-10-12  6:54                     ` Huang Ying
2010-10-12 13:51                     ` Andi Kleen
2010-10-12 14:15                       ` Peter Zijlstra
2010-10-27 16:45                         ` Don Zickus
2010-10-27 17:08                           ` Peter Zijlstra
2010-10-27 18:07                             ` Don Zickus
2010-11-02 17:50                             ` Don Zickus
2010-11-02 18:16                               ` Huang Ying
2010-11-02 19:11                                 ` Don Zickus
2010-11-02 20:47                                 ` Don Zickus
2010-10-09  6:49 ` [PATCH -v3 4/6] Make NMI reason io port (0x61) can be processed on any CPU Huang Ying
2010-10-09  6:49 ` [PATCH -v3 5/6] x86, NMI, treat unknown NMI as hardware error Huang Ying
2010-10-10 14:07   ` Alan Cox
2010-10-10 14:13     ` Andi Kleen
2010-10-11 21:08       ` Don Zickus
2010-10-11 21:12         ` Don Zickus
2010-10-11 21:20   ` Don Zickus
2010-10-12  1:10     ` Huang Ying
2010-10-20  6:12     ` Huang Ying
2010-10-20 14:15       ` Don Zickus
2010-10-21  1:14         ` Huang Ying
2010-10-21  2:31           ` Don Zickus
2010-10-21  5:17             ` Huang Ying
2010-10-21 14:10               ` Don Zickus
2010-10-21 15:45                 ` Andi Kleen
2010-10-22  1:49                   ` Don Zickus
2010-10-22  2:05                     ` Huang Ying
2010-10-22  2:56                       ` Don Zickus
2010-10-22  5:23                         ` Huang Ying
2010-10-22  9:24                     ` Andi Kleen
2010-10-09  6:49 ` [PATCH -v3 6/6] x86, NMI, Remove do_nmi_callback logic Huang Ying

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox