* PATCH: Allow users to force a panic on NMI
@ 2005-10-21 13:21 Alan Cox
2005-10-21 13:06 ` Jesper Juhl
2005-10-21 16:16 ` PATCH: Allow users to force a panic on NMI - Header file Alan Cox
0 siblings, 2 replies; 3+ messages in thread
From: Alan Cox @ 2005-10-21 13:21 UTC (permalink / raw)
To: linux-kernel, akpm
The default Linux behaviour on an NMI caused by either a memory error or an
unknown source is to continue operation. For many environments, such as
scientific computing, it is preferable that the box is taken out and the error
dealt with rather than an uncorrected parity/ECC error getting propagated.
A small number of systems do generate NMIs for bizarre random reasons
such as power management, so the default is unchanged. In other respects
the new /proc/sys entry works like the existing panic controls already in
that directory.
This is separate from the EDAC support - EDAC allows supported chipsets to
handle ECC errors well; this change allows unsupported cases to at least
panic rather than cause problems further down the line.
Signed-off-by: Alan Cox <alan@redhat.com>
diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.vanilla-2.6.14-rc4-mm1/include/linux/sysctl.h linux-2.6.14-rc4-mm1/include/linux/sysctl.h
--- linux.vanilla-2.6.14-rc4-mm1/include/linux/sysctl.h 2005-10-20 16:12:41.000000000 +0100
+++ linux-2.6.14-rc4-mm1/include/linux/sysctl.h 2005-10-20 17:31:08.000000000 +0100
@@ -146,6 +146,7 @@
KERN_RANDOMIZE=68, /* int: randomize virtual address space */
KERN_SETUID_DUMPABLE=69, /* int: behaviour of dumps for setuid core */
KERN_SPIN_RETRY=70, /* int: number of spinlock retries */
+ KERN_PANIC_ON_NMI=71, /* int: whether we will panic on an unrecovered NMI */
};
diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.vanilla-2.6.14-rc4-mm1/kernel/panic.c linux-2.6.14-rc4-mm1/kernel/panic.c
--- linux.vanilla-2.6.14-rc4-mm1/kernel/panic.c 2005-10-20 16:10:19.000000000 +0100
+++ linux-2.6.14-rc4-mm1/kernel/panic.c 2005-10-20 17:27:54.000000000 +0100
@@ -22,6 +22,7 @@
int panic_timeout;
int panic_on_oops;
+int panic_on_unrecovered_nmi;
int tainted;
EXPORT_SYMBOL(panic_timeout);
diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.vanilla-2.6.14-rc4-mm1/kernel/sysctl.c linux-2.6.14-rc4-mm1/kernel/sysctl.c
--- linux.vanilla-2.6.14-rc4-mm1/kernel/sysctl.c 2005-10-20 16:12:41.000000000 +0100
+++ linux-2.6.14-rc4-mm1/kernel/sysctl.c 2005-10-20 17:29:48.000000000 +0100
@@ -594,6 +594,14 @@
.proc_handler = &proc_dointvec,
},
{
+ .ctl_name = KERN_PANIC_ON_NMI,
+ .procname = "panic_on_unrecovered_nmi",
+ .data = &panic_on_unrecovered_nmi,
+ .maxlen = sizeof(int),
+ .mode = 0644,
+ .proc_handler = &proc_dointvec,
+ },
+ {
.ctl_name = KERN_PRINTK_RATELIMIT,
.procname = "printk_ratelimit",
.data = &printk_ratelimit_jiffies,
diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.vanilla-2.6.14-rc4-mm1/arch/i386/kernel/traps.c linux-2.6.14-rc4-mm1/arch/i386/kernel/traps.c
--- linux.vanilla-2.6.14-rc4-mm1/arch/i386/kernel/traps.c 2005-10-20 16:12:39.000000000 +0100
+++ linux-2.6.14-rc4-mm1/arch/i386/kernel/traps.c 2005-10-20 17:28:10.000000000 +0100
@@ -576,6 +576,9 @@
{
printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
printk("You probably have a hardware problem with your RAM chips\n");
+
+ if(panic_on_unrecovered_nmi)
+ panic("NMI: Not continuing");
/* Clear and disable the memory parity error line. */
clear_mem_error(reason);
@@ -611,6 +614,9 @@
reason, smp_processor_id());
printk("Dazed and confused, but trying to continue\n");
printk("Do you have a strange power saving mode enabled?\n");
+
+ if(panic_on_unrecovered_nmi)
+ panic("NMI: Not continuing");
}
static DEFINE_SPINLOCK(nmi_print_lock);
diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.vanilla-2.6.14-rc4-mm1/arch/x86_64/kernel/traps.c linux-2.6.14-rc4-mm1/arch/x86_64/kernel/traps.c
--- linux.vanilla-2.6.14-rc4-mm1/arch/x86_64/kernel/traps.c 2005-10-20 16:12:39.000000000 +0100
+++ linux-2.6.14-rc4-mm1/arch/x86_64/kernel/traps.c 2005-10-20 17:29:03.000000000 +0100
@@ -563,6 +563,9 @@
printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
printk("You probably have a hardware problem with your RAM chips\n");
+ if(panic_on_unrecovered_nmi)
+ panic("NMI: Not continuing");
+
/* Clear and disable the memory parity error line. */
reason = (reason & 0xf) | 4;
outb(reason, 0x61);
@@ -585,6 +588,9 @@
{ printk("Uhhuh. NMI received for unknown reason %02x.\n", reason);
printk("Dazed and confused, but trying to continue\n");
printk("Do you have a strange power saving mode enabled?\n");
+
+ if(panic_on_unrecovered_nmi)
+ panic("NMI: Not continuing");
}
/* Runs on IST stack. This code must keep interrupts off all the time.
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: PATCH: Allow users to force a panic on NMI
2005-10-21 13:21 PATCH: Allow users to force a panic on NMI Alan Cox
@ 2005-10-21 13:06 ` Jesper Juhl
2005-10-21 16:16 ` PATCH: Allow users to force a panic on NMI - Header file Alan Cox
1 sibling, 0 replies; 3+ messages in thread
From: Jesper Juhl @ 2005-10-21 13:06 UTC (permalink / raw)
To: Alan Cox; +Cc: linux-kernel, akpm
On 10/21/05, Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> The default Linux behaviour on an NMI of either memory or unknown is to
> continue operation. For many environments such as scientific computing
> it is preferable that the box is taken out and the error dealt with than
> an uncorrected parity/ECC error get propogated.
>
[snip]
> {
> printk("Uhhuh. NMI received. Dazed and confused, but trying to continue\n");
> printk("You probably have a hardware problem with your RAM chips\n");
> +
> + if(panic_on_unrecovered_nmi)
> + panic("NMI: Not continuing");
>
How about something like this instead?
printk(KERN_WARNING "Uhhuh. NMI received. Dazed and confused\n");
printk(KERN_WARNING "You probably have a hardware problem with your
RAM chips\n");
if (panic_on_unrecovered_nmi)
panic("NMI: panic_on_unrecovered_nmi enabled - Not continuing");
else
printk(KERN_WARNING "NMI: panic_on_unrecovered_nmi disabled -
continuing\n");
First of all, that way it won't start out by saying that it's going to
continue, only to panic a few lines down.
Secondly it shows clearly to anyone reading the messages that there's
a control available for changing the behaviour, and that person can
then go look up how that's done.
Just a suggestion...
--
Jesper Juhl <jesper.juhl@gmail.com>
Don't top-post http://www.catb.org/~esr/jargon/html/T/top-post.html
Plain text mails only, please http://www.expita.com/nomime.html
^ permalink raw reply [flat|nested] 3+ messages in thread
* Re: PATCH: Allow users to force a panic on NMI - Header file
2005-10-21 13:21 PATCH: Allow users to force a panic on NMI Alan Cox
2005-10-21 13:06 ` Jesper Juhl
@ 2005-10-21 16:16 ` Alan Cox
1 sibling, 0 replies; 3+ messages in thread
From: Alan Cox @ 2005-10-21 16:16 UTC (permalink / raw)
To: linux-kernel; +Cc: akpm
Forgot the header file in that one.
Signed-off-by: Alan Cox <alan@redhat.com>
diff -u --new-file --recursive --exclude-from /usr/src/exclude linux.vanilla-2.6.14-rc4-mm1/include/linux/kernel.h linux-2.6.14-rc4-mm1/include/linux/kernel.h
--- linux.vanilla-2.6.14-rc4-mm1/include/linux/kernel.h 2005-10-20 16:12:41.000000000 +0100
+++ linux-2.6.14-rc4-mm1/include/linux/kernel.h 2005-10-20 17:30:10.000000000 +0100
@@ -170,6 +170,7 @@
extern int oops_in_progress; /* If set, an oops, panic(), BUG() or die() is in progress */
extern __deprecated_for_modules int panic_timeout;
extern int panic_on_oops;
+extern int panic_on_unrecovered_nmi;
extern int tainted;
extern const char *print_tainted(void);
extern void add_taint(unsigned);
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2005-10-21 15:48 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2005-10-21 13:21 PATCH: Allow users to force a panic on NMI Alan Cox
2005-10-21 13:06 ` Jesper Juhl
2005-10-21 16:16 ` PATCH: Allow users to force a panic on NMI - Header file Alan Cox
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox