public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] NMI/CMOS RTC race fix for x86-64
@ 2005-03-07 18:48 Corey Minyard
  2005-03-07 19:08 ` Andi Kleen
  0 siblings, 1 reply; 5+ messages in thread
From: Corey Minyard @ 2005-03-07 18:48 UTC (permalink / raw)
  To: Andrew Morton; +Cc: lkml

[-- Attachment #1: Type: text/plain, Size: 1 bytes --]



[-- Attachment #2: nmicmos_x86_64_race.diff --]
[-- Type: text/plain, Size: 5775 bytes --]

This patch fixes a race between the CMOS clock setting and the NMI
code.  The NMI code indiscriminatly sets index registers and values
in the same place the CMOS clock is set.  If you are setting the
CMOS clock and an NMI occurs, Bad values could be written to or
read from the CMOS RAM, or the NMI operation might not occur
correctly.

Fixing this requires creating a special lock so the NMI code can
know its CPU owns the lock an "do the right thing" in that case.

This was discovered and the fix has been tested by a very demanding
customer who tests the heck of out the software we deliver.

This is basically the same as the x86 patch at
http://marc.theaimsgroup.com/?l=linux-kernel&m=110634968908435&w=2
but for x86_64.

Signed-off-by: Corey Minyard <minyard@acm.org>

Index: linux-2.6.11-rc5-mm1/include/asm-x86_64/mc146818rtc.h
===================================================================
--- linux-2.6.11-rc5-mm1.orig/include/asm-x86_64/mc146818rtc.h
+++ linux-2.6.11-rc5-mm1/include/asm-x86_64/mc146818rtc.h
@@ -5,6 +5,8 @@
 #define _ASM_MC146818RTC_H
 
 #include <asm/io.h>
+#include <asm/system.h>
+#include <linux/mc146818rtc.h>
 
 #ifndef RTC_PORT
 #define RTC_PORT(x)	(0x70 + (x))
@@ -12,17 +14,71 @@
 #endif
 
 /*
+ * This lock provides nmi access to the CMOS/RTC registers.  It has some
+ * special properties.  It is owned by a CPU and stores the index register
+ * currently being accessed (if owned).  The idea here is that it works
+ * like a normal lock (normally).  However, in an NMI, the NMI code will
+ * first check to see if its CPU owns the lock, meaning that the NMI
+ * interrupted during the read/write of the device.  If it does, it goes ahead
+ * and performs the access and then restores the index register.  If it does
+ * not, it locks normally.
+ *
+ * Note that since we are working with NMIs, we need this lock even in
+ * a non-SMP machine just to mark that the lock is owned.
+ *
+ * This only works with compare-and-swap.  There is no other way to
+ * atomically claim the lock and set the owner.
+ */
+#include <linux/smp.h>
+extern volatile unsigned long cmos_lock;
+
+/*
+ * All of these below must be called with interrupts off, preempt
+ * disabled, etc.
+ */
+
+static inline void lock_cmos(unsigned char reg)
+{
+	unsigned long new;
+	new = ((smp_processor_id()+1) << 8) | reg;
+	for (;;) {
+		if (cmos_lock)
+			continue;
+		if (__cmpxchg(&cmos_lock, 0, new, sizeof(cmos_lock)) == 0)
+			return;
+	}
+}
+
+static inline void unlock_cmos(void)
+{
+	cmos_lock = 0;
+}
+static inline int do_i_have_lock_cmos(void)
+{
+	return (cmos_lock >> 8) == (smp_processor_id()+1);
+}
+static inline unsigned char current_lock_cmos_reg(void)
+{
+	return cmos_lock & 0xff;
+}
+#define lock_cmos_prefix(reg) \
+	do {					\
+		unsigned long cmos_flags;	\
+		local_irq_save(cmos_flags);	\
+		lock_cmos(reg)
+#define lock_cmos_suffix(reg) \
+		unlock_cmos();			\
+		local_irq_restore(cmos_flags);	\
+	} while (0)
+
+/*
  * The yet supported machines all access the RTC index register via
  * an ISA port access but the way to access the date register differs ...
  */
-#define CMOS_READ(addr) ({ \
-outb_p((addr),RTC_PORT(0)); \
-inb_p(RTC_PORT(1)); \
-})
-#define CMOS_WRITE(val, addr) ({ \
-outb_p((addr),RTC_PORT(0)); \
-outb_p((val),RTC_PORT(1)); \
-})
+#define CMOS_READ(addr) rtc_cmos_read(addr)
+#define CMOS_WRITE(val, addr) rtc_cmos_write(val, addr)
+unsigned char rtc_cmos_read(unsigned char addr);
+void rtc_cmos_write(unsigned char val, unsigned char addr);
 
 #define RTC_IRQ 8
 
Index: linux-2.6.11-rc5-mm1/arch/x86_64/kernel/time.c
===================================================================
--- linux-2.6.11-rc5-mm1.orig/arch/x86_64/kernel/time.c
+++ linux-2.6.11-rc5-mm1/arch/x86_64/kernel/time.c
@@ -50,6 +50,35 @@
 extern int using_apic_timer;
 
 DEFINE_SPINLOCK(rtc_lock);
+
+/*
+ * This is a special lock that is owned by the CPU and holds the index
+ * register we are working with.  It is required for NMI access to the
+ * CMOS/RTC registers.  See include/asm-i386/mc146818rtc.h for details.
+ */
+volatile unsigned long cmos_lock = 0;
+EXPORT_SYMBOL(cmos_lock);
+
+/* Routines for accessing the CMOS RAM/RTC. */
+unsigned char rtc_cmos_read(unsigned char addr)
+{
+	unsigned char val;
+	lock_cmos_prefix(addr);
+	outb_p(addr, RTC_PORT(0));
+	val = inb_p(RTC_PORT(1));
+	lock_cmos_suffix(addr);
+	return val;
+}
+EXPORT_SYMBOL(rtc_cmos_read);
+void rtc_cmos_write(unsigned char val, unsigned char addr)
+{
+	lock_cmos_prefix(addr);
+	outb_p(addr, RTC_PORT(0));
+	outb_p(val, RTC_PORT(1));
+	lock_cmos_suffix(addr);
+}
+EXPORT_SYMBOL(rtc_cmos_write);
+
 DEFINE_SPINLOCK(i8253_lock);
 
 static int nohpet __initdata = 0;
Index: linux-2.6.11-rc5-mm1/arch/x86_64/kernel/traps.c
===================================================================
--- linux-2.6.11-rc5-mm1.orig/arch/x86_64/kernel/traps.c
+++ linux-2.6.11-rc5-mm1/arch/x86_64/kernel/traps.c
@@ -38,6 +38,7 @@
 #include <asm/i387.h>
 #include <asm/kdebug.h>
 #include <asm/processor.h>
+#include <asm/mc146818rtc.h>
 
 #include <asm/smp.h>
 #include <asm/pgalloc.h>
@@ -589,6 +590,7 @@
 asmlinkage void default_do_nmi(struct pt_regs *regs)
 {
 	unsigned char reason = 0;
+	int old_reg = -1;
 
 	/* Only the BSP gets external NMIs from the system.  */
 	if (!smp_processor_id())
@@ -625,10 +627,18 @@
 	 * Reassert NMI in case it became active meanwhile
 	 * as it's edge-triggered.
 	 */
+	if (do_i_have_lock_cmos())
+		old_reg = current_lock_cmos_reg();
+	else
+		lock_cmos(0); /* register doesn't matter here */
 	outb(0x8f, 0x70);
 	inb(0x71);		/* dummy */
 	outb(0x0f, 0x70);
 	inb(0x71);		/* dummy */
+	if (old_reg >= 0)
+		outb(old_reg, 0x70);
+	else
+		unlock_cmos();
 }
 
 asmlinkage void do_int3(struct pt_regs * regs, long error_code)

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] NMI/CMOS RTC race fix for x86-64
  2005-03-07 18:48 [PATCH] NMI/CMOS RTC race fix for x86-64 Corey Minyard
@ 2005-03-07 19:08 ` Andi Kleen
  2005-03-07 20:50   ` Corey Minyard
  0 siblings, 1 reply; 5+ messages in thread
From: Andi Kleen @ 2005-03-07 19:08 UTC (permalink / raw)
  To: Corey Minyard; +Cc: lkml, akpm

Corey Minyard <minyard@acm.org> writes:

> This patch fixes a race between the CMOS clock setting and the NMI
> code.  The NMI code indiscriminatly sets index registers and values
> in the same place the CMOS clock is set.  If you are setting the
> CMOS clock and an NMI occurs, Bad values could be written to or
> read from the CMOS RAM, or the NMI operation might not occur
> correctly.
>

In general you should send all x86-64 patches to me. I would have
eventually merged it from i386 anyways if it was good.

But in this case it isnt. Instead of all this complexity 
just remove the NMI reassert code from the NMI handler.
It is oudated and mostly useless on modern systems anyways.

Since the NMI watchdog runs regularly even if an NMI is missed
it will be eventually handled. And even when it doesn't run
it doesn't matter much because NMI does nothing essential.

-Andi

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] NMI/CMOS RTC race fix for x86-64
  2005-03-07 19:08 ` Andi Kleen
@ 2005-03-07 20:50   ` Corey Minyard
  2005-03-07 21:53     ` Andi Kleen
  0 siblings, 1 reply; 5+ messages in thread
From: Corey Minyard @ 2005-03-07 20:50 UTC (permalink / raw)
  To: Andi Kleen; +Cc: lkml, akpm

Andi Kleen wrote:

>Corey Minyard <minyard@acm.org> writes:
>
>  
>
>>This patch fixes a race between the CMOS clock setting and the NMI
>>code.  The NMI code indiscriminatly sets index registers and values
>>in the same place the CMOS clock is set.  If you are setting the
>>CMOS clock and an NMI occurs, Bad values could be written to or
>>read from the CMOS RAM, or the NMI operation might not occur
>>correctly.
>>
>>    
>>
>
>In general you should send all x86-64 patches to me. I would have
>eventually merged it from i386 anyways if it was good.
>
>But in this case it isnt. Instead of all this complexity 
>just remove the NMI reassert code from the NMI handler.
>It is oudated and mostly useless on modern systems anyways.
>  
>
"mostly useless" and "completely useless" are two different things.

If you run with nmi_watchdog=0, then this code actually does something
useful, which is what I assume you mean by "mostly useless".  I'm all for
removing useless code, so I'd be fine with just removing the code.  But
something really needs to be done.

Do you want me to submit a patch that simply removes this?

-Corey

>Since the NMI watchdog runs regularly even if an NMI is missed
>it will be eventually handled. And even when it doesn't run
>it doesn't matter much because NMI does nothing essential.
>
>-Andi
>  
>


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] NMI/CMOS RTC race fix for x86-64
  2005-03-07 20:50   ` Corey Minyard
@ 2005-03-07 21:53     ` Andi Kleen
  2005-03-07 23:33       ` Corey Minyard
  0 siblings, 1 reply; 5+ messages in thread
From: Andi Kleen @ 2005-03-07 21:53 UTC (permalink / raw)
  To: Corey Minyard; +Cc: lkml, akpm

> >But in this case it isnt. Instead of all this complexity 
> >just remove the NMI reassert code from the NMI handler.
> >It is oudated and mostly useless on modern systems anyways.
> > 
> >
> "mostly useless" and "completely useless" are two different things.

It's completely useless (double checked the AMD8111 and ICH5 data sheets)

There is nothing that should ever clear the NMI bit in this register.
In fact the ICH5 datasheet even explicitely says software should
never touch this bit. 

It may have some meaning in ancient ISA chipsets, but that is 
of no concern on x86-64.

> Do you want me to submit a patch that simply removes this?

Yes, please.

-Andi

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH] NMI/CMOS RTC race fix for x86-64
  2005-03-07 21:53     ` Andi Kleen
@ 2005-03-07 23:33       ` Corey Minyard
  0 siblings, 0 replies; 5+ messages in thread
From: Corey Minyard @ 2005-03-07 23:33 UTC (permalink / raw)
  To: Andi Kleen; +Cc: lkml, akpm

[-- Attachment #1: Type: text/plain, Size: 784 bytes --]

Patch is attached.  Thanks.

-Corey

Andi Kleen wrote:

>>>But in this case it isnt. Instead of all this complexity 
>>>just remove the NMI reassert code from the NMI handler.
>>>It is oudated and mostly useless on modern systems anyways.
>>>
>>>
>>>      
>>>
>>"mostly useless" and "completely useless" are two different things.
>>    
>>
>
>It's completely useless (double checked the AMD8111 and ICH5 data sheets)
>
>There is nothing that should ever clear the NMI bit in this register.
>In fact the ICH5 datasheet even explicitely says software should
>never touch this bit. 
>
>It may have some meaning in ancient ISA chipsets, but that is 
>of no concern on x86-64.
>
>  
>
>>Do you want me to submit a patch that simply removes this?
>>    
>>
>
>Yes, please.
>
>-Andi
>  
>


[-- Attachment #2: nmicmos_x86_64_race.diff --]
[-- Type: text/plain, Size: 1132 bytes --]

This patch fixes a race between the CMOS clock setting and the NMI
code.  The NMI code indiscriminatly sets index registers and values
in the same place the CMOS clock is set.  If you are setting the
CMOS clock and an NMI occurs, Bad values could be written to or
read from the CMOS RAM, or the NMI operation might not occur
correctly.

Resetting the NMI is not required on x86_64 (in fact, it should
not be done according to the ICH5 documentation).  This patch
simply removes the useless code.

Signed-off-by: Corey Minyard <minyard@acm.org>

Index: linux-2.6.11-mm1/arch/x86_64/kernel/traps.c
===================================================================
--- linux-2.6.11-mm1.orig/arch/x86_64/kernel/traps.c
+++ linux-2.6.11-mm1/arch/x86_64/kernel/traps.c
@@ -620,15 +620,6 @@
 		mem_parity_error(reason, regs);
 	if (reason & 0x40)
 		io_check_error(reason, regs);
-
-	/*
-	 * Reassert NMI in case it became active meanwhile
-	 * as it's edge-triggered.
-	 */
-	outb(0x8f, 0x70);
-	inb(0x71);		/* dummy */
-	outb(0x0f, 0x70);
-	inb(0x71);		/* dummy */
 }
 
 asmlinkage void do_int3(struct pt_regs * regs, long error_code)

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2005-03-07 23:43 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-03-07 18:48 [PATCH] NMI/CMOS RTC race fix for x86-64 Corey Minyard
2005-03-07 19:08 ` Andi Kleen
2005-03-07 20:50   ` Corey Minyard
2005-03-07 21:53     ` Andi Kleen
2005-03-07 23:33       ` Corey Minyard

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox