* [PATCH] NMI request/release
From: Corey Minyard @ 2002-10-22 1:32 UTC (permalink / raw)
To: linux-kernel
[-- Attachment #1: Type: text/plain, Size: 518 bytes --]
The attached patch implements a way to request to receive an NMI if it
comes from an otherwise unknown source. I needed this for handling NMIs
with the IPMI watchdog. This feature was discussed a while back on the
mailing list, but nothing was done about it, so I implemented a
request/release interface for NMIs. The code is in
arch/i386/kernel/traps.c, but it's pretty generic and could possibly
live under kernel/ instead. I'm not sure, though.
This is relative to 2.5.44. I have a 2.4 version of it, too.
-Corey
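
For illustration, using it looks something like this (the IPMI names
here are just an example, not part of the patch):

	#include <linux/nmi.h>

	/* Return 1 if we recognized (and handled) the NMI, 0 to let
	   any other registered handlers look at it. */
	static int ipmi_wd_nmi(void *dev_id, struct pt_regs *regs)
	{
		/* ... ask the hardware whether it raised this NMI ... */
		return 1;
	}

	static struct nmi_handler ipmi_wd_nmi_handler = {
		.dev_name = "ipmi_watchdog",
		.dev_id   = NULL,
		.handler  = ipmi_wd_nmi,
	};

	int ipmi_wd_setup(void)
	{
		if (request_nmi(&ipmi_wd_nmi_handler) != 0)
			return -EBUSY;	/* already registered */
		return 0;
	}

	void ipmi_wd_teardown(void)
	{
		release_nmi(&ipmi_wd_nmi_handler);
	}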
[-- Attachment #2: linux-nmi.diff --]
[-- Type: text/plain, Size: 4761 bytes --]
diff -urN linux.orig/arch/i386/kernel/i386_ksyms.c linux/arch/i386/kernel/i386_ksyms.c
--- linux.orig/arch/i386/kernel/i386_ksyms.c	Mon Oct 21 13:25:58 2002
+++ linux/arch/i386/kernel/i386_ksyms.c	Mon Oct 21 20:04:35 2002
@@ -14,6 +14,7 @@
 #include <linux/kernel.h>
 #include <linux/string.h>
 #include <linux/tty.h>
+#include <linux/nmi.h>
 #include <linux/highmem.h>
 
 #include <asm/semaphore.h>
@@ -89,6 +90,9 @@
 EXPORT_SYMBOL(get_cmos_time);
 EXPORT_SYMBOL(cpu_khz);
 EXPORT_SYMBOL(apm_info);
+
+EXPORT_SYMBOL(request_nmi);
+EXPORT_SYMBOL(release_nmi);
 
 #ifdef CONFIG_DEBUG_IOVIRT
 EXPORT_SYMBOL(__io_virt_debug);
diff -urN linux.orig/arch/i386/kernel/traps.c linux/arch/i386/kernel/traps.c
--- linux.orig/arch/i386/kernel/traps.c	Mon Oct 21 13:25:45 2002
+++ linux/arch/i386/kernel/traps.c	Mon Oct 21 20:06:43 2002
@@ -485,6 +485,97 @@
 	printk("Do you have a strange power saving mode enabled?\n");
 }
 
+/* A list of handlers for NMIs.  This list will be called in order
+   when an NMI from an otherwise unidentifiable source comes in.  If
+   one of these handles the NMI, it should return 1.  NMI handlers
+   cannot claim spinlocks, so we handle mutual exclusion in a
+   different manner.  A spinlock protects the list from multiple
+   writers.  When something is removed from the list, it will check
+   to see that calling_nmi_handlers is 0 before returning.  If it is
+   not zero, another processor is handling an NMI, and it should wait
+   until it goes to zero to return and allow the user to free that
+   data. */
+static volatile struct nmi_handler *nmi_handler_list = NULL;
+static spinlock_t nmi_handler_lock = SPIN_LOCK_UNLOCKED;
+static atomic_t calling_nmi_handlers = ATOMIC_INIT(0);
+
+int request_nmi(struct nmi_handler *handler)
+{
+	volatile struct nmi_handler *curr;
+
+	spin_lock(&nmi_handler_lock);
+
+	/* Make sure the thing is not already in the list. */
+	curr = nmi_handler_list;
+	while (curr) {
+		if (curr == handler) {
+			break;
+		}
+		curr = curr->next;
+	}
+	if (curr) {
+		/* Must drop the lock before bailing out. */
+		spin_unlock(&nmi_handler_lock);
+		return EBUSY;
+	}
+
+	handler->next = nmi_handler_list;
+	xchg(&nmi_handler_list, handler);
+
+	spin_unlock(&nmi_handler_lock);
+	return 0;
+}
+
+void release_nmi(struct nmi_handler *handler)
+{
+	volatile struct nmi_handler *curr, *prev;
+
+	spin_lock(&nmi_handler_lock);
+
+	/* Find it in the list. */
+	curr = nmi_handler_list;
+	prev = NULL;
+	while (curr) {
+		if (curr == handler) {
+			break;
+		}
+		prev = curr;
+		curr = curr->next;
+	}
+
+	if (curr) {
+		/* If it was found, remove it from the list.  We
+		   assume the write operation here is atomic. */
+		if (prev)
+			xchg(&(prev->next), curr->next);
+		else
+			xchg(&nmi_handler_list, curr->next);
+
+		/* If some other part of the kernel is handling an
+		   NMI, we make sure that we don't release the handler
+		   (or really, allow the user to release the handler)
+		   until it has finished handling the NMI. */
+		while (atomic_read(&calling_nmi_handlers))
+			;
+	}
+	spin_unlock(&nmi_handler_lock);
+}
+
+static int call_nmi_handlers(struct pt_regs * regs)
+{
+	volatile struct nmi_handler *curr;
+	int handled = 0;
+
+	atomic_inc(&calling_nmi_handlers);
+	smp_mb();
+	curr = nmi_handler_list;
+	while (curr) {
+		handled |= curr->handler(curr->dev_id, regs);
+		curr = curr->next;
+	}
+	smp_mb();
+	atomic_dec(&calling_nmi_handlers);
+	return handled;
+}
+
 static void default_do_nmi(struct pt_regs * regs)
 {
 	unsigned char reason = inb(0x61);
@@ -500,6 +591,12 @@
 		return;
 	}
 #endif
+
+	/* Check our handler list to see if anyone can handle this
+	   nmi. */
+	if (call_nmi_handlers(regs))
+		return;
+
 	unknown_nmi_error(reason, regs);
 	return;
 }
diff -urN linux.orig/include/asm-i386/irq.h linux/include/asm-i386/irq.h
--- linux.orig/include/asm-i386/irq.h	Mon Oct 21 13:24:41 2002
+++ linux/include/asm-i386/irq.h	Mon Oct 21 20:03:52 2002
@@ -28,4 +28,16 @@
 #define ARCH_HAS_NMI_WATCHDOG /* See include/linux/nmi.h */
 #endif
 
+
+/* Structure for handling NMIs */
+#define HAVE_NMI_HANDLER 1
+struct nmi_handler
+{
+	char *dev_name;
+	void *dev_id;
+	int (*handler)(void *dev_id, struct pt_regs *regs);
+
+	volatile struct nmi_handler *next;
+};
+
 #endif /* _ASM_IRQ_H */
diff -urN linux.orig/include/linux/nmi.h linux/include/linux/nmi.h
--- linux.orig/include/linux/nmi.h	Thu Jun 20 17:53:40 2002
+++ linux/include/linux/nmi.h	Mon Oct 21 20:03:53 2002
@@ -19,4 +19,11 @@
 # define touch_nmi_watchdog() do { } while(0)
 #endif
 
+/**
+ * Register a handler to get called when an NMI occurs.  If the handler
+ * actually handles the NMI, it should return 1.
+ */
+int request_nmi(struct nmi_handler *handler);
+void release_nmi(struct nmi_handler *handler);
+
 #endif

* Re: [PATCH] NMI request/release
From: John Levon @ 2002-10-22  2:10 UTC (permalink / raw)
To: linux-kernel; +Cc: cminyard

On Mon, Oct 21, 2002 at 08:32:47PM -0500, Corey Minyard wrote:

> The attached patch implements a way to request to receive an NMI if it
> comes from an otherwise unknown source. I needed this for handling NMIs
> with the IPMI watchdog. This feature was discussed a while back

Then the NMI watchdog and oprofile should be changed to use this too. We
also need priority and/or an equivalent of NOTIFY_STOP_MASK so we can
break out of calling all the handlers. Actually, why do you continue if
one of the handlers returns 1 anyway?

> +	atomic_inc(&calling_nmi_handlers);

Isn't this going to cause cacheline ping-pong?

> +	curr = nmi_handler_list;
> +	while (curr) {
> +		handled |= curr->handler(curr->dev_id, regs);

dev_name is never used at all. What is it for? Also, it would be nice
to do an smp_processor_id() just once and pass that in to prevent
multiple calls to get_current().

Couldn't you modify the notifier code to do the xchg()s (though that's
not available on all CPU types ...)?

> +#define HAVE_NMI_HANDLER 1

What uses this?

> +	volatile struct nmi_handler *next;

Hmm ...

Is it not possible to use linux/rcupdate.h for this stuff?

regards
john

--
"Lots of companies would love to be in our hole."
	- Scott McNealy
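
The "notifier code" John refers to is the chain mechanism in
linux/notifier.h. As a rough sketch of that pattern (shape recalled
from the era's interface, not a verbatim copy), registration threads a
priority-sorted singly linked list, and the caller walks it until a
callee sets NOTIFY_STOP_MASK:

	struct notifier_block
	{
		int (*notifier_call)(struct notifier_block *self,
				     unsigned long event, void *data);
		struct notifier_block *next;
		int priority;		/* chain kept sorted on this */
	};

	/* The walker stops early when a callee ORs in NOTIFY_STOP_MASK. */
	int notifier_call_chain(struct notifier_block **n,
				unsigned long event, void *data)
	{
		int ret = NOTIFY_DONE;
		struct notifier_block *nb = *n;

		while (nb) {
			ret = nb->notifier_call(nb, event, data);
			if (ret & NOTIFY_STOP_MASK)
				break;
			nb = nb->next;
		}
		return ret;
	}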

* Re: [PATCH] NMI request/release
From: Corey Minyard @ 2002-10-22  2:32 UTC (permalink / raw)
To: John Levon; +Cc: linux-kernel

John Levon wrote:

>On Mon, Oct 21, 2002 at 08:32:47PM -0500, Corey Minyard wrote:
>
>>The attached patch implements a way to request to receive an NMI if it
>>comes from an otherwise unknown source. I needed this for handling NMIs
>>with the IPMI watchdog. This feature was discussed a while back
>>
>Then the NMI watchdog and oprofile should be changed to use this too.
>
Maybe so. If we get the infrastructure into place, we can change that
afterwards.

>We also need priority and/or an equivalent of NOTIFY_STOP_MASK so we
>can break out of calling all the handlers. Actually, why do you
>continue if one of the handlers returns 1 anyway?
>
What if there's an NMI from multiple things? Not that it's likely, but
it's possible, right? I could easily add priority and sort the list by
it, and add a NOTIFY_STOP_MASK, if there is some benefit.

>>+	atomic_inc(&calling_nmi_handlers);
>>
>Isn't this going to cause cacheline ping-pong?
>
This is an NMI, does it really matter? And is there another way to do
this without locks?

>>+	curr = nmi_handler_list;
>>+	while (curr) {
>>+		handled |= curr->handler(curr->dev_id, regs);
>>
>dev_name is never used at all. What is it for? Also, it would be nice
>to do an smp_processor_id() just once and pass that in to prevent
>multiple calls to get_current().
>
dev_name could be removed, although it would be nice for reporting
later. Basically, it's there for the same reason it's there for
interrupts. And again, this is an NMI, I don't think performance really
matters (unless we perhaps add the NMI watchdog to this).

>Couldn't you modify the notifier code to do the xchg()s (though that's
>not available on all CPU types ...)?
>
I don't understand. The xchg()s are for atomicity between the
request/release code and the NMI handler. How could the notifier code
do it?

>>+#define HAVE_NMI_HANDLER 1
>>
>What uses this?
>
This is so the user code can know if it's available or not.

>>+	volatile struct nmi_handler *next;
>>
>Hmm ...
>
>Is it not possible to use linux/rcupdate.h for this stuff?
>
I'm not sure. It looks possible, but remember, this is an NMI, normal
rules may not apply. Particularly, you cannot block or spin waiting for
something else, the NMI code has to run. An NMI can happen at ANY time.
If the rcu code can handle this, I could use it, but I have not looked
to see if it can.

Thanks,

-Corey
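
Concretely, the insertion pattern being defended is (a sketch against
the v1 list head; on i386, xchg() implies a full memory barrier, which
is what makes the publish safe for a concurrent NMI walker):

	/* Build the node completely, then publish it with one atomic
	   pointer update, so an NMI walking the list concurrently sees
	   either the old head or a fully initialized new node. */
	static void publish_nmi_handler(struct nmi_handler *handler)
	{
		handler->next = nmi_handler_list;  /* node complete first */
		/* xchg() is a full barrier on i386, so the store above
		   is visible before the new head is. */
		xchg(&nmi_handler_list, handler);
	}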

* Re: [PATCH] NMI request/release
From: John Levon @ 2002-10-22  2:53 UTC (permalink / raw)
To: Corey Minyard; +Cc: linux-kernel

On Mon, Oct 21, 2002 at 09:32:07PM -0500, Corey Minyard wrote:

> This is an NMI, does it really matter?

Yes. Both for oprofile and the NMI watchdog (which was firing awfully
often last time I checked). The handler needs to be as streamlined as
possible.

> dev_name could be removed, although it would be nice for reporting
> later.

Reporting what? From where?

> >Couldn't you modify the notifier code to do the xchg()s (though that's
> >not available on all CPU types ...)?
> >
> I don't understand. The xchg()s are for atomicity between the
> request/release code and the NMI handler. How could the notifier code
> do it?

You are using the xchg()s in an attempt to thread onto/off the list
safely, no?

> >>+#define HAVE_NMI_HANDLER 1
>
> This is so the user code can know if it's available or not.

If we had that for every API or API change, the kernel would be mostly
HAVE_*. It's either available or it's not. If you're maintaining an
external module, then autoconf or similar is the proper way to check for
its existence.

> >Is it not possible to use linux/rcupdate.h for this stuff?
> >
> I'm not sure. It looks possible, but remember, this is an NMI, normal
> rules may not apply. Particularly, you cannot block or spin waiting for
> something else, the NMI code has to run. An NMI can happen at ANY time.

Believe me, I know :)

> If the rcu code can handle this, I could use it, but I have not looked
> to see if it can.

If it's possible (and I have no idea, not having looked at RCU at all)
it seems the right way.

regards
john

--
"Lots of companies would love to be in our hole."
	- Scott McNealy

* Re: [PATCH] NMI request/release
From: Corey Minyard @ 2002-10-22 13:02 UTC (permalink / raw)
To: John Levon; +Cc: linux-kernel

John Levon wrote:

>On Mon, Oct 21, 2002 at 09:32:07PM -0500, Corey Minyard wrote:
>
>>This is an NMI, does it really matter?
>>
>Yes. Both for oprofile and the NMI watchdog (which was firing awfully
>often last time I checked). The handler needs to be as streamlined as
>possible.
>
Ok. I'd be inclined to leave the high-usage things where they are,
although it would be nice to be able to make the NMI watchdog a module.
oprofile should probably stay where it is. Do you have an alternate
implementation that would be more efficient?

>>dev_name could be removed, although it would be nice for reporting
>>later.
>>
>Reporting what? From where?
>
Registered NMI users in procfs.

>>>Couldn't you modify the notifier code to do the xchg()s (though that's
>>>not available on all CPU types ...)?
>>>
>>I don't understand. The xchg()s are for atomicity between the
>>request/release code and the NMI handler. How could the notifier code
>>do it?
>>
>You are using the xchg()s in an attempt to thread onto/off the list
>safely, no?
>
Yes. But I don't understand why they would be used in the notifier code.

>>>>+#define HAVE_NMI_HANDLER 1
>>>>
>>This is so the user code can know if it's available or not.
>>
>If we had that for every API or API change, the kernel would be mostly
>HAVE_*. It's either available or it's not. If you're maintaining an
>external module, then autoconf or similar is the proper way to check for
>its existence.
>
I'm not worried about kernel versions so much as processor capability.
Some processors may not have NMIs, or may not be capable of doing this.
A few of these exist (like __HAVE_ARCH_CMPXCHG). The name's probably
bad; maybe it should be "__HAVE_ARCH_NMI_HANDLER"?

>>If the rcu code can handle this, I could use it, but I have not looked
>>to see if it can.
>>
>If it's possible (and I have no idea, not having looked at RCU at all)
>it seems the right way.
>
I looked, and the rcu code relies on turning off interrupts to avoid
preemption. So it won't work.

Thanks again,

-Corey

* Re: [PATCH] NMI request/release
From: John Levon @ 2002-10-22 15:09 UTC (permalink / raw)
To: Corey Minyard; +Cc: linux-kernel

On Tue, Oct 22, 2002 at 08:02:11AM -0500, Corey Minyard wrote:

> Ok. I'd be inclined to leave the high-usage things where they are,
> although it would be nice to be able to make the NMI watchdog a module.
> oprofile should probably stay where it is. Do you have an alternate
> implementation that would be more efficient?

I'm beginning to think you're right. You should ask Keith Owens if kdb
etc. can use your API successfully.

> >>dev_name could be removed, although it would be nice for reporting
> >>later.
> >>
> >Reporting what? From where?
> >
> Registered NMI users in procfs.

Then if you add such code, you can add dev_name ... I hate code that
does nothing ...

> Yes. But I don't understand why they would be used in the notifier code.

I'm trying to reduce code duplication - you do basically the same thing
notifier register/unregister does.

btw, the stuff you add to header files should all be in asm-i386/nmi.h
IMHO. It would make it clear that there's a fast-path "set nmi handler"
and the slow one, and you can document the difference there, if that's
what we're going to do.

regards
john

--
"Lots of companies would love to be in our hole."
	- Scott McNealy

* Re: [PATCH] NMI request/release
From: Corey Minyard @ 2002-10-22 16:03 UTC (permalink / raw)
To: John Levon; +Cc: Corey Minyard, linux-kernel

John Levon wrote:

>On Tue, Oct 22, 2002 at 08:02:11AM -0500, Corey Minyard wrote:
>
>>Ok. I'd be inclined to leave the high-usage things where they are,
>>although it would be nice to be able to make the NMI watchdog a module.
>>oprofile should probably stay where it is. Do you have an alternate
>>implementation that would be more efficient?
>>
>I'm beginning to think you're right. You should ask Keith Owens if kdb
>etc. can use your API successfully.
>
Ok. Good thought, that would decouple kdb a little.

>>>>dev_name could be removed, although it would be nice for reporting
>>>>later.
>>>>
>>>Reporting what? From where?
>>>
>>Registered NMI users in procfs.
>>
>Then if you add such code, you can add dev_name ... I hate code that
>does nothing ...
>
Ok, I'll add a procfs interface then :-). IMHO, there's a difference
between stuff in an interface that is looking forward and dead code,
though. If I added it later, I would break all the users. But there is
a balance.

>>Yes. But I don't understand why they would be used in the notifier code.
>>
>I'm trying to reduce code duplication - you do basically the same thing
>notifier register/unregister does.
>
Ah. Yes, there is some stuff that looks the same but is subtly
different. I'll see what I can do.

>btw, the stuff you add to header files should all be in asm-i386/nmi.h
>IMHO.
>
Ok, I agree.

>It would make it clear that there's a fast-path "set nmi handler" and
>the slow one, and you can document the difference there, if that's what
>we're going to do.
>
Thanks,

-Corey

* Re: [PATCH] NMI request/release
From: Robert Love @ 2002-10-22 17:23 UTC (permalink / raw)
To: Corey Minyard; +Cc: John Levon, linux-kernel

On Tue, 2002-10-22 at 09:02, Corey Minyard wrote:

> I looked, and the rcu code relies on turning off interrupts to avoid
> preemption. So it won't work.

At least on the variant of RCU that is in 2.5, the RCU code does the
read side by disabling preemption. Nothing else.

The write side is the same with or without preemption - wait until all
readers are quiescent, change the copy, etc.

But anyhow, disabling interrupts should not affect NMIs, no?

	Robert Love

* Re: [PATCH] NMI request/release
From: Corey Minyard @ 2002-10-22 18:08 UTC (permalink / raw)
To: Robert Love; +Cc: John Levon, linux-kernel

Robert Love wrote:

>On Tue, 2002-10-22 at 09:02, Corey Minyard wrote:
>
>>I looked, and the rcu code relies on turning off interrupts to avoid
>>preemption. So it won't work.
>>
>At least on the variant of RCU that is in 2.5, the RCU code does the
>read side by disabling preemption. Nothing else.
>
In 2.5.44, stock from kernel.org, rcu_process_callbacks() calls
local_irq_disable(). Is that just preemption disabling, now?

>The write side is the same with or without preemption - wait until all
>readers are quiescent, change the copy, etc.
>
>But anyhow, disabling interrupts should not affect NMIs, no?
>
You are correct. Disabling preemption or interrupts has no effect on
NMIs.

-Corey

* Re: [PATCH] NMI request/release
From: Robert Love @ 2002-10-22 18:16 UTC (permalink / raw)
To: Corey Minyard; +Cc: John Levon, linux-kernel

On Tue, 2002-10-22 at 14:08, Corey Minyard wrote:

> In 2.5.44, stock from kernel.org, rcu_process_callbacks() calls
> local_irq_disable(). Is that just preemption disabling, now?

No, but rcu_process_callbacks() is for the copy update part. Look at
rcu_read_lock() and rcu_read_unlock()... those are the read paths.

Of course, I can be very wrong about some of this, RCU
grosses^Wconfuses me. But the read paths do just seem to call
rcu_read_lock/unlock, which just do a preempt_disable/enable. Otherwise
the read path is entirely transparent.

	Robert Love
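
A small sketch of what Robert describes; in this era the read-side
primitives reduce to preemption control (shapes assumed from
linux/rcupdate.h of the period, shown only for illustration):

	/* Rough shape of the 2.5 read-side primitives. */
	#define rcu_read_lock()		preempt_disable()
	#define rcu_read_unlock()	preempt_enable()

	/* So a reader-side walk takes no lock at all: */
	void reader_walk(void)
	{
		rcu_read_lock();	/* only disables preemption */
		/* ... traverse the RCU-protected list; nodes cannot be
		   freed under us, because updaters defer their frees
		   via call_rcu() until a grace period passes ... */
		rcu_read_unlock();
	}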

* Re: [PATCH] NMI request/release
From: Dipankar Sarma @ 2002-10-22 20:04 UTC (permalink / raw)
To: Corey Minyard; +Cc: Robert Love, linux-kernel

On Tue, Oct 22, 2002 at 08:30:16PM +0200, Corey Minyard wrote:

> Robert Love wrote:
> >At least on the variant of RCU that is in 2.5, the RCU code does the
> >read side by disabling preemption. Nothing else.
> >
> In 2.5.44, stock from kernel.org, rcu_process_callbacks() calls
> local_irq_disable(). Is that just preemption disabling, now?

No, that is to allow queueing of callbacks from irq context - see
call_rcu(). Since the queues are per-CPU, we don't need any spinlock.
rcu_process_callbacks() is always invoked from the RCU per-CPU tasklet,
so preemption doesn't come into the picture. But irq disabling is
needed so that it doesn't race with call_rcu().

The only preemption-related issue with RCU is that on the reader side
(in your case, traversing the nmi handler list for invocation), there
should not be any preemption (not possible anyway in your case). This
is achieved by rcu_read_lock()/rcu_read_unlock(), which essentially
disable/enable preemption. The idea is that if you get preempted while
holding a reference to some RCU-protected data, it is not safe to
invoke the RCU callback. Once you get preempted, you can run on a
different CPU, and keeping track of the preempted tasks becomes
difficult. Besides, preempted tasks with low priority can delay RCU
updates for long periods. Hence the disabling of preemption, which is
no worse than locks.

> >But anyhow, disabling interrupts should not affect NMIs, no?
> >
> You are correct. Disabling preemption or interrupts has no effect on
> NMIs.

Yes.

Thanks
Dipankar

* Re: [PATCH] NMI request/release
From: Dipankar Sarma @ 2002-10-22 17:53 UTC (permalink / raw)
To: Corey Minyard; +Cc: linux-kernel, levon

On Tue, Oct 22, 2002 at 03:10:06PM +0200, Corey Minyard wrote:

> >If it's possible (and I have no idea, not having looked at RCU at all)
> >it seems the right way.
> >
> I looked, and the rcu code relies on turning off interrupts to avoid
> preemption. So it won't work.

Hmm.. Let me see -

You need to walk the list in call_nmi_handlers from the nmi interrupt
handler, where preemption is not an issue anyway. Using RCU you can
possibly do a safe walk of the nmi handlers. To do this, your update
side code (request/release nmi) will still have to be serialized
(spinlock), but you should not need to wait for completion of any other
CPU executing the nmi handler; instead, provide wrappers for
nmi_handler allocation/free and there free the nmi_handler using an RCU
callback (call_rcu()). The nmi_handler will not be freed until all the
CPUs have done a context switch, executed user-level code, or been
idle. This guarantees that *this* nmi_handler is not in execution and
can safely be freed.

This of course is a very simplistic view of things; there could be
complications that I may have overlooked. But I would be happy to help
out on this if you want.

Thanks
Dipankar
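
A sketch of the scheme Dipankar outlines; nmi_handler_lock is the
existing update-side lock, and nmi_handler_free() is a hypothetical
stand-in for however the caller frees its handler:

	static void nmi_handler_rcu_free(void *arg)
	{
		struct nmi_handler *h = arg;

		/* By the time this callback runs, every CPU has passed
		   through a quiescent state, so no CPU can still be
		   inside h->handler(). */
		nmi_handler_free(h);
	}

	void example_release_nmi(struct nmi_handler *h)
	{
		spin_lock(&nmi_handler_lock);	/* serialize updaters only */
		/* ... unlink h from the handler list ... */
		spin_unlock(&nmi_handler_lock);

		/* Defer the actual free past the grace period; no
		   waiting for other CPUs here. */
		call_rcu(&h->rcu, nmi_handler_rcu_free, h);
	}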

* Re: [PATCH] NMI request/release
From: Corey Minyard @ 2002-10-22 18:05 UTC (permalink / raw)
To: dipankar; +Cc: linux-kernel, levon

Dipankar Sarma wrote:

>On Tue, Oct 22, 2002 at 03:10:06PM +0200, Corey Minyard wrote:
>
>>>If it's possible (and I have no idea, not having looked at RCU at all)
>>>it seems the right way.
>>>
>>I looked, and the rcu code relies on turning off interrupts to avoid
>>preemption. So it won't work.
>>
>Hmm.. Let me see -
>
>You need to walk the list in call_nmi_handlers from the nmi interrupt
>handler, where preemption is not an issue anyway. Using RCU you can
>possibly do a safe walk of the nmi handlers. To do this, your update
>side code (request/release nmi) will still have to be serialized
>(spinlock), but you should not need to wait for completion of any other
>CPU executing the nmi handler; instead, provide wrappers for
>nmi_handler allocation/free and there free the nmi_handler using an RCU
>callback (call_rcu()). The nmi_handler will not be freed until all the
>CPUs have done a context switch, executed user-level code, or been
>idle. This guarantees that *this* nmi_handler is not in execution and
>can safely be freed.
>
>This of course is a very simplistic view of things; there could be
>complications that I may have overlooked. But I would be happy to help
>out on this if you want.
>
This doesn't sound any simpler than what I am doing right now. In fact,
it sounds more complex. Am I correct? What I am doing is pretty simple
and correct. Maybe more complexity would be required if you couldn't
atomically update a pointer, but I think simplicity should win here.

Thanks,

-Corey

* Re: [PATCH] NMI request/release
From: Dipankar Sarma @ 2002-10-22 18:08 UTC (permalink / raw)
To: Corey Minyard; +Cc: linux-kernel, levon

On Tue, Oct 22, 2002 at 01:05:57PM -0500, Corey Minyard wrote:

> >You need to walk the list in call_nmi_handlers from the nmi interrupt
> >handler, where preemption is not an issue anyway. Using RCU you can
> >possibly do a safe walk of the nmi handlers. To do this, your update
> >side code (request/release nmi) will still have to be serialized
> >(spinlock), but you should not need to wait for completion of any
> >other CPU executing the nmi handler; instead, provide wrappers for
> >nmi_handler allocation/free and there free the nmi_handler using an
> >RCU callback (call_rcu()). The nmi_handler will not be freed until
> >all the CPUs have done a context switch, executed user-level code,
> >or been idle. This guarantees that *this* nmi_handler is not in
> >execution and can safely be freed.
> >
> >This of course is a very simplistic view of things; there could be
> >complications that I may have overlooked. But I would be happy to
> >help out on this if you want.
> >
> This doesn't sound any simpler than what I am doing right now. In fact,
> it sounds more complex. Am I correct? What I am doing is pretty simple
> and correct. Maybe more complexity would be required if you couldn't
> atomically update a pointer, but I think simplicity should win here.

I would vote for simplicity and would normally agree with you here. But
it seems to me that using RCU, you can avoid atomic operations and
cache line bouncing of calling_nmi_handlers in the fast path (the nmi
interrupt handler). One could argue whether it is really a fast path or
not, but if you are using it for profiling, I would say it is. No?

Thanks
Dipankar

* Re: [PATCH] NMI request/release
From: Corey Minyard @ 2002-10-22 18:29 UTC (permalink / raw)
To: dipankar; +Cc: linux-kernel, levon

Dipankar Sarma wrote:

>On Tue, Oct 22, 2002 at 01:05:57PM -0500, Corey Minyard wrote:
>
>>This doesn't sound any simpler than what I am doing right now. In fact,
>>it sounds more complex. Am I correct? What I am doing is pretty simple
>>and correct. Maybe more complexity would be required if you couldn't
>>atomically update a pointer, but I think simplicity should win here.
>>
>I would vote for simplicity and would normally agree with you here. But
>it seems to me that using RCU, you can avoid atomic operations and
>cache line bouncing of calling_nmi_handlers in the fast path (the nmi
>interrupt handler). One could argue whether it is really a fast path or
>not, but if you are using it for profiling, I would say it is. No?
>
I would vote against using it for profiling; profiling has its own
special fast-path, anyway. The NMI watchdog only gets hit once every
minute or so, it seems, so that seems suitable for this.

I've looked at the RCU code a little more, and I think I understand it
better. I think your scenario will work, if it's true that it won't be
called until all CPUs have done what you say. I'll look at it a little
more.

-Corey

* Re: [PATCH] NMI request/release
From: John Levon @ 2002-10-22 19:08 UTC (permalink / raw)
To: linux-kernel; +Cc: dipankar, cminyard

On Tue, Oct 22, 2002 at 01:29:55PM -0500, Corey Minyard wrote:

> I would vote against using it for profiling; profiling has its own
> special fast-path, anyway.

But it would be good (less code, simpler, and even possibly for keeping
the NMI watchdog ticking when oprofile is running) if we could merge
the two cases.

> The NMI watchdog only gets hit once every
> minute or so, it seems, so that seems suitable for this.

It can easily be much more frequent than that (though you could argue
this is a mis-setup).

> I've looked at the RCU code a little more, and I think I understand it
> better. I think your scenario will work, if it's true that it won't be
> called until all CPUs have done what you say. I'll look at it a little
> more.

Thanks for looking into this ...

regards
john

--
"This is mindless pedantism up with which I will not put."
	- Donald Knuth on Pascal's lack of default: case statement

* [PATCH] NMI request/release, version 3
From: Corey Minyard @ 2002-10-22 21:36 UTC (permalink / raw)
To: linux-kernel; +Cc: John Levon, dipankar

[-- Attachment #1: Type: text/plain, Size: 326 bytes --]

Add a request/release mechanism to the kernel (x86 only for now) for
NMIs. This version uses the rcupdate mechanism to free the link items,
instead of using the previous algorithm for interacting with the NMI
handler code to free the item. It should be more scalable.

Thanks to everyone who helped me with this.

-Corey

[-- Attachment #2: linux-nmi-v3.diff --]
[-- Type: text/plain, Size: 10179 bytes --]

diff -ur linux.orig/arch/i386/kernel/i386_ksyms.c linux/arch/i386/kernel/i386_ksyms.c
--- linux.orig/arch/i386/kernel/i386_ksyms.c	Mon Oct 21 13:25:58 2002
+++ linux/arch/i386/kernel/i386_ksyms.c	Tue Oct 22 12:19:59 2002
@@ -90,6 +90,9 @@
 EXPORT_SYMBOL(cpu_khz);
 EXPORT_SYMBOL(apm_info);
 
+EXPORT_SYMBOL(request_nmi);
+EXPORT_SYMBOL(release_nmi);
+
 #ifdef CONFIG_DEBUG_IOVIRT
 EXPORT_SYMBOL(__io_virt_debug);
 #endif
diff -ur linux.orig/arch/i386/kernel/irq.c linux/arch/i386/kernel/irq.c
--- linux.orig/arch/i386/kernel/irq.c	Mon Oct 21 13:25:58 2002
+++ linux/arch/i386/kernel/irq.c	Tue Oct 22 12:08:20 2002
@@ -131,6 +131,8 @@
  * Generic, controller-independent functions:
  */
 
+extern void nmi_append_user_names(struct seq_file *p);
+
 int show_interrupts(struct seq_file *p, void *v)
 {
 	int i, j;
@@ -166,6 +168,8 @@
 		for (j = 0; j < NR_CPUS; j++)
 			if (cpu_online(j))
 				p += seq_printf(p, "%10u ", nmi_count(j));
+		seq_printf(p, "  ");
+		nmi_append_user_names(p);
 		seq_putc(p, '\n');
 #if CONFIG_X86_LOCAL_APIC
 	seq_printf(p, "LOC: ");
diff -ur linux.orig/arch/i386/kernel/nmi.c linux/arch/i386/kernel/nmi.c
--- linux.orig/arch/i386/kernel/nmi.c	Mon Oct 21 13:25:45 2002
+++ linux/arch/i386/kernel/nmi.c	Tue Oct 22 16:18:00 2002
@@ -20,6 +20,7 @@
 #include <linux/interrupt.h>
 #include <linux/mc146818rtc.h>
 #include <linux/kernel_stat.h>
+#include <linux/notifier.h>
 
 #include <asm/smp.h>
 #include <asm/mtrr.h>
@@ -102,6 +103,17 @@
 	return 0;
 }
 
+static int nmi_watchdog_tick (void * dev_id, struct pt_regs * regs);
+
+static struct nmi_handler nmi_watchdog_handler =
+{
+	.dev_name = "nmi_watchdog",
+	.dev_id   = NULL,
+	.handler  = nmi_watchdog_tick,
+	.priority = 255, /* We want to be relatively high priority. */
+	.freed    = NULL
+};
+
 static int __init setup_nmi_watchdog(char *str)
 {
 	int nmi;
@@ -110,6 +122,12 @@
 
 	if (nmi >= NMI_INVALID)
 		return 0;
+
+	if (request_nmi(&nmi_watchdog_handler) != 0) {
+		/* Couldn't add a watchdog handler, give up. */
+		return 0;
+	}
+
 	if (nmi == NMI_NONE)
 		nmi_watchdog = nmi;
 	/*
@@ -350,7 +368,7 @@
 		alert_counter[i] = 0;
 }
 
-void nmi_watchdog_tick (struct pt_regs * regs)
+static int nmi_watchdog_tick (void * dev_id, struct pt_regs * regs)
 {
 
 	/*
@@ -401,4 +419,6 @@
 		}
 		wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
 	}
+
+	return NOTIFY_OK;
 }
diff -ur linux.orig/arch/i386/kernel/traps.c linux/arch/i386/kernel/traps.c
--- linux.orig/arch/i386/kernel/traps.c	Mon Oct 21 13:25:45 2002
+++ linux/arch/i386/kernel/traps.c	Tue Oct 22 16:01:33 2002
@@ -23,6 +23,10 @@
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
 #include <linux/highmem.h>
+#include <linux/notifier.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/rcupdate.h>
 
 #ifdef CONFIG_EISA
 #include <linux/ioport.h>
@@ -485,21 +489,193 @@
 	printk("Do you have a strange power saving mode enabled?\n");
 }
 
+/*
+ * A list of handlers for NMIs.  This list will be called in order
+ * when an NMI from an otherwise unidentifiable source comes in.  If
+ * one of these handles the NMI, it should return NOTIFY_OK, otherwise
+ * it should return NOTIFY_DONE.  NMI handlers cannot claim spinlocks,
+ * so we have to handle freeing these in a different manner.  A
+ * spinlock protects the list from multiple writers.  When something
+ * is removed from the list, it is thrown into another list (with
+ * another link, so the "next" element stays valid) and scheduled to
+ * run as an rcu.  When the rcu runs, it is guaranteed that nothing in
+ * the NMI code will be using it.
+ */
+static struct nmi_handler *nmi_handler_list = NULL;
+static spinlock_t nmi_handler_lock = SPIN_LOCK_UNLOCKED;
+static struct nmi_handler *nmi_to_free_list = NULL;
+static spinlock_t nmi_to_free_lock = SPIN_LOCK_UNLOCKED;
+
+struct rcu_head nmi_rcu;
+
+/*
+ * To free the list item, we use an rcu.  The rcu-function will not
+ * run until all processors have done a context switch, gone idle, or
+ * gone to a user process, so it's guaranteed that when this runs, any
+ * NMI handler running at release time has completed and the list item
+ * can be safely freed.
+ */
+static void really_free_nmi_list(void *unused)
+{
+	unsigned long flags;
+	struct nmi_handler *item;
+
+	spin_lock_irqsave(&nmi_to_free_lock, flags);
+	while (nmi_to_free_list) {
+		item = nmi_to_free_list;
+		nmi_to_free_list = item->link2;
+		item->freed(item);
+	}
+	spin_unlock_irqrestore(&nmi_to_free_lock, flags);
+}
+
+static inline void free_nmi_handler(struct nmi_handler *item)
+{
+	unsigned long flags;
+
+	if (!item->freed)
+		return;
+
+	spin_lock_irqsave(&nmi_to_free_lock, flags);
+	/* We only have one copy of nmi_rcu, so we only want to add it
+	   once.  If there are items in the list, then it has already
+	   been added. */
+	if (nmi_to_free_list == NULL)
+		call_rcu(&nmi_rcu, really_free_nmi_list, NULL);
+	item->link2 = nmi_to_free_list;
+	nmi_to_free_list = item;
+	spin_unlock_irqrestore(&nmi_to_free_lock, flags);
+}
+
+static inline struct nmi_handler *find_nmi_handler(
+	struct nmi_handler *handler,
+	struct nmi_handler **rprev)
+{
+	struct nmi_handler *curr, *prev;
+
+	curr = nmi_handler_list;
+	prev = NULL;
+	while (curr) {
+		if (curr == handler) {
+			break;
+		}
+		prev = curr;
+		curr = curr->next;
+	}
+
+	if (rprev)
+		*rprev = prev;
+
+	return curr;
+}
+
+int request_nmi(struct nmi_handler *handler)
+{
+	struct nmi_handler *curr;
+
+	spin_lock(&nmi_handler_lock);
+
+	/* Make sure the thing is not already in the list. */
+	if (find_nmi_handler(handler, NULL)) {
+		/* Must drop the lock before bailing out. */
+		spin_unlock(&nmi_handler_lock);
+		return EBUSY;
+	}
+
+	/* Add it into the list in priority order. */
+	curr = nmi_handler_list;
+	if ((!curr) || (curr->priority < handler->priority)) {
+		handler->next = nmi_handler_list;
+		smp_mb();
+		xchg(&nmi_handler_list, handler);
+	} else {
+		while (curr->next &&
+		       (curr->next->priority > handler->priority))
+		{
+			curr = curr->next;
+		}
+		handler->next = curr->next;
+		smp_mb();
+		xchg(&(curr->next), handler);
+	}
+
+	spin_unlock(&nmi_handler_lock);
+	return 0;
+}
+
+void release_nmi(struct nmi_handler *handler)
+{
+	struct nmi_handler *curr, *prev;
+
+	spin_lock(&nmi_handler_lock);
+
+	/* Find it in the list. */
+	curr = find_nmi_handler(handler, &prev);
+
+	if (curr) {
+		/* If it was found, remove it from the list.  We
+		   assume the write operation here is atomic. */
+		smp_mb();
+		if (prev)
+			xchg(&(prev->next), curr->next);
+		else
+			xchg(&nmi_handler_list, curr->next);
+
+		free_nmi_handler(curr);
+	}
+	spin_unlock(&nmi_handler_lock);
+}
+
+static int call_nmi_handlers(struct pt_regs * regs)
+{
+	struct nmi_handler *curr, *next;
+	int handled = 0;
+	int val;
+
+	curr = nmi_handler_list;
+	while (curr) {
+		next = curr->next;
+		val = curr->handler(curr->dev_id, regs);
+		switch (val & ~NOTIFY_STOP_MASK) {
+		case NOTIFY_OK:
+			handled = 1;
+			break;

+		case NOTIFY_DONE:
+		default:
+		}
+		if (val & NOTIFY_STOP_MASK)
+			break;
+		curr = next;
+	}
+
+	return handled;
+}
+
+void nmi_append_user_names(struct seq_file *p)
+{
+	struct nmi_handler *curr;
+
+	spin_lock(&nmi_handler_lock);
+	curr = nmi_handler_list;
+	while (curr) {
+		if (curr->dev_name)
+			p += seq_printf(p, " %s", curr->dev_name);
+		curr = curr->next;
+	}
+	spin_unlock(&nmi_handler_lock);
+}
+
 static void default_do_nmi(struct pt_regs * regs)
 {
 	unsigned char reason = inb(0x61);
 
 	if (!(reason & 0xc0)) {
-#if CONFIG_X86_LOCAL_APIC
 		/*
-		 * Ok, so this is none of the documented NMI sources,
-		 * so it must be the NMI watchdog.
+		 * Check our handler list to see if anyone can handle this
+		 * nmi.
 		 */
-		if (nmi_watchdog) {
-			nmi_watchdog_tick(regs);
+		if (call_nmi_handlers(regs))
 			return;
-		}
-#endif
+
 		unknown_nmi_error(reason, regs);
 		return;
 	}
diff -ur linux.orig/include/asm-i386/apic.h linux/include/asm-i386/apic.h
--- linux.orig/include/asm-i386/apic.h	Mon Oct 21 13:26:04 2002
+++ linux/include/asm-i386/apic.h	Tue Oct 22 12:40:16 2002
@@ -79,7 +79,6 @@
 extern void setup_boot_APIC_clock (void);
 extern void setup_secondary_APIC_clock (void);
 extern void setup_apic_nmi_watchdog (void);
-extern inline void nmi_watchdog_tick (struct pt_regs * regs);
 extern int APIC_init_uniprocessor (void);
 extern void disable_APIC_timer(void);
 extern void enable_APIC_timer(void);
diff -ur linux.orig/include/asm-i386/irq.h linux/include/asm-i386/irq.h
--- linux.orig/include/asm-i386/irq.h	Mon Oct 21 13:24:41 2002
+++ linux/include/asm-i386/irq.h	Tue Oct 22 16:13:30 2002
@@ -28,4 +28,43 @@
 #define ARCH_HAS_NMI_WATCHDOG /* See include/linux/nmi.h */
 #endif
 
+
+/**
+ * Register a handler to get called when an NMI occurs.  If the
+ * handler actually handles the NMI, it should return NOTIFY_OK.  If
+ * it did not handle the NMI, it should return NOTIFY_DONE.  It may "or"
+ * on NOTIFY_STOP_MASK to the return value if it does not want other
+ * handlers after it to be notified.  Note that this is a slow-path
+ * handler, if you have a fast-path handler there's another tie-in
+ * for you.  See the oprofile code.
+ *
+ * Note that when you release the handler, you may NOT immediately
+ * free or reuse the handler item, and you may not unload the module
+ * of any handler, because it still may be referenced in an NMI call.
+ * Instead, you should wait until the supplied "freed" callback is
+ * called.
+ */
+#define HAVE_NMI_HANDLER 1
+struct nmi_handler
+{
+	char *dev_name;
+	void *dev_id;
+	int (*handler)(void *dev_id, struct pt_regs *regs);
+	int priority; /* Handlers called in priority order. */
+
+	/* If "freed" is not NULL, this will be called when the item
+	   is no longer in use. */
+	void (*freed)(const void *arg);
+
+	/* Don't mess with anything below here. */
+
+	struct nmi_handler *next;
+
+	/* This is for linking into the list of things released in
+	   the rcu callback.  We can't use next because we can't
+	   touch it until the rcu callback runs. */
+	struct nmi_handler *link2;
+};
+
+int request_nmi(struct nmi_handler *handler);
+void release_nmi(struct nmi_handler *handler);
+
 #endif /* _ASM_IRQ_H */

* Re: [PATCH] NMI request/release, version 3
From: Dipankar Sarma @ 2002-10-23 17:33 UTC (permalink / raw)
To: Corey Minyard; +Cc: linux-kernel, John Levon

On Tue, Oct 22, 2002 at 04:36:51PM -0500, Corey Minyard wrote:
> +static struct nmi_handler *nmi_handler_list = NULL;
> +static spinlock_t nmi_handler_lock = SPIN_LOCK_UNLOCKED;
> +static struct nmi_handler *nmi_to_free_list = NULL;
> +static spinlock_t nmi_to_free_lock = SPIN_LOCK_UNLOCKED;
> +
> +struct rcu_head nmi_rcu;
> +
> +/*
> + * To free the list item, we use an rcu.  The rcu-function will not
> + * run until all processors have done a context switch, gone idle, or
> + * gone to a user process, so it's guaranteed that when this runs, any
> + * NMI handler running at release time has completed and the list item
> + * can be safely freed.
> + */
> +static void really_free_nmi_list(void *unused)
> +{
> +	unsigned long flags;
> +	struct nmi_handler *item;
> +
> +	spin_lock_irqsave(&nmi_to_free_lock, flags);
> +	while (nmi_to_free_list) {
> +		item = nmi_to_free_list;
> +		nmi_to_free_list = item->link2;
> +		item->freed(item);
> +	}
> +	spin_unlock_irqrestore(&nmi_to_free_lock, flags);
> +}
> +
> +static inline void free_nmi_handler(struct nmi_handler *item)
> +{
> +	unsigned long flags;
> +
> +	if (!item->freed)
> +		return;
> +
> +	spin_lock_irqsave(&nmi_to_free_lock, flags);
> +	/* We only have one copy of nmi_rcu, so we only want to add it
> +	   once.  If there are items in the list, then it has already
> +	   been added. */
> +	if (nmi_to_free_list == NULL)
> +		call_rcu(&nmi_rcu, really_free_nmi_list, NULL);
> +	item->link2 = nmi_to_free_list;
> +	nmi_to_free_list = item;
> +	spin_unlock_irqrestore(&nmi_to_free_lock, flags);
> +}

Hmm... I am not sure if this is correct. Your grace period starts from
the moment you do a call_rcu(). So, this example sequence might result
in a problem here -

CPU#0                  CPU#1      CPU#2             CPU#3

free_nmi_handler(X)
call_rcu
                       cswitch
cswitch
                                  cswitch
                                  nmi_handler(Y)
free_nmi_handler(Y)
[gets queued in the
 same free list as X]
                                                    cswitch
really_free_nmi_list
[nmi_handler still
 executing]

Since a context switch has happened on all the CPUs since call_rcu(),
the real update may happen and free the nmi_handler Y while it is
executing in CPU#2.

IOW, once you start one RCU grace period, you cannot add to that list,
since the adding CPU may already have participated in RCU and indicated
that it doesn't have any references. Once you hand over stuff to RCU,
you must use a new call_rcu() for that new batch.

I am wondering if we could do something like this -

static struct list_head nmi_handler_list = LIST_INIT_HEAD(nmi_handler_list);

struct nmi_handler
{
	struct list_head link;
	char *dev_name;
	void *dev_id;
	int (*handler)(void *dev_id, struct pt_regs *regs);
	int priority;
	void (*freed)(const void *arg);
	struct rcu_head rcu;
};

static void really_free_nmi_list(void *arg)
{
	struct nmi_handler *handler = arg;

	list_head_init(&handler->link);
	if (handler->freed)
		handler->freed(handler);
}

void release_nmi(struct nmi_handler *handler)
{
	if (handler == NULL)
		return;
	spin_lock(&nmi_handler_lock);
	list_del_rcu(&handler->link);
	spin_unlock(&nmi_handler_lock);
	call_rcu(&handler->rcu, really_free_nmi_handler, handler);
}

int request_nmi(struct nmi_handler *handler)
{
	struct list_head *head, *curr;
	struct nmi_handler *curr_h;

	/* Make sure the thing is not already in the list. */
	if (!list_empty(&handler->link))
		return EBUSY;

	spin_lock(&nmi_handler_lock);

	/* Add it into the list in priority order. */
	head = &nmi_handler_list;
	__list_for_each(curr, head) {
		curr_h = list_entry(curr, struct nmi_handler, link);
		if (curr_h->priority <= handler->priority)
			break;
	}

	/* list_add_rcu takes care of memory barrier */
	list_add_rcu(&handler->link, curr->link.prev);
	spin_unlock(&nmi_handler_lock);
	return 0;
}

static int call_nmi_handlers(struct pt_regs * regs)
{
	struct list_head *head, *curr;
	int handled = 0;
	int val;

	head = &nmi_handler_list;
	/* __list_for_each_rcu takes care of memory barriers */
	__list_for_each_rcu(curr, head) {
		curr_h = list_entry(curr, struct nmi_handler, link);
		val = curr_h->handler(curr_h->dev_id, regs);
		switch (val & ~NOTIFY_STOP_MASK) {
		case NOTIFY_OK:
			handled = 1;
			break;

		case NOTIFY_DONE:
		default:
		}
		if (val & NOTIFY_STOP_MASK)
			break;
	}
	return handled;
}

I probably have missed quite a few things here in these changes, but it
would be interesting to see if they could be made to work. One clear
problem - someone does a release_nmi() and then a request_nmi() on the
same handler while it is waiting for its RCU grace period to be over.
Oh well :-)

Thanks
Dipankar

* Re: [PATCH] NMI request/release, version 3
From: Corey Minyard @ 2002-10-23 18:03 UTC (permalink / raw)
To: dipankar; +Cc: linux-kernel, John Levon

Dipankar Sarma wrote:

>On Tue, Oct 22, 2002 at 04:36:51PM -0500, Corey Minyard wrote:
>
>Hmm... I am not sure if this is correct.
>
Yes, it's not correct, I will fix it. I didn't realize there was an
rcu-safe list. That should make things simpler.

>I probably have missed quite a few things here in these changes, but it
>would be interesting to see if they could be made to work. One clear
>problem - someone does a release_nmi() and then a request_nmi() on the
>same handler while it is waiting for its RCU grace period to be over.
>Oh well :-)
>
Well, the documentation said "Don't do that!". But I think you are
right, I will make release_nmi() block until the item is free.

Thanks,

-Corey

* Re: [PATCH] NMI request/release, version 3
From: Dipankar Sarma @ 2002-10-23 18:57 UTC (permalink / raw)
To: Corey Minyard; +Cc: linux-kernel, John Levon

On Wed, Oct 23, 2002 at 01:03:11PM -0500, Corey Minyard wrote:

> Yes, it's not correct, I will fix it. I didn't realize there was an
> rcu-safe list. That should make things simpler.

Yeah, RCU documentation needs some serious updating :(

One other thing: it might be better to do all the handler checks in
request_nmi and release_nmi inside the spinlock-serialized critical
section - things like the list_empty() check etc. - unlike in the code
snippet I mailed. That way you need to look at memory barrier issues in
only one piece of lock-free code: call_nmi_handlers.

Thanks
Dipankar
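
A sketch of that suggestion applied to request_nmi(): the busy check
moves under the same lock as the insert, leaving call_nmi_handlers()
as the only lock-free path to audit:

	int request_nmi(struct nmi_handler *handler)
	{
		spin_lock(&nmi_handler_lock);

		if (!list_empty(&handler->link)) {
			/* Checked under the lock, so no ordering
			   games with a concurrent register/release. */
			spin_unlock(&nmi_handler_lock);
			return EBUSY;
		}

		/* ... priority-ordered list_add_rcu(), as before ... */

		spin_unlock(&nmi_handler_lock);
		return 0;
	}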
* [PATCH] NMI request/release, version 4 2002-10-23 18:57 ` Dipankar Sarma @ 2002-10-23 20:14 ` Corey Minyard 2002-10-23 20:50 ` Dipankar Sarma 2002-10-24 7:50 ` Dipankar Sarma 0 siblings, 2 replies; 43+ messages in thread From: Corey Minyard @ 2002-10-23 20:14 UTC (permalink / raw) To: dipankar; +Cc: linux-kernel, John Levon [-- Attachment #1: Type: text/plain, Size: 539 bytes --] Ok, here's try number 4. This one fixes the race condition that Dipankar pointed out, and modifies the release_nmi() call to block until the rcu finishes. I have noticed that the rcu callback can be delayed a long time, sometimes up to 3 seconds. That seems odd. I have verified that the delay happens there. Also, does the __wait_for_event() code in release_nmi() need any memory barriers? Also, the code in traps.c is pretty generic. Perhaps it could be moved to a common place? Or is that worth it? Thanks again, -Corey [-- Attachment #2: linux-nmi-v4.diff --] [-- Type: text/plain, Size: 9106 bytes --] diff -ur linux.orig/arch/i386/kernel/i386_ksyms.c linux/arch/i386/kernel/i386_ksyms.c --- linux.orig/arch/i386/kernel/i386_ksyms.c Mon Oct 21 13:25:58 2002 +++ linux/arch/i386/kernel/i386_ksyms.c Tue Oct 22 12:19:59 2002 @@ -90,6 +90,9 @@ EXPORT_SYMBOL(cpu_khz); EXPORT_SYMBOL(apm_info); +EXPORT_SYMBOL(request_nmi); +EXPORT_SYMBOL(release_nmi); + #ifdef CONFIG_DEBUG_IOVIRT EXPORT_SYMBOL(__io_virt_debug); #endif diff -ur linux.orig/arch/i386/kernel/irq.c linux/arch/i386/kernel/irq.c --- linux.orig/arch/i386/kernel/irq.c Mon Oct 21 13:25:58 2002 +++ linux/arch/i386/kernel/irq.c Tue Oct 22 12:08:20 2002 @@ -131,6 +131,8 @@ * Generic, controller-independent functions: */ +extern void nmi_append_user_names(struct seq_file *p); + int show_interrupts(struct seq_file *p, void *v) { int i, j; @@ -166,6 +168,8 @@ for (j = 0; j < NR_CPUS; j++) if (cpu_online(j)) p += seq_printf(p, "%10u ", nmi_count(j)); + seq_printf(p, " "); + nmi_append_user_names(p); seq_putc(p, '\n'); #if CONFIG_X86_LOCAL_APIC seq_printf(p, "LOC: "); diff -ur linux.orig/arch/i386/kernel/nmi.c linux/arch/i386/kernel/nmi.c --- linux.orig/arch/i386/kernel/nmi.c Mon Oct 21 13:25:45 2002 +++ linux/arch/i386/kernel/nmi.c Wed Oct 23 13:57:55 2002 @@ -20,6 +20,7 @@ #include <linux/interrupt.h> #include <linux/mc146818rtc.h> #include <linux/kernel_stat.h> +#include <linux/notifier.h> #include <asm/smp.h> #include <asm/mtrr.h> @@ -102,6 +103,17 @@ return 0; } +static int nmi_watchdog_tick (void * dev_id, struct pt_regs * regs); + +static struct nmi_handler nmi_watchdog_handler = +{ + .link = LIST_HEAD_INIT(nmi_watchdog_handler.link), + .dev_name = "nmi_watchdog", + .dev_id = NULL, + .handler = nmi_watchdog_tick, + .priority = 255, /* We want to be relatively high priority. */ +}; + static int __init setup_nmi_watchdog(char *str) { int nmi; @@ -110,6 +122,12 @@ if (nmi >= NMI_INVALID) return 0; + + if (request_nmi(&nmi_watchdog_handler) != 0) { + /* Couldn't add a watchdog handler, give up. 
*/
+		return 0;
+	}
+
 	if (nmi == NMI_NONE)
 		nmi_watchdog = nmi;
 	/*
@@ -350,7 +368,7 @@
 	alert_counter[i] = 0;
 }
 
-void nmi_watchdog_tick (struct pt_regs * regs)
+static int nmi_watchdog_tick (void * dev_id, struct pt_regs * regs)
 {
 
 	/*
@@ -401,4 +419,6 @@
 		}
 		wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
 	}
+
+	return NOTIFY_OK;
 }
diff -ur linux.orig/arch/i386/kernel/traps.c linux/arch/i386/kernel/traps.c
--- linux.orig/arch/i386/kernel/traps.c	Mon Oct 21 13:25:45 2002
+++ linux/arch/i386/kernel/traps.c	Wed Oct 23 14:58:09 2002
@@ -23,6 +23,10 @@
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
 #include <linux/highmem.h>
+#include <linux/notifier.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/rcupdate.h>
 
 #ifdef CONFIG_EISA
 #include <linux/ioport.h>
@@ -485,21 +489,139 @@
 	printk("Do you have a strange power saving mode enabled?\n");
 }
 
+/*
+ * A list of handlers for NMIs.  This list will be called in order
+ * when an NMI from an otherwise unidentifiable source comes in.  If
+ * one of these handles the NMI, it should return NOTIFY_OK, otherwise
+ * it should return NOTIFY_DONE.  NMI handlers cannot claim spinlocks,
+ * so we have to handle freeing these in a different manner.  A
+ * spinlock protects the list from multiple writers.  When something
+ * is removed from the list, it is thrown into another list (with
+ * another link, so the "next" element stays valid) and scheduled to
+ * run as an rcu.  When the rcu runs, it is guaranteed that nothing in
+ * the NMI code will be using it.
+ */
+static struct list_head nmi_handler_list = LIST_HEAD_INIT(nmi_handler_list);
+static spinlock_t nmi_handler_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * To free the list item, we use an rcu.  The rcu-function will not
+ * run until all processors have done a context switch, gone idle, or
+ * gone to a user process, so it's guaranteed that when this runs, any
+ * NMI handler running at release time has completed and the list item
+ * can be safely freed.
+ */
+static void free_nmi_handler(void *arg)
+{
+	struct nmi_handler *handler = arg;
+
+	INIT_LIST_HEAD(&(handler->link));
+	wmb();
+	wake_up(&(handler->wait));
+}
+
+int request_nmi(struct nmi_handler *handler)
+{
+	struct list_head   *curr;
+	struct nmi_handler *curr_h = NULL;
+
+	if (!list_empty(&(handler->link)))
+		return EBUSY;
+
+	spin_lock(&nmi_handler_lock);
+
+	__list_for_each(curr, &nmi_handler_list) {
+		curr_h = list_entry(curr, struct nmi_handler, link);
+		if (curr_h->priority <= handler->priority)
+			break;
+	}
+
+	if (curr_h)
+		/* list_add_rcu takes care of memory barrier */
+		list_add_rcu(&(handler->link), curr_h->link.prev);
+	else
+		list_add_rcu(&(handler->link), &nmi_handler_list);
+
+	spin_unlock(&nmi_handler_lock);
+	return 0;
+}
+
+void release_nmi(struct nmi_handler *handler)
+{
+	wait_queue_t  q_ent;
+	unsigned long flags;
+
+	spin_lock_irqsave(&nmi_handler_lock, flags);
+	list_del_rcu(&(handler->link));
+
+	/* Wait for handler to finish being freed.  This can't be
+	   interrupted, we must wait until it finishes. */
+	init_waitqueue_head(&(handler->wait));
+	init_waitqueue_entry(&q_ent, current);
+	add_wait_queue(&(handler->wait), &q_ent);
+	call_rcu(&(handler->rcu), free_nmi_handler, handler);
+	for (;;) {
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		if (list_empty(&(handler->link)))
+			break;
+		spin_unlock_irqrestore(&nmi_handler_lock, flags);
+		schedule();
+		spin_lock_irqsave(&nmi_handler_lock, flags);
+	}
+	remove_wait_queue(&(handler->wait), &q_ent);
+	spin_unlock_irqrestore(&nmi_handler_lock, flags);
+}
+
+static int call_nmi_handlers(struct pt_regs * regs)
+{
+	struct list_head   *curr;
+	struct nmi_handler *curr_h;
+	int                handled = 0;
+	int                val;
+
+	__list_for_each_rcu(curr, &nmi_handler_list) {
+		curr_h = list_entry(curr, struct nmi_handler, link);
+		val = curr_h->handler(curr_h->dev_id, regs);
+		switch (val & ~NOTIFY_STOP_MASK) {
+		case NOTIFY_OK:
+			handled = 1;
+			break;
+
+		case NOTIFY_DONE:
+		default:
+		}
+		if (val & NOTIFY_STOP_MASK)
+			break;
+	}
+	return handled;
+}
+
+void nmi_append_user_names(struct seq_file *p)
+{
+	struct list_head   *curr;
+	struct nmi_handler *curr_h;
+
+	spin_lock(&nmi_handler_lock);
+	__list_for_each(curr, &nmi_handler_list) {
+		curr_h = list_entry(curr, struct nmi_handler, link);
+		if (curr_h->dev_name)
+			p += seq_printf(p, " %s", curr_h->dev_name);
+	}
+	spin_unlock(&nmi_handler_lock);
+}
+
 static void default_do_nmi(struct pt_regs * regs)
 {
 	unsigned char reason = inb(0x61);
 
 	if (!(reason & 0xc0)) {
-#if CONFIG_X86_LOCAL_APIC
 		/*
-		 * Ok, so this is none of the documented NMI sources,
-		 * so it must be the NMI watchdog.
+		 * Check the handler list to see if anyone can handle this
+		 * nmi.
 		 */
-		if (nmi_watchdog) {
-			nmi_watchdog_tick(regs);
+		if (call_nmi_handlers(regs))
 			return;
-		}
-#endif
+
 		unknown_nmi_error(reason, regs);
 		return;
 	}
diff -ur linux.orig/include/asm-i386/apic.h linux/include/asm-i386/apic.h
--- linux.orig/include/asm-i386/apic.h	Mon Oct 21 13:26:04 2002
+++ linux/include/asm-i386/apic.h	Tue Oct 22 12:40:16 2002
@@ -79,7 +79,6 @@
 extern void setup_boot_APIC_clock (void);
 extern void setup_secondary_APIC_clock (void);
 extern void setup_apic_nmi_watchdog (void);
-extern inline void nmi_watchdog_tick (struct pt_regs * regs);
 extern int APIC_init_uniprocessor (void);
 extern void disable_APIC_timer(void);
 extern void enable_APIC_timer(void);
diff -ur linux.orig/include/asm-i386/irq.h linux/include/asm-i386/irq.h
--- linux.orig/include/asm-i386/irq.h	Mon Oct 21 13:24:41 2002
+++ linux/include/asm-i386/irq.h	Wed Oct 23 14:15:59 2002
@@ -12,6 +12,7 @@
 
 #include <linux/config.h>
 #include <linux/sched.h>
+#include <linux/rcupdate.h>
 /* include comes from machine specific directory */
 #include "irq_vectors.h"
 
@@ -27,5 +28,36 @@
 #ifdef CONFIG_X86_LOCAL_APIC
 #define ARCH_HAS_NMI_WATCHDOG	/* See include/linux/nmi.h */
 #endif
+
+
+/**
+ * Register a handler to get called when an NMI occurs.  If the
+ * handler actually handles the NMI, it should return NOTIFY_OK.  If
+ * it did not handle the NMI, it should return NOTIFY_DONE.  It may "or"
+ * on NOTIFY_STOP_MASK to the return value if it does not want other
+ * handlers after it to be notified.  Note that this is a slow-path
+ * handler, if you have a fast-path handler there's another tie-in
+ * for you.  See the oprofile code.
+ */
+#define HAVE_NMI_HANDLER	1
+struct nmi_handler
+{
+	struct list_head link; /* You must init this before use. */
+
+	char             *dev_name;
+	void             *dev_id;
+	int              (*handler)(void *dev_id, struct pt_regs *regs);
+	int              priority; /* Handlers called in priority order. */
+
+	/* Don't mess with anything below here. */
+
+	struct rcu_head   rcu;
+	wait_queue_head_t wait;
+};
+
+int request_nmi(struct nmi_handler *handler);
+
+/* Release will block until the handler is completely free. */
+void release_nmi(struct nmi_handler *handler);
 #endif /* _ASM_IRQ_H */

^ permalink raw reply	[flat|nested] 43+ messages in thread
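For orientation, here is a minimal sketch of how a driver might use the interface above. This is not code from any posted patch: the handler body, the device type, the detection test, and the priority value are all hypothetical placeholders.

	static int my_nmi_tick(void *dev_id, struct pt_regs *regs)
	{
		struct my_device *dev = dev_id;	/* hypothetical device type */

		if (!my_device_asserted_nmi(dev))	/* placeholder hardware test */
			return NOTIFY_DONE;	/* not ours; let other handlers look */

		/* ... service the device; no spinlocks may be taken here ... */
		return NOTIFY_OK;		/* we handled it */
	}

	static struct nmi_handler my_nmi_handler = {
		.link     = LIST_HEAD_INIT(my_nmi_handler.link),
		.dev_name = "my_driver",
		.dev_id   = &my_device,		/* hypothetical */
		.handler  = my_nmi_tick,
		.priority = 10,			/* arbitrary illustrative value */
	};

	/* At module init (process context): */
	if (request_nmi(&my_nmi_handler))
		return -EBUSY;

	/* At module exit (process context only; release_nmi() can block): */
	release_nmi(&my_nmi_handler);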
* Re: [PATCH] NMI request/release, version 4
  2002-10-23 20:14           ` [PATCH] NMI request/release, version 4 Corey Minyard
@ 2002-10-23 20:50             ` Dipankar Sarma
  2002-10-23 21:53               ` Corey Minyard
  2002-10-24  7:50             ` Dipankar Sarma
  1 sibling, 1 reply; 43+ messages in thread
From: Dipankar Sarma @ 2002-10-23 20:50 UTC (permalink / raw)
  To: Corey Minyard; +Cc: linux-kernel, John Levon

Well, I haven't looked at the whole patch yet, but some quick
responses -

On Wed, Oct 23, 2002 at 03:14:52PM -0500, Corey Minyard wrote:
> I have noticed that the rcu callback can be delayed a long time,
> sometimes up to 3 seconds.  That seems odd.  I have verified that the
> delay happens there.

That kind of latency is really bad. Could you please check the latency
with this change in kernel/rcupdate.c -

 void rcu_check_callbacks(int cpu, int user)
 {
 	if (user ||
-	    (idle_cpu(cpu) && !in_softirq() && hardirq_count() <= 1))
+	    (idle_cpu(cpu) && !in_softirq() &&
+	     hardirq_count() <= (1 << HARDIRQ_SHIFT)))
 		RCU_qsctr(cpu)++;
 	tasklet_schedule(&RCU_tasklet(cpu));

After local_irq_count() went away, the idle CPU check was broken
and that meant that if you had an idle CPU, it could hold up RCU
grace period completion.

> +void release_nmi(struct nmi_handler *handler)
> +{
> +	wait_queue_t  q_ent;
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&nmi_handler_lock, flags);
> +	list_del_rcu(&(handler->link));
> +
> +	/* Wait for handler to finish being freed.  This can't be
> +	   interrupted, we must wait until it finishes. */
> +	init_waitqueue_head(&(handler->wait));
> +	init_waitqueue_entry(&q_ent, current);
> +	add_wait_queue(&(handler->wait), &q_ent);
> +	call_rcu(&(handler->rcu), free_nmi_handler, handler);
> +	for (;;) {
> +		set_current_state(TASK_UNINTERRUPTIBLE);
> +		if (list_empty(&(handler->link)))
> +			break;
> +		spin_unlock_irqrestore(&nmi_handler_lock, flags);
> +		schedule();
> +		spin_lock_irqsave(&nmi_handler_lock, flags);
> +	}
> +	remove_wait_queue(&(handler->wait), &q_ent);
> +	spin_unlock_irqrestore(&nmi_handler_lock, flags);
> +}

It might just be simpler to use completions instead -

	call_rcu(&(handler->rcu), free_nmi_handler, handler);
	init_completion(&handler->completion);
	spin_unlock_irqrestore(&nmi_handler_lock, flags);
	wait_for_completion(&handler->completion);

and do

	complete(&handler->completion);

in the RCU callback.

Also, now I think your original idea of "Don't do this!" :) was right.
Waiting until an nmi handler is seen unlinked could make a task
block for a long time if another task re-installs it. You should
probably just fail installation of a busy handler (checked under
lock).

Thanks
Dipankar

^ permalink raw reply	[flat|nested] 43+ messages in thread
* Re: [PATCH] NMI request/release, version 4
  2002-10-23 20:50             ` Dipankar Sarma
@ 2002-10-23 21:53               ` Corey Minyard
  2002-10-24  7:41                 ` Dipankar Sarma
  0 siblings, 1 reply; 43+ messages in thread
From: Corey Minyard @ 2002-10-23 21:53 UTC (permalink / raw)
  To: dipankar; +Cc: linux-kernel, John Levon

Dipankar Sarma wrote:

>Well, I haven't looked at the whole patch yet, but some quick
>responses -
>
>On Wed, Oct 23, 2002 at 03:14:52PM -0500, Corey Minyard wrote:
>
>>I have noticed that the rcu callback can be delayed a long time,
>>sometimes up to 3 seconds.  That seems odd.  I have verified that the
>>delay happens there.
>
>That kind of latency is really bad. Could you please check the latency
>with this change in kernel/rcupdate.c -
>
> void rcu_check_callbacks(int cpu, int user)
> {
> 	if (user ||
>-	    (idle_cpu(cpu) && !in_softirq() && hardirq_count() <= 1))
>+	    (idle_cpu(cpu) && !in_softirq() &&
>+	     hardirq_count() <= (1 << HARDIRQ_SHIFT)))
> 		RCU_qsctr(cpu)++;
> 	tasklet_schedule(&RCU_tasklet(cpu));
>
>After local_irq_count() went away, the idle CPU check was broken
>and that meant that if you had an idle CPU, it could hold up RCU
>grace period completion.

Ah, much better.  That seems to fix it.

>It might just be simpler to use completions instead -
>
>	call_rcu(&(handler->rcu), free_nmi_handler, handler);
>	init_completion(&handler->completion);
>	spin_unlock_irqrestore(&nmi_handler_lock, flags);
>	wait_for_completion(&handler->completion);
>
>and do
>
>	complete(&handler->completion);
>
>in the RCU callback.

I was working under the assumption that the spinlocks were needed.  But
now I see that there are spinlocks in wait_for_completion.  You did get
init_completion() and call_rcu() backwards; they would need to be in the
opposite order, I think.

>Also, now I think your original idea of "Don't do this!" :) was right.
>Waiting until an nmi handler is seen unlinked could make a task
>block for a long time if another task re-installs it. You should
>probably just fail installation of a busy handler (checked under
>lock).

Since just about all of these will be in modules at unload time, I'm
thinking that the way it is now is better.  Otherwise, someone will mess
it up.  IMHO, it's much more likely that someone doesn't handle the
callback correctly than someone reuses the value before the call that
releases it returns.  I'd prefer to leave it the way it is now.

-Corey

^ permalink raw reply	[flat|nested] 43+ messages in thread
* Re: [PATCH] NMI request/release, version 4
  2002-10-23 21:53               ` Corey Minyard
@ 2002-10-24  7:41                 ` Dipankar Sarma
  2002-10-24 13:08                   ` Corey Minyard
  0 siblings, 1 reply; 43+ messages in thread
From: Dipankar Sarma @ 2002-10-24  7:41 UTC (permalink / raw)
  To: Corey Minyard; +Cc: linux-kernel, John Levon

On Wed, Oct 23, 2002 at 04:53:34PM -0500, Corey Minyard wrote:
> >After local_irq_count() went away, the idle CPU check was broken
> >and that meant that if you had an idle CPU, it could hold up RCU
> >grace period completion.
> >
> Ah, much better.  That seems to fix it.

Great! Do you have any latency numbers ? Just curious.

> >It might just be simpler to use completions instead -
> >
> >	call_rcu(&(handler->rcu), free_nmi_handler, handler);
> >	init_completion(&handler->completion);
> >	spin_unlock_irqrestore(&nmi_handler_lock, flags);
> >	wait_for_completion(&handler->completion);
> >
> >and do
> >
> >	complete(&handler->completion);
> >
> >in the RCU callback.
> >
> I was working under the assumption that the spinlocks were needed.  But
> now I see that there are spinlocks in wait_for_completion.  You did get
> init_completion() and call_rcu() backwards; they would need to be in the
> opposite order, I think.

AFAICS, the ordering of call_rcu() and init_completion() should not
matter, because the CPU that is executing them would not have gone
through a quiescent state, and thus the RCU callback cannot happen.
Only after a context switch in wait_for_completion() is the callback
possible.

> >Also, now I think your original idea of "Don't do this!" :) was right.
> >Waiting until an nmi handler is seen unlinked could make a task
> >block for a long time if another task re-installs it. You should
> >probably just fail installation of a busy handler (checked under
> >lock).
> >
> Since just about all of these will be in modules at unload time, I'm
> thinking that the way it is now is better.  Otherwise, someone will mess
> it up.  IMHO, it's much more likely that someone doesn't handle the
> callback correctly than someone reuses the value before the call that
> releases it returns.  I'd prefer to leave it the way it is now.

Oh, I didn't mean the part about waiting in release_nmi() until the
release is complete (RCU callback done). That is absolutely necessary.
I was talking about looping until the handler is not busy any more.
I think it is safe to just do a wait_for_completion() and return
in release_nmi().

Thanks
Dipankar

^ permalink raw reply	[flat|nested] 43+ messages in thread
* Re: [PATCH] NMI request/release, version 4
  2002-10-24  7:41                 ` Dipankar Sarma
@ 2002-10-24 13:08                   ` Corey Minyard
  0 siblings, 0 replies; 43+ messages in thread
From: Corey Minyard @ 2002-10-24 13:08 UTC (permalink / raw)
  To: dipankar; +Cc: linux-kernel

Dipankar Sarma wrote:

>On Wed, Oct 23, 2002 at 04:53:34PM -0500, Corey Minyard wrote:
>
>>>After local_irq_count() went away, the idle CPU check was broken
>>>and that meant that if you had an idle CPU, it could hold up RCU
>>>grace period completion.
>>>
>>Ah, much better.  That seems to fix it.
>
>Great! Do you have any latency numbers ? Just curious.

Unfortunately not.  3 seconds is well within the realm of human
observation with printk.

>>>It might just be simpler to use completions instead -
>>>
>>>	call_rcu(&(handler->rcu), free_nmi_handler, handler);
>>>	init_completion(&handler->completion);
>>>	spin_unlock_irqrestore(&nmi_handler_lock, flags);
>>>	wait_for_completion(&handler->completion);
>>>
>>>and do
>>>
>>>	complete(&handler->completion);
>>>
>>>in the RCU callback.
>>>
>>I was working under the assumption that the spinlocks were needed.  But
>>now I see that there are spinlocks in wait_for_completion.  You did get
>>init_completion() and call_rcu() backwards; they would need to be in the
>>opposite order, I think.
>
>AFAICS, the ordering of call_rcu() and init_completion() should not
>matter, because the CPU that is executing them would not have gone
>through a quiescent state, and thus the RCU callback cannot happen.
>Only after a context switch in wait_for_completion() is the callback
>possible.

Yes, I think you are right.  I'm still not used to the RCUs :-).

-Corey

^ permalink raw reply	[flat|nested] 43+ messages in thread
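To make the ordering point concrete, the agreed-upon release sequence can be sketched as follows. This is a sketch only, combining Dipankar's fragments with the surrounding locking; the field name `complete` follows the later v5 patch rather than Dipankar's `completion`.

	void release_nmi(struct nmi_handler *handler)
	{
		spin_lock(&nmi_handler_lock);
		list_del_rcu(&(handler->link));
		/*
		 * init_completion() before or after call_rcu() is equally
		 * safe here: the grace period cannot end until this CPU
		 * passes through a quiescent state, and its first chance
		 * to do so is the context switch inside
		 * wait_for_completion() below.
		 */
		init_completion(&(handler->complete));
		call_rcu(&(handler->rcu), free_nmi_handler, handler);
		spin_unlock(&nmi_handler_lock);

		wait_for_completion(&(handler->complete));
	}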
* Re: [PATCH] NMI request/release, version 4
  2002-10-23 20:14           ` [PATCH] NMI request/release, version 4 Corey Minyard
  2002-10-23 20:50             ` Dipankar Sarma
@ 2002-10-24  7:50             ` Dipankar Sarma
  2002-10-24 13:05               ` Corey Minyard
  2002-10-24 13:28               ` [PATCH] NMI request/release, version 5 - I think this one's ready Corey Minyard
  1 sibling, 2 replies; 43+ messages in thread
From: Dipankar Sarma @ 2002-10-24  7:50 UTC (permalink / raw)
  To: Corey Minyard; +Cc: linux-kernel, John Levon

Ok, some more comments -

On Wed, Oct 23, 2002 at 03:14:52PM -0500, Corey Minyard wrote:
> +void release_nmi(struct nmi_handler *handler)
> +{
> +	wait_queue_t  q_ent;
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(&nmi_handler_lock, flags);
> +	list_del_rcu(&(handler->link));
> +
> +	/* Wait for handler to finish being freed.  This can't be
> +	   interrupted, we must wait until it finishes. */
> +	init_waitqueue_head(&(handler->wait));
> +	init_waitqueue_entry(&q_ent, current);
> +	add_wait_queue(&(handler->wait), &q_ent);
> +	call_rcu(&(handler->rcu), free_nmi_handler, handler);
> +	for (;;) {
> +		set_current_state(TASK_UNINTERRUPTIBLE);
> +		if (list_empty(&(handler->link)))
> +			break;
> +		spin_unlock_irqrestore(&nmi_handler_lock, flags);
> +		schedule();
> +		spin_lock_irqsave(&nmi_handler_lock, flags);
> +	}
> +	remove_wait_queue(&(handler->wait), &q_ent);
> +	spin_unlock_irqrestore(&nmi_handler_lock, flags);
> +}

Can release_nmi() be done from irq context ? If not, I don't see
why spin_lock_irqsave() is required here. If it can be called
from irq context, then I can't see how you can schedule()
(or wait_for_completion() for that matter :)).

Thanks
Dipankar

^ permalink raw reply	[flat|nested] 43+ messages in thread
* Re: [PATCH] NMI request/release, version 4
  2002-10-24  7:50             ` Dipankar Sarma
@ 2002-10-24 13:05               ` Corey Minyard
  0 siblings, 0 replies; 43+ messages in thread
From: Corey Minyard @ 2002-10-24 13:05 UTC (permalink / raw)
  To: dipankar; +Cc: linux-kernel, John Levon

Dipankar Sarma wrote:

>Ok, some more comments -
>
>On Wed, Oct 23, 2002 at 03:14:52PM -0500, Corey Minyard wrote:
>
>Can release_nmi() be done from irq context ? If not, I don't see
>why spin_lock_irqsave() is required here. If it can be called
>from irq context, then I can't see how you can schedule()
>(or wait_for_completion() for that matter :)).

I originally was using nmi_handler_lock in the rcu routine (which runs
at interrupt level).  You have to turn off interrupts if someone else
can claim the lock at interrupt level.  However, I'm not any more, so
it can go away.

-Corey

^ permalink raw reply	[flat|nested] 43+ messages in thread
* [PATCH] NMI request/release, version 5 - I think this one's ready
  2002-10-24  7:50             ` Dipankar Sarma
  2002-10-24 13:05               ` Corey Minyard
@ 2002-10-24 13:28               ` Corey Minyard
  2002-10-24 14:46                 ` John Levon
  1 sibling, 1 reply; 43+ messages in thread
From: Corey Minyard @ 2002-10-24 13:28 UTC (permalink / raw)
  To: dipankar; +Cc: linux-kernel

[-- Attachment #1: Type: text/plain, Size: 143 bytes --]

Thanks to Dipankar, I think I am pretty much done with the code to do a
request/release NMI.  This patch is relative to stock 2.5.44.

-Corey

[-- Attachment #2: linux-nmi-v5.diff --]
[-- Type: text/plain, Size: 8713 bytes --]

diff -ur linux.orig/arch/i386/kernel/i386_ksyms.c linux/arch/i386/kernel/i386_ksyms.c
--- linux.orig/arch/i386/kernel/i386_ksyms.c	Mon Oct 21 13:25:58 2002
+++ linux/arch/i386/kernel/i386_ksyms.c	Tue Oct 22 12:19:59 2002
@@ -90,6 +90,9 @@
 EXPORT_SYMBOL(cpu_khz);
 EXPORT_SYMBOL(apm_info);
 
+EXPORT_SYMBOL(request_nmi);
+EXPORT_SYMBOL(release_nmi);
+
 #ifdef CONFIG_DEBUG_IOVIRT
 EXPORT_SYMBOL(__io_virt_debug);
 #endif
diff -ur linux.orig/arch/i386/kernel/irq.c linux/arch/i386/kernel/irq.c
--- linux.orig/arch/i386/kernel/irq.c	Mon Oct 21 13:25:58 2002
+++ linux/arch/i386/kernel/irq.c	Tue Oct 22 12:08:20 2002
@@ -131,6 +131,8 @@
  * Generic, controller-independent functions:
  */
 
+extern void nmi_append_user_names(struct seq_file *p);
+
 int show_interrupts(struct seq_file *p, void *v)
 {
 	int i, j;
@@ -166,6 +168,8 @@
 	for (j = 0; j < NR_CPUS; j++)
 		if (cpu_online(j))
 			p += seq_printf(p, "%10u ", nmi_count(j));
+	seq_printf(p, " ");
+	nmi_append_user_names(p);
 	seq_putc(p, '\n');
 #if CONFIG_X86_LOCAL_APIC
 	seq_printf(p, "LOC: ");
diff -ur linux.orig/arch/i386/kernel/nmi.c linux/arch/i386/kernel/nmi.c
--- linux.orig/arch/i386/kernel/nmi.c	Mon Oct 21 13:25:45 2002
+++ linux/arch/i386/kernel/nmi.c	Wed Oct 23 13:57:55 2002
@@ -20,6 +20,7 @@
 #include <linux/interrupt.h>
 #include <linux/mc146818rtc.h>
 #include <linux/kernel_stat.h>
+#include <linux/notifier.h>
 
 #include <asm/smp.h>
 #include <asm/mtrr.h>
@@ -102,6 +103,17 @@
 	return 0;
 }
 
+static int nmi_watchdog_tick (void * dev_id, struct pt_regs * regs);
+
+static struct nmi_handler nmi_watchdog_handler =
+{
+	.link     = LIST_HEAD_INIT(nmi_watchdog_handler.link),
+	.dev_name = "nmi_watchdog",
+	.dev_id   = NULL,
+	.handler  = nmi_watchdog_tick,
+	.priority = 255, /* We want to be relatively high priority. */
+};
+
 static int __init setup_nmi_watchdog(char *str)
 {
 	int nmi;
@@ -110,6 +122,12 @@
 
 	if (nmi >= NMI_INVALID)
 		return 0;
+
+	if (request_nmi(&nmi_watchdog_handler) != 0) {
+		/* Couldn't add a watchdog handler, give up. */
+		return 0;
+	}
+
 	if (nmi == NMI_NONE)
 		nmi_watchdog = nmi;
 	/*
@@ -350,7 +368,7 @@
 	alert_counter[i] = 0;
 }
 
-void nmi_watchdog_tick (struct pt_regs * regs)
+static int nmi_watchdog_tick (void * dev_id, struct pt_regs * regs)
 {
 
 	/*
@@ -401,4 +419,6 @@
 		}
 		wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
 	}
+
+	return NOTIFY_OK;
 }
diff -ur linux.orig/arch/i386/kernel/traps.c linux/arch/i386/kernel/traps.c
--- linux.orig/arch/i386/kernel/traps.c	Mon Oct 21 13:25:45 2002
+++ linux/arch/i386/kernel/traps.c	Thu Oct 24 08:11:14 2002
@@ -23,6 +23,10 @@
 #include <linux/spinlock.h>
 #include <linux/interrupt.h>
 #include <linux/highmem.h>
+#include <linux/notifier.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/rcupdate.h>
 
 #ifdef CONFIG_EISA
 #include <linux/ioport.h>
@@ -485,21 +489,125 @@
 	printk("Do you have a strange power saving mode enabled?\n");
 }
 
+/*
+ * A list of handlers for NMIs.  This list will be called in order
+ * when an NMI from an otherwise unidentifiable source comes in.  If
+ * one of these handles the NMI, it should return NOTIFY_OK, otherwise
+ * it should return NOTIFY_DONE.  NMI handlers cannot claim spinlocks,
+ * so we have to handle freeing these in a different manner.  A
+ * spinlock protects the list from multiple writers.  When something
+ * is removed from the list, it is thrown into another list (with
+ * another link, so the "next" element stays valid) and scheduled to
+ * run as an rcu.  When the rcu runs, it is guaranteed that nothing in
+ * the NMI code will be using it.
+ */
+static struct list_head nmi_handler_list = LIST_HEAD_INIT(nmi_handler_list);
+static spinlock_t nmi_handler_lock = SPIN_LOCK_UNLOCKED;
+
+/*
+ * To free the list item, we use an rcu.  The rcu-function will not
+ * run until all processors have done a context switch, gone idle, or
+ * gone to a user process, so it's guaranteed that when this runs, any
+ * NMI handler running at release time has completed and the list item
+ * can be safely freed.
+ */
+static void free_nmi_handler(void *arg)
+{
+	struct nmi_handler *handler = arg;
+
+	INIT_LIST_HEAD(&(handler->link));
+	complete(&(handler->complete));
+}
+
+int request_nmi(struct nmi_handler *handler)
+{
+	struct list_head   *curr;
+	struct nmi_handler *curr_h = NULL;
+
+	if (!list_empty(&(handler->link)))
+		return EBUSY;
+
+	spin_lock(&nmi_handler_lock);
+
+	__list_for_each(curr, &nmi_handler_list) {
+		curr_h = list_entry(curr, struct nmi_handler, link);
+		if (curr_h->priority <= handler->priority)
+			break;
+	}
+
+	if (curr_h)
+		/* list_add_rcu takes care of memory barrier */
+		list_add_rcu(&(handler->link), curr_h->link.prev);
+	else
+		list_add_rcu(&(handler->link), &nmi_handler_list);
+
+	spin_unlock(&nmi_handler_lock);
+	return 0;
+}
+
+void release_nmi(struct nmi_handler *handler)
+{
+	spin_lock(&nmi_handler_lock);
+	list_del_rcu(&(handler->link));
+	init_completion(&(handler->complete));
+	call_rcu(&(handler->rcu), free_nmi_handler, handler);
+	spin_unlock(&nmi_handler_lock);
+
+	/* Wait for handler to finish being freed.  This can't be
+	   interrupted, we must wait until it finishes. */
+	wait_for_completion(&(handler->complete));
+}
+
+static int call_nmi_handlers(struct pt_regs * regs)
+{
+	struct list_head   *curr;
+	struct nmi_handler *curr_h;
+	int                handled = 0;
+	int                val;
+
+	__list_for_each_rcu(curr, &nmi_handler_list) {
+		curr_h = list_entry(curr, struct nmi_handler, link);
+		val = curr_h->handler(curr_h->dev_id, regs);
+		switch (val & ~NOTIFY_STOP_MASK) {
+		case NOTIFY_OK:
+			handled = 1;
+			break;

+		case NOTIFY_DONE:
+		default:
+		}
+		if (val & NOTIFY_STOP_MASK)
+			break;
+	}
+	return handled;
+}
+
+void nmi_append_user_names(struct seq_file *p)
+{
+	struct list_head   *curr;
+	struct nmi_handler *curr_h;
+
+	spin_lock(&nmi_handler_lock);
+	__list_for_each(curr, &nmi_handler_list) {
+		curr_h = list_entry(curr, struct nmi_handler, link);
+		if (curr_h->dev_name)
+			p += seq_printf(p, " %s", curr_h->dev_name);
+	}
+	spin_unlock(&nmi_handler_lock);
+}
+
 static void default_do_nmi(struct pt_regs * regs)
 {
 	unsigned char reason = inb(0x61);
 
 	if (!(reason & 0xc0)) {
-#if CONFIG_X86_LOCAL_APIC
 		/*
-		 * Ok, so this is none of the documented NMI sources,
-		 * so it must be the NMI watchdog.
+		 * Check the handler list to see if anyone can handle this
+		 * nmi.
 		 */
-		if (nmi_watchdog) {
-			nmi_watchdog_tick(regs);
+		if (call_nmi_handlers(regs))
 			return;
-		}
-#endif
+
 		unknown_nmi_error(reason, regs);
 		return;
 	}
diff -ur linux.orig/include/asm-i386/apic.h linux/include/asm-i386/apic.h
--- linux.orig/include/asm-i386/apic.h	Mon Oct 21 13:26:04 2002
+++ linux/include/asm-i386/apic.h	Tue Oct 22 12:40:16 2002
@@ -79,7 +79,6 @@
 extern void setup_boot_APIC_clock (void);
 extern void setup_secondary_APIC_clock (void);
 extern void setup_apic_nmi_watchdog (void);
-extern inline void nmi_watchdog_tick (struct pt_regs * regs);
 extern int APIC_init_uniprocessor (void);
 extern void disable_APIC_timer(void);
 extern void enable_APIC_timer(void);
diff -ur linux.orig/include/asm-i386/irq.h linux/include/asm-i386/irq.h
--- linux.orig/include/asm-i386/irq.h	Mon Oct 21 13:24:41 2002
+++ linux/include/asm-i386/irq.h	Wed Oct 23 16:47:24 2002
@@ -12,6 +12,7 @@
 
 #include <linux/config.h>
 #include <linux/sched.h>
+#include <linux/rcupdate.h>
 /* include comes from machine specific directory */
 #include "irq_vectors.h"
 
@@ -27,5 +28,36 @@
 #ifdef CONFIG_X86_LOCAL_APIC
 #define ARCH_HAS_NMI_WATCHDOG	/* See include/linux/nmi.h */
 #endif
+
+
+/**
+ * Register a handler to get called when an NMI occurs.  If the
+ * handler actually handles the NMI, it should return NOTIFY_OK.  If
+ * it did not handle the NMI, it should return NOTIFY_DONE.  It may "or"
+ * on NOTIFY_STOP_MASK to the return value if it does not want other
+ * handlers after it to be notified.  Note that this is a slow-path
+ * handler, if you have a fast-path handler there's another tie-in
+ * for you.  See the oprofile code.
+ */
+#define HAVE_NMI_HANDLER	1
+struct nmi_handler
+{
+	struct list_head link; /* You must init this before use. */
+
+	char              *dev_name;
+	void              *dev_id;
+	int               (*handler)(void *dev_id, struct pt_regs *regs);
+	int               priority; /* Handlers called in priority order. */
+
+	/* Don't mess with anything below here. */
+
+	struct rcu_head   rcu;
+	struct completion complete;
+};
+
+int request_nmi(struct nmi_handler *handler);
+
+/* Release will block until the handler is completely free. */
+void release_nmi(struct nmi_handler *handler);
 #endif /* _ASM_IRQ_H */

^ permalink raw reply	[flat|nested] 43+ messages in thread
* Re: [PATCH] NMI request/release, version 5 - I think this one's ready
  2002-10-24 13:28               ` [PATCH] NMI request/release, version 5 - I think this one's ready Corey Minyard
@ 2002-10-24 14:46                 ` John Levon
  2002-10-24 15:36                   ` Corey Minyard
  0 siblings, 1 reply; 43+ messages in thread
From: John Levon @ 2002-10-24 14:46 UTC (permalink / raw)
  To: Corey Minyard; +Cc: dipankar, linux-kernel

On Thu, Oct 24, 2002 at 08:28:20AM -0500, Corey Minyard wrote:

> diff -ur linux.orig/arch/i386/kernel/traps.c linux/arch/i386/kernel/traps.c
> --- linux.orig/arch/i386/kernel/traps.c	Mon Oct 21 13:25:45 2002
> +++ linux/arch/i386/kernel/traps.c	Thu Oct 24 08:11:14 2002

At this point I'd quite like to see :

	mv nmi.c nmi_watchdog.c

and put all this stuff in always-compiled nmi.c. traps.c is getting
bloated.

> static void default_do_nmi(struct pt_regs * regs)
> {
> 	unsigned char reason = inb(0x61);
> 
> 	if (!(reason & 0xc0)) {
> -#if CONFIG_X86_LOCAL_APIC
> 		/*
> -		 * Ok, so this is none of the documented NMI sources,
> -		 * so it must be the NMI watchdog.
> +		 * Check the handler list to see if anyone can handle this
> +		 * nmi.
> 		 */
> -		if (nmi_watchdog) {
> -			nmi_watchdog_tick(regs);
> +		if (call_nmi_handlers(regs))

Now you're using RCU, it's a real pity that we have the inb() first -
if it wasn't for that, there would be no reason at all to have the "fast
path" setting code too (the latter code is ugly, which is one reason I
want to ditch it).

How about adding default_do_nmi as the minimal-priority handler, then
add the watchdog with higher priority above that ? Then oprofile can add
itself on top of those both and return NOTIFY_OK to indicate it should
break out of the loop. As a bonus, you lose the inb() for the watchdog
too.

> +++ linux/include/asm-i386/irq.h	Wed Oct 23 16:47:24 2002

I thought you agreed the stuff should be in asm/nmi.h ?

regards
john

-- 
"This is playing, not work, therefore it's not a waste of time."
	- Zath

^ permalink raw reply	[flat|nested] 43+ messages in thread
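John's layering idea is not implemented in any patch in this thread; a rough sketch of what the registrations might look like follows (the wrapper name, struct layout from the v5 patch, and the priority values are all invented for illustration):

	/* Hypothetical wrapper around the old reason-port decode. */
	static int default_nmi_decode(void *dev_id, struct pt_regs *regs);

	/* Lowest priority: consulted only if nothing else claimed the NMI. */
	static struct nmi_handler default_nmi = {
		.link     = LIST_HEAD_INIT(default_nmi.link),
		.dev_name = "default_do_nmi",
		.handler  = default_nmi_decode,
		.priority = 0,
	};

	/* Middle priority: the NMI watchdog sits above the default decode. */
	static struct nmi_handler watchdog_nmi = {
		.link     = LIST_HEAD_INIT(watchdog_nmi.link),
		.dev_name = "nmi_watchdog",
		.handler  = nmi_watchdog_tick,
		.priority = 128,
	};

	/*
	 * oprofile would then register with a still higher priority and
	 * return NOTIFY_OK | NOTIFY_STOP_MASK, so neither the watchdog
	 * check nor the inb(0x61) runs on its (very frequent) NMIs.
	 */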
* Re: [PATCH] NMI request/release, version 5 - I think this one's ready
  2002-10-24 14:46                 ` John Levon
@ 2002-10-24 15:36                   ` Corey Minyard
  2002-10-24 17:18                     ` John Levon
  0 siblings, 1 reply; 43+ messages in thread
From: Corey Minyard @ 2002-10-24 15:36 UTC (permalink / raw)
  To: John Levon; +Cc: dipankar, linux-kernel

John Levon wrote:

>On Thu, Oct 24, 2002 at 08:28:20AM -0500, Corey Minyard wrote:
>
>>diff -ur linux.orig/arch/i386/kernel/traps.c linux/arch/i386/kernel/traps.c
>>--- linux.orig/arch/i386/kernel/traps.c	Mon Oct 21 13:25:45 2002
>>+++ linux/arch/i386/kernel/traps.c	Thu Oct 24 08:11:14 2002
>
>At this point I'd quite like to see :
>
>	mv nmi.c nmi_watchdog.c
>
>and put all this stuff in always-compiled nmi.c. traps.c is getting
>bloated.

I agree.

>> static void default_do_nmi(struct pt_regs * regs)
>> {
>> 	unsigned char reason = inb(0x61);
>>
>> 	if (!(reason & 0xc0)) {
>>-#if CONFIG_X86_LOCAL_APIC
>> 		/*
>>-		 * Ok, so this is none of the documented NMI sources,
>>-		 * so it must be the NMI watchdog.
>>+		 * Check the handler list to see if anyone can handle this
>>+		 * nmi.
>> 		 */
>>-		if (nmi_watchdog) {
>>-			nmi_watchdog_tick(regs);
>>+		if (call_nmi_handlers(regs))
>
>Now you're using RCU, it's a real pity that we have the inb() first -
>if it wasn't for that, there would be no reason at all to have the "fast
>path" setting code too (the latter code is ugly, which is one reason I
>want to ditch it).
>
>How about adding default_do_nmi as the minimal-priority handler, then
>add the watchdog with higher priority above that ? Then oprofile can add
>itself on top of those both and return NOTIFY_OK to indicate it should
>break out of the loop. As a bonus, you lose the inb() for the watchdog
>too.

Is there any way to detect if the nmi watchdog actually caused the
timeout?  I don't understand the hardware well enough to do it without
some work, but it would be a VERY good idea to make sure the nmi
watchdog actually caused the operation.

Plus, can't you get more than one cause of an NMI?  Shouldn't you check
them all?

>>+++ linux/include/asm-i386/irq.h	Wed Oct 23 16:47:24 2002
>
>I thought you agreed the stuff should be in asm/nmi.h ?

I will (I had forgotten), and I will move nmi.h to nmi_watchdog.h.

-Corey

^ permalink raw reply	[flat|nested] 43+ messages in thread
* Re: [PATCH] NMI request/release, version 5 - I think this one's ready
  2002-10-24 15:36                   ` Corey Minyard
@ 2002-10-24 17:18                     ` John Levon
  2002-10-24 17:43                       ` Corey Minyard
  2002-10-24 20:03                       ` Corey Minyard
  0 siblings, 2 replies; 43+ messages in thread
From: John Levon @ 2002-10-24 17:18 UTC (permalink / raw)
  To: Corey Minyard; +Cc: dipankar, linux-kernel

On Thu, Oct 24, 2002 at 10:36:22AM -0500, Corey Minyard wrote:

> Is there any way to detect if the nmi watchdog actually caused the
> timeout? I don't understand the hardware well enough to do it without

You can check if the counter used overflowed :

#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
#define CTRL_READ(l,h,msrs,c) do {rdmsr(MSR_P6_PERFCTR0, (l), (h));} while (0)

	CTR_READ(low, high, msrs, i);
	if (CTR_OVERFLOWED(low)) {
		... found

like oprofile does.

I've accidentally deleted your patch, but weren't you unconditionally
returning "break out of loop" from the watchdog ? I'm not very clear on
the difference between NOTIFY_DONE and NOTIFY_OK anyway...

> Plus, can't you get more than one cause of an NMI? Shouldn't you check
> them all?

Shouldn't the NMI stay asserted ? At least with perfctr, two counters
causes two interrupts (actually there's a bug in mainline oprofile on
that that I'll fix when Linus is back)

regards
john

-- 
"This is playing, not work, therefore it's not a waste of time."
	- Zath

^ permalink raw reply	[flat|nested] 43+ messages in thread
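Folded into the watchdog handler, the overflow check might look like the sketch below. This is an assumption-laden sketch, not code from any posted patch: it assumes a P6-family CPU with the watchdog driven by performance counter 0, and the I/O APIC route (discussed later in the thread) has no equivalent check.

	#define CTR_OVERFLOWED(n) (!((n) & (1U << 31)))

	static int nmi_watchdog_tick(void *dev_id, struct pt_regs *regs)
	{
		unsigned int low, high;

		/* Read the counter the watchdog programmed (assumed here
		   to be MSR_P6_PERFCTR0). */
		rdmsr(MSR_P6_PERFCTR0, low, high);
		if (!CTR_OVERFLOWED(low))
			return NOTIFY_DONE;	/* still counting: not our NMI */

		/* ... existing stuck-CPU detection via alert_counter ... */

		/* Re-arm the counter for the next watchdog period. */
		wrmsr(nmi_perfctr_msr, -(cpu_khz/nmi_hz*1000), -1);
		return NOTIFY_OK;
	}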
* Re: [PATCH] NMI request/release, version 5 - I think this one's ready
  2002-10-24 17:18                     ` John Levon
@ 2002-10-24 17:43                       ` Corey Minyard
  2002-10-24 18:04                         ` John Levon
  0 siblings, 1 reply; 43+ messages in thread
From: Corey Minyard @ 2002-10-24 17:43 UTC (permalink / raw)
  To: John Levon; +Cc: dipankar, linux-kernel

John Levon wrote:

>On Thu, Oct 24, 2002 at 10:36:22AM -0500, Corey Minyard wrote:
>
>>Is there any way to detect if the nmi watchdog actually caused the
>>timeout?  I don't understand the hardware well enough to do it without
>
>You can check if the counter used overflowed :
>
>#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
>#define CTRL_READ(l,h,msrs,c) do {rdmsr(MSR_P6_PERFCTR0, (l), (h));} while (0)
>
>	CTR_READ(low, high, msrs, i);
>	if (CTR_OVERFLOWED(low)) {
>		... found
>
>like oprofile does.

Ok, thanks, I'll add that to the nmi_watchdog code.

>I've accidentally deleted your patch, but weren't you unconditionally
>returning "break out of loop" from the watchdog ? I'm not very clear on
>the difference between NOTIFY_DONE and NOTIFY_OK anyway...

The comments on these are:

#define NOTIFY_DONE		0x0000		/* Don't care */
#define NOTIFY_OK		0x0001		/* Suits me */
#define NOTIFY_STOP_MASK	0x8000		/* Don't call further */

I'm taking these to mean that NOTIFY_DONE means you didn't handle it,
NOTIFY_OK means you did handle it, and you "or" on NOTIFY_STOP_MASK if
you want it to stop.  I'm thinking that stopping is probably a bad idea,
if the NMI is really edge triggered.

>>Plus, can't you get more than one cause of an NMI?  Shouldn't you check
>>them all?
>
>Shouldn't the NMI stay asserted ? At least with perfctr, two counters
>causes two interrupts (actually there's a bug in mainline oprofile on
>that that I'll fix when Linus is back)

There's a comment in do_nmi() that says that the NMI is edge triggered.
If it is, then you have to call everything.  I'd really like a manual on
how the timer chip works, I'll see if I can hunt one down.

-Corey

^ permalink raw reply	[flat|nested] 43+ messages in thread
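Spelled out, that reading gives a handler three possible returns. The sketch below is illustrative only; the three "placeholder" tests stand in for real hardware checks and policy.

	static int example_handler(void *dev_id, struct pt_regs *regs)
	{
		if (!device_raised_nmi(dev_id))		/* placeholder test */
			return NOTIFY_DONE;		/* didn't handle it */

		service_device(dev_id);			/* placeholder */

		if (want_exclusive_handling)		/* placeholder policy */
			return NOTIFY_OK | NOTIFY_STOP_MASK; /* handled; stop walk */

		return NOTIFY_OK;	/* handled; let other handlers look too */
	}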
* Re: [PATCH] NMI request/release, version 5 - I think this one's ready
  2002-10-24 17:43                       ` Corey Minyard
@ 2002-10-24 18:04                         ` John Levon
  2002-10-24 18:32                           ` Corey Minyard
  0 siblings, 1 reply; 43+ messages in thread
From: John Levon @ 2002-10-24 18:04 UTC (permalink / raw)
  To: Corey Minyard; +Cc: dipankar, linux-kernel

On Thu, Oct 24, 2002 at 12:43:50PM -0500, Corey Minyard wrote:

> #define NOTIFY_DONE		0x0000		/* Don't care */
> #define NOTIFY_OK		0x0001		/* Suits me */
> #define NOTIFY_STOP_MASK	0x8000		/* Don't call further */
> 
> I'm taking these to mean that NOTIFY_DONE means you didn't handle it,
> NOTIFY_OK means you did handle it, and you "or" on NOTIFY_STOP_MASK if
> you want it to stop. I'm thinking that stopping is probably a bad idea,
> if the NMI is really edge triggered.

> There's a comment in do_nmi() that says that the NMI is edge triggered.

So there is. Darn. You could special case default_do_nmi, only printing
out "Unknown NMI" iff the reason inb() check fails, /and/ no previous
handlers set handled. Theoretically we have to take the cost of the
inb() every time otherwise we can lose one of the NMISC-based things in
default_do_nmi ... I can always see if this makes a noticeable practical
difference for oprofile under high interrupt load

(I also need to be able to remove the NMI watchdog handler, but that's
an oprofile-specific problem).

Perhaps things will end up being best to leave the current fast-path
thing, but we should make sure that we've explored the possibilities of
removing it first ;)

thanks
john

-- 
"This is playing, not work, therefore it's not a waste of time."
	- Zath

^ permalink raw reply	[flat|nested] 43+ messages in thread
* Re: [PATCH] NMI request/release, version 5 - I think this one's ready
  2002-10-24 18:04                         ` John Levon
@ 2002-10-24 18:32                           ` Corey Minyard
  2002-10-24 18:47                             ` John Levon
  0 siblings, 1 reply; 43+ messages in thread
From: Corey Minyard @ 2002-10-24 18:32 UTC (permalink / raw)
  To: John Levon; +Cc: dipankar, linux-kernel

John Levon wrote:

>On Thu, Oct 24, 2002 at 12:43:50PM -0500, Corey Minyard wrote:
>
>>There's a comment in do_nmi() that says that the NMI is edge triggered.
>
>So there is. Darn. You could special case default_do_nmi, only printing
>out "Unknown NMI" iff the reason inb() check fails, /and/ no previous
>handlers set handled. Theoretically we have to take the cost of the
>inb() every time otherwise we can lose one of the NMISC-based things in
>default_do_nmi ... I can always see if this makes a noticeable practical
>difference for oprofile under high interrupt load
>
>(I also need to be able to remove the NMI watchdog handler, but that's
>an oprofile-specific problem).
>
>Perhaps things will end up being best to leave the current fast-path
>thing, but we should make sure that we've explored the possibilities of
>removing it first ;)

This also means that the current code is actually wrong, since the
current code just returns at the first thing it finds.  I'll work on
redoing this all in one list.

Do you know if nmi_shutdown in oprofile/nmi_int.c can be called from
interrupt context?  Because nmi_release() can block, so I couldn't use
it as-is if it ran in interrupt context.

-Corey

^ permalink raw reply	[flat|nested] 43+ messages in thread
* Re: [PATCH] NMI request/release, version 5 - I think this one's ready
  2002-10-24 18:32                           ` Corey Minyard
@ 2002-10-24 18:47                             ` John Levon
  0 siblings, 0 replies; 43+ messages in thread
From: John Levon @ 2002-10-24 18:47 UTC (permalink / raw)
  To: linux-kernel; +Cc: cminyard

On Thu, Oct 24, 2002 at 01:32:40PM -0500, Corey Minyard wrote:

> Do you know if nmi_shutdown in oprofile/nmi_int.c can be called from

It can only be called in process context, from the event buffer
release() method (and open(), on the failure path)

regards
john

-- 
"This is playing, not work, therefore it's not a waste of time."
	- Zath

^ permalink raw reply	[flat|nested] 43+ messages in thread
* Re: [PATCH] NMI request/release, version 5 - I think this one's ready
  2002-10-24 17:18                     ` John Levon
  2002-10-24 17:43                       ` Corey Minyard
@ 2002-10-24 20:03                       ` Corey Minyard
  2002-10-24 20:29                         ` John Levon
  1 sibling, 1 reply; 43+ messages in thread
From: Corey Minyard @ 2002-10-24 20:03 UTC (permalink / raw)
  To: John Levon; +Cc: dipankar, linux-kernel

John Levon wrote:

>On Thu, Oct 24, 2002 at 10:36:22AM -0500, Corey Minyard wrote:
>
>>Is there any way to detect if the nmi watchdog actually caused the
>>timeout?  I don't understand the hardware well enough to do it without
>
>You can check if the counter used overflowed :
>
>#define CTR_OVERFLOWED(n) (!((n) & (1U<<31)))
>#define CTRL_READ(l,h,msrs,c) do {rdmsr(MSR_P6_PERFCTR0, (l), (h));} while (0)
>
>	CTR_READ(low, high, msrs, i);
>	if (CTR_OVERFLOWED(low)) {
>		... found

Any way to do this for the IO_APIC case?

-Corey

^ permalink raw reply	[flat|nested] 43+ messages in thread
* Re: [PATCH] NMI request/release, version 5 - I think this one's ready
  2002-10-24 20:03                       ` Corey Minyard
@ 2002-10-24 20:29                         ` John Levon
  2002-10-25  1:22                           ` [PATCH] NMI request/release, version 6 - "Well I thought the last one was ready" Corey Minyard
  0 siblings, 1 reply; 43+ messages in thread
From: John Levon @ 2002-10-24 20:29 UTC (permalink / raw)
  To: Corey Minyard; +Cc: dipankar, linux-kernel

On Thu, Oct 24, 2002 at 03:03:31PM -0500, Corey Minyard wrote:

> >	if (CTR_OVERFLOWED(low)) {
> >		... found
> >
> Any way to do this for the IO_APIC case?

Ugh, forgot about that. I have no idea, sorry

regards
john

-- 
"This is playing, not work, therefore it's not a waste of time."
	- Zath

^ permalink raw reply	[flat|nested] 43+ messages in thread
* [PATCH] NMI request/release, version 6 - "Well I thought the last one was ready"
  2002-10-24 20:29                         ` John Levon
@ 2002-10-25  1:22                           ` Corey Minyard
  2002-10-25  1:39                             ` John Levon
  0 siblings, 1 reply; 43+ messages in thread
From: Corey Minyard @ 2002-10-25  1:22 UTC (permalink / raw)
  To: John Levon; +Cc: dipankar, linux-kernel

Ok, I have reworked all the NMI code, moved it to its own file, and
converted all the NMI users to use the NMI request/release code.

The current code will call the NMI handlers on an NMI; interested
parties may detect that their NMI occurred and handle it.  Since the
NMI into the CPU is edge-triggered, I think that's the correct way to
handle it (although it is slower).  If someone figures out another way,
that would be very helpful.

The include/linux/nmi.h and arch/i386/kernel/nmi.c files were renamed
to nmi_watchdog.h and nmi_watchdog.c.

The biggest hole (that I know of) in these changes is that there is no
way that I can tell to detect if an nmi_watchdog has occurred if the
NMI source is the I/O APIC.  That code will assume that if no one else
has handled the NMI, it was the source, which is a bad assumption (but
the same assumption that was being made before my changes, so it's no
worse).  Most things should be using the local APIC, anyway.

It's now too big to include in an email, so it's at
http://home.attbi.com/~minyard/linux-nmi-v6.diff.

-Corey

^ permalink raw reply	[flat|nested] 43+ messages in thread
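The v6 patch itself is only available at the URL above; from the description, the dispatch loop would look roughly like the sketch below. This is not the actual v6 code: the four-argument handler signature is inferred from the nmi_std() line John quotes in his review below, and the rest is an assumption.

	static int call_nmi_handlers(struct pt_regs *regs, int cpu)
	{
		struct list_head   *curr;
		struct nmi_handler *curr_h;
		int                handled = 0;

		/*
		 * Call every handler: the NMI into the CPU is edge-
		 * triggered, so two sources firing together may deliver
		 * only one NMI, and stopping at the first claimant could
		 * lose the second source.
		 */
		__list_for_each_rcu(curr, &nmi_handler_list) {
			curr_h = list_entry(curr, struct nmi_handler, link);
			if (curr_h->handler(curr_h->dev_id, regs, cpu,
					    handled) == NOTIFY_OK)
				handled = 1;
		}
		return handled;
	}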
* Re: [PATCH] NMI request/release, version 6 - "Well I thought the last one was ready"
  2002-10-25  1:22                           ` [PATCH] NMI request/release, version 6 - "Well I thought the last one was ready" Corey Minyard
@ 2002-10-25  1:39                             ` John Levon
  2002-10-25  1:58                               ` Jeff Garzik
  2002-10-25  2:01                               ` [PATCH] NMI request/release, version 7 - minor cleanups Corey Minyard
  0 siblings, 2 replies; 43+ messages in thread
From: John Levon @ 2002-10-25  1:39 UTC (permalink / raw)
  To: Corey Minyard; +Cc: dipankar, linux-kernel

On Thu, Oct 24, 2002 at 08:22:33PM -0500, Corey Minyard wrote:

> http://home.attbi.com/~minyard/linux-nmi-v6.diff.

		case NOTIFY_DONE:
		default:
		}

This needs to be :

		case NOTIFY_DONE:
		default:;
		}

or later gccs whine.

+ * handler, if you have a fast-path handler there's another tie-in
+ * for you.  See the oprofile code.

You forgot to remove this comment.

	if (!list_empty(&(handler->link)))
		return EBUSY;

This wants to be -EBUSY (it eventually gets returned to userspace via
oprofile, and is more common anyway).

static int nmi_std (void * dev_id, struct pt_regs * regs, int cpu, int handled)

I don't think this matches the usual kernel style.

I think the rest looks OK, I'll find some time to play with the
oprofile side of this.

thanks
john

-- 
"This is playing, not work, therefore it's not a waste of time."
	- Zath

^ permalink raw reply	[flat|nested] 43+ messages in thread
* Re: [PATCH] NMI request/release, version 6 - "Well I thought the last one was ready"
  2002-10-25  1:39                             ` John Levon
@ 2002-10-25  1:58                               ` Jeff Garzik
  0 siblings, 0 replies; 43+ messages in thread
From: Jeff Garzik @ 2002-10-25  1:58 UTC (permalink / raw)
  To: John Levon; +Cc: Corey Minyard, dipankar, linux-kernel

John Levon wrote:

>On Thu, Oct 24, 2002 at 08:22:33PM -0500, Corey Minyard wrote:
>
>>http://home.attbi.com/~minyard/linux-nmi-v6.diff.
>
>		case NOTIFY_DONE:
>		default:
>		}
>
>This needs to be :
>
>		case NOTIFY_DONE:
>		default:;
>		}
>
>or later gccs whine.

or the even-more-readable:

	default:
		/* do nothing */
		break;

^ permalink raw reply	[flat|nested] 43+ messages in thread
* [PATCH] NMI request/release, version 7 - minor cleanups
  2002-10-25  1:39                             ` John Levon
  2002-10-25  1:58                               ` Jeff Garzik
@ 2002-10-25  2:01                               ` Corey Minyard
  2002-10-25 13:26                                 ` [PATCH] NMI request/release, version 8 Corey Minyard
  1 sibling, 1 reply; 43+ messages in thread
From: Corey Minyard @ 2002-10-25  2:01 UTC (permalink / raw)
  To: John Levon; +Cc: dipankar, linux-kernel

This fixes all the problems John Levon pointed out.  It's at
http://home.attbi.com/~minyard/linux-nmi-v7.diff

Thanks for everyone's help.

-Corey

^ permalink raw reply	[flat|nested] 43+ messages in thread
* [PATCH] NMI request/release, version 8
  2002-10-25  2:01                               ` [PATCH] NMI request/release, version 7 - minor cleanups Corey Minyard
@ 2002-10-25 13:26                                 ` Corey Minyard
  0 siblings, 0 replies; 43+ messages in thread
From: Corey Minyard @ 2002-10-25 13:26 UTC (permalink / raw)
  To: linux-kernel; +Cc: John Levon, dipankar

[-- Attachment #1: Type: text/plain, Size: 285 bytes --]

I realized that the piece of code at the end of the old NMI handler was
still necessary.  I have attached a patch to the previous version to
fix the problem.  The full version of the patch with this fix is on my
web page at http://home.attbi.com/~minyard/linux-nmi-v8.diff.

-Corey

[-- Attachment #2: linux-nmi-v7-v8.diff --]
[-- Type: text/plain, Size: 1701 bytes --]

--- linux.v8/arch/i386/kernel/nmi.c	Thu Oct 24 20:53:04 2002
+++ linux/arch/i386/kernel/nmi.c	Fri Oct 25 08:21:22 2002
@@ -208,30 +208,29 @@
 
 	if (!handled)
 		unknown_nmi_error(regs, cpu);
-#if 0
-	/*
-	 * It MAY be possible to only call handlers until one returns
-	 * that it handled the NMI, if, we do the following.  Assuming
-	 * that all the incoming signals causing NMIs are
-	 * level-triggered, this code will cause another NMI
-	 * immediately if the incoming signal is still asserted.  I
-	 * don't know if the assumption is correct or if it's better
-	 * to call all the handlers or do the I/O.
-	 *
-	 * I'm pretty sure that this won't work with the performance
-	 * registers NMI output, so I'm guessing that this won't work.
-	 */
 	else {
 		/*
 		 * Reassert NMI in case it became active meanwhile
-		 * as it's edge-triggered.
+		 * as it's edge-triggered.  Don't do this if the NMI
+		 * wasn't handled, to avoid an infinite NMI loop.
+		 *
+		 * This is necessary in case we have another external
+		 * NMI while processing this one.  The external NMIs
+		 * are level-generated, but into the processor NMIs
+		 * are edge-triggered, so if you have one NMI source
+		 * come in while another is already there, the level
+		 * will never go down to cause another edge, and
+		 * no more NMIs will happen.  This does NOT apply
+		 * to internally generated NMIs, though, so you
+		 * can't use the same trick to only call one handler
+		 * at a time.  Otherwise, if two internal NMIs came
+		 * in at the same time you might miss one.
		 */
 		outb(0x8f, 0x70);
 		inb(0x71);		/* dummy */
 		outb(0x0f, 0x70);
 		inb(0x71);		/* dummy */
 	}
-#endif
 }
 
 void __init init_nmi(void)

^ permalink raw reply	[flat|nested] 43+ messages in thread
* Re: [PATCH] NMI request/release
  2002-10-22  2:10 ` John Levon
  2002-10-22  2:32   ` Corey Minyard
@ 2002-10-22 12:23   ` Suparna Bhattacharya
  0 siblings, 0 replies; 43+ messages in thread
From: Suparna Bhattacharya @ 2002-10-22 12:23 UTC (permalink / raw)

On Tue, 22 Oct 2002 07:46:49 +0530, John Levon wrote:

> On Mon, Oct 21, 2002 at 08:32:47PM -0500, Corey Minyard wrote:
>
>> The attached patch implements a way to request to receive an NMI if it
>> comes from an otherwise unknown source. I needed this for handling
>> NMIs with the IPMI watchdog. This function was discussed a little a
>> while
>
> Then NMI watchdog and oprofile should be changed to use this too.

Perhaps even LKCD can make use of such a framework if it works out.

> We also need priority and/or equivalent of NOTIFY_STOP_MASK so we can
> break out of calling all the handlers. Actually, why do you continue if
> one of the handlers returns 1 anyway ?
>
>> + atomic_inc(&calling_nmi_handlers);
>
> Isn't this going to cause cacheline ping pong ?
>
>> + curr = nmi_handler_list;
>> + while (curr) {
>> + handled |= curr->handler(curr->dev_id, regs);
>
> dev_name is never used at all. What is it for ? Also, would be nice to
> do an smp_processor_id() just once and pass that in to prevent multiple
> calls to get_current().
>
> Couldn't you modify the notifier code to do the xchg()s (though that's
> not available on all CPU types ...)
>
>> +#define HAVE_NMI_HANDLER 1
>
> What uses this ?
>
>> + volatile struct nmi_handler *next;
>
> Hmm ...
>
> Is it not possible to use linux/rcupdate.h for this stuff ?
>
> regards
> john

^ permalink raw reply	[flat|nested] 43+ messages in thread
end of thread, other threads:[~2002-10-25 13:19 UTC | newest]

Thread overview: 43+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2002-10-22  1:32 [PATCH] NMI request/release Corey Minyard
2002-10-22  2:10 ` John Levon
2002-10-22  2:32   ` Corey Minyard
2002-10-22  2:53     ` John Levon
2002-10-22 13:02       ` Corey Minyard
2002-10-22 15:09         ` John Levon
2002-10-22 16:03           ` Corey Minyard
2002-10-22 17:23             ` Robert Love
2002-10-22 18:08               ` Corey Minyard
2002-10-22 18:16                 ` Robert Love
2002-10-22 20:04                   ` Dipankar Sarma
2002-10-22 17:53             ` Dipankar Sarma
2002-10-22 18:05               ` Corey Minyard
2002-10-22 18:08                 ` Dipankar Sarma
2002-10-22 18:29                   ` Corey Minyard
2002-10-22 19:08                     ` John Levon
2002-10-22 21:36                       ` [PATCH] NMI request/release, version 3 Corey Minyard
2002-10-23 17:33                         ` Dipankar Sarma
2002-10-23 18:03                           ` Corey Minyard
2002-10-23 18:57                             ` Dipankar Sarma
2002-10-23 20:14                               ` [PATCH] NMI request/release, version 4 Corey Minyard
2002-10-23 20:50                                 ` Dipankar Sarma
2002-10-23 21:53                                   ` Corey Minyard
2002-10-24  7:41                                     ` Dipankar Sarma
2002-10-24 13:08                                       ` Corey Minyard
2002-10-24  7:50                                 ` Dipankar Sarma
2002-10-24 13:05                                   ` Corey Minyard
2002-10-24 13:28                                   ` [PATCH] NMI request/release, version 5 - I think this one's ready Corey Minyard
2002-10-24 14:46                                     ` John Levon
2002-10-24 15:36                                       ` Corey Minyard
2002-10-24 17:18                                         ` John Levon
2002-10-24 17:43                                           ` Corey Minyard
2002-10-24 18:04                                             ` John Levon
2002-10-24 18:32                                               ` Corey Minyard
2002-10-24 18:47                                                 ` John Levon
2002-10-24 20:03                                           ` Corey Minyard
2002-10-24 20:29                                             ` John Levon
2002-10-25  1:22                                               ` [PATCH] NMI request/release, version 6 - "Well I thought the last one was ready" Corey Minyard
2002-10-25  1:39                                                 ` John Levon
2002-10-25  1:58                                                   ` Jeff Garzik
2002-10-25  2:01                                                   ` [PATCH] NMI request/release, version 7 - minor cleanups Corey Minyard
2002-10-25 13:26                                                     ` [PATCH] NMI request/release, version 8 Corey Minyard
2002-10-22 12:23 ` [PATCH] NMI request/release Suparna Bhattacharya
This is a public inbox; see mirroring instructions for how to clone and mirror all data and code used for this inbox, as well as URLs for NNTP newsgroup(s).