[PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

* [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
@ 2010-03-24 19:02 Andi Kleen
  2010-03-24 20:42 ` Thomas Gleixner
  0 siblings, 1 reply; 39+ messages in thread
From: Andi Kleen @ 2010-03-24 19:02 UTC (permalink / raw)
  To: tglx, x86, linux-kernel, jesse.brandeburg

Prevent nested interrupts when the IRQ stack is near overflowing v2

Interrupts can always nest when they don't run with IRQF_DISABLED.

When a lot of interrupts hit the same vector on the same
CPU nested interrupts can overflow the irq stack and cause hangs.

This has been observed with MSI-X & Ethernet on a large system.

This patch automatically forces IRQF_DISABLED when
the interrupt stack runs low. I implemented it using
a "callback" (really just a weak call) from the generic IRQ code 
to the architecture code because passing this state down the 
normal call chain would have required changing too much code. 

The irq checks are currently implemented for x86-(32,64) only,
but other architectures could (and probably should) do the same.

Currently the thresholds are 2K each. This is a fairly
arbitary number. On 4K stack i386 it's about half 
the irq stack, on the other configurations it's 1/4-1/16.

This also fixes another minor bug on 32bit: don't dump a backtrace when the 
irq stack runs low.

Based on discussions with Suresh B. Siddha and others.
Originally reported by Jesse Brandeburg.

v2: Use more common code on 32bit. Don't dump stack
    on low irq stack.

Tested-by: emil.s.tantilov@intel.com
Cc: jesse.brandeburg@intel.com
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Suresh B. Siddha <suresh.b.siddha@intel.com>

---
 arch/x86/kernel/irq_32.c |   43 ++++++++++++++++++++++++++++++++++---------
 arch/x86/kernel/irq_64.c |   18 ++++++++++++++++--
 include/linux/irq.h      |    2 ++
 kernel/irq/handle.c      |   16 +++++++++++++++-
 4 files changed, 67 insertions(+), 12 deletions(-)

Index: linux-2.6.34-rc1-ak/include/linux/irq.h
===================================================================
--- linux-2.6.34-rc1-ak.orig/include/linux/irq.h	2010-03-14 03:58:12.000000000 +0100
+++ linux-2.6.34-rc1-ak/include/linux/irq.h	2010-03-24 19:11:23.000000000 +0100
@@ -520,4 +520,6 @@
 }
 #endif	/* CONFIG_SMP */
 
+extern int irq_stack_near_overflow(void);
+
 #endif /* _LINUX_IRQ_H */
Index: linux-2.6.34-rc1-ak/kernel/irq/handle.c
===================================================================
--- linux-2.6.34-rc1-ak.orig/kernel/irq/handle.c	2010-03-14 03:58:12.000000000 +0100
+++ linux-2.6.34-rc1-ak/kernel/irq/handle.c	2010-03-24 19:11:23.000000000 +0100
@@ -358,6 +358,15 @@
 	       "but no thread function available.", irq, action->name);
 }
 
+/*
+ * Is the interrupt stack near overflowing?
+ * Can/should be overriden by architectures
+ */
+int __weak irq_stack_near_overflow(void)
+{
+	return 0;
+}
+
 /**
  * handle_IRQ_event - irq action chain handler
  * @irq:	the interrupt number
@@ -370,7 +379,12 @@
 	irqreturn_t ret, retval = IRQ_NONE;
 	unsigned int status = 0;
 
-	if (!(action->flags & IRQF_DISABLED))
+	/*
+	 * When the IRQ stack is near overflowing don't allow nested
+	 * interrupts.
+	 */
+
+	if (!(action->flags & IRQF_DISABLED) && !irq_stack_near_overflow())
 		local_irq_enable_in_hardirq();
 
 	do {
Index: linux-2.6.34-rc1-ak/arch/x86/kernel/irq_64.c
===================================================================
--- linux-2.6.34-rc1-ak.orig/arch/x86/kernel/irq_64.c	2010-03-03 02:01:27.000000000 +0100
+++ linux-2.6.34-rc1-ak/arch/x86/kernel/irq_64.c	2010-03-24 19:11:23.000000000 +0100
@@ -16,6 +16,7 @@
 #include <linux/ftrace.h>
 #include <linux/uaccess.h>
 #include <linux/smp.h>
+#include <linux/irq.h>
 #include <asm/io_apic.h>
 #include <asm/idle.h>
 #include <asm/apic.h>
@@ -26,6 +27,19 @@
 DEFINE_PER_CPU(struct pt_regs *, irq_regs);
 EXPORT_PER_CPU_SYMBOL(irq_regs);
 
+#define IRQ_STACK_THRESH 2048
+
+/*
+ * Stack overflow checking for the interrupt stacks.
+ * Called by the generic IRQ handler.
+ */
+int irq_stack_near_overflow(void)
+{
+	char *stack;
+	asm("mov %%rsp,%0" : "=r" (stack));
+	return stack <= __get_cpu_var(irq_stack_ptr) - IRQ_STACK_SIZE + IRQ_STACK_THRESH;
+}
+
 /*
  * Probabilistic stack overflow check:
  *
@@ -33,7 +47,7 @@
  * runs on the big interrupt stacks. Checking reliably is too expensive,
  * so we just check from interrupts.
  */
-static inline void stack_overflow_check(struct pt_regs *regs)
+static inline void process_stack_overflow_check(struct pt_regs *regs)
 {
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
 	u64 curbase = (u64)task_stack_page(current);
@@ -52,7 +66,7 @@
 {
 	struct irq_desc *desc;
 
-	stack_overflow_check(regs);
+	process_stack_overflow_check(regs);
 
 	desc = irq_to_desc(irq);
 	if (unlikely(!desc))
Index: linux-2.6.34-rc1-ak/arch/x86/kernel/irq_32.c
===================================================================
--- linux-2.6.34-rc1-ak.orig/arch/x86/kernel/irq_32.c	2010-03-03 02:01:27.000000000 +0100
+++ linux-2.6.34-rc1-ak/arch/x86/kernel/irq_32.c	2010-03-24 19:24:19.000000000 +0100
@@ -17,6 +17,7 @@
 #include <linux/delay.h>
 #include <linux/uaccess.h>
 #include <linux/percpu.h>
+#include <linux/irq.h>
 
 #include <asm/apic.h>
 
@@ -26,16 +27,29 @@
 DEFINE_PER_CPU(struct pt_regs *, irq_regs);
 EXPORT_PER_CPU_SYMBOL(irq_regs);
 
-#ifdef CONFIG_DEBUG_STACKOVERFLOW
-/* Debugging check for stack overflow: is there less than 1KB free? */
-static int check_stack_overflow(void)
+
+static inline int check_stack(int threshold)
 {
 	long sp;
 
 	__asm__ __volatile__("andl %%esp,%0" :
 			     "=r" (sp) : "0" (THREAD_SIZE - 1));
 
-	return sp < (sizeof(struct thread_info) + STACK_WARN);
+	return sp < (sizeof(struct thread_info) + threshold);
+}
+
+#define IRQ_STACK_THRESH 2048
+
+int irq_stack_near_overflow(void)
+{
+	return check_stack(IRQ_STACK_THRESH);
+}
+
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+/* Debugging check for stack overflow: is there less than 2KB free? */
+static inline int check_stack_overflow(void)
+{
+	return check_stack(IRQ_STACK_THRESH);
 }
 
 static void print_stack_overflow(void)
@@ -189,7 +203,12 @@
 
 #else
 static inline int
-execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { return 0; }
+execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
+{
+	if (unlikely(overflow))
+		print_stack_overflow();
+	return 0;
+}
 #endif
 
 bool handle_irq(unsigned irq, struct pt_regs *regs)
@@ -197,17 +216,23 @@
 	struct irq_desc *desc;
 	int overflow;
 
+	/*
+	 * The result of this gets ignored when
+	 * the interrupt is already nested on a irq stack.
+	 * That means no backtrace printed -- that is
+	 * needed because nested interrupts can always happen.
+	 * However the generic IRQ code will check again
+	 * and prevent further nesting if the stack is near
+	 * overflow.
+	 */
 	overflow = check_stack_overflow();
 
 	desc = irq_to_desc(irq);
 	if (unlikely(!desc))
 		return false;
 
-	if (!execute_on_irq_stack(overflow, desc, irq)) {
-		if (unlikely(overflow))
-			print_stack_overflow();
+	if (!execute_on_irq_stack(overflow, desc, irq))
 		desc->handle_irq(irq, desc);
-	}
 
 	return true;
 }

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-24 19:02 [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2 Andi Kleen
@ 2010-03-24 20:42 ` Thomas Gleixner
  2010-03-24 23:08   ` Thomas Gleixner
  0 siblings, 1 reply; 39+ messages in thread
From: Thomas Gleixner @ 2010-03-24 20:42 UTC (permalink / raw)
  To: Andi Kleen; +Cc: x86, linux-kernel, jesse.brandeburg

B1;2005;0cOn Wed, 24 Mar 2010, Andi Kleen wrote:

> Prevent nested interrupts when the IRQ stack is near overflowing v2
> 
> Interrupts can always nest when they don't run with IRQF_DISABLED.
> 
> When a lot of interrupts hit the same vector on the same
> CPU nested interrupts can overflow the irq stack and cause hangs.
> 
> This has been observed with MSI-X & Ethernet on a large system.

In which way are which interrupts nested ?
 
Thanks,

	tglx

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-24 20:42 ` Thomas Gleixner
@ 2010-03-24 23:08   ` Thomas Gleixner
  2010-03-25  0:36     ` Andi Kleen
  0 siblings, 1 reply; 39+ messages in thread
From: Thomas Gleixner @ 2010-03-24 23:08 UTC (permalink / raw)
  To: Andi Kleen; +Cc: x86, linux-kernel, jesse.brandeburg

On Wed, 24 Mar 2010, Thomas Gleixner wrote:

> On Wed, 24 Mar 2010, Andi Kleen wrote:
> 
> > Prevent nested interrupts when the IRQ stack is near overflowing v2
> > 
> > Interrupts can always nest when they don't run with IRQF_DISABLED.
> > 
> > When a lot of interrupts hit the same vector on the same
> > CPU nested interrupts can overflow the irq stack and cause hangs.

That's utter nonsense. An interrupt storm on the same vector does not
cause irq nesting. The irq code prevents reentering a handler and in
case of MSI-X it just disables the IRQ when it comes again while the
first irq on that vector is still in progress. So the maximum nesting
is two up to handle_edge_irq() where it disables the IRQ and returns
right away.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-24 23:08   ` Thomas Gleixner
@ 2010-03-25  0:36     ` Andi Kleen
  2010-03-25  1:46       ` Thomas Gleixner
  0 siblings, 1 reply; 39+ messages in thread
From: Andi Kleen @ 2010-03-25  0:36 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Andi Kleen, x86, linux-kernel, jesse.brandeburg

On Thu, Mar 25, 2010 at 12:08:23AM +0100, Thomas Gleixner wrote:
> On Wed, 24 Mar 2010, Thomas Gleixner wrote:
> 
> > On Wed, 24 Mar 2010, Andi Kleen wrote:
> > 
> > > Prevent nested interrupts when the IRQ stack is near overflowing v2
> > > 
> > > Interrupts can always nest when they don't run with IRQF_DISABLED.
> > > 
> > > When a lot of interrupts hit the same vector on the same
> > > CPU nested interrupts can overflow the irq stack and cause hangs.
> That's utter nonsense. An interrupt storm on the same vector does not
> cause irq nesting. The irq code prevents reentering a handler and in

Sorry it's the same CPU, not the same vector.  Yes the reference
to same vector was misleading.

"
Multiple vectors on a multi port NIC pointing to the same CPU, 
all hitting the irq stack until it overflows.
"

> case of MSI-X it just disables the IRQ when it comes again while the
> first irq on that vector is still in progress. So the maximum nesting
> is two up to handle_edge_irq() where it disables the IRQ and returns
> right away.

Real maximum nesting is all IRQs running with interrupts on pointing
to the same CPU. Enough from multiple busy IRQ sources and you go boom.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25  0:36     ` Andi Kleen
@ 2010-03-25  1:46       ` Thomas Gleixner
  2010-03-25  9:37         ` Andi Kleen
  2010-03-25 10:50         ` Alan Cox
  0 siblings, 2 replies; 39+ messages in thread
From: Thomas Gleixner @ 2010-03-25  1:46 UTC (permalink / raw)
  To: Andi Kleen; +Cc: x86, LKML, jesse.brandeburg, Linus Torvalds

On Thu, 25 Mar 2010, Andi Kleen wrote:

> On Thu, Mar 25, 2010 at 12:08:23AM +0100, Thomas Gleixner wrote:
> > On Wed, 24 Mar 2010, Thomas Gleixner wrote:
> > 
> > > On Wed, 24 Mar 2010, Andi Kleen wrote:
> > > 
> > > > Prevent nested interrupts when the IRQ stack is near overflowing v2
> > > > 
> > > > Interrupts can always nest when they don't run with IRQF_DISABLED.
> > > > 
> > > > When a lot of interrupts hit the same vector on the same
> > > > CPU nested interrupts can overflow the irq stack and cause hangs.
> > That's utter nonsense. An interrupt storm on the same vector does not
> > cause irq nesting. The irq code prevents reentering a handler and in
> 
> Sorry it's the same CPU, not the same vector.  Yes the reference
> to same vector was misleading.

"misleading" is an euphemism at best ... 

This is ever repeating shit: your changelogs suck big time!

> "
> Multiple vectors on a multi port NIC pointing to the same CPU, 
> all hitting the irq stack until it overflows.
> "

So there are several questions:

1) Why are those multiple vectors all hitting the same cpu at the same
   time ? How many of them are firing at the same time ?

2) What kind of scenario is that ? Massive traffic on the card or some
   corner case ?

3) Why does the NIC driver code not set IRQF_DISABLED in the first
   place?  AFAICT the network drivers just kick off NAPI, so whats the
   point to run those handlers with IRQs enabled at all ?

> > case of MSI-X it just disables the IRQ when it comes again while the
> > first irq on that vector is still in progress. So the maximum nesting
> > is two up to handle_edge_irq() where it disables the IRQ and returns
> > right away.
> 
> Real maximum nesting is all IRQs running with interrupts on pointing
> to the same CPU. Enough from multiple busy IRQ sources and you go boom.

Which leads to the general question why we have that IRQF_DISABLED
shite at all. AFAICT the historical reason were IDE drivers, but we
grew other abusers like USB, SCSI and other crap which runs hard irq
handlers for hundreds of micro seconds in the worst case. All those
offenders need to be fixed (e.g. by converting to threaded irq
handlers) so we can run _ALL_ hard irq context handlers with interrupts
disabled. lockdep will sort out the nasty ones which enable irqs in the
middle of that hard irq handler.

Your band aid patch is just disgusting. How do you ensure that none of
the handlers on which you enforce IRQ_DISABLED does not enable
interrupts itself ? You _CANNOT_.

I'm not taking that patch unless you come up with a real convincing
story.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25  1:46       ` Thomas Gleixner
@ 2010-03-25  9:37         ` Andi Kleen
  2010-03-25 11:09           ` Thomas Gleixner
  2010-03-25 10:50         ` Alan Cox
  1 sibling, 1 reply; 39+ messages in thread
From: Andi Kleen @ 2010-03-25  9:37 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Andi Kleen, x86, LKML, jesse.brandeburg, Linus Torvalds

On Thu, Mar 25, 2010 at 02:46:42AM +0100, Thomas Gleixner wrote:
> "misleading" is an euphemism at best ... 
> 
> This is ever repeating shit: your changelogs suck big time!

As much as your comments I guess.

> 
> > "
> > Multiple vectors on a multi port NIC pointing to the same CPU, 
> > all hitting the irq stack until it overflows.
> > "
> 
> So there are several questions:
> 
> 1) Why are those multiple vectors all hitting the same cpu at the same
>    time ? How many of them are firing at the same time ?

This was 4 NIC ports being operational under stress at the same time

> 
> 2) What kind of scenario is that ? Massive traffic on the card or some
>    corner case ?

Massive traffic on the card from multiple ports on a large system.

> 3) Why does the NIC driver code not set IRQF_DISABLED in the first
>    place?  AFAICT the network drivers just kick off NAPI, so whats the
>    point to run those handlers with IRQs enabled at all ?

I think the idea was to minimize irq latency for other interrupts

But yes defaulting to IRQF_DISABLED would fix it too, at some 
cost. In principle that could be done also.

> 
> > > case of MSI-X it just disables the IRQ when it comes again while the
> > > first irq on that vector is still in progress. So the maximum nesting
> > > is two up to handle_edge_irq() where it disables the IRQ and returns
> > > right away.
> > 
> > Real maximum nesting is all IRQs running with interrupts on pointing
> > to the same CPU. Enough from multiple busy IRQ sources and you go boom.
> 
> Which leads to the general question why we have that IRQF_DISABLED
> shite at all. AFAICT the historical reason were IDE drivers, but we

My understanding was that traditionally the irq handlers were
allowed to nest and the "fast" non nest case was only added for some 
fast handlers like serial with small FIFOs.

> grew other abusers like USB, SCSI and other crap which runs hard irq
> handlers for hundreds of micro seconds in the worst case. All those
> offenders need to be fixed (e.g. by converting to threaded irq
> handlers) so we can run _ALL_ hard irq context handlers with interrupts
> disabled. lockdep will sort out the nasty ones which enable irqs in the
> middle of that hard irq handler.

Ok glad to give you advertisement time for your pet project...

Anyways if such a thing was done it would be a long term project
and that short term fix would be still needed.

> the handlers on which you enforce IRQ_DISABLED does not enable
> interrupts itself ? You _CANNOT_.

I can't, just as much as I cannot enforce they won't crash or not loop
forever or something. But afaik they don't. 

I did some quick grepping and didn't found a driver who does that
at least.

-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25  1:46       ` Thomas Gleixner
  2010-03-25  9:37         ` Andi Kleen
@ 2010-03-25 10:50         ` Alan Cox
  2010-03-25 11:16           ` Thomas Gleixner
  2010-03-25 11:57           ` Andi Kleen
  1 sibling, 2 replies; 39+ messages in thread
From: Alan Cox @ 2010-03-25 10:50 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Andi Kleen, x86, LKML, jesse.brandeburg, Linus Torvalds

> Which leads to the general question why we have that IRQF_DISABLED
> shite at all. AFAICT the historical reason were IDE drivers, but we
> grew other abusers like USB, SCSI and other crap which runs hard irq
> handlers for hundreds of micro seconds in the worst case.

Anyone you've forgotten to offend ?

We have IRQF_DISABLED and stuff using that model because for something
like ten years the Linux kernel had no real sane notion of handing stuff
off to worker threads or threaded irq support and also because there were
so many errata around IRQ masking as well as all the evil business
with interrupt delivery being asynchronous to the PCI or ISA transactions
on some CPUs (eg you could write the irq mask register on the device,
read it back to ensure it occurred and *still* get an IRQ delivered after
that point because it was on the APIC bus)

Dumping everyones code under an insult without any historical context
isn't helpful.

Pretty much the only 'core' driver today which enables IRQs in the irq
handlers and needs it is the old IDE layer. There are also a couple of
drivers which play games with disable/enable_irq in the IRQ paths for
other reasons (lack of irq threads when written and a hardware model thats
totally SMP unfriendly). 8390 is the obvious one here and it at least
would be far far saner using threaded IRQs and normal locking with IRQs
unmasked.

Alan

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25  9:37         ` Andi Kleen
@ 2010-03-25 11:09           ` Thomas Gleixner
  2010-03-25 12:11             ` Andi Kleen
  2010-03-25 16:13             ` Linus Torvalds
  0 siblings, 2 replies; 39+ messages in thread
From: Thomas Gleixner @ 2010-03-25 11:09 UTC (permalink / raw)
  To: Andi Kleen; +Cc: x86, LKML, jesse.brandeburg, Linus Torvalds

On Thu, 25 Mar 2010, Andi Kleen wrote:
> On Thu, Mar 25, 2010 at 02:46:42AM +0100, Thomas Gleixner wrote:
> > 3) Why does the NIC driver code not set IRQF_DISABLED in the first
> >    place?  AFAICT the network drivers just kick off NAPI, so whats the
> >    point to run those handlers with IRQs enabled at all ?
> 
> I think the idea was to minimize irq latency for other interrupts

So what's the point ? Is the irq handler of that card so long running,
that it causes trouble ? If yes, then this needs to be fixed. If no,
then it simply can run with IRQs disabled.
 
> But yes defaulting to IRQF_DISABLED would fix it too, at some 
> cost. In principle that could be done also.

What's the cost? Nothing at all. There is no f*cking difference between:

 IRQ1 10us
 IRQ2 10us
 IRQ3 10us
 IRQ4 10us

and

 IRQ1 2us
  IRQ2 2us
   IRQ3 2us
    IRQ4 10us
   IRQ3 8us
  IRQ2 8us
 IRQ1 8us

The system is neither running a task nor a softirq for 40us in both
cases.

So what's the point of running a well written (short) interrupt
handler with interrupts enabled ? Nothing at all. It just makes us
deal with crap like stacks overflowing for no good reason.

> > 
> > > > case of MSI-X it just disables the IRQ when it comes again while the
> > > > first irq on that vector is still in progress. So the maximum nesting
> > > > is two up to handle_edge_irq() where it disables the IRQ and returns
> > > > right away.
> > > 
> > > Real maximum nesting is all IRQs running with interrupts on pointing
> > > to the same CPU. Enough from multiple busy IRQ sources and you go boom.
> > 
> > Which leads to the general question why we have that IRQF_DISABLED
> > shite at all. AFAICT the historical reason were IDE drivers, but we
> 
> My understanding was that traditionally the irq handlers were
> allowed to nest and the "fast" non nest case was only added for some 
> fast handlers like serial with small FIFOs.
> 
> > grew other abusers like USB, SCSI and other crap which runs hard irq
> > handlers for hundreds of micro seconds in the worst case. All those
> > offenders need to be fixed (e.g. by converting to threaded irq
> > handlers) so we can run _ALL_ hard irq context handlers with interrupts
> > disabled. lockdep will sort out the nasty ones which enable irqs in the
> > middle of that hard irq handler.
> 
> Ok glad to give you advertisement time for your pet project...

You just don't get it. Long running interrupt handlers are a
BUG. Period. If they are short they can run with IRQs disabled w/o any
harm to the system.

> Anyways if such a thing was done it would be a long term project
> and that short term fix would be still needed.

Your patch is not a fix, It's a lousy, horrible and unreliable
workaround. It's not fixing the root cause of the problem at hand.

The real fix is to run the NIC interrupt handlers with IRQs disabled
and be done with it. If you still think that introduces latencies then
prove it with numbers.

Thanks,

	tglx

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25 10:50         ` Alan Cox
@ 2010-03-25 11:16           ` Thomas Gleixner
  2010-03-25 11:59             ` Alan Cox
  2010-03-25 12:00             ` Andi Kleen
  2010-03-25 11:57           ` Andi Kleen
  1 sibling, 2 replies; 39+ messages in thread
From: Thomas Gleixner @ 2010-03-25 11:16 UTC (permalink / raw)
  To: Alan Cox; +Cc: Andi Kleen, x86, LKML, jesse.brandeburg, Linus Torvalds

On Thu, 25 Mar 2010, Alan Cox wrote:

> > Which leads to the general question why we have that IRQF_DISABLED
> > shite at all. AFAICT the historical reason were IDE drivers, but we
> > grew other abusers like USB, SCSI and other crap which runs hard irq
> > handlers for hundreds of micro seconds in the worst case.
> 
> Anyone you've forgotten to offend ?

Hmm, not sure. Have not measured IRQ handler run times for quite a
while :)

> Pretty much the only 'core' driver today which enables IRQs in the irq
> handlers and needs it is the old IDE layer. There are also a couple of
> drivers which play games with disable/enable_irq in the IRQ paths for
> other reasons (lack of irq threads when written and a hardware model thats
> totally SMP unfriendly). 8390 is the obvious one here and it at least
> would be far far saner using threaded IRQs and normal locking with IRQs
> unmasked.

Right, but that's not the problem here. We talk about a (hopefully)
well written interrupt handler which runs for a very short
time. What's the point of running it with interrupts enabled ?
Nothing, we just run into stack overflow problems. So what's better:
an unreliable and ugly hackaround or just avoiding the possible stack
overflow in the first place ?

Thanks,

	tglx



^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25 10:50         ` Alan Cox
  2010-03-25 11:16           ` Thomas Gleixner
@ 2010-03-25 11:57           ` Andi Kleen
  1 sibling, 0 replies; 39+ messages in thread
From: Andi Kleen @ 2010-03-25 11:57 UTC (permalink / raw)
  To: Alan Cox
  Cc: Thomas Gleixner, Andi Kleen, x86, LKML, jesse.brandeburg,
	Linus Torvalds

> Pretty much the only 'core' driver today which enables IRQs in the irq
> handlers and needs it is the old IDE layer. There are also a couple of

Thanks. 

I'm tempted to just ignore it in this case, but in theory it might
still have troubles if there are a lot of interrupts to the same CPU.

I've only had a report on a very large system with a very high interrupt
rate on a very fast NIC though, so presumably it's not too common.

Anyways this patch will fix the problem for all drivers that do 
not explicitely enable interrupts, which is the overwhelming majority.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25 11:16           ` Thomas Gleixner
@ 2010-03-25 11:59             ` Alan Cox
  2010-03-25 12:00             ` Andi Kleen
  1 sibling, 0 replies; 39+ messages in thread
From: Alan Cox @ 2010-03-25 11:59 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Andi Kleen, x86, LKML, jesse.brandeburg, Linus Torvalds

> time. What's the point of running it with interrupts enabled ?

The underlying question you posed was why don't we kill off the running
with irqs enabled cases. That requires more work but should definitely
happen.

> Nothing, we just run into stack overflow problems. So what's better:
> an unreliable and ugly hackaround or just avoiding the possible stack
> overflow in the first place ?

I think you are trying to have a debate when we are in agreement anyway.
I don't btw think the workaround is 'unreliable', ugly as sin yes but the
logic appears sound.

Alan

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25 11:16           ` Thomas Gleixner
  2010-03-25 11:59             ` Alan Cox
@ 2010-03-25 12:00             ` Andi Kleen
  1 sibling, 0 replies; 39+ messages in thread
From: Andi Kleen @ 2010-03-25 12:00 UTC (permalink / raw)
  To: Thomas Gleixner
  Cc: Alan Cox, Andi Kleen, x86, LKML, jesse.brandeburg, Linus Torvalds

On Thu, Mar 25, 2010 at 12:16:17PM +0100, Thomas Gleixner wrote:
> > Pretty much the only 'core' driver today which enables IRQs in the irq
> > handlers and needs it is the old IDE layer. There are also a couple of
> > drivers which play games with disable/enable_irq in the IRQ paths for
> > other reasons (lack of irq threads when written and a hardware model thats
> > totally SMP unfriendly). 8390 is the obvious one here and it at least
> > would be far far saner using threaded IRQs and normal locking with IRQs
> > unmasked.
> 
> Right, but that's not the problem here. We talk about a (hopefully)
> well written interrupt handler which runs for a very short
> time. 

The NIC handlers can do quite some work under high traffic.
Even with interrupt mitigation and NAPI.

> What's the point of running it with interrupts enabled ?

Other interrupts.

> Nothing, we just run into stack overflow problems. So what's better:
> an unreliable and ugly hackaround

I don't think that's a accurate description of the patch at all.
Besides I believe it's reliable in all cases that matter.

-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25 11:09           ` Thomas Gleixner
@ 2010-03-25 12:11             ` Andi Kleen
  2010-03-25 13:17               ` Thomas Gleixner
  2010-03-25 16:13             ` Linus Torvalds
  1 sibling, 1 reply; 39+ messages in thread
From: Andi Kleen @ 2010-03-25 12:11 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Andi Kleen, x86, LKML, jesse.brandeburg, Linus Torvalds

On Thu, Mar 25, 2010 at 12:09:10PM +0100, Thomas Gleixner wrote:
> On Thu, 25 Mar 2010, Andi Kleen wrote:
> > On Thu, Mar 25, 2010 at 02:46:42AM +0100, Thomas Gleixner wrote:
> > > 3) Why does the NIC driver code not set IRQF_DISABLED in the first
> > >    place?  AFAICT the network drivers just kick off NAPI, so whats the
> > >    point to run those handlers with IRQs enabled at all ?
> > 
> > I think the idea was to minimize irq latency for other interrupts
> 
> So what's the point ? Is the irq handler of that card so long running,
> that it causes trouble ? 

I believe it's more like that you have a lot of them (even with interrupt
mitigation and NAPI polling) and they have to scale up the work to handle
high bandwidths, so it ends up with a lot of time in them anyways,
 ending with skew in the timers and similar issues.

Anyways my goal here was simply to have a least intrusive fix, not
change semantics for everyone.

> > cost. In principle that could be done also.
> 
> What's the cost? Nothing at all. There is no f*cking difference between:
> 
>  IRQ1 10us
>  IRQ2 10us
>  IRQ3 10us
>  IRQ4 10us
> 
> and
> 
>  IRQ1 2us
>   IRQ2 2us
>    IRQ3 2us
>     IRQ4 10us
>    IRQ3 8us
>   IRQ2 8us
>  IRQ1 8us
> 
> The system is neither running a task nor a softirq for 40us in both
> cases.

Yes it could work out this. Or it could not. I'm not sure, so I chose
the safe option.

> 
> So what's the point of running a well written (short) interrupt
> handler with interrupts enabled ? Nothing at all. It just makes us
> deal with crap like stacks overflowing for no good reason.

Ok so you're just proposing to always set IRQF_DISABLED? 

If you can force everyone to use that that would work I guess.

I don't know what problems it would cause. My fear is that any latency problems
would be answered with a "move to RT, moron" from your mouth though.

> > Anyways if such a thing was done it would be a long term project
> > and that short term fix would be still needed.
> 
> Your patch is not a fix, It's a lousy, horrible and unreliable
> workaround. It's not fixing the root cause of the problem at hand.

It fixes the bug in a minimally intrusive way.
> 
> The real fix is to run the NIC interrupt handlers with IRQs disabled
> and be done with it. If you still think that introduces latencies then
> prove it with numbers.

Sorry you got that wrong. I'm not proposing to change semantics, you
are proposing that. So you would need to prove anything if at all.

Anyways if you think you can write a better patch to fix that bug
please fell free to write one.

-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25 12:11             ` Andi Kleen
@ 2010-03-25 13:17               ` Thomas Gleixner
  2010-03-25 13:32                 ` Andi Kleen
  0 siblings, 1 reply; 39+ messages in thread
From: Thomas Gleixner @ 2010-03-25 13:17 UTC (permalink / raw)
  To: Andi Kleen; +Cc: x86, LKML, jesse.brandeburg, Linus Torvalds

On Thu, 25 Mar 2010, Andi Kleen wrote:

> On Thu, Mar 25, 2010 at 12:09:10PM +0100, Thomas Gleixner wrote:
> > On Thu, 25 Mar 2010, Andi Kleen wrote:
> > > On Thu, Mar 25, 2010 at 02:46:42AM +0100, Thomas Gleixner wrote:
> > > > 3) Why does the NIC driver code not set IRQF_DISABLED in the first
> > > >    place?  AFAICT the network drivers just kick off NAPI, so whats the
> > > >    point to run those handlers with IRQs enabled at all ?
> > > 
> > > I think the idea was to minimize irq latency for other interrupts
> > 
> > So what's the point ? Is the irq handler of that card so long running,
> > that it causes trouble ? 
> 
> I believe it's more like that you have a lot of them (even with interrupt
> mitigation and NAPI polling) and they have to scale up the work to handle
> high bandwidths, so it ends up with a lot of time in them anyways,
>  ending with skew in the timers and similar issues.

Timekeeping is an absolute non issue here. The time keeping code can
cope with pretty large delays and there is nothing which 
 
> Anyways my goal here was simply to have a least intrusive fix, not
> change semantics for everyone.
>
> Yes it could work out this. Or it could not. I'm not sure, so I chose
> the safe option.

Oh it's safe because it solves the problem on that particular machine
and workload?

No, it's not safe at all. 

    - it does not prevent a driver reenabling interrupts

    - it will cause real large latencies when the interrupt which
      happens to trigger that check is one of the long running
      ones. Are you going to deal with the "oh we see >500us" latencies
      bugreports ?

    - worst case it'll brick your machine if the interrupt which
      happens to trigger that check relies on irq enabled semantics
      (e.g. missing jiffies update). Yes, such a driver is broken by
      design, but we have such crap lurking. So much for not changing
      semantics.
 
> > So what's the point of running a well written (short) interrupt
> > handler with interrupts enabled ? Nothing at all. It just makes us
> > deal with crap like stacks overflowing for no good reason.
> 
> Ok so you're just proposing to always set IRQF_DISABLED? 

For any sane written driver, yes. We have crap irq handlers which need
to be fixed, so we can't enforce it right away. See above.
 
> I don't know what problems it would cause. My fear is that any latency problems
> would be answered with a "move to RT, moron" from your mouth though.

Oh well.
 
> > > Anyways if such a thing was done it would be a long term project
> > > and that short term fix would be still needed.
> > 
> > Your patch is not a fix, It's a lousy, horrible and unreliable
> > workaround. It's not fixing the root cause of the problem at hand.
> 
> It fixes the bug in a minimally intrusive way.

It papers over the problem. We already know that the NIC driver floods
the machine with interrupts, so why are you insisting that we need to
bandaid that problem ?

The minimal intrusive way is a one liner in that very driver code and
if it causes problems for that very driver then we don't fix them with
adding a callback in the generic interrupt code path.

The message which we would send out with applying that band aid would
be simply: Go ahead driver writers and let your handlers run as long
as they want, we'll safe you in 99.9% of the cases and we'll happily
go and debug the 0.1% of completely undebuggable shit which will
result out of that.

No thanks,

	tglx

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25 13:17               ` Thomas Gleixner
@ 2010-03-25 13:32                 ` Andi Kleen
  2010-03-25 14:16                   ` Thomas Gleixner
  0 siblings, 1 reply; 39+ messages in thread
From: Andi Kleen @ 2010-03-25 13:32 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Andi Kleen, x86, LKML, jesse.brandeburg, Linus Torvalds

> > > > Anyways if such a thing was done it would be a long term project
> > > > and that short term fix would be still needed.
> > > 
> > > Your patch is not a fix, It's a lousy, horrible and unreliable
> > > workaround. It's not fixing the root cause of the problem at hand.
> > 
> > It fixes the bug in a minimally intrusive way.
> 
> It papers over the problem. We already know that the NIC driver floods
> the machine with interrupts, so why are you insisting that we need to

Well in this case it's simply because it has 4 ports and they are all
active and have a lot of MSI-X vectors for each stream. 

Even if you had the perfect interrupt handler that ran in
one cycle, if you had enough of them in parallel from different ports
there could be still a stack overflow problem on individual CPUs.

> bandaid that problem ?

Because the system crashes otherwise on that test?

> The minimal intrusive way is a one liner in that very driver code and
> if it causes problems for that very driver then we don't fix them with
> adding a callback in the generic interrupt code path.

Ok.

> 
> The message which we would send out with applying that band aid would
> be simply: Go ahead driver writers and let your handlers run as long

Well it's simply the current state of affairs today. I'm merely
attempting to make the current state slightly safer without breaking
anything in the process.

> as they want, we'll safe you in 99.9% of the cases and we'll happily
> go and debug the 0.1% of completely undebuggable shit which will
> result out of that.

I'm not sure I fully understand your suggestion.

Is your suggestion to only set IRQF_DISABLED that one driver and ignore the 
other ones? (let's call that the "ostrich approach")

Or is your suggestion to set IRQF_DISABLED by default?

Or is it something else?

Thanks,

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25 13:32                 ` Andi Kleen
@ 2010-03-25 14:16                   ` Thomas Gleixner
  2010-03-25 15:38                     ` Andi Kleen
  2010-03-25 16:06                     ` Alan Cox
  0 siblings, 2 replies; 39+ messages in thread
From: Thomas Gleixner @ 2010-03-25 14:16 UTC (permalink / raw)
  To: Andi Kleen; +Cc: x86, LKML, jesse.brandeburg, Linus Torvalds

On Thu, 25 Mar 2010, Andi Kleen wrote:
> > > > > Anyways if such a thing was done it would be a long term project
> > > > > and that short term fix would be still needed.
> > > > 
> > > > Your patch is not a fix, It's a lousy, horrible and unreliable
> > > > workaround. It's not fixing the root cause of the problem at hand.
> > > 
> > > It fixes the bug in a minimally intrusive way.
> > 
> > It papers over the problem. We already know that the NIC driver floods
> > the machine with interrupts, so why are you insisting that we need to
> 
> Well in this case it's simply because it has 4 ports and they are all
> active and have a lot of MSI-X vectors for each stream. 
> 
> Even if you had the perfect interrupt handler that ran in
> one cycle, if you had enough of them in parallel from different ports
> there could be still a stack overflow problem on individual CPUs.

Not at all if the handler runs with irqs disabled.
 
> > The minimal intrusive way is a one liner in that very driver code and
> > if it causes problems for that very driver then we don't fix them with
> > adding a callback in the generic interrupt code path.
> 
> Ok.
> 
> > 
> > The message which we would send out with applying that band aid would
> > be simply: Go ahead driver writers and let your handlers run as long
> 
> Well it's simply the current state of affairs today. I'm merely
> attempting to make the current state slightly safer without breaking
> anything in the process.

Well, I'd agree if those stack overflows would be a massive reported
problem. 

Right now they happen with a weird test case which points out a
trouble spot. Multi vector NICs under heavy load. So why not go there
and change the handful of drivers to run their handlers with irqs
disabled?

Band aids are the last resort if we can't deal with a problem by other
sane means. And this problem falls not into that category, it can be
solved in the affected drivers with zero effort.

Thanks,

	tglx




^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25 14:16                   ` Thomas Gleixner
@ 2010-03-25 15:38                     ` Andi Kleen
  2010-03-25 16:06                     ` Alan Cox
  1 sibling, 0 replies; 39+ messages in thread
From: Andi Kleen @ 2010-03-25 15:38 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Andi Kleen, x86, LKML, jesse.brandeburg, Linus Torvalds

> > Well it's simply the current state of affairs today. I'm merely
> > attempting to make the current state slightly safer without breaking
> > anything in the process.
> 
> Well, I'd agree if those stack overflows would be a massive reported
> problem. 

At least the people who reported it to me thought it was a massive
problem @)

> Right now they happen with a weird test case which points out a
> trouble spot. Multi vector NICs under heavy load. So why not go there
> and change the handful of drivers to run their handlers with irqs
> disabled?

Ok, but afaik it's not that small a number: MSI-X support is getting
more and more wide spread. It's pretty universal in the 10+GbitE space
for space and starts getting deployed for block too.

-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25 14:16                   ` Thomas Gleixner
  2010-03-25 15:38                     ` Andi Kleen
@ 2010-03-25 16:06                     ` Alan Cox
  1 sibling, 0 replies; 39+ messages in thread
From: Alan Cox @ 2010-03-25 16:06 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Andi Kleen, x86, LKML, jesse.brandeburg, Linus Torvalds

O> Right now they happen with a weird test case which points out a
> trouble spot. Multi vector NICs under heavy load. So why not go there
> and change the handful of drivers to run their handlers with irqs
> disabled?

How about - because it can happen anyway ? It's merely a lot less likely
and if it is occuring at random in very obscure situations the chances
are it doesn't produce a nice neat report that it occurred either. Your
assertion that this is the only case it is seen appears unbacked by
evidence for or against.

> Band aids are the last resort if we can't deal with a problem by other
> sane means. And this problem falls not into that category, it can be
> solved in the affected drivers with zero effort.

Thomas - you can't complain about Andi's patches being inadequate and
then suggest an ever more incomplete 'solution'. Absolute minimum we need
to WARN() in that situation so that kerneloops.org can gather statistics
on what occurs and try to find out.

The real fix is not zero effort, as you yourself said there are quite a
few subsystems re-enabling interrupts in their IRQ paths and all would
need fixing.

Alan

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25 11:09           ` Thomas Gleixner
  2010-03-25 12:11             ` Andi Kleen
@ 2010-03-25 16:13             ` Linus Torvalds
  2010-03-25 16:17               ` Linus Torvalds
                                 ` (3 more replies)
  1 sibling, 4 replies; 39+ messages in thread
From: Linus Torvalds @ 2010-03-25 16:13 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Andi Kleen, x86, LKML, jesse.brandeburg

On Thu, 25 Mar 2010, Thomas Gleixner wrote:

> On Thu, 25 Mar 2010, Andi Kleen wrote:
> > On Thu, Mar 25, 2010 at 02:46:42AM +0100, Thomas Gleixner wrote:
> > > 3) Why does the NIC driver code not set IRQF_DISABLED in the first
> > >    place?  AFAICT the network drivers just kick off NAPI, so whats the
> > >    point to run those handlers with IRQs enabled at all ?
> > 
> > I think the idea was to minimize irq latency for other interrupts
> 
> So what's the point ? Is the irq handler of that card so long running,
> that it causes trouble ? If yes, then this needs to be fixed. If no,
> then it simply can run with IRQs disabled.

So historically _all_ interrupts allow nesting, except for what used to be 
called "fast" irq handlers. Notably the serial line, which especially on 
chips without buffering needs really low latencies.

So that's where the irq handler code comes from: a "fast" interrupt will 
not ever enable the CPU interrupts at all, and can run without ever doing 
the whole "mask and ACK" code. But no, we absolutely MUST NOT make regular 
interrupt handlers be that kind of fast handlers (ie IRQF_DISABLED in 
modern parlance), because that would make the whole point moot.

So Thomas, you're wrong. We can't fix all irq handlers to be really quick, 
and we MUST NOT run them with all other irq's disabled if they are not - 
because that obviates the whole point.

[ There's another historical issue, which is that at least the SCSI layer 
  used to depend on timer interrupts still coming in, because there were 
  literally cases where the cdoe would wait for a timer tick while 
  _inside_ the SCSI irq handler.

  That may or may not be true any more - I certainly hope it isn't. But 
  it's an example of the kinds of things drivers really can do. ]

> What's the cost? Nothing at all. There is no f*cking difference between:
> 
>  IRQ1 10us
>  IRQ2 10us
>  IRQ3 10us
>  IRQ4 10us

But there _is_ a big f*cking difference when there is another irq involved 
that wants to happen immediately. For a network card that's not (much) of 
an issue, since any sane network card will buffer things anyway. For an 
original 8250 with a single-byte buffer running at 19200 bps on a slow 
machine, it really is a _huge_ deal. On that slow machine, the other 
interrupt handlers won't be 10us anyway. A single PIO op is 1us!

There are other cases where this matters, but that 8250 is the historical 
one.

NOTE! Historically, the "fast" handlers also had a much faster irq 
response because they didn't do that whole MASK/ACK/END thing. So they'd 
just keep the CPU interrupts disabled, and ACK at the end, and I think 
we've even used AUTOEIO so that they didn't need any ACK at all, and we 
never touched the interrupt controller itself for them.

Again, that's a big deal when you're trying to push a serial line past 
19200bps on a slow machine with no hardware buffering (or even to 9600bps, 
for that matter - those IRQ controller accesses were traditionally all 1us 
each, and to do mask+ack+unmask was a minimum of three writes if you were 
off the slave chip, and while we have that magic 'cached_mask' we didn't 
always do that, so it ended up being three PIO writes and two PIO reads: a 
_minimim_ of 5us just for irq controller accesses).

> So what's the point of running a well written (short) interrupt
> handler with interrupts enabled ? Nothing at all. It just makes us
> deal with crap like stacks overflowing for no good reason.

If you know it's short and does almost nothing at all, then IRQF_DISABLED 
would be fine. But you haven't thought it through: IRQF_DISABLED also 
means that you cannot share interrupts with other people (think about it), 
so if this was _only_ about that particular case of four network cards on 
four separate irqs you'd be right, but it isn't.

You need to look at the bigger picture.

> You just don't get it. Long running interrupt handlers are a
> BUG. Period. If they are short they can run with IRQs disabled w/o any
> harm to the system.

Thomas, YOU are the one not getting it. Long-running is a fact. And 
long-running is all relative. 100us is a long time, but with certain 
hardware it's just a fact of life due to PIO costs. IDE drives in PIO mode 
are perhaps the most well-known and extreme example, but tens of 
microseconds is pretty much par for the course.

And yes, slow hardware still exists.

And even on fast hardware, we have the irq sharing issue, along with 
random legacy driver issues.

Now, that all said, I don't know whether Andi's patch is really the right 
thing. But it may well approach being the best thing we'd have.

Now, it's also true that our IRQ infrastructure handlers _could_ be 
smarter, and make the whole problem less likely to happen.

In particular, it's probably true that especially on modern hardware with 
multiple cores, and especially when you do _not_ have irq sharing (which 
is the common case these days for things like network drivers that can use 
MSI), we really would be better off having the irq disabled over the whole 
thing, and on some interrupt controllers it might even be worth it to do 
the old optimization of not masking-and-acking, but just acking.

But see above. This is _not_ something that a driver can do any more. They 
don't know whether the interrupt might end up being shared. Just blindly 
setting IRAF_DISABLED in a driver is _not_ the answer. But being smarter 
in the generic irq handler code might work.

And then, what we could do, is to mark the drivers that absolutely _must_ 
be able to nest specially. Like the IDE driver when in PIO mode. Or maybe 
the SCSI drivers, if they still depend on that timer interrupt happening 
while they are busy.

		Linus

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25 16:13             ` Linus Torvalds
@ 2010-03-25 16:17               ` Linus Torvalds
  2010-03-25 16:27               ` Ingo Molnar
                                 ` (2 subsequent siblings)
  3 siblings, 0 replies; 39+ messages in thread
From: Linus Torvalds @ 2010-03-25 16:17 UTC (permalink / raw)
  To: Thomas Gleixner; +Cc: Andi Kleen, x86, LKML, jesse.brandeburg



On Thu, 25 Mar 2010, Linus Torvalds wrote:
> 
> NOTE! Historically, the "fast" handlers also had a much faster irq 
> response because they didn't do that whole MASK/ACK/END thing. So they'd 
> just keep the CPU interrupts disabled, and ACK at the end, and I think 
> we've even used AUTOEIO so that they didn't need any ACK at all, and we 
> never touched the interrupt controller itself for them.

Btw, it was even more extreme than that. The fast irq handlers got a 
totally separate kernel entry point, and wouldn't save all registers, only 
the compiler-clobbered ones. Which is why they then had no "struct 
pt_regs" etc.

And yes, it really mattered. Then later we got so bloated that it wasn't 
much of an issue - and just made everything more complicated.

		Linus

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25 16:13             ` Linus Torvalds
  2010-03-25 16:17               ` Linus Torvalds
@ 2010-03-25 16:27               ` Ingo Molnar
  2010-03-25 16:33                 ` Ingo Molnar
  2010-03-25 18:27                 ` Andi Kleen
  2010-03-25 16:52               ` Thomas Gleixner
  2010-03-25 17:47               ` Peter Zijlstra
  3 siblings, 2 replies; 39+ messages in thread
From: Ingo Molnar @ 2010-03-25 16:27 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Thomas Gleixner, Andi Kleen, x86, LKML, jesse.brandeburg


* Linus Torvalds <torvalds@linux-foundation.org> wrote:

[...]
>
> Now, it's also true that our IRQ infrastructure handlers _could_ be smarter, 
> and make the whole problem less likely to happen.
> 
> In particular, it's probably true that especially on modern hardware with 
> multiple cores, and especially when you do _not_ have irq sharing (which is 
> the common case these days for things like network drivers that can use 
> MSI), we really would be better off having the irq disabled over the whole 
> thing, and on some interrupt controllers it might even be worth it to do the 
> old optimization of not masking-and-acking, but just acking.

Yes.

> But see above. This is _not_ something that a driver can do any more. They 
> don't know whether the interrupt might end up being shared. Just blindly 
> setting IRAF_DISABLED in a driver is _not_ the answer. But being smarter in 
> the generic irq handler code might work.
> 
> And then, what we could do, is to mark the drivers that absolutely _must_ be 
> able to nest specially. Like the IDE driver when in PIO mode. Or maybe the 
> SCSI drivers, if they still depend on that timer interrupt happening while 
> they are busy.

I think the patch as posted solves a real problem, but also perpetuates a bad 
situation.

At minimum we should print a (one-time) warning that some badness occured. 
That would push us either in the direction of improving drivers, or towards 
improving the generic code.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25 16:27               ` Ingo Molnar
@ 2010-03-25 16:33                 ` Ingo Molnar
  2010-03-25 18:27                 ` Andi Kleen
  1 sibling, 0 replies; 39+ messages in thread
From: Ingo Molnar @ 2010-03-25 16:33 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Thomas Gleixner, Andi Kleen, x86, LKML, jesse.brandeburg


* Ingo Molnar <mingo@elte.hu> wrote:

> 
> * Linus Torvalds <torvalds@linux-foundation.org> wrote:
> 
> [...]
> >
> > Now, it's also true that our IRQ infrastructure handlers _could_ be smarter, 
> > and make the whole problem less likely to happen.
> > 
> > In particular, it's probably true that especially on modern hardware with 
> > multiple cores, and especially when you do _not_ have irq sharing (which is 
> > the common case these days for things like network drivers that can use 
> > MSI), we really would be better off having the irq disabled over the whole 
> > thing, and on some interrupt controllers it might even be worth it to do the 
> > old optimization of not masking-and-acking, but just acking.
> 
> Yes.
> 
> > But see above. This is _not_ something that a driver can do any more. They 
> > don't know whether the interrupt might end up being shared. Just blindly 
> > setting IRAF_DISABLED in a driver is _not_ the answer. But being smarter in 
> > the generic irq handler code might work.
> > 
> > And then, what we could do, is to mark the drivers that absolutely _must_ 
> > be able to nest specially. Like the IDE driver when in PIO mode. Or maybe 
> > the SCSI drivers, if they still depend on that timer interrupt happening 
> > while they are busy.
> 
> I think the patch as posted solves a real problem, but also perpetuates a 
> bad situation.
> 
> At minimum we should print a (one-time) warning that some badness occured. 
> That would push us either in the direction of improving drivers, or towards 
> improving the generic code.

Furthermore, applying that patch as-is would not just cause us to do nothing 
about it in the future, it would also add a rather fragile looking piece of 
logic. I.e. it's a sweep-under-the-rug thing pretty much IMO.

So i think Thomas is quite right wrt. ugliness of the patch but missed the 
other important fact that this can occur in a lot of places with high enough 
IRQ parallelism and cannot be fixed one by one.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25 16:13             ` Linus Torvalds
  2010-03-25 16:17               ` Linus Torvalds
  2010-03-25 16:27               ` Ingo Molnar
@ 2010-03-25 16:52               ` Thomas Gleixner
  2010-03-25 17:47               ` Peter Zijlstra
  3 siblings, 0 replies; 39+ messages in thread
From: Thomas Gleixner @ 2010-03-25 16:52 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Andi Kleen, x86, LKML, jesse.brandeburg

On Thu, 25 Mar 2010, Linus Torvalds wrote:
> In particular, it's probably true that especially on modern hardware with 
> multiple cores, and especially when you do _not_ have irq sharing (which 
> is the common case these days for things like network drivers that can use 
> MSI), we really would be better off having the irq disabled over the whole 
> thing, and on some interrupt controllers it might even be worth it to do 
> the old optimization of not masking-and-acking, but just acking.
> 
> But see above. This is _not_ something that a driver can do any more. They 
> don't know whether the interrupt might end up being shared. Just blindly 
> setting IRAF_DISABLED in a driver is _not_ the answer. But being smarter 
> in the generic irq handler code might work.

For the MSI ones it definitely works as they can not be shared. I'm
just not sure whether we can just enforce that for MSI.
 
Thanks,

	tglx

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25 16:13             ` Linus Torvalds
                                 ` (2 preceding siblings ...)
  2010-03-25 16:52               ` Thomas Gleixner
@ 2010-03-25 17:47               ` Peter Zijlstra
  2010-03-25 18:01                 ` Linus Torvalds
  3 siblings, 1 reply; 39+ messages in thread
From: Peter Zijlstra @ 2010-03-25 17:47 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Thomas Gleixner, Andi Kleen, x86, LKML, jesse.brandeburg

On Thu, 2010-03-25 at 09:13 -0700, Linus Torvalds wrote:
> But see above. This is _not_ something that a driver can do any more. They 
> don't know whether the interrupt might end up being shared. Just blindly 
> setting IRAF_DISABLED in a driver is _not_ the answer. But being smarter 
> in the generic irq handler code might work.
> 
> And then, what we could do, is to mark the drivers that absolutely _must_ 
> be able to nest specially. Like the IDE driver when in PIO mode. Or maybe 
> the SCSI drivers, if they still depend on that timer interrupt happening 
> while they are busy. 

FWIW lockdep forces IRQF_DISABLED and yells when anybody does
local_irq_enable() while in hardirq context, I haven't seen any such
splats in a long while, except from the ARM people who did funny things
with building their own threaded interrupts or somesuch.

So I'm sure there's some devices, like IDE PIO and the curious 8390, and
maybe some other drivers that aren't as common, but I don't think the
situation is quite as bad as some people paint it.




^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25 17:47               ` Peter Zijlstra
@ 2010-03-25 18:01                 ` Linus Torvalds
  2010-03-25 18:21                   ` Peter Zijlstra
                                     ` (2 more replies)
  0 siblings, 3 replies; 39+ messages in thread
From: Linus Torvalds @ 2010-03-25 18:01 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Thomas Gleixner, Andi Kleen, x86, LKML, jesse.brandeburg

On Thu, 25 Mar 2010, Peter Zijlstra wrote:
> 
> FWIW lockdep forces IRQF_DISABLED and yells when anybody does
> local_irq_enable() while in hardirq context, I haven't seen any such
> splats in a long while, except from the ARM people who did funny things
> with building their own threaded interrupts or somesuch.

The thing is, that won't show the drivers that just simply _expect_ 
other interrupts to happen.

The SCSI situation, iirc, used to be that certain error conditions simply 
caused a delay loop (reset? I forget) that depended on 'jiffies'. So there 
was no 'local_irq_enable()' involved, nor did it even happen under any 
normal load - only in error situations.

Now, I think (and sincerely) that the SCSI situation is likely long since 
fixed, but we have thousands and thousands of drivers, and these kinds of 
things are very hard to notice automatically. Are there any cases around 
that still have busy-loop delays based on real-time in their irq handlers? 
I simply don't know.

			Linus

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25 18:01                 ` Linus Torvalds
@ 2010-03-25 18:21                   ` Peter Zijlstra
  2010-03-25 18:23                     ` Peter Zijlstra
  2010-03-25 18:29                   ` Ingo Molnar
  2010-03-26  6:10                   ` Andi Kleen
  2 siblings, 1 reply; 39+ messages in thread
From: Peter Zijlstra @ 2010-03-25 18:21 UTC (permalink / raw)
  To: Linus Torvalds; +Cc: Thomas Gleixner, Andi Kleen, x86, LKML, jesse.brandeburg

On Thu, 2010-03-25 at 11:01 -0700, Linus Torvalds wrote:
> Are there any cases around 
> that still have busy-loop delays based on real-time in their irq handlers? 
> I simply don't know. 

I recently found a few in drivers/net/ there's all kinds of funny stuff
in there.. not sure how common the matching hardware is though.

One thing we could do is instrument jiffies to yell when its used from
hardirq context and fix up these things.

But yeah, there's funky hardware around, but I think we should provide
more incentives to keep modern hardware drivers saner.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25 18:21                   ` Peter Zijlstra
@ 2010-03-25 18:23                     ` Peter Zijlstra
  2010-03-25 18:44                       ` Andi Kleen
  2010-03-25 19:01                       ` Ingo Molnar
  0 siblings, 2 replies; 39+ messages in thread
From: Peter Zijlstra @ 2010-03-25 18:23 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Thomas Gleixner, Andi Kleen, x86, LKML, jesse.brandeburg,
	Arjan Van De Ven

On Thu, 2010-03-25 at 19:21 +0100, Peter Zijlstra wrote:
> On Thu, 2010-03-25 at 11:01 -0700, Linus Torvalds wrote:
> > Are there any cases around 
> > that still have busy-loop delays based on real-time in their irq handlers? 
> > I simply don't know. 
> 
> I recently found a few in drivers/net/ there's all kinds of funny stuff
> in there.. not sure how common the matching hardware is though.
> 
> One thing we could do is instrument jiffies to yell when its used from
> hardirq context and fix up these things.

Also, Arjan mentioned he wanted to sweep the kernel for jiffies users
and convert them to timer interfaces..


^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25 16:27               ` Ingo Molnar
  2010-03-25 16:33                 ` Ingo Molnar
@ 2010-03-25 18:27                 ` Andi Kleen
  2010-03-26  4:55                   ` Eric W. Biederman
  1 sibling, 1 reply; 39+ messages in thread
From: Andi Kleen @ 2010-03-25 18:27 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Linus Torvalds, Thomas Gleixner, Andi Kleen, x86, LKML,
	jesse.brandeburg

> I think the patch as posted solves a real problem, but also perpetuates a bad 
> situation.
> 
> At minimum we should print a (one-time) warning that some badness occured. 
> That would push us either in the direction of improving drivers, or towards 
> improving the generic code.

What should a driver do to prevent that? I don't see what it could do
short of castrating itself (like refusing to use multiple ports)
As Linus says the driver doesn't know if setting IRQF_DISABLED is safe.

-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25 18:01                 ` Linus Torvalds
  2010-03-25 18:21                   ` Peter Zijlstra
@ 2010-03-25 18:29                   ` Ingo Molnar
  2010-03-25 19:10                     ` Linus Torvalds
  2010-03-26  6:10                   ` Andi Kleen
  2 siblings, 1 reply; 39+ messages in thread
From: Ingo Molnar @ 2010-03-25 18:29 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Peter Zijlstra, Thomas Gleixner, Andi Kleen, x86, LKML,
	jesse.brandeburg

* Linus Torvalds <torvalds@linux-foundation.org> wrote:

> On Thu, 25 Mar 2010, Peter Zijlstra wrote:
> > 
> > FWIW lockdep forces IRQF_DISABLED and yells when anybody does 
> > local_irq_enable() while in hardirq context, I haven't seen any such 
> > splats in a long while, except from the ARM people who did funny things 
> > with building their own threaded interrupts or somesuch.
> 
> The thing is, that won't show the drivers that just simply _expect_ other 
> interrupts to happen.
> 
> The SCSI situation, iirc, used to be that certain error conditions simply 
> caused a delay loop (reset? I forget) that depended on 'jiffies'. So there 
> was no 'local_irq_enable()' involved, nor did it even happen under any 
> normal load - only in error situations.
> 
> Now, I think (and sincerely) that the SCSI situation is likely long since 
> fixed, but we have thousands and thousands of drivers, and these kinds of 
> things are very hard to notice automatically. Are there any cases around 
> that still have busy-loop delays based on real-time in their irq handlers? I 
> simply don't know.

Yes, but that kind of crap should not go unnoticed if it triggers (which may 
be rare): in form of a hard lockup. We had that lockdep behavior of disabling 
irqs in all handlers forever, ever since it went upstream four years ago.

So we had 3 separate mechanisms in the past 3-4 years:

 - lockdep [which disables irqs in all handlers, indiscriminately]
 - dynticks [which found in-irq jiffies loops]
 - -rt [which found hardirqs-off loops]

That should have weeded out most of that kind of IRQ handler abuse.

In terms of test coverage, beyond upstream kernel testers who enable lockdep, 
Fedora rawhide has also been running kernels with lockdep enabled for the past 
3-4 years, so there's a fair amount of 'weird hardware' coverage as well. It 
certainly wont cover 100% everything, but it should cover everything that 
people bothered to test + report.

I'm fairly certain, based on having played with those aspects from many angles 
that disabling irqs in all drivers should work just fine today already.

So the patch below should at most trigger bugs in areas that need fixing 
anyway, and i'm quite sure that under no circumstance would it cause 
unforeseen problems in 'thousands of drivers'.

So i think this would be a clear step forward. (It's also probably the best 
thing for performance to batch up IRQs instead of nesting them.)

We could do this carefully, first in the IRQ tree then in linux-next, them 
upstream, with plenty of time for people to test. If it causes _any_ 
widespread problems that make you uncomfortable it would be very easy to 
revert. What do you think?

Thanks,

	Ingo

diff --git a/kernel/irq/handle.c b/kernel/irq/handle.c
index 76d5a67..27e5c69 100644
--- a/kernel/irq/handle.c
+++ b/kernel/irq/handle.c
@@ -370,9 +370,6 @@ irqreturn_t handle_IRQ_event(unsigned int irq, struct irqaction *action)
 	irqreturn_t ret, retval = IRQ_NONE;
 	unsigned int status = 0;

-	if (!(action->flags & IRQF_DISABLED))
-		local_irq_enable_in_hardirq();
-
 	do {
 		trace_irq_handler_entry(irq, action);
 		ret = action->handler(irq, action->dev_id);

^ permalink raw reply related	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25 18:23                     ` Peter Zijlstra
@ 2010-03-25 18:44                       ` Andi Kleen
  2010-03-25 19:01                       ` Ingo Molnar
  1 sibling, 0 replies; 39+ messages in thread
From: Andi Kleen @ 2010-03-25 18:44 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Linus Torvalds, Thomas Gleixner, Andi Kleen, x86, LKML,
	jesse.brandeburg, Arjan Van De Ven

On Thu, Mar 25, 2010 at 07:23:58PM +0100, Peter Zijlstra wrote:
> On Thu, 2010-03-25 at 19:21 +0100, Peter Zijlstra wrote:
> > On Thu, 2010-03-25 at 11:01 -0700, Linus Torvalds wrote:
> > > Are there any cases around 
> > > that still have busy-loop delays based on real-time in their irq handlers? 
> > > I simply don't know. 
> > 
> > I recently found a few in drivers/net/ there's all kinds of funny stuff
> > in there.. not sure how common the matching hardware is though.
> > 
> > One thing we could do is instrument jiffies to yell when its used from
> > hardirq context and fix up these things.
> 
> Also, Arjan mentioned he wanted to sweep the kernel for jiffies users
> and convert them to timer interfaces..

% linux-2.6> gid jiffies | wc -l
7569

Good luck.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25 18:23                     ` Peter Zijlstra
  2010-03-25 18:44                       ` Andi Kleen
@ 2010-03-25 19:01                       ` Ingo Molnar
  1 sibling, 0 replies; 39+ messages in thread
From: Ingo Molnar @ 2010-03-25 19:01 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Linus Torvalds, Thomas Gleixner, Andi Kleen, x86, LKML,
	jesse.brandeburg, Arjan Van De Ven

* Peter Zijlstra <peterz@infradead.org> wrote:

> On Thu, 2010-03-25 at 19:21 +0100, Peter Zijlstra wrote:
> > On Thu, 2010-03-25 at 11:01 -0700, Linus Torvalds wrote:
> > > Are there any cases around 
> > > that still have busy-loop delays based on real-time in their irq handlers? 
> > > I simply don't know. 
> > 
> > I recently found a few in drivers/net/ there's all kinds of funny stuff
> > in there.. not sure how common the matching hardware is though.
> > 
> > One thing we could do is instrument jiffies to yell when its used from
> > hardirq context and fix up these things.
> 
> Also, Arjan mentioned he wanted to sweep the kernel for jiffies users and 
> convert them to timer interfaces..

Dynticks found most of the loop-for-jiffies-in-handlers cruft: an IRQ handler 
hitting the idle loop and then spinning for jiffies, with dynticks enabled, 
would spin forver because there's no timer irq.

That is because in dynticks mode we dont reactivate the tick when we go into 
an irq handler (that has hit the idle task). So any looping on jiffies will 
loop forever. We only reactivate the tick when we go out of idle.

We had a few bugs of that kind in the early days of dynticks - they were not 
particularly numerous and all were fixed.

Thanks,

	Ingo

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25 18:29                   ` Ingo Molnar
@ 2010-03-25 19:10                     ` Linus Torvalds
  2010-03-25 19:42                       ` David Miller
  2010-03-25 20:53                       ` Linus Torvalds
  0 siblings, 2 replies; 39+ messages in thread
From: Linus Torvalds @ 2010-03-25 19:10 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Peter Zijlstra, Thomas Gleixner, Andi Kleen, x86, LKML,
	jesse.brandeburg

On Thu, 25 Mar 2010, Ingo Molnar wrote:
> 
> So the patch below should at most trigger bugs in areas that need fixing 
> anyway, and i'm quite sure that under no circumstance would it cause 
> unforeseen problems in 'thousands of drivers'.

If we do this, then we should just remove all the IRQF_DISABLED code in 
kernel/irq/manage.c too, and basically make IRQF_DISABLED a clear no-op 
(still leave it around as a #define, to not break any users). 

It won't make it any harder to revert if it causes problems, and that way 
there will be no crazy dead code (and comments) left around.

And I just checked: even the 8250 serial driver doesn't use IRQF_DISABLED 
any more, so doing that shouldn't cause any latency issues (sure, the 
serial driver may interrupt another irq, but another irq can also 
interrupt the serial driver as things stand now, so the original latency 
issue with fast irq handlers doesn't actually work these days _anyway_).

		Linus

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25 19:10                     ` Linus Torvalds
@ 2010-03-25 19:42                       ` David Miller
  2010-03-25 20:40                         ` Linus Torvalds
  2010-03-25 20:51                         ` Ingo Molnar
  2010-03-25 20:53                       ` Linus Torvalds
  1 sibling, 2 replies; 39+ messages in thread
From: David Miller @ 2010-03-25 19:42 UTC (permalink / raw)
  To: torvalds; +Cc: mingo, peterz, tglx, andi, x86, linux-kernel, jesse.brandeburg

From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 25 Mar 2010 12:10:22 -0700 (PDT)

> 
> 
> On Thu, 25 Mar 2010, Ingo Molnar wrote:
>> 
>> So the patch below should at most trigger bugs in areas that need fixing 
>> anyway, and i'm quite sure that under no circumstance would it cause 
>> unforeseen problems in 'thousands of drivers'.
> 
> If we do this, then we should just remove all the IRQF_DISABLED code in 
> kernel/irq/manage.c too, and basically make IRQF_DISABLED a clear no-op 
> (still leave it around as a #define, to not break any users). 

FWIW, I'm currently using IRQF_DISABLED for virtual network
device interrupts on sparc64 as a workaround for some stack
overflow issues.

This change will just force me to work harder to find out a better way
to fix the problem, so don't let my issue hold this back, it's just
an FYI...

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25 19:42                       ` David Miller
@ 2010-03-25 20:40                         ` Linus Torvalds
  2010-03-26  3:33                           ` David Miller
  2010-03-25 20:51                         ` Ingo Molnar
  1 sibling, 1 reply; 39+ messages in thread
From: Linus Torvalds @ 2010-03-25 20:40 UTC (permalink / raw)
  To: David Miller
  Cc: mingo, peterz, tglx, andi, x86, linux-kernel, jesse.brandeburg



On Thu, 25 Mar 2010, David Miller wrote:
> 
> FWIW, I'm currently using IRQF_DISABLED for virtual network
> device interrupts on sparc64 as a workaround for some stack
> overflow issues.
> 
> This change will just force me to work harder to find out a better way
> to fix the problem, so don't let my issue hold this back, it's just
> an FYI...

You missed the point of the patch.

It makes _all_ interrupts act like IRQF_DISABLED. So it actually fixes 
your issue, by making IRQF_DISABLE a no-op - not because it doesn't exist 
any more, but because the non-IRQF_DISABLED case would not exist any more.

		Linus

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25 19:42                       ` David Miller
  2010-03-25 20:40                         ` Linus Torvalds
@ 2010-03-25 20:51                         ` Ingo Molnar
  1 sibling, 0 replies; 39+ messages in thread
From: Ingo Molnar @ 2010-03-25 20:51 UTC (permalink / raw)
  To: David Miller
  Cc: torvalds, peterz, tglx, andi, x86, linux-kernel, jesse.brandeburg


* David Miller <davem@davemloft.net> wrote:

> From: Linus Torvalds <torvalds@linux-foundation.org>
> Date: Thu, 25 Mar 2010 12:10:22 -0700 (PDT)
> 
> > On Thu, 25 Mar 2010, Ingo Molnar wrote:
> >> 
> >> So the patch below should at most trigger bugs in areas that need fixing 
> >> anyway, and i'm quite sure that under no circumstance would it cause 
> >> unforeseen problems in 'thousands of drivers'.
> > 
> > If we do this, then we should just remove all the IRQF_DISABLED code in 
> > kernel/irq/manage.c too, and basically make IRQF_DISABLED a clear no-op 
> > (still leave it around as a #define, to not break any users).
> 
> FWIW, I'm currently using IRQF_DISABLED for virtual network device 
> interrupts on sparc64 as a workaround for some stack overflow issues.

I think IRQF_DISABLED is the sanest method to run IRQs: it's the most atomic, 
thus most cache-efficient, it doesnt nest, and it's also a tiny bit faster to 
execute, by a few instructions (on x86).

> This change will just force me to work harder to find out a better way to 
> fix the problem, so don't let my issue hold this back, it's just an FYI...

The patch i sent basically hardcodes IRQF_DISABLED - so it should fix your 
problem automatically. IRQF_DISABLED will become an unconditional thing, and 
we can then also remove it.

	Ingo

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25 19:10                     ` Linus Torvalds
  2010-03-25 19:42                       ` David Miller
@ 2010-03-25 20:53                       ` Linus Torvalds
  1 sibling, 0 replies; 39+ messages in thread
From: Linus Torvalds @ 2010-03-25 20:53 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Peter Zijlstra, Thomas Gleixner, Andi Kleen, x86, LKML,
	jesse.brandeburg



On Thu, 25 Mar 2010, Linus Torvalds wrote:
> 
> And I just checked: even the 8250 serial driver doesn't use IRQF_DISABLED 
> any more, so doing that shouldn't cause any latency issues (sure, the 
> serial driver may interrupt another irq, but another irq can also 
> interrupt the serial driver as things stand now, so the original latency 
> issue with fast irq handlers doesn't actually work these days _anyway_).

Btw, if we really do decide that everybody is IRQF_DISABLED, that really 
should make the whole mask-and-ack issue for the irq controllers go away.

We'd still need it, but only for the very special IDE controller case and 
others who _explicitly_ re-enable interrupts. Those would really have to 
cause the irq to be masked so that we don't get issues with recursive irqs 
of the same kind.

		Linus

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25 20:40                         ` Linus Torvalds
@ 2010-03-26  3:33                           ` David Miller
  0 siblings, 0 replies; 39+ messages in thread
From: David Miller @ 2010-03-26  3:33 UTC (permalink / raw)
  To: torvalds; +Cc: mingo, peterz, tglx, andi, x86, linux-kernel, jesse.brandeburg

From: Linus Torvalds <torvalds@linux-foundation.org>
Date: Thu, 25 Mar 2010 13:40:43 -0700 (PDT)

> It makes _all_ interrupts act like IRQF_DISABLED. So it actually fixes 
> your issue, by making IRQF_DISABLE a no-op - not because it doesn't exist 
> any more, but because the non-IRQF_DISABLED case would not exist any more.

Yep, Peter Z. explained this to me under seperate cover.

Great!



^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25 18:27                 ` Andi Kleen
@ 2010-03-26  4:55                   ` Eric W. Biederman
  0 siblings, 0 replies; 39+ messages in thread
From: Eric W. Biederman @ 2010-03-26  4:55 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Ingo Molnar, Linus Torvalds, Thomas Gleixner, x86, LKML,
	jesse.brandeburg

Andi Kleen <andi@firstfloor.org> writes:

>> I think the patch as posted solves a real problem, but also perpetuates a bad 
>> situation.
>> 
>> At minimum we should print a (one-time) warning that some badness occured. 
>> That would push us either in the direction of improving drivers, or towards 
>> improving the generic code.
>
> What should a driver do to prevent that? I don't see what it could do
> short of castrating itself (like refusing to use multiple ports)
> As Linus says the driver doesn't know if setting IRQF_DISABLED is safe.

As an aside this is happening on MSI irqs.  They can never be shared.
So in fact the driver can know it is safe.

Should we perhaps make all MSI irqs automatically set IRQF_DISABLED?

Eric

^ permalink raw reply	[flat|nested] 39+ messages in thread

* Re: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
  2010-03-25 18:01                 ` Linus Torvalds
  2010-03-25 18:21                   ` Peter Zijlstra
  2010-03-25 18:29                   ` Ingo Molnar
@ 2010-03-26  6:10                   ` Andi Kleen
  2 siblings, 0 replies; 39+ messages in thread
From: Andi Kleen @ 2010-03-26  6:10 UTC (permalink / raw)
  To: Linus Torvalds
  Cc: Peter Zijlstra, Thomas Gleixner, Andi Kleen, x86, LKML,
	jesse.brandeburg

> Now, I think (and sincerely) that the SCSI situation is likely long since 
> fixed, but we have thousands and thousands of drivers, and these kinds of 
> things are very hard to notice automatically. Are there any cases around 
> that still have busy-loop delays based on real-time in their irq handlers? 
> I simply don't know.

This case was already broken back when the x86-64 port used to run
with NMI watchdog on by default, because any IO exception handling
with interrupts off would trigger the NMI watchdog eventually.

I remember having to add bandaids to Fusion for this a long time ago.

Of course ISA drivers might still do it.

-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 39+ messages in thread

end of thread, other threads:[~2010-03-26  6:10 UTC | newest]

Thread overview: 39+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-03-24 19:02 [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2 Andi Kleen
2010-03-24 20:42 ` Thomas Gleixner
2010-03-24 23:08   ` Thomas Gleixner
2010-03-25  0:36     ` Andi Kleen
2010-03-25  1:46       ` Thomas Gleixner
2010-03-25  9:37         ` Andi Kleen
2010-03-25 11:09           ` Thomas Gleixner
2010-03-25 12:11             ` Andi Kleen
2010-03-25 13:17               ` Thomas Gleixner
2010-03-25 13:32                 ` Andi Kleen
2010-03-25 14:16                   ` Thomas Gleixner
2010-03-25 15:38                     ` Andi Kleen
2010-03-25 16:06                     ` Alan Cox
2010-03-25 16:13             ` Linus Torvalds
2010-03-25 16:17               ` Linus Torvalds
2010-03-25 16:27               ` Ingo Molnar
2010-03-25 16:33                 ` Ingo Molnar
2010-03-25 18:27                 ` Andi Kleen
2010-03-26  4:55                   ` Eric W. Biederman
2010-03-25 16:52               ` Thomas Gleixner
2010-03-25 17:47               ` Peter Zijlstra
2010-03-25 18:01                 ` Linus Torvalds
2010-03-25 18:21                   ` Peter Zijlstra
2010-03-25 18:23                     ` Peter Zijlstra
2010-03-25 18:44                       ` Andi Kleen
2010-03-25 19:01                       ` Ingo Molnar
2010-03-25 18:29                   ` Ingo Molnar
2010-03-25 19:10                     ` Linus Torvalds
2010-03-25 19:42                       ` David Miller
2010-03-25 20:40                         ` Linus Torvalds
2010-03-26  3:33                           ` David Miller
2010-03-25 20:51                         ` Ingo Molnar
2010-03-25 20:53                       ` Linus Torvalds
2010-03-26  6:10                   ` Andi Kleen
2010-03-25 10:50         ` Alan Cox
2010-03-25 11:16           ` Thomas Gleixner
2010-03-25 11:59             ` Alan Cox
2010-03-25 12:00             ` Andi Kleen
2010-03-25 11:57           ` Andi Kleen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox