public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Andi Kleen <andi@firstfloor.org>
To: tglx@linutronix.de, x86@kernel.org, linux-kernel@vger.kernel.org,
	jesse.brandeburg@intel.com
Subject: [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2
Date: Wed, 24 Mar 2010 20:02:04 +0100	[thread overview]
Message-ID: <20100324190150.GA18803@basil.fritz.box> (raw)

Prevent nested interrupts when the IRQ stack is near overflowing v2

Interrupts can always nest when they don't run with IRQF_DISABLED.

When a lot of interrupts hit the same vector on the same
CPU, nested interrupts can overflow the irq stack and cause hangs.

This has been observed with MSI-X & Ethernet on a large system.

This patch automatically forces IRQF_DISABLED when
the interrupt stack runs low. I implemented it using
a "callback" (really just a weak call) from the generic IRQ code 
to the architecture code because passing this state down the 
normal call chain would have required changing too much code. 

The irq checks are currently implemented for x86-(32,64) only,
but other architectures could (and probably should) do the same.

Currently the thresholds are 2K each. This is a fairly
arbitrary number. On i386 with 4K stacks it's about half
the irq stack; on the other configurations it's 1/4 to 1/16.

This also fixes another minor bug on 32bit: don't dump a backtrace when the 
irq stack runs low.

Based on discussions with Suresh B. Siddha and others.
Originally reported by Jesse Brandeburg.

v2: Use more common code on 32bit. Don't dump stack
    on low irq stack.

Tested-by: emil.s.tantilov@intel.com
Cc: jesse.brandeburg@intel.com
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Suresh B. Siddha <suresh.b.siddha@intel.com>

---
 arch/x86/kernel/irq_32.c |   43 ++++++++++++++++++++++++++++++++++---------
 arch/x86/kernel/irq_64.c |   18 ++++++++++++++++--
 include/linux/irq.h      |    2 ++
 kernel/irq/handle.c      |   16 +++++++++++++++-
 4 files changed, 67 insertions(+), 12 deletions(-)

Index: linux-2.6.34-rc1-ak/include/linux/irq.h
===================================================================
--- linux-2.6.34-rc1-ak.orig/include/linux/irq.h	2010-03-14 03:58:12.000000000 +0100
+++ linux-2.6.34-rc1-ak/include/linux/irq.h	2010-03-24 19:11:23.000000000 +0100
@@ -520,4 +520,6 @@
 }
 #endif	/* CONFIG_SMP */
 
+extern int irq_stack_near_overflow(void);
+
 #endif /* _LINUX_IRQ_H */
Index: linux-2.6.34-rc1-ak/kernel/irq/handle.c
===================================================================
--- linux-2.6.34-rc1-ak.orig/kernel/irq/handle.c	2010-03-14 03:58:12.000000000 +0100
+++ linux-2.6.34-rc1-ak/kernel/irq/handle.c	2010-03-24 19:11:23.000000000 +0100
@@ -358,6 +358,15 @@
 	       "but no thread function available.", irq, action->name);
 }
 
+/*
+ * Is the interrupt stack near overflowing?
+ * Can/should be overridden by architectures
+ */
+int __weak irq_stack_near_overflow(void)
+{
+	return 0;
+}
+
 /**
  * handle_IRQ_event - irq action chain handler
  * @irq:	the interrupt number
@@ -370,7 +379,12 @@
 	irqreturn_t ret, retval = IRQ_NONE;
 	unsigned int status = 0;
 
-	if (!(action->flags & IRQF_DISABLED))
+	/*
+	 * When the IRQ stack is near overflowing don't allow nested
+	 * interrupts.
+	 */
+
+	if (!(action->flags & IRQF_DISABLED) && !irq_stack_near_overflow())
 		local_irq_enable_in_hardirq();
 
 	do {
Index: linux-2.6.34-rc1-ak/arch/x86/kernel/irq_64.c
===================================================================
--- linux-2.6.34-rc1-ak.orig/arch/x86/kernel/irq_64.c	2010-03-03 02:01:27.000000000 +0100
+++ linux-2.6.34-rc1-ak/arch/x86/kernel/irq_64.c	2010-03-24 19:11:23.000000000 +0100
@@ -16,6 +16,7 @@
 #include <linux/ftrace.h>
 #include <linux/uaccess.h>
 #include <linux/smp.h>
+#include <linux/irq.h>
 #include <asm/io_apic.h>
 #include <asm/idle.h>
 #include <asm/apic.h>
@@ -26,6 +27,19 @@
 DEFINE_PER_CPU(struct pt_regs *, irq_regs);
 EXPORT_PER_CPU_SYMBOL(irq_regs);
 
+#define IRQ_STACK_THRESH 2048
+
+/*
+ * Stack overflow checking for the interrupt stacks.
+ * Called by the generic IRQ handler.
+ */
+int irq_stack_near_overflow(void)
+{
+	char *stack;
+	asm("mov %%rsp,%0" : "=r" (stack));
+	return stack <= __get_cpu_var(irq_stack_ptr) - IRQ_STACK_SIZE + IRQ_STACK_THRESH;
+}
+
 /*
  * Probabilistic stack overflow check:
  *
@@ -33,7 +47,7 @@
  * runs on the big interrupt stacks. Checking reliably is too expensive,
  * so we just check from interrupts.
  */
-static inline void stack_overflow_check(struct pt_regs *regs)
+static inline void process_stack_overflow_check(struct pt_regs *regs)
 {
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
 	u64 curbase = (u64)task_stack_page(current);
@@ -52,7 +66,7 @@
 {
 	struct irq_desc *desc;
 
-	stack_overflow_check(regs);
+	process_stack_overflow_check(regs);
 
 	desc = irq_to_desc(irq);
 	if (unlikely(!desc))
Index: linux-2.6.34-rc1-ak/arch/x86/kernel/irq_32.c
===================================================================
--- linux-2.6.34-rc1-ak.orig/arch/x86/kernel/irq_32.c	2010-03-03 02:01:27.000000000 +0100
+++ linux-2.6.34-rc1-ak/arch/x86/kernel/irq_32.c	2010-03-24 19:24:19.000000000 +0100
@@ -17,6 +17,7 @@
 #include <linux/delay.h>
 #include <linux/uaccess.h>
 #include <linux/percpu.h>
+#include <linux/irq.h>
 
 #include <asm/apic.h>
 
@@ -26,16 +27,29 @@
 DEFINE_PER_CPU(struct pt_regs *, irq_regs);
 EXPORT_PER_CPU_SYMBOL(irq_regs);
 
-#ifdef CONFIG_DEBUG_STACKOVERFLOW
-/* Debugging check for stack overflow: is there less than 1KB free? */
-static int check_stack_overflow(void)
+
+static inline int check_stack(int threshold)
 {
 	long sp;
 
 	__asm__ __volatile__("andl %%esp,%0" :
 			     "=r" (sp) : "0" (THREAD_SIZE - 1));
 
-	return sp < (sizeof(struct thread_info) + STACK_WARN);
+	return sp < (sizeof(struct thread_info) + threshold);
+}
+
+#define IRQ_STACK_THRESH 2048
+
+int irq_stack_near_overflow(void)
+{
+	return check_stack(IRQ_STACK_THRESH);
+}
+
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+/* Debugging check for stack overflow: is there less than 2KB free? */
+static inline int check_stack_overflow(void)
+{
+	return check_stack(IRQ_STACK_THRESH);
 }
 
 static void print_stack_overflow(void)
@@ -189,7 +203,12 @@
 
 #else
 static inline int
-execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq) { return 0; }
+execute_on_irq_stack(int overflow, struct irq_desc *desc, int irq)
+{
+	if (unlikely(overflow))
+		print_stack_overflow();
+	return 0;
+}
 #endif
 
 bool handle_irq(unsigned irq, struct pt_regs *regs)
@@ -197,17 +216,23 @@
 	struct irq_desc *desc;
 	int overflow;
 
+	/*
+	 * The result of this gets ignored when
+	 * the interrupt is already nested on a irq stack.
+	 * That means no backtrace printed -- that is
+	 * needed because nested interrupts can always happen.
+	 * However the generic IRQ code will check again
+	 * and prevent further nesting if the stack is near
+	 * overflow.
+	 */
 	overflow = check_stack_overflow();
 
 	desc = irq_to_desc(irq);
 	if (unlikely(!desc))
 		return false;
 
-	if (!execute_on_irq_stack(overflow, desc, irq)) {
-		if (unlikely(overflow))
-			print_stack_overflow();
+	if (!execute_on_irq_stack(overflow, desc, irq))
 		desc->handle_irq(irq, desc);
-	}
 
 	return true;
 }

             reply	other threads:[~2010-03-24 19:02 UTC|newest]

Thread overview: 39+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-03-24 19:02 Andi Kleen [this message]
2010-03-24 20:42 ` [PATCH] Prevent nested interrupts when the IRQ stack is near overflowing v2 Thomas Gleixner
2010-03-24 23:08   ` Thomas Gleixner
2010-03-25  0:36     ` Andi Kleen
2010-03-25  1:46       ` Thomas Gleixner
2010-03-25  9:37         ` Andi Kleen
2010-03-25 11:09           ` Thomas Gleixner
2010-03-25 12:11             ` Andi Kleen
2010-03-25 13:17               ` Thomas Gleixner
2010-03-25 13:32                 ` Andi Kleen
2010-03-25 14:16                   ` Thomas Gleixner
2010-03-25 15:38                     ` Andi Kleen
2010-03-25 16:06                     ` Alan Cox
2010-03-25 16:13             ` Linus Torvalds
2010-03-25 16:17               ` Linus Torvalds
2010-03-25 16:27               ` Ingo Molnar
2010-03-25 16:33                 ` Ingo Molnar
2010-03-25 18:27                 ` Andi Kleen
2010-03-26  4:55                   ` Eric W. Biederman
2010-03-25 16:52               ` Thomas Gleixner
2010-03-25 17:47               ` Peter Zijlstra
2010-03-25 18:01                 ` Linus Torvalds
2010-03-25 18:21                   ` Peter Zijlstra
2010-03-25 18:23                     ` Peter Zijlstra
2010-03-25 18:44                       ` Andi Kleen
2010-03-25 19:01                       ` Ingo Molnar
2010-03-25 18:29                   ` Ingo Molnar
2010-03-25 19:10                     ` Linus Torvalds
2010-03-25 19:42                       ` David Miller
2010-03-25 20:40                         ` Linus Torvalds
2010-03-26  3:33                           ` David Miller
2010-03-25 20:51                         ` Ingo Molnar
2010-03-25 20:53                       ` Linus Torvalds
2010-03-26  6:10                   ` Andi Kleen
2010-03-25 10:50         ` Alan Cox
2010-03-25 11:16           ` Thomas Gleixner
2010-03-25 11:59             ` Alan Cox
2010-03-25 12:00             ` Andi Kleen
2010-03-25 11:57           ` Andi Kleen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100324190150.GA18803@basil.fritz.box \
    --to=andi@firstfloor.org \
    --cc=jesse.brandeburg@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox