Re: PATCH: Misrouted IRQ recovery, take 2

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Alan Cox <alan@redhat.com>
To: David R <david@unsolicited.net>
Cc: Alan Cox <alan@redhat.com>,
	linux-kernel@vger.kernel.org, arjanv@redhat.com, akpm@osdl.org
Subject: Re: PATCH: Misrouted IRQ recovery, take 2
Date: Mon, 6 Sep 2004 12:09:28 -0400	[thread overview]
Message-ID: <20040906160928.GA17719@devserv.devel.redhat.com> (raw)
In-Reply-To: <1094486042.413c881a748ee@unsolicited.net>

On Mon, Sep 06, 2004 at 04:54:02PM +0100, David R wrote:
> Quoting Alan Cox <alan@redhat.com>:
> 
> I'm probably being stupid...  but in misrouted_irq(), shouldn't the 'work' flag
> be reset for each irq, rather than initialised outside the main loop?
> 

You are correct. 

Ok try this


--- linux-2.6.8.1/arch/i386/kernel/irq.c	2004-08-14 11:54:48.000000000 +0100
+++ linux-2.6.8.1.ac/arch/i386/kernel/irq.c	2004-09-07 07:21:13.267562440 +0100
@@ -273,12 +273,103 @@ static int noirqdebug;
 static int __init noirqdebug_setup(char *str)
 {
 	noirqdebug = 1;
-	printk("IRQ lockup detection disabled\n");
+	printk(KERN_INFO "IRQ lockup detection disabled\n");
 	return 1;
 }
 
 __setup("noirqdebug", noirqdebug_setup);
 
+static int irqfixup;
+
+static int __init irqfixup_setup(char *str)
+{
+	irqfixup = 1;
+	printk(KERN_WARNING "Misrouted IRQ fixup support enabled.\n");
+	printk(KERN_WARNING "This may impact system performance.\n");
+	return 1;
+}
+
+__setup("irqfixup", irqfixup_setup);
+
+static int __init irqpoll_setup(char *str)
+{
+	irqfixup = 2;
+	printk(KERN_WARNING "Misrouted IRQ fixup and polling support enabled.\n");
+	printk(KERN_WARNING "This may significantly impact system performance.\n");
+	return 1;
+}
+
+__setup("irqpoll", irqpoll_setup);
+
+/*
+ *	Recovery handler for misrouted interrupts
+ */
+
+static asmlinkage int misrouted_irq(int irq, struct pt_regs *regs)
+{
+	int i;
+	irq_desc_t *desc;
+	int ok = 0;
+	for(i = 1; i < NR_IRQS; i++)
+	{
+		int work = 0;	/* Did we do work for a real IRQ */
+		struct irqaction *action;
+		if(i == irq)	/* Already tried */
+			continue;
+		desc = &irq_desc[i];
+		spin_lock(&desc->lock);
+		action = desc->action;
+		/* Already running on another processor */
+		if(desc->status & IRQ_INPROGRESS)
+		{
+			/* Already running: If it is shared get the other
+			   CPU to go looking for our mystery interrupt too */
+			if(desc->action && (desc->action->flags & SA_SHIRQ))
+				desc->status |= IRQ_PENDING;
+			spin_unlock(&desc->lock);
+			continue;
+		}
+		/* Honour the normal IRQ locking */
+		desc->status |= IRQ_INPROGRESS;
+		spin_unlock(&desc->lock);
+		while(action)
+		{
+			/* Only shared IRQ handlers are safe to call */
+			if(action->flags & SA_SHIRQ)
+			{
+				if(action->handler(i, action->dev_id, regs) == IRQ_HANDLED)
+					ok = 1;
+			}
+			action = action->next;
+		}
+		local_irq_disable();
+		/* Now clean up the flags */
+		spin_lock(&desc->lock);
+		action = desc->action;
+
+		/* While we were looking for a fixup someone queued a real
+		   IRQ clashing with our walk */
+
+		while((desc->status & IRQ_PENDING) && action)
+		{
+			/* Perform real IRQ processing for the IRQ we deferred */
+			work = 1;
+			spin_unlock(&desc->lock);
+			handle_IRQ_event(i, regs, action);
+			spin_lock(&desc->lock);
+			desc->status &= ~IRQ_PENDING;
+		}
+		desc->status &= ~IRQ_INPROGRESS;
+		/* If we did actual work for the real IRQ line we must
+		   let the IRQ controller clean up too */
+		if(work)
+			desc->handler->end(i);
+		spin_unlock(&desc->lock);
+	}
+	/* So the caller can adjust the irq error counts */
+	return ok;
+}
+
 /*
  * If 99,900 of the previous 100,000 interrupts have not been handled then
  * assume that the IRQ is stuck in some manner.  Drop a diagnostic and try to
@@ -289,13 +380,69 @@ __setup("noirqdebug", noirqdebug_setup);
  *
  * Called under desc->lock
  */
-static void note_interrupt(int irq, irq_desc_t *desc, irqreturn_t action_ret)
+static void note_interrupt(int irq, irq_desc_t *desc, irqreturn_t action_ret, struct pt_regs *regs)
 {
 	if (action_ret != IRQ_HANDLED) {
 		desc->irqs_unhandled++;
 		if (action_ret != IRQ_NONE)
 			report_bad_irq(irq, desc, action_ret);
 	}
+	if(unlikely(irqfixup))	/* Don't punish working computers */
+	{
+		if((irqfixup == 2 && irq == 0) || action_ret == IRQ_NONE)
+		{
+#ifdef CONFIG_4KSTACKS
+			u32 *isp;
+			union irq_ctx * curctx;
+			union irq_ctx * irqctx;
+			int ok;
+
+			curctx = (union irq_ctx *) current_thread_info();
+			irqctx = hardirq_ctx[smp_processor_id()];
+
+			spin_unlock(&desc->lock);
+
+			/*
+			 * this is where we switch to the IRQ stack. However, if we are already using
+			 * the IRQ stack (because we interrupted a hardirq handler) we can't do that
+			 * and just have to keep using the current stack (which is the irq stack already
+			 * after all)
+			 */
+
+			if (curctx == irqctx)
+				ok = misrouted_irq(irq, regs);
+			else {
+				/* build the stack frame on the IRQ stack */
+				isp = (u32*) ((char*)irqctx + sizeof(*irqctx));
+				irqctx->tinfo.task = curctx->tinfo.task;
+				irqctx->tinfo.previous_esp = current_stack_pointer();
+
+				*--isp = (u32) regs;
+				*--isp = (u32) irq;
+
+				asm volatile(
+				"       xchgl   %%ebx,%%esp     \n"
+				"       call    misrouted_irq   \n"
+				"       xchgl   %%ebx,%%esp     \n"
+				: "=a"(ok)
+				: "b"(isp)
+				: "memory", "cc", "edx", "ecx"
+				);
+			}
+			spin_lock(&desc->lock);
+			if (curctx != irqctx)
+				irqctx->tinfo.task = NULL;
+#else
+			spin_unlock(&desc->lock);
+
+			ok = misrouted_irq(irq, desc, regs);
+
+			spin_lock(&desc->lock);
+#endif
+			if(action_ret == IRQ_NONE)
+				desc->irqs_unhandled -= ok;
+		}
+	}
 
 	desc->irq_count++;
 	if (desc->irq_count < 100000)
@@ -532,7 +679,7 @@ asmlinkage unsigned int do_IRQ(struct pt
 		}
 		spin_lock(&desc->lock);
 		if (!noirqdebug)
-			note_interrupt(irq, desc, action_ret);
+			note_interrupt(irq, desc, action_ret, &regs);
 		if (curctx != irqctx)
 			irqctx->tinfo.task = NULL;
 		if (likely(!(desc->status & IRQ_PENDING)))
@@ -551,7 +698,7 @@ asmlinkage unsigned int do_IRQ(struct pt
 
 		spin_lock(&desc->lock);
 		if (!noirqdebug)
-			note_interrupt(irq, desc, action_ret);
+			note_interrupt(irq, desc, action_ret, &regs);
 		if (likely(!(desc->status & IRQ_PENDING)))
 			break;
 		desc->status &= ~IRQ_PENDING;

next prev parent reply	other threads:[~2004-09-06 16:10 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-09-06 15:54 PATCH: Misrouted IRQ recovery, take 2 David R
2004-09-06 16:09 ` Alan Cox [this message]
  -- strict thread matches above, loose matches on Subject: below --
2004-09-06 15:16 Alan Cox

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20040906160928.GA17719@devserv.devel.redhat.com \
    --to=alan@redhat.com \
    --cc=akpm@osdl.org \
    --cc=arjanv@redhat.com \
    --cc=david@unsolicited.net \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.