linux-scsi.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] PCI Error Recovery: Symbios SCSI device driver
@ 2006-01-18 16:53 linas
  2006-01-18 17:07 ` Matthew Wilcox
  0 siblings, 1 reply; 21+ messages in thread
From: linas @ 2006-01-18 16:53 UTC (permalink / raw)
  To: James.Bottomley; +Cc: brking, gregkh, linux-scsi


Hi James,

Please review the patch below, and forward upstream.  I've been
bouncing it to various mailing lists for the last half-year,
it has been living in an -mm tree for a while, and was a part of
GregKH's patchset for a while. I'd like to get it propely upstream.

The general description of the principles at work here are given in
Documentation/pci-error-recovery.txt

Thanks,

--linas


----- Forwarded message from Greg KH <gregkh@suse.de> -----

Cc: linas@austin.ibm.com
Subject: [PATCH] PCI Error Recovery: Symbios SCSI device driver
Reply-To: Greg K-H <greg@kroah.com>
To: linux-pci@atrey.karlin.mff.cuni.cz
From: Greg KH <gregkh@suse.de>

[PATCH] PCI Error Recovery: Symbios SCSI device driver

Various PCI bus errors can be signaled by newer PCI controllers.  This
patch adds the PCI error recovery callbacks to the Symbios SCSI device driver.
The patch has been tested, and appears to work well.

Signed-off-by: Linas Vepstas <linas@linas.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>

---
commit d78cde68ab78766c3a175466aa8adcbdc5520963
tree 836445f183f96049046d2da633c45e04df53b775
parent 3c06ba2cdbd37f80ae6dc044d9e305f0dd0ad6dd
author linas <linas@austin.ibm.com> Fri, 18 Nov 2005 16:23:04 -0600
committer Greg Kroah-Hartman <gregkh@suse.de> Thu, 05 Jan 2006 21:54:54 -0800

 drivers/scsi/sym53c8xx_2/sym_glue.c |  113 +++++++++++++++++++++++++++++++++++
 drivers/scsi/sym53c8xx_2/sym_glue.h |    4 +
 drivers/scsi/sym53c8xx_2/sym_hipd.c |   15 +++++
 3 files changed, 132 insertions(+), 0 deletions(-)

diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c
index 1fffd2b..19ca4ed 100644
--- a/drivers/scsi/sym53c8xx_2/sym_glue.c
+++ b/drivers/scsi/sym53c8xx_2/sym_glue.c
@@ -716,6 +716,10 @@ static irqreturn_t sym53c8xx_intr(int ir
 
 	if (DEBUG_FLAGS & DEBUG_TINY) printf_debug ("[");
 
+	/* Avoid spinloop trying to handle interrupts on frozen device */
+	if (np->s.io_state != pci_channel_io_normal)
+		return IRQ_HANDLED;
+
 	spin_lock_irqsave(np->s.host->host_lock, flags);
 	sym_interrupt(np);
 	spin_unlock_irqrestore(np->s.host->host_lock, flags);
@@ -789,6 +793,25 @@ static void sym_eh_done(struct scsi_cmnd
  */
 static void sym_eh_timeout(u_long p) { __sym_eh_done((struct scsi_cmnd *)p, 1); }
 
+static void sym_eeh_timeout(u_long p)
+{
+	struct sym_eh_wait *ep = (struct sym_eh_wait *) p;
+	if (!ep)
+		return;
+	complete(&ep->done);
+}
+
+static void sym_eeh_done(struct sym_eh_wait *ep)
+{
+	if (!ep)
+		return;
+	ep->timed_out = 0;
+	if (!del_timer(&ep->timer))
+		return;
+
+	complete(&ep->done);
+}
+
 /*
  *  Generic method for our eh processing.
  *  The 'op' argument tells what we have to do.
@@ -829,6 +852,35 @@ prepare:
 
 	/* Try to proceed the operation we have been asked for */
 	sts = -1;
+
+	/* We may be in an error condition because the PCI bus
+	 * went down. In this case, we need to wait until the
+	 * PCI bus is reset, the card is reset, and only then
+	 * proceed with the scsi error recovery.  We'll wait
+	 * for 15 seconds for this to happen.
+	 */
+#define WAIT_FOR_PCI_RECOVERY	15
+	if (np->s.io_state != pci_channel_io_normal) {
+		struct sym_eh_wait eeh, *eep = &eeh;
+		np->s.io_reset_wait = eep;
+		init_completion(&eep->done);
+		init_timer(&eep->timer);
+		eep->to_do = SYM_EH_DO_WAIT;
+		eep->timer.expires = jiffies + (WAIT_FOR_PCI_RECOVERY*HZ);
+		eep->timer.function = sym_eeh_timeout;
+		eep->timer.data = (u_long)eep;
+		eep->timed_out = 1;	/* Be pessimistic for once :) */
+		add_timer(&eep->timer);
+		spin_unlock_irq(np->s.host->host_lock);
+		wait_for_completion(&eep->done);
+		spin_lock_irq(np->s.host->host_lock);
+		if (eep->timed_out) {
+			printk (KERN_ERR "%s: Timed out waiting for PCI reset\n",
+			       sym_name(np));
+		}
+		np->s.io_reset_wait = NULL;
+	}
+
 	switch(op) {
 	case SYM_EH_ABORT:
 		sts = sym_abort_scsiio(np, cmd, 1);
@@ -1630,6 +1682,8 @@ static struct Scsi_Host * __devinit sym_
 	np->maxoffs	= dev->chip.offset_max;
 	np->maxburst	= dev->chip.burst_max;
 	np->myaddr	= dev->host_id;
+	np->s.io_state = pci_channel_io_normal;
+	np->s.io_reset_wait = NULL;
 
 	/*
 	 *  Edit its name.
@@ -1962,6 +2016,58 @@ static int sym_detach(struct sym_hcb *np
 	return 1;
 }
 
+/* ------------- PCI Error Recovery infrastructure -------------- */
+/** sym2_io_error_detected() is called when PCI error is detected */
+static pci_ers_result_t sym2_io_error_detected (struct pci_dev *pdev, pci_channel_state_t state)
+{
+	struct sym_hcb *np = pci_get_drvdata(pdev);
+
+	np->s.io_state = state;
+	// XXX If slot is permanently frozen, then what?
+	// Should we scsi_remove_host() maybe ??
+
+	/* Request a slot slot reset. */
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/** sym2_io_slot_reset is called when the pci bus has been reset.
+ *  Restart the card from scratch. */
+static pci_ers_result_t sym2_io_slot_reset (struct pci_dev *pdev)
+{
+	struct sym_hcb *np = pci_get_drvdata(pdev);
+
+	printk (KERN_INFO "%s: recovering from a PCI slot reset\n",
+	    sym_name(np));
+
+	if (pci_enable_device(pdev))
+		printk (KERN_ERR "%s: device setup failed most egregiously\n",
+			    sym_name(np));
+
+	pci_set_master(pdev);
+	enable_irq (pdev->irq);
+
+	/* Perform host reset only on one instance of the card */
+	if (0 == PCI_FUNC (pdev->devfn))
+		sym_reset_scsi_bus(np, 0);
+
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+/** sym2_io_resume is called when the error recovery driver
+ *  tells us that its OK to resume normal operation.
+ */
+static void sym2_io_resume (struct pci_dev *pdev)
+{
+	struct sym_hcb *np = pci_get_drvdata(pdev);
+
+	/* Perform device startup only once for this card. */
+	if (0 == PCI_FUNC (pdev->devfn))
+		sym_start_up (np, 1);
+
+	np->s.io_state = pci_channel_io_normal;
+	sym_eeh_done (np->s.io_reset_wait);
+}
+
 /*
  * Driver host template.
  */
@@ -2219,11 +2325,18 @@ static struct pci_device_id sym2_id_tabl
 
 MODULE_DEVICE_TABLE(pci, sym2_id_table);
 
+static struct pci_error_handlers sym2_err_handler = {
+	.error_detected = sym2_io_error_detected,
+	.slot_reset = sym2_io_slot_reset,
+	.resume = sym2_io_resume,
+};
+
 static struct pci_driver sym2_driver = {
 	.name		= NAME53C8XX,
 	.id_table	= sym2_id_table,
 	.probe		= sym2_probe,
 	.remove		= __devexit_p(sym2_remove),
+	.err_handler = &sym2_err_handler,
 };
 
 static int __init sym2_init(void)
diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.h b/drivers/scsi/sym53c8xx_2/sym_glue.h
index cc92d0c..1ccf0c5 100644
--- a/drivers/scsi/sym53c8xx_2/sym_glue.h
+++ b/drivers/scsi/sym53c8xx_2/sym_glue.h
@@ -180,6 +180,10 @@ struct sym_shcb {
 	char		chip_name[8];
 	struct pci_dev	*device;
 
+	/* pci bus i/o state; waiter for clearing of i/o state */
+	pci_channel_state_t io_state;
+	struct sym_eh_wait *io_reset_wait;
+
 	struct Scsi_Host *host;
 
 	void __iomem *	ioaddr;		/* MMIO kernel io address	*/
diff --git a/drivers/scsi/sym53c8xx_2/sym_hipd.c b/drivers/scsi/sym53c8xx_2/sym_hipd.c
index 8260f04..eec2feb 100644
--- a/drivers/scsi/sym53c8xx_2/sym_hipd.c
+++ b/drivers/scsi/sym53c8xx_2/sym_hipd.c
@@ -2761,6 +2761,7 @@ void sym_interrupt (struct sym_hcb *np)
 	u_char	istat, istatc;
 	u_char	dstat;
 	u_short	sist;
+	u_int    icnt;
 
 	/*
 	 *  interrupt on the fly ?
@@ -2802,6 +2803,7 @@ void sym_interrupt (struct sym_hcb *np)
 	sist	= 0;
 	dstat	= 0;
 	istatc	= istat;
+	icnt = 0;
 	do {
 		if (istatc & SIP)
 			sist  |= INW(np, nc_sist);
@@ -2809,6 +2811,19 @@ void sym_interrupt (struct sym_hcb *np)
 			dstat |= INB(np, nc_dstat);
 		istatc = INB(np, nc_istat);
 		istat |= istatc;
+
+		/* Prevent deadlock waiting on a condition that may never clear. */
+		/* XXX this is a temporary kludge; the correct to detect
+		 * a PCI bus error would be to use the io_check interfaces
+		 * proposed by Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
+		 * Problem with polling like that is the state flag might not
+		 * be set.
+		 */
+		icnt ++;
+		if (100 < icnt) {
+			if (np->s.device->error_state != pci_channel_io_normal)
+				return;
+		}
 	} while (istatc & (SIP|DIP));
 
 	if (DEBUG_FLAGS & DEBUG_TINY)



----- End forwarded message -----

^ permalink raw reply related	[flat|nested] 21+ messages in thread
* [PATCH] PCI Error Recovery: Symbios SCSI device driver
@ 2006-02-02 20:15 Linas Vepstas
  0 siblings, 0 replies; 21+ messages in thread
From: Linas Vepstas @ 2006-02-02 20:15 UTC (permalink / raw)
  To: Matthew Wilcox; +Cc: linux-scsi


Hi Matthew,

Please review the patch below; if it looks good, please forward
upstream.  I beleive its been in the AKPM kernels for a while now.
A variant as also been shipping with SuSE SLES9 kernels for quite
a while. This version of the patch should have fixed up all of 
your previous comments/complaints. 

--linas

Formal description:

Various PCI bus errors can be signaled by newer PCI controllers.  This
patch adds the PCI error recovery callbacks to the Symbios SCSI device 
driver.  The patch has been tested, and appears to work well.

Signed-off-by: Linas Vepstas <linas@linas.org>

--

 sym_glue.c |  136 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 sym_glue.h |    4 +
 sym_hipd.c |   15 ++++++
 3 files changed, 155 insertions(+)

Index: linux-2.6.16-rc1-git5/drivers/scsi/sym53c8xx_2/sym_glue.c
===================================================================
--- linux-2.6.16-rc1-git5.orig/drivers/scsi/sym53c8xx_2/sym_glue.c	2006-02-01 17:09:16.000000000 -0600
+++ linux-2.6.16-rc1-git5/drivers/scsi/sym53c8xx_2/sym_glue.c	2006-02-02 14:07:41.088534411 -0600
@@ -716,6 +716,10 @@
 
 	if (DEBUG_FLAGS & DEBUG_TINY) printf_debug ("[");
 
+	/* Avoid spinloop trying to handle interrupts on frozen device */
+	if (np->s.io_state != pci_channel_io_normal)
+		return IRQ_HANDLED;
+
 	spin_lock_irqsave(np->s.host->host_lock, flags);
 	sym_interrupt(np);
 	spin_unlock_irqrestore(np->s.host->host_lock, flags);
@@ -789,6 +793,25 @@
  */
 static void sym_eh_timeout(u_long p) { __sym_eh_done((struct scsi_cmnd *)p, 1); }
 
+static void sym_eeh_timeout(unsigned long p)
+{
+	struct sym_eh_wait *ep = (struct sym_eh_wait *) p;
+	if (!ep)
+		return;
+	complete(&ep->done);
+}
+
+static void sym_eeh_done(struct sym_eh_wait *ep)
+{
+	if (!ep)
+		return;
+	ep->timed_out = 0;
+	if (!del_timer(&ep->timer))
+		return;
+
+	complete(&ep->done);
+}
+
 /*
  *  Generic method for our eh processing.
  *  The 'op' argument tells what we have to do.
@@ -829,6 +852,36 @@
 
 	/* Try to proceed the operation we have been asked for */
 	sts = -1;
+
+	/* We may be in an error condition because the PCI bus
+	 * went down. In this case, we need to wait until the
+	 * PCI bus is reset, the card is reset, and only then
+	 * proceed with the scsi error recovery.  We'll wait
+	 * for 15 seconds for this to happen.
+	 */
+#define WAIT_FOR_PCI_RECOVERY	15
+	if (np->s.io_state != pci_channel_io_normal) {
+		struct sym_eh_wait eeh, *eep = &eeh;
+		np->s.io_reset_wait = eep;
+		init_completion(&eep->done);
+		init_timer(&eep->timer);
+		eep->to_do = SYM_EH_DO_WAIT;
+		eep->timer.expires = jiffies + (WAIT_FOR_PCI_RECOVERY*HZ);
+		eep->timer.function = sym_eeh_timeout;
+		eep->timer.data = (u_long)eep;
+		eep->timed_out = 1;	/* Be pessimistic for once :) */
+		add_timer(&eep->timer);
+		spin_unlock_irq(np->s.host->host_lock);
+		wait_for_completion(&eep->done);
+		spin_lock_irq(np->s.host->host_lock);
+		if (eep->timed_out) {
+			printk (KERN_ERR 
+				"%s: Timed out waiting for PCI reset\n", 
+				sym_name(np));
+		}
+		np->s.io_reset_wait = NULL;
+	}
+
 	switch(op) {
 	case SYM_EH_ABORT:
 		sts = sym_abort_scsiio(np, cmd, 1);
@@ -1630,6 +1683,8 @@
 	np->maxoffs	= dev->chip.offset_max;
 	np->maxburst	= dev->chip.burst_max;
 	np->myaddr	= dev->host_id;
+	np->s.io_state = pci_channel_io_normal;
+	np->s.io_reset_wait = NULL;
 
 	/*
 	 *  Edit its name.
@@ -1962,6 +2017,80 @@
 	return 1;
 }
 
+/**
+ * sym2_io_error_detected() - PCI error is detected
+ *
+ * Description: This routine is called shrtly after the PCI error
+ * recovery subsystem has detected a PCI bus error. At this point,
+ * all further i/o to te adapter will be fruitless, so hold off i/o.
+ * Basically, just queue up i/o and wait for the bus reset to happen.
+ */
+static pci_ers_result_t sym2_io_error_detected(struct pci_dev *pdev, 
+					                       enum pci_channel_state state)
+{
+	struct sym_hcb *np = pci_get_drvdata(pdev);
+
+	np->s.io_state = state;
+	/* If the reported state is "permanent failure", then
+	 * we should shut down the driver for good, and -EIO
+	 * all pending i/o requests. XXX Not implemented yet.
+	 * (Not sure how - should we scsi_remove_host() maybe ??)
+	 */
+
+	/* Request a slot slot reset. */
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ *  sym2_io_slot_reset - the pci bus has been reset.
+ *
+ *  Description: This routine is called after the PCI error
+ *  recovery system has completely reset the PCI slot. At
+ *  this point, I/O is possible, although the card has just
+ *  been reset, and is not yet initialized. A complete
+ *  restart is performed.
+ */
+static pci_ers_result_t sym2_io_slot_reset(struct pci_dev *pdev)
+{
+	struct sym_hcb *np = pci_get_drvdata(pdev);
+
+	printk(KERN_INFO "%s: recovering from a PCI slot reset\n",
+	    sym_name(np));
+
+	if (pci_enable_device(pdev))
+		printk(KERN_ERR "%s: device setup failed most egregiously\n",
+			    sym_name(np));
+
+	pci_set_master(pdev);
+	enable_irq(pdev->irq);
+
+	/* Perform host reset only on one instance of the card */
+	if (PCI_FUNC(pdev->devfn) == 0)
+		sym_reset_scsi_bus(np, 0);
+
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+/**
+ *  sym2_io_resume - pci error recovery completed.
+ *
+ *  Description: This routine is called when the PCI error
+ *  recovery has finished recovering this and all other
+ *  affects PCI cards. Normal I/O operations may resume.
+ *  Start handling any queued requests.
+ */
+static void sym2_io_resume(struct pci_dev *pdev)
+{
+	struct sym_hcb *np = pci_get_drvdata(pdev);
+
+	/* Perform device startup only once for this card. */
+	if (PCI_FUNC(pdev->devfn) == 0)
+		sym_start_up(np, 1);
+
+	np->s.io_state = pci_channel_io_normal;
+	sym_eeh_done(np->s.io_reset_wait);
+}
+
 /*
  * Driver host template.
  */
@@ -2219,11 +2348,18 @@
 
 MODULE_DEVICE_TABLE(pci, sym2_id_table);
 
+static struct pci_error_handlers sym2_err_handler = {
+	.error_detected = sym2_io_error_detected,
+	.slot_reset = sym2_io_slot_reset,
+	.resume = sym2_io_resume,
+};
+
 static struct pci_driver sym2_driver = {
 	.name		= NAME53C8XX,
 	.id_table	= sym2_id_table,
 	.probe		= sym2_probe,
 	.remove		= __devexit_p(sym2_remove),
+	.err_handler = &sym2_err_handler,
 };
 
 static int __init sym2_init(void)
Index: linux-2.6.16-rc1-git5/drivers/scsi/sym53c8xx_2/sym_glue.h
===================================================================
--- linux-2.6.16-rc1-git5.orig/drivers/scsi/sym53c8xx_2/sym_glue.h	2006-02-01 17:09:16.000000000 -0600
+++ linux-2.6.16-rc1-git5/drivers/scsi/sym53c8xx_2/sym_glue.h	2006-02-02 13:33:47.459016635 -0600
@@ -180,6 +180,10 @@
 	char		chip_name[8];
 	struct pci_dev	*device;
 
+	/* pci bus i/o state; waiter for clearing of i/o state */
+	enum pci_channel_state io_state;
+	struct sym_eh_wait *io_reset_wait;
+
 	struct Scsi_Host *host;
 
 	void __iomem *	ioaddr;		/* MMIO kernel io address	*/
Index: linux-2.6.16-rc1-git5/drivers/scsi/sym53c8xx_2/sym_hipd.c
===================================================================
--- linux-2.6.16-rc1-git5.orig/drivers/scsi/sym53c8xx_2/sym_hipd.c	2006-02-01 17:09:16.000000000 -0600
+++ linux-2.6.16-rc1-git5/drivers/scsi/sym53c8xx_2/sym_hipd.c	2006-02-02 14:05:38.973739591 -0600
@@ -2761,6 +2761,7 @@
 	u_char	istat, istatc;
 	u_char	dstat;
 	u_short	sist;
+	unsigned int    icnt;
 
 	/*
 	 *  interrupt on the fly ?
@@ -2802,6 +2803,7 @@
 	sist	= 0;
 	dstat	= 0;
 	istatc	= istat;
+	icnt = 0;
 	do {
 		if (istatc & SIP)
 			sist  |= INW(np, nc_sist);
@@ -2809,6 +2811,19 @@
 			dstat |= INB(np, nc_dstat);
 		istatc = INB(np, nc_istat);
 		istat |= istatc;
+		
+		/* 
+		 * Prevent deadlock waiting on a condition that may 
+		 * never clear. If the PCI bus has disconnected us,
+		 * we shouldn't poll, as all reads will return 0xffffffff.
+		 * If the flags aren't clearing, then check to see if
+		 * the bus is disconnected. If it is, punt.
+		 */
+		icnt ++;
+		if (100 < icnt) {
+			if (np->s.device->error_state != pci_channel_io_normal)
+				return;
+		}
 	} while (istatc & (SIP|DIP));
 
 	if (DEBUG_FLAGS & DEBUG_TINY)

^ permalink raw reply	[flat|nested] 21+ messages in thread
* [PATCH]: PCI Error Recovery: Symbios SCSI device driver
@ 2006-09-21 23:13 Linas Vepstas
  2006-09-22 22:06 ` Luca
  0 siblings, 1 reply; 21+ messages in thread
From: Linas Vepstas @ 2006-09-21 23:13 UTC (permalink / raw)
  To: matthew; +Cc: linux-scsi, linux-pci, linuxppc-dev, linux-kernel


Matthew, 

I hope this looks like a good patch.  Please review, apply and 
forward upstream.

--linas

Various PCI bus errors can be signaled by newer PCI controllers.  
This patch adds the PCI error recovery callbacks to the Symbios 
SCSI device driver.  The patch has been tested, and appears to 
work well.

Signed-off-by: Linas Vepstas <linas@ausin.ibm.com>

----
 drivers/scsi/sym53c8xx_2/sym_glue.c |   97 ++++++++++++++++++++++++++++++++++++
 drivers/scsi/sym53c8xx_2/sym_glue.h |    4 +
 drivers/scsi/sym53c8xx_2/sym_hipd.c |   10 +++
 3 files changed, 111 insertions(+)

Index: linux-2.6.18-rc7-git1/drivers/scsi/sym53c8xx_2/sym_glue.c
===================================================================
--- linux-2.6.18-rc7-git1.orig/drivers/scsi/sym53c8xx_2/sym_glue.c	2006-09-21 17:32:54.000000000 -0500
+++ linux-2.6.18-rc7-git1/drivers/scsi/sym53c8xx_2/sym_glue.c	2006-09-21 17:35:37.000000000 -0500
@@ -659,6 +659,11 @@ static irqreturn_t sym53c8xx_intr(int ir
 
 	if (DEBUG_FLAGS & DEBUG_TINY) printf_debug ("[");
 
+	/* Avoid spinloop trying to handle interrupts on frozen device */
+	if ((np->s.device->error_state != pci_channel_io_normal) &&
+	    (np->s.device->error_state != 0))
+		return IRQ_HANDLED;
+
 	spin_lock_irqsave(np->s.host->host_lock, flags);
 	sym_interrupt(np);
 	spin_unlock_irqrestore(np->s.host->host_lock, flags);
@@ -726,6 +731,19 @@ static int sym_eh_handler(int op, char *
 
 	dev_warn(&cmd->device->sdev_gendev, "%s operation started.\n", opname);
 
+	/* We may be in an error condition because the PCI bus
+	 * went down. In this case, we need to wait until the
+	 * PCI bus is reset, the card is reset, and only then
+	 * proceed with the scsi error recovery.  There's no
+	 * point in hurrying; take a leisurely wait.
+	 */
+#define WAIT_FOR_PCI_RECOVERY	35
+	if ((np->s.device->error_state != pci_channel_io_normal) &&
+	    (np->s.device->error_state != 0) &&
+		 (0 == wait_for_completion_timeout(&np->s.io_reset_wait,
+		                                 WAIT_FOR_PCI_RECOVERY*HZ)))
+			return SCSI_FAILED;
+
 	spin_lock_irq(host->host_lock);
 	/* This one is queued in some place -> to wait for completion */
 	FOR_EACH_QUEUED_ELEMENT(&np->busy_ccbq, qp) {
@@ -1510,6 +1528,7 @@ static struct Scsi_Host * __devinit sym_
 	np->maxoffs	= dev->chip.offset_max;
 	np->maxburst	= dev->chip.burst_max;
 	np->myaddr	= dev->host_id;
+	init_completion(&np->s.io_reset_wait);
 
 	/*
 	 *  Edit its name.
@@ -1948,6 +1967,77 @@ static void __devexit sym2_remove(struct
 	attach_count--;
 }
 
+/**
+ * sym2_io_error_detected() -- called when PCI error is detected
+ * @pdev: pointer to PCI device
+ * @state: current state of the PCI slot
+ */
+static pci_ers_result_t sym2_io_error_detected (struct pci_dev *pdev,
+                                         enum pci_channel_state state)
+{
+	struct sym_hcb *np = pci_get_drvdata(pdev);
+
+	/* If slot is permanently frozen, turn everything off */
+	if (state == pci_channel_io_perm_failure) {
+		sym2_remove(pdev);
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	init_completion(&np->s.io_reset_wait);
+	disable_irq(pdev->irq);
+	pci_disable_device(pdev);
+
+	/* Request a slot slot reset. */
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * sym2_io_slot_reset() -- called when the pci bus has been reset.
+ * @pdev: pointer to PCI device
+ *
+ * Restart the card from scratch.
+ */
+static pci_ers_result_t sym2_io_slot_reset (struct pci_dev *pdev)
+{
+	struct sym_hcb *np = pci_get_drvdata(pdev);
+
+	printk (KERN_INFO "%s: recovering from a PCI slot reset\n",
+	    sym_name(np));
+
+	if (pci_enable_device(pdev))
+		printk (KERN_ERR "%s: device setup failed most egregiously\n",
+			    sym_name(np));
+
+	pci_set_master(pdev);
+	enable_irq (pdev->irq);
+
+	/* Perform host reset only on one instance of the card */
+	if (0 == PCI_FUNC (pdev->devfn)) {
+		if (sym_reset_scsi_bus(np, 0)) {
+		   printk(KERN_ERR "%s: Unable to reset scsi host controller\n",
+					          sym_name(np));
+			return PCI_ERS_RESULT_DISCONNECT;
+		}
+		sym_start_up (np, 1);
+	}
+
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+/**
+ * sym2_io_resume() -- resume normal ops after PCI reset
+ * @pdev: pointer to PCI device
+ *
+ * Called when the error recovery driver tells us that its
+ * OK to resume normal operation. Use completion to allow
+ * halted scsi ops to resume.
+ */
+static void sym2_io_resume (struct pci_dev *pdev)
+{
+	struct sym_hcb *np = pci_get_drvdata(pdev);
+	complete_all(&np->s.io_reset_wait);
+}
+
 static void sym2_get_signalling(struct Scsi_Host *shost)
 {
 	struct sym_hcb *np = sym_get_hcb(shost);
@@ -2110,11 +2200,18 @@ static struct pci_device_id sym2_id_tabl
 
 MODULE_DEVICE_TABLE(pci, sym2_id_table);
 
+static struct pci_error_handlers sym2_err_handler = {
+	.error_detected = sym2_io_error_detected,
+	.slot_reset = sym2_io_slot_reset,
+	.resume = sym2_io_resume,
+};
+
 static struct pci_driver sym2_driver = {
 	.name		= NAME53C8XX,
 	.id_table	= sym2_id_table,
 	.probe		= sym2_probe,
 	.remove		= __devexit_p(sym2_remove),
+	.err_handler = &sym2_err_handler,
 };
 
 static int __init sym2_init(void)
Index: linux-2.6.18-rc7-git1/drivers/scsi/sym53c8xx_2/sym_glue.h
===================================================================
--- linux-2.6.18-rc7-git1.orig/drivers/scsi/sym53c8xx_2/sym_glue.h	2006-09-21 17:32:54.000000000 -0500
+++ linux-2.6.18-rc7-git1/drivers/scsi/sym53c8xx_2/sym_glue.h	2006-09-21 17:35:37.000000000 -0500
@@ -40,6 +40,7 @@
 #ifndef SYM_GLUE_H
 #define SYM_GLUE_H
 
+#include <linux/completion.h>
 #include <linux/delay.h>
 #include <linux/ioport.h>
 #include <linux/pci.h>
@@ -179,6 +180,9 @@ struct sym_shcb {
 	char		chip_name[8];
 	struct pci_dev	*device;
 
+	/* Waiter for clearing of frozen PCI bus */
+	struct completion io_reset_wait;
+
 	struct Scsi_Host *host;
 
 	void __iomem *	ioaddr;		/* MMIO kernel io address	*/
Index: linux-2.6.18-rc7-git1/drivers/scsi/sym53c8xx_2/sym_hipd.c
===================================================================
--- linux-2.6.18-rc7-git1.orig/drivers/scsi/sym53c8xx_2/sym_hipd.c	2006-09-21 17:32:54.000000000 -0500
+++ linux-2.6.18-rc7-git1/drivers/scsi/sym53c8xx_2/sym_hipd.c	2006-09-21 17:35:37.000000000 -0500
@@ -2761,6 +2761,7 @@ void sym_interrupt (struct sym_hcb *np)
 	u_char	istat, istatc;
 	u_char	dstat;
 	u_short	sist;
+	u_int    icnt;
 
 	/*
 	 *  interrupt on the fly ?
@@ -2802,6 +2803,7 @@ void sym_interrupt (struct sym_hcb *np)
 	sist	= 0;
 	dstat	= 0;
 	istatc	= istat;
+	icnt = 0;
 	do {
 		if (istatc & SIP)
 			sist  |= INW(np, nc_sist);
@@ -2809,6 +2811,14 @@ void sym_interrupt (struct sym_hcb *np)
 			dstat |= INB(np, nc_dstat);
 		istatc = INB(np, nc_istat);
 		istat |= istatc;
+
+		/* Prevent deadlock waiting on a condition that may never clear. */
+		icnt ++;
+		if (100 < icnt) {
+			if ((np->s.device->error_state != pci_channel_io_normal)
+			   && (np->s.device->error_state != 0))
+				return;
+		}
 	} while (istatc & (SIP|DIP));
 
 	if (DEBUG_FLAGS & DEBUG_TINY)

^ permalink raw reply	[flat|nested] 21+ messages in thread
* [PATCH]: PCI Error Recovery: Symbios SCSI device driver
@ 2006-10-20 18:05 Linas Vepstas
  2006-10-31 18:55 ` Matthew Wilcox
  0 siblings, 1 reply; 21+ messages in thread
From: Linas Vepstas @ 2006-10-20 18:05 UTC (permalink / raw)
  To: matthew; +Cc: linux-scsi, linux-pci, linux-kernel


Matthew, 

Please apply and forward upstream. This patch is nearly identical
to the one from a month earlier, except that it fixes some 
indentation problems, and shortens a few over-length lines.
This patch has been in an -mm tree since 29 Sept, and a variant
of it has been shipping for over a year in Novell SLES9, where
it has not received any complaints/defects/fixes/etc.

You had previously mentioned some objections to this patch, 
I assume these were about the bad indentation? Are there other 
concerns?

--linas


Various PCI bus errors can be signaled by newer PCI controllers.  
This patch adds the PCI error recovery callbacks to the Symbios 
SCSI device driver.  The patch has been tested, and appears to 
work well.

Signed-off-by: Linas Vepstas <linas@austin.ibm.com>

--
 drivers/scsi/sym53c8xx_2/sym_glue.c |   99 ++++++++++++++++++++++++++++++++++++
 drivers/scsi/sym53c8xx_2/sym_glue.h |    4 +
 drivers/scsi/sym53c8xx_2/sym_hipd.c |   10 +++
 3 files changed, 113 insertions(+)

Index: linux-2.6.19-rc1-git11/drivers/scsi/sym53c8xx_2/sym_glue.c
===================================================================
--- linux-2.6.19-rc1-git11.orig/drivers/scsi/sym53c8xx_2/sym_glue.c	2006-10-20 12:25:11.000000000 -0500
+++ linux-2.6.19-rc1-git11/drivers/scsi/sym53c8xx_2/sym_glue.c	2006-10-20 12:41:15.000000000 -0500
@@ -659,6 +659,11 @@ static irqreturn_t sym53c8xx_intr(int ir
 
 	if (DEBUG_FLAGS & DEBUG_TINY) printf_debug ("[");
 
+	/* Avoid spinloop trying to handle interrupts on frozen device */
+	if ((np->s.device->error_state != pci_channel_io_normal) &&
+	    (np->s.device->error_state != 0))
+		return IRQ_HANDLED;
+
 	spin_lock_irqsave(np->s.host->host_lock, flags);
 	sym_interrupt(np);
 	spin_unlock_irqrestore(np->s.host->host_lock, flags);
@@ -726,6 +731,19 @@ static int sym_eh_handler(int op, char *
 
 	dev_warn(&cmd->device->sdev_gendev, "%s operation started.\n", opname);
 
+	/* We may be in an error condition because the PCI bus
+	 * went down. In this case, we need to wait until the
+	 * PCI bus is reset, the card is reset, and only then
+	 * proceed with the scsi error recovery.  There's no
+	 * point in hurrying; take a leisurely wait.
+	 */
+#define WAIT_FOR_PCI_RECOVERY	35
+	if ((np->s.device->error_state != pci_channel_io_normal) &&
+	    (np->s.device->error_state != 0) &&
+	    (wait_for_completion_timeout(&np->s.io_reset_wait,
+		                         WAIT_FOR_PCI_RECOVERY*HZ) == 0))
+			return SCSI_FAILED;
+
 	spin_lock_irq(host->host_lock);
 	/* This one is queued in some place -> to wait for completion */
 	FOR_EACH_QUEUED_ELEMENT(&np->busy_ccbq, qp) {
@@ -1510,6 +1528,7 @@ static struct Scsi_Host * __devinit sym_
 	np->maxoffs	= dev->chip.offset_max;
 	np->maxburst	= dev->chip.burst_max;
 	np->myaddr	= dev->host_id;
+	init_completion(&np->s.io_reset_wait);
 
 	/*
 	 *  Edit its name.
@@ -1948,6 +1967,79 @@ static void __devexit sym2_remove(struct
 	attach_count--;
 }
 
+/**
+ * sym2_io_error_detected() -- called when PCI error is detected
+ * @pdev: pointer to PCI device
+ * @state: current state of the PCI slot
+ */
+static pci_ers_result_t sym2_io_error_detected (struct pci_dev *pdev,
+                                         enum pci_channel_state state)
+{
+	struct sym_hcb *np = pci_get_drvdata(pdev);
+
+	/* If slot is permanently frozen, turn everything off */
+	if (state == pci_channel_io_perm_failure) {
+		sym2_remove(pdev);
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	init_completion(&np->s.io_reset_wait);
+	disable_irq(pdev->irq);
+	pci_disable_device(pdev);
+
+	/* Request a slot reset. */
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * sym2_io_slot_reset() -- called when the pci bus has been reset.
+ * @pdev: pointer to PCI device
+ *
+ * Restart the card from scratch.
+ */
+static pci_ers_result_t sym2_io_slot_reset (struct pci_dev *pdev)
+{
+	struct sym_hcb *np = pci_get_drvdata(pdev);
+
+	printk(KERN_INFO "%s: recovering from a PCI slot reset\n",
+	          sym_name(np));
+
+	if (pci_enable_device(pdev)) {
+		printk(KERN_ERR "%s: Unable to enable afer PCI reset\n",
+		        sym_name(np));
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	pci_set_master(pdev);
+	enable_irq(pdev->irq);
+
+	/* Perform host reset only on one instance of the card */
+	if (PCI_FUNC (pdev->devfn) == 0) {
+		if (sym_reset_scsi_bus(np, 0)) {
+			printk(KERN_ERR "%s: Unable to reset scsi host\n",
+			        sym_name(np));
+			return PCI_ERS_RESULT_DISCONNECT;
+		}
+		sym_start_up(np, 1);
+	}
+
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+/**
+ * sym2_io_resume() -- resume normal ops after PCI reset
+ * @pdev: pointer to PCI device
+ *
+ * Called when the error recovery driver tells us that its
+ * OK to resume normal operation. Use completion to allow
+ * halted scsi ops to resume.
+ */
+static void sym2_io_resume (struct pci_dev *pdev)
+{
+	struct sym_hcb *np = pci_get_drvdata(pdev);
+	complete_all(&np->s.io_reset_wait);
+}
+
 static void sym2_get_signalling(struct Scsi_Host *shost)
 {
 	struct sym_hcb *np = sym_get_hcb(shost);
@@ -2110,11 +2202,18 @@ static struct pci_device_id sym2_id_tabl
 
 MODULE_DEVICE_TABLE(pci, sym2_id_table);
 
+static struct pci_error_handlers sym2_err_handler = {
+	.error_detected = sym2_io_error_detected,
+	.slot_reset = sym2_io_slot_reset,
+	.resume = sym2_io_resume,
+};
+
 static struct pci_driver sym2_driver = {
 	.name		= NAME53C8XX,
 	.id_table	= sym2_id_table,
 	.probe		= sym2_probe,
 	.remove		= __devexit_p(sym2_remove),
+	.err_handler 	= &sym2_err_handler,
 };
 
 static int __init sym2_init(void)
Index: linux-2.6.19-rc1-git11/drivers/scsi/sym53c8xx_2/sym_glue.h
===================================================================
--- linux-2.6.19-rc1-git11.orig/drivers/scsi/sym53c8xx_2/sym_glue.h	2006-10-20 12:25:11.000000000 -0500
+++ linux-2.6.19-rc1-git11/drivers/scsi/sym53c8xx_2/sym_glue.h	2006-10-20 12:41:16.000000000 -0500
@@ -40,6 +40,7 @@
 #ifndef SYM_GLUE_H
 #define SYM_GLUE_H
 
+#include <linux/completion.h>
 #include <linux/delay.h>
 #include <linux/ioport.h>
 #include <linux/pci.h>
@@ -179,6 +180,9 @@ struct sym_shcb {
 	char		chip_name[8];
 	struct pci_dev	*device;
 
+	/* Waiter for clearing of frozen PCI bus */
+	struct completion io_reset_wait;
+
 	struct Scsi_Host *host;
 
 	void __iomem *	ioaddr;		/* MMIO kernel io address	*/
Index: linux-2.6.19-rc1-git11/drivers/scsi/sym53c8xx_2/sym_hipd.c
===================================================================
--- linux-2.6.19-rc1-git11.orig/drivers/scsi/sym53c8xx_2/sym_hipd.c	2006-10-20 12:25:11.000000000 -0500
+++ linux-2.6.19-rc1-git11/drivers/scsi/sym53c8xx_2/sym_hipd.c	2006-10-20 12:41:16.000000000 -0500
@@ -2761,6 +2761,7 @@ void sym_interrupt (struct sym_hcb *np)
 	u_char	istat, istatc;
 	u_char	dstat;
 	u_short	sist;
+	u_int    icnt;
 
 	/*
 	 *  interrupt on the fly ?
@@ -2802,6 +2803,7 @@ void sym_interrupt (struct sym_hcb *np)
 	sist	= 0;
 	dstat	= 0;
 	istatc	= istat;
+	icnt = 0;
 	do {
 		if (istatc & SIP)
 			sist  |= INW(np, nc_sist);
@@ -2809,6 +2811,14 @@ void sym_interrupt (struct sym_hcb *np)
 			dstat |= INB(np, nc_dstat);
 		istatc = INB(np, nc_istat);
 		istat |= istatc;
+
+		/* Prevent deadlock waiting on a condition that may never clear. */
+		icnt ++;
+		if (icnt > 100) {
+			if ((np->s.device->error_state != pci_channel_io_normal)
+			   && (np->s.device->error_state != 0))
+				return;
+		}
 	} while (istatc & (SIP|DIP));
 
 	if (DEBUG_FLAGS & DEBUG_TINY)

^ permalink raw reply	[flat|nested] 21+ messages in thread
* [PATCH]: PCI Error Recovery: Symbios SCSI device driver
@ 2007-07-02 18:39 Linas Vepstas
  2007-07-05 18:28 ` Andrew Morton
  0 siblings, 1 reply; 21+ messages in thread
From: Linas Vepstas @ 2007-07-02 18:39 UTC (permalink / raw)
  To: James.Bottomley, matthew, willy
  Cc: linux-scsi, Paul Mackerras, linuxppc-dev, linux-kernel, linux-pci,
	Andrew Morton


Various PCI bus errors can be signaled by newer PCI controllers.  
This patch adds the PCI error recovery callbacks to the Symbios 
SCSI device driver.  The patch has been tested, and appears to 
work well.

Signed-off-by: Linas Vepstas <linas@austin.ibm.com>

----

Hi,

This patch has been bouncing around for a long time, and has made
appearences in various -mm trees since 2.6.something-teen. However,
it has never made it into mainline, and I'm starting to get concerned
that it will miss 2.6.23 as well. 

There was some discussion, and I think I addressed all of the various
issues that came up. I'd really like to get this patch in, but am unclear
on exactly who to pester at this point. Matt Wilcox seems to be looking 
for a job (???) and I am unable to git-clone James Bottmley's 
git://kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6.git
git tree; there's some error on the server side.

Linas.

 drivers/scsi/sym53c8xx_2/sym_glue.c |  136 ++++++++++++++++++++++++++++++++++++
 drivers/scsi/sym53c8xx_2/sym_glue.h |    4 +
 drivers/scsi/sym53c8xx_2/sym_hipd.c |    6 +
 3 files changed, 146 insertions(+)

Index: linux-2.6.22-rc1/drivers/scsi/sym53c8xx_2/sym_glue.c
===================================================================
--- linux-2.6.22-rc1.orig/drivers/scsi/sym53c8xx_2/sym_glue.c	2007-04-25 22:08:32.000000000 -0500
+++ linux-2.6.22-rc1/drivers/scsi/sym53c8xx_2/sym_glue.c	2007-05-14 17:31:44.000000000 -0500
@@ -657,6 +657,10 @@ static irqreturn_t sym53c8xx_intr(int ir
 	unsigned long flags;
 	struct sym_hcb *np = (struct sym_hcb *)dev_id;
 
+	/* Avoid spinloop trying to handle interrupts on frozen device */
+	if (pci_channel_offline(np->s.device))
+		return IRQ_HANDLED;
+
 	if (DEBUG_FLAGS & DEBUG_TINY) printf_debug ("[");
 
 	spin_lock_irqsave(np->s.host->host_lock, flags);
@@ -726,6 +730,20 @@ static int sym_eh_handler(int op, char *
 
 	dev_warn(&cmd->device->sdev_gendev, "%s operation started.\n", opname);
 
+	/* We may be in an error condition because the PCI bus
+	 * went down. In this case, we need to wait until the
+	 * PCI bus is reset, the card is reset, and only then
+	 * proceed with the scsi error recovery.  There's no
+	 * point in hurrying; take a leisurely wait.
+	 */
+#define WAIT_FOR_PCI_RECOVERY	35
+	if (pci_channel_offline(np->s.device)) {
+		int finished_reset = wait_for_completion_timeout(
+			&np->s.io_reset_wait, WAIT_FOR_PCI_RECOVERY*HZ);
+		if (!finished_reset)
+			return SCSI_FAILED;
+	}
+
 	spin_lock_irq(host->host_lock);
 	/* This one is queued in some place -> to wait for completion */
 	FOR_EACH_QUEUED_ELEMENT(&np->busy_ccbq, qp) {
@@ -1510,6 +1528,7 @@ static struct Scsi_Host * __devinit sym_
 	np->maxoffs	= dev->chip.offset_max;
 	np->maxburst	= dev->chip.burst_max;
 	np->myaddr	= dev->host_id;
+	init_completion(&np->s.io_reset_wait);
 
 	/*
 	 *  Edit its name.
@@ -1948,6 +1967,116 @@ static void __devexit sym2_remove(struct
 	attach_count--;
 }
 
+/**
+ * sym2_io_error_detected() -- called when PCI error is detected
+ * @pdev: pointer to PCI device
+ * @state: current state of the PCI slot
+ */
+static pci_ers_result_t sym2_io_error_detected(struct pci_dev *pdev,
+                                         enum pci_channel_state state)
+{
+	struct sym_hcb *np = pci_get_drvdata(pdev);
+
+	/* If slot is permanently frozen, turn everything off */
+	if (state == pci_channel_io_perm_failure) {
+		sym2_remove(pdev);
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	init_completion(&np->s.io_reset_wait);
+	disable_irq(pdev->irq);
+	pci_disable_device(pdev);
+
+	/* Request a slot reset. */
+	return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * sym2_reset_workarounds -- hardware-specific work-arounds
+ *
+ * This routine is similar to sym_set_workarounds(), except
+ * that, at this point, we already know that the device was
+ * succesfully intialized at least once before, and so most
+ * of the steps taken there are un-needed here.
+ */
+static void sym2_reset_workarounds(struct pci_dev *pdev)
+{
+	u_char revision;
+	u_short status_reg;
+	struct sym_chip *chip;
+
+	pci_read_config_byte(pdev, PCI_CLASS_REVISION, &revision);
+	chip = sym_lookup_chip_table(pdev->device, revision);
+
+	/* Work around for errant bit in 895A, in a fashion
+	 * similar to what is done in sym_set_workarounds().
+	 */
+	pci_read_config_word(pdev, PCI_STATUS, &status_reg);
+	if (!(chip->features & FE_66MHZ) && (status_reg & PCI_STATUS_66MHZ)) {
+	 	status_reg = PCI_STATUS_66MHZ;
+		pci_write_config_word(pdev, PCI_STATUS, status_reg);
+		pci_read_config_word(pdev, PCI_STATUS, &status_reg);
+	}
+}
+
+/**
+ * sym2_io_slot_reset() -- called when the pci bus has been reset.
+ * @pdev: pointer to PCI device
+ *
+ * Restart the card from scratch.
+ */
+static pci_ers_result_t sym2_io_slot_reset(struct pci_dev *pdev)
+{
+	struct sym_hcb *np = pci_get_drvdata(pdev);
+
+	printk(KERN_INFO "%s: recovering from a PCI slot reset\n",
+	          sym_name(np));
+
+	if (pci_enable_device(pdev)) {
+		printk(KERN_ERR "%s: Unable to enable afer PCI reset\n",
+		        sym_name(np));
+		return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	pci_set_master(pdev);
+	enable_irq(pdev->irq);
+
+	/* If the chip can do Memory Write Invalidate, enable it */
+	if (np->features & FE_WRIE) {
+		if (pci_set_mwi(pdev))
+				return PCI_ERS_RESULT_DISCONNECT;
+	}
+
+	/* Perform work-arounds, analogous to sym_set_workarounds() */
+	sym2_reset_workarounds(pdev);
+
+	/* Perform host reset only on one instance of the card */
+	if (PCI_FUNC (pdev->devfn) == 0) {
+		if (sym_reset_scsi_bus(np, 0)) {
+			printk(KERN_ERR "%s: Unable to reset scsi host\n",
+			        sym_name(np));
+			return PCI_ERS_RESULT_DISCONNECT;
+		}
+		sym_start_up(np, 1);
+	}
+
+	return PCI_ERS_RESULT_RECOVERED;
+}
+
+/**
+ * sym2_io_resume() -- resume normal ops after PCI reset
+ * @pdev: pointer to PCI device
+ *
+ * Called when the error recovery driver tells us that its
+ * OK to resume normal operation. Use completion to allow
+ * halted scsi ops to resume.
+ */
+static void sym2_io_resume(struct pci_dev *pdev)
+{
+	struct sym_hcb *np = pci_get_drvdata(pdev);
+	complete_all(&np->s.io_reset_wait);
+}
+
 static void sym2_get_signalling(struct Scsi_Host *shost)
 {
 	struct sym_hcb *np = sym_get_hcb(shost);
@@ -2110,11 +2239,18 @@ static struct pci_device_id sym2_id_tabl
 
 MODULE_DEVICE_TABLE(pci, sym2_id_table);
 
+static struct pci_error_handlers sym2_err_handler = {
+	.error_detected = sym2_io_error_detected,
+	.slot_reset = sym2_io_slot_reset,
+	.resume = sym2_io_resume,
+};
+
 static struct pci_driver sym2_driver = {
 	.name		= NAME53C8XX,
 	.id_table	= sym2_id_table,
 	.probe		= sym2_probe,
 	.remove		= __devexit_p(sym2_remove),
+	.err_handler 	= &sym2_err_handler,
 };
 
 static int __init sym2_init(void)
Index: linux-2.6.22-rc1/drivers/scsi/sym53c8xx_2/sym_glue.h
===================================================================
--- linux-2.6.22-rc1.orig/drivers/scsi/sym53c8xx_2/sym_glue.h	2007-04-25 22:08:32.000000000 -0500
+++ linux-2.6.22-rc1/drivers/scsi/sym53c8xx_2/sym_glue.h	2007-05-14 17:31:44.000000000 -0500
@@ -40,6 +40,7 @@
 #ifndef SYM_GLUE_H
 #define SYM_GLUE_H
 
+#include <linux/completion.h>
 #include <linux/delay.h>
 #include <linux/ioport.h>
 #include <linux/pci.h>
@@ -179,6 +180,9 @@ struct sym_shcb {
 	char		chip_name[8];
 	struct pci_dev	*device;
 
+	/* Waiter for clearing of frozen PCI bus */
+	struct completion io_reset_wait;
+
 	struct Scsi_Host *host;
 
 	void __iomem *	ioaddr;		/* MMIO kernel io address	*/
Index: linux-2.6.22-rc1/drivers/scsi/sym53c8xx_2/sym_hipd.c
===================================================================
--- linux-2.6.22-rc1.orig/drivers/scsi/sym53c8xx_2/sym_hipd.c	2007-04-25 22:08:32.000000000 -0500
+++ linux-2.6.22-rc1/drivers/scsi/sym53c8xx_2/sym_hipd.c	2007-05-14 17:31:44.000000000 -0500
@@ -2809,6 +2809,12 @@ void sym_interrupt (struct sym_hcb *np)
 			dstat |= INB(np, nc_dstat);
 		istatc = INB(np, nc_istat);
 		istat |= istatc;
+
+		/* Prevent deadlock waiting on a condition that may never clear. */
+		if (unlikely(sist == 0xffff && dstat == 0xff)) {
+			if (pci_channel_offline(np->s.device))
+				return;
+		}
 	} while (istatc & (SIP|DIP));
 
 	if (DEBUG_FLAGS & DEBUG_TINY)

^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2007-08-02 22:53 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-01-18 16:53 [PATCH] PCI Error Recovery: Symbios SCSI device driver linas
2006-01-18 17:07 ` Matthew Wilcox
2006-01-18 17:54   ` linas
  -- strict thread matches above, loose matches on Subject: below --
2006-02-02 20:15 Linas Vepstas
2006-09-21 23:13 [PATCH]: " Linas Vepstas
2006-09-22 22:06 ` Luca
2006-09-22 23:32   ` Linas Vepstas
2006-09-22 23:39     ` Randy.Dunlap
2006-09-22 23:50       ` Linas Vepstas
2006-09-23  0:57         ` Randy.Dunlap
2006-10-20 18:05 Linas Vepstas
2006-10-31 18:55 ` Matthew Wilcox
2006-10-31 19:24   ` James Bottomley
2006-10-31 22:26     ` Linas Vepstas
2006-10-31 23:13   ` Linas Vepstas
2006-11-02  4:46     ` Grant Grundler
2006-11-02  4:56       ` Matthew Wilcox
2007-07-02 18:39 Linas Vepstas
2007-07-05 18:28 ` Andrew Morton
2007-07-05 18:54   ` Matthew Wilcox
2007-08-02 22:53     ` Linas Vepstas

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).