* [PATCH] PCI Error Recovery: Symbios SCSI device driver
@ 2006-01-18 16:53 linas
2006-01-18 17:07 ` Matthew Wilcox
0 siblings, 1 reply; 21+ messages in thread
From: linas @ 2006-01-18 16:53 UTC (permalink / raw)
To: James.Bottomley; +Cc: brking, gregkh, linux-scsi
Hi James,
Please review the patch below, and forward upstream. I've been
bouncing it to various mailing lists for the last half-year,
it has been living in an -mm tree for a while, and was a part of
GregKH's patchset for a while. I'd like to get it propely upstream.
The general description of the principles at work here are given in
Documentation/pci-error-recovery.txt
Thanks,
--linas
----- Forwarded message from Greg KH <gregkh@suse.de> -----
Cc: linas@austin.ibm.com
Subject: [PATCH] PCI Error Recovery: Symbios SCSI device driver
Reply-To: Greg K-H <greg@kroah.com>
To: linux-pci@atrey.karlin.mff.cuni.cz
From: Greg KH <gregkh@suse.de>
[PATCH] PCI Error Recovery: Symbios SCSI device driver
Various PCI bus errors can be signaled by newer PCI controllers. This
patch adds the PCI error recovery callbacks to the Symbios SCSI device driver.
The patch has been tested, and appears to work well.
Signed-off-by: Linas Vepstas <linas@linas.org>
Signed-off-by: Greg Kroah-Hartman <gregkh@suse.de>
---
commit d78cde68ab78766c3a175466aa8adcbdc5520963
tree 836445f183f96049046d2da633c45e04df53b775
parent 3c06ba2cdbd37f80ae6dc044d9e305f0dd0ad6dd
author linas <linas@austin.ibm.com> Fri, 18 Nov 2005 16:23:04 -0600
committer Greg Kroah-Hartman <gregkh@suse.de> Thu, 05 Jan 2006 21:54:54 -0800
drivers/scsi/sym53c8xx_2/sym_glue.c | 113 +++++++++++++++++++++++++++++++++++
drivers/scsi/sym53c8xx_2/sym_glue.h | 4 +
drivers/scsi/sym53c8xx_2/sym_hipd.c | 15 +++++
3 files changed, 132 insertions(+), 0 deletions(-)
diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.c b/drivers/scsi/sym53c8xx_2/sym_glue.c
index 1fffd2b..19ca4ed 100644
--- a/drivers/scsi/sym53c8xx_2/sym_glue.c
+++ b/drivers/scsi/sym53c8xx_2/sym_glue.c
@@ -716,6 +716,10 @@ static irqreturn_t sym53c8xx_intr(int ir
if (DEBUG_FLAGS & DEBUG_TINY) printf_debug ("[");
+ /* Avoid spinloop trying to handle interrupts on frozen device */
+ if (np->s.io_state != pci_channel_io_normal)
+ return IRQ_HANDLED;
+
spin_lock_irqsave(np->s.host->host_lock, flags);
sym_interrupt(np);
spin_unlock_irqrestore(np->s.host->host_lock, flags);
@@ -789,6 +793,25 @@ static void sym_eh_done(struct scsi_cmnd
*/
static void sym_eh_timeout(u_long p) { __sym_eh_done((struct scsi_cmnd *)p, 1); }
+static void sym_eeh_timeout(u_long p)
+{
+ struct sym_eh_wait *ep = (struct sym_eh_wait *) p;
+ if (!ep)
+ return;
+ complete(&ep->done);
+}
+
+static void sym_eeh_done(struct sym_eh_wait *ep)
+{
+ if (!ep)
+ return;
+ ep->timed_out = 0;
+ if (!del_timer(&ep->timer))
+ return;
+
+ complete(&ep->done);
+}
+
/*
* Generic method for our eh processing.
* The 'op' argument tells what we have to do.
@@ -829,6 +852,35 @@ prepare:
/* Try to proceed the operation we have been asked for */
sts = -1;
+
+ /* We may be in an error condition because the PCI bus
+ * went down. In this case, we need to wait until the
+ * PCI bus is reset, the card is reset, and only then
+ * proceed with the scsi error recovery. We'll wait
+ * for 15 seconds for this to happen.
+ */
+#define WAIT_FOR_PCI_RECOVERY 15
+ if (np->s.io_state != pci_channel_io_normal) {
+ struct sym_eh_wait eeh, *eep = &eeh;
+ np->s.io_reset_wait = eep;
+ init_completion(&eep->done);
+ init_timer(&eep->timer);
+ eep->to_do = SYM_EH_DO_WAIT;
+ eep->timer.expires = jiffies + (WAIT_FOR_PCI_RECOVERY*HZ);
+ eep->timer.function = sym_eeh_timeout;
+ eep->timer.data = (u_long)eep;
+ eep->timed_out = 1; /* Be pessimistic for once :) */
+ add_timer(&eep->timer);
+ spin_unlock_irq(np->s.host->host_lock);
+ wait_for_completion(&eep->done);
+ spin_lock_irq(np->s.host->host_lock);
+ if (eep->timed_out) {
+ printk (KERN_ERR "%s: Timed out waiting for PCI reset\n",
+ sym_name(np));
+ }
+ np->s.io_reset_wait = NULL;
+ }
+
switch(op) {
case SYM_EH_ABORT:
sts = sym_abort_scsiio(np, cmd, 1);
@@ -1630,6 +1682,8 @@ static struct Scsi_Host * __devinit sym_
np->maxoffs = dev->chip.offset_max;
np->maxburst = dev->chip.burst_max;
np->myaddr = dev->host_id;
+ np->s.io_state = pci_channel_io_normal;
+ np->s.io_reset_wait = NULL;
/*
* Edit its name.
@@ -1962,6 +2016,58 @@ static int sym_detach(struct sym_hcb *np
return 1;
}
+/* ------------- PCI Error Recovery infrastructure -------------- */
+/** sym2_io_error_detected() is called when PCI error is detected */
+static pci_ers_result_t sym2_io_error_detected (struct pci_dev *pdev, pci_channel_state_t state)
+{
+ struct sym_hcb *np = pci_get_drvdata(pdev);
+
+ np->s.io_state = state;
+ // XXX If slot is permanently frozen, then what?
+ // Should we scsi_remove_host() maybe ??
+
+ /* Request a slot slot reset. */
+ return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/** sym2_io_slot_reset is called when the pci bus has been reset.
+ * Restart the card from scratch. */
+static pci_ers_result_t sym2_io_slot_reset (struct pci_dev *pdev)
+{
+ struct sym_hcb *np = pci_get_drvdata(pdev);
+
+ printk (KERN_INFO "%s: recovering from a PCI slot reset\n",
+ sym_name(np));
+
+ if (pci_enable_device(pdev))
+ printk (KERN_ERR "%s: device setup failed most egregiously\n",
+ sym_name(np));
+
+ pci_set_master(pdev);
+ enable_irq (pdev->irq);
+
+ /* Perform host reset only on one instance of the card */
+ if (0 == PCI_FUNC (pdev->devfn))
+ sym_reset_scsi_bus(np, 0);
+
+ return PCI_ERS_RESULT_RECOVERED;
+}
+
+/** sym2_io_resume is called when the error recovery driver
+ * tells us that its OK to resume normal operation.
+ */
+static void sym2_io_resume (struct pci_dev *pdev)
+{
+ struct sym_hcb *np = pci_get_drvdata(pdev);
+
+ /* Perform device startup only once for this card. */
+ if (0 == PCI_FUNC (pdev->devfn))
+ sym_start_up (np, 1);
+
+ np->s.io_state = pci_channel_io_normal;
+ sym_eeh_done (np->s.io_reset_wait);
+}
+
/*
* Driver host template.
*/
@@ -2219,11 +2325,18 @@ static struct pci_device_id sym2_id_tabl
MODULE_DEVICE_TABLE(pci, sym2_id_table);
+static struct pci_error_handlers sym2_err_handler = {
+ .error_detected = sym2_io_error_detected,
+ .slot_reset = sym2_io_slot_reset,
+ .resume = sym2_io_resume,
+};
+
static struct pci_driver sym2_driver = {
.name = NAME53C8XX,
.id_table = sym2_id_table,
.probe = sym2_probe,
.remove = __devexit_p(sym2_remove),
+ .err_handler = &sym2_err_handler,
};
static int __init sym2_init(void)
diff --git a/drivers/scsi/sym53c8xx_2/sym_glue.h b/drivers/scsi/sym53c8xx_2/sym_glue.h
index cc92d0c..1ccf0c5 100644
--- a/drivers/scsi/sym53c8xx_2/sym_glue.h
+++ b/drivers/scsi/sym53c8xx_2/sym_glue.h
@@ -180,6 +180,10 @@ struct sym_shcb {
char chip_name[8];
struct pci_dev *device;
+ /* pci bus i/o state; waiter for clearing of i/o state */
+ pci_channel_state_t io_state;
+ struct sym_eh_wait *io_reset_wait;
+
struct Scsi_Host *host;
void __iomem * ioaddr; /* MMIO kernel io address */
diff --git a/drivers/scsi/sym53c8xx_2/sym_hipd.c b/drivers/scsi/sym53c8xx_2/sym_hipd.c
index 8260f04..eec2feb 100644
--- a/drivers/scsi/sym53c8xx_2/sym_hipd.c
+++ b/drivers/scsi/sym53c8xx_2/sym_hipd.c
@@ -2761,6 +2761,7 @@ void sym_interrupt (struct sym_hcb *np)
u_char istat, istatc;
u_char dstat;
u_short sist;
+ u_int icnt;
/*
* interrupt on the fly ?
@@ -2802,6 +2803,7 @@ void sym_interrupt (struct sym_hcb *np)
sist = 0;
dstat = 0;
istatc = istat;
+ icnt = 0;
do {
if (istatc & SIP)
sist |= INW(np, nc_sist);
@@ -2809,6 +2811,19 @@ void sym_interrupt (struct sym_hcb *np)
dstat |= INB(np, nc_dstat);
istatc = INB(np, nc_istat);
istat |= istatc;
+
+ /* Prevent deadlock waiting on a condition that may never clear. */
+ /* XXX this is a temporary kludge; the correct to detect
+ * a PCI bus error would be to use the io_check interfaces
+ * proposed by Hidetoshi Seto <seto.hidetoshi@jp.fujitsu.com>
+ * Problem with polling like that is the state flag might not
+ * be set.
+ */
+ icnt ++;
+ if (100 < icnt) {
+ if (np->s.device->error_state != pci_channel_io_normal)
+ return;
+ }
} while (istatc & (SIP|DIP));
if (DEBUG_FLAGS & DEBUG_TINY)
----- End forwarded message -----
^ permalink raw reply related [flat|nested] 21+ messages in thread
* Re: [PATCH] PCI Error Recovery: Symbios SCSI device driver
2006-01-18 16:53 linas
@ 2006-01-18 17:07 ` Matthew Wilcox
2006-01-18 17:54 ` linas
0 siblings, 1 reply; 21+ messages in thread
From: Matthew Wilcox @ 2006-01-18 17:07 UTC (permalink / raw)
To: linas; +Cc: James.Bottomley, brking, gregkh, linux-scsi
On Wed, Jan 18, 2006 at 10:53:45AM -0600, linas wrote:
> Hi James,
Actually, I'm the sym2 maintainer.
> +static void sym_eeh_timeout(u_long p)
Please don't propagate more uses of this u_long crap. I haven't got
rid of all of them, but there's no need to add more.
> @@ -1630,6 +1682,8 @@ static struct Scsi_Host * __devinit sym_
> np->maxoffs = dev->chip.offset_max;
> np->maxburst = dev->chip.burst_max;
> np->myaddr = dev->host_id;
> + np->s.io_state = pci_channel_io_normal;
> + np->s.io_reset_wait = NULL;
Is there a good reason to put them in the 's'? It's really a dirty
hack.
> +/* ------------- PCI Error Recovery infrastructure -------------- */
> +/** sym2_io_error_detected() is called when PCI error is detected */
> +static pci_ers_result_t sym2_io_error_detected (struct pci_dev *pdev, pci_channel_state_t state)
> +{
> + struct sym_hcb *np = pci_get_drvdata(pdev);
> +
> + np->s.io_state = state;
> + // XXX If slot is permanently frozen, then what?
> + // Should we scsi_remove_host() maybe ??
> +
> + /* Request a slot slot reset. */
> + return PCI_ERS_RESULT_NEED_RESET;
> +}
Don't use C99 comments, don't put a space between the function name and
the opening bracket, and don't exceed 80 columns.
> +/** sym2_io_slot_reset is called when the pci bus has been reset.
> + * Restart the card from scratch. */
kerneldoc style comment, but not in kerneldoc format.
> + /* Perform host reset only on one instance of the card */
> + if (0 == PCI_FUNC (pdev->devfn))
I really hate "if (constant == variable)".
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH] PCI Error Recovery: Symbios SCSI device driver
2006-01-18 17:07 ` Matthew Wilcox
@ 2006-01-18 17:54 ` linas
0 siblings, 0 replies; 21+ messages in thread
From: linas @ 2006-01-18 17:54 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: James.Bottomley, brking, gregkh, linux-scsi
On Wed, Jan 18, 2006 at 10:07:16AM -0700, Matthew Wilcox was heard to remark:
> On Wed, Jan 18, 2006 at 10:53:45AM -0600, linas wrote:
> > Hi James,
>
> Actually, I'm the sym2 maintainer.
Ah, sorry, then.
> > +static void sym_eeh_timeout(u_long p)
>
> Please don't propagate more uses of this u_long crap. I haven't got
> rid of all of them, but there's no need to add more.
I was trying to follow the coding style as currently found in
that file; the other routines of this type use u_long. Should
I use u_int instead? or unsigned long?
> > @@ -1630,6 +1682,8 @@ static struct Scsi_Host * __devinit sym_
> > np->maxoffs = dev->chip.offset_max;
> > np->maxburst = dev->chip.burst_max;
> > np->myaddr = dev->host_id;
> > + np->s.io_state = pci_channel_io_normal;
> > + np->s.io_reset_wait = NULL;
>
> Is there a good reason to put them in the 's'? It's really a dirty
> hack.
The struct sym_shcb is defined in a file "sym_glue.h" which seems to
contain linux-kernel specific code. The comments before this struct
state that its a "System specific host data structure."
By contrast, struct sym_hcb is defined in "sym_hipd.h", which appears
to have no Linux-specific structs in it. Thus, I assumed that it
should go into the glue struct rather than the main struct. I can
of course add it to sym_hcb if that is a better location.
> I really hate "if (constant == variable)".
Sorry, I'll fix it. I find it easier on the eyes to read that way.
Old habit. (It has the marginal benefit of flagging as a compiler
error any accidental drop of an =, such as if (variable = constant)
which is usually not what the coder intended).
I'll fix the other bits you mentioned as well.
--linas
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH] PCI Error Recovery: Symbios SCSI device driver
@ 2006-02-02 20:15 Linas Vepstas
0 siblings, 0 replies; 21+ messages in thread
From: Linas Vepstas @ 2006-02-02 20:15 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: linux-scsi
Hi Matthew,
Please review the patch below; if it looks good, please forward
upstream. I beleive its been in the AKPM kernels for a while now.
A variant as also been shipping with SuSE SLES9 kernels for quite
a while. This version of the patch should have fixed up all of
your previous comments/complaints.
--linas
Formal description:
Various PCI bus errors can be signaled by newer PCI controllers. This
patch adds the PCI error recovery callbacks to the Symbios SCSI device
driver. The patch has been tested, and appears to work well.
Signed-off-by: Linas Vepstas <linas@linas.org>
--
sym_glue.c | 136 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
sym_glue.h | 4 +
sym_hipd.c | 15 ++++++
3 files changed, 155 insertions(+)
Index: linux-2.6.16-rc1-git5/drivers/scsi/sym53c8xx_2/sym_glue.c
===================================================================
--- linux-2.6.16-rc1-git5.orig/drivers/scsi/sym53c8xx_2/sym_glue.c 2006-02-01 17:09:16.000000000 -0600
+++ linux-2.6.16-rc1-git5/drivers/scsi/sym53c8xx_2/sym_glue.c 2006-02-02 14:07:41.088534411 -0600
@@ -716,6 +716,10 @@
if (DEBUG_FLAGS & DEBUG_TINY) printf_debug ("[");
+ /* Avoid spinloop trying to handle interrupts on frozen device */
+ if (np->s.io_state != pci_channel_io_normal)
+ return IRQ_HANDLED;
+
spin_lock_irqsave(np->s.host->host_lock, flags);
sym_interrupt(np);
spin_unlock_irqrestore(np->s.host->host_lock, flags);
@@ -789,6 +793,25 @@
*/
static void sym_eh_timeout(u_long p) { __sym_eh_done((struct scsi_cmnd *)p, 1); }
+static void sym_eeh_timeout(unsigned long p)
+{
+ struct sym_eh_wait *ep = (struct sym_eh_wait *) p;
+ if (!ep)
+ return;
+ complete(&ep->done);
+}
+
+static void sym_eeh_done(struct sym_eh_wait *ep)
+{
+ if (!ep)
+ return;
+ ep->timed_out = 0;
+ if (!del_timer(&ep->timer))
+ return;
+
+ complete(&ep->done);
+}
+
/*
* Generic method for our eh processing.
* The 'op' argument tells what we have to do.
@@ -829,6 +852,36 @@
/* Try to proceed the operation we have been asked for */
sts = -1;
+
+ /* We may be in an error condition because the PCI bus
+ * went down. In this case, we need to wait until the
+ * PCI bus is reset, the card is reset, and only then
+ * proceed with the scsi error recovery. We'll wait
+ * for 15 seconds for this to happen.
+ */
+#define WAIT_FOR_PCI_RECOVERY 15
+ if (np->s.io_state != pci_channel_io_normal) {
+ struct sym_eh_wait eeh, *eep = &eeh;
+ np->s.io_reset_wait = eep;
+ init_completion(&eep->done);
+ init_timer(&eep->timer);
+ eep->to_do = SYM_EH_DO_WAIT;
+ eep->timer.expires = jiffies + (WAIT_FOR_PCI_RECOVERY*HZ);
+ eep->timer.function = sym_eeh_timeout;
+ eep->timer.data = (u_long)eep;
+ eep->timed_out = 1; /* Be pessimistic for once :) */
+ add_timer(&eep->timer);
+ spin_unlock_irq(np->s.host->host_lock);
+ wait_for_completion(&eep->done);
+ spin_lock_irq(np->s.host->host_lock);
+ if (eep->timed_out) {
+ printk (KERN_ERR
+ "%s: Timed out waiting for PCI reset\n",
+ sym_name(np));
+ }
+ np->s.io_reset_wait = NULL;
+ }
+
switch(op) {
case SYM_EH_ABORT:
sts = sym_abort_scsiio(np, cmd, 1);
@@ -1630,6 +1683,8 @@
np->maxoffs = dev->chip.offset_max;
np->maxburst = dev->chip.burst_max;
np->myaddr = dev->host_id;
+ np->s.io_state = pci_channel_io_normal;
+ np->s.io_reset_wait = NULL;
/*
* Edit its name.
@@ -1962,6 +2017,80 @@
return 1;
}
+/**
+ * sym2_io_error_detected() - PCI error is detected
+ *
+ * Description: This routine is called shrtly after the PCI error
+ * recovery subsystem has detected a PCI bus error. At this point,
+ * all further i/o to te adapter will be fruitless, so hold off i/o.
+ * Basically, just queue up i/o and wait for the bus reset to happen.
+ */
+static pci_ers_result_t sym2_io_error_detected(struct pci_dev *pdev,
+ enum pci_channel_state state)
+{
+ struct sym_hcb *np = pci_get_drvdata(pdev);
+
+ np->s.io_state = state;
+ /* If the reported state is "permanent failure", then
+ * we should shut down the driver for good, and -EIO
+ * all pending i/o requests. XXX Not implemented yet.
+ * (Not sure how - should we scsi_remove_host() maybe ??)
+ */
+
+ /* Request a slot slot reset. */
+ return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * sym2_io_slot_reset - the pci bus has been reset.
+ *
+ * Description: This routine is called after the PCI error
+ * recovery system has completely reset the PCI slot. At
+ * this point, I/O is possible, although the card has just
+ * been reset, and is not yet initialized. A complete
+ * restart is performed.
+ */
+static pci_ers_result_t sym2_io_slot_reset(struct pci_dev *pdev)
+{
+ struct sym_hcb *np = pci_get_drvdata(pdev);
+
+ printk(KERN_INFO "%s: recovering from a PCI slot reset\n",
+ sym_name(np));
+
+ if (pci_enable_device(pdev))
+ printk(KERN_ERR "%s: device setup failed most egregiously\n",
+ sym_name(np));
+
+ pci_set_master(pdev);
+ enable_irq(pdev->irq);
+
+ /* Perform host reset only on one instance of the card */
+ if (PCI_FUNC(pdev->devfn) == 0)
+ sym_reset_scsi_bus(np, 0);
+
+ return PCI_ERS_RESULT_RECOVERED;
+}
+
+/**
+ * sym2_io_resume - pci error recovery completed.
+ *
+ * Description: This routine is called when the PCI error
+ * recovery has finished recovering this and all other
+ * affects PCI cards. Normal I/O operations may resume.
+ * Start handling any queued requests.
+ */
+static void sym2_io_resume(struct pci_dev *pdev)
+{
+ struct sym_hcb *np = pci_get_drvdata(pdev);
+
+ /* Perform device startup only once for this card. */
+ if (PCI_FUNC(pdev->devfn) == 0)
+ sym_start_up(np, 1);
+
+ np->s.io_state = pci_channel_io_normal;
+ sym_eeh_done(np->s.io_reset_wait);
+}
+
/*
* Driver host template.
*/
@@ -2219,11 +2348,18 @@
MODULE_DEVICE_TABLE(pci, sym2_id_table);
+static struct pci_error_handlers sym2_err_handler = {
+ .error_detected = sym2_io_error_detected,
+ .slot_reset = sym2_io_slot_reset,
+ .resume = sym2_io_resume,
+};
+
static struct pci_driver sym2_driver = {
.name = NAME53C8XX,
.id_table = sym2_id_table,
.probe = sym2_probe,
.remove = __devexit_p(sym2_remove),
+ .err_handler = &sym2_err_handler,
};
static int __init sym2_init(void)
Index: linux-2.6.16-rc1-git5/drivers/scsi/sym53c8xx_2/sym_glue.h
===================================================================
--- linux-2.6.16-rc1-git5.orig/drivers/scsi/sym53c8xx_2/sym_glue.h 2006-02-01 17:09:16.000000000 -0600
+++ linux-2.6.16-rc1-git5/drivers/scsi/sym53c8xx_2/sym_glue.h 2006-02-02 13:33:47.459016635 -0600
@@ -180,6 +180,10 @@
char chip_name[8];
struct pci_dev *device;
+ /* pci bus i/o state; waiter for clearing of i/o state */
+ enum pci_channel_state io_state;
+ struct sym_eh_wait *io_reset_wait;
+
struct Scsi_Host *host;
void __iomem * ioaddr; /* MMIO kernel io address */
Index: linux-2.6.16-rc1-git5/drivers/scsi/sym53c8xx_2/sym_hipd.c
===================================================================
--- linux-2.6.16-rc1-git5.orig/drivers/scsi/sym53c8xx_2/sym_hipd.c 2006-02-01 17:09:16.000000000 -0600
+++ linux-2.6.16-rc1-git5/drivers/scsi/sym53c8xx_2/sym_hipd.c 2006-02-02 14:05:38.973739591 -0600
@@ -2761,6 +2761,7 @@
u_char istat, istatc;
u_char dstat;
u_short sist;
+ unsigned int icnt;
/*
* interrupt on the fly ?
@@ -2802,6 +2803,7 @@
sist = 0;
dstat = 0;
istatc = istat;
+ icnt = 0;
do {
if (istatc & SIP)
sist |= INW(np, nc_sist);
@@ -2809,6 +2811,19 @@
dstat |= INB(np, nc_dstat);
istatc = INB(np, nc_istat);
istat |= istatc;
+
+ /*
+ * Prevent deadlock waiting on a condition that may
+ * never clear. If the PCI bus has disconnected us,
+ * we shouldn't poll, as all reads will return 0xffffffff.
+ * If the flags aren't clearing, then check to see if
+ * the bus is disconnected. If it is, punt.
+ */
+ icnt ++;
+ if (100 < icnt) {
+ if (np->s.device->error_state != pci_channel_io_normal)
+ return;
+ }
} while (istatc & (SIP|DIP));
if (DEBUG_FLAGS & DEBUG_TINY)
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH]: PCI Error Recovery: Symbios SCSI device driver
@ 2006-09-21 23:13 Linas Vepstas
2006-09-22 22:06 ` Luca
0 siblings, 1 reply; 21+ messages in thread
From: Linas Vepstas @ 2006-09-21 23:13 UTC (permalink / raw)
To: matthew; +Cc: linux-scsi, linux-pci, linuxppc-dev, linux-kernel
Matthew,
I hope this looks like a good patch. Please review, apply and
forward upstream.
--linas
Various PCI bus errors can be signaled by newer PCI controllers.
This patch adds the PCI error recovery callbacks to the Symbios
SCSI device driver. The patch has been tested, and appears to
work well.
Signed-off-by: Linas Vepstas <linas@ausin.ibm.com>
----
drivers/scsi/sym53c8xx_2/sym_glue.c | 97 ++++++++++++++++++++++++++++++++++++
drivers/scsi/sym53c8xx_2/sym_glue.h | 4 +
drivers/scsi/sym53c8xx_2/sym_hipd.c | 10 +++
3 files changed, 111 insertions(+)
Index: linux-2.6.18-rc7-git1/drivers/scsi/sym53c8xx_2/sym_glue.c
===================================================================
--- linux-2.6.18-rc7-git1.orig/drivers/scsi/sym53c8xx_2/sym_glue.c 2006-09-21 17:32:54.000000000 -0500
+++ linux-2.6.18-rc7-git1/drivers/scsi/sym53c8xx_2/sym_glue.c 2006-09-21 17:35:37.000000000 -0500
@@ -659,6 +659,11 @@ static irqreturn_t sym53c8xx_intr(int ir
if (DEBUG_FLAGS & DEBUG_TINY) printf_debug ("[");
+ /* Avoid spinloop trying to handle interrupts on frozen device */
+ if ((np->s.device->error_state != pci_channel_io_normal) &&
+ (np->s.device->error_state != 0))
+ return IRQ_HANDLED;
+
spin_lock_irqsave(np->s.host->host_lock, flags);
sym_interrupt(np);
spin_unlock_irqrestore(np->s.host->host_lock, flags);
@@ -726,6 +731,19 @@ static int sym_eh_handler(int op, char *
dev_warn(&cmd->device->sdev_gendev, "%s operation started.\n", opname);
+ /* We may be in an error condition because the PCI bus
+ * went down. In this case, we need to wait until the
+ * PCI bus is reset, the card is reset, and only then
+ * proceed with the scsi error recovery. There's no
+ * point in hurrying; take a leisurely wait.
+ */
+#define WAIT_FOR_PCI_RECOVERY 35
+ if ((np->s.device->error_state != pci_channel_io_normal) &&
+ (np->s.device->error_state != 0) &&
+ (0 == wait_for_completion_timeout(&np->s.io_reset_wait,
+ WAIT_FOR_PCI_RECOVERY*HZ)))
+ return SCSI_FAILED;
+
spin_lock_irq(host->host_lock);
/* This one is queued in some place -> to wait for completion */
FOR_EACH_QUEUED_ELEMENT(&np->busy_ccbq, qp) {
@@ -1510,6 +1528,7 @@ static struct Scsi_Host * __devinit sym_
np->maxoffs = dev->chip.offset_max;
np->maxburst = dev->chip.burst_max;
np->myaddr = dev->host_id;
+ init_completion(&np->s.io_reset_wait);
/*
* Edit its name.
@@ -1948,6 +1967,77 @@ static void __devexit sym2_remove(struct
attach_count--;
}
+/**
+ * sym2_io_error_detected() -- called when PCI error is detected
+ * @pdev: pointer to PCI device
+ * @state: current state of the PCI slot
+ */
+static pci_ers_result_t sym2_io_error_detected (struct pci_dev *pdev,
+ enum pci_channel_state state)
+{
+ struct sym_hcb *np = pci_get_drvdata(pdev);
+
+ /* If slot is permanently frozen, turn everything off */
+ if (state == pci_channel_io_perm_failure) {
+ sym2_remove(pdev);
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+
+ init_completion(&np->s.io_reset_wait);
+ disable_irq(pdev->irq);
+ pci_disable_device(pdev);
+
+ /* Request a slot slot reset. */
+ return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * sym2_io_slot_reset() -- called when the pci bus has been reset.
+ * @pdev: pointer to PCI device
+ *
+ * Restart the card from scratch.
+ */
+static pci_ers_result_t sym2_io_slot_reset (struct pci_dev *pdev)
+{
+ struct sym_hcb *np = pci_get_drvdata(pdev);
+
+ printk (KERN_INFO "%s: recovering from a PCI slot reset\n",
+ sym_name(np));
+
+ if (pci_enable_device(pdev))
+ printk (KERN_ERR "%s: device setup failed most egregiously\n",
+ sym_name(np));
+
+ pci_set_master(pdev);
+ enable_irq (pdev->irq);
+
+ /* Perform host reset only on one instance of the card */
+ if (0 == PCI_FUNC (pdev->devfn)) {
+ if (sym_reset_scsi_bus(np, 0)) {
+ printk(KERN_ERR "%s: Unable to reset scsi host controller\n",
+ sym_name(np));
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+ sym_start_up (np, 1);
+ }
+
+ return PCI_ERS_RESULT_RECOVERED;
+}
+
+/**
+ * sym2_io_resume() -- resume normal ops after PCI reset
+ * @pdev: pointer to PCI device
+ *
+ * Called when the error recovery driver tells us that its
+ * OK to resume normal operation. Use completion to allow
+ * halted scsi ops to resume.
+ */
+static void sym2_io_resume (struct pci_dev *pdev)
+{
+ struct sym_hcb *np = pci_get_drvdata(pdev);
+ complete_all(&np->s.io_reset_wait);
+}
+
static void sym2_get_signalling(struct Scsi_Host *shost)
{
struct sym_hcb *np = sym_get_hcb(shost);
@@ -2110,11 +2200,18 @@ static struct pci_device_id sym2_id_tabl
MODULE_DEVICE_TABLE(pci, sym2_id_table);
+static struct pci_error_handlers sym2_err_handler = {
+ .error_detected = sym2_io_error_detected,
+ .slot_reset = sym2_io_slot_reset,
+ .resume = sym2_io_resume,
+};
+
static struct pci_driver sym2_driver = {
.name = NAME53C8XX,
.id_table = sym2_id_table,
.probe = sym2_probe,
.remove = __devexit_p(sym2_remove),
+ .err_handler = &sym2_err_handler,
};
static int __init sym2_init(void)
Index: linux-2.6.18-rc7-git1/drivers/scsi/sym53c8xx_2/sym_glue.h
===================================================================
--- linux-2.6.18-rc7-git1.orig/drivers/scsi/sym53c8xx_2/sym_glue.h 2006-09-21 17:32:54.000000000 -0500
+++ linux-2.6.18-rc7-git1/drivers/scsi/sym53c8xx_2/sym_glue.h 2006-09-21 17:35:37.000000000 -0500
@@ -40,6 +40,7 @@
#ifndef SYM_GLUE_H
#define SYM_GLUE_H
+#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/ioport.h>
#include <linux/pci.h>
@@ -179,6 +180,9 @@ struct sym_shcb {
char chip_name[8];
struct pci_dev *device;
+ /* Waiter for clearing of frozen PCI bus */
+ struct completion io_reset_wait;
+
struct Scsi_Host *host;
void __iomem * ioaddr; /* MMIO kernel io address */
Index: linux-2.6.18-rc7-git1/drivers/scsi/sym53c8xx_2/sym_hipd.c
===================================================================
--- linux-2.6.18-rc7-git1.orig/drivers/scsi/sym53c8xx_2/sym_hipd.c 2006-09-21 17:32:54.000000000 -0500
+++ linux-2.6.18-rc7-git1/drivers/scsi/sym53c8xx_2/sym_hipd.c 2006-09-21 17:35:37.000000000 -0500
@@ -2761,6 +2761,7 @@ void sym_interrupt (struct sym_hcb *np)
u_char istat, istatc;
u_char dstat;
u_short sist;
+ u_int icnt;
/*
* interrupt on the fly ?
@@ -2802,6 +2803,7 @@ void sym_interrupt (struct sym_hcb *np)
sist = 0;
dstat = 0;
istatc = istat;
+ icnt = 0;
do {
if (istatc & SIP)
sist |= INW(np, nc_sist);
@@ -2809,6 +2811,14 @@ void sym_interrupt (struct sym_hcb *np)
dstat |= INB(np, nc_dstat);
istatc = INB(np, nc_istat);
istat |= istatc;
+
+ /* Prevent deadlock waiting on a condition that may never clear. */
+ icnt ++;
+ if (100 < icnt) {
+ if ((np->s.device->error_state != pci_channel_io_normal)
+ && (np->s.device->error_state != 0))
+ return;
+ }
} while (istatc & (SIP|DIP));
if (DEBUG_FLAGS & DEBUG_TINY)
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH]: PCI Error Recovery: Symbios SCSI device driver
2006-09-21 23:13 Linas Vepstas
@ 2006-09-22 22:06 ` Luca
2006-09-22 23:32 ` Linas Vepstas
0 siblings, 1 reply; 21+ messages in thread
From: Luca @ 2006-09-22 22:06 UTC (permalink / raw)
To: Linas Vepstas; +Cc: linux-scsi, linux-pci, linuxppc-dev, linux-kernel
Hi Linas,
Linas Vepstas <linas@austin.ibm.com> ha scritto:
> Index: linux-2.6.18-rc7-git1/drivers/scsi/sym53c8xx_2/sym_glue.c
> ===================================================================
> --- linux-2.6.18-rc7-git1.orig/drivers/scsi/sym53c8xx_2/sym_glue.c 2006-09-21 17:32:54.000000000 -0500
> +++ linux-2.6.18-rc7-git1/drivers/scsi/sym53c8xx_2/sym_glue.c 2006-09-21 17:35:37.000000000 -0500
> +/**
> + * sym2_io_slot_reset() -- called when the pci bus has been reset.
> + * @pdev: pointer to PCI device
> + *
> + * Restart the card from scratch.
> + */
> +static pci_ers_result_t sym2_io_slot_reset (struct pci_dev *pdev)
> +{
> + struct sym_hcb *np = pci_get_drvdata(pdev);
> +
> + printk (KERN_INFO "%s: recovering from a PCI slot reset\n",
Space after function name? You put in other places too, it's not
consistent with the rest of the patch.
> + sym_name(np));
> +
> + if (pci_enable_device(pdev))
> + printk (KERN_ERR "%s: device setup failed most egregiously\n",
> + sym_name(np));
Is the failure of pci_enable_device ignored on purpose?
(plus extra space)
> +
> + pci_set_master(pdev);
> + enable_irq (pdev->irq);
Spurious space.
> +
> + /* Perform host reset only on one instance of the card */
> + if (0 == PCI_FUNC (pdev->devfn)) {
Ditto.
> + if (sym_reset_scsi_bus(np, 0)) {
> + printk(KERN_ERR "%s: Unable to reset scsi host controller\n",
> + sym_name(np));
> + return PCI_ERS_RESULT_DISCONNECT;
> + }
> + sym_start_up (np, 1);
Ditto.
> + }
> +
> + return PCI_ERS_RESULT_RECOVERED;
> +}
Luca
--
"L'abilita` politica e` l'abilita` di prevedere quello che
accadra` domani, la prossima settimana, il prossimo mese e
l'anno prossimo. E di essere cosi` abili, piu` tardi,
da spiegare perche' non e` accaduto."
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH]: PCI Error Recovery: Symbios SCSI device driver
2006-09-22 22:06 ` Luca
@ 2006-09-22 23:32 ` Linas Vepstas
2006-09-22 23:39 ` Randy.Dunlap
0 siblings, 1 reply; 21+ messages in thread
From: Linas Vepstas @ 2006-09-22 23:32 UTC (permalink / raw)
To: Luca; +Cc: linux-scsi, linux-pci, linuxppc-dev, linux-kernel
On Sat, Sep 23, 2006 at 12:06:29AM +0200, Luca wrote:
>
> Space after function name? You put in other places too, it's not
> consistent with the rest of the patch.
Oops. I was also coding on a different project recently, with a
different style. I'll send a revised patch in a moment.
> > + if (pci_enable_device(pdev))
> > + printk (KERN_ERR "%s: device setup failed most egregiously\n",
> > + sym_name(np));
>
> Is the failure of pci_enable_device ignored on purpose?
No. :-( Thanks for the catch. I think I got cross-eyed staring at the code.
--linas
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH]: PCI Error Recovery: Symbios SCSI device driver
2006-09-22 23:32 ` Linas Vepstas
@ 2006-09-22 23:39 ` Randy.Dunlap
2006-09-22 23:50 ` Linas Vepstas
0 siblings, 1 reply; 21+ messages in thread
From: Randy.Dunlap @ 2006-09-22 23:39 UTC (permalink / raw)
To: Linas Vepstas; +Cc: Luca, linux-scsi, linux-pci, linuxppc-dev, linux-kernel
On Fri, 22 Sep 2006 18:32:35 -0500 Linas Vepstas wrote:
> On Sat, Sep 23, 2006 at 12:06:29AM +0200, Luca wrote:
> >
> > Space after function name? You put in other places too, it's not
> > consistent with the rest of the patch.
>
> Oops. I was also coding on a different project recently, with a
> different style. I'll send a revised patch in a moment.
Please change if()'s to use
if (var == constant)
instead of
if (constant == var)
also. (or whatever condition is being used) Thanks.
> > > + if (pci_enable_device(pdev))
> > > + printk (KERN_ERR "%s: device setup failed most egregiously\n",
> > > + sym_name(np));
> >
> > Is the failure of pci_enable_device ignored on purpose?
>
> No. :-( Thanks for the catch. I think I got cross-eyed staring at the code.
---
~Randy
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH]: PCI Error Recovery: Symbios SCSI device driver
2006-09-22 23:39 ` Randy.Dunlap
@ 2006-09-22 23:50 ` Linas Vepstas
2006-09-23 0:57 ` Randy.Dunlap
0 siblings, 1 reply; 21+ messages in thread
From: Linas Vepstas @ 2006-09-22 23:50 UTC (permalink / raw)
To: Randy.Dunlap; +Cc: Luca, linux-scsi, linux-pci, linuxppc-dev, linux-kernel
On Fri, Sep 22, 2006 at 04:39:29PM -0700, Randy.Dunlap wrote:
> On Fri, 22 Sep 2006 18:32:35 -0500 Linas Vepstas wrote:
>
> > On Sat, Sep 23, 2006 at 12:06:29AM +0200, Luca wrote:
> > >
> > > Space after function name? You put in other places too, it's not
> > > consistent with the rest of the patch.
> >
> > Oops. I was also coding on a different project recently, with a
> > different style. I'll send a revised patch in a moment.
>
> Please change if()'s to use
>
> if (var == constant)
> instead of
> if (constant == var)
Yuck! Horrid coding style! No rational excuse for coding like that.
Advice taken under protest; new patch shortly.
--linas
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH]: PCI Error Recovery: Symbios SCSI device driver
2006-09-22 23:50 ` Linas Vepstas
@ 2006-09-23 0:57 ` Randy.Dunlap
0 siblings, 0 replies; 21+ messages in thread
From: Randy.Dunlap @ 2006-09-23 0:57 UTC (permalink / raw)
To: Linas Vepstas; +Cc: Luca, linux-scsi, linux-pci, linuxppc-dev, linux-kernel
On Fri, 22 Sep 2006 18:50:17 -0500 Linas Vepstas wrote:
> On Fri, Sep 22, 2006 at 04:39:29PM -0700, Randy.Dunlap wrote:
> > On Fri, 22 Sep 2006 18:32:35 -0500 Linas Vepstas wrote:
> >
> > > On Sat, Sep 23, 2006 at 12:06:29AM +0200, Luca wrote:
> > > >
> > > > Space after function name? You put in other places too, it's not
> > > > consistent with the rest of the patch.
> > >
> > > Oops. I was also coding on a different project recently, with a
> > > different style. I'll send a revised patch in a moment.
> >
> > Please change if()'s to use
> >
> > if (var == constant)
> > instead of
> > if (constant == var)
>
> Yuck! Horrid coding style! No rational excuse for coding like that.
> Advice taken under protest; new patch shortly.
Just after my email, I saw this :)
http://marc.theaimsgroup.com/?l=linux-mm-commits&m=115896769322020&w=2
---
~Randy
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH]: PCI Error Recovery: Symbios SCSI device driver
@ 2006-10-20 18:05 Linas Vepstas
2006-10-31 18:55 ` Matthew Wilcox
0 siblings, 1 reply; 21+ messages in thread
From: Linas Vepstas @ 2006-10-20 18:05 UTC (permalink / raw)
To: matthew; +Cc: linux-scsi, linux-pci, linux-kernel
Matthew,
Please apply and forward upstream. This patch is nearly identical
to the one from a month earlier, except that it fixes some
indentation problems, and shortens a few over-length lines.
This patch has been in an -mm tree since 29 Sept, and a variant
of it has been shipping for over a year in Novell SLES9, where
it has not received any complaints/defects/fixes/etc.
You had previously mentioned some objections to this patch,
I assume these were about the bad indentation? Are there other
concerns?
--linas
Various PCI bus errors can be signaled by newer PCI controllers.
This patch adds the PCI error recovery callbacks to the Symbios
SCSI device driver. The patch has been tested, and appears to
work well.
Signed-off-by: Linas Vepstas <linas@austin.ibm.com>
--
drivers/scsi/sym53c8xx_2/sym_glue.c | 99 ++++++++++++++++++++++++++++++++++++
drivers/scsi/sym53c8xx_2/sym_glue.h | 4 +
drivers/scsi/sym53c8xx_2/sym_hipd.c | 10 +++
3 files changed, 113 insertions(+)
Index: linux-2.6.19-rc1-git11/drivers/scsi/sym53c8xx_2/sym_glue.c
===================================================================
--- linux-2.6.19-rc1-git11.orig/drivers/scsi/sym53c8xx_2/sym_glue.c 2006-10-20 12:25:11.000000000 -0500
+++ linux-2.6.19-rc1-git11/drivers/scsi/sym53c8xx_2/sym_glue.c 2006-10-20 12:41:15.000000000 -0500
@@ -659,6 +659,11 @@ static irqreturn_t sym53c8xx_intr(int ir
if (DEBUG_FLAGS & DEBUG_TINY) printf_debug ("[");
+ /* Avoid spinloop trying to handle interrupts on frozen device */
+ if ((np->s.device->error_state != pci_channel_io_normal) &&
+ (np->s.device->error_state != 0))
+ return IRQ_HANDLED;
+
spin_lock_irqsave(np->s.host->host_lock, flags);
sym_interrupt(np);
spin_unlock_irqrestore(np->s.host->host_lock, flags);
@@ -726,6 +731,19 @@ static int sym_eh_handler(int op, char *
dev_warn(&cmd->device->sdev_gendev, "%s operation started.\n", opname);
+ /* We may be in an error condition because the PCI bus
+ * went down. In this case, we need to wait until the
+ * PCI bus is reset, the card is reset, and only then
+ * proceed with the scsi error recovery. There's no
+ * point in hurrying; take a leisurely wait.
+ */
+#define WAIT_FOR_PCI_RECOVERY 35
+ if ((np->s.device->error_state != pci_channel_io_normal) &&
+ (np->s.device->error_state != 0) &&
+ (wait_for_completion_timeout(&np->s.io_reset_wait,
+ WAIT_FOR_PCI_RECOVERY*HZ) == 0))
+ return SCSI_FAILED;
+
spin_lock_irq(host->host_lock);
/* This one is queued in some place -> to wait for completion */
FOR_EACH_QUEUED_ELEMENT(&np->busy_ccbq, qp) {
@@ -1510,6 +1528,7 @@ static struct Scsi_Host * __devinit sym_
np->maxoffs = dev->chip.offset_max;
np->maxburst = dev->chip.burst_max;
np->myaddr = dev->host_id;
+ init_completion(&np->s.io_reset_wait);
/*
* Edit its name.
@@ -1948,6 +1967,79 @@ static void __devexit sym2_remove(struct
attach_count--;
}
+/**
+ * sym2_io_error_detected() -- called when PCI error is detected
+ * @pdev: pointer to PCI device
+ * @state: current state of the PCI slot
+ */
+static pci_ers_result_t sym2_io_error_detected (struct pci_dev *pdev,
+ enum pci_channel_state state)
+{
+ struct sym_hcb *np = pci_get_drvdata(pdev);
+
+ /* If slot is permanently frozen, turn everything off */
+ if (state == pci_channel_io_perm_failure) {
+ sym2_remove(pdev);
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+
+ init_completion(&np->s.io_reset_wait);
+ disable_irq(pdev->irq);
+ pci_disable_device(pdev);
+
+ /* Request a slot reset. */
+ return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * sym2_io_slot_reset() -- called when the pci bus has been reset.
+ * @pdev: pointer to PCI device
+ *
+ * Restart the card from scratch.
+ */
+static pci_ers_result_t sym2_io_slot_reset (struct pci_dev *pdev)
+{
+ struct sym_hcb *np = pci_get_drvdata(pdev);
+
+ printk(KERN_INFO "%s: recovering from a PCI slot reset\n",
+ sym_name(np));
+
+ if (pci_enable_device(pdev)) {
+ printk(KERN_ERR "%s: Unable to enable afer PCI reset\n",
+ sym_name(np));
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+
+ pci_set_master(pdev);
+ enable_irq(pdev->irq);
+
+ /* Perform host reset only on one instance of the card */
+ if (PCI_FUNC (pdev->devfn) == 0) {
+ if (sym_reset_scsi_bus(np, 0)) {
+ printk(KERN_ERR "%s: Unable to reset scsi host\n",
+ sym_name(np));
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+ sym_start_up(np, 1);
+ }
+
+ return PCI_ERS_RESULT_RECOVERED;
+}
+
+/**
+ * sym2_io_resume() -- resume normal ops after PCI reset
+ * @pdev: pointer to PCI device
+ *
+ * Called when the error recovery driver tells us that its
+ * OK to resume normal operation. Use completion to allow
+ * halted scsi ops to resume.
+ */
+static void sym2_io_resume (struct pci_dev *pdev)
+{
+ struct sym_hcb *np = pci_get_drvdata(pdev);
+ complete_all(&np->s.io_reset_wait);
+}
+
static void sym2_get_signalling(struct Scsi_Host *shost)
{
struct sym_hcb *np = sym_get_hcb(shost);
@@ -2110,11 +2202,18 @@ static struct pci_device_id sym2_id_tabl
MODULE_DEVICE_TABLE(pci, sym2_id_table);
+static struct pci_error_handlers sym2_err_handler = {
+ .error_detected = sym2_io_error_detected,
+ .slot_reset = sym2_io_slot_reset,
+ .resume = sym2_io_resume,
+};
+
static struct pci_driver sym2_driver = {
.name = NAME53C8XX,
.id_table = sym2_id_table,
.probe = sym2_probe,
.remove = __devexit_p(sym2_remove),
+ .err_handler = &sym2_err_handler,
};
static int __init sym2_init(void)
Index: linux-2.6.19-rc1-git11/drivers/scsi/sym53c8xx_2/sym_glue.h
===================================================================
--- linux-2.6.19-rc1-git11.orig/drivers/scsi/sym53c8xx_2/sym_glue.h 2006-10-20 12:25:11.000000000 -0500
+++ linux-2.6.19-rc1-git11/drivers/scsi/sym53c8xx_2/sym_glue.h 2006-10-20 12:41:16.000000000 -0500
@@ -40,6 +40,7 @@
#ifndef SYM_GLUE_H
#define SYM_GLUE_H
+#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/ioport.h>
#include <linux/pci.h>
@@ -179,6 +180,9 @@ struct sym_shcb {
char chip_name[8];
struct pci_dev *device;
+ /* Waiter for clearing of frozen PCI bus */
+ struct completion io_reset_wait;
+
struct Scsi_Host *host;
void __iomem * ioaddr; /* MMIO kernel io address */
Index: linux-2.6.19-rc1-git11/drivers/scsi/sym53c8xx_2/sym_hipd.c
===================================================================
--- linux-2.6.19-rc1-git11.orig/drivers/scsi/sym53c8xx_2/sym_hipd.c 2006-10-20 12:25:11.000000000 -0500
+++ linux-2.6.19-rc1-git11/drivers/scsi/sym53c8xx_2/sym_hipd.c 2006-10-20 12:41:16.000000000 -0500
@@ -2761,6 +2761,7 @@ void sym_interrupt (struct sym_hcb *np)
u_char istat, istatc;
u_char dstat;
u_short sist;
+ u_int icnt;
/*
* interrupt on the fly ?
@@ -2802,6 +2803,7 @@ void sym_interrupt (struct sym_hcb *np)
sist = 0;
dstat = 0;
istatc = istat;
+ icnt = 0;
do {
if (istatc & SIP)
sist |= INW(np, nc_sist);
@@ -2809,6 +2811,14 @@ void sym_interrupt (struct sym_hcb *np)
dstat |= INB(np, nc_dstat);
istatc = INB(np, nc_istat);
istat |= istatc;
+
+ /* Prevent deadlock waiting on a condition that may never clear. */
+ icnt ++;
+ if (icnt > 100) {
+ if ((np->s.device->error_state != pci_channel_io_normal)
+ && (np->s.device->error_state != 0))
+ return;
+ }
} while (istatc & (SIP|DIP));
if (DEBUG_FLAGS & DEBUG_TINY)
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH]: PCI Error Recovery: Symbios SCSI device driver
2006-10-20 18:05 Linas Vepstas
@ 2006-10-31 18:55 ` Matthew Wilcox
2006-10-31 19:24 ` James Bottomley
2006-10-31 23:13 ` Linas Vepstas
0 siblings, 2 replies; 21+ messages in thread
From: Matthew Wilcox @ 2006-10-31 18:55 UTC (permalink / raw)
To: Linas Vepstas; +Cc: linux-scsi, linux-pci, linux-kernel
On Fri, Oct 20, 2006 at 01:05:10PM -0500, Linas Vepstas wrote:
> Index: linux-2.6.19-rc1-git11/drivers/scsi/sym53c8xx_2/sym_glue.c
> ===================================================================
> --- linux-2.6.19-rc1-git11.orig/drivers/scsi/sym53c8xx_2/sym_glue.c 2006-10-20 12:25:11.000000000 -0500
> +++ linux-2.6.19-rc1-git11/drivers/scsi/sym53c8xx_2/sym_glue.c 2006-10-20 12:41:15.000000000 -0500
> @@ -659,6 +659,11 @@ static irqreturn_t sym53c8xx_intr(int ir
>
> if (DEBUG_FLAGS & DEBUG_TINY) printf_debug ("[");
>
> + /* Avoid spinloop trying to handle interrupts on frozen device */
> + if ((np->s.device->error_state != pci_channel_io_normal) &&
> + (np->s.device->error_state != 0))
> + return IRQ_HANDLED;
> +
This needs to be before the printf_debug call.
> @@ -726,6 +731,19 @@ static int sym_eh_handler(int op, char *
>
> dev_warn(&cmd->device->sdev_gendev, "%s operation started.\n", opname);
>
> + /* We may be in an error condition because the PCI bus
> + * went down. In this case, we need to wait until the
> + * PCI bus is reset, the card is reset, and only then
> + * proceed with the scsi error recovery. There's no
> + * point in hurrying; take a leisurely wait.
> + */
> +#define WAIT_FOR_PCI_RECOVERY 35
> + if ((np->s.device->error_state != pci_channel_io_normal) &&
> + (np->s.device->error_state != 0) &&
> + (wait_for_completion_timeout(&np->s.io_reset_wait,
> + WAIT_FOR_PCI_RECOVERY*HZ) == 0))
> + return SCSI_FAILED;
> +
Is it safe / reasonable / a good idea to sleep for 35 seconds in the EH
handler? I'm not that familiar with how the EH code works. It has its
own thread, so I suppose that's OK.
Are the driver's data structures still intact after a reset?
I generally prefer not to be so perlish in conditionals, ie:
if ((np->s.device->error_state != pci_channel_io_normal) &&
(np->s.device->error_state != 0) {
int timed_out = wait_for_completion_timeout(
&np->s.io_reset_wait, WAIT_FOR_PCI_RECOVERY*HZ);
if (!timed_out)
return SCSI_FAILED;
}
Why is the condition so complicated though? What does 0 mean if it's
not io_normal? At least let's hide that behind a convenience macro:
if (abnormal_error_state(np->s.device->error_state)) {
...
}
> Index: linux-2.6.19-rc1-git11/drivers/scsi/sym53c8xx_2/sym_hipd.c
> ===================================================================
> --- linux-2.6.19-rc1-git11.orig/drivers/scsi/sym53c8xx_2/sym_hipd.c 2006-10-20 12:25:11.000000000 -0500
> +++ linux-2.6.19-rc1-git11/drivers/scsi/sym53c8xx_2/sym_hipd.c 2006-10-20 12:41:16.000000000 -0500
> @@ -2761,6 +2761,7 @@ void sym_interrupt (struct sym_hcb *np)
> u_char istat, istatc;
> u_char dstat;
> u_short sist;
> + u_int icnt;
The cryptic names in this routine are actually register names. Calling
a counter 'icnt' is unhelpful (rather than fitting in with the style).
Just 'i' will do.
> /*
> * interrupt on the fly ?
> @@ -2802,6 +2803,7 @@ void sym_interrupt (struct sym_hcb *np)
> sist = 0;
> dstat = 0;
> istatc = istat;
> + icnt = 0;
> do {
> if (istatc & SIP)
> sist |= INW(np, nc_sist);
> @@ -2809,6 +2811,14 @@ void sym_interrupt (struct sym_hcb *np)
> dstat |= INB(np, nc_dstat);
> istatc = INB(np, nc_istat);
> istat |= istatc;
> +
> + /* Prevent deadlock waiting on a condition that may never clear. */
> + icnt ++;
> + if (icnt > 100) {
> + if ((np->s.device->error_state != pci_channel_io_normal)
> + && (np->s.device->error_state != 0))
> + return;
> + }
> } while (istatc & (SIP|DIP));
Though, since INB and INW will return 0xff and 0xffff, why not use that
as our test rather than using a counter?
if (sist == 0xffff && dstat == 0xff) {
if (abnormal_error_state(np->s.device->error_state)
return;
}
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH]: PCI Error Recovery: Symbios SCSI device driver
2006-10-31 18:55 ` Matthew Wilcox
@ 2006-10-31 19:24 ` James Bottomley
2006-10-31 22:26 ` Linas Vepstas
2006-10-31 23:13 ` Linas Vepstas
1 sibling, 1 reply; 21+ messages in thread
From: James Bottomley @ 2006-10-31 19:24 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: Linas Vepstas, linux-scsi, linux-pci, linux-kernel
On Tue, 2006-10-31 at 11:55 -0700, Matthew Wilcox wrote:
> Is it safe / reasonable / a good idea to sleep for 35 seconds in the EH
> handler? I'm not that familiar with how the EH code works. It has its
> own thread, so I suppose that's OK.
Yes, each host has its own thread. Ordinarily it would be impolite to
delay recovery this long, but I assume that since the card is wedged
there's nothing else it could be doing anyway.
Just for my own edification, what happens on the dual function (dual
channel) boards? We have two threads there and two separate I/O
processors. I assume a PCI error will kill both, do we need to do
something about this?
James
> I generally prefer not to be so perlish in conditionals, ie:
>
> if ((np->s.device->error_state != pci_channel_io_normal) &&
> (np->s.device->error_state != 0) {
> int timed_out = wait_for_completion_timeout(
> &np->s.io_reset_wait, WAIT_FOR_PCI_RECOVERY*HZ);
> if (!timed_out)
> return SCSI_FAILED;
> }
>
> Why is the condition so complicated though? What does 0 mean if it's
> not io_normal? At least let's hide that behind a convenience macro:
>
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH]: PCI Error Recovery: Symbios SCSI device driver
2006-10-31 19:24 ` James Bottomley
@ 2006-10-31 22:26 ` Linas Vepstas
0 siblings, 0 replies; 21+ messages in thread
From: Linas Vepstas @ 2006-10-31 22:26 UTC (permalink / raw)
To: James Bottomley; +Cc: Matthew Wilcox, linux-scsi, linux-pci, linux-kernel
On Tue, Oct 31, 2006 at 02:24:01PM -0500, James Bottomley wrote:
>
> Just for my own edification, what happens on the dual function (dual
> channel) boards? We have two threads there and two separate I/O
> processors. I assume a PCI error will kill both,
Yes.
> do we need to do
> something about this?
I'm not sure, and actually, I have not thought about
or tested this case for the symbios.
The answer depends on the h/w design. On PCI
multi-function cards, the PCI reset callbacks will
get called for each PCI function. (Each function
gets to vote/veto how it wants te reset to proceed).
If the hardware supports completely independent scsi
host initialization for each scsi i/o processor,
then things should just work.
If some part of the card init sequence needs to run
only once, even when there are two i/o processors, then
this needs to be protected against. I presume that
using if(0 == PCI_FUNC(pdev->devfn)) is enough to
make sure the hardware initilization is called only once
for the card -- i.e. by calling it only for PCI
function zero. If there needs to be some additional
locking to make sure that the card initialization
completes before the i/o processor initialization
starts ... well, I don't know about that.
--linas
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH]: PCI Error Recovery: Symbios SCSI device driver
2006-10-31 18:55 ` Matthew Wilcox
2006-10-31 19:24 ` James Bottomley
@ 2006-10-31 23:13 ` Linas Vepstas
2006-11-02 4:46 ` Grant Grundler
1 sibling, 1 reply; 21+ messages in thread
From: Linas Vepstas @ 2006-10-31 23:13 UTC (permalink / raw)
To: Matthew Wilcox; +Cc: linux-scsi, linux-pci, linux-kernel
On Tue, Oct 31, 2006 at 11:55:07AM -0700, Matthew Wilcox wrote:
> On Fri, Oct 20, 2006 at 01:05:10PM -0500, Linas Vepstas wrote:
> > Index: linux-2.6.19-rc1-git11/drivers/scsi/sym53c8xx_2/sym_glue.c
>
> This needs to be before the printf_debug call.
Right. This'll be in the next patch submision.
> > @@ -726,6 +731,19 @@ static int sym_eh_handler(int op, char *
> > + /* We may be in an error condition because the PCI bus
> > + * went down. In this case, we need to wait until the
> > + * PCI bus is reset, the card is reset, and only then
> > + * proceed with the scsi error recovery. There's no
> > + * point in hurrying; take a leisurely wait.
> > + */
> > +#define WAIT_FOR_PCI_RECOVERY 35
> > + if ((np->s.device->error_state != pci_channel_io_normal) &&
> > + (np->s.device->error_state != 0) &&
> > + (wait_for_completion_timeout(&np->s.io_reset_wait,
> > + WAIT_FOR_PCI_RECOVERY*HZ) == 0))
> > + return SCSI_FAILED;
> > +
>
> Is it safe / reasonable / a good idea to sleep for 35 seconds in the EH
> handler? I'm not that familiar with how the EH code works. It has its
> own thread, so I suppose that's OK.
As James pointed out, the pci channel is is not available until the
reset sequence is done. The 35 seconds seemed like a reasonable time
to wait for the pci reset to complete; hopefuly it will complete much
sooner. If the pci reset fails for some reason, then things are hosed,
and the sysadmin will need to intervene.
> Are the driver's data structures still intact after a reset?
They should be. No one is attempting to free or shut down the driver.
> I generally prefer not to be so perlish in conditionals, ie:
I wasn't sure what style is popular. Actually, I agree with you on
that, and picked the other style cause i thought it was prefered.
Nex patch submission will be more nested.
> if ((np->s.device->error_state != pci_channel_io_normal) &&
> (np->s.device->error_state != 0) {
>
> Why is the condition so complicated though? What does 0 mean if it's
> not io_normal?
Its an unresolved stupidity. For some imagined, hypothetical
reason, it momentarily seemed wise to make pci_channel_io_normal
be non-zero; but this imagined reason, although vague, did manage
to bite back, as the above code demonstrates.
> At least let's hide that behind a convenience macro:
>
> if (abnormal_error_state(np->s.device->error_state)) {
Should I submit a patch to make pci_channel_io_normal be zero,
or submit a patch to define abnormal_error_state, or both?
Both, probably; I don't have much of an opinion.
> Though, since INB and INW will return 0xff and 0xffff, why not use that
> as our test rather than using a counter?
Right. I wanted to avoid checking for specific values,
as that vaguely seemed more robust; the direct check is easier.
I'll change this.
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH]: PCI Error Recovery: Symbios SCSI device driver
2006-10-31 23:13 ` Linas Vepstas
@ 2006-11-02 4:46 ` Grant Grundler
2006-11-02 4:56 ` Matthew Wilcox
0 siblings, 1 reply; 21+ messages in thread
From: Grant Grundler @ 2006-11-02 4:46 UTC (permalink / raw)
To: Linas Vepstas; +Cc: Matthew Wilcox, linux-scsi, linux-pci, linux-kernel
On Tue, Oct 31, 2006 at 05:13:34PM -0600, Linas Vepstas wrote:
...
> > Though, since INB and INW will return 0xff and 0xffff, why not use that
> > as our test rather than using a counter?
>
> Right. I wanted to avoid checking for specific values,
> as that vaguely seemed more robust; the direct check is easier.
ISTR some chipsets return 0 or the most recent data on the bus
when INB/INW master-abort. Maybe this an ISA bus behavior?
Or is config space access the only space which behaves this way
for master abort on PCI?
I'm looking at drivers/pci/probe.c:pci_scan_device().
thanks,
grant
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH]: PCI Error Recovery: Symbios SCSI device driver
2006-11-02 4:46 ` Grant Grundler
@ 2006-11-02 4:56 ` Matthew Wilcox
0 siblings, 0 replies; 21+ messages in thread
From: Matthew Wilcox @ 2006-11-02 4:56 UTC (permalink / raw)
To: Grant Grundler; +Cc: Linas Vepstas, linux-scsi, linux-pci, linux-kernel
On Wed, Nov 01, 2006 at 09:46:33PM -0700, Grant Grundler wrote:
> ISTR some chipsets return 0 or the most recent data on the bus
> when INB/INW master-abort. Maybe this an ISA bus behavior?
It's not a master-abort; it won't get as far as the device.
Documentation/pci-error-recovery.txt says reads return 0xffffffff.
> Or is config space access the only space which behaves this way
> for master abort on PCI?
> I'm looking at drivers/pci/probe.c:pci_scan_device().
As the comment says, the boards which do this are broken. It's highly
unlikely those boards will support error isolation and recovery ;-)
^ permalink raw reply [flat|nested] 21+ messages in thread
* [PATCH]: PCI Error Recovery: Symbios SCSI device driver
@ 2007-07-02 18:39 Linas Vepstas
2007-07-05 18:28 ` Andrew Morton
0 siblings, 1 reply; 21+ messages in thread
From: Linas Vepstas @ 2007-07-02 18:39 UTC (permalink / raw)
To: James.Bottomley, matthew, willy
Cc: linux-scsi, Paul Mackerras, linuxppc-dev, linux-kernel, linux-pci,
Andrew Morton
Various PCI bus errors can be signaled by newer PCI controllers.
This patch adds the PCI error recovery callbacks to the Symbios
SCSI device driver. The patch has been tested, and appears to
work well.
Signed-off-by: Linas Vepstas <linas@austin.ibm.com>
----
Hi,
This patch has been bouncing around for a long time, and has made
appearences in various -mm trees since 2.6.something-teen. However,
it has never made it into mainline, and I'm starting to get concerned
that it will miss 2.6.23 as well.
There was some discussion, and I think I addressed all of the various
issues that came up. I'd really like to get this patch in, but am unclear
on exactly who to pester at this point. Matt Wilcox seems to be looking
for a job (???) and I am unable to git-clone James Bottmley's
git://kernel.org:/pub/scm/linux/kernel/git/jejb/scsi-misc-2.6.git
git tree; there's some error on the server side.
Linas.
drivers/scsi/sym53c8xx_2/sym_glue.c | 136 ++++++++++++++++++++++++++++++++++++
drivers/scsi/sym53c8xx_2/sym_glue.h | 4 +
drivers/scsi/sym53c8xx_2/sym_hipd.c | 6 +
3 files changed, 146 insertions(+)
Index: linux-2.6.22-rc1/drivers/scsi/sym53c8xx_2/sym_glue.c
===================================================================
--- linux-2.6.22-rc1.orig/drivers/scsi/sym53c8xx_2/sym_glue.c 2007-04-25 22:08:32.000000000 -0500
+++ linux-2.6.22-rc1/drivers/scsi/sym53c8xx_2/sym_glue.c 2007-05-14 17:31:44.000000000 -0500
@@ -657,6 +657,10 @@ static irqreturn_t sym53c8xx_intr(int ir
unsigned long flags;
struct sym_hcb *np = (struct sym_hcb *)dev_id;
+ /* Avoid spinloop trying to handle interrupts on frozen device */
+ if (pci_channel_offline(np->s.device))
+ return IRQ_HANDLED;
+
if (DEBUG_FLAGS & DEBUG_TINY) printf_debug ("[");
spin_lock_irqsave(np->s.host->host_lock, flags);
@@ -726,6 +730,20 @@ static int sym_eh_handler(int op, char *
dev_warn(&cmd->device->sdev_gendev, "%s operation started.\n", opname);
+ /* We may be in an error condition because the PCI bus
+ * went down. In this case, we need to wait until the
+ * PCI bus is reset, the card is reset, and only then
+ * proceed with the scsi error recovery. There's no
+ * point in hurrying; take a leisurely wait.
+ */
+#define WAIT_FOR_PCI_RECOVERY 35
+ if (pci_channel_offline(np->s.device)) {
+ int finished_reset = wait_for_completion_timeout(
+ &np->s.io_reset_wait, WAIT_FOR_PCI_RECOVERY*HZ);
+ if (!finished_reset)
+ return SCSI_FAILED;
+ }
+
spin_lock_irq(host->host_lock);
/* This one is queued in some place -> to wait for completion */
FOR_EACH_QUEUED_ELEMENT(&np->busy_ccbq, qp) {
@@ -1510,6 +1528,7 @@ static struct Scsi_Host * __devinit sym_
np->maxoffs = dev->chip.offset_max;
np->maxburst = dev->chip.burst_max;
np->myaddr = dev->host_id;
+ init_completion(&np->s.io_reset_wait);
/*
* Edit its name.
@@ -1948,6 +1967,116 @@ static void __devexit sym2_remove(struct
attach_count--;
}
+/**
+ * sym2_io_error_detected() -- called when PCI error is detected
+ * @pdev: pointer to PCI device
+ * @state: current state of the PCI slot
+ */
+static pci_ers_result_t sym2_io_error_detected(struct pci_dev *pdev,
+ enum pci_channel_state state)
+{
+ struct sym_hcb *np = pci_get_drvdata(pdev);
+
+ /* If slot is permanently frozen, turn everything off */
+ if (state == pci_channel_io_perm_failure) {
+ sym2_remove(pdev);
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+
+ init_completion(&np->s.io_reset_wait);
+ disable_irq(pdev->irq);
+ pci_disable_device(pdev);
+
+ /* Request a slot reset. */
+ return PCI_ERS_RESULT_NEED_RESET;
+}
+
+/**
+ * sym2_reset_workarounds -- hardware-specific work-arounds
+ *
+ * This routine is similar to sym_set_workarounds(), except
+ * that, at this point, we already know that the device was
+ * succesfully intialized at least once before, and so most
+ * of the steps taken there are un-needed here.
+ */
+static void sym2_reset_workarounds(struct pci_dev *pdev)
+{
+ u_char revision;
+ u_short status_reg;
+ struct sym_chip *chip;
+
+ pci_read_config_byte(pdev, PCI_CLASS_REVISION, &revision);
+ chip = sym_lookup_chip_table(pdev->device, revision);
+
+ /* Work around for errant bit in 895A, in a fashion
+ * similar to what is done in sym_set_workarounds().
+ */
+ pci_read_config_word(pdev, PCI_STATUS, &status_reg);
+ if (!(chip->features & FE_66MHZ) && (status_reg & PCI_STATUS_66MHZ)) {
+ status_reg = PCI_STATUS_66MHZ;
+ pci_write_config_word(pdev, PCI_STATUS, status_reg);
+ pci_read_config_word(pdev, PCI_STATUS, &status_reg);
+ }
+}
+
+/**
+ * sym2_io_slot_reset() -- called when the pci bus has been reset.
+ * @pdev: pointer to PCI device
+ *
+ * Restart the card from scratch.
+ */
+static pci_ers_result_t sym2_io_slot_reset(struct pci_dev *pdev)
+{
+ struct sym_hcb *np = pci_get_drvdata(pdev);
+
+ printk(KERN_INFO "%s: recovering from a PCI slot reset\n",
+ sym_name(np));
+
+ if (pci_enable_device(pdev)) {
+ printk(KERN_ERR "%s: Unable to enable afer PCI reset\n",
+ sym_name(np));
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+
+ pci_set_master(pdev);
+ enable_irq(pdev->irq);
+
+ /* If the chip can do Memory Write Invalidate, enable it */
+ if (np->features & FE_WRIE) {
+ if (pci_set_mwi(pdev))
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+
+ /* Perform work-arounds, analogous to sym_set_workarounds() */
+ sym2_reset_workarounds(pdev);
+
+ /* Perform host reset only on one instance of the card */
+ if (PCI_FUNC (pdev->devfn) == 0) {
+ if (sym_reset_scsi_bus(np, 0)) {
+ printk(KERN_ERR "%s: Unable to reset scsi host\n",
+ sym_name(np));
+ return PCI_ERS_RESULT_DISCONNECT;
+ }
+ sym_start_up(np, 1);
+ }
+
+ return PCI_ERS_RESULT_RECOVERED;
+}
+
+/**
+ * sym2_io_resume() -- resume normal ops after PCI reset
+ * @pdev: pointer to PCI device
+ *
+ * Called when the error recovery driver tells us that its
+ * OK to resume normal operation. Use completion to allow
+ * halted scsi ops to resume.
+ */
+static void sym2_io_resume(struct pci_dev *pdev)
+{
+ struct sym_hcb *np = pci_get_drvdata(pdev);
+ complete_all(&np->s.io_reset_wait);
+}
+
static void sym2_get_signalling(struct Scsi_Host *shost)
{
struct sym_hcb *np = sym_get_hcb(shost);
@@ -2110,11 +2239,18 @@ static struct pci_device_id sym2_id_tabl
MODULE_DEVICE_TABLE(pci, sym2_id_table);
+static struct pci_error_handlers sym2_err_handler = {
+ .error_detected = sym2_io_error_detected,
+ .slot_reset = sym2_io_slot_reset,
+ .resume = sym2_io_resume,
+};
+
static struct pci_driver sym2_driver = {
.name = NAME53C8XX,
.id_table = sym2_id_table,
.probe = sym2_probe,
.remove = __devexit_p(sym2_remove),
+ .err_handler = &sym2_err_handler,
};
static int __init sym2_init(void)
Index: linux-2.6.22-rc1/drivers/scsi/sym53c8xx_2/sym_glue.h
===================================================================
--- linux-2.6.22-rc1.orig/drivers/scsi/sym53c8xx_2/sym_glue.h 2007-04-25 22:08:32.000000000 -0500
+++ linux-2.6.22-rc1/drivers/scsi/sym53c8xx_2/sym_glue.h 2007-05-14 17:31:44.000000000 -0500
@@ -40,6 +40,7 @@
#ifndef SYM_GLUE_H
#define SYM_GLUE_H
+#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/ioport.h>
#include <linux/pci.h>
@@ -179,6 +180,9 @@ struct sym_shcb {
char chip_name[8];
struct pci_dev *device;
+ /* Waiter for clearing of frozen PCI bus */
+ struct completion io_reset_wait;
+
struct Scsi_Host *host;
void __iomem * ioaddr; /* MMIO kernel io address */
Index: linux-2.6.22-rc1/drivers/scsi/sym53c8xx_2/sym_hipd.c
===================================================================
--- linux-2.6.22-rc1.orig/drivers/scsi/sym53c8xx_2/sym_hipd.c 2007-04-25 22:08:32.000000000 -0500
+++ linux-2.6.22-rc1/drivers/scsi/sym53c8xx_2/sym_hipd.c 2007-05-14 17:31:44.000000000 -0500
@@ -2809,6 +2809,12 @@ void sym_interrupt (struct sym_hcb *np)
dstat |= INB(np, nc_dstat);
istatc = INB(np, nc_istat);
istat |= istatc;
+
+ /* Prevent deadlock waiting on a condition that may never clear. */
+ if (unlikely(sist == 0xffff && dstat == 0xff)) {
+ if (pci_channel_offline(np->s.device))
+ return;
+ }
} while (istatc & (SIP|DIP));
if (DEBUG_FLAGS & DEBUG_TINY)
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH]: PCI Error Recovery: Symbios SCSI device driver
2007-07-02 18:39 [PATCH]: PCI Error Recovery: Symbios SCSI device driver Linas Vepstas
@ 2007-07-05 18:28 ` Andrew Morton
2007-07-05 18:54 ` Matthew Wilcox
0 siblings, 1 reply; 21+ messages in thread
From: Andrew Morton @ 2007-07-05 18:28 UTC (permalink / raw)
To: Linas Vepstas
Cc: James.Bottomley, linux-scsi, matthew, willy, linux-kernel,
linuxppc-dev, Paul Mackerras, linux-pci
On Mon, 2 Jul 2007 13:39:17 -0500
linas@austin.ibm.com (Linas Vepstas) wrote:
>
> Various PCI bus errors can be signaled by newer PCI controllers.
> This patch adds the PCI error recovery callbacks to the Symbios
> SCSI device driver. The patch has been tested, and appears to
> work well.
>
yup, this is identical to -mm's
pci-error-recovery-symbios-scsi-base-support.patch
What is the status of
ftp://ftp.kernel.org/pub/linux/kernel/people/akpm/patches/2.6/2.6.22-rc6/2.6.22-rc6-mm1/broken-out/pci-error-recovery-symbios-scsi-first-failure.patch?
>
> ----
>
> Hi,
>
> This patch has been bouncing around for a long time, and has made
> appearences in various -mm trees since 2.6.something-teen. However,
> it has never made it into mainline, and I'm starting to get concerned
> that it will miss 2.6.23 as well.
Well you've sent it a couple of times, and I've sent it in five more times
over the past year. Once we were told "awaiting maintainer ack".
This situation is fairly stupid. How about we make you the maintainer?
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH]: PCI Error Recovery: Symbios SCSI device driver
2007-07-05 18:28 ` Andrew Morton
@ 2007-07-05 18:54 ` Matthew Wilcox
2007-08-02 22:53 ` Linas Vepstas
0 siblings, 1 reply; 21+ messages in thread
From: Matthew Wilcox @ 2007-07-05 18:54 UTC (permalink / raw)
To: Andrew Morton
Cc: Linas Vepstas, James.Bottomley, willy, linux-scsi, Paul Mackerras,
linuxppc-dev, linux-kernel, linux-pci
On Thu, Jul 05, 2007 at 11:28:38AM -0700, Andrew Morton wrote:
> Well you've sent it a couple of times, and I've sent it in five more times
> over the past year. Once we were told "awaiting maintainer ack".
>
> This situation is fairly stupid. How about we make you the maintainer?
Last time I looked at it, I still wasn't comfortable with it. I'm going
to look at it again.
I'm fairly sure Linas doesn't want to be the sym2 maintainer. It's
still an ugly pile of junk that needs cleaning up.
--
"Bill, look, we understand that you're interested in selling us this
operating system, but compare it to ours. We can't possibly take such
a retrograde step."
^ permalink raw reply [flat|nested] 21+ messages in thread
* Re: [PATCH]: PCI Error Recovery: Symbios SCSI device driver
2007-07-05 18:54 ` Matthew Wilcox
@ 2007-08-02 22:53 ` Linas Vepstas
0 siblings, 0 replies; 21+ messages in thread
From: Linas Vepstas @ 2007-08-02 22:53 UTC (permalink / raw)
To: Matthew Wilcox
Cc: Andrew Morton, James.Bottomley, willy, linux-scsi, Paul Mackerras,
linuxppc-dev, linux-kernel, linux-pci
On Thu, Jul 05, 2007 at 12:54:06PM -0600, Matthew Wilcox wrote:
> On Thu, Jul 05, 2007 at 11:28:38AM -0700, Andrew Morton wrote:
> > Well you've sent it a couple of times, and I've sent it in five more times
> > over the past year. Once we were told "awaiting maintainer ack".
> >
> > This situation is fairly stupid. How about we make you the maintainer?
>
> Last time I looked at it, I still wasn't comfortable with it. I'm going
> to look at it again.
Please do. Its burning the proverbial hole in my pocket; I'd really
like to get this off my list of things I worry about.
> I'm fairly sure Linas doesn't want to be the sym2 maintainer. It's
> still an ugly pile of junk that needs cleaning up.
Heh. I have no difficulty living with ugly code: its actually a
great excuse to fix things instead of doing "real work" :-)
Rather, the menagerie of hardware I have access to is constantly
changing; I don't have a symbios card just right now, and it might
take a few days to even find someone who did. Which is an incredibly
unpleasent, unrewarding activity.
--linas
^ permalink raw reply [flat|nested] 21+ messages in thread
end of thread, other threads:[~2007-08-02 22:53 UTC | newest]
Thread overview: 21+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2007-07-02 18:39 [PATCH]: PCI Error Recovery: Symbios SCSI device driver Linas Vepstas
2007-07-05 18:28 ` Andrew Morton
2007-07-05 18:54 ` Matthew Wilcox
2007-08-02 22:53 ` Linas Vepstas
-- strict thread matches above, loose matches on Subject: below --
2006-10-20 18:05 Linas Vepstas
2006-10-31 18:55 ` Matthew Wilcox
2006-10-31 19:24 ` James Bottomley
2006-10-31 22:26 ` Linas Vepstas
2006-10-31 23:13 ` Linas Vepstas
2006-11-02 4:46 ` Grant Grundler
2006-11-02 4:56 ` Matthew Wilcox
2006-09-21 23:13 Linas Vepstas
2006-09-22 22:06 ` Luca
2006-09-22 23:32 ` Linas Vepstas
2006-09-22 23:39 ` Randy.Dunlap
2006-09-22 23:50 ` Linas Vepstas
2006-09-23 0:57 ` Randy.Dunlap
2006-02-02 20:15 [PATCH] " Linas Vepstas
2006-01-18 16:53 linas
2006-01-18 17:07 ` Matthew Wilcox
2006-01-18 17:54 ` linas
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).