public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 7/13]: PCI Err: Symbios SCSI  driver recovery
@ 2005-06-28 23:59 Linas Vepstas
  2005-06-29  1:51 ` Benjamin Herrenschmidt
  2005-06-29  3:02 ` Andi Kleen
  0 siblings, 2 replies; 7+ messages in thread
From: Linas Vepstas @ 2005-06-28 23:59 UTC (permalink / raw)
  To: linux-kernel, Benjamin Herrenschmidt, long
  Cc: Hidetoshi Seto, Greg KH, ak, Paul Mackerras, linuxppc64-dev,
	linux-pci, johnrose

[-- Attachment #1: Type: text/plain, Size: 499 bytes --]


pci-err-7-symbios.patch

Adds PCI Error recoervy callbacks to the Symbios Sym53c8xx driver.
Tested, seems to work well under i/o stress to one disk. Not
stress tested under heavy i/o to multiple scsi devices.

Note the check of the pci error state flag inside an infinite
loop inside the interrupt handler. Without this check, the 
device can spin forever, locking up hard, long before the 
asynchronous error event (and callbacks) are ever called. 

Signed-off-by: Linas Vepstas <linas@linas.org>

[-- Attachment #2: pci-err-7-symbios.patch --]
[-- Type: text/plain, Size: 7749 bytes --]

--- linux-2.6.12-git10/drivers/scsi/sym53c8xx_2/sym_glue.c.linas-orig	2005-06-22 15:26:17.000000000 -0500
+++ linux-2.6.12-git10/drivers/scsi/sym53c8xx_2/sym_glue.c	2005-06-22 17:17:00.000000000 -0500
@@ -685,6 +685,10 @@ static irqreturn_t sym53c8xx_intr(int ir
 	struct sym_hcb *np = (struct sym_hcb *)dev_id;
 
 	if (DEBUG_FLAGS & DEBUG_TINY) printf_debug ("[");
+#ifdef CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY
+	if (np->s.io_state != pci_channel_io_normal)
+		return IRQ_HANDLED;
+#endif /* CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY */
 
 	spin_lock_irqsave(np->s.host->host_lock, flags);
 	sym_interrupt(np);
@@ -759,6 +763,27 @@ static void sym_eh_done(struct scsi_cmnd
  */
 static void sym_eh_timeout(u_long p) { __sym_eh_done((struct scsi_cmnd *)p, 1); }
 
+#ifdef CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY
+static void sym_eeh_timeout(u_long p)
+{
+	struct sym_eh_wait *ep = (struct sym_eh_wait *) p;
+	if (!ep)
+		return;
+	complete(&ep->done);
+}
+
+static void sym_eeh_done(struct sym_eh_wait *ep)
+{
+	if (!ep)
+		return;
+	ep->timed_out = 0;
+	if (!del_timer(&ep->timer))
+		return;
+
+	complete(&ep->done);
+}
+#endif /* CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY */
+
 /*
  *  Generic method for our eh processing.
  *  The 'op' argument tells what we have to do.
@@ -799,6 +824,37 @@ prepare:
 
 	/* Try to proceed the operation we have been asked for */
 	sts = -1;
+#ifdef CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY
+
+	/* We may be in an error condition because the PCI bus
+	 * went down. In this case, we need to wait until the
+	 * PCI bus is reset, the card is reset, and only then
+	 * proceed with the scsi error recovery.  We'll wait
+	 * for 15 seconds for this to happen.
+	 */
+#define WAIT_FOR_PCI_RECOVERY	15
+	if (np->s.io_state != pci_channel_io_normal) {
+		struct sym_eh_wait eeh, *eep = &eeh;
+		np->s.io_reset_wait = eep;
+		init_completion(&eep->done);
+		init_timer(&eep->timer);
+		eep->to_do = SYM_EH_DO_WAIT;
+		eep->timer.expires = jiffies + (WAIT_FOR_PCI_RECOVERY*HZ);
+		eep->timer.function = sym_eeh_timeout;
+		eep->timer.data = (u_long)eep;
+		eep->timed_out = 1;	/* Be pessimistic for once :) */
+		add_timer(&eep->timer);
+		spin_unlock_irq(np->s.host->host_lock);
+		wait_for_completion(&eep->done);
+		spin_lock_irq(np->s.host->host_lock);
+		if (eep->timed_out) {
+			printk (KERN_ERR "%s: Timed out waiting for PCI reset\n",
+			       sym_name(np));
+		}
+		np->s.io_reset_wait = NULL;
+	}
+#endif /* CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY */
+
 	switch(op) {
 	case SYM_EH_ABORT:
 		sts = sym_abort_scsiio(np, cmd, 1);
@@ -1584,6 +1640,10 @@ static struct Scsi_Host * __devinit sym_
 	np->maxoffs	= dev->chip.offset_max;
 	np->maxburst	= dev->chip.burst_max;
 	np->myaddr	= dev->host_id;
+#ifdef CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY
+	np->s.io_state = pci_channel_io_normal;
+	np->s.io_reset_wait = NULL;
+#endif /* CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY */
 
 	/*
 	 *  Edit its name.
@@ -1916,6 +1976,59 @@ static int sym_detach(struct sym_hcb *np
 	return 1;
 }
 
+#ifdef CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY
+/** sym2_io_error_detected() is called when PCI error is detected */
+static int sym2_io_error_detected (struct pci_dev *pdev, enum pci_channel_state state)
+{
+	struct sym_hcb *np = pci_get_drvdata(pdev);
+
+	np->s.io_state = state;
+	// XXX If slot is permanently frozen, then what? 
+	// Should we scsi_remove_host() maybe ??
+
+	/* Request a slot slot reset. */
+	return PCIERR_RESULT_NEED_RESET;
+}
+
+/** sym2_io_slot_reset is called when the pci bus has been reset.
+ *  Restart the card from scratch. */
+static int sym2_io_slot_reset (struct pci_dev *pdev)
+{
+	struct sym_hcb *np = pci_get_drvdata(pdev);
+
+	printk (KERN_INFO "%s: recovering from a PCI slot reset\n",
+	    sym_name(np));
+
+	if (pci_enable_device(pdev))
+		printk (KERN_ERR "%s: device setup failed most egregiously\n",
+			    sym_name(np));
+
+	pci_set_master(pdev);
+	enable_irq (pdev->irq);
+
+	/* Perform host reset only on one instance of the card */
+	if (0 == PCI_FUNC (pdev->devfn))
+		sym_reset_scsi_bus(np, 0);
+
+	return PCIERR_RESULT_RECOVERED;
+}
+
+/** sym2_io_resume is called when the error recovery driver
+ *  tells us that its OK to resume normal operation.
+ */
+static void sym2_io_resume (struct pci_dev *pdev)
+{
+	struct sym_hcb *np = pci_get_drvdata(pdev);
+
+	/* Perform device startup only once for this card. */
+	if (0 == PCI_FUNC (pdev->devfn))
+		sym_start_up (np, 1);
+
+	np->s.io_state = pci_channel_io_normal;
+	sym_eeh_done (np->s.io_reset_wait);
+}
+#endif /* CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY */
+
 /*
  * Driver host template.
  */
@@ -2174,6 +2287,13 @@ static struct pci_driver sym2_driver = {
 	.id_table	= sym2_id_table,
 	.probe		= sym2_probe,
 	.remove		= __devexit_p(sym2_remove),
+#ifdef CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY
+	.err_handler = {
+		.error_detected = sym2_io_error_detected,
+		.slot_reset = sym2_io_slot_reset,
+		.resume = sym2_io_resume,
+	},
+#endif /* CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY */
 };
 
 static int __init sym2_init(void)
--- linux-2.6.12-git10/drivers/scsi/sym53c8xx_2/sym_glue.h.linas-orig	2005-06-22 15:26:17.000000000 -0500
+++ linux-2.6.12-git10/drivers/scsi/sym53c8xx_2/sym_glue.h	2005-06-22 15:28:29.000000000 -0500
@@ -181,6 +181,10 @@ struct sym_shcb {
 	char		chip_name[8];
 	struct pci_dev	*device;
 
+	/* pci bus i/o state; waiter for clearing of i/o state */
+	enum pci_channel_state io_state;
+	struct sym_eh_wait *io_reset_wait;
+
 	struct Scsi_Host *host;
 
 	void __iomem *	ioaddr;		/* MMIO kernel io address	*/
--- linux-2.6.12-git10/drivers/scsi/sym53c8xx_2/sym_hipd.c.linas-orig	2005-06-22 15:26:17.000000000 -0500
+++ linux-2.6.12-git10/drivers/scsi/sym53c8xx_2/sym_hipd.c	2005-06-22 15:28:29.000000000 -0500
@@ -2806,6 +2806,7 @@ void sym_interrupt (struct sym_hcb *np)
 	u_char	istat, istatc;
 	u_char	dstat;
 	u_short	sist;
+	u_int    icnt;
 
 	/*
 	 *  interrupt on the fly ?
@@ -2847,6 +2848,7 @@ void sym_interrupt (struct sym_hcb *np)
 	sist	= 0;
 	dstat	= 0;
 	istatc	= istat;
+	icnt = 0;
 	do {
 		if (istatc & SIP)
 			sist  |= INW(np, nc_sist);
@@ -2854,6 +2856,14 @@ void sym_interrupt (struct sym_hcb *np)
 			dstat |= INB(np, nc_dstat);
 		istatc = INB(np, nc_istat);
 		istat |= istatc;
+#ifdef CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY
+		/* Prevent deadlock waiting on a condition that may never clear. */
+		icnt ++;
+		if (100 < icnt) {
+			if (np->s.device->driver->err_handler.error_state != pci_channel_io_normal)
+				return;
+		}
+#endif /* CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY */
 	} while (istatc & (SIP|DIP));
 
 	if (DEBUG_FLAGS & DEBUG_TINY)
--- linux-2.6.12-git10/drivers/scsi/Kconfig.linas-orig	2005-06-22 15:26:14.000000000 -0500
+++ linux-2.6.12-git10/drivers/scsi/Kconfig	2005-06-22 15:28:29.000000000 -0500
@@ -1040,6 +1040,14 @@ config SCSI_SYM53C8XX_IOMAPPED
 	  the card.  This is significantly slower then using memory
 	  mapped IO.  Most people should answer N.
 
+config SCSI_SYM53C8XX_EEH_RECOVERY
+	bool "Enable PCI bus error recovery"
+	depends on SCSI_SYM53C8XX_2 && PPC_PSERIES
+	help
+		If you say Y here, the driver will be able to recover from
+		PCI bus errors on many PowerPC platforms. IBM pSeries users
+		should answer Y.
+
 config SCSI_IPR
 	tristate "IBM Power Linux RAID adapter support"
 	depends on PCI && SCSI
--- linux-2.6.12-git10/arch/ppc64/configs/pSeries_defconfig.linas-orig	2005-06-17 14:48:29.000000000 -0500
+++ linux-2.6.12-git10/arch/ppc64/configs/pSeries_defconfig	2005-06-22 15:30:33.000000000 -0500
@@ -311,6 +311,7 @@ CONFIG_SCSI_SYM53C8XX_DMA_ADDRESSING_MOD
 CONFIG_SCSI_SYM53C8XX_DEFAULT_TAGS=16
 CONFIG_SCSI_SYM53C8XX_MAX_TAGS=64
 # CONFIG_SCSI_SYM53C8XX_IOMAPPED is not set
+CONFIG_SCSI_SYM53C8XX_EEH_RECOVERY=y
 CONFIG_SCSI_IPR=y
 CONFIG_SCSI_IPR_TRACE=y
 CONFIG_SCSI_IPR_DUMP=y

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 7/13]: PCI Err: Symbios SCSI  driver recovery
  2005-06-28 23:59 [PATCH 7/13]: PCI Err: Symbios SCSI driver recovery Linas Vepstas
@ 2005-06-29  1:51 ` Benjamin Herrenschmidt
  2005-06-29 20:48   ` Linas Vepstas
  2005-06-29  3:02 ` Andi Kleen
  1 sibling, 1 reply; 7+ messages in thread
From: Benjamin Herrenschmidt @ 2005-06-29  1:51 UTC (permalink / raw)
  To: Linas Vepstas
  Cc: linux-kernel, long, Hidetoshi Seto, Greg KH, ak, Paul Mackerras,
	linuxppc64-dev, linux-pci, johnrose

On Tue, 2005-06-28 at 18:59 -0500, Linas Vepstas wrote:
> pci-err-7-symbios.patch
> 
> Adds PCI Error recoervy callbacks to the Symbios Sym53c8xx driver.
> Tested, seems to work well under i/o stress to one disk. Not
> stress tested under heavy i/o to multiple scsi devices.
> 
> Note the check of the pci error state flag inside an infinite
> loop inside the interrupt handler. Without this check, the 
> device can spin forever, locking up hard, long before the 
> asynchronous error event (and callbacks) are ever called. 

I don't understand the logic of that check. In general, I don't think
checking the error state is reliable at all. You may be in an interrupt
on the only CPU in the system, thus the error management code may have
no chance to update that error state field while you are looping... It
may work for us since we call the eeh stuff from the IO accessors but
will not in the generic case.

Normally, you should check for non-responding hardware by testing things
like reading all ff's or having a timeout in the loop. The bug is that
the driver has a potential infinite loop in the first place.

The only type of "synchronous" error checking that can be done is what
is proposed by Hidetoshi Seto. You could use his stuff here.

Ben.



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 7/13]: PCI Err: Symbios SCSI  driver recovery
  2005-06-28 23:59 [PATCH 7/13]: PCI Err: Symbios SCSI driver recovery Linas Vepstas
  2005-06-29  1:51 ` Benjamin Herrenschmidt
@ 2005-06-29  3:02 ` Andi Kleen
  2005-06-29 16:34   ` Linas Vepstas
  1 sibling, 1 reply; 7+ messages in thread
From: Andi Kleen @ 2005-06-29  3:02 UTC (permalink / raw)
  To: Linas Vepstas
  Cc: linux-kernel, Benjamin Herrenschmidt, long, Hidetoshi Seto,
	Greg KH, Paul Mackerras, linuxppc64-dev, linux-pci, johnrose

On Tue, Jun 28, 2005 at 06:59:19PM -0500, Linas Vepstas wrote:
> 
> pci-err-7-symbios.patch
> 
> Adds PCI Error recoervy callbacks to the Symbios Sym53c8xx driver.
> Tested, seems to work well under i/o stress to one disk. Not
> stress tested under heavy i/o to multiple scsi devices.

What does this do to the IO requests currently being processed
by the firmware? Do they get all aborted? Is it ensured
that they all error out properly? 

-Andi

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 7/13]: PCI Err: Symbios SCSI  driver recovery
  2005-06-29  3:02 ` Andi Kleen
@ 2005-06-29 16:34   ` Linas Vepstas
  2005-07-02  8:21     ` Grant Grundler
  0 siblings, 1 reply; 7+ messages in thread
From: Linas Vepstas @ 2005-06-29 16:34 UTC (permalink / raw)
  To: Andi Kleen
  Cc: linux-kernel, Benjamin Herrenschmidt, long, Hidetoshi Seto,
	Greg KH, Paul Mackerras, linuxppc64-dev, linux-pci, johnrose

On Wed, Jun 29, 2005 at 05:02:37AM +0200, Andi Kleen was heard to remark:
> On Tue, Jun 28, 2005 at 06:59:19PM -0500, Linas Vepstas wrote:
> > 
> > pci-err-7-symbios.patch
> > 
> > Adds PCI Error recoervy callbacks to the Symbios Sym53c8xx driver.
> > Tested, seems to work well under i/o stress to one disk. Not
> > stress tested under heavy i/o to multiple scsi devices.
> 
> What does this do to the IO requests currently being processed
> by the firmware? Do they get all aborted? Is it ensured
> that they all error out properly? 

Interesting question; two replies.

>From the hardware point of view, the scsi card is soft-reset, which
wipes out all state on the card, including any command queues on the
card.  In-progress transactions, e.g. disk drives in the middle of
receiving commands or in the process of responding to reads, are lost.
The freshly rebooted scsi controller may wonder why disks are suddenly
sending it data. 

This may sound alarming, but it not much different than the existing
standard/generic SCSI bus-reset/host resest sequences, which I
beleive (hope) work correctly.  In particular, there shouldn't be 
any data corrpution; here's why:

>From the kernel point of view, file system i/o goes through the block 
device, through to scsi_dispatch_cmd(), to the symbios driver.  
Any queued requests stay queued until they are fulfilled.  Queued
requests get replayed, in a fashion similar to what would be needed
after a host reset.  In particular, there shouldn't be and (permanent)
file system corruption because any inconsistent state on the disk 
would get over-written when the queued reqeusts get re-issued.

At least, that's how i think it should work.  My testing was light ...
inject errors while doing mild single-disk i/o.  Haven't run any
full stress tests, with would e.g. write patterns to multiple disks
and then read back the patterns and bit-compare.   Someday, I hope to 
run this test :) However, if this reveals bugs, I beleive these will 
be generic bugs, rather than PCI error recovery related bugs.

FWIW, yes, I have heard of devices that "cheat", and report back that a
transaction is complete, even though it is still pending in firmware
somewhere, either  on the host or the disk.  Those devices get screwed.

No doubt, this will happen to some giant banking customer, and result
in the corruption of serious financial data.  There will be hundreds 
of airplane trips as dozens of techies will be hunched over the system 
wondering "what happened" while executives fume in the corner,
threatening billion dollar lawsuits. The net output of this will be
a one-line patch to drivers/scsi/scsi_lib.c which will be lost in the
noise of the LKML.  Shit happens.

--linas



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 7/13]: PCI Err: Symbios SCSI  driver recovery
  2005-06-29  1:51 ` Benjamin Herrenschmidt
@ 2005-06-29 20:48   ` Linas Vepstas
  2005-06-29 23:41     ` Benjamin Herrenschmidt
  0 siblings, 1 reply; 7+ messages in thread
From: Linas Vepstas @ 2005-06-29 20:48 UTC (permalink / raw)
  To: Benjamin Herrenschmidt
  Cc: linux-kernel, long, Hidetoshi Seto, Greg KH, ak, Paul Mackerras,
	linuxppc64-dev, linux-pci, johnrose

On Wed, Jun 29, 2005 at 11:51:07AM +1000, Benjamin Herrenschmidt was heard to remark:
> On Tue, 2005-06-28 at 18:59 -0500, Linas Vepstas wrote:
> > pci-err-7-symbios.patch
> > 
> > Adds PCI Error recoervy callbacks to the Symbios Sym53c8xx driver.
> > Tested, seems to work well under i/o stress to one disk. Not
> > stress tested under heavy i/o to multiple scsi devices.
> > 
> > Note the check of the pci error state flag inside an infinite
> > loop inside the interrupt handler. Without this check, the 
> > device can spin forever, locking up hard, long before the 
> > asynchronous error event (and callbacks) are ever called. 
> 
> Normally, you should check for non-responding hardware by testing things
> like reading all ff's or having a timeout in the loop. 

For ppc64, that does happen in the loop, and so the flag does get
set synchronously, even on a single-cpu system.  But point taken.

> The bug is that
> the driver has a potential infinite loop in the first place.
> 
> The only type of "synchronous" error checking that can be done is what
> is proposed by Hidetoshi Seto. You could use his stuff here.

Yes. However, I will leave this bit in for now, (and mark it as a hack) 
until Seto-san's patches are on deck. I'd rather not have a built-in 
pre-req right now.

--linas

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 7/13]: PCI Err: Symbios SCSI  driver recovery
  2005-06-29 20:48   ` Linas Vepstas
@ 2005-06-29 23:41     ` Benjamin Herrenschmidt
  0 siblings, 0 replies; 7+ messages in thread
From: Benjamin Herrenschmidt @ 2005-06-29 23:41 UTC (permalink / raw)
  To: Linas Vepstas
  Cc: linux-kernel, long, Hidetoshi Seto, Greg KH, ak, Paul Mackerras,
	linuxppc64-dev, linux-pci, johnrose

On Wed, 2005-06-29 at 15:48 -0500, Linas Vepstas wrote:

> > The only type of "synchronous" error checking that can be done is what
> > is proposed by Hidetoshi Seto. You could use his stuff here.
> 
> Yes. However, I will leave this bit in for now, (and mark it as a hack) 
> until Seto-san's patches are on deck. I'd rather not have a built-in 
> pre-req right now.

No, check for ff's where they don't make sense or add a timeout to the
loop. That's the correct solution for now.

Ben.



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 7/13]: PCI Err: Symbios SCSI  driver recovery
  2005-06-29 16:34   ` Linas Vepstas
@ 2005-07-02  8:21     ` Grant Grundler
  0 siblings, 0 replies; 7+ messages in thread
From: Grant Grundler @ 2005-07-02  8:21 UTC (permalink / raw)
  To: Linas Vepstas
  Cc: Andi Kleen, linux-kernel, Benjamin Herrenschmidt, long,
	Hidetoshi Seto, Greg KH, Paul Mackerras, linuxppc64-dev,
	linux-pci, johnrose

On Wed, Jun 29, 2005 at 11:34:08AM -0500, Linas Vepstas wrote:
...
> requests get replayed, in a fashion similar to what would be needed
> after a host reset.  In particular, there shouldn't be and (permanent)
> file system corruption because any inconsistent state on the disk 
> would get over-written when the queued reqeusts get re-issued.

FS's that require some ordering (journal) should be handling
this sort of stuff already. I have the same expectations as
Linas does WRT design. FS's that don't, will have the same sort
of problems that they would have as if the OS crashed.

> FWIW, yes, I have heard of devices that "cheat", and report back that a
> transaction is complete, even though it is still pending in firmware
> somewhere, either  on the host or the disk.  Those devices get screwed.

See "Write Cache Enabled" (aka WCE or in HPUX speak "Immediate Reporting").
WCE must be disabled if data corruption can not be tolerated.
"Desktop" (ie unix workstations) systems typically have WCE enabled
so they look good on (stupid) performance benchmarks.

The only devices that lie about WCE have battery backed RAM buffers.
(e.g. SCSI RAID *devices* - multi-LUN, dual controller beasts)

> No doubt, this will happen to some giant banking customer,

It won't happen because of WCE. None of the major HW vendors will
sell or support HW with WCE enabled. Exactly for the reasons you
point out.

grant

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2005-07-02  8:17 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2005-06-28 23:59 [PATCH 7/13]: PCI Err: Symbios SCSI driver recovery Linas Vepstas
2005-06-29  1:51 ` Benjamin Herrenschmidt
2005-06-29 20:48   ` Linas Vepstas
2005-06-29 23:41     ` Benjamin Herrenschmidt
2005-06-29  3:02 ` Andi Kleen
2005-06-29 16:34   ` Linas Vepstas
2005-07-02  8:21     ` Grant Grundler

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox