public inbox for linux-scsi@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/1] fusion: hold off error recovery while alternate ioc is initializing
@ 2009-12-16 21:20 Michael Reed
  0 siblings, 0 replies; 6+ messages in thread
From: Michael Reed @ 2009-12-16 21:20 UTC (permalink / raw)
  To: linux-scsi; +Cc: Moore, Eric, Jeremy Higdon, Robin Holt, kashyap.desai

Fusion FC chips are dual-function devices with some shared resources.
During initialization of one function, its driver inhibits the ability
of the other function's driver to allocate message frames by clearing
the other function's "active" flag.  Should mid-layer error recovery be
initiated for a SCSI command during this initialization (which can take
many seconds), error recovery will escalate to the level of host reset.
This host reset might fail, resulting in all connected targets being
taken offline.

This patch holds off mid-layer error recovery for up to 60 seconds
to permit initialization of the other function to complete.

Applies to scsi-misc.

Signed-off-by: Michael Reed <mdr@sgi.com>

==

--- a/drivers/message/fusion/mptfc.c	2009-12-16 15:09:22.817382765 -0600
+++ b/drivers/message/fusion/mptfc.c	2009-12-16 15:10:31.949380663 -0600
@@ -195,29 +195,31 @@ mptfc_block_error_handler(struct scsi_cm
 	unsigned long		flags;
 	int			ready;
 	MPT_ADAPTER 		*ioc;
+	int			sleep_interval = 1000;
+	int			loops = 60 * sleep_interval;
 
 	hd = shost_priv(SCpnt->device->host);
 	ioc = hd->ioc;
 	spin_lock_irqsave(shost->host_lock, flags);
-	while ((ready = fc_remote_port_chkready(rport) >> 16) == DID_IMM_RETRY) {
+
+	while ((loops > 0 && ioc->active == 0)
+	 || (ready = fc_remote_port_chkready(rport) >> 16) == DID_IMM_RETRY) {
+
 		spin_unlock_irqrestore(shost->host_lock, flags);
-		dfcprintk (ioc, printk(MYIOC_s_DEBUG_FMT
-			"mptfc_block_error_handler.%d: %d:%d, port status is "
-			"DID_IMM_RETRY, deferring %s recovery.\n",
-			ioc->name, ioc->sh->host_no,
-			SCpnt->device->id, SCpnt->device->lun, caller));
-		msleep(1000);
+		msleep(sleep_interval);
+		loops -= sleep_interval;
 		spin_lock_irqsave(shost->host_lock, flags);
 	}
 	spin_unlock_irqrestore(shost->host_lock, flags);
 
-	if (ready == DID_NO_CONNECT || !SCpnt->device->hostdata) {
+	if (ioc->active == 0
+	 || ready == DID_NO_CONNECT || !SCpnt->device->hostdata) {
 		dfcprintk (ioc, printk(MYIOC_s_DEBUG_FMT
-			"%s.%d: %d:%d, failing recovery, "
+			"%s.%d: %d:%d, failing recovery, active %d, "
 			"port state %d, vdevice %p.\n", caller,
 			ioc->name, ioc->sh->host_no,
-			SCpnt->device->id, SCpnt->device->lun, ready,
-			SCpnt->device->hostdata));
+			SCpnt->device->id, SCpnt->device->lun,
+			ioc->active, ready, SCpnt->device->hostdata));
 		return FAILED;
 	}
 	dfcprintk (ioc, printk(MYIOC_s_DEBUG_FMT

^ permalink raw reply	[flat|nested] 6+ messages in thread
* [PATCH 1/1] fusion: hold off error recovery while alternate ioc is initializing
@ 2010-02-10 20:32 Michael Reed
  2010-02-11  9:27 ` Desai, Kashyap
  0 siblings, 1 reply; 6+ messages in thread
From: Michael Reed @ 2010-02-10 20:32 UTC (permalink / raw)
  To: linux-scsi, kashyap.desai, Prakash, Sathya
  Cc: Moore, Eric, Jeremy Higdon, Robin Holt

After discussing this patch with LSI, I am resubmitting it with the
recommended 40-second wait for the alternate ioc's initialization to
complete.
--
Fusion FC chips are dual-function devices with some shared resources.
During initialization of one function, its driver inhibits the ability
of the other function's driver to allocate message frames by clearing
the other function's "active" flag.  Should mid-layer error recovery be
initiated for a SCSI command during this initialization (which can take
up to 40 seconds), error recovery will escalate to the level of host
reset.  This host reset might fail (as the other function is resetting),
resulting in all connected targets being taken offline.

This patch holds off mid-layer error recovery for up to 40 seconds
to permit initialization of the other function to complete.

Applies to scsi-misc.

Signed-off-by: Michael Reed <mdr@sgi.com>

==

--- scsi-misc-2.6/drivers/message/fusion/mptfc.c	2010-02-08 11:19:47.000000000 -0600
+++ scsi-misc-2.6-2010_02_08-modified/drivers/message/fusion/mptfc.c	2010-02-10 12:40:23.184510802 -0600
@@ -195,29 +195,34 @@ mptfc_block_error_handler(struct scsi_cm
 	unsigned long		flags;
 	int			ready;
 	MPT_ADAPTER 		*ioc;
+	int			loops = 40;	/* seconds */
 
 	hd = shost_priv(SCpnt->device->host);
 	ioc = hd->ioc;
 	spin_lock_irqsave(shost->host_lock, flags);
-	while ((ready = fc_remote_port_chkready(rport) >> 16) == DID_IMM_RETRY) {
+	while ((ready = fc_remote_port_chkready(rport) >> 16) == DID_IMM_RETRY
+	 || (loops > 0 && ioc->active == 0)) {
 		spin_unlock_irqrestore(shost->host_lock, flags);
 		dfcprintk (ioc, printk(MYIOC_s_DEBUG_FMT
 			"mptfc_block_error_handler.%d: %d:%d, port status is "
-			"DID_IMM_RETRY, deferring %s recovery.\n",
+			"%x, active flag %d, deferring %s recovery.\n",
 			ioc->name, ioc->sh->host_no,
-			SCpnt->device->id, SCpnt->device->lun, caller));
+			SCpnt->device->id, SCpnt->device->lun,
+			ready, ioc->active, caller));
 		msleep(1000);
 		spin_lock_irqsave(shost->host_lock, flags);
+		loops --;
 	}
 	spin_unlock_irqrestore(shost->host_lock, flags);
 
-	if (ready == DID_NO_CONNECT || !SCpnt->device->hostdata) {
+	if (ready == DID_NO_CONNECT || !SCpnt->device->hostdata
+	 || ioc->active == 0) {
 		dfcprintk (ioc, printk(MYIOC_s_DEBUG_FMT
 			"%s.%d: %d:%d, failing recovery, "
-			"port state %d, vdevice %p.\n", caller,
+			"port state %x, active %d, vdevice %p.\n", caller,
 			ioc->name, ioc->sh->host_no,
 			SCpnt->device->id, SCpnt->device->lun, ready,
-			SCpnt->device->hostdata));
+			ioc->active, SCpnt->device->hostdata));
 		return FAILED;
 	}
 	dfcprintk (ioc, printk(MYIOC_s_DEBUG_FMT

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2010-02-11 18:11 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2009-12-16 21:20 [PATCH 1/1] fusion: hold off error recovery while alternate ioc is initializing Michael Reed
  -- strict thread matches above, loose matches on Subject: below --
2010-02-10 20:32 Michael Reed
2010-02-11  9:27 ` Desai, Kashyap
2010-02-11 17:53   ` Bernd Schubert
2010-02-11 18:01     ` James Bottomley
2010-02-11 18:11       ` Bernd Schubert

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox