public inbox for linux-scsi@vger.kernel.org
 help / color / mirror / Atom feed
From: James Bottomley <James.Bottomley@SteelEye.com>
To: Alan Stern <stern@rowland.harvard.edu>
Cc: SCSI development list <linux-scsi@vger.kernel.org>
Subject: Re: [PATCH 1/5] SCSI scanning and removal fixes
Date: Wed, 07 Sep 2005 17:05:32 -0500	[thread overview]
Message-ID: <1126130732.4823.57.camel@mulgrave> (raw)
In-Reply-To: <1126123089.4823.48.camel@mulgrave>

On Wed, 2005-09-07 at 14:58 -0500, James Bottomley wrote:
> On Wed, 2005-09-07 at 14:27 -0400, Alan Stern wrote:
> > I'm going to argue strongly about this.  scsi_remove_host should _not_
> > wait for error recovery to complete -- to do so will invite deadlocks.  
> > (Suppose the error handler is waiting for a bus reset, but the bus reset
> > routine requires a semaphore held by the LLD during the call to
> > scsi_remove_host?)  Furthermore, error recovery can potentially take quite
> > a long time -- much longer than we want to wait during a removal event.  
> > Instead, the error handler should not be allowed to make the transition to
> > RUNNING once the removal has started.
> 
> I agree (about the deadlocks).  However, as things stand RECOVERY is a
> state in the model and the model can only be in a single state.  If you
> permit the transition, and recovery is going on in parallel with
> removal, they'll race to set the final state (removal wants DEL and the
> eh thread will set it to RUNNING).
> 
> Either we go back to having an in_recovery flag (i.e. lift recovery out
> of the state model) or we make the model more complex to cope with this.
> Since really the only thing we test is in_recovery, we could do a more
> complex model; something like:

OK, try this as an implementation of that model (except I junked the DEL
-> DEL_RECOVERY path).

There are a few nasties in this (notably that timed-out commands will be
finished without error recovery from the DEL state).

James

diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -98,6 +98,7 @@ int scsi_host_set_state(struct Scsi_Host
 		switch (oldstate) {
 		case SHOST_CREATED:
 		case SHOST_RUNNING:
+		case SHOST_CANCEL_RECOVERY:
 			break;
 		default:
 			goto illegal;
@@ -107,12 +108,31 @@ int scsi_host_set_state(struct Scsi_Host
 	case SHOST_DEL:
 		switch (oldstate) {
 		case SHOST_CANCEL:
+		case SHOST_DEL_RECOVERY:
 			break;
 		default:
 			goto illegal;
 		}
 		break;
 
+	case SHOST_CANCEL_RECOVERY:
+		switch (oldstate) {
+		case SHOST_CANCEL:
+		case SHOST_RECOVERY:
+			break;
+		default:
+			goto illegal;
+		}
+		break;
+
+	case SHOST_DEL_RECOVERY:
+		switch (oldstate) {
+		case SHOST_CANCEL_RECOVERY:
+			break;
+		default:
+			goto illegal;
+		}
+		break;
 	}
 	shost->shost_state = state;
 	return 0;
@@ -135,12 +155,17 @@ EXPORT_SYMBOL(scsi_host_set_state);
 void scsi_remove_host(struct Scsi_Host *shost)
 {
 	down(&shost->scan_mutex);
-	scsi_host_set_state(shost, SHOST_CANCEL);
+	if (!scsi_host_set_state(shost, SHOST_CANCEL))
+		if (!scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY)) {
+			up(&shost->scan_mutex);
+			return;
+		}
 	up(&shost->scan_mutex);
 	scsi_forget_host(shost);
 	scsi_proc_host_rm(shost);
 
-	scsi_host_set_state(shost, SHOST_DEL);
+	if (!scsi_host_set_state(shost, SHOST_DEL))
+		BUG_ON(!scsi_host_set_state(shost, SHOST_DEL_RECOVERY));
 
 	transport_unregister_device(&shost->shost_gendev);
 	class_device_unregister(&shost->shost_classdev);
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -68,19 +68,24 @@ int scsi_eh_scmd_add(struct scsi_cmnd *s
 {
 	struct Scsi_Host *shost = scmd->device->host;
 	unsigned long flags;
+	int ret = 0;
 
 	if (shost->eh_wait == NULL)
 		return 0;
 
 	spin_lock_irqsave(shost->host_lock, flags);
+	if (!scsi_host_set_state(shost, SHOST_RECOVERY))
+		if (!scsi_host_set_state(shost, SHOST_CANCEL_RECOVERY))
+			goto out_unlock;
 
+	ret = 1;
 	scmd->eh_eflags |= eh_flag;
 	list_add_tail(&scmd->eh_entry, &shost->eh_cmd_q);
-	scsi_host_set_state(shost, SHOST_RECOVERY);
 	shost->host_failed++;
 	scsi_eh_wakeup(shost);
+ out_unlock:
 	spin_unlock_irqrestore(shost->host_lock, flags);
-	return 1;
+	return ret;
 }
 
 /**
@@ -176,8 +181,8 @@ void scsi_times_out(struct scsi_cmnd *sc
 		}
 
 	if (unlikely(!scsi_eh_scmd_add(scmd, SCSI_EH_CANCEL_CMD))) {
-		panic("Error handler thread not present at %p %p %s %d",
-		      scmd, scmd->device->host, __FILE__, __LINE__);
+		scmd->result |= DID_TIME_OUT << 16;
+		__scsi_done(scmd);
 	}
 }
 
@@ -196,8 +201,7 @@ int scsi_block_when_processing_errors(st
 {
 	int online;
 
-	wait_event(sdev->host->host_wait, (sdev->host->shost_state !=
-					   SHOST_RECOVERY));
+	wait_event(sdev->host->host_wait, !scsi_host_in_recovery(sdev->host));
 
 	online = scsi_device_online(sdev);
 
@@ -1460,7 +1464,9 @@ static void scsi_restart_operations(stru
 	SCSI_LOG_ERROR_RECOVERY(3, printk("%s: waking up host to restart\n",
 					  __FUNCTION__));
 
-	scsi_host_set_state(shost, SHOST_RUNNING);
+	if (!scsi_host_set_state(shost, SHOST_RUNNING))
+		if (!scsi_host_set_state(shost, SHOST_CANCEL))
+			BUG_ON(!scsi_host_set_state(shost, SHOST_DEL));
 
 	wake_up(&shost->host_wait);
 
diff --git a/drivers/scsi/scsi_ioctl.c b/drivers/scsi/scsi_ioctl.c
--- a/drivers/scsi/scsi_ioctl.c
+++ b/drivers/scsi/scsi_ioctl.c
@@ -458,7 +458,7 @@ int scsi_nonblockable_ioctl(struct scsi_
 	 * error processing, as long as the device was opened
 	 * non-blocking */
 	if (filp && filp->f_flags & O_NONBLOCK) {
-		if (sdev->host->shost_state == SHOST_RECOVERY)
+		if (scsi_host_in_recovery(sdev->host))
 			return -ENODEV;
 	} else if (!scsi_block_when_processing_errors(sdev))
 		return -ENODEV;
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -424,7 +424,7 @@ void scsi_device_unbusy(struct scsi_devi
 
 	spin_lock_irqsave(shost->host_lock, flags);
 	shost->host_busy--;
-	if (unlikely((shost->shost_state == SHOST_RECOVERY) &&
+	if (unlikely(scsi_host_in_recovery(shost) &&
 		     shost->host_failed))
 		scsi_eh_wakeup(shost);
 	spin_unlock(shost->host_lock);
@@ -1306,7 +1306,7 @@ static inline int scsi_host_queue_ready(
 				   struct Scsi_Host *shost,
 				   struct scsi_device *sdev)
 {
-	if (shost->shost_state == SHOST_RECOVERY)
+	if (scsi_host_in_recovery(shost))
 		return 0;
 	if (shost->host_busy == 0 && shost->host_blocked) {
 		/*
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -57,6 +57,8 @@ static struct {
 	{ SHOST_CANCEL, "cancel" },
 	{ SHOST_DEL, "deleted" },
 	{ SHOST_RECOVERY, "recovery" },
+	{ SHOST_CANCEL_RECOVERY, "cancel/recovery" },
+	{ SHOST_DEL_RECOVERY, "deleted/recovery", },
 };
 const char *scsi_host_state_name(enum scsi_host_state state)
 {
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1027,7 +1027,7 @@ sg_ioctl(struct inode *inode, struct fil
 		if (sdp->detached)
 			return -ENODEV;
 		if (filp->f_flags & O_NONBLOCK) {
-			if (sdp->device->host->shost_state == SHOST_RECOVERY)
+			if (scsi_host_in_recovery(sdp->device->host))
 				return -EBUSY;
 		} else if (!scsi_block_when_processing_errors(sdp->device))
 			return -EBUSY;
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -439,6 +439,8 @@ enum scsi_host_state {
 	SHOST_CANCEL,
 	SHOST_DEL,
 	SHOST_RECOVERY,
+	SHOST_CANCEL_RECOVERY,
+	SHOST_DEL_RECOVERY,
 };
 
 struct Scsi_Host {
@@ -621,6 +623,13 @@ static inline struct Scsi_Host *dev_to_s
 	return container_of(dev, struct Scsi_Host, shost_gendev);
 }
 
+static inline int scsi_host_in_recovery(struct Scsi_Host *shost)
+{
+	return shost->shost_state == SHOST_RECOVERY ||
+		shost->shost_state == SHOST_CANCEL_RECOVERY ||
+		shost->shost_state == SHOST_DEL_RECOVERY;
+}
+
 extern int scsi_queue_work(struct Scsi_Host *, struct work_struct *);
 extern void scsi_flush_work(struct Scsi_Host *);
 



  reply	other threads:[~2005-09-07 22:05 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2005-07-26 14:12 [PATCH 1/5] SCSI scanning and removal fixes Alan Stern
2005-09-07 15:16 ` James Bottomley
2005-09-07 18:27   ` Alan Stern
2005-09-07 18:37     ` Luben Tuikov
2005-09-07 18:42     ` Luben Tuikov
2005-09-07 19:31       ` Alan Stern
2005-09-07 20:00         ` Mike Anderson
2005-09-07 20:43         ` Luben Tuikov
2005-09-07 21:34           ` Stefan Richter
2005-09-08 15:19           ` Alan Stern
2005-09-08 16:07             ` Luben Tuikov
2005-09-08 18:36               ` Alan Stern
2005-09-08 23:59                 ` Luben Tuikov
2005-09-09 14:44                   ` Alan Stern
2005-09-09 17:08                   ` Stefan Richter
2005-09-09 17:15                     ` Luben Tuikov
2005-09-07 19:58     ` James Bottomley
2005-09-07 22:05       ` James Bottomley [this message]
2005-09-08 15:59       ` Alan Stern
2005-09-08 16:15         ` James Bottomley
2005-09-08 18:58           ` Alan Stern
2005-09-08 20:15             ` James Bottomley
2005-09-09  0:18               ` Luben Tuikov
2005-09-09 14:16               ` Alan Stern
2005-09-09 14:44                 ` James Bottomley
2005-09-09 15:16                   ` Alan Stern
2005-09-09 15:37                     ` James Bottomley
2005-09-09 16:17                       ` Alan Stern
2005-09-09 16:47                         ` Mike Anderson
2005-09-08 16:08       ` Alan Stern

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1126130732.4823.57.camel@mulgrave \
    --to=james.bottomley@steeleye.com \
    --cc=linux-scsi@vger.kernel.org \
    --cc=stern@rowland.harvard.edu \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox