[RFC PATCH 4/7] fc class: don't return from fc_block_scsi_eh until IO has been cleaned up

linux-scsi.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: michaelc@cs.wisc.edu
To: linux-scsi@vger.kernel.org
Cc: Mike Christie <michaelc@cs.wisc.edu>
Subject: [RFC PATCH 4/7] fc class: don't return from fc_block_scsi_eh until IO has been cleaned up
Date: Thu, 23 Sep 2010 00:17:22 -0500	[thread overview]
Message-ID: <1285219045-14645-5-git-send-email-michaelc@cs.wisc.edu> (raw)
In-Reply-To: <1285219045-14645-1-git-send-email-michaelc@cs.wisc.edu>

From: Mike Christie <michaelc@cs.wisc.edu>

If an LLD does:

        ret = fc_block_scsi_eh(cmnd);
        if (ret)
                return ret;

in the eh callbacks, then it could cause the following race:

1. The LLD will call fc_block_scsi_eh from the scsi eh thread.
2. From the FC class thread, the fast io fail tmo will fire and set
FC_RPORT_FAST_FAIL_TIMEDOUT, then begin to call terminate_rport_io.
3. The scsi eh thread and the LLD will then break out of the
fc_block_scsi_eh wait loop and will return TRANSPORT_FAILED (formerly
FAST_IO_FAIL).
4. The scsi eh will then assume it owns the command and will start to
process it. It will call scsi_eh_flush_done_q, which might fail the
command or retry it.
5. But meanwhile, in the FC class thread, the LLD terminate_rport_io
callback could still be processing the IO and possibly accessing a
scsi_cmnd struct that the scsi eh thread has, in step 4, already started
to retry, or has failed and had reallocated to a new request.

This patch has fc_block_scsi_eh wait until the terminate_rport_io
callback has completed before returning. This allows LLDs to not
have to worry about the race.

Signed-off-by: Mike Christie <michaelc@cs.wisc.edu>
---
 drivers/scsi/scsi_transport_fc.c |   54 ++++++++++++++++++++++++++++---------
 include/scsi/scsi_transport_fc.h |    1 +
 2 files changed, 42 insertions(+), 13 deletions(-)

diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index a15e815..93a8edc 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -1575,6 +1575,7 @@ store_fc_private_host_tgtid_bind_type(struct device *dev,
 				&fc_host_rport_bindings(shost), peers);
 			list_del(&rport->peers);
 			rport->port_state = FC_PORTSTATE_DELETED;
+			rport->flags |= FC_RPORT_TERMINATING_RPORT;
 			fc_queue_work(shost, &rport->rport_delete_work);
 		}
 		spin_unlock_irqrestore(shost->host_lock, flags);
@@ -2316,6 +2317,7 @@ fc_remove_host(struct Scsi_Host *shost)
 			&fc_host->rports, peers) {
 		list_del(&rport->peers);
 		rport->port_state = FC_PORTSTATE_DELETED;
+		rport->flags |= FC_RPORT_TERMINATING_RPORT;
 		fc_queue_work(shost, &rport->rport_delete_work);
 	}
 
@@ -2323,6 +2325,7 @@ fc_remove_host(struct Scsi_Host *shost)
 			&fc_host->rport_bindings, peers) {
 		list_del(&rport->peers);
 		rport->port_state = FC_PORTSTATE_DELETED;
+		rport->flags |= FC_RPORT_TERMINATING_RPORT;
 		fc_queue_work(shost, &rport->rport_delete_work);
 	}
 
@@ -2351,11 +2354,20 @@ static void fc_terminate_rport_io(struct fc_rport *rport)
 {
 	struct Scsi_Host *shost = rport_to_shost(rport);
 	struct fc_internal *i = to_fc_internal(shost->transportt);
+	unsigned long flags;
+
+	spin_lock_irqsave(shost->host_lock, flags);
+	rport->flags |= FC_RPORT_TERMINATING_RPORT;
+	spin_unlock_irqrestore(shost->host_lock, flags);
 
 	/* Involve the LLDD if possible to terminate all io on the rport. */
 	if (i->f->terminate_rport_io)
 		i->f->terminate_rport_io(rport);
 
+	spin_lock_irqsave(shost->host_lock, flags);
+	rport->flags &= ~FC_RPORT_TERMINATING_RPORT;
+	spin_unlock_irqrestore(shost->host_lock, flags);
+
 	/*
 	 * must unblock to flush queued IO. The caller will have set
 	 * the port_state or flags, so that fc_remote_port_chkready will
@@ -2696,7 +2708,8 @@ fc_remote_port_add(struct Scsi_Host *shost, int channel,
 
 				rport->flags &= ~(FC_RPORT_FAST_FAIL_TIMEDOUT |
 						  FC_RPORT_DEVLOSS_PENDING |
-						  FC_RPORT_DEVLOSS_CALLBK_DONE);
+						  FC_RPORT_DEVLOSS_CALLBK_DONE |
+						  FC_RPORT_TERMINATING_RPORT);
 
 				/* if target, initiate a scan */
 				if (rport->scsi_target_id != -1) {
@@ -2773,8 +2786,8 @@ fc_remote_port_add(struct Scsi_Host *shost, int channel,
 				sizeof(rport->port_name));
 			rport->port_id = ids->port_id;
 			rport->roles = ids->roles;
-			rport->flags &= ~FC_RPORT_FAST_FAIL_TIMEDOUT;
-
+			rport->flags &= ~(FC_RPORT_FAST_FAIL_TIMEDOUT |
+					  FC_RPORT_TERMINATING_RPORT);
 			if (fci->f->dd_fcrport_size)
 				memset(rport->dd_data, 0,
 						fci->f->dd_fcrport_size);
@@ -2975,7 +2988,8 @@ fc_remote_port_rolechg(struct fc_rport  *rport, u32 roles)
 
 		spin_lock_irqsave(shost->host_lock, flags);
 		rport->flags &= ~(FC_RPORT_FAST_FAIL_TIMEDOUT |
-				  FC_RPORT_DEVLOSS_PENDING);
+				  FC_RPORT_DEVLOSS_PENDING |
+				  FC_RPORT_TERMINATING_RPORT);
 		spin_unlock_irqrestore(shost->host_lock, flags);
 
 		/* initiate a scan of the target */
@@ -3041,6 +3055,7 @@ fc_timeout_deleted_rport(struct work_struct *work)
 	    (rport->scsi_target_id == -1)) {
 		list_del(&rport->peers);
 		rport->port_state = FC_PORTSTATE_DELETED;
+		rport->flags |= FC_RPORT_TERMINATING_RPORT;
 		dev_printk(KERN_ERR, &rport->dev,
 			"blocked FC remote port time out: removing"
 			" rport%s\n",
@@ -3070,6 +3085,12 @@ fc_timeout_deleted_rport(struct work_struct *work)
 	rport->roles = FC_PORT_ROLE_UNKNOWN;
 	rport->port_state = FC_PORTSTATE_NOTPRESENT;
 	rport->flags &= ~FC_RPORT_FAST_FAIL_TIMEDOUT;
+	/*
+	 * We changed the port_state and are going to drop the lock, so
+	 * we set this now because we want fc_block_scsi_eh to stay blocked
+	 * until terminate_rport_io has completed.
+	 */
+	rport->flags |= FC_RPORT_TERMINATING_RPORT;
 
 	/*
 	 * Pre-emptively kill I/O rather than waiting for the work queue
@@ -3137,12 +3158,17 @@ fc_timeout_fail_rport_io(struct work_struct *work)
 {
 	struct fc_rport *rport =
 		container_of(work, struct fc_rport, fail_io_work.work);
+	struct Scsi_Host *shost = rport_to_shost(rport);
+	unsigned long flags;
 
 	if (rport->port_state != FC_PORTSTATE_BLOCKED)
 		return;
 
-	rport->flags |= FC_RPORT_FAST_FAIL_TIMEDOUT;
 	fc_terminate_rport_io(rport);
+
+	spin_lock_irqsave(shost->host_lock, flags);
+	rport->flags |= FC_RPORT_FAST_FAIL_TIMEDOUT;
+	spin_unlock_irqrestore(shost->host_lock, flags);
 }
 
 /**
@@ -3176,9 +3202,10 @@ fc_scsi_scan_rport(struct work_struct *work)
  *
  * This routine can be called from a FC LLD scsi_eh callback. It
  * blocks the scsi_eh thread until the fc_rport leaves the
- * FC_PORTSTATE_BLOCKED, or the fast_io_fail_tmo fires. This is
- * necessary to avoid the scsi_eh failing recovery actions for blocked
- * rports which would lead to offlined SCSI devices.
+ * FC_PORTSTATE_BLOCKED, or the fast_io_fail_tmo fires and the IO
+ * on the rport has been terminated with the terminate_rport_io callback.
+ * This is necessary to avoid the scsi_eh failing recovery actions for
+ * blocked rports which would lead to offlined SCSI devices.
  *
  * Returns: 0 if the fc_rport left the state FC_PORTSTATE_BLOCKED.
  *	    TRANSPORT_FAILED if the fast_io_fail_tmo fired, this should be
@@ -3191,18 +3218,19 @@ int fc_block_scsi_eh(struct scsi_cmnd *cmnd)
 	unsigned long flags;
 
 	spin_lock_irqsave(shost->host_lock, flags);
-	while (rport->port_state == FC_PORTSTATE_BLOCKED &&
-	       !(rport->flags & FC_RPORT_FAST_FAIL_TIMEDOUT)) {
+	while ((rport->port_state == FC_PORTSTATE_BLOCKED &&
+	       !(rport->flags & FC_RPORT_FAST_FAIL_TIMEDOUT)) ||
+	       rport->flags & FC_RPORT_TERMINATING_RPORT) {
 		spin_unlock_irqrestore(shost->host_lock, flags);
 		msleep(1000);
 		spin_lock_irqsave(shost->host_lock, flags);
 	}
 	spin_unlock_irqrestore(shost->host_lock, flags);
 
-	if (rport->flags & FC_RPORT_FAST_FAIL_TIMEDOUT)
-		return TRANSPORT_FAILED;
+	if (rport->port_state == FC_PORTSTATE_ONLINE)
+		return 0;
 
-	return 0;
+	return TRANSPORT_FAILED;
 }
 EXPORT_SYMBOL(fc_block_scsi_eh);
 
diff --git a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h
index 9f98fca..f392570 100644
--- a/include/scsi/scsi_transport_fc.h
+++ b/include/scsi/scsi_transport_fc.h
@@ -359,6 +359,7 @@ struct fc_rport {	/* aka fc_starget_attrs */
 #define FC_RPORT_SCAN_PENDING		0x02
 #define FC_RPORT_FAST_FAIL_TIMEDOUT	0x04
 #define FC_RPORT_DEVLOSS_CALLBK_DONE	0x08
+#define FC_RPORT_TERMINATING_RPORT	0x10
 
 #define	dev_to_rport(d)				\
 	container_of(d, struct fc_rport, dev)
-- 
1.7.2.2

next prev parent reply	other threads:[~2010-09-23  5:11 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-09-23  5:17 [RFC] FC class: misc fixes michaelc
2010-09-23  5:17 ` [RFC PATCH 1/7] fc class: fix rport re-add dev_loss handling race michaelc
2010-09-23  5:17 ` [RFC PATCH 2/7] fc class: remove fc_flush_work in fc_remote_port_add michaelc
2010-09-23  5:17 ` [RFC PATCH 3/7] scsi error: rename FAST_IO_FAIL to TRANSPORT_FAILED michaelc
2010-09-23  5:17 ` michaelc [this message]
2010-09-23  5:47   ` [RFC PATCH 4/7] fc class: don't return from fc_block_scsi_eh until IO has been cleaned up Mike Christie
2010-09-23  7:18     ` Hannes Reinecke
2010-09-23  5:17 ` [RFC PATCH 5/7] libfc: hook scsi eh into fc_block_scsi_eh michaelc
2010-09-23  5:17 ` [RFC PATCH 6/7] fnic: " michaelc
2010-09-23  5:37   ` Mike Christie
2010-09-23  5:17 ` [RFC PATCH 7/7] qla2xxx: " michaelc

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:a15e815 dfblob:93a8edc dfblob:9f98fca dfblob:f392570 )
 OR (
bs:"[RFC PATCH 4/7] fc class: don't return from fc_block_scsi_eh until IO has been cleaned up" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1285219045-14645-5-git-send-email-michaelc@cs.wisc.edu \
    --to=michaelc@cs.wisc.edu \
    --cc=linux-scsi@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).