public inbox for linux-scsi@vger.kernel.org
 help / color / mirror / Atom feed
From: Robert Love <robert.w.love@intel.com>
To: James.Bottomley@HansenPartnership.com, linux-scsi@vger.kernel.org
Cc: Joe Eykholt <jeykholt@cisco.com>, Robert Love <robert.w.love@intel.com>
Subject: [PATCH 20/20] libfc: fix free of fc_rport_priv with timer pending
Date: Wed, 21 Oct 2009 16:28:30 -0700	[thread overview]
Message-ID: <20091021232830.12986.88095.stgit@localhost.localdomain> (raw)
In-Reply-To: <20091021232640.12986.79205.stgit@localhost.localdomain>

From: Joe Eykholt <jeykholt@cisco.com>

Timer crashes were caused by freeing a struct fc_rport_priv
with a timer pending, causing the timer facility list to be
corrupted.  This was during FC uplink flap tests with a lot
of targets.

After discovery, we were doing an PLOGI on an rdata that was
in DELETE state but not yet removed from the lookup list.
This moved the rdata from DELETE state to PLOGI state.
If the PLOGI exchange allocation failed and needed to be
retried, the timer scheduling could race with the free
being done by fc_rport_work().

When fc_rport_login() is called on a rport in DELETE state,
move it to a new state RESTART.  In fc_rport_work, when
handling a LOGO, STOPPED or FAILED event, look for restart
state.  In the RESTART case, don't take the rdata off the
list and after the transport remote port is deleted and
exchanges are reset, re-login to the remote port.

Note that the new RESTART state also corrects a problem we
had when re-discovering a port that had moved to DELETE state.
In that case, a new rdata was created, but the old rdata
would do an exchange manager reset affecting the FC_ID
for both the new rdata and old rdata.  With the new state,
the new port isn't logged into until after any old exchanges
are reset.

Signed-off-by: Joe Eykholt <jeykholt@cisco.com>
Signed-off-by: Robert Love <robert.w.love@intel.com>
---

 drivers/scsi/libfc/fc_rport.c |   69 ++++++++++++++++++++++++++++++-----------
 include/scsi/libfc.h          |    1 +
 2 files changed, 51 insertions(+), 19 deletions(-)

diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c
index 49abb83..324e156 100644
--- a/drivers/scsi/libfc/fc_rport.c
+++ b/drivers/scsi/libfc/fc_rport.c
@@ -86,6 +86,7 @@ static const char *fc_rport_state_names[] = {
 	[RPORT_ST_LOGO] = "LOGO",
 	[RPORT_ST_ADISC] = "ADISC",
 	[RPORT_ST_DELETE] = "Delete",
+	[RPORT_ST_RESTART] = "Restart",
 };
 
 /**
@@ -99,8 +100,7 @@ static struct fc_rport_priv *fc_rport_lookup(const struct fc_lport *lport,
 	struct fc_rport_priv *rdata;
 
 	list_for_each_entry(rdata, &lport->disc.rports, peers)
-		if (rdata->ids.port_id == port_id &&
-		    rdata->rp_state != RPORT_ST_DELETE)
+		if (rdata->ids.port_id == port_id)
 			return rdata;
 	return NULL;
 }
@@ -235,6 +235,7 @@ static void fc_rport_work(struct work_struct *work)
 	struct fc_rport_operations *rport_ops;
 	struct fc_rport_identifiers ids;
 	struct fc_rport *rport;
+	int restart = 0;
 
 	mutex_lock(&rdata->rp_mutex);
 	event = rdata->event;
@@ -287,8 +288,19 @@ static void fc_rport_work(struct work_struct *work)
 		mutex_unlock(&rdata->rp_mutex);
 
 		if (port_id != FC_FID_DIR_SERV) {
+			/*
+			 * We must drop rp_mutex before taking disc_mutex.
+			 * Re-evaluate state to allow for restart.
+			 * A transition to RESTART state must only happen
+			 * while disc_mutex is held and rdata is on the list.
+			 */
 			mutex_lock(&lport->disc.disc_mutex);
-			list_del(&rdata->peers);
+			mutex_lock(&rdata->rp_mutex);
+			if (rdata->rp_state == RPORT_ST_RESTART)
+				restart = 1;
+			else
+				list_del(&rdata->peers);
+			mutex_unlock(&rdata->rp_mutex);
 			mutex_unlock(&lport->disc.disc_mutex);
 		}
 
@@ -312,7 +324,13 @@ static void fc_rport_work(struct work_struct *work)
 			mutex_unlock(&rdata->rp_mutex);
 			fc_remote_port_delete(rport);
 		}
-		kref_put(&rdata->kref, lport->tt.rport_destroy);
+		if (restart) {
+			mutex_lock(&rdata->rp_mutex);
+			FC_RPORT_DBG(rdata, "work restart\n");
+			fc_rport_enter_plogi(rdata);
+			mutex_unlock(&rdata->rp_mutex);
+		} else
+			kref_put(&rdata->kref, lport->tt.rport_destroy);
 		break;
 
 	default:
@@ -342,6 +360,12 @@ int fc_rport_login(struct fc_rport_priv *rdata)
 		FC_RPORT_DBG(rdata, "ADISC port\n");
 		fc_rport_enter_adisc(rdata);
 		break;
+	case RPORT_ST_RESTART:
+		break;
+	case RPORT_ST_DELETE:
+		FC_RPORT_DBG(rdata, "Restart deleted port\n");
+		fc_rport_state_enter(rdata, RPORT_ST_RESTART);
+		break;
 	default:
 		FC_RPORT_DBG(rdata, "Login to port\n");
 		fc_rport_enter_plogi(rdata);
@@ -397,20 +421,21 @@ int fc_rport_logoff(struct fc_rport_priv *rdata)
 
 	if (rdata->rp_state == RPORT_ST_DELETE) {
 		FC_RPORT_DBG(rdata, "Port in Delete state, not removing\n");
-		mutex_unlock(&rdata->rp_mutex);
 		goto out;
 	}
 
-	fc_rport_enter_logo(rdata);
+	if (rdata->rp_state == RPORT_ST_RESTART)
+		FC_RPORT_DBG(rdata, "Port in Restart state, deleting\n");
+	else
+		fc_rport_enter_logo(rdata);
 
 	/*
 	 * Change the state to Delete so that we discard
 	 * the response.
 	 */
 	fc_rport_enter_delete(rdata, RPORT_EV_STOP);
-	mutex_unlock(&rdata->rp_mutex);
-
 out:
+	mutex_unlock(&rdata->rp_mutex);
 	return 0;
 }
 
@@ -466,6 +491,7 @@ static void fc_rport_timeout(struct work_struct *work)
 	case RPORT_ST_READY:
 	case RPORT_ST_INIT:
 	case RPORT_ST_DELETE:
+	case RPORT_ST_RESTART:
 		break;
 	}
 
@@ -499,6 +525,7 @@ static void fc_rport_error(struct fc_rport_priv *rdata, struct fc_frame *fp)
 		fc_rport_enter_logo(rdata);
 		break;
 	case RPORT_ST_DELETE:
+	case RPORT_ST_RESTART:
 	case RPORT_ST_READY:
 	case RPORT_ST_INIT:
 		break;
@@ -1248,6 +1275,7 @@ static void fc_rport_recv_plogi_req(struct fc_lport *lport,
 		}
 		break;
 	case RPORT_ST_PRLI:
+	case RPORT_ST_RTV:
 	case RPORT_ST_READY:
 	case RPORT_ST_ADISC:
 		FC_RPORT_DBG(rdata, "Received PLOGI in logged-in state %d "
@@ -1255,11 +1283,14 @@ static void fc_rport_recv_plogi_req(struct fc_lport *lport,
 		/* XXX TBD - should reset */
 		break;
 	case RPORT_ST_DELETE:
-	default:
-		FC_RPORT_DBG(rdata, "Received PLOGI in unexpected state %d\n",
-			     rdata->rp_state);
-		fc_frame_free(rx_fp);
-		goto out;
+	case RPORT_ST_LOGO:
+	case RPORT_ST_RESTART:
+		FC_RPORT_DBG(rdata, "Received PLOGI in state %s - send busy\n",
+			     fc_rport_state(rdata));
+		mutex_unlock(&rdata->rp_mutex);
+		rjt_data.reason = ELS_RJT_BUSY;
+		rjt_data.explan = ELS_EXPL_NONE;
+		goto reject;
 	}
 
 	/*
@@ -1510,14 +1541,14 @@ static void fc_rport_recv_logo_req(struct fc_lport *lport,
 		FC_RPORT_DBG(rdata, "Received LOGO request while in state %s\n",
 			     fc_rport_state(rdata));
 
+		fc_rport_enter_delete(rdata, RPORT_EV_LOGO);
+
 		/*
-		 * If the remote port was created due to discovery,
-		 * log back in.  It may have seen a stale RSCN about us.
+		 * If the remote port was created due to discovery, set state
+		 * to log back in.  It may have seen a stale RSCN about us.
 		 */
-		if (rdata->rp_state != RPORT_ST_DELETE && rdata->disc_id)
-			fc_rport_enter_plogi(rdata);
-		else
-			fc_rport_enter_delete(rdata, RPORT_EV_LOGO);
+		if (rdata->disc_id)
+			fc_rport_state_enter(rdata, RPORT_ST_RESTART);
 		mutex_unlock(&rdata->rp_mutex);
 	} else
 		FC_RPORT_ID_DBG(lport, sid,
diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h
index 4ff1485..1662d73 100644
--- a/include/scsi/libfc.h
+++ b/include/scsi/libfc.h
@@ -145,6 +145,7 @@ enum fc_rport_state {
 	RPORT_ST_LOGO,		/* port logout sent */
 	RPORT_ST_ADISC,		/* Discover Address sent */
 	RPORT_ST_DELETE,	/* port being deleted */
+	RPORT_ST_RESTART,       /* remote port being deleted and will restart */
 };
 
 /**


      parent reply	other threads:[~2009-10-21 23:29 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-10-21 23:26 [PATCH 00/20] libfc, fcoe and fnci fixes for 2.6.32 RC Robert Love
2009-10-21 23:26 ` [PATCH 01/20] libfc: fix typo in retry check on received PRLI Robert Love
2009-10-21 23:26 ` [PATCH 02/20] libfc: fix ddp in fc_fcp for 0 xid Robert Love
2009-10-21 23:26 ` [PATCH 03/20] fcoe: remove redundant checking of netdev->netdev_ops Robert Love
2009-10-21 23:27 ` [PATCH 04/20] libfc, fcoe: Don't EXPORT_SYMBOLS unnecessarily Robert Love
2009-10-21 23:27 ` [PATCH 05/20] libfc: Remove unused fc_lport pointer from fc_fcp_pkt_abort Robert Love
2009-10-21 23:27 ` [PATCH 06/20] libfc: Fix wrong scsi return status under FC_DATA_UNDRUN Robert Love
2009-10-21 23:27 ` [PATCH 07/20] libfc: lport: fix minor documentation errors Robert Love
2009-10-21 23:27 ` [PATCH 08/20] libfc: don't WARN_ON in lport_timeout for RESET state Robert Love
2009-10-21 23:27 ` [PATCH 09/20] libfc: removes initializing fc_cpu_order and fc_cpu_mask per lport Robert Love
2009-10-21 23:27 ` [PATCH 10/20] libfc: adds missing exch release for accepted RRQ Robert Love
2009-10-21 23:27 ` [PATCH 11/20] libfc: removes unused disc_work and ex_list Robert Love
2009-10-21 23:27 ` [PATCH 12/20] fcoe: initialize return value in fcoe_destroy Robert Love
2009-10-21 23:27 ` [PATCH 13/20] fcoe: Use NETIF_F_FCOE_MTU flag to set up max frame size (lport->mfs) Robert Love
2009-10-21 23:27 ` [PATCH 14/20] libfc: Fix frags in frame exceeding SKB_MAX_FRAGS in fc_fcp_send_data Robert Love
2009-10-21 23:28 ` [PATCH 15/20] fcoe: Call ndo_fcoe_enable/disable to turn FCoE feature on/off in LLD Robert Love
2009-10-21 23:28 ` [PATCH 16/20] libfc: fix memory corruption caused by double frees and bad error handling Robert Love
2009-10-21 23:28 ` [PATCH 17/20] fnic: Process all cq entries per ISR Robert Love
2009-10-21 23:28 ` [PATCH 18/20] fnic: Set max_cmd_len to driver supported CDB length Robert Love
2009-10-21 23:28 ` [PATCH 19/20] fnic: Pad the unused bytes of CDB to 0s Robert Love
2009-10-21 23:28 ` Robert Love [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20091021232830.12986.88095.stgit@localhost.localdomain \
    --to=robert.w.love@intel.com \
    --cc=James.Bottomley@HansenPartnership.com \
    --cc=jeykholt@cisco.com \
    --cc=linux-scsi@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox