linux-scsi.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Hannes Reinecke <hare@suse.de>
To: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: James Bottomley <james.bottomley@hansenpartnership.com>,
	Christoph Hellwig <hch@lst.de>,
	Johannes Thumshirn <jth@kernel.org>,
	linux-scsi@vger.kernel.org, Mike Snitzer <snitzer@redhat.com>,
	Martin Wilck <martin.wilck@suse.com>,
	Hannes Reinecke <hare@suse.de>, Hannes Reinecke <hare@suse.com>
Subject: [PATCH 19/30] libfc: quarantine timed out xids
Date: Fri, 26 Aug 2016 14:01:42 +0200	[thread overview]
Message-ID: <1472212913-39810-20-git-send-email-hare@suse.de> (raw)
In-Reply-To: <1472212913-39810-1-git-send-email-hare@suse.de>

When a sequence times out we have no idea what happened to the
frame, and we do not know whether we will ever receive it.
Hence we cannot re-use the xid: if the xid were re-used and the
timed-out frame were received afterwards, we would risk data
corruption.
So we need to quarantine the xid until the lport is reset.
Yes, I know this will (eventually) deplete the xid pool.
But for now it's the safest method.

Signed-off-by: Hannes Reinecke <hare@suse.com>
---
 drivers/scsi/libfc/fc_exch.c | 33 ++++++++++++++++++++++-----------
 drivers/scsi/libfc/fc_fcp.c  | 13 +++++++------
 include/scsi/libfc.h         |  1 +
 3 files changed, 30 insertions(+), 17 deletions(-)

diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c
index da9c727..510f38c 100644
--- a/drivers/scsi/libfc/fc_exch.c
+++ b/drivers/scsi/libfc/fc_exch.c
@@ -94,6 +94,7 @@ struct fc_exch_pool {
 struct fc_exch_mgr {
 	struct fc_exch_pool __percpu *pool;
 	mempool_t	*ep_pool;
+	struct fc_lport	*lport;
 	enum fc_class	class;
 	struct kref	kref;
 	u16		min_xid;
@@ -408,6 +409,8 @@ static int fc_exch_done_locked(struct fc_exch *ep)
 	return rc;
 }
 
+static struct fc_exch fc_quarantine_exch;
+
 /**
  * fc_exch_ptr_get() - Return an exchange from an exchange pool
  * @pool:  Exchange Pool to get an exchange from
@@ -452,14 +455,17 @@ static void fc_exch_delete(struct fc_exch *ep)
 
 	/* update cache of free slot */
 	index = (ep->xid - ep->em->min_xid) >> fc_cpu_order;
-	if (pool->left == FC_XID_UNKNOWN)
-		pool->left = index;
-	else if (pool->right == FC_XID_UNKNOWN)
-		pool->right = index;
-	else
-		pool->next_index = index;
-
-	fc_exch_ptr_set(pool, index, NULL);
+	if (!(ep->state & FC_EX_QUARANTINE)) {
+		if (pool->left == FC_XID_UNKNOWN)
+			pool->left = index;
+		else if (pool->right == FC_XID_UNKNOWN)
+			pool->right = index;
+		else
+			pool->next_index = index;
+		fc_exch_ptr_set(pool, index, NULL);
+	} else {
+		fc_exch_ptr_set(pool, index, &fc_quarantine_exch);
+	}
 	list_del(&ep->ex_list);
 	spin_unlock_bh(&pool->lock);
 	fc_exch_release(ep);	/* drop hold for exch in mp */
@@ -916,14 +922,14 @@ static inline struct fc_exch *fc_exch_alloc(struct fc_lport *lport,
  */
 static struct fc_exch *fc_exch_find(struct fc_exch_mgr *mp, u16 xid)
 {
+	struct fc_lport *lport = mp->lport;
 	struct fc_exch_pool *pool;
 	struct fc_exch *ep = NULL;
 	u16 cpu = xid & fc_cpu_mask;
 
 	if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
-		printk_ratelimited(KERN_ERR
-			"libfc: lookup request for XID = %d, "
-			"indicates invalid CPU %d\n", xid, cpu);
+		pr_err("host%u: lport %6.6x: xid %d invalid CPU %d\n:",
+		       lport->host->host_no, lport->port_id, xid, cpu);
 		return NULL;
 	}
 
@@ -931,6 +937,10 @@ static struct fc_exch *fc_exch_find(struct fc_exch_mgr *mp, u16 xid)
 		pool = per_cpu_ptr(mp->pool, cpu);
 		spin_lock_bh(&pool->lock);
 		ep = fc_exch_ptr_get(pool, (xid - mp->min_xid) >> fc_cpu_order);
+		if (ep == &fc_quarantine_exch) {
+			FC_LPORT_DBG(lport, "xid %x quarantined\n", xid);
+			ep = NULL;
+		}
 		if (ep) {
 			WARN_ON(ep->xid != xid);
 			fc_exch_hold(ep);
@@ -2429,6 +2439,7 @@ struct fc_exch_mgr *fc_exch_mgr_alloc(struct fc_lport *lport,
 		return NULL;
 
 	mp->class = class;
+	mp->lport = lport;
 	/* adjust em exch xid range for offload */
 	mp->min_xid = min_xid;
 
diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c
index 1cb2c59..8694e87 100644
--- a/drivers/scsi/libfc/fc_fcp.c
+++ b/drivers/scsi/libfc/fc_fcp.c
@@ -1531,13 +1531,14 @@ static void fc_fcp_rec_resp(struct fc_seq *seq, struct fc_frame *fp, void *arg)
 				   fsp->rport->port_id, rjt->er_reason,
 				   rjt->er_explan, fsp->xfer_len);
 			/*
-			 * If no data transfer, the command frame got dropped
-			 * so we just retry.  If data was transferred, we
-			 * lost the response but the target has no record,
-			 * so we abort and retry.
+			 * If response got lost or is stuck in the
+			 * queue somewhere we have no idea if and when
+			 * the response will be received. So quarantine
+			 * the xid and retry the command.
 			 */
-			if (rjt->er_explan == ELS_EXPL_OXID_RXID &&
-			    fsp->xfer_len == 0) {
+			if (rjt->er_explan == ELS_EXPL_OXID_RXID) {
+				struct fc_exch *ep = fc_seq_exch(fsp->seq_ptr);
+				ep->state |= FC_EX_QUARANTINE;
 				fsp->state |= FC_SRB_ABORTED;
 				fc_fcp_retry_cmd(fsp, FC_TRANS_RESET);
 				break;
diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h
index dc42d80..8cb752f 100644
--- a/include/scsi/libfc.h
+++ b/include/scsi/libfc.h
@@ -390,6 +390,7 @@ struct fc_seq {
 
 #define FC_EX_DONE		(1 << 0) /* ep is completed */
 #define FC_EX_RST_CLEANUP	(1 << 1) /* reset is forcing completion */
+#define FC_EX_QUARANTINE	(1 << 2) /* exch is quarantined */
 
 /**
  * struct fc_exch - Fibre Channel Exchange
-- 
1.8.5.6


  parent reply	other threads:[~2016-08-26 12:02 UTC|newest]

Thread overview: 35+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-08-26 12:01 [PATCH 00/30] FCoE VN2VN fixes Hannes Reinecke
2016-08-26 12:01 ` [PATCH 01/30] libfc: Revisit kref handling Hannes Reinecke
2016-08-26 13:37   ` Johannes Thumshirn
2016-08-26 12:01 ` [PATCH 02/30] libfc: additional debugging messages Hannes Reinecke
2016-08-26 12:01 ` [PATCH 03/30] libfc: spurious I/O error under high load Hannes Reinecke
2016-08-26 12:01 ` [PATCH 04/30] libfc: Do not login if the port is already started Hannes Reinecke
2016-08-26 12:01 ` [PATCH 05/30] libfc: use configured lport R_A_TOV when sending Hannes Reinecke
2016-08-26 12:01 ` [PATCH 06/30] libfc: use configured e_d_tov for remote port state Hannes Reinecke
2016-08-26 12:01 ` [PATCH 07/30] libfc: do not overwrite DID_TIME_OUT status Hannes Reinecke
2016-08-26 12:01 ` [PATCH 08/30] libfc: use error code for fc_rport_error() Hannes Reinecke
2016-08-26 12:01 ` [PATCH 09/30] libfc: Send LS_RJT responses on frame allocation Hannes Reinecke
2016-08-26 12:01 ` [PATCH 10/30] libfc: don't advance state machine for incoming FLOGI Hannes Reinecke
2016-08-26 12:01 ` [PATCH 11/30] libfc: Fixup disc_mutex handling Hannes Reinecke
2016-08-26 12:01 ` [PATCH 12/30] libfc: Do not drop down to FLOGI for fc_rport_login() Hannes Reinecke
2016-08-26 12:01 ` [PATCH 13/30] libfc: Implement RTV responder Hannes Reinecke
2016-08-26 12:01 ` [PATCH 14/30] libfc: Rework PRLI handling Hannes Reinecke
2016-08-26 12:01 ` [PATCH 15/30] libfc: Return LS_RJT_BUSY for PRLI in status PLOGI Hannes Reinecke
2016-08-26 12:01 ` [PATCH 16/30] libfc: Clarify ramp-down messages Hannes Reinecke
2016-08-26 12:01 ` [PATCH 17/30] libfc: sanitize E_D_TOV and R_A_TOV setting Hannes Reinecke
2016-08-26 12:01 ` [PATCH 18/30] libfc: safeguard against invalid exchange index Hannes Reinecke
2016-08-26 12:01 ` Hannes Reinecke [this message]
2016-08-26 12:01 ` [PATCH 20/30] libfc: don't fail sequence abort for completed Hannes Reinecke
2016-08-26 12:01 ` [PATCH 21/30] libfc: Do not drop out-of-order frames Hannes Reinecke
2016-08-26 12:01 ` [PATCH 22/30] libfc: reset timeout on queue full Hannes Reinecke
2016-08-26 12:01 ` [PATCH 23/30] libfc: wait for E_D_TOV when out-of-order sequence is received Hannes Reinecke
2016-08-26 12:01 ` [PATCH 24/30] fcoe: Use kfree_skb() instead of kfree() Hannes Reinecke
2016-08-26 12:01 ` [PATCH 25/30] fcoe: set default TC priority Hannes Reinecke
2016-08-26 12:01 ` [PATCH 26/30] fcoe: inhibit writing invalid values into the 'enabled' Hannes Reinecke
2016-08-26 12:01 ` [PATCH 27/30] fcoe: correct sending FIP VLAN packets on VLAN 0 Hannes Reinecke
2016-08-26 12:01 ` [PATCH 28/30] fcoe: FIP debugging Hannes Reinecke
2016-08-26 12:01 ` [PATCH 29/30] fcoe: filter out frames from invalid vlans Hannes Reinecke
2016-08-26 12:01 ` [PATCH 30/30] fcoe: make R_A_TOV and E_D_TOV configurable Hannes Reinecke
2016-08-26 13:48 ` [PATCH 00/30] FCoE VN2VN fixes Johannes Thumshirn
2016-08-31  2:46   ` Martin K. Petersen
2016-08-31  7:25     ` Johannes Thumshirn

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1472212913-39810-20-git-send-email-hare@suse.de \
    --to=hare@suse.de \
    --cc=hare@suse.com \
    --cc=hch@lst.de \
    --cc=james.bottomley@hansenpartnership.com \
    --cc=jth@kernel.org \
    --cc=linux-scsi@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    --cc=martin.wilck@suse.com \
    --cc=snitzer@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).