From: Hannes Reinecke <hare@suse.de>
To: "Martin K. Petersen" <martin.petersen@oracle.com>
Cc: James Bottomley <james.bottomley@hansenpartnership.com>,
Christoph Hellwig <hch@lst.de>,
Johannes Thumshirn <jth@kernel.org>,
linux-scsi@vger.kernel.org, Mike Snitzer <snitzer@redhat.com>,
Martin Wilck <martin.wilck@suse.com>,
Hannes Reinecke <hare@suse.de>, Hannes Reinecke <hare@suse.com>
Subject: [PATCH 19/30] libfc: quarantine timed out xids
Date: Fri, 26 Aug 2016 14:01:42 +0200 [thread overview]
Message-ID: <1472212913-39810-20-git-send-email-hare@suse.de> (raw)
In-Reply-To: <1472212913-39810-1-git-send-email-hare@suse.de>
When a sequence times out we have no idea what happened to the
frame, and we do not know whether we will ever receive it.
Hence we cannot re-use the xid: we would risk data corruption
if the xid were re-used and the timed-out frame were then
received afterwards.
So we need to quarantine the xid until the lport is reset.
Yes, I know this will (eventually) deplete the xid pool.
But for now it's the safest method.
Signed-off-by: Hannes Reinecke <hare@suse.com>
---
drivers/scsi/libfc/fc_exch.c | 33 ++++++++++++++++++++++-----------
drivers/scsi/libfc/fc_fcp.c | 13 +++++++------
include/scsi/libfc.h | 1 +
3 files changed, 30 insertions(+), 17 deletions(-)
diff --git a/drivers/scsi/libfc/fc_exch.c b/drivers/scsi/libfc/fc_exch.c
index da9c727..510f38c 100644
--- a/drivers/scsi/libfc/fc_exch.c
+++ b/drivers/scsi/libfc/fc_exch.c
@@ -94,6 +94,7 @@ struct fc_exch_pool {
struct fc_exch_mgr {
struct fc_exch_pool __percpu *pool;
mempool_t *ep_pool;
+ struct fc_lport *lport;
enum fc_class class;
struct kref kref;
u16 min_xid;
@@ -408,6 +409,8 @@ static int fc_exch_done_locked(struct fc_exch *ep)
return rc;
}
+static struct fc_exch fc_quarantine_exch;
+
/**
* fc_exch_ptr_get() - Return an exchange from an exchange pool
* @pool: Exchange Pool to get an exchange from
@@ -452,14 +455,17 @@ static void fc_exch_delete(struct fc_exch *ep)
/* update cache of free slot */
index = (ep->xid - ep->em->min_xid) >> fc_cpu_order;
- if (pool->left == FC_XID_UNKNOWN)
- pool->left = index;
- else if (pool->right == FC_XID_UNKNOWN)
- pool->right = index;
- else
- pool->next_index = index;
-
- fc_exch_ptr_set(pool, index, NULL);
+ if (!(ep->state & FC_EX_QUARANTINE)) {
+ if (pool->left == FC_XID_UNKNOWN)
+ pool->left = index;
+ else if (pool->right == FC_XID_UNKNOWN)
+ pool->right = index;
+ else
+ pool->next_index = index;
+ fc_exch_ptr_set(pool, index, NULL);
+ } else {
+ fc_exch_ptr_set(pool, index, &fc_quarantine_exch);
+ }
list_del(&ep->ex_list);
spin_unlock_bh(&pool->lock);
fc_exch_release(ep); /* drop hold for exch in mp */
@@ -916,14 +922,14 @@ static inline struct fc_exch *fc_exch_alloc(struct fc_lport *lport,
*/
static struct fc_exch *fc_exch_find(struct fc_exch_mgr *mp, u16 xid)
{
+ struct fc_lport *lport = mp->lport;
struct fc_exch_pool *pool;
struct fc_exch *ep = NULL;
u16 cpu = xid & fc_cpu_mask;
if (cpu >= nr_cpu_ids || !cpu_possible(cpu)) {
- printk_ratelimited(KERN_ERR
- "libfc: lookup request for XID = %d, "
- "indicates invalid CPU %d\n", xid, cpu);
+ pr_err("host%u: lport %6.6x: xid %d invalid CPU %d\n:",
+ lport->host->host_no, lport->port_id, xid, cpu);
return NULL;
}
@@ -931,6 +937,10 @@ static struct fc_exch *fc_exch_find(struct fc_exch_mgr *mp, u16 xid)
pool = per_cpu_ptr(mp->pool, cpu);
spin_lock_bh(&pool->lock);
ep = fc_exch_ptr_get(pool, (xid - mp->min_xid) >> fc_cpu_order);
+ if (ep == &fc_quarantine_exch) {
+ FC_LPORT_DBG(lport, "xid %x quarantined\n", xid);
+ ep = NULL;
+ }
if (ep) {
WARN_ON(ep->xid != xid);
fc_exch_hold(ep);
@@ -2429,6 +2439,7 @@ struct fc_exch_mgr *fc_exch_mgr_alloc(struct fc_lport *lport,
return NULL;
mp->class = class;
+ mp->lport = lport;
/* adjust em exch xid range for offload */
mp->min_xid = min_xid;
diff --git a/drivers/scsi/libfc/fc_fcp.c b/drivers/scsi/libfc/fc_fcp.c
index 1cb2c59..8694e87 100644
--- a/drivers/scsi/libfc/fc_fcp.c
+++ b/drivers/scsi/libfc/fc_fcp.c
@@ -1531,13 +1531,14 @@ static void fc_fcp_rec_resp(struct fc_seq *seq, struct fc_frame *fp, void *arg)
fsp->rport->port_id, rjt->er_reason,
rjt->er_explan, fsp->xfer_len);
/*
- * If no data transfer, the command frame got dropped
- * so we just retry. If data was transferred, we
- * lost the response but the target has no record,
- * so we abort and retry.
+ * If the response got lost or is stuck in a
+ * queue somewhere, we have no idea if and when
+ * it will be received. So quarantine
+ * the xid and retry the command.
*/
- if (rjt->er_explan == ELS_EXPL_OXID_RXID &&
- fsp->xfer_len == 0) {
+ if (rjt->er_explan == ELS_EXPL_OXID_RXID) {
+ struct fc_exch *ep = fc_seq_exch(fsp->seq_ptr);
+ ep->state |= FC_EX_QUARANTINE;
fsp->state |= FC_SRB_ABORTED;
fc_fcp_retry_cmd(fsp, FC_TRANS_RESET);
break;
diff --git a/include/scsi/libfc.h b/include/scsi/libfc.h
index dc42d80..8cb752f 100644
--- a/include/scsi/libfc.h
+++ b/include/scsi/libfc.h
@@ -390,6 +390,7 @@ struct fc_seq {
#define FC_EX_DONE (1 << 0) /* ep is completed */
#define FC_EX_RST_CLEANUP (1 << 1) /* reset is forcing completion */
+#define FC_EX_QUARANTINE (1 << 2) /* exch is quarantined */
/**
* struct fc_exch - Fibre Channel Exchange
--
1.8.5.6
next prev parent reply other threads:[~2016-08-26 12:02 UTC|newest]
Thread overview: 35+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-08-26 12:01 [PATCH 00/30] FCoE VN2VN fixes Hannes Reinecke
2016-08-26 12:01 ` [PATCH 01/30] libfc: Revisit kref handling Hannes Reinecke
2016-08-26 13:37 ` Johannes Thumshirn
2016-08-26 12:01 ` [PATCH 02/30] libfc: additional debugging messages Hannes Reinecke
2016-08-26 12:01 ` [PATCH 03/30] libfc: spurious I/O error under high load Hannes Reinecke
2016-08-26 12:01 ` [PATCH 04/30] libfc: Do not login if the port is already started Hannes Reinecke
2016-08-26 12:01 ` [PATCH 05/30] libfc: use configured lport R_A_TOV when sending Hannes Reinecke
2016-08-26 12:01 ` [PATCH 06/30] libfc: use configured e_d_tov for remote port state Hannes Reinecke
2016-08-26 12:01 ` [PATCH 07/30] libfc: do not overwrite DID_TIME_OUT status Hannes Reinecke
2016-08-26 12:01 ` [PATCH 08/30] libfc: use error code for fc_rport_error() Hannes Reinecke
2016-08-26 12:01 ` [PATCH 09/30] libfc: Send LS_RJT responses on frame allocation Hannes Reinecke
2016-08-26 12:01 ` [PATCH 10/30] libfc: don't advance state machine for incoming FLOGI Hannes Reinecke
2016-08-26 12:01 ` [PATCH 11/30] libfc: Fixup disc_mutex handling Hannes Reinecke
2016-08-26 12:01 ` [PATCH 12/30] libfc: Do not drop down to FLOGI for fc_rport_login() Hannes Reinecke
2016-08-26 12:01 ` [PATCH 13/30] libfc: Implement RTV responder Hannes Reinecke
2016-08-26 12:01 ` [PATCH 14/30] libfc: Rework PRLI handling Hannes Reinecke
2016-08-26 12:01 ` [PATCH 15/30] libfc: Return LS_RJT_BUSY for PRLI in status PLOGI Hannes Reinecke
2016-08-26 12:01 ` [PATCH 16/30] libfc: Clarify ramp-down messages Hannes Reinecke
2016-08-26 12:01 ` [PATCH 17/30] libfc: sanitize E_D_TOV and R_A_TOV setting Hannes Reinecke
2016-08-26 12:01 ` [PATCH 18/30] libfc: safeguard against invalid exchange index Hannes Reinecke
2016-08-26 12:01 ` Hannes Reinecke [this message]
2016-08-26 12:01 ` [PATCH 20/30] libfc: don't fail sequence abort for completed Hannes Reinecke
2016-08-26 12:01 ` [PATCH 21/30] libfc: Do not drop out-of-order frames Hannes Reinecke
2016-08-26 12:01 ` [PATCH 22/30] libfc: reset timeout on queue full Hannes Reinecke
2016-08-26 12:01 ` [PATCH 23/30] libfc: wait for E_D_TOV when out-of-order sequence is received Hannes Reinecke
2016-08-26 12:01 ` [PATCH 24/30] fcoe: Use kfree_skb() instead of kfree() Hannes Reinecke
2016-08-26 12:01 ` [PATCH 25/30] fcoe: set default TC priority Hannes Reinecke
2016-08-26 12:01 ` [PATCH 26/30] fcoe: inhibit writing invalid values into the 'enabled' Hannes Reinecke
2016-08-26 12:01 ` [PATCH 27/30] fcoe: correct sending FIP VLAN packets on VLAN 0 Hannes Reinecke
2016-08-26 12:01 ` [PATCH 28/30] fcoe: FIP debugging Hannes Reinecke
2016-08-26 12:01 ` [PATCH 29/30] fcoe: filter out frames from invalid vlans Hannes Reinecke
2016-08-26 12:01 ` [PATCH 30/30] fcoe: make R_A_TOV and E_D_TOV configurable Hannes Reinecke
2016-08-26 13:48 ` [PATCH 00/30] FCoE VN2VN fixes Johannes Thumshirn
2016-08-31 2:46 ` Martin K. Petersen
2016-08-31 7:25 ` Johannes Thumshirn
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1472212913-39810-20-git-send-email-hare@suse.de \
--to=hare@suse.de \
--cc=hare@suse.com \
--cc=hch@lst.de \
--cc=james.bottomley@hansenpartnership.com \
--cc=jth@kernel.org \
--cc=linux-scsi@vger.kernel.org \
--cc=martin.petersen@oracle.com \
--cc=martin.wilck@suse.com \
--cc=snitzer@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).