From: "Matthew R. Ochs" <mrochs@linux.vnet.ibm.com>
To: linux-scsi@vger.kernel.org,
James Bottomley <James.Bottomley@HansenPartnership.com>,
"Nicholas A. Bellinger" <nab@linux-iscsi.org>,
Brian King <brking@linux.vnet.ibm.com>,
Ian Munsie <imunsie@au1.ibm.com>,
Daniel Axtens <dja@ozlabs.au.ibm.com>,
Andrew Donnellan <andrew.donnellan@au1.ibm.com>,
Tomas Henzl <thenzl@redhat.com>,
David Laight <David.Laight@ACULAB.COM>
Cc: Michael Neuling <mikey@neuling.org>,
"Manoj N. Kumar" <manoj@linux.vnet.ibm.com>,
linuxppc-dev@lists.ozlabs.org
Subject: [PATCH v6 13/37] cxlflash: Fix to avoid stall while waiting on TMF
Date: Wed, 21 Oct 2015 15:13:21 -0500 [thread overview]
Message-ID: <1445458401-3279-1-git-send-email-mrochs@linux.vnet.ibm.com> (raw)
In-Reply-To: <1445458134-63197-1-git-send-email-mrochs@linux.vnet.ibm.com>
Borrowing the TMF waitq's spinlock causes a stall condition when
waiting for the TMF to complete. To remedy, introduce our own spin
lock to serialize TMF and use the appropriate wait services.
Also add a timeout while waiting for a TMF completion. When a TMF
times out, report back a failure such that a bigger hammer reset
can occur.
Signed-off-by: Matthew R. Ochs <mrochs@linux.vnet.ibm.com>
Signed-off-by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>
Reviewed-by: Brian King <brking@linux.vnet.ibm.com>
---
drivers/scsi/cxlflash/common.h | 1 +
drivers/scsi/cxlflash/main.c | 55 +++++++++++++++++++++++++-----------------
2 files changed, 34 insertions(+), 22 deletions(-)
diff --git a/drivers/scsi/cxlflash/common.h b/drivers/scsi/cxlflash/common.h
index b038ac7..7a0cb5c 100644
--- a/drivers/scsi/cxlflash/common.h
+++ b/drivers/scsi/cxlflash/common.h
@@ -124,6 +124,7 @@ struct cxlflash_cfg {
struct list_head lluns; /* list of llun_info structs */
wait_queue_head_t tmf_waitq;
+ spinlock_t tmf_slock;
bool tmf_active;
wait_queue_head_t reset_waitq;
enum cxlflash_state state;
diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c
index 527ff85..110037d 100644
--- a/drivers/scsi/cxlflash/main.c
+++ b/drivers/scsi/cxlflash/main.c
@@ -249,11 +249,10 @@ static void cmd_complete(struct afu_cmd *cmd)
scp->scsi_done(scp);
if (cmd_is_tmf) {
- spin_lock_irqsave(&cfg->tmf_waitq.lock, lock_flags);
+ spin_lock_irqsave(&cfg->tmf_slock, lock_flags);
cfg->tmf_active = false;
wake_up_all_locked(&cfg->tmf_waitq);
- spin_unlock_irqrestore(&cfg->tmf_waitq.lock,
- lock_flags);
+ spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
}
} else
complete(&cmd->cevent);
@@ -420,6 +419,7 @@ static int send_tmf(struct afu *afu, struct scsi_cmnd *scp, u64 tmfcmd)
struct device *dev = &cfg->dev->dev;
ulong lock_flags;
int rc = 0;
+ ulong to;
cmd = cmd_checkout(afu);
if (unlikely(!cmd)) {
@@ -428,15 +428,15 @@ static int send_tmf(struct afu *afu, struct scsi_cmnd *scp, u64 tmfcmd)
goto out;
}
- /* If a Task Management Function is active, do not send one more.
- */
- spin_lock_irqsave(&cfg->tmf_waitq.lock, lock_flags);
+ /* When Task Management Function is active do not send another */
+ spin_lock_irqsave(&cfg->tmf_slock, lock_flags);
if (cfg->tmf_active)
- wait_event_interruptible_locked_irq(cfg->tmf_waitq,
- !cfg->tmf_active);
+ wait_event_interruptible_lock_irq(cfg->tmf_waitq,
+ !cfg->tmf_active,
+ cfg->tmf_slock);
cfg->tmf_active = true;
cmd->cmd_tmf = true;
- spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags);
+ spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
cmd->rcb.ctx_id = afu->ctx_hndl;
cmd->rcb.port_sel = port_sel;
@@ -457,15 +457,24 @@ static int send_tmf(struct afu *afu, struct scsi_cmnd *scp, u64 tmfcmd)
rc = send_cmd(afu, cmd);
if (unlikely(rc)) {
cmd_checkin(cmd);
- spin_lock_irqsave(&cfg->tmf_waitq.lock, lock_flags);
+ spin_lock_irqsave(&cfg->tmf_slock, lock_flags);
cfg->tmf_active = false;
- spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags);
+ spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
goto out;
}
- spin_lock_irqsave(&cfg->tmf_waitq.lock, lock_flags);
- wait_event_interruptible_locked_irq(cfg->tmf_waitq, !cfg->tmf_active);
- spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags);
+ spin_lock_irqsave(&cfg->tmf_slock, lock_flags);
+ to = msecs_to_jiffies(5000);
+ to = wait_event_interruptible_lock_irq_timeout(cfg->tmf_waitq,
+ !cfg->tmf_active,
+ cfg->tmf_slock,
+ to);
+ if (!to) {
+ cfg->tmf_active = false;
+ dev_err(dev, "%s: TMF timed out!\n", __func__);
+ rc = -1;
+ }
+ spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
out:
return rc;
}
@@ -512,16 +521,17 @@ static int cxlflash_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scp)
get_unaligned_be32(&((u32 *)scp->cmnd)[2]),
get_unaligned_be32(&((u32 *)scp->cmnd)[3]));
- /* If a Task Management Function is active, wait for it to complete
+ /*
+ * If a Task Management Function is active, wait for it to complete
* before continuing with regular commands.
*/
- spin_lock_irqsave(&cfg->tmf_waitq.lock, lock_flags);
+ spin_lock_irqsave(&cfg->tmf_slock, lock_flags);
if (cfg->tmf_active) {
- spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags);
+ spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
rc = SCSI_MLQUEUE_HOST_BUSY;
goto out;
}
- spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags);
+ spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
switch (cfg->state) {
case STATE_RESET:
@@ -713,11 +723,12 @@ static void cxlflash_remove(struct pci_dev *pdev)
/* If a Task Management Function is active, wait for it to complete
* before continuing with remove.
*/
- spin_lock_irqsave(&cfg->tmf_waitq.lock, lock_flags);
+ spin_lock_irqsave(&cfg->tmf_slock, lock_flags);
if (cfg->tmf_active)
- wait_event_interruptible_locked_irq(cfg->tmf_waitq,
- !cfg->tmf_active);
- spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags);
+ wait_event_interruptible_lock_irq(cfg->tmf_waitq,
+ !cfg->tmf_active,
+ cfg->tmf_slock);
+ spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
cfg->state = STATE_FAILTERM;
cxlflash_stop_term_user_contexts(cfg);
--
2.1.0
next prev parent reply other threads:[~2015-10-21 20:14 UTC|newest]
Thread overview: 69+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-10-21 20:08 [PATCH v6 00/37] cxlflash: Miscellaneous bug fixes and corrections Matthew R. Ochs
2015-10-21 20:10 ` [PATCH v6 01/37] cxlflash: Fix to avoid invalid port_sel value Matthew R. Ochs
2015-10-21 20:11 ` [PATCH v6 02/37] cxlflash: Replace magic numbers with literals Matthew R. Ochs
2015-10-21 20:11 ` [PATCH v6 03/37] cxlflash: Fix read capacity timeout Matthew R. Ochs
2015-10-21 20:11 ` [PATCH v6 04/37] cxlflash: Fix potential oops following LUN removal Matthew R. Ochs
2015-10-21 20:11 ` [PATCH v6 05/37] cxlflash: Fix data corruption when vLUN used over multiple cards Matthew R. Ochs
2015-10-21 20:11 ` [PATCH v6 06/37] cxlflash: Fix to avoid sizeof(bool) Matthew R. Ochs
2015-10-21 20:11 ` [PATCH v6 07/37] cxlflash: Fix context encode mask width Matthew R. Ochs
2015-10-21 20:11 ` [PATCH v6 08/37] cxlflash: Fix to avoid CXL services during EEH Matthew R. Ochs
2015-10-21 20:12 ` [PATCH v6 09/37] cxlflash: Correct naming of limbo state and waitq Matthew R. Ochs
2015-10-21 20:12 ` [PATCH v6 10/37] cxlflash: Make functions static Matthew R. Ochs
2015-10-21 20:12 ` [PATCH v6 11/37] cxlflash: Refine host/device attributes Matthew R. Ochs
2015-10-23 13:33 ` Tomas Henzl
2015-10-21 20:13 ` [PATCH v6 12/37] cxlflash: Fix to avoid spamming the kernel log Matthew R. Ochs
2015-10-23 13:33 ` Tomas Henzl
2015-10-21 20:13 ` Matthew R. Ochs [this message]
2015-10-23 13:36 ` [PATCH v6 13/37] cxlflash: Fix to avoid stall while waiting on TMF Tomas Henzl
2015-10-21 20:13 ` [PATCH v6 14/37] cxlflash: Fix location of setting resid Matthew R. Ochs
2015-10-23 13:37 ` Tomas Henzl
2015-10-21 20:13 ` [PATCH v6 15/37] cxlflash: Fix host link up event handling Matthew R. Ochs
2015-10-23 13:38 ` Tomas Henzl
2015-10-21 20:13 ` [PATCH v6 16/37] cxlflash: Fix async interrupt bypass logic Matthew R. Ochs
2015-10-23 3:40 ` Andrew Donnellan
2015-10-23 13:39 ` Tomas Henzl
2015-10-21 20:13 ` [PATCH v6 17/37] cxlflash: Remove dual port online dependency Matthew R. Ochs
2015-10-23 13:39 ` Tomas Henzl
2015-10-21 20:14 ` [PATCH v6 18/37] cxlflash: Fix AFU version access/storage and add check Matthew R. Ochs
2015-10-23 13:40 ` Tomas Henzl
2015-10-21 20:14 ` [PATCH v6 19/37] cxlflash: Correct usage of scsi_host_put() Matthew R. Ochs
2015-10-23 13:41 ` Tomas Henzl
2015-10-21 20:14 ` [PATCH v6 20/37] cxlflash: Fix to prevent workq from accessing freed memory Matthew R. Ochs
2015-10-23 13:41 ` Tomas Henzl
2015-10-21 20:14 ` [PATCH v6 21/37] cxlflash: Correct behavior in device reset handler following EEH Matthew R. Ochs
2015-10-23 13:42 ` Tomas Henzl
2015-10-21 20:14 ` [PATCH v6 22/37] cxlflash: Remove unnecessary scsi_block_requests Matthew R. Ochs
2015-10-23 13:42 ` Tomas Henzl
2015-10-21 20:14 ` [PATCH v6 23/37] cxlflash: Fix function prolog parameters and return codes Matthew R. Ochs
2015-10-23 13:45 ` Tomas Henzl
2015-10-21 20:14 ` [PATCH v6 24/37] cxlflash: Fix MMIO and endianness errors Matthew R. Ochs
2015-10-23 13:53 ` Tomas Henzl
2015-10-21 20:14 ` [PATCH v6 25/37] cxlflash: Fix to prevent EEH recovery failure Matthew R. Ochs
2015-10-23 13:54 ` Tomas Henzl
2015-10-21 20:15 ` [PATCH v6 26/37] cxlflash: Correct spelling, grammar, and alignment mistakes Matthew R. Ochs
2015-10-23 13:54 ` Tomas Henzl
2015-10-21 20:15 ` [PATCH v6 27/37] cxlflash: Fix to prevent stale AFU RRQ Matthew R. Ochs
2015-10-23 13:55 ` Tomas Henzl
2015-10-21 20:15 ` [PATCH v6 28/37] MAINTAINERS: Add cxlflash driver Matthew R. Ochs
2015-10-21 20:15 ` [PATCH v6 29/37] cxlflash: Fix to double the delay each time Matthew R. Ochs
2015-10-23 13:57 ` Tomas Henzl
2015-10-21 20:15 ` [PATCH v6 30/37] cxlflash: Fix to avoid corrupting adapter fops Matthew R. Ochs
2015-10-23 14:00 ` Tomas Henzl
2015-10-21 20:15 ` [PATCH v6 31/37] cxlflash: Correct trace string Matthew R. Ochs
2015-10-23 14:00 ` Tomas Henzl
2015-10-21 20:15 ` [PATCH v6 32/37] cxlflash: Fix to avoid potential deadlock on EEH Matthew R. Ochs
2015-10-23 14:01 ` Tomas Henzl
2015-10-21 20:16 ` [PATCH v6 33/37] cxlflash: Fix to avoid leaving dangling interrupt resources Matthew R. Ochs
2015-10-23 14:01 ` Tomas Henzl
2015-10-21 20:16 ` [PATCH v6 34/37] cxlflash: Fix to escalate to LINK_RESET on login timeout Matthew R. Ochs
2015-10-23 14:01 ` Tomas Henzl
2015-10-21 20:16 ` [PATCH v6 35/37] cxlflash: Fix to avoid corrupting port selection mask Matthew R. Ochs
2015-10-22 17:17 ` Manoj Kumar
2015-10-23 3:52 ` Andrew Donnellan
2015-10-21 20:16 ` [PATCH v6 36/37] cxlflash: Fix to avoid lock instrumentation rejection Matthew R. Ochs
2015-10-22 17:34 ` Manoj Kumar
2015-10-23 3:22 ` Andrew Donnellan
2015-10-21 20:16 ` [PATCH v6 37/37] cxlflash: Fix to avoid bypassing context cleanup Matthew R. Ochs
2015-10-22 2:01 ` Andrew Donnellan
2015-10-22 18:05 ` Manoj Kumar
2015-10-27 23:30 ` [PATCH v6 00/37] cxlflash: Miscellaneous bug fixes and corrections Matthew R. Ochs
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1445458401-3279-1-git-send-email-mrochs@linux.vnet.ibm.com \
--to=mrochs@linux.vnet.ibm.com \
--cc=David.Laight@ACULAB.COM \
--cc=James.Bottomley@HansenPartnership.com \
--cc=andrew.donnellan@au1.ibm.com \
--cc=brking@linux.vnet.ibm.com \
--cc=dja@ozlabs.au.ibm.com \
--cc=imunsie@au1.ibm.com \
--cc=linux-scsi@vger.kernel.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=manoj@linux.vnet.ibm.com \
--cc=mikey@neuling.org \
--cc=nab@linux-iscsi.org \
--cc=thenzl@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).