* [PATCH v2 14/30] cxlflash: Fix to avoid stall while waiting on TMF
@ 2015-09-16 16:57 Matthew R. Ochs
0 siblings, 0 replies; 4+ messages in thread
From: Matthew R. Ochs @ 2015-09-16 16:57 UTC (permalink / raw)
To: linux-scsi, James.Bottomley, nab, brking, imunsie, dja,
andrew.donnellan
Cc: mikey, linuxppc-dev, Manoj N. Kumar
Borrowing the TMF waitq's spinlock causes a stall condition when
waiting for the TMF to complete. To remedy, introduce our own spin
lock to serialize TMF and use the appropriate wait services.
Also add a timeout while waiting for a TMF completion. When a TMF
times out, report back a failure such that a bigger hammer reset
can occur.
Signed-off-by: Matthew R. Ochs <mrochs@linux.vnet.ibm.com>
Signed-off-by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>
---
drivers/scsi/cxlflash/common.h | 1 +
drivers/scsi/cxlflash/main.c | 55 +++++++++++++++++++++++++-----------------
2 files changed, 34 insertions(+), 22 deletions(-)
diff --git a/drivers/scsi/cxlflash/common.h b/drivers/scsi/cxlflash/common.h
index 2855b09..c8327ac 100644
--- a/drivers/scsi/cxlflash/common.h
+++ b/drivers/scsi/cxlflash/common.h
@@ -126,6 +126,7 @@ struct cxlflash_cfg {
struct list_head lluns; /* list of llun_info structs */
wait_queue_head_t tmf_waitq;
+ spinlock_t tmf_slock;
bool tmf_active;
wait_queue_head_t reset_waitq;
enum cxlflash_state state;
diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c
index 600c7f9..29e40cc 100644
--- a/drivers/scsi/cxlflash/main.c
+++ b/drivers/scsi/cxlflash/main.c
@@ -249,11 +249,10 @@ static void cmd_complete(struct afu_cmd *cmd)
scp->scsi_done(scp);
if (cmd_is_tmf) {
- spin_lock_irqsave(&cfg->tmf_waitq.lock, lock_flags);
+ spin_lock_irqsave(&cfg->tmf_slock, lock_flags);
cfg->tmf_active = false;
wake_up_all_locked(&cfg->tmf_waitq);
- spin_unlock_irqrestore(&cfg->tmf_waitq.lock,
- lock_flags);
+ spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
}
} else
complete(&cmd->cevent);
@@ -420,6 +419,7 @@ static int send_tmf(struct afu *afu, struct scsi_cmnd *scp, u64 tmfcmd)
struct device *dev = &cfg->dev->dev;
ulong lock_flags;
int rc = 0;
+ ulong to;
cmd = cmd_checkout(afu);
if (unlikely(!cmd)) {
@@ -428,15 +428,15 @@ static int send_tmf(struct afu *afu, struct scsi_cmnd *scp, u64 tmfcmd)
goto out;
}
- /* If a Task Management Function is active, do not send one more.
- */
- spin_lock_irqsave(&cfg->tmf_waitq.lock, lock_flags);
+ /* When Task Management Function is active do not send another */
+ spin_lock_irqsave(&cfg->tmf_slock, lock_flags);
if (cfg->tmf_active)
- wait_event_interruptible_locked_irq(cfg->tmf_waitq,
- !cfg->tmf_active);
+ wait_event_interruptible_lock_irq(cfg->tmf_waitq,
+ !cfg->tmf_active,
+ cfg->tmf_slock);
cfg->tmf_active = true;
cmd->cmd_tmf = true;
- spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags);
+ spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
cmd->rcb.ctx_id = afu->ctx_hndl;
cmd->rcb.port_sel = port_sel;
@@ -457,15 +457,24 @@ static int send_tmf(struct afu *afu, struct scsi_cmnd *scp, u64 tmfcmd)
rc = send_cmd(afu, cmd);
if (unlikely(rc)) {
cmd_checkin(cmd);
- spin_lock_irqsave(&cfg->tmf_waitq.lock, lock_flags);
+ spin_lock_irqsave(&cfg->tmf_slock, lock_flags);
cfg->tmf_active = false;
- spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags);
+ spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
goto out;
}
- spin_lock_irqsave(&cfg->tmf_waitq.lock, lock_flags);
- wait_event_interruptible_locked_irq(cfg->tmf_waitq, !cfg->tmf_active);
- spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags);
+ spin_lock_irqsave(&cfg->tmf_slock, lock_flags);
+ to = msecs_to_jiffies(5000);
+ to = wait_event_interruptible_lock_irq_timeout(cfg->tmf_waitq,
+ !cfg->tmf_active,
+ cfg->tmf_slock,
+ to);
+ if (!to) {
+ cfg->tmf_active = false;
+ dev_err(dev, "%s: TMF timed out!\n", __func__);
+ rc = -1;
+ }
+ spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
out:
return rc;
}
@@ -512,16 +521,17 @@ static int cxlflash_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scp)
get_unaligned_be32(&((u32 *)scp->cmnd)[2]),
get_unaligned_be32(&((u32 *)scp->cmnd)[3]));
- /* If a Task Management Function is active, wait for it to complete
+ /*
+ * If a Task Management Function is active, wait for it to complete
* before continuing with regular commands.
*/
- spin_lock_irqsave(&cfg->tmf_waitq.lock, lock_flags);
+ spin_lock_irqsave(&cfg->tmf_slock, lock_flags);
if (cfg->tmf_active) {
- spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags);
+ spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
rc = SCSI_MLQUEUE_HOST_BUSY;
goto out;
}
- spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags);
+ spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
switch (cfg->state) {
case STATE_RESET:
@@ -713,11 +723,12 @@ static void cxlflash_remove(struct pci_dev *pdev)
/* If a Task Management Function is active, wait for it to complete
* before continuing with remove.
*/
- spin_lock_irqsave(&cfg->tmf_waitq.lock, lock_flags);
+ spin_lock_irqsave(&cfg->tmf_slock, lock_flags);
if (cfg->tmf_active)
- wait_event_interruptible_locked_irq(cfg->tmf_waitq,
- !cfg->tmf_active);
- spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags);
+ wait_event_interruptible_lock_irq(cfg->tmf_waitq,
+ !cfg->tmf_active,
+ cfg->tmf_slock);
+ spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
cfg->state = STATE_FAILTERM;
atomic_inc(&cfg->remove_active);
--
2.1.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH v2 00/30] cxlflash: Miscellaneous bug fixes and corrections
@ 2015-09-16 21:23 Matthew R. Ochs
2015-09-16 21:30 ` [PATCH v2 14/30] cxlflash: Fix to avoid stall while waiting on TMF Matthew R. Ochs
0 siblings, 1 reply; 4+ messages in thread
From: Matthew R. Ochs @ 2015-09-16 21:23 UTC (permalink / raw)
To: linux-scsi, James Bottomley, Nicholas A. Bellinger, Brian King,
Ian Munsie, Daniel Axtens, Andrew Donnellan
Cc: Michael Neuling, linuxppc-dev
This patch set contains various fixes and corrections for issues that
were found during test and code review. The series is based upon the
code upstreamed in 4.3 and is intended for the rc phase. The entire
set is bisectable. Please reference the changelog below for details
on what has been altered from previous versions of this patch set.
v2 Changes:
- Incorporate comments from Ian Munsie
- Rework commit messages to be more descriptive
- Add state change serialization patch
Manoj Kumar (3):
cxlflash: Fix to avoid invalid port_sel value
cxlflash: Replace magic numbers with literals
cxlflash: Fix read capacity timeout
Matthew R. Ochs (27):
cxlflash: Fix potential oops following LUN removal
cxlflash: Fix data corruption when vLUN used over multiple cards
cxlflash: Fix to avoid sizeof(bool)
cxlflash: Fix context encode mask width
cxlflash: Fix to avoid CXL services during EEH
cxlflash: Check for removal when processing interrupt
cxlflash: Correct naming of limbo state and waitq
cxlflash: Make functions static
cxlflash: Refine host/device attributes
cxlflash: Fix to avoid spamming the kernel log
cxlflash: Fix to avoid stall while waiting on TMF
cxlflash: Fix location of setting resid
cxlflash: Fix host link up event handling
cxlflash: Fix async interrupt bypass logic
cxlflash: Remove dual port online dependency
cxlflash: Fix AFU version access/storage and add check
cxlflash: Correct usage of scsi_host_put()
cxlflash: Fix to prevent workq from accessing freed memory
cxlflash: Correct behavior in device reset handler following EEH
cxlflash: Remove unnecessary scsi_block_requests
cxlflash: Fix function prolog parameters and return codes
cxlflash: Fix MMIO and endianness errors
cxlflash: Fix to prevent EEH recovery failure
cxlflash: Correct spelling, grammar, and alignment mistakes
cxlflash: Fix to prevent stale AFU RRQ
cxlflash: Fix to avoid state change collision
MAINTAINERS: Add cxlflash driver
MAINTAINERS | 9 +
drivers/scsi/cxlflash/common.h | 29 +-
drivers/scsi/cxlflash/lunmgt.c | 9 +-
drivers/scsi/cxlflash/main.c | 1575 ++++++++++++++++++++-----------------
drivers/scsi/cxlflash/main.h | 1 +
drivers/scsi/cxlflash/sislite.h | 8 +-
drivers/scsi/cxlflash/superpipe.c | 177 +++--
drivers/scsi/cxlflash/superpipe.h | 11 +-
drivers/scsi/cxlflash/vlun.c | 39 +-
9 files changed, 1036 insertions(+), 822 deletions(-)
--
2.1.0
^ permalink raw reply [flat|nested] 4+ messages in thread
* [PATCH v2 14/30] cxlflash: Fix to avoid stall while waiting on TMF
2015-09-16 21:23 [PATCH v2 00/30] cxlflash: Miscellaneous bug fixes and corrections Matthew R. Ochs
@ 2015-09-16 21:30 ` Matthew R. Ochs
2015-09-21 18:24 ` Brian King
0 siblings, 1 reply; 4+ messages in thread
From: Matthew R. Ochs @ 2015-09-16 21:30 UTC (permalink / raw)
To: linux-scsi, James Bottomley, Nicholas A. Bellinger, Brian King,
Ian Munsie, Daniel Axtens, Andrew Donnellan
Cc: Michael Neuling, linuxppc-dev, Manoj N. Kumar
Borrowing the TMF waitq's spinlock causes a stall condition when
waiting for the TMF to complete. To remedy, introduce our own spin
lock to serialize TMF and use the appropriate wait services.
Also add a timeout while waiting for a TMF completion. When a TMF
times out, report back a failure such that a bigger hammer reset
can occur.
Signed-off-by: Matthew R. Ochs <mrochs@linux.vnet.ibm.com>
Signed-off-by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>
---
drivers/scsi/cxlflash/common.h | 1 +
drivers/scsi/cxlflash/main.c | 55 +++++++++++++++++++++++++-----------------
2 files changed, 34 insertions(+), 22 deletions(-)
diff --git a/drivers/scsi/cxlflash/common.h b/drivers/scsi/cxlflash/common.h
index 2855b09..c8327ac 100644
--- a/drivers/scsi/cxlflash/common.h
+++ b/drivers/scsi/cxlflash/common.h
@@ -126,6 +126,7 @@ struct cxlflash_cfg {
struct list_head lluns; /* list of llun_info structs */
wait_queue_head_t tmf_waitq;
+ spinlock_t tmf_slock;
bool tmf_active;
wait_queue_head_t reset_waitq;
enum cxlflash_state state;
diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c
index 600c7f9..29e40cc 100644
--- a/drivers/scsi/cxlflash/main.c
+++ b/drivers/scsi/cxlflash/main.c
@@ -249,11 +249,10 @@ static void cmd_complete(struct afu_cmd *cmd)
scp->scsi_done(scp);
if (cmd_is_tmf) {
- spin_lock_irqsave(&cfg->tmf_waitq.lock, lock_flags);
+ spin_lock_irqsave(&cfg->tmf_slock, lock_flags);
cfg->tmf_active = false;
wake_up_all_locked(&cfg->tmf_waitq);
- spin_unlock_irqrestore(&cfg->tmf_waitq.lock,
- lock_flags);
+ spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
}
} else
complete(&cmd->cevent);
@@ -420,6 +419,7 @@ static int send_tmf(struct afu *afu, struct scsi_cmnd *scp, u64 tmfcmd)
struct device *dev = &cfg->dev->dev;
ulong lock_flags;
int rc = 0;
+ ulong to;
cmd = cmd_checkout(afu);
if (unlikely(!cmd)) {
@@ -428,15 +428,15 @@ static int send_tmf(struct afu *afu, struct scsi_cmnd *scp, u64 tmfcmd)
goto out;
}
- /* If a Task Management Function is active, do not send one more.
- */
- spin_lock_irqsave(&cfg->tmf_waitq.lock, lock_flags);
+ /* When Task Management Function is active do not send another */
+ spin_lock_irqsave(&cfg->tmf_slock, lock_flags);
if (cfg->tmf_active)
- wait_event_interruptible_locked_irq(cfg->tmf_waitq,
- !cfg->tmf_active);
+ wait_event_interruptible_lock_irq(cfg->tmf_waitq,
+ !cfg->tmf_active,
+ cfg->tmf_slock);
cfg->tmf_active = true;
cmd->cmd_tmf = true;
- spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags);
+ spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
cmd->rcb.ctx_id = afu->ctx_hndl;
cmd->rcb.port_sel = port_sel;
@@ -457,15 +457,24 @@ static int send_tmf(struct afu *afu, struct scsi_cmnd *scp, u64 tmfcmd)
rc = send_cmd(afu, cmd);
if (unlikely(rc)) {
cmd_checkin(cmd);
- spin_lock_irqsave(&cfg->tmf_waitq.lock, lock_flags);
+ spin_lock_irqsave(&cfg->tmf_slock, lock_flags);
cfg->tmf_active = false;
- spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags);
+ spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
goto out;
}
- spin_lock_irqsave(&cfg->tmf_waitq.lock, lock_flags);
- wait_event_interruptible_locked_irq(cfg->tmf_waitq, !cfg->tmf_active);
- spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags);
+ spin_lock_irqsave(&cfg->tmf_slock, lock_flags);
+ to = msecs_to_jiffies(5000);
+ to = wait_event_interruptible_lock_irq_timeout(cfg->tmf_waitq,
+ !cfg->tmf_active,
+ cfg->tmf_slock,
+ to);
+ if (!to) {
+ cfg->tmf_active = false;
+ dev_err(dev, "%s: TMF timed out!\n", __func__);
+ rc = -1;
+ }
+ spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
out:
return rc;
}
@@ -512,16 +521,17 @@ static int cxlflash_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scp)
get_unaligned_be32(&((u32 *)scp->cmnd)[2]),
get_unaligned_be32(&((u32 *)scp->cmnd)[3]));
- /* If a Task Management Function is active, wait for it to complete
+ /*
+ * If a Task Management Function is active, wait for it to complete
* before continuing with regular commands.
*/
- spin_lock_irqsave(&cfg->tmf_waitq.lock, lock_flags);
+ spin_lock_irqsave(&cfg->tmf_slock, lock_flags);
if (cfg->tmf_active) {
- spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags);
+ spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
rc = SCSI_MLQUEUE_HOST_BUSY;
goto out;
}
- spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags);
+ spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
switch (cfg->state) {
case STATE_RESET:
@@ -713,11 +723,12 @@ static void cxlflash_remove(struct pci_dev *pdev)
/* If a Task Management Function is active, wait for it to complete
* before continuing with remove.
*/
- spin_lock_irqsave(&cfg->tmf_waitq.lock, lock_flags);
+ spin_lock_irqsave(&cfg->tmf_slock, lock_flags);
if (cfg->tmf_active)
- wait_event_interruptible_locked_irq(cfg->tmf_waitq,
- !cfg->tmf_active);
- spin_unlock_irqrestore(&cfg->tmf_waitq.lock, lock_flags);
+ wait_event_interruptible_lock_irq(cfg->tmf_waitq,
+ !cfg->tmf_active,
+ cfg->tmf_slock);
+ spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
cfg->state = STATE_FAILTERM;
atomic_inc(&cfg->remove_active);
--
2.1.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH v2 14/30] cxlflash: Fix to avoid stall while waiting on TMF
2015-09-16 21:30 ` [PATCH v2 14/30] cxlflash: Fix to avoid stall while waiting on TMF Matthew R. Ochs
@ 2015-09-21 18:24 ` Brian King
2015-09-21 23:05 ` Matthew R. Ochs
0 siblings, 1 reply; 4+ messages in thread
From: Brian King @ 2015-09-21 18:24 UTC (permalink / raw)
To: Matthew R. Ochs, linux-scsi, James Bottomley,
Nicholas A. Bellinger, Ian Munsie, Daniel Axtens,
Andrew Donnellan
Cc: Michael Neuling, linuxppc-dev, Manoj N. Kumar
On 09/16/2015 04:30 PM, Matthew R. Ochs wrote:
> Borrowing the TMF waitq's spinlock causes a stall condition when
> waiting for the TMF to complete. To remedy, introduce our own spin
> lock to serialize TMF and use the appropriate wait services.
Can you clarify what stall condition you were seeing? It's not obvious
to me what this fixes. Do you have softlockup logs from the failure?
-Brian
--
Brian King
Power Linux I/O
IBM Linux Technology Center
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH v2 14/30] cxlflash: Fix to avoid stall while waiting on TMF
2015-09-21 18:24 ` Brian King
@ 2015-09-21 23:05 ` Matthew R. Ochs
0 siblings, 0 replies; 4+ messages in thread
From: Matthew R. Ochs @ 2015-09-21 23:05 UTC (permalink / raw)
To: Brian King
Cc: linux-scsi, James Bottomley, Nicholas A. Bellinger, Ian Munsie,
Daniel Axtens, Andrew Donnellan, Michael Neuling, linuxppc-dev,
Manoj N. Kumar
> On Sep 21, 2015, at 1:24 PM, Brian King <brking@linux.vnet.ibm.com> wrote:
> On 09/16/2015 04:30 PM, Matthew R. Ochs wrote:
>> Borrowing the TMF waitq's spinlock causes a stall condition when
>> waiting for the TMF to complete. To remedy, introduce our own spin
>> lock to serialize TMF and use the appropriate wait services.
>
> Can you clarify what stall condition you were seeing? It's not obvious
> to me what this fixes. Do you have soft lockup logs from the failure?
I believe we saw cascading RCU stalls.
I couldn't find any more details in my notes or development commits.
Unfortunately the logs are long gone as this was fixed in June.
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2015-09-21 23:05 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed)
-- links below jump to the message on this page --
2015-09-16 16:57 [PATCH v2 14/30] cxlflash: Fix to avoid stall while waiting on TMF Matthew R. Ochs
-- strict thread matches above, loose matches on Subject: below --
2015-09-16 21:23 [PATCH v2 00/30] cxlflash: Miscellaneous bug fixes and corrections Matthew R. Ochs
2015-09-16 21:30 ` [PATCH v2 14/30] cxlflash: Fix to avoid stall while waiting on TMF Matthew R. Ochs
2015-09-21 18:24 ` Brian King
2015-09-21 23:05 ` Matthew R. Ochs
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).