From: "Matthew R. Ochs" <mrochs@linux.vnet.ibm.com>
To: linux-scsi@vger.kernel.org,
James Bottomley <James.Bottomley@HansenPartnership.com>,
"Nicholas A. Bellinger" <nab@linux-iscsi.org>,
Brian King <brking@linux.vnet.ibm.com>,
Ian Munsie <imunsie@au1.ibm.com>,
Daniel Axtens <dja@ozlabs.au.ibm.com>,
Andrew Donnellan <andrew.donnellan@au1.ibm.com>,
Tomas Henzl <thenzl@redhat.com>,
David Laight <David.Laight@ACULAB.COM>
Cc: Michael Neuling <mikey@neuling.org>,
linuxppc-dev@lists.ozlabs.org,
"Manoj N. Kumar" <manoj@linux.vnet.ibm.com>
Subject: [PATCH v3 28/32] cxlflash: Fix to avoid state change collision
Date: Thu, 24 Sep 2015 14:42:36 -0500 [thread overview]
Message-ID: <1443123756-17736-1-git-send-email-mrochs@linux.vnet.ibm.com> (raw)
In-Reply-To: <1443123193-16498-1-git-send-email-mrochs@linux.vnet.ibm.com>
The adapter state machine is susceptible to missing and/or
corrupting state updates at runtime. This can lead to a variety
of unintended issues and is due to the lack of a serialization
mechanism to protect the adapter state.
Use an adapter-wide mutex to serialize state changes.
Signed-off-by: Matthew R. Ochs <mrochs@linux.vnet.ibm.com>
Signed-off-by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>
Conflicts:
drivers/scsi/cxlflash/main.c
---
drivers/scsi/cxlflash/common.h | 1 +
drivers/scsi/cxlflash/main.c | 48 ++++++++++++++++++++++++++++++---------
drivers/scsi/cxlflash/superpipe.c | 7 +++++-
3 files changed, 44 insertions(+), 12 deletions(-)
diff --git a/drivers/scsi/cxlflash/common.h b/drivers/scsi/cxlflash/common.h
index bbfe711..89c82d2 100644
--- a/drivers/scsi/cxlflash/common.h
+++ b/drivers/scsi/cxlflash/common.h
@@ -127,6 +127,7 @@ struct cxlflash_cfg {
bool tmf_active;
wait_queue_head_t reset_waitq;
enum cxlflash_state state;
+ struct mutex mutex;
};
struct afu_cmd {
diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c
index ab11ee6..325ba31 100644
--- a/drivers/scsi/cxlflash/main.c
+++ b/drivers/scsi/cxlflash/main.c
@@ -496,6 +496,7 @@ static int cxlflash_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scp)
struct cxlflash_cfg *cfg = (struct cxlflash_cfg *)host->hostdata;
struct afu *afu = cfg->afu;
struct device *dev = &cfg->dev->dev;
+ enum cxlflash_state state;
struct afu_cmd *cmd;
u32 port_sel = scp->device->channel + 1;
int nseg, i, ncount;
@@ -525,7 +526,11 @@ static int cxlflash_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scp)
}
spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
- switch (cfg->state) {
+ mutex_lock(&cfg->mutex);
+ state = cfg->state;
+ mutex_unlock(&cfg->mutex);
+
+ switch (state) {
case STATE_RESET:
dev_dbg_ratelimited(dev, "%s: device is in reset!\n", __func__);
rc = SCSI_MLQUEUE_HOST_BUSY;
@@ -722,7 +727,9 @@ static void cxlflash_remove(struct pci_dev *pdev)
cfg->tmf_slock);
spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
+ mutex_lock(&cfg->mutex);
cfg->state = STATE_FAILTERM;
+ mutex_unlock(&cfg->mutex);
cxlflash_stop_term_user_contexts(cfg);
switch (cfg->init_state) {
@@ -1776,9 +1783,10 @@ err1:
* @mode: Type of sync to issue (lightweight, heavyweight, global).
*
* The AFU can only take 1 sync command at a time. This routine enforces this
- * limitation by using a mutex to provide exclusive access to the AFU during
- * the sync. This design point requires calling threads to not be on interrupt
- * context due to the possibility of sleeping during concurrent sync operations.
+ * limitation by holding the adapter mutex across the entirety of the function
+ * to provide exclusive access to the AFU during the sync. This design point
+ * requires calling threads to not be on interrupt context due to the
+ * possibility of sleeping during concurrent sync operations.
*
* AFU sync operations are only necessary and allowed when the device is
* operating normally. When not operating normally, sync requests can occur as
@@ -1798,14 +1806,13 @@ int cxlflash_afu_sync(struct afu *afu, ctx_hndl_t ctx_hndl_u,
struct afu_cmd *cmd = NULL;
int rc = 0;
int retry_cnt = 0;
- static DEFINE_MUTEX(sync_active);
+ mutex_lock(&cfg->mutex);
if (cfg->state != STATE_NORMAL) {
pr_debug("%s: Sync not required! (%u)\n", __func__, cfg->state);
- return 0;
+ goto out;
}
- mutex_lock(&sync_active);
retry:
cmd = cmd_checkout(afu);
if (unlikely(!cmd)) {
@@ -1847,7 +1854,7 @@ retry:
(cmd->sa.host_use_b[0] & B_ERROR)))
rc = -1;
out:
- mutex_unlock(&sync_active);
+ mutex_unlock(&cfg->mutex);
if (cmd)
cmd_checkin(cmd);
pr_debug("%s: returning rc=%d\n", __func__, rc);
@@ -1889,6 +1896,7 @@ static int cxlflash_eh_device_reset_handler(struct scsi_cmnd *scp)
struct Scsi_Host *host = scp->device->host;
struct cxlflash_cfg *cfg = (struct cxlflash_cfg *)host->hostdata;
struct afu *afu = cfg->afu;
+ enum cxlflash_state state;
int rcr = 0;
pr_debug("%s: (scp=%p) %d/%d/%d/%llu "
@@ -1901,7 +1909,11 @@ static int cxlflash_eh_device_reset_handler(struct scsi_cmnd *scp)
get_unaligned_be32(&((u32 *)scp->cmnd)[3]));
retry:
- switch (cfg->state) {
+ mutex_lock(&cfg->mutex);
+ state = cfg->state;
+ mutex_unlock(&cfg->mutex);
+
+ switch (state) {
case STATE_NORMAL:
rcr = send_tmf(afu, scp, TMF_LUN_RESET);
if (unlikely(rcr))
@@ -1943,6 +1955,7 @@ static int cxlflash_eh_host_reset_handler(struct scsi_cmnd *scp)
get_unaligned_be32(&((u32 *)scp->cmnd)[2]),
get_unaligned_be32(&((u32 *)scp->cmnd)[3]));
+ mutex_lock(&cfg->mutex);
switch (cfg->state) {
case STATE_NORMAL:
cfg->state = STATE_RESET;
@@ -1956,7 +1969,9 @@ static int cxlflash_eh_host_reset_handler(struct scsi_cmnd *scp)
wake_up_all(&cfg->reset_waitq);
break;
case STATE_RESET:
+ mutex_unlock(&cfg->mutex);
wait_event(cfg->reset_waitq, cfg->state != STATE_RESET);
+ mutex_lock(&cfg->mutex);
if (cfg->state == STATE_NORMAL)
break;
/* fall through */
@@ -1964,6 +1979,7 @@ static int cxlflash_eh_host_reset_handler(struct scsi_cmnd *scp)
rc = FAILED;
break;
}
+ mutex_unlock(&cfg->mutex);
pr_debug("%s: returning rc=%d\n", __func__, rc);
return rc;
@@ -2301,10 +2317,11 @@ static void cxlflash_worker_thread(struct work_struct *work)
int port;
ulong lock_flags;
- /* Avoid MMIO if the device has failed */
+ mutex_lock(&cfg->mutex);
+ /* Avoid MMIO if the device has failed */
if (cfg->state != STATE_NORMAL)
- return;
+ goto out;
spin_lock_irqsave(cfg->host->host_lock, lock_flags);
@@ -2335,6 +2352,8 @@ static void cxlflash_worker_thread(struct work_struct *work)
if (atomic_dec_if_positive(&cfg->scan_host_needed) >= 0)
scsi_scan_host(cfg->host);
+out:
+ mutex_unlock(&cfg->mutex);
}
/**
@@ -2405,6 +2424,7 @@ static int cxlflash_probe(struct pci_dev *pdev,
INIT_WORK(&cfg->work_q, cxlflash_worker_thread);
cfg->lr_state = LINK_RESET_INVALID;
cfg->lr_port = -1;
+ mutex_init(&cfg->mutex);
mutex_init(&cfg->ctx_tbl_list_mutex);
mutex_init(&cfg->ctx_recovery_mutex);
init_rwsem(&cfg->ioctl_rwsem);
@@ -2492,7 +2512,9 @@ static pci_ers_result_t cxlflash_pci_error_detected(struct pci_dev *pdev,
switch (state) {
case pci_channel_io_frozen:
+ mutex_lock(&cfg->mutex);
cfg->state = STATE_RESET;
+ mutex_unlock(&cfg->mutex);
scsi_block_requests(cfg->host);
drain_ioctls(cfg);
rc = cxlflash_mark_contexts_error(cfg);
@@ -2503,7 +2525,9 @@ static pci_ers_result_t cxlflash_pci_error_detected(struct pci_dev *pdev,
stop_afu(cfg);
return PCI_ERS_RESULT_NEED_RESET;
case pci_channel_io_perm_failure:
+ mutex_lock(&cfg->mutex);
cfg->state = STATE_FAILTERM;
+ mutex_unlock(&cfg->mutex);
wake_up_all(&cfg->reset_waitq);
scsi_unblock_requests(cfg->host);
return PCI_ERS_RESULT_DISCONNECT;
@@ -2550,7 +2574,9 @@ static void cxlflash_pci_resume(struct pci_dev *pdev)
dev_dbg(dev, "%s: pdev=%p\n", __func__, pdev);
+ mutex_lock(&cfg->mutex);
cfg->state = STATE_NORMAL;
+ mutex_unlock(&cfg->mutex);
wake_up_all(&cfg->reset_waitq);
scsi_unblock_requests(cfg->host);
}
diff --git a/drivers/scsi/cxlflash/superpipe.c b/drivers/scsi/cxlflash/superpipe.c
index 0d92594..6aaeee0 100644
--- a/drivers/scsi/cxlflash/superpipe.c
+++ b/drivers/scsi/cxlflash/superpipe.c
@@ -1229,10 +1229,15 @@ static const struct file_operations null_fops = {
static int check_state(struct cxlflash_cfg *cfg, bool ioctl)
{
struct device *dev = &cfg->dev->dev;
+ enum cxlflash_state state;
int rc = 0;
retry:
- switch (cfg->state) {
+ mutex_lock(&cfg->mutex);
+ state = cfg->state;
+ mutex_unlock(&cfg->mutex);
+
+ switch (state) {
case STATE_RESET:
dev_dbg(dev, "%s: Reset state, going to wait...\n", __func__);
if (ioctl)
--
2.1.0
next prev parent reply other threads:[~2015-09-24 19:44 UTC|newest]
Thread overview: 48+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-09-24 19:33 [PATCH v3 00/32] cxlflash: Miscellaneous bug fixes and corrections Matthew R. Ochs
2015-09-24 19:36 ` [PATCH v3 01/32] cxlflash: Fix to avoid invalid port_sel value Matthew R. Ochs
2015-09-24 19:37 ` [PATCH v3 02/32] cxlflash: Replace magic numbers with literals Matthew R. Ochs
2015-09-24 19:37 ` [PATCH v3 03/32] cxlflash: Fix read capacity timeout Matthew R. Ochs
2015-09-24 19:37 ` [PATCH v3 04/32] cxlflash: Fix potential oops following LUN removal Matthew R. Ochs
2015-09-25 1:10 ` Brian King
2015-09-24 19:37 ` [PATCH v3 05/32] cxlflash: Fix data corruption when vLUN used over multiple cards Matthew R. Ochs
2015-09-24 19:37 ` [PATCH v3 06/32] cxlflash: Fix to avoid sizeof(bool) Matthew R. Ochs
2015-09-24 19:38 ` [PATCH v3 07/32] cxlflash: Fix context encode mask width Matthew R. Ochs
2015-09-24 19:38 ` [PATCH v3 08/32] cxlflash: Fix to avoid CXL services during EEH Matthew R. Ochs
2015-09-25 1:23 ` Brian King
2015-09-25 17:09 ` Matthew R. Ochs
2015-09-24 19:38 ` [PATCH v3 09/32] cxlflash: Correct naming of limbo state and waitq Matthew R. Ochs
2015-09-24 19:38 ` [PATCH v3 10/32] cxlflash: Make functions static Matthew R. Ochs
2015-09-24 19:39 ` [PATCH v3 11/32] cxlflash: Refine host/device attributes Matthew R. Ochs
2015-09-24 19:47 ` Matthew R. Ochs
2015-09-25 1:28 ` Brian King
2015-09-24 19:39 ` [PATCH v3 12/32] cxlflash: Fix to avoid spamming the kernel log Matthew R. Ochs
2015-09-24 19:39 ` [PATCH v3 13/32] cxlflash: Fix to avoid stall while waiting on TMF Matthew R. Ochs
2015-09-25 20:32 ` Brian King
2015-09-24 19:39 ` [PATCH v3 14/32] cxlflash: Fix location of setting resid Matthew R. Ochs
2015-09-24 19:39 ` [PATCH v3 15/32] cxlflash: Fix host link up event handling Matthew R. Ochs
2015-09-24 19:39 ` [PATCH v3 16/32] cxlflash: Fix async interrupt bypass logic Matthew R. Ochs
2015-09-24 19:39 ` [PATCH v3 17/32] cxlflash: Remove dual port online dependency Matthew R. Ochs
2015-09-24 19:39 ` [PATCH v3 18/32] cxlflash: Fix AFU version access/storage and add check Matthew R. Ochs
2015-09-24 19:40 ` [PATCH v3 19/32] cxlflash: Correct usage of scsi_host_put() Matthew R. Ochs
2015-09-25 20:35 ` Brian King
2015-09-24 19:41 ` [PATCH v3 20/32] cxlflash: Fix to prevent workq from accessing freed memory Matthew R. Ochs
2015-09-25 20:37 ` Brian King
2015-09-24 19:41 ` [PATCH v3 21/32] cxlflash: Correct behavior in device reset handler following EEH Matthew R. Ochs
2015-09-24 19:41 ` [PATCH v3 22/32] cxlflash: Remove unnecessary scsi_block_requests Matthew R. Ochs
2015-09-24 19:41 ` [PATCH v3 23/32] cxlflash: Fix function prolog parameters and return codes Matthew R. Ochs
2015-09-24 19:41 ` [PATCH v3 24/32] cxlflash: Fix MMIO and endianness errors Matthew R. Ochs
2015-09-24 19:41 ` [PATCH v3 25/32] cxlflash: Fix to prevent EEH recovery failure Matthew R. Ochs
2015-09-24 19:42 ` [PATCH v3 26/32] cxlflash: Correct spelling, grammar, and alignment mistakes Matthew R. Ochs
2015-09-24 19:42 ` [PATCH v3 27/32] cxlflash: Fix to prevent stale AFU RRQ Matthew R. Ochs
2015-09-24 19:42 ` Matthew R. Ochs [this message]
2015-09-25 21:10 ` [PATCH v3 28/32] cxlflash: Fix to avoid state change collision Brian King
2015-09-25 22:31 ` Matthew R. Ochs
2015-09-24 19:44 ` [PATCH v3 29/32] MAINTAINERS: Add cxlflash driver Matthew R. Ochs
2015-09-25 1:08 ` Andrew Donnellan
2015-09-24 19:44 ` [PATCH v3 30/32] cxlflash: Fix to double the delay each time Matthew R. Ochs
2015-09-25 21:12 ` Brian King
2015-09-24 19:44 ` [PATCH v3 31/32] cxlflash: Fix to avoid corrupting adapter fops Matthew R. Ochs
2015-09-25 21:23 ` Brian King
2015-09-25 22:35 ` Matthew R. Ochs
2015-09-24 19:44 ` [PATCH v3 32/32] cxlflash: Correct trace string Matthew R. Ochs
2015-09-25 21:24 ` Brian King
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1443123756-17736-1-git-send-email-mrochs@linux.vnet.ibm.com \
--to=mrochs@linux.vnet.ibm.com \
--cc=David.Laight@ACULAB.COM \
--cc=James.Bottomley@HansenPartnership.com \
--cc=andrew.donnellan@au1.ibm.com \
--cc=brking@linux.vnet.ibm.com \
--cc=dja@ozlabs.au.ibm.com \
--cc=imunsie@au1.ibm.com \
--cc=linux-scsi@vger.kernel.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=manoj@linux.vnet.ibm.com \
--cc=mikey@neuling.org \
--cc=nab@linux-iscsi.org \
--cc=thenzl@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).