From: Tomas Henzl <thenzl@redhat.com>
To: "Matthew R. Ochs" <mrochs@linux.vnet.ibm.com>,
linux-scsi@vger.kernel.org,
James Bottomley <James.Bottomley@HansenPartnership.com>,
"Nicholas A. Bellinger" <nab@linux-iscsi.org>,
Brian King <brking@linux.vnet.ibm.com>,
Ian Munsie <imunsie@au1.ibm.com>,
Daniel Axtens <dja@ozlabs.au.ibm.com>,
Andrew Donnellan <andrew.donnellan@au1.ibm.com>
Cc: Michael Neuling <mikey@neuling.org>,
linuxppc-dev@lists.ozlabs.org,
"Manoj N. Kumar" <manoj@linux.vnet.ibm.com>
Subject: Re: [PATCH v2 29/30] cxlflash: Fix to avoid state change collision
Date: Mon, 21 Sep 2015 14:44:33 +0200 [thread overview]
Message-ID: <55FFFBB1.9050802@redhat.com> (raw)
In-Reply-To: <1442439158-50453-1-git-send-email-mrochs@linux.vnet.ibm.com>
On 16.9.2015 23:32, Matthew R. Ochs wrote:
> The adapter state machine is susceptible to missing and/or
> corrupting state updates at runtime. This can lead to a variety
> of unintended issues and is due to the lack of a serialization
> mechanism to protect the adapter state.
>
> Use an adapter-wide mutex to serialize state changes.
I've only briefly looked at your code, but it seems to me that
an atomic variable would also serve your needs and might be
more efficient, resulting in faster code execution?
If you keep the mutex approach, you don't need two mutexes
in cxlflash_afu_sync - you should remove the sync_active mutex.
--tm
>
> Signed-off-by: Matthew R. Ochs <mrochs@linux.vnet.ibm.com>
> Signed-off-by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>
> Suggested-by: Brian King <brking@linux.vnet.ibm.com>
> ---
> drivers/scsi/cxlflash/common.h | 1 +
> drivers/scsi/cxlflash/main.c | 40 +++++++++++++++++++++++++++++++++------
> drivers/scsi/cxlflash/superpipe.c | 7 ++++++-
> 3 files changed, 41 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/scsi/cxlflash/common.h b/drivers/scsi/cxlflash/common.h
> index e6041b9..c9b1ec6 100644
> --- a/drivers/scsi/cxlflash/common.h
> +++ b/drivers/scsi/cxlflash/common.h
> @@ -128,6 +128,7 @@ struct cxlflash_cfg {
> bool tmf_active;
> wait_queue_head_t reset_waitq;
> enum cxlflash_state state;
> + struct mutex mutex;
> };
>
> struct afu_cmd {
> diff --git a/drivers/scsi/cxlflash/main.c b/drivers/scsi/cxlflash/main.c
> index 0487fac..a94340d 100644
> --- a/drivers/scsi/cxlflash/main.c
> +++ b/drivers/scsi/cxlflash/main.c
> @@ -496,6 +496,7 @@ static int cxlflash_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scp)
> struct cxlflash_cfg *cfg = (struct cxlflash_cfg *)host->hostdata;
> struct afu *afu = cfg->afu;
> struct device *dev = &cfg->dev->dev;
> + enum cxlflash_state state;
> struct afu_cmd *cmd;
> u32 port_sel = scp->device->channel + 1;
> int nseg, i, ncount;
> @@ -525,7 +526,11 @@ static int cxlflash_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *scp)
> }
> spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
>
> - switch (cfg->state) {
> + mutex_lock(&cfg->mutex);
> + state = cfg->state;
> + mutex_unlock(&cfg->mutex);
> +
> + switch (state) {
> case STATE_RESET:
> dev_dbg_ratelimited(dev, "%s: device is in reset!\n", __func__);
> rc = SCSI_MLQUEUE_HOST_BUSY;
> @@ -722,7 +727,9 @@ static void cxlflash_remove(struct pci_dev *pdev)
> cfg->tmf_slock);
> spin_unlock_irqrestore(&cfg->tmf_slock, lock_flags);
>
> + mutex_lock(&cfg->mutex);
> cfg->state = STATE_FAILTERM;
> + mutex_unlock(&cfg->mutex);
> atomic_inc(&cfg->remove_active);
> cxlflash_stop_term_user_contexts(cfg);
>
> @@ -1811,12 +1818,13 @@ int cxlflash_afu_sync(struct afu *afu, ctx_hndl_t ctx_hndl_u,
> int retry_cnt = 0;
> static DEFINE_MUTEX(sync_active);
>
> + mutex_lock(&sync_active);
> + mutex_lock(&cfg->mutex);
> if (cfg->state != STATE_NORMAL) {
> pr_debug("%s: Sync not required! (%u)\n", __func__, cfg->state);
> - return 0;
> + goto out;
> }
>
> - mutex_lock(&sync_active);
> retry:
> cmd = cmd_checkout(afu);
> if (unlikely(!cmd)) {
> @@ -1858,6 +1866,7 @@ retry:
> (cmd->sa.host_use_b[0] & B_ERROR)))
> rc = -1;
> out:
> + mutex_unlock(&cfg->mutex);
> mutex_unlock(&sync_active);
> if (cmd)
> cmd_checkin(cmd);
> @@ -1900,6 +1909,7 @@ static int cxlflash_eh_device_reset_handler(struct scsi_cmnd *scp)
> struct Scsi_Host *host = scp->device->host;
> struct cxlflash_cfg *cfg = (struct cxlflash_cfg *)host->hostdata;
> struct afu *afu = cfg->afu;
> + enum cxlflash_state state;
> int rcr = 0;
>
> pr_debug("%s: (scp=%p) %d/%d/%d/%llu "
> @@ -1912,7 +1922,11 @@ static int cxlflash_eh_device_reset_handler(struct scsi_cmnd *scp)
> get_unaligned_be32(&((u32 *)scp->cmnd)[3]));
>
> retry:
> - switch (cfg->state) {
> + mutex_lock(&cfg->mutex);
> + state = cfg->state;
> + mutex_unlock(&cfg->mutex);
> +
> + switch (state) {
> case STATE_NORMAL:
> rcr = send_tmf(afu, scp, TMF_LUN_RESET);
> if (unlikely(rcr))
> @@ -1954,6 +1968,7 @@ static int cxlflash_eh_host_reset_handler(struct scsi_cmnd *scp)
> get_unaligned_be32(&((u32 *)scp->cmnd)[2]),
> get_unaligned_be32(&((u32 *)scp->cmnd)[3]));
>
> + mutex_lock(&cfg->mutex);
> switch (cfg->state) {
> case STATE_NORMAL:
> cfg->state = STATE_RESET;
> @@ -1967,7 +1982,9 @@ static int cxlflash_eh_host_reset_handler(struct scsi_cmnd *scp)
> wake_up_all(&cfg->reset_waitq);
> break;
> case STATE_RESET:
> + mutex_unlock(&cfg->mutex);
> wait_event(cfg->reset_waitq, cfg->state != STATE_RESET);
> + mutex_lock(&cfg->mutex);
> if (cfg->state == STATE_NORMAL)
> break;
> /* fall through */
> @@ -1975,6 +1992,7 @@ static int cxlflash_eh_host_reset_handler(struct scsi_cmnd *scp)
> rc = FAILED;
> break;
> }
> + mutex_unlock(&cfg->mutex);
>
> pr_debug("%s: returning rc=%d\n", __func__, rc);
> return rc;
> @@ -2312,10 +2330,11 @@ static void cxlflash_worker_thread(struct work_struct *work)
> int port;
> ulong lock_flags;
>
> - /* Avoid MMIO if the device has failed */
> + mutex_lock(&cfg->mutex);
>
> + /* Avoid MMIO if the device has failed */
> if (cfg->state != STATE_NORMAL)
> - return;
> + goto out;
>
> spin_lock_irqsave(cfg->host->host_lock, lock_flags);
>
> @@ -2346,6 +2365,8 @@ static void cxlflash_worker_thread(struct work_struct *work)
>
> if (atomic_dec_if_positive(&cfg->scan_host_needed) >= 0)
> scsi_scan_host(cfg->host);
> +out:
> + mutex_unlock(&cfg->mutex);
> }
>
> /**
> @@ -2416,6 +2437,7 @@ static int cxlflash_probe(struct pci_dev *pdev,
> INIT_WORK(&cfg->work_q, cxlflash_worker_thread);
> cfg->lr_state = LINK_RESET_INVALID;
> cfg->lr_port = -1;
> + mutex_init(&cfg->mutex);
> mutex_init(&cfg->ctx_tbl_list_mutex);
> mutex_init(&cfg->ctx_recovery_mutex);
> init_rwsem(&cfg->ioctl_rwsem);
> @@ -2503,7 +2525,9 @@ static pci_ers_result_t cxlflash_pci_error_detected(struct pci_dev *pdev,
>
> switch (state) {
> case pci_channel_io_frozen:
> + mutex_lock(&cfg->mutex);
> cfg->state = STATE_RESET;
> + mutex_unlock(&cfg->mutex);
> scsi_block_requests(cfg->host);
> drain_ioctls(cfg);
> rc = cxlflash_mark_contexts_error(cfg);
> @@ -2514,7 +2538,9 @@ static pci_ers_result_t cxlflash_pci_error_detected(struct pci_dev *pdev,
> stop_afu(cfg);
> return PCI_ERS_RESULT_NEED_RESET;
> case pci_channel_io_perm_failure:
> + mutex_lock(&cfg->mutex);
> cfg->state = STATE_FAILTERM;
> + mutex_unlock(&cfg->mutex);
> wake_up_all(&cfg->reset_waitq);
> scsi_unblock_requests(cfg->host);
> return PCI_ERS_RESULT_DISCONNECT;
> @@ -2561,7 +2587,9 @@ static void cxlflash_pci_resume(struct pci_dev *pdev)
>
> dev_dbg(dev, "%s: pdev=%p\n", __func__, pdev);
>
> + mutex_lock(&cfg->mutex);
> cfg->state = STATE_NORMAL;
> + mutex_unlock(&cfg->mutex);
> wake_up_all(&cfg->reset_waitq);
> scsi_unblock_requests(cfg->host);
> }
> diff --git a/drivers/scsi/cxlflash/superpipe.c b/drivers/scsi/cxlflash/superpipe.c
> index 9844788..c3aaadf 100644
> --- a/drivers/scsi/cxlflash/superpipe.c
> +++ b/drivers/scsi/cxlflash/superpipe.c
> @@ -1229,10 +1229,15 @@ static const struct file_operations null_fops = {
> static int check_state(struct cxlflash_cfg *cfg, bool ioctl)
> {
> struct device *dev = &cfg->dev->dev;
> + enum cxlflash_state state;
> int rc = 0;
>
> retry:
> - switch (cfg->state) {
> + mutex_lock(&cfg->mutex);
> + state = cfg->state;
> + mutex_unlock(&cfg->mutex);
> +
> + switch (state) {
> case STATE_RESET:
> dev_dbg(dev, "%s: Reset state, going to wait...\n", __func__);
> if (ioctl)
next prev parent reply other threads:[~2015-09-21 12:44 UTC|newest]
Thread overview: 79+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-09-16 21:23 [PATCH v2 00/30] cxlflash: Miscellaneous bug fixes and corrections Matthew R. Ochs
2015-09-16 21:25 ` [PATCH v2 01/30] cxlflash: Fix to avoid invalid port_sel value Matthew R. Ochs
2015-09-18 1:16 ` Brian King
2015-09-16 21:26 ` [PATCH v2 02/30] cxlflash: Replace magic numbers with literals Matthew R. Ochs
2015-09-18 1:18 ` Brian King
2015-09-16 21:26 ` [PATCH v2 03/30] cxlflash: Fix read capacity timeout Matthew R. Ochs
2015-09-18 1:21 ` Brian King
2015-09-21 11:36 ` Tomas Henzl
2015-09-21 22:11 ` Matthew R. Ochs
2015-09-16 21:27 ` [PATCH v2 04/30] cxlflash: Fix potential oops following LUN removal Matthew R. Ochs
2015-09-18 1:26 ` Brian King
2015-09-18 23:18 ` Matthew R. Ochs
2015-09-21 12:11 ` Tomas Henzl
2015-09-21 22:32 ` Matthew R. Ochs
2015-09-16 21:27 ` [PATCH v2 05/30] cxlflash: Fix data corruption when vLUN used over multiple cards Matthew R. Ochs
2015-09-18 1:28 ` Brian King
2015-09-16 21:27 ` [PATCH v2 06/30] cxlflash: Fix to avoid sizeof(bool) Matthew R. Ochs
2015-09-18 1:29 ` Brian King
2015-09-16 21:27 ` [PATCH v2 07/30] cxlflash: Fix context encode mask width Matthew R. Ochs
2015-09-18 1:29 ` Brian King
2015-09-16 21:27 ` [PATCH v2 08/30] cxlflash: Fix to avoid CXL services during EEH Matthew R. Ochs
2015-09-18 13:37 ` Brian King
2015-09-18 23:54 ` Matthew R. Ochs
2015-09-16 21:28 ` [PATCH v2 09/30] cxlflash: Fix to stop interrupt processing on remove Matthew R. Ochs
2015-09-17 11:58 ` David Laight
2015-09-17 16:55 ` Matthew R. Ochs
2015-09-16 21:28 ` [PATCH v2 10/30] cxlflash: Correct naming of limbo state and waitq Matthew R. Ochs
2015-09-18 15:28 ` Brian King
2015-09-16 21:28 ` [PATCH v2 11/30] cxlflash: Make functions static Matthew R. Ochs
2015-09-18 15:34 ` Brian King
2015-09-21 12:18 ` Tomas Henzl
2015-09-21 22:36 ` Matthew R. Ochs
2015-09-16 21:29 ` [PATCH v2 12/30] cxlflash: Refine host/device attributes Matthew R. Ochs
2015-09-18 21:34 ` Brian King
2015-09-18 23:56 ` Matthew R. Ochs
2015-09-21 9:55 ` David Laight
2015-09-16 21:30 ` [PATCH v2 13/30] cxlflash: Fix to avoid spamming the kernel log Matthew R. Ochs
2015-09-18 21:39 ` Brian King
2015-09-16 21:30 ` [PATCH v2 14/30] cxlflash: Fix to avoid stall while waiting on TMF Matthew R. Ochs
2015-09-21 18:24 ` Brian King
2015-09-21 23:05 ` Matthew R. Ochs
2015-09-16 21:30 ` [PATCH v2 15/30] cxlflash: Fix location of setting resid Matthew R. Ochs
2015-09-21 18:28 ` Brian King
2015-09-16 21:30 ` [PATCH v2 16/30] cxlflash: Fix host link up event handling Matthew R. Ochs
2015-09-21 21:47 ` Brian King
2015-09-16 21:30 ` [PATCH v2 17/30] cxlflash: Fix async interrupt bypass logic Matthew R. Ochs
2015-09-21 21:48 ` Brian King
2015-09-16 21:30 ` [PATCH v2 18/30] cxlflash: Remove dual port online dependency Matthew R. Ochs
2015-09-21 22:02 ` Brian King
2015-09-22 20:44 ` Matthew R. Ochs
2015-09-22 20:50 ` Brian King
2015-09-16 21:30 ` [PATCH v2 19/30] cxlflash: Fix AFU version access/storage and add check Matthew R. Ochs
2015-09-22 20:47 ` Brian King
2015-09-16 21:30 ` [PATCH v2 20/30] cxlflash: Correct usage of scsi_host_put() Matthew R. Ochs
2015-09-22 20:53 ` Brian King
2015-09-22 21:49 ` Matthew R. Ochs
2015-09-16 21:31 ` [PATCH v2 21/30] cxlflash: Fix to prevent workq from accessing freed memory Matthew R. Ochs
2015-09-21 12:25 ` Tomas Henzl
2015-09-21 22:44 ` Matthew R. Ochs
2015-09-16 21:31 ` [PATCH v2 22/30] cxlflash: Correct behavior in device reset handler following EEH Matthew R. Ochs
2015-09-22 20:58 ` Brian King
2015-09-16 21:31 ` [PATCH v2 23/30] cxlflash: Remove unnecessary scsi_block_requests Matthew R. Ochs
2015-09-22 20:59 ` Brian King
2015-09-16 21:31 ` [PATCH v2 24/30] cxlflash: Fix function prolog parameters and return codes Matthew R. Ochs
2015-09-22 21:02 ` Brian King
2015-09-16 21:32 ` [PATCH v2 25/30] cxlflash: Fix MMIO and endianness errors Matthew R. Ochs
2015-09-23 15:03 ` Brian King
2015-09-16 21:32 ` [PATCH v2 26/30] cxlflash: Fix to prevent EEH recovery failure Matthew R. Ochs
2015-09-23 19:09 ` Brian King
2015-09-16 21:32 ` [PATCH v2 27/30] cxlflash: Correct spelling, grammar, and alignment mistakes Matthew R. Ochs
2015-09-23 19:13 ` Brian King
2015-09-16 21:32 ` [PATCH v2 28/30] cxlflash: Fix to prevent stale AFU RRQ Matthew R. Ochs
2015-09-23 19:18 ` Brian King
2015-09-16 21:32 ` [PATCH v2 29/30] cxlflash: Fix to avoid state change collision Matthew R. Ochs
2015-09-21 12:44 ` Tomas Henzl [this message]
2015-09-21 22:59 ` Matthew R. Ochs
2015-09-16 21:33 ` [PATCH v2 30/30] MAINTAINERS: Add cxlflash driver Matthew R. Ochs
2015-09-23 19:19 ` Brian King
-- strict thread matches above, loose matches on Subject: below --
2015-09-16 17:08 [PATCH v2 29/30] cxlflash: Fix to avoid state change collision Matthew R. Ochs
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=55FFFBB1.9050802@redhat.com \
--to=thenzl@redhat.com \
--cc=James.Bottomley@HansenPartnership.com \
--cc=andrew.donnellan@au1.ibm.com \
--cc=brking@linux.vnet.ibm.com \
--cc=dja@ozlabs.au.ibm.com \
--cc=imunsie@au1.ibm.com \
--cc=linux-scsi@vger.kernel.org \
--cc=linuxppc-dev@lists.ozlabs.org \
--cc=manoj@linux.vnet.ibm.com \
--cc=mikey@neuling.org \
--cc=mrochs@linux.vnet.ibm.com \
--cc=nab@linux-iscsi.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).