From: vinayak holikatti <vinholikatti@gmail.com>
To: Sujit Reddy Thumma <sthumma@codeaurora.org>
Cc: Santosh Y <santoshsy@gmail.com>,
"James E.J. Bottomley" <JBottomley@parallels.com>,
linux-scsi@vger.kernel.org, linux-arm-msm@vger.kernel.org
Subject: Re: [PATCH V7 5/6] scsi: ufs: Fix device and host reset methods
Date: Mon, 23 Sep 2013 17:47:10 +0530 [thread overview]
Message-ID: <CAKVbJB-SLZjdxYDAi4DAsRBSHA98sRo91s5B8Aw4b1uoALXnnA@mail.gmail.com> (raw)
In-Reply-To: <1379589248-3317-6-git-send-email-sthumma@codeaurora.org>
On Thu, Sep 19, 2013 at 4:44 PM, Sujit Reddy Thumma
<sthumma@codeaurora.org> wrote:
> As of now SCSI initiated error handling is broken because,
> the reset APIs don't try to bring back the device initialized and
> ready for further transfers.
>
> In case of timeouts, the scsi error handler takes care of handling aborts
> and resets. Improve the error handling in such scenario by resetting the
> device and host and re-initializing them in proper manner.
>
> Signed-off-by: Sujit Reddy Thumma <sthumma@codeaurora.org>
> Reviewed-by: Yaniv Gardi <ygardi@codeaurora.org>
> Tested-by: Dolev Raviv <draviv@codeaurora.org>
> ---
> drivers/scsi/ufs/ufshcd.c | 240 ++++++++++++++++++++++++++++++++++++----------
> drivers/scsi/ufs/ufshcd.h | 2 +
> 2 files changed, 189 insertions(+), 53 deletions(-)
>
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index 52f66e4..5462310 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -74,9 +74,14 @@ enum {
>
> /* UFSHCD states */
> enum {
> - UFSHCD_STATE_OPERATIONAL,
> UFSHCD_STATE_RESET,
> UFSHCD_STATE_ERROR,
> + UFSHCD_STATE_OPERATIONAL,
> +};
> +
> +/* UFSHCD error handling flags */
> +enum {
> + UFSHCD_EH_IN_PROGRESS = (1 << 0),
> };
>
> /* Interrupt configuration options */
> @@ -86,6 +91,16 @@ enum {
> UFSHCD_INT_CLEAR,
> };
>
> +#define ufshcd_set_eh_in_progress(h) \
> + (h->eh_flags |= UFSHCD_EH_IN_PROGRESS)
> +#define ufshcd_eh_in_progress(h) \
> + (h->eh_flags & UFSHCD_EH_IN_PROGRESS)
> +#define ufshcd_clear_eh_in_progress(h) \
> + (h->eh_flags &= ~UFSHCD_EH_IN_PROGRESS)
> +
> +static void ufshcd_tmc_handler(struct ufs_hba *hba);
> +static void ufshcd_async_scan(void *data, async_cookie_t cookie);
> +
> /*
> * ufshcd_wait_for_register - wait for register value to change
> * @hba - per-adapter interface
> @@ -856,10 +871,25 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
>
> tag = cmd->request->tag;
>
> - if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL) {
> + spin_lock_irqsave(hba->host->host_lock, flags);
> + switch (hba->ufshcd_state) {
> + case UFSHCD_STATE_OPERATIONAL:
> + break;
> + case UFSHCD_STATE_RESET:
> err = SCSI_MLQUEUE_HOST_BUSY;
> - goto out;
> + goto out_unlock;
> + case UFSHCD_STATE_ERROR:
> + set_host_byte(cmd, DID_ERROR);
> + cmd->scsi_done(cmd);
> + goto out_unlock;
> + default:
> + dev_WARN_ONCE(hba->dev, 1, "%s: invalid state %d\n",
> + __func__, hba->ufshcd_state);
> + set_host_byte(cmd, DID_BAD_TARGET);
> + cmd->scsi_done(cmd);
> + goto out_unlock;
> }
> + spin_unlock_irqrestore(hba->host->host_lock, flags);
>
> /* acquire the tag to make sure device cmds don't use it */
> if (test_and_set_bit_lock(tag, &hba->lrb_in_use)) {
> @@ -896,6 +926,7 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
> /* issue command to the controller */
> spin_lock_irqsave(hba->host->host_lock, flags);
> ufshcd_send_command(hba, tag);
> +out_unlock:
> spin_unlock_irqrestore(hba->host->host_lock, flags);
> out:
> return err;
> @@ -1707,8 +1738,6 @@ static int ufshcd_make_hba_operational(struct ufs_hba *hba)
> if (hba->ufshcd_state == UFSHCD_STATE_RESET)
> scsi_unblock_requests(hba->host);
>
> - hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
> -
> out:
> return err;
> }
> @@ -2455,8 +2484,12 @@ static void ufshcd_err_handler(struct ufs_hba *hba)
> }
> return;
> fatal_eh:
> - hba->ufshcd_state = UFSHCD_STATE_ERROR;
> - schedule_work(&hba->feh_workq);
> + /* handle fatal errors only when link is functional */
> + if (hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) {
> + /* block commands at driver layer until error is handled */
> + hba->ufshcd_state = UFSHCD_STATE_ERROR;
> + schedule_work(&hba->feh_workq);
> + }
> }
>
> /**
> @@ -2621,12 +2654,13 @@ static int ufshcd_issue_tm_cmd(struct ufs_hba *hba, int lun_id, int task_id,
> }
>
> /**
> - * ufshcd_device_reset - reset device and abort all the pending commands
> + * ufshcd_eh_device_reset_handler - device reset handler registered to
> + * scsi layer.
> * @cmd: SCSI command pointer
> *
> * Returns SUCCESS/FAILED
> */
> -static int ufshcd_device_reset(struct scsi_cmnd *cmd)
> +static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
> {
> struct Scsi_Host *host;
> struct ufs_hba *hba;
> @@ -2635,6 +2669,7 @@ static int ufshcd_device_reset(struct scsi_cmnd *cmd)
> int err;
> u8 resp = 0xF;
> struct ufshcd_lrb *lrbp;
> + unsigned long flags;
>
> host = cmd->device->host;
> hba = shost_priv(host);
> @@ -2643,55 +2678,33 @@ static int ufshcd_device_reset(struct scsi_cmnd *cmd)
> lrbp = &hba->lrb[tag];
> err = ufshcd_issue_tm_cmd(hba, lrbp->lun, 0, UFS_LOGICAL_RESET, &resp);
> if (err || resp != UPIU_TASK_MANAGEMENT_FUNC_COMPL) {
> - err = FAILED;
> + if (!err)
> + err = resp;
> goto out;
> - } else {
> - err = SUCCESS;
> }
>
> - for (pos = 0; pos < hba->nutrs; pos++) {
> - if (test_bit(pos, &hba->outstanding_reqs) &&
> - (hba->lrb[tag].lun == hba->lrb[pos].lun)) {
> -
> - /* clear the respective UTRLCLR register bit */
> - ufshcd_utrl_clear(hba, pos);
> -
> - clear_bit(pos, &hba->outstanding_reqs);
> -
> - if (hba->lrb[pos].cmd) {
> - scsi_dma_unmap(hba->lrb[pos].cmd);
> - hba->lrb[pos].cmd->result =
> - DID_ABORT << 16;
> - hba->lrb[pos].cmd->scsi_done(cmd);
> - hba->lrb[pos].cmd = NULL;
> - clear_bit_unlock(pos, &hba->lrb_in_use);
> - wake_up(&hba->dev_cmd.tag_wq);
> - }
> + /* clear the commands that were pending for corresponding LUN */
> + for_each_set_bit(pos, &hba->outstanding_reqs, hba->nutrs) {
> + if (hba->lrb[pos].lun == lrbp->lun) {
> + err = ufshcd_clear_cmd(hba, pos);
> + if (err)
> + break;
> }
> - } /* end of for */
> + }
> + spin_lock_irqsave(host->host_lock, flags);
> + ufshcd_transfer_req_compl(hba);
> + spin_unlock_irqrestore(host->host_lock, flags);
> out:
> + if (!err) {
> + err = SUCCESS;
> + } else {
> + dev_err(hba->dev, "%s: failed with err %d\n", __func__, err);
> + err = FAILED;
> + }
> return err;
> }
>
> /**
> - * ufshcd_host_reset - Main reset function registered with scsi layer
> - * @cmd: SCSI command pointer
> - *
> - * Returns SUCCESS/FAILED
> - */
> -static int ufshcd_host_reset(struct scsi_cmnd *cmd)
> -{
> - struct ufs_hba *hba;
> -
> - hba = shost_priv(cmd->device->host);
> -
> - if (hba->ufshcd_state == UFSHCD_STATE_RESET)
> - return SUCCESS;
> -
> - return ufshcd_do_reset(hba);
> -}
> -
> -/**
> * ufshcd_abort - abort a specific command
> * @cmd: SCSI command pointer
> *
> @@ -2789,6 +2802,122 @@ out:
> }
>
> /**
> + * ufshcd_host_reset_and_restore - reset and restore host controller
> + * @hba: per-adapter instance
> + *
> + * Note that host controller reset may issue DME_RESET to
> + * local and remote (device) Uni-Pro stack and the attributes
> + * are reset to default state.
> + *
> + * Returns zero on success, non-zero on failure
> + */
> +static int ufshcd_host_reset_and_restore(struct ufs_hba *hba)
> +{
> + int err;
> + async_cookie_t cookie;
> + unsigned long flags;
> +
> + /* Reset the host controller */
> + spin_lock_irqsave(hba->host->host_lock, flags);
> + ufshcd_hba_stop(hba);
> + spin_unlock_irqrestore(hba->host->host_lock, flags);
> +
> + err = ufshcd_hba_enable(hba);
> + if (err)
> + goto out;
> +
> + /* Establish the link again and restore the device */
> + cookie = async_schedule(ufshcd_async_scan, hba);
> + /* wait for async scan to be completed */
> + async_synchronize_cookie(++cookie);
> + if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL)
> + err = -EIO;
> +out:
> + if (err)
> + dev_err(hba->dev, "%s: Host init failed %d\n", __func__, err);
> +
> + return err;
> +}
> +
> +/**
> + * ufshcd_reset_and_restore - reset and re-initialize host/device
> + * @hba: per-adapter instance
> + *
> + * Reset and recover device, host and re-establish link. This
> + * is helpful to recover the communication in fatal error conditions.
> + *
> + * Returns zero on success, non-zero on failure
> + */
> +static int ufshcd_reset_and_restore(struct ufs_hba *hba)
> +{
> + int err = 0;
> + unsigned long flags;
> +
> + err = ufshcd_host_reset_and_restore(hba);
> +
> + /*
> + * After reset the door-bell might be cleared, complete
> + * outstanding requests in s/w here.
> + */
> + spin_lock_irqsave(hba->host->host_lock, flags);
> + ufshcd_transfer_req_compl(hba);
> + ufshcd_tmc_handler(hba);
> + spin_unlock_irqrestore(hba->host->host_lock, flags);
> +
> + return err;
> +}
> +
> +/**
> + * ufshcd_eh_host_reset_handler - host reset handler registered to scsi layer
> + * @cmd - SCSI command pointer
> + *
> + * Returns SUCCESS/FAILED
> + */
> +static int ufshcd_eh_host_reset_handler(struct scsi_cmnd *cmd)
> +{
> + int err;
> + unsigned long flags;
> + struct ufs_hba *hba;
> +
> + hba = shost_priv(cmd->device->host);
> +
> + /*
> + * Check if there is any race with fatal error handling.
> + * If so, wait for it to complete. Even though fatal error
> + * handling does reset and restore in some cases, don't assume
> + * anything out of it. We are just avoiding race here.
> + */
> + do {
> + spin_lock_irqsave(hba->host->host_lock, flags);
> + if (!(work_pending(&hba->feh_workq) ||
> + hba->ufshcd_state == UFSHCD_STATE_RESET))
> + break;
> + spin_unlock_irqrestore(hba->host->host_lock, flags);
> + dev_dbg(hba->dev, "%s: reset in progress\n", __func__);
> + flush_work(&hba->feh_workq);
> + } while (1);
> +
> + hba->ufshcd_state = UFSHCD_STATE_RESET;
> + ufshcd_set_eh_in_progress(hba);
> + spin_unlock_irqrestore(hba->host->host_lock, flags);
> +
> + err = ufshcd_reset_and_restore(hba);
> +
> + spin_lock_irqsave(hba->host->host_lock, flags);
> + if (!err) {
> + err = SUCCESS;
> + hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
> + } else {
> + err = FAILED;
> + hba->ufshcd_state = UFSHCD_STATE_ERROR;
> + }
> + ufshcd_clear_eh_in_progress(hba);
> + spin_unlock_irqrestore(hba->host->host_lock, flags);
> +
> + return err;
> +}
> +
> +/**
> * ufshcd_async_scan - asynchronous execution for link startup
> * @data: data pointer to pass to this function
> * @cookie: cookie data
> @@ -2813,8 +2942,13 @@ static void ufshcd_async_scan(void *data, async_cookie_t cookie)
> goto out;
>
> ufshcd_force_reset_auto_bkops(hba);
> - scsi_scan_host(hba->host);
> - pm_runtime_put_sync(hba->dev);
> + hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
> +
> + /* If we are in error handling context no need to scan the host */
> + if (!ufshcd_eh_in_progress(hba)) {
> + scsi_scan_host(hba->host);
> + pm_runtime_put_sync(hba->dev);
> + }
> out:
> return;
> }
> @@ -2827,8 +2961,8 @@ static struct scsi_host_template ufshcd_driver_template = {
> .slave_alloc = ufshcd_slave_alloc,
> .slave_destroy = ufshcd_slave_destroy,
> .eh_abort_handler = ufshcd_abort,
> - .eh_device_reset_handler = ufshcd_device_reset,
> - .eh_host_reset_handler = ufshcd_host_reset,
> + .eh_device_reset_handler = ufshcd_eh_device_reset_handler,
> + .eh_host_reset_handler = ufshcd_eh_host_reset_handler,
> .this_id = -1,
> .sg_tablesize = SG_ALL,
> .cmd_per_lun = UFSHCD_CMD_PER_LUN,
> diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
> index 84d09d1..48c7d9b 100644
> --- a/drivers/scsi/ufs/ufshcd.h
> +++ b/drivers/scsi/ufs/ufshcd.h
> @@ -180,6 +180,7 @@ struct ufs_dev_cmd {
> * @pwr_done: completion for power mode change
> * @tm_condition: condition variable for task management
> * @ufshcd_state: UFSHCD states
> + * @eh_flags: Error handling flags
> * @intr_mask: Interrupt Mask Bits
> * @ee_ctrl_mask: Exception event control mask
> * @feh_workq: Work queue for fatal controller error handling
> @@ -227,6 +228,7 @@ struct ufs_hba {
> struct completion *pwr_done;
>
> u32 ufshcd_state;
> + u32 eh_flags;
> u32 intr_mask;
> u16 ee_ctrl_mask;
>
Acked-by: Vinayak Holikatti <vinholikatti@gmail.com>
next prev parent reply other threads:[~2013-09-23 12:17 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-09-19 11:14 [PATCH V7 0/6] scsi: ufs: TM, fatal-error handling and other fixes Sujit Reddy Thumma
2013-09-19 11:14 ` [PATCH V7 1/6] scsi: ufs: fix endianness sparse warnings Sujit Reddy Thumma
2013-09-23 12:14 ` vinayak holikatti
2013-11-18 9:04 ` vinayak holikatti
2014-05-26 5:34 ` Sujit Reddy Thumma
2013-09-19 11:14 ` [PATCH V7 2/6] scsi: ufs: make undeclared functions static Sujit Reddy Thumma
2013-09-23 12:14 ` vinayak holikatti
2013-09-19 11:14 ` [PATCH V7 3/6] scsi: ufs: Fix broken task management command implementation Sujit Reddy Thumma
2013-09-23 12:15 ` vinayak holikatti
2013-09-19 11:14 ` [PATCH V7 4/6] scsi: ufs: Fix hardware race conditions while aborting a command Sujit Reddy Thumma
2013-09-23 12:16 ` vinayak holikatti
2013-09-19 11:14 ` [PATCH V7 5/6] scsi: ufs: Fix device and host reset methods Sujit Reddy Thumma
2013-09-23 12:17 ` vinayak holikatti [this message]
2013-09-19 11:14 ` [PATCH V7 6/6] scsi: ufs: Improve UFS fatal error handling Sujit Reddy Thumma
2013-09-23 12:17 ` vinayak holikatti
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=CAKVbJB-SLZjdxYDAi4DAsRBSHA98sRo91s5B8Aw4b1uoALXnnA@mail.gmail.com \
--to=vinholikatti@gmail.com \
--cc=JBottomley@parallels.com \
--cc=linux-arm-msm@vger.kernel.org \
--cc=linux-scsi@vger.kernel.org \
--cc=santoshsy@gmail.com \
--cc=sthumma@codeaurora.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).