From: Seungwon Jeon <tgih.jun@samsung.com>
To: 'Sujit Reddy Thumma' <sthumma@codeaurora.org>
Cc: 'Vinayak Holikatti' <vinholikatti@gmail.com>,
'Santosh Y' <santoshsy@gmail.com>,
"'James E.J. Bottomley'" <JBottomley@parallels.com>,
linux-scsi@vger.kernel.org, linux-arm-msm@vger.kernel.org
Subject: RE: [PATCH V3 3/4] scsi: ufs: Fix device and host reset methods
Date: Tue, 23 Jul 2013 17:27:26 +0900 [thread overview]
Message-ID: <001801ce877e$768e7640$63ab62c0$%jun@samsung.com> (raw)
In-Reply-To: <51E984E8.90405@codeaurora.org>
On Sat, July 20, 2013, Sujit Reddy Thumma wrote:
> On 7/19/2013 7:27 PM, Seungwon Jeon wrote:
> > On Tue, July 09, 2013, Sujit Reddy Thumma wrote:
> >> As of now SCSI initiated error handling is broken because,
> >> the reset APIs don't try to bring back the device initialized and
> >> ready for further transfers.
> >>
> >> In case of timeouts, the scsi error handler takes care of handling aborts
> >> and resets. Improve the error handling in such scenario by resetting the
> >> device and host and re-initializing them in proper manner.
> >>
> >> Signed-off-by: Sujit Reddy Thumma <sthumma@codeaurora.org>
> >> ---
> >> drivers/scsi/ufs/ufshcd.c | 467 +++++++++++++++++++++++++++++++++++++++------
> >> drivers/scsi/ufs/ufshcd.h | 2 +
> >> 2 files changed, 411 insertions(+), 58 deletions(-)
> >>
> >> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> >> index 51ce096..b4c9910 100644
> >> --- a/drivers/scsi/ufs/ufshcd.c
> >> +++ b/drivers/scsi/ufs/ufshcd.c
> >> @@ -69,9 +69,15 @@ enum {
> >>
> >> /* UFSHCD states */
> >> enum {
> >> - UFSHCD_STATE_OPERATIONAL,
> >> UFSHCD_STATE_RESET,
> >> UFSHCD_STATE_ERROR,
> >> + UFSHCD_STATE_OPERATIONAL,
> >> +};
> >> +
> >> +/* UFSHCD error handling flags */
> >> +enum {
> >> + UFSHCD_EH_HOST_RESET_PENDING = (1 << 0),
> >> + UFSHCD_EH_DEVICE_RESET_PENDING = (1 << 1),
> >> };
> >>
> >> /* Interrupt configuration options */
> >> @@ -87,6 +93,22 @@ enum {
> >> INT_AGGR_CONFIG,
> >> };
> >>
> >> +#define ufshcd_set_device_reset_pending(h) \
> >> + (h->eh_flags |= UFSHCD_EH_DEVICE_RESET_PENDING)
> >> +#define ufshcd_set_host_reset_pending(h) \
> >> + (h->eh_flags |= UFSHCD_EH_HOST_RESET_PENDING)
> >> +#define ufshcd_device_reset_pending(h) \
> >> + (h->eh_flags & UFSHCD_EH_DEVICE_RESET_PENDING)
> >> +#define ufshcd_host_reset_pending(h) \
> >> + (h->eh_flags & UFSHCD_EH_HOST_RESET_PENDING)
> >> +#define ufshcd_clear_device_reset_pending(h) \
> >> + (h->eh_flags &= ~UFSHCD_EH_DEVICE_RESET_PENDING)
> >> +#define ufshcd_clear_host_reset_pending(h) \
> >> + (h->eh_flags &= ~UFSHCD_EH_HOST_RESET_PENDING)
> >> +
> >> +static void ufshcd_tmc_handler(struct ufs_hba *hba);
> >> +static void ufshcd_async_scan(void *data, async_cookie_t cookie);
> >> +
> >> /*
> >> * ufshcd_wait_for_register - wait for register value to change
> >> * @hba - per-adapter interface
> >> @@ -851,9 +873,22 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
> >>
> >> tag = cmd->request->tag;
> >>
> >> - if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL) {
> >> + switch (hba->ufshcd_state) {
> > Lock is no needed for ufshcd_state?
Please check?
> >
> >> + case UFSHCD_STATE_OPERATIONAL:
> >> + break;
> >> + case UFSHCD_STATE_RESET:
> >> err = SCSI_MLQUEUE_HOST_BUSY;
> >> goto out;
> >> + case UFSHCD_STATE_ERROR:
> >> + set_host_byte(cmd, DID_ERROR);
> >> + cmd->scsi_done(cmd);
> >> + goto out;
> >> + default:
> >> + dev_WARN_ONCE(hba->dev, 1, "%s: invalid state %d\n",
> >> + __func__, hba->ufshcd_state);
> >> + set_host_byte(cmd, DID_BAD_TARGET);
> >> + cmd->scsi_done(cmd);
> >> + goto out;
> >> }
> >>
> >> /* acquire the tag to make sure device cmds don't use it */
> >> @@ -1573,8 +1608,6 @@ static int ufshcd_make_hba_operational(struct ufs_hba *hba)
> >> if (hba->ufshcd_state == UFSHCD_STATE_RESET)
> >> scsi_unblock_requests(hba->host);
> >>
> >> - hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
> >> -
> >> out:
> >> return err;
> >> }
> >> @@ -2273,6 +2306,106 @@ out:
> >> }
> >>
> >> /**
> >> + * ufshcd_utrl_is_rsr_enabled - check if run-stop register is enabled
> >> + * @hba: per-adapter instance
> >> + */
> >> +static bool ufshcd_utrl_is_rsr_enabled(struct ufs_hba *hba)
> >> +{
> >> + return ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_LIST_RUN_STOP) & 0x1;
> >> +}
> >> +
> >> +/**
> >> + * ufshcd_utmrl_is_rsr_enabled - check if run-stop register is enabled
> >> + * @hba: per-adapter instance
> >> + */
> >> +static bool ufshcd_utmrl_is_rsr_enabled(struct ufs_hba *hba)
> >> +{
> >> + return ufshcd_readl(hba, REG_UTP_TASK_REQ_LIST_RUN_STOP) & 0x1;
> >> +}
> >> +
> >> +/**
> >> + * ufshcd_complete_pending_tasks - complete outstanding tasks
> >> + * @hba: per adapter instance
> >> + *
> >> + * Abort in-progress task management commands and wakeup
> >> + * waiting threads.
> >> + *
> >> + * Returns non-zero error value when failed to clear all the commands.
> >> + */
> >> +static int ufshcd_complete_pending_tasks(struct ufs_hba *hba)
> >> +{
> >> + u32 reg;
> >> + int err = 0;
> >> + unsigned long flags;
> >> +
> >> + if (!hba->outstanding_tasks)
> >> + goto out;
> >> +
> >> + /* Clear UTMRL only when run-stop is enabled */
> >> + if (ufshcd_utmrl_is_rsr_enabled(hba))
> >> + ufshcd_writel(hba, ~hba->outstanding_tasks,
> >> + REG_UTP_TASK_REQ_LIST_CLEAR);
> >> +
> >> + /* poll for max. 1 sec to clear door bell register by h/w */
> >> + reg = ufshcd_wait_for_register(hba,
> >> + REG_UTP_TASK_REQ_DOOR_BELL,
> >> + hba->outstanding_tasks, 0, 1000, 1000);
> >> + if (reg & hba->outstanding_tasks)
> >> + err = -ETIMEDOUT;
> >> +
> >> + spin_lock_irqsave(hba->host->host_lock, flags);
> >> + /* complete commands that were cleared out */
> >> + ufshcd_tmc_handler(hba);
> >> + spin_unlock_irqrestore(hba->host->host_lock, flags);
> >> +out:
> >> + if (err)
> >> + dev_err(hba->dev, "%s: failed, still pending = 0x%.8x\n",
> >> + __func__, reg);
> >> + return err;
> >> +}
> >> +
> >> +/**
> >> + * ufshcd_complete_pending_reqs - complete outstanding requests
> >> + * @hba: per adapter instance
> >> + *
> >> + * Abort in-progress transfer request commands and return them to SCSI.
> >> + *
> >> + * Returns non-zero error value when failed to clear all the commands.
> >> + */
> >> +static int ufshcd_complete_pending_reqs(struct ufs_hba *hba)
> >> +{
> >> + u32 reg;
> >> + int err = 0;
> >> + unsigned long flags;
> >> +
> >> + /* check if we completed all of them */
> >> + if (!hba->outstanding_reqs)
> >> + goto out;
> >> +
> >> + /* Clear UTRL only when run-stop is enabled */
> >> + if (ufshcd_utrl_is_rsr_enabled(hba))
> >> + ufshcd_writel(hba, ~hba->outstanding_reqs,
> >> + REG_UTP_TRANSFER_REQ_LIST_CLEAR);
> >> +
> >> + /* poll for max. 1 sec to clear door bell register by h/w */
> >> + reg = ufshcd_wait_for_register(hba,
> >> + REG_UTP_TRANSFER_REQ_DOOR_BELL,
> >> + hba->outstanding_reqs, 0, 1000, 1000);
> >> + if (reg & hba->outstanding_reqs)
> >> + err = -ETIMEDOUT;
> >> +
> >> + spin_lock_irqsave(hba->host->host_lock, flags);
> >> + /* complete commands that were cleared out */
> >> + ufshcd_transfer_req_compl(hba);
> >> + spin_unlock_irqrestore(hba->host->host_lock, flags);
> >> +out:
> >> + if (err)
> >> + dev_err(hba->dev, "%s: failed, still pending = 0x%.8x\n",
> >> + __func__, reg);
> >> + return err;
> >> +}
> >> +
> >> +/**
> >> * ufshcd_fatal_err_handler - handle fatal errors
> >> * @hba: per adapter instance
> >> */
> >> @@ -2306,8 +2439,12 @@ static void ufshcd_err_handler(struct ufs_hba *hba)
> >> }
> >> return;
> >> fatal_eh:
> >> - hba->ufshcd_state = UFSHCD_STATE_ERROR;
> >> - schedule_work(&hba->feh_workq);
> >> + /* handle fatal errors only when link is functional */
> >> + if (hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) {
> >> + /* block commands at driver layer until error is handled */
> >> + hba->ufshcd_state = UFSHCD_STATE_ERROR;
> > Locking omitted for ufshcd_state?
> This is called in interrupt context with spin_lock held.
Right, I missed it.
>
> >
> >> + schedule_work(&hba->feh_workq);
> >> + }
> >> }
> >>
> >> /**
> >> @@ -2475,75 +2612,155 @@ static int ufshcd_issue_tm_cmd(struct ufs_hba *hba, int lun_id, int
> task_id,
> >> }
> >>
> >> /**
> >> - * ufshcd_device_reset - reset device and abort all the pending commands
> >> - * @cmd: SCSI command pointer
> >> + * ufshcd_dme_end_point_reset - Notify device Unipro to perform reset
> >> + * @hba: per adapter instance
> >> *
> >> - * Returns SUCCESS/FAILED
> >> + * UIC_CMD_DME_END_PT_RST resets the UFS device completely, the UFS flags,
> >> + * attributes and descriptors are reset to default state. Callers are
> >> + * expected to initialize the whole device again after this.
> >> + *
> >> + * Returns zero on success, non-zero on failure
> >> */
> >> -static int ufshcd_device_reset(struct scsi_cmnd *cmd)
> >> +static int ufshcd_dme_end_point_reset(struct ufs_hba *hba)
> >> {
> >> - struct Scsi_Host *host;
> >> - struct ufs_hba *hba;
> >> - unsigned int tag;
> >> - u32 pos;
> >> - int err;
> >> - u8 resp;
> >> - struct ufshcd_lrb *lrbp;
> >> + struct uic_command uic_cmd = {0};
> >> + int ret;
> >>
> >> - host = cmd->device->host;
> >> - hba = shost_priv(host);
> >> - tag = cmd->request->tag;
> >> + uic_cmd.command = UIC_CMD_DME_END_PT_RST;
> >>
> >> - lrbp = &hba->lrb[tag];
> >> - err = ufshcd_issue_tm_cmd(hba, lrbp->lun, lrbp->task_tag,
> >> - UFS_LOGICAL_RESET, &resp);
> >> - if (err || resp != UPIU_TASK_MANAGEMENT_FUNC_COMPL) {
> >> - err = FAILED;
> >> + ret = ufshcd_send_uic_cmd(hba, &uic_cmd);
> >> + if (ret)
> >> + dev_err(hba->dev, "%s: error code %d\n", __func__, ret);
> >> +
> >> + return ret;
> >> +}
> >> +
> >> +/**
> >> + * ufshcd_dme_reset - Local UniPro reset
> >> + * @hba: per adapter instance
> >> + *
> >> + * Returns zero on success, non-zero on failure
> >> + */
> >> +static int ufshcd_dme_reset(struct ufs_hba *hba)
> >> +{
> >> + struct uic_command uic_cmd = {0};
> >> + int ret;
> >> +
> >> + uic_cmd.command = UIC_CMD_DME_RESET;
> >> +
> >> + ret = ufshcd_send_uic_cmd(hba, &uic_cmd);
> >> + if (ret)
> >> + dev_err(hba->dev, "%s: error code %d\n", __func__, ret);
> >> +
> >> + return ret;
> >> +
> >> +}
> >> +
> >> +/**
> >> + * ufshcd_dme_enable - Local UniPro DME Enable
> >> + * @hba: per adapter instance
> >> + *
> >> + * Returns zero on success, non-zero on failure
> >> + */
> >> +static int ufshcd_dme_enable(struct ufs_hba *hba)
> >> +{
> >> + struct uic_command uic_cmd = {0};
> >> + int ret;
> >> + uic_cmd.command = UIC_CMD_DME_ENABLE;
> >> +
> >> + ret = ufshcd_send_uic_cmd(hba, &uic_cmd);
> >> + if (ret)
> >> + dev_err(hba->dev, "%s: error code %d\n", __func__, ret);
> >> +
> >> + return ret;
> >> +
> >> +}
> >> +
> >> +/**
> >> + * ufshcd_device_reset_and_restore - reset and restore device
> >> + * @hba: per-adapter instance
> >> + *
> >> + * Note that the device reset issues DME_END_POINT_RESET which
> >> + * may reset entire device and restore device attributes to
> >> + * default state.
> >> + *
> >> + * Returns zero on success, non-zero on failure
> >> + */
> >> +static int ufshcd_device_reset_and_restore(struct ufs_hba *hba)
> >> +{
> >> + int err = 0;
> >> + u32 reg;
> >> +
> >> + err = ufshcd_dme_end_point_reset(hba);
> >> + if (err)
> >> + goto out;
> >> +
> >> + /* restore communication with the device */
> >> + err = ufshcd_dme_reset(hba);
> >> + if (err)
> >> goto out;
> >> - } else {
> >> - err = SUCCESS;
> >> - }
> >>
> >> - for (pos = 0; pos < hba->nutrs; pos++) {
> >> - if (test_bit(pos, &hba->outstanding_reqs) &&
> >> - (hba->lrb[tag].lun == hba->lrb[pos].lun)) {
> >> + err = ufshcd_dme_enable(hba);
> >> + if (err)
> >> + goto out;
> >>
> >> - /* clear the respective UTRLCLR register bit */
> >> - ufshcd_utrl_clear(hba, pos);
> >> + err = ufshcd_dme_link_startup(hba);
> > UFS_LOGICAL_RESET is no more used?
>
> Yes, I don't see any use for this as of now (given that we are using
> dme_end_point_reset, refer to figure. 7.4 of UFS 1.1 spec). Also, the
> UFS spec. error handling section doesn't mention anything about
> LOGICAL_RESET. If you know a valid use case where we need to have LUN
> reset, please let me know I will bring it back.
As refered the scsi-mid layer and other host's implementation,
eh_device_reset_handler(= ufshcd_eh_device_reset_handler) may
have a role of LOGICAL_RESET for specific lun.
I found that ENDPOINT_RESET is recommended with IS.DFES in spec.
Let me add some comments additionally.
Both 'ufshcd_eh_device_reset_handler' and 'ufshcd_host_reset_and_restore' do almost same things.
At a glance, it's confused about their role and It is mixed.
'ufshcd_reset_and_restore' is eventually called, which is actual part of reset functionality; Once device reset is failed, then
host reset is tried.
Actually, that is being handled for each level of error recovery in scsi mid-layer. Please chekc 'drivers/scsi/scsi_error.c'.
[scsi_eh_ready_devs, scsi_abort_eh_cmnd]
In this stage, each reset functionality could be separated obviously.
>
> > ufshcd_device_reset_and_restore have a role of device reset.
> > Both ufshcd_dme_reset and ufshcd_dme_enable are valid for local one, not for remote.
> > Should we do those for host including link-startup here?
>
> Yes, it is needed. After DME_ENDPOINT_RESET the remote link goes into link down state.
I want to know more related description. I didn't find it. Could you point that?
> To initialize the link, the host needs to send
> DME_LINKSTARTUP, but according to Uni-Pro spec. the link-startup can
> only be sent when the local uni-pro is in link-down state. So first
If it's right you mentioned above, uni-pro state is already in link-down after DME_ENDPOINT_RESET.
Then, DME_RESET isn't needed.
> we need to get the local unipro from link-up to disabled to link-down
> using the DME_RESET and DME_ENABLE commands and then issue
> DME_LINKSTARTUP to re-initialize the link.
'ufshcd_hba_enable' can be used instead of both if these are really needed.
This will do dme_reset and dme_enable.
Thanks,
Seungwon Jeon
next prev parent reply other threads:[~2013-07-23 8:27 UTC|newest]
Thread overview: 27+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-07-09 9:16 [PATCH V3 0/4] scsi: ufs: Improve UFS error handling Sujit Reddy Thumma
2013-07-09 9:16 ` [PATCH V3 1/4] scsi: ufs: Fix broken task management command implementation Sujit Reddy Thumma
2013-07-09 10:42 ` merez
2013-07-19 13:56 ` Seungwon Jeon
2013-07-19 18:26 ` Sujit Reddy Thumma
2013-07-23 8:24 ` Seungwon Jeon
2013-07-23 15:40 ` Sujit Reddy Thumma
2013-07-09 9:16 ` [PATCH V3 2/4] scsi: ufs: Fix hardware race conditions while aborting a command Sujit Reddy Thumma
2013-07-09 10:42 ` merez
2013-07-19 13:56 ` Seungwon Jeon
2013-07-19 18:26 ` Sujit Reddy Thumma
2013-07-09 9:16 ` [PATCH V3 3/4] scsi: ufs: Fix device and host reset methods Sujit Reddy Thumma
2013-07-09 10:43 ` merez
2013-07-19 13:57 ` Seungwon Jeon
2013-07-19 18:26 ` Sujit Reddy Thumma
2013-07-23 8:27 ` Seungwon Jeon [this message]
2013-07-23 15:40 ` Sujit Reddy Thumma
2013-07-24 13:39 ` Seungwon Jeon
2013-07-29 9:45 ` Sujit Reddy Thumma
2013-07-09 9:16 ` [PATCH V3 4/4] scsi: ufs: Improve UFS fatal error handling Sujit Reddy Thumma
2013-07-09 10:43 ` merez
2013-07-19 13:58 ` Seungwon Jeon
2013-07-19 18:26 ` Sujit Reddy Thumma
2013-07-23 8:34 ` Seungwon Jeon
2013-07-23 15:41 ` Sujit Reddy Thumma
2013-07-24 13:39 ` Seungwon Jeon
2013-07-29 9:45 ` Sujit Reddy Thumma
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to='001801ce877e$768e7640$63ab62c0$%jun@samsung.com' \
--to=tgih.jun@samsung.com \
--cc=JBottomley@parallels.com \
--cc=linux-arm-msm@vger.kernel.org \
--cc=linux-scsi@vger.kernel.org \
--cc=santoshsy@gmail.com \
--cc=sthumma@codeaurora.org \
--cc=vinholikatti@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).