linux-scsi.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH 1/4] scsi: ufs: Fix broken task management command implementation
  2013-06-13 14:30 [PATCH 0/4] scsi: ufs: Improve UFS " Sujit Reddy Thumma
@ 2013-06-13 14:30 ` Sujit Reddy Thumma
  2013-06-27 11:19   ` Santosh Y
  0 siblings, 1 reply; 12+ messages in thread
From: Sujit Reddy Thumma @ 2013-06-13 14:30 UTC (permalink / raw)
  To: Vinayak Holikatti, Santosh Y, linux-scsi
  Cc: JBottomley, Sujit Reddy Thumma, linux-arm-msm

Currently, sending Task Management (TM) command to the card might
be broken in some scenarios as listed below:

- If there are more than 8 TM commands the implementation returns
error to the caller.
Fix: Wait for one of the slots to be emptied and send the command.

- Sometimes it is necessary for the caller to know the TM service
response code to determine the task status.
Fix: Propogate the service response to the caller.

- If the TM command times out no proper error recovery is implemented.
Fix: Clear the command in the controller door-bell register, so that
further commands for the same slot don't fail.

- While preparing the TM command descriptor the tag used should be the
free slot of TM command and not the task tag of the command which
the TM command is trying to manage.
Fix: Use free slot tag instead of task tag of SCSI command

- Since the TM command involves H/W communication, abruptly ending the
request on kill interrupt signal might cause h/w malfunction.
Fix: Wait for hardware completion interrupt with TASK_UNINTERRUPTIBLE set.

Signed-off-by: Sujit Reddy Thumma <sthumma@codeaurora.org>
---
 drivers/scsi/ufs/ufshcd.c |  174 +++++++++++++++++++++++++++++----------------
 drivers/scsi/ufs/ufshcd.h |    6 +-
 2 files changed, 117 insertions(+), 63 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index aa1f11e..7d043f4 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -53,6 +53,9 @@
 /* Query request timeout */
 #define QUERY_REQ_TIMEOUT 30 /* msec */
 
+/* Task management command timeout */
+#define TM_CMD_TIMEOUT	100 /* msecs */
+
 #define SCSI_CMD_QUEUE_SIZE (hba->nutrs - 1)
 /* Reserved tag for internal commands */
 #define INTERNAL_CMD_TAG   (hba->nutrs - 1)
@@ -188,13 +191,32 @@ ufshcd_get_tmr_ocs(struct utp_task_req_desc *task_req_descp)
 /**
  * ufshcd_get_tm_free_slot - get a free slot for task management request
  * @hba: per adapter instance
+ * @free_slot: pointer to variable with available slot value
  *
- * Returns maximum number of task management request slots in case of
- * task management queue full or returns the free slot number
+ * Get a free tag and lock it until ufshcd_put_tm_slot() is called.
+ * Returns 0 if free slot is not available, else return 1 with tag value
+ * in @free_slot.
  */
-static inline int ufshcd_get_tm_free_slot(struct ufs_hba *hba)
+static bool ufshcd_get_tm_free_slot(struct ufs_hba *hba, int *free_slot)
+{
+	int tag;
+
+	do {
+		tag = find_first_zero_bit(
+				&hba->outstanding_tasks, hba->nutmrs);
+		if (tag >= hba->nutmrs)
+			return false;
+	} while (test_and_set_bit_lock(tag, &hba->outstanding_tasks));
+
+	if (free_slot)
+		*free_slot = tag;
+
+	return true;
+}
+
+static inline void ufshcd_put_tm_slot(struct ufs_hba *hba, int slot)
 {
-	return find_first_zero_bit(&hba->outstanding_tasks, hba->nutmrs);
+	clear_bit_unlock(slot, &hba->outstanding_tasks);
 }
 
 /**
@@ -1745,10 +1767,11 @@ static void ufshcd_slave_destroy(struct scsi_device *sdev)
  * ufshcd_task_req_compl - handle task management request completion
  * @hba: per adapter instance
  * @index: index of the completed request
+ * @resp: task management service response
  *
- * Returns SUCCESS/FAILED
+ * Returns non-zero value on error, zero on success
  */
-static int ufshcd_task_req_compl(struct ufs_hba *hba, u32 index)
+static int ufshcd_task_req_compl(struct ufs_hba *hba, u32 index, u8 *resp)
 {
 	struct utp_task_req_desc *task_req_descp;
 	struct utp_upiu_task_rsp *task_rsp_upiup;
@@ -1758,9 +1781,6 @@ static int ufshcd_task_req_compl(struct ufs_hba *hba, u32 index)
 
 	spin_lock_irqsave(hba->host->host_lock, flags);
 
-	/* Clear completed tasks from outstanding_tasks */
-	__clear_bit(index, &hba->outstanding_tasks);
-
 	task_req_descp = hba->utmrdl_base_addr;
 	ocs_value = ufshcd_get_tmr_ocs(&task_req_descp[index]);
 
@@ -1769,19 +1789,15 @@ static int ufshcd_task_req_compl(struct ufs_hba *hba, u32 index)
 				task_req_descp[index].task_rsp_upiu;
 		task_result = be32_to_cpu(task_rsp_upiup->header.dword_1);
 		task_result = ((task_result & MASK_TASK_RESPONSE) >> 8);
-
-		if (task_result != UPIU_TASK_MANAGEMENT_FUNC_COMPL &&
-		    task_result != UPIU_TASK_MANAGEMENT_FUNC_SUCCEEDED)
-			task_result = FAILED;
-		else
-			task_result = SUCCESS;
+		if (resp)
+			*resp = (u8)task_result;
 	} else {
-		task_result = FAILED;
-		dev_err(hba->dev,
-			"trc: Invalid ocs = %x\n", ocs_value);
+		dev_err(hba->dev, "%s: failed, ocs = 0x%x\n",
+				__func__, ocs_value);
 	}
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
-	return task_result;
+
+	return ocs_value;
 }
 
 /**
@@ -2258,7 +2274,7 @@ static void ufshcd_tmc_handler(struct ufs_hba *hba)
 
 	tm_doorbell = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL);
 	hba->tm_condition = tm_doorbell ^ hba->outstanding_tasks;
-	wake_up_interruptible(&hba->ufshcd_tm_wait_queue);
+	wake_up(&hba->tm_wq);
 }
 
 /**
@@ -2308,17 +2324,42 @@ static irqreturn_t ufshcd_intr(int irq, void *__hba)
 	return retval;
 }
 
+static int ufshcd_clear_tm_cmd(struct ufs_hba *hba, int tag)
+{
+	int err = 0;
+	u32 reg;
+	u32 mask = 1 << tag;
+	unsigned long flags;
+
+	if (!test_bit(tag, &hba->outstanding_reqs))
+		goto out;
+
+	spin_lock_irqsave(hba->host->host_lock, flags);
+	ufshcd_writel(hba, ~(1 << tag), REG_UTP_TASK_REQ_LIST_CLEAR);
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+
+	/* poll for max. 1 sec to clear door bell register by h/w */
+	reg = ufshcd_wait_for_register(hba,
+			REG_UTP_TASK_REQ_DOOR_BELL,
+			mask, 0, 1000, 1000);
+	if ((reg & mask) == mask)
+		err = -ETIMEDOUT;
+out:
+	return err;
+}
+
 /**
  * ufshcd_issue_tm_cmd - issues task management commands to controller
  * @hba: per adapter instance
- * @lrbp: pointer to local reference block
+ * @lun_id: LUN ID to which TM command is sent
+ * @task_id: task ID to which the TM command is applicable
+ * @tm_function: task management function opcode
+ * @tm_response: task management service response return value
  *
- * Returns SUCCESS/FAILED
+ * Returns non-zero value on error, zero on success.
  */
-static int
-ufshcd_issue_tm_cmd(struct ufs_hba *hba,
-		    struct ufshcd_lrb *lrbp,
-		    u8 tm_function)
+static int ufshcd_issue_tm_cmd(struct ufs_hba *hba, int lun_id, int task_id,
+		u8 tm_function, u8 *tm_response)
 {
 	struct utp_task_req_desc *task_req_descp;
 	struct utp_upiu_task_req *task_req_upiup;
@@ -2329,17 +2370,10 @@ ufshcd_issue_tm_cmd(struct ufs_hba *hba,
 
 	host = hba->host;
 
-	spin_lock_irqsave(host->host_lock, flags);
-
-	/* If task management queue is full */
-	free_slot = ufshcd_get_tm_free_slot(hba);
-	if (free_slot >= hba->nutmrs) {
-		spin_unlock_irqrestore(host->host_lock, flags);
-		dev_err(hba->dev, "Task management queue full\n");
-		err = FAILED;
-		goto out;
-	}
+	/* Get free slot, sleep if slots are unavailable */
+	wait_event(hba->tm_tag_wq, ufshcd_get_tm_free_slot(hba, &free_slot));
 
+	spin_lock_irqsave(host->host_lock, flags);
 	task_req_descp = hba->utmrdl_base_addr;
 	task_req_descp += free_slot;
 
@@ -2353,38 +2387,39 @@ ufshcd_issue_tm_cmd(struct ufs_hba *hba,
 		(struct utp_upiu_task_req *) task_req_descp->task_req_upiu;
 	task_req_upiup->header.dword_0 =
 		UPIU_HEADER_DWORD(UPIU_TRANSACTION_TASK_REQ, 0,
-					      lrbp->lun, lrbp->task_tag);
+				lun_id, free_slot);
 	task_req_upiup->header.dword_1 =
 		UPIU_HEADER_DWORD(0, tm_function, 0, 0);
 
-	task_req_upiup->input_param1 = lrbp->lun;
-	task_req_upiup->input_param1 =
-		cpu_to_be32(task_req_upiup->input_param1);
-	task_req_upiup->input_param2 = lrbp->task_tag;
-	task_req_upiup->input_param2 =
-		cpu_to_be32(task_req_upiup->input_param2);
+	task_req_upiup->input_param1 = cpu_to_be32(lun_id);
+	task_req_upiup->input_param2 = cpu_to_be32(task_id);
 
-	/* send command to the controller */
-	__set_bit(free_slot, &hba->outstanding_tasks);
+	/*
+	 * outstanding_tasks variable is already updated with corresponding
+	 * bit while acquiring free slot. Ring doorbell here.
+	 */
 	ufshcd_writel(hba, 1 << free_slot, REG_UTP_TASK_REQ_DOOR_BELL);
 
 	spin_unlock_irqrestore(host->host_lock, flags);
 
 	/* wait until the task management command is completed */
-	err =
-	wait_event_interruptible_timeout(hba->ufshcd_tm_wait_queue,
-					 (test_bit(free_slot,
-					 &hba->tm_condition) != 0),
-					 60 * HZ);
+	err = wait_event_timeout(hba->tm_wq,
+			test_bit(free_slot, &hba->tm_condition),
+			msecs_to_jiffies(TM_CMD_TIMEOUT));
 	if (!err) {
-		dev_err(hba->dev,
-			"Task management command timed-out\n");
-		err = FAILED;
-		goto out;
+		dev_err(hba->dev, "%s: task management cmd 0x%.2x timed-out\n",
+				__func__, tm_function);
+		if (ufshcd_clear_tm_cmd(hba, free_slot))
+			dev_WARN(hba->dev, "%s: unable clear tm cmd (slot %d) after timeout\n",
+					__func__, free_slot);
+		err = -ETIMEDOUT;
+	} else {
+		err = ufshcd_task_req_compl(hba, free_slot, tm_response);
 	}
 	clear_bit(free_slot, &hba->tm_condition);
-	err = ufshcd_task_req_compl(hba, free_slot);
-out:
+	ufshcd_put_tm_slot(hba, free_slot);
+	wake_up(&hba->tm_tag_wq);
+
 	return err;
 }
 
@@ -2401,14 +2436,22 @@ static int ufshcd_device_reset(struct scsi_cmnd *cmd)
 	unsigned int tag;
 	u32 pos;
 	int err;
+	u8 resp;
+	struct ufshcd_lrb *lrbp;
 
 	host = cmd->device->host;
 	hba = shost_priv(host);
 	tag = cmd->request->tag;
 
-	err = ufshcd_issue_tm_cmd(hba, &hba->lrb[tag], UFS_LOGICAL_RESET);
-	if (err == FAILED)
+	lrbp = &hba->lrb[tag];
+	err = ufshcd_issue_tm_cmd(hba, lrbp->lun, lrbp->task_tag,
+			UFS_LOGICAL_RESET, &resp);
+	if (err || resp != UPIU_TASK_MANAGEMENT_FUNC_COMPL) {
+		err = FAILED;
 		goto out;
+	} else {
+		err = SUCCESS;
+	}
 
 	for (pos = 0; pos < SCSI_CMD_QUEUE_SIZE; pos++) {
 		if (test_bit(pos, &hba->outstanding_reqs) &&
@@ -2468,6 +2511,8 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
 	unsigned long flags;
 	unsigned int tag;
 	int err;
+	u8 resp;
+	struct ufshcd_lrb *lrbp;
 
 	host = cmd->device->host;
 	hba = shost_priv(host);
@@ -2483,9 +2528,15 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
 	}
 	spin_unlock_irqrestore(host->host_lock, flags);
 
-	err = ufshcd_issue_tm_cmd(hba, &hba->lrb[tag], UFS_ABORT_TASK);
-	if (err == FAILED)
+	lrbp = &hba->lrb[tag];
+	err = ufshcd_issue_tm_cmd(hba, lrbp->lun, lrbp->task_tag,
+			UFS_ABORT_TASK, &resp);
+	if (err || resp != UPIU_TASK_MANAGEMENT_FUNC_COMPL) {
+		err = FAILED;
 		goto out;
+	} else {
+		err = SUCCESS;
+	}
 
 	scsi_dma_unmap(cmd);
 
@@ -2714,7 +2765,8 @@ int ufshcd_init(struct device *dev, struct ufs_hba **hba_handle,
 	host->max_cmd_len = MAX_CDB_SIZE;
 
 	/* Initailize wait queue for task management */
-	init_waitqueue_head(&hba->ufshcd_tm_wait_queue);
+	init_waitqueue_head(&hba->tm_wq);
+	init_waitqueue_head(&hba->tm_tag_wq);
 
 	/* Initialize work queues */
 	INIT_WORK(&hba->feh_workq, ufshcd_fatal_err_handler);
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index 682c285..8ca08fe 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -172,7 +172,8 @@ struct ufs_internal_cmd {
  * @irq: Irq number of the controller
  * @active_uic_cmd: handle of active UIC command
  * @uic_cmd_mutex: mutex for uic command
- * @ufshcd_tm_wait_queue: wait queue for task management
+ * @tm_wq: wait queue for task management
+ * @tm_tag_wq: wait queue for free task management slots
  * @tm_condition: condition variable for task management
  * @ufshcd_state: UFSHCD states
  * @intr_mask: Interrupt Mask Bits
@@ -213,7 +214,8 @@ struct ufs_hba {
 	struct uic_command *active_uic_cmd;
 	struct mutex uic_cmd_mutex;
 
-	wait_queue_head_t ufshcd_tm_wait_queue;
+	wait_queue_head_t tm_wq;
+	wait_queue_head_t tm_tag_wq;
 	unsigned long tm_condition;
 
 	u32 ufshcd_state;
-- 
QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
of Code Aurora Forum, hosted by The Linux Foundation.

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/4] scsi: ufs: Fix broken task management command implementation
  2013-06-13 14:30 ` [PATCH 1/4] scsi: ufs: Fix broken task management command implementation Sujit Reddy Thumma
@ 2013-06-27 11:19   ` Santosh Y
  2013-06-28 11:32     ` Sujit Reddy Thumma
  0 siblings, 1 reply; 12+ messages in thread
From: Santosh Y @ 2013-06-27 11:19 UTC (permalink / raw)
  To: Sujit Reddy Thumma
  Cc: Vinayak Holikatti, linux-scsi, JBottomley, linux-arm-msm

On Thu, Jun 13, 2013 at 8:00 PM, Sujit Reddy Thumma
<sthumma@codeaurora.org> wrote:
> Currently, sending Task Management (TM) command to the card might
> be broken in some scenarios as listed below:
>
> - If there are more than 8 TM commands the implementation returns
> error to the caller.
> Fix: Wait for one of the slots to be emptied and send the command.
>
> - Sometimes it is necessary for the caller to know the TM service
> response code to determine the task status.
> Fix: Propogate the service response to the caller.
>
> - If the TM command times out no proper error recovery is implemented.
> Fix: Clear the command in the controller door-bell register, so that
> further commands for the same slot don't fail.
>
> - While preparing the TM command descriptor the tag used should be the
> free slot of TM command and not the task tag of the command which
> the TM command is trying to manage.
> Fix: Use free slot tag instead of task tag of SCSI command
>
> - Since the TM command involves H/W communication, abruptly ending the
> request on kill interrupt signal might cause h/w malfunction.
> Fix: Wait for hardware completion interrupt with TASK_UNINTERRUPTIBLE set.
>
> Signed-off-by: Sujit Reddy Thumma <sthumma@codeaurora.org>
> ---
>  drivers/scsi/ufs/ufshcd.c |  174 +++++++++++++++++++++++++++++----------------
>  drivers/scsi/ufs/ufshcd.h |    6 +-
>  2 files changed, 117 insertions(+), 63 deletions(-)
>
> diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> index aa1f11e..7d043f4 100644
> --- a/drivers/scsi/ufs/ufshcd.c
> +++ b/drivers/scsi/ufs/ufshcd.c
> @@ -53,6 +53,9 @@
>  /* Query request timeout */
>  #define QUERY_REQ_TIMEOUT 30 /* msec */
>
> +/* Task management command timeout */
> +#define TM_CMD_TIMEOUT 100 /* msecs */
> +
>  #define SCSI_CMD_QUEUE_SIZE (hba->nutrs - 1)
>  /* Reserved tag for internal commands */
>  #define INTERNAL_CMD_TAG   (hba->nutrs - 1)
> @@ -188,13 +191,32 @@ ufshcd_get_tmr_ocs(struct utp_task_req_desc *task_req_descp)
>  /**
>   * ufshcd_get_tm_free_slot - get a free slot for task management request
>   * @hba: per adapter instance
> + * @free_slot: pointer to variable with available slot value
>   *
> - * Returns maximum number of task management request slots in case of
> - * task management queue full or returns the free slot number
> + * Get a free tag and lock it until ufshcd_put_tm_slot() is called.
> + * Returns 0 if free slot is not available, else return 1 with tag value
> + * in @free_slot.
>   */
> -static inline int ufshcd_get_tm_free_slot(struct ufs_hba *hba)
> +static bool ufshcd_get_tm_free_slot(struct ufs_hba *hba, int *free_slot)
> +{
> +       int tag;
> +
> +       do {
> +               tag = find_first_zero_bit(
> +                               &hba->outstanding_tasks, hba->nutmrs);
> +               if (tag >= hba->nutmrs)
> +                       return false;
> +       } while (test_and_set_bit_lock(tag, &hba->outstanding_tasks));
> +
> +       if (free_slot)
> +               *free_slot = tag;
> +
> +       return true;
> +}
> +
> +static inline void ufshcd_put_tm_slot(struct ufs_hba *hba, int slot)
>  {
> -       return find_first_zero_bit(&hba->outstanding_tasks, hba->nutmrs);
> +       clear_bit_unlock(slot, &hba->outstanding_tasks);
>  }
>
>  /**
> @@ -1745,10 +1767,11 @@ static void ufshcd_slave_destroy(struct scsi_device *sdev)
>   * ufshcd_task_req_compl - handle task management request completion
>   * @hba: per adapter instance
>   * @index: index of the completed request
> + * @resp: task management service response
>   *
> - * Returns SUCCESS/FAILED
> + * Returns non-zero value on error, zero on success
>   */
> -static int ufshcd_task_req_compl(struct ufs_hba *hba, u32 index)
> +static int ufshcd_task_req_compl(struct ufs_hba *hba, u32 index, u8 *resp)
>  {
>         struct utp_task_req_desc *task_req_descp;
>         struct utp_upiu_task_rsp *task_rsp_upiup;
> @@ -1758,9 +1781,6 @@ static int ufshcd_task_req_compl(struct ufs_hba *hba, u32 index)
>
>         spin_lock_irqsave(hba->host->host_lock, flags);
>
> -       /* Clear completed tasks from outstanding_tasks */
> -       __clear_bit(index, &hba->outstanding_tasks);
> -
>         task_req_descp = hba->utmrdl_base_addr;
>         ocs_value = ufshcd_get_tmr_ocs(&task_req_descp[index]);
>
> @@ -1769,19 +1789,15 @@ static int ufshcd_task_req_compl(struct ufs_hba *hba, u32 index)
>                                 task_req_descp[index].task_rsp_upiu;
>                 task_result = be32_to_cpu(task_rsp_upiup->header.dword_1);
>                 task_result = ((task_result & MASK_TASK_RESPONSE) >> 8);
> -
> -               if (task_result != UPIU_TASK_MANAGEMENT_FUNC_COMPL &&
> -                   task_result != UPIU_TASK_MANAGEMENT_FUNC_SUCCEEDED)
> -                       task_result = FAILED;
> -               else
> -                       task_result = SUCCESS;
> +               if (resp)
> +                       *resp = (u8)task_result;
>         } else {
> -               task_result = FAILED;
> -               dev_err(hba->dev,
> -                       "trc: Invalid ocs = %x\n", ocs_value);
> +               dev_err(hba->dev, "%s: failed, ocs = 0x%x\n",
> +                               __func__, ocs_value);
>         }
>         spin_unlock_irqrestore(hba->host->host_lock, flags);
> -       return task_result;
> +
> +       return ocs_value;
>  }
>
>  /**
> @@ -2258,7 +2274,7 @@ static void ufshcd_tmc_handler(struct ufs_hba *hba)
>
>         tm_doorbell = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL);
>         hba->tm_condition = tm_doorbell ^ hba->outstanding_tasks;
> -       wake_up_interruptible(&hba->ufshcd_tm_wait_queue);
> +       wake_up(&hba->tm_wq);
>  }
>
>  /**
> @@ -2308,17 +2324,42 @@ static irqreturn_t ufshcd_intr(int irq, void *__hba)
>         return retval;
>  }
>
> +static int ufshcd_clear_tm_cmd(struct ufs_hba *hba, int tag)
> +{
> +       int err = 0;
> +       u32 reg;
> +       u32 mask = 1 << tag;
> +       unsigned long flags;
> +
> +       if (!test_bit(tag, &hba->outstanding_reqs))
> +               goto out;
> +
> +       spin_lock_irqsave(hba->host->host_lock, flags);
> +       ufshcd_writel(hba, ~(1 << tag), REG_UTP_TASK_REQ_LIST_CLEAR);
> +       spin_unlock_irqrestore(hba->host->host_lock, flags);
> +
> +       /* poll for max. 1 sec to clear door bell register by h/w */
> +       reg = ufshcd_wait_for_register(hba,
> +                       REG_UTP_TASK_REQ_DOOR_BELL,
> +                       mask, 0, 1000, 1000);
> +       if ((reg & mask) == mask)
> +               err = -ETIMEDOUT;
> +out:
> +       return err;
> +}
> +
>  /**
>   * ufshcd_issue_tm_cmd - issues task management commands to controller
>   * @hba: per adapter instance
> - * @lrbp: pointer to local reference block
> + * @lun_id: LUN ID to which TM command is sent
> + * @task_id: task ID to which the TM command is applicable
> + * @tm_function: task management function opcode
> + * @tm_response: task management service response return value
>   *
> - * Returns SUCCESS/FAILED
> + * Returns non-zero value on error, zero on success.
>   */
> -static int
> -ufshcd_issue_tm_cmd(struct ufs_hba *hba,
> -                   struct ufshcd_lrb *lrbp,
> -                   u8 tm_function)
> +static int ufshcd_issue_tm_cmd(struct ufs_hba *hba, int lun_id, int task_id,
> +               u8 tm_function, u8 *tm_response)
>  {
>         struct utp_task_req_desc *task_req_descp;
>         struct utp_upiu_task_req *task_req_upiup;
> @@ -2329,17 +2370,10 @@ ufshcd_issue_tm_cmd(struct ufs_hba *hba,
>
>         host = hba->host;
>
> -       spin_lock_irqsave(host->host_lock, flags);
> -
> -       /* If task management queue is full */
> -       free_slot = ufshcd_get_tm_free_slot(hba);
> -       if (free_slot >= hba->nutmrs) {
> -               spin_unlock_irqrestore(host->host_lock, flags);
> -               dev_err(hba->dev, "Task management queue full\n");
> -               err = FAILED;
> -               goto out;
> -       }
> +       /* Get free slot, sleep if slots are unavailable */
> +       wait_event(hba->tm_tag_wq, ufshcd_get_tm_free_slot(hba, &free_slot));
>
> +       spin_lock_irqsave(host->host_lock, flags);
>         task_req_descp = hba->utmrdl_base_addr;
>         task_req_descp += free_slot;
>
> @@ -2353,38 +2387,39 @@ ufshcd_issue_tm_cmd(struct ufs_hba *hba,
>                 (struct utp_upiu_task_req *) task_req_descp->task_req_upiu;
>         task_req_upiup->header.dword_0 =
>                 UPIU_HEADER_DWORD(UPIU_TRANSACTION_TASK_REQ, 0,
> -                                             lrbp->lun, lrbp->task_tag);
> +                               lun_id, free_slot);

Actually it still doesn't fix the problem. The *task tag* used here
should be unique across the SCSI/Query and Task Managment UPIUs.

>         task_req_upiup->header.dword_1 =
>                 UPIU_HEADER_DWORD(0, tm_function, 0, 0);
>
> -       task_req_upiup->input_param1 = lrbp->lun;
> -       task_req_upiup->input_param1 =
> -               cpu_to_be32(task_req_upiup->input_param1);
> -       task_req_upiup->input_param2 = lrbp->task_tag;
> -       task_req_upiup->input_param2 =
> -               cpu_to_be32(task_req_upiup->input_param2);
> +       task_req_upiup->input_param1 = cpu_to_be32(lun_id);
> +       task_req_upiup->input_param2 = cpu_to_be32(task_id);
>
> -       /* send command to the controller */
> -       __set_bit(free_slot, &hba->outstanding_tasks);
> +       /*
> +        * outstanding_tasks variable is already updated with corresponding
> +        * bit while acquiring free slot. Ring doorbell here.
> +        */
>         ufshcd_writel(hba, 1 << free_slot, REG_UTP_TASK_REQ_DOOR_BELL);
>
>         spin_unlock_irqrestore(host->host_lock, flags);
>
>         /* wait until the task management command is completed */
> -       err =
> -       wait_event_interruptible_timeout(hba->ufshcd_tm_wait_queue,
> -                                        (test_bit(free_slot,
> -                                        &hba->tm_condition) != 0),
> -                                        60 * HZ);
> +       err = wait_event_timeout(hba->tm_wq,
> +                       test_bit(free_slot, &hba->tm_condition),
> +                       msecs_to_jiffies(TM_CMD_TIMEOUT));
>         if (!err) {
> -               dev_err(hba->dev,
> -                       "Task management command timed-out\n");
> -               err = FAILED;
> -               goto out;
> +               dev_err(hba->dev, "%s: task management cmd 0x%.2x timed-out\n",
> +                               __func__, tm_function);
> +               if (ufshcd_clear_tm_cmd(hba, free_slot))
> +                       dev_WARN(hba->dev, "%s: unable clear tm cmd (slot %d) after timeout\n",
> +                                       __func__, free_slot);
> +               err = -ETIMEDOUT;
> +       } else {
> +               err = ufshcd_task_req_compl(hba, free_slot, tm_response);
>         }
>         clear_bit(free_slot, &hba->tm_condition);
> -       err = ufshcd_task_req_compl(hba, free_slot);
> -out:
> +       ufshcd_put_tm_slot(hba, free_slot);
> +       wake_up(&hba->tm_tag_wq);
> +
>         return err;
>  }
>
> @@ -2401,14 +2436,22 @@ static int ufshcd_device_reset(struct scsi_cmnd *cmd)
>         unsigned int tag;
>         u32 pos;
>         int err;
> +       u8 resp;
> +       struct ufshcd_lrb *lrbp;
>
>         host = cmd->device->host;
>         hba = shost_priv(host);
>         tag = cmd->request->tag;
>
> -       err = ufshcd_issue_tm_cmd(hba, &hba->lrb[tag], UFS_LOGICAL_RESET);
> -       if (err == FAILED)
> +       lrbp = &hba->lrb[tag];
> +       err = ufshcd_issue_tm_cmd(hba, lrbp->lun, lrbp->task_tag,
> +                       UFS_LOGICAL_RESET, &resp);
> +       if (err || resp != UPIU_TASK_MANAGEMENT_FUNC_COMPL) {
> +               err = FAILED;
>                 goto out;
> +       } else {
> +               err = SUCCESS;
> +       }
>
>         for (pos = 0; pos < SCSI_CMD_QUEUE_SIZE; pos++) {
>                 if (test_bit(pos, &hba->outstanding_reqs) &&
> @@ -2468,6 +2511,8 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
>         unsigned long flags;
>         unsigned int tag;
>         int err;
> +       u8 resp;
> +       struct ufshcd_lrb *lrbp;
>
>         host = cmd->device->host;
>         hba = shost_priv(host);
> @@ -2483,9 +2528,15 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
>         }
>         spin_unlock_irqrestore(host->host_lock, flags);
>
> -       err = ufshcd_issue_tm_cmd(hba, &hba->lrb[tag], UFS_ABORT_TASK);
> -       if (err == FAILED)
> +       lrbp = &hba->lrb[tag];
> +       err = ufshcd_issue_tm_cmd(hba, lrbp->lun, lrbp->task_tag,
> +                       UFS_ABORT_TASK, &resp);
> +       if (err || resp != UPIU_TASK_MANAGEMENT_FUNC_COMPL) {
> +               err = FAILED;
>                 goto out;
> +       } else {
> +               err = SUCCESS;
> +       }
>
>         scsi_dma_unmap(cmd);
>
> @@ -2714,7 +2765,8 @@ int ufshcd_init(struct device *dev, struct ufs_hba **hba_handle,
>         host->max_cmd_len = MAX_CDB_SIZE;
>
>         /* Initailize wait queue for task management */
> -       init_waitqueue_head(&hba->ufshcd_tm_wait_queue);
> +       init_waitqueue_head(&hba->tm_wq);
> +       init_waitqueue_head(&hba->tm_tag_wq);
>
>         /* Initialize work queues */
>         INIT_WORK(&hba->feh_workq, ufshcd_fatal_err_handler);
> diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
> index 682c285..8ca08fe 100644
> --- a/drivers/scsi/ufs/ufshcd.h
> +++ b/drivers/scsi/ufs/ufshcd.h
> @@ -172,7 +172,8 @@ struct ufs_internal_cmd {
>   * @irq: Irq number of the controller
>   * @active_uic_cmd: handle of active UIC command
>   * @uic_cmd_mutex: mutex for uic command
> - * @ufshcd_tm_wait_queue: wait queue for task management
> + * @tm_wq: wait queue for task management
> + * @tm_tag_wq: wait queue for free task management slots
>   * @tm_condition: condition variable for task management
>   * @ufshcd_state: UFSHCD states
>   * @intr_mask: Interrupt Mask Bits
> @@ -213,7 +214,8 @@ struct ufs_hba {
>         struct uic_command *active_uic_cmd;
>         struct mutex uic_cmd_mutex;
>
> -       wait_queue_head_t ufshcd_tm_wait_queue;
> +       wait_queue_head_t tm_wq;
> +       wait_queue_head_t tm_tag_wq;
>         unsigned long tm_condition;
>
>         u32 ufshcd_state;
> --
> QUALCOMM INDIA, on behalf of Qualcomm Innovation Center, Inc. is a member
> of Code Aurora Forum, hosted by The Linux Foundation.
>



-- 
~Santosh

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/4] scsi: ufs: Fix broken task management command implementation
  2013-06-27 11:19   ` Santosh Y
@ 2013-06-28 11:32     ` Sujit Reddy Thumma
  2013-07-02 15:51       ` Santosh Y
  0 siblings, 1 reply; 12+ messages in thread
From: Sujit Reddy Thumma @ 2013-06-28 11:32 UTC (permalink / raw)
  To: Santosh Y; +Cc: Vinayak Holikatti, linux-scsi, JBottomley, linux-arm-msm

On 6/27/2013 4:49 PM, Santosh Y wrote:
>> >+       spin_lock_irqsave(host->host_lock, flags);
>> >         task_req_descp = hba->utmrdl_base_addr;
>> >         task_req_descp += free_slot;
>> >
>> >@@ -2353,38 +2387,39 @@ ufshcd_issue_tm_cmd(struct ufs_hba *hba,
>> >                 (struct utp_upiu_task_req *) task_req_descp->task_req_upiu;
>> >         task_req_upiup->header.dword_0 =
>> >                 UPIU_HEADER_DWORD(UPIU_TRANSACTION_TASK_REQ, 0,
>> >-                                             lrbp->lun, lrbp->task_tag);
>> >+                               lun_id, free_slot);
> Actually it still doesn't fix the problem. The*task tag*  used here
> should be unique across the SCSI/Query and Task Managment UPIUs.

I am sorry, I didn't get that. Why should it be unique across the 
SCSI/Query? For example, if a machine supports 32 request slots and 8 
task management slots, then the task management command tag can be 
anything out of 8 slots.

-- 
Regards,
Sujit

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/4] scsi: ufs: Fix broken task management command implementation
  2013-06-28 11:32     ` Sujit Reddy Thumma
@ 2013-07-02 15:51       ` Santosh Y
  2013-07-03 15:52         ` Sujit Reddy Thumma
  0 siblings, 1 reply; 12+ messages in thread
From: Santosh Y @ 2013-07-02 15:51 UTC (permalink / raw)
  To: Sujit Reddy Thumma
  Cc: Vinayak Holikatti, linux-scsi, JBottomley, linux-arm-msm

On Fri, Jun 28, 2013 at 5:02 PM, Sujit Reddy Thumma
<sthumma@codeaurora.org> wrote:
> On 6/27/2013 4:49 PM, Santosh Y wrote:
>>>
>>> >+       spin_lock_irqsave(host->host_lock, flags);
>>> >         task_req_descp = hba->utmrdl_base_addr;
>>> >         task_req_descp += free_slot;
>>> >
>>> >@@ -2353,38 +2387,39 @@ ufshcd_issue_tm_cmd(struct ufs_hba *hba,
>>> >                 (struct utp_upiu_task_req *)
>>> > task_req_descp->task_req_upiu;
>>> >         task_req_upiup->header.dword_0 =
>>> >                 UPIU_HEADER_DWORD(UPIU_TRANSACTION_TASK_REQ, 0,
>>> >-                                             lrbp->lun,
>>> > lrbp->task_tag);
>>> >+                               lun_id, free_slot);
>>
>> Actually it still doesn't fix the problem. The*task tag*  used here
>>
>> should be unique across the SCSI/Query and Task Managment UPIUs.
>
>
> I am sorry, I didn't get that. Why should it be unique across the
> SCSI/Query? For example, if a machine supports 32 request slots and 8 task
> management slots, then the task management command tag can be anything out
> of 8 slots.
>

The spec(ufs 1.1) has the requirement under  '10.5.2 Basic Header
Format'->'Task Tag'.
Couple of devices I came across had similar behavior. The tracking of
UPIUs --even belonging to a separate group-- seemed to be based on the
'task tag' value rather than 'type of UPIU'->'task tag'.

-- 
~Santosh

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/4] scsi: ufs: Fix broken task management command implementation
  2013-07-02 15:51       ` Santosh Y
@ 2013-07-03 15:52         ` Sujit Reddy Thumma
  2013-07-03 16:23           ` Santosh Y
  0 siblings, 1 reply; 12+ messages in thread
From: Sujit Reddy Thumma @ 2013-07-03 15:52 UTC (permalink / raw)
  To: Santosh Y; +Cc: Vinayak Holikatti, linux-scsi, JBottomley, linux-arm-msm

On 7/2/2013 9:21 PM, Santosh Y wrote:
> On Fri, Jun 28, 2013 at 5:02 PM, Sujit Reddy Thumma
> <sthumma@codeaurora.org> wrote:
>> On 6/27/2013 4:49 PM, Santosh Y wrote:
>>>>
>>>>> +       spin_lock_irqsave(host->host_lock, flags);
>>>>>          task_req_descp = hba->utmrdl_base_addr;
>>>>>          task_req_descp += free_slot;
>>>>>
>>>>> @@ -2353,38 +2387,39 @@ ufshcd_issue_tm_cmd(struct ufs_hba *hba,
>>>>>                  (struct utp_upiu_task_req *)
>>>>> task_req_descp->task_req_upiu;
>>>>>          task_req_upiup->header.dword_0 =
>>>>>                  UPIU_HEADER_DWORD(UPIU_TRANSACTION_TASK_REQ, 0,
>>>>> -                                             lrbp->lun,
>>>>> lrbp->task_tag);
>>>>> +                               lun_id, free_slot);
>>>
>>> Actually it still doesn't fix the problem. The*task tag*  used here
>>>
>>> should be unique across the SCSI/Query and Task Managment UPIUs.
>>
>>
>> I am sorry, I didn't get that. Why should it be unique across the
>> SCSI/Query? For example, if a machine supports 32 request slots and 8 task
>> management slots, then the task management command tag can be anything out
>> of 8 slots.
>>
>
> The spec(ufs 1.1) has the requirement under  '10.5.2 Basic Header
> Format'->'Task Tag'.
> Couple of devices I came across had similar behavior. The tracking of
> UPIUs --even belonging to a separate group-- seemed to be based on the
> 'task tag' value rather than 'type of UPIU'->'task tag'.

Thanks for the clarification. So to make the task tag unique we should 
do something like below:

@@ -2940,9 +2941,10 @@ static int ufshcd_issue_tm_cmd(struct ufs_hba 
*hba, int lun_id, int task_id,
         /* Configure task request UPIU */
         task_req_upiup =
                 (struct utp_upiu_task_req *) task_req_descp->task_req_upiu;
+       task_tag = hba->nutrs + free_slot;
         task_req_upiup->header.dword_0 =
                 UPIU_HEADER_DWORD(UPIU_TRANSACTION_TASK_REQ, 0,
-                               lun_id, free_slot);
+                               lun_id, task_tag);

Will this work for the devices you came across?


-- 
Regards,
Sujit

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/4] scsi: ufs: Fix broken task management command implementation
  2013-07-03 15:52         ` Sujit Reddy Thumma
@ 2013-07-03 16:23           ` Santosh Y
  2013-07-03 16:42             ` Sujit Reddy Thumma
  0 siblings, 1 reply; 12+ messages in thread
From: Santosh Y @ 2013-07-03 16:23 UTC (permalink / raw)
  To: Sujit Reddy Thumma
  Cc: Vinayak Holikatti, linux-scsi, JBottomley, linux-arm-msm

On Wed, Jul 3, 2013 at 9:22 PM, Sujit Reddy Thumma
<sthumma@codeaurora.org> wrote:
> On 7/2/2013 9:21 PM, Santosh Y wrote:
>>
>> On Fri, Jun 28, 2013 at 5:02 PM, Sujit Reddy Thumma
>> <sthumma@codeaurora.org> wrote:
>>>
>>> On 6/27/2013 4:49 PM, Santosh Y wrote:
>>>>>
>>>>>
>>>>>> +       spin_lock_irqsave(host->host_lock, flags);
>>>>>>          task_req_descp = hba->utmrdl_base_addr;
>>>>>>          task_req_descp += free_slot;
>>>>>>
>>>>>> @@ -2353,38 +2387,39 @@ ufshcd_issue_tm_cmd(struct ufs_hba *hba,
>>>>>>                  (struct utp_upiu_task_req *)
>>>>>> task_req_descp->task_req_upiu;
>>>>>>          task_req_upiup->header.dword_0 =
>>>>>>                  UPIU_HEADER_DWORD(UPIU_TRANSACTION_TASK_REQ, 0,
>>>>>> -                                             lrbp->lun,
>>>>>> lrbp->task_tag);
>>>>>> +                               lun_id, free_slot);
>>>>
>>>>
>>>> Actually it still doesn't fix the problem. The*task tag*  used here
>>>>
>>>> should be unique across the SCSI/Query and Task Managment UPIUs.
>>>
>>>
>>>
>>> I am sorry, I didn't get that. Why should it be unique across the
>>> SCSI/Query? For example, if a machine supports 32 request slots and 8
>>> task
>>> management slots, then the task management command tag can be anything
>>> out
>>> of 8 slots.
>>>
>>
>> The spec(ufs 1.1) has the requirement under  '10.5.2 Basic Header
>> Format'->'Task Tag'.
>> Couple of devices I came across had similar behavior. The tracking of
>> UPIUs --even belonging to a separate group-- seemed to be based on the
>> 'task tag' value rather than 'type of UPIU'->'task tag'.
>
>
> Thanks for the clarification. So to make the task tag unique we should do
> something like below:
>
> @@ -2940,9 +2941,10 @@ static int ufshcd_issue_tm_cmd(struct ufs_hba *hba,
> int lun_id, int task_id,
>         /* Configure task request UPIU */
>         task_req_upiup =
>
>                 (struct utp_upiu_task_req *) task_req_descp->task_req_upiu;
> +       task_tag = hba->nutrs + free_slot;

Yes, this was exactly my internal fix at the time :-)

>
>         task_req_upiup->header.dword_0 =
>                 UPIU_HEADER_DWORD(UPIU_TRANSACTION_TASK_REQ, 0,
> -                               lun_id, free_slot);
> +                               lun_id, task_tag);
>
> Will this work for the devices you came across?
>
>
> --
> Regards,
> Sujit



-- 
~Santosh

^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 1/4] scsi: ufs: Fix broken task management command implementation
  2013-07-03 16:23           ` Santosh Y
@ 2013-07-03 16:42             ` Sujit Reddy Thumma
  0 siblings, 0 replies; 12+ messages in thread
From: Sujit Reddy Thumma @ 2013-07-03 16:42 UTC (permalink / raw)
  To: Santosh Y; +Cc: Vinayak Holikatti, linux-scsi, JBottomley, linux-arm-msm

On 7/3/2013 9:53 PM, Santosh Y wrote:
> On Wed, Jul 3, 2013 at 9:22 PM, Sujit Reddy Thumma
> <sthumma@codeaurora.org> wrote:
>> On 7/2/2013 9:21 PM, Santosh Y wrote:
>>>
>>> On Fri, Jun 28, 2013 at 5:02 PM, Sujit Reddy Thumma
>>> <sthumma@codeaurora.org> wrote:
>>>>
>>>> On 6/27/2013 4:49 PM, Santosh Y wrote:
>>>>>>
>>>>>>
>>>>>>> +       spin_lock_irqsave(host->host_lock, flags);
>>>>>>>           task_req_descp = hba->utmrdl_base_addr;
>>>>>>>           task_req_descp += free_slot;
>>>>>>>
>>>>>>> @@ -2353,38 +2387,39 @@ ufshcd_issue_tm_cmd(struct ufs_hba *hba,
>>>>>>>                   (struct utp_upiu_task_req *)
>>>>>>> task_req_descp->task_req_upiu;
>>>>>>>           task_req_upiup->header.dword_0 =
>>>>>>>                   UPIU_HEADER_DWORD(UPIU_TRANSACTION_TASK_REQ, 0,
>>>>>>> -                                             lrbp->lun,
>>>>>>> lrbp->task_tag);
>>>>>>> +                               lun_id, free_slot);
>>>>>
>>>>>
>>>>> Actually it still doesn't fix the problem. The*task tag*  used here
>>>>>
>>>>> should be unique across the SCSI/Query and Task Managment UPIUs.
>>>>
>>>>
>>>>
>>>> I am sorry, I didn't get that. Why should it be unique across the
>>>> SCSI/Query? For example, if a machine supports 32 request slots and 8
>>>> task
>>>> management slots, then the task management command tag can be anything
>>>> out
>>>> of 8 slots.
>>>>
>>>
>>> The spec(ufs 1.1) has the requirement under  '10.5.2 Basic Header
>>> Format'->'Task Tag'.
>>> Couple of devices I came across had similar behavior. The tracking of
>>> UPIUs --even belonging to a separate group-- seemed to be based on the
>>> 'task tag' value rather than 'type of UPIU'->'task tag'.
>>
>>
>> Thanks for the clarification. So to make the task tag unique we should do
>> something like below:
>>
>> @@ -2940,9 +2941,10 @@ static int ufshcd_issue_tm_cmd(struct ufs_hba *hba,
>> int lun_id, int task_id,
>>          /* Configure task request UPIU */
>>          task_req_upiup =
>>
>>                  (struct utp_upiu_task_req *) task_req_descp->task_req_upiu;
>> +       task_tag = hba->nutrs + free_slot;
>
> Yes, this was exactly my internal fix at the time :-)

Okay. I will update the patch with this. Thanks.

>
>>
>>          task_req_upiup->header.dword_0 =
>>                  UPIU_HEADER_DWORD(UPIU_TRANSACTION_TASK_REQ, 0,
>> -                               lun_id, free_slot);
>> +                               lun_id, task_tag);
>>
>> Will this work for the devices you came across?
>>


-- 
Regards,
Sujit

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 0/4] scsi: ufs: TM, fatal-error handling and other fixes
@ 2013-08-14 17:10 Santosh Y
  2013-08-14 17:10 ` [PATCH 1/4] scsi: ufs: Fix broken task management command implementation Santosh Y
                   ` (3 more replies)
  0 siblings, 4 replies; 12+ messages in thread
From: Santosh Y @ 2013-08-14 17:10 UTC (permalink / raw)
  To: james.bottomley; +Cc: linux-scsi, vinholikatti, Santosh Y

Hi James,

Please merge the following patches to 'misc' branch.
The patches depend on http://thread.gmane.org/gmane.linux.scsi/83565
patch series.

Thanks,
Santosh


Sujit Reddy Thumma (4):
  scsi: ufs: Fix broken task management command implementation
  scsi: ufs: Fix hardware race conditions while aborting a command
  scsi: ufs: Fix device and host reset methods
  scsi: ufs: Improve UFS fatal error handling

 drivers/scsi/ufs/ufshcd.c | 684 ++++++++++++++++++++++++++++++++--------------
 drivers/scsi/ufs/ufshcd.h |  20 +-
 2 files changed, 501 insertions(+), 203 deletions(-)

-- 
1.8.3.4


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 1/4] scsi: ufs: Fix broken task management command implementation
  2013-08-14 17:10 [PATCH 0/4] scsi: ufs: TM, fatal-error handling and other fixes Santosh Y
@ 2013-08-14 17:10 ` Santosh Y
  2013-08-14 17:10 ` [PATCH 2/4] scsi: ufs: Fix hardware race conditions while aborting a command Santosh Y
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 12+ messages in thread
From: Santosh Y @ 2013-08-14 17:10 UTC (permalink / raw)
  To: james.bottomley; +Cc: linux-scsi, vinholikatti, Sujit Reddy Thumma, Santosh Y

From: Sujit Reddy Thumma <sthumma@codeaurora.org>

Currently, sending Task Management (TM) command to the card might
be broken in some scenarios as listed below:

Problem: If there are more than 8 TM commands the implementation
         returns error to the caller.
Fix:     Wait for one of the slots to be emptied and send the command.

Problem: Sometimes it is necessary for the caller to know the TM service
         response code to determine the task status.
Fix:     Propogate the service response to the caller.

Problem: If the TM command times out no proper error recovery is
         implemented.
Fix:     Clear the command in the controller door-bell register, so that
         further commands for the same slot don't fail.

Problem: While preparing the TM command descriptor, the task tag used
         should be unique across SCSI/NOP/QUERY/TM commands and not the
	 task tag of the command which the TM command is trying to manage.
Fix:     Use a unique task tag instead of task tag of SCSI command.

Problem: Since the TM command involves H/W communication, abruptly ending
         the request on kill interrupt signal might cause h/w malfunction.
Fix:     Wait for hardware completion interrupt with TASK_UNINTERRUPTIBLE
         set.

Signed-off-by: Sujit Reddy Thumma <sthumma@codeaurora.org>
Reviewed-by: Yaniv Gardi <ygardi@codeaurora.org>
Tested-by: Dolev Raviv <draviv@codeaurora.org>
Signed-off-by: Santosh Y <santoshsy@gmail.com>

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index b36ca9a..e0810a3 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -53,6 +53,9 @@
 /* Query request timeout */
 #define QUERY_REQ_TIMEOUT 30 /* msec */
 
+/* Task management command timeout */
+#define TM_CMD_TIMEOUT	100 /* msecs */
+
 /* Expose the flag value from utp_upiu_query.value */
 #define MASK_QUERY_UPIU_FLAG_LOC 0xFF
 
@@ -183,13 +186,35 @@ ufshcd_get_tmr_ocs(struct utp_task_req_desc *task_req_descp)
 /**
  * ufshcd_get_tm_free_slot - get a free slot for task management request
  * @hba: per adapter instance
+ * @free_slot: pointer to variable with available slot value
  *
- * Returns maximum number of task management request slots in case of
- * task management queue full or returns the free slot number
+ * Get a free tag and lock it until ufshcd_put_tm_slot() is called.
+ * Returns 0 if free slot is not available, else return 1 with tag value
+ * in @free_slot.
  */
-static inline int ufshcd_get_tm_free_slot(struct ufs_hba *hba)
+static bool ufshcd_get_tm_free_slot(struct ufs_hba *hba, int *free_slot)
 {
-	return find_first_zero_bit(&hba->outstanding_tasks, hba->nutmrs);
+	int tag;
+	bool ret = false;
+
+	if (!free_slot)
+		goto out;
+
+	do {
+		tag = find_first_zero_bit(&hba->tm_slots_in_use, hba->nutmrs);
+		if (tag >= hba->nutmrs)
+			goto out;
+	} while (test_and_set_bit_lock(tag, &hba->tm_slots_in_use));
+
+	*free_slot = tag;
+	ret = true;
+out:
+	return ret;
+}
+
+static inline void ufshcd_put_tm_slot(struct ufs_hba *hba, int slot)
+{
+	clear_bit_unlock(slot, &hba->tm_slots_in_use);
 }
 
 /**
@@ -1700,10 +1725,11 @@ static void ufshcd_slave_destroy(struct scsi_device *sdev)
  * ufshcd_task_req_compl - handle task management request completion
  * @hba: per adapter instance
  * @index: index of the completed request
+ * @resp: task management service response
  *
- * Returns SUCCESS/FAILED
+ * Returns non-zero value on error, zero on success
  */
-static int ufshcd_task_req_compl(struct ufs_hba *hba, u32 index)
+static int ufshcd_task_req_compl(struct ufs_hba *hba, u32 index, u8 *resp)
 {
 	struct utp_task_req_desc *task_req_descp;
 	struct utp_upiu_task_rsp *task_rsp_upiup;
@@ -1724,19 +1750,15 @@ static int ufshcd_task_req_compl(struct ufs_hba *hba, u32 index)
 				task_req_descp[index].task_rsp_upiu;
 		task_result = be32_to_cpu(task_rsp_upiup->header.dword_1);
 		task_result = ((task_result & MASK_TASK_RESPONSE) >> 8);
-
-		if (task_result != UPIU_TASK_MANAGEMENT_FUNC_COMPL &&
-		    task_result != UPIU_TASK_MANAGEMENT_FUNC_SUCCEEDED)
-			task_result = FAILED;
-		else
-			task_result = SUCCESS;
+		if (resp)
+			*resp = (u8)task_result;
 	} else {
-		task_result = FAILED;
-		dev_err(hba->dev,
-			"trc: Invalid ocs = %x\n", ocs_value);
+		dev_err(hba->dev, "%s: failed, ocs = 0x%x\n",
+				__func__, ocs_value);
 	}
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
-	return task_result;
+
+	return ocs_value;
 }
 
 /**
@@ -2237,7 +2259,7 @@ static void ufshcd_tmc_handler(struct ufs_hba *hba)
 
 	tm_doorbell = ufshcd_readl(hba, REG_UTP_TASK_REQ_DOOR_BELL);
 	hba->tm_condition = tm_doorbell ^ hba->outstanding_tasks;
-	wake_up_interruptible(&hba->ufshcd_tm_wait_queue);
+	wake_up(&hba->tm_wq);
 }
 
 /**
@@ -2287,38 +2309,58 @@ static irqreturn_t ufshcd_intr(int irq, void *__hba)
 	return retval;
 }
 
+static int ufshcd_clear_tm_cmd(struct ufs_hba *hba, int tag)
+{
+	int err = 0;
+	u32 mask = 1 << tag;
+	unsigned long flags;
+
+	if (!test_bit(tag, &hba->outstanding_tasks))
+		goto out;
+
+	spin_lock_irqsave(hba->host->host_lock, flags);
+	ufshcd_writel(hba, ~(1 << tag), REG_UTP_TASK_REQ_LIST_CLEAR);
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+
+	/* poll for max. 1 sec to clear door bell register by h/w */
+	err = ufshcd_wait_for_register(hba,
+			REG_UTP_TASK_REQ_DOOR_BELL,
+			mask, 0, 1000, 1000);
+out:
+	return err;
+}
+
 /**
  * ufshcd_issue_tm_cmd - issues task management commands to controller
  * @hba: per adapter instance
- * @lrbp: pointer to local reference block
+ * @lun_id: LUN ID to which TM command is sent
+ * @task_id: task ID to which the TM command is applicable
+ * @tm_function: task management function opcode
+ * @tm_response: task management service response return value
  *
- * Returns SUCCESS/FAILED
+ * Returns non-zero value on error, zero on success.
  */
-static int
-ufshcd_issue_tm_cmd(struct ufs_hba *hba,
-		    struct ufshcd_lrb *lrbp,
-		    u8 tm_function)
+static int ufshcd_issue_tm_cmd(struct ufs_hba *hba, int lun_id, int task_id,
+		u8 tm_function, u8 *tm_response)
 {
 	struct utp_task_req_desc *task_req_descp;
 	struct utp_upiu_task_req *task_req_upiup;
 	struct Scsi_Host *host;
 	unsigned long flags;
-	int free_slot = 0;
+	int free_slot;
 	int err;
+	int task_tag;
 
 	host = hba->host;
 
-	spin_lock_irqsave(host->host_lock, flags);
-
-	/* If task management queue is full */
-	free_slot = ufshcd_get_tm_free_slot(hba);
-	if (free_slot >= hba->nutmrs) {
-		spin_unlock_irqrestore(host->host_lock, flags);
-		dev_err(hba->dev, "Task management queue full\n");
-		err = FAILED;
-		goto out;
-	}
+	/*
+	 * Get free slot, sleep if slots are unavailable.
+	 * Even though we use wait_event() which sleeps indefinitely,
+	 * the maximum wait time is bounded by %TM_CMD_TIMEOUT.
+	 */
+	wait_event(hba->tm_tag_wq, ufshcd_get_tm_free_slot(hba, &free_slot));
 
+	spin_lock_irqsave(host->host_lock, flags);
 	task_req_descp = hba->utmrdl_base_addr;
 	task_req_descp += free_slot;
 
@@ -2330,18 +2372,15 @@ ufshcd_issue_tm_cmd(struct ufs_hba *hba,
 	/* Configure task request UPIU */
 	task_req_upiup =
 		(struct utp_upiu_task_req *) task_req_descp->task_req_upiu;
+	task_tag = hba->nutrs + free_slot;
 	task_req_upiup->header.dword_0 =
 		UPIU_HEADER_DWORD(UPIU_TRANSACTION_TASK_REQ, 0,
-					      lrbp->lun, lrbp->task_tag);
+				lun_id, task_tag);
 	task_req_upiup->header.dword_1 =
 		UPIU_HEADER_DWORD(0, tm_function, 0, 0);
 
-	task_req_upiup->input_param1 = lrbp->lun;
-	task_req_upiup->input_param1 =
-		cpu_to_be32(task_req_upiup->input_param1);
-	task_req_upiup->input_param2 = lrbp->task_tag;
-	task_req_upiup->input_param2 =
-		cpu_to_be32(task_req_upiup->input_param2);
+	task_req_upiup->input_param1 = cpu_to_be32(lun_id);
+	task_req_upiup->input_param2 = cpu_to_be32(task_id);
 
 	/* send command to the controller */
 	__set_bit(free_slot, &hba->outstanding_tasks);
@@ -2350,20 +2389,24 @@ ufshcd_issue_tm_cmd(struct ufs_hba *hba,
 	spin_unlock_irqrestore(host->host_lock, flags);
 
 	/* wait until the task management command is completed */
-	err =
-	wait_event_interruptible_timeout(hba->ufshcd_tm_wait_queue,
-					 (test_bit(free_slot,
-					 &hba->tm_condition) != 0),
-					 60 * HZ);
+	err = wait_event_timeout(hba->tm_wq,
+			test_bit(free_slot, &hba->tm_condition),
+			msecs_to_jiffies(TM_CMD_TIMEOUT));
 	if (!err) {
-		dev_err(hba->dev,
-			"Task management command timed-out\n");
-		err = FAILED;
-		goto out;
+		dev_err(hba->dev, "%s: task management cmd 0x%.2x timed-out\n",
+				__func__, tm_function);
+		if (ufshcd_clear_tm_cmd(hba, free_slot))
+			dev_WARN(hba->dev, "%s: unable clear tm cmd (slot %d) after timeout\n",
+					__func__, free_slot);
+		err = -ETIMEDOUT;
+	} else {
+		err = ufshcd_task_req_compl(hba, free_slot, tm_response);
 	}
+
 	clear_bit(free_slot, &hba->tm_condition);
-	err = ufshcd_task_req_compl(hba, free_slot);
-out:
+	ufshcd_put_tm_slot(hba, free_slot);
+	wake_up(&hba->tm_tag_wq);
+
 	return err;
 }
 
@@ -2380,14 +2423,21 @@ static int ufshcd_device_reset(struct scsi_cmnd *cmd)
 	unsigned int tag;
 	u32 pos;
 	int err;
+	u8 resp = 0xF;
+	struct ufshcd_lrb *lrbp;
 
 	host = cmd->device->host;
 	hba = shost_priv(host);
 	tag = cmd->request->tag;
 
-	err = ufshcd_issue_tm_cmd(hba, &hba->lrb[tag], UFS_LOGICAL_RESET);
-	if (err == FAILED)
+	lrbp = &hba->lrb[tag];
+	err = ufshcd_issue_tm_cmd(hba, lrbp->lun, 0, UFS_LOGICAL_RESET, &resp);
+	if (err || resp != UPIU_TASK_MANAGEMENT_FUNC_COMPL) {
+		err = FAILED;
 		goto out;
+	} else {
+		err = SUCCESS;
+	}
 
 	for (pos = 0; pos < hba->nutrs; pos++) {
 		if (test_bit(pos, &hba->outstanding_reqs) &&
@@ -2444,6 +2494,8 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
 	unsigned long flags;
 	unsigned int tag;
 	int err;
+	u8 resp = 0xF;
+	struct ufshcd_lrb *lrbp;
 
 	host = cmd->device->host;
 	hba = shost_priv(host);
@@ -2459,9 +2511,15 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
 	}
 	spin_unlock_irqrestore(host->host_lock, flags);
 
-	err = ufshcd_issue_tm_cmd(hba, &hba->lrb[tag], UFS_ABORT_TASK);
-	if (err == FAILED)
+	lrbp = &hba->lrb[tag];
+	err = ufshcd_issue_tm_cmd(hba, lrbp->lun, lrbp->task_tag,
+			UFS_ABORT_TASK, &resp);
+	if (err || resp != UPIU_TASK_MANAGEMENT_FUNC_COMPL) {
+		err = FAILED;
 		goto out;
+	} else {
+		err = SUCCESS;
+	}
 
 	scsi_dma_unmap(cmd);
 
@@ -2682,7 +2740,8 @@ int ufshcd_init(struct device *dev, struct ufs_hba **hba_handle,
 	host->max_cmd_len = MAX_CDB_SIZE;
 
 	/* Initailize wait queue for task management */
-	init_waitqueue_head(&hba->ufshcd_tm_wait_queue);
+	init_waitqueue_head(&hba->tm_wq);
+	init_waitqueue_head(&hba->tm_tag_wq);
 
 	/* Initialize work queues */
 	INIT_WORK(&hba->feh_workq, ufshcd_fatal_err_handler);
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index 59c9c48..fe7c947 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -174,8 +174,10 @@ struct ufs_dev_cmd {
  * @irq: Irq number of the controller
  * @active_uic_cmd: handle of active UIC command
  * @uic_cmd_mutex: mutex for uic command
- * @ufshcd_tm_wait_queue: wait queue for task management
+ * @tm_wq: wait queue for task management
+ * @tm_tag_wq: wait queue for free task management slots
  * @tm_condition: condition variable for task management
+ * @tm_slots_in_use: bit map of task management request slots in use
  * @ufshcd_state: UFSHCD states
  * @intr_mask: Interrupt Mask Bits
  * @ee_ctrl_mask: Exception event control mask
@@ -216,8 +218,10 @@ struct ufs_hba {
 	struct uic_command *active_uic_cmd;
 	struct mutex uic_cmd_mutex;
 
-	wait_queue_head_t ufshcd_tm_wait_queue;
+	wait_queue_head_t tm_wq;
+	wait_queue_head_t tm_tag_wq;
 	unsigned long tm_condition;
+	unsigned long tm_slots_in_use;
 
 	u32 ufshcd_state;
 	u32 intr_mask;
-- 
1.8.3.4


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 2/4] scsi: ufs: Fix hardware race conditions while aborting a command
  2013-08-14 17:10 [PATCH 0/4] scsi: ufs: TM, fatal-error handling and other fixes Santosh Y
  2013-08-14 17:10 ` [PATCH 1/4] scsi: ufs: Fix broken task management command implementation Santosh Y
@ 2013-08-14 17:10 ` Santosh Y
  2013-08-14 17:10 ` [PATCH 3/4] scsi: ufs: Fix device and host reset methods Santosh Y
  2013-08-14 17:10 ` [PATCH 4/4] scsi: ufs: Improve UFS fatal error handling Santosh Y
  3 siblings, 0 replies; 12+ messages in thread
From: Santosh Y @ 2013-08-14 17:10 UTC (permalink / raw)
  To: james.bottomley; +Cc: linux-scsi, vinholikatti, Sujit Reddy Thumma, Santosh Y

From: Sujit Reddy Thumma <sthumma@codeaurora.org>

There is a possible race condition in the hardware when the abort
command is issued to terminate the ongoing SCSI command as described
below:

- A bit in the door-bell register is set in the controller for a
  new SCSI command.
- In some rare situations, before controller get a chance to issue
  the command to the device, the software issued an abort command.
- If the device recieves abort command first then it returns success
  because the command itself is not present.
- Now if the controller commits the command to device it will be
  processed.
- Software thinks that command is aborted and proceed while still
  the device is processing it.
- The software, controller and device may go out of sync because of
  this race condition.

To avoid this, query task presence in the device before sending abort
task command so that after the abort operation, the command is guaranteed
to be non-existent in both controller and the device.

Signed-off-by: Sujit Reddy Thumma <sthumma@codeaurora.org>
Reviewed-by: Yaniv Gardi <ygardi@codeaurora.org>
Tested-by: Dolev Raviv <draviv@codeaurora.org>
Signed-off-by: Santosh Y <santoshsy@gmail.com>

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index e0810a3..24c5ab3 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -2485,6 +2485,12 @@ static int ufshcd_host_reset(struct scsi_cmnd *cmd)
  * ufshcd_abort - abort a specific command
  * @cmd: SCSI command pointer
  *
+ * Abort the pending command in device by sending UFS_ABORT_TASK task management
+ * command, and in host controller by clearing the door-bell register. There can
+ * be race between controller sending the command to the device while abort is
+ * issued. To avoid that, first issue UFS_QUERY_TASK to check if the command is
+ * really issued and then try to abort it.
+ *
  * Returns SUCCESS/FAILED
  */
 static int ufshcd_abort(struct scsi_cmnd *cmd)
@@ -2493,7 +2499,8 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
 	struct ufs_hba *hba;
 	unsigned long flags;
 	unsigned int tag;
-	int err;
+	int err = 0;
+	int poll_cnt;
 	u8 resp = 0xF;
 	struct ufshcd_lrb *lrbp;
 
@@ -2501,33 +2508,59 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
 	hba = shost_priv(host);
 	tag = cmd->request->tag;
 
-	spin_lock_irqsave(host->host_lock, flags);
+	/* If command is already aborted/completed, return SUCCESS */
+	if (!(test_bit(tag, &hba->outstanding_reqs)))
+		goto out;
 
-	/* check if command is still pending */
-	if (!(test_bit(tag, &hba->outstanding_reqs))) {
-		err = FAILED;
-		spin_unlock_irqrestore(host->host_lock, flags);
+	lrbp = &hba->lrb[tag];
+	for (poll_cnt = 100; poll_cnt; poll_cnt--) {
+		err = ufshcd_issue_tm_cmd(hba, lrbp->lun, lrbp->task_tag,
+				UFS_QUERY_TASK, &resp);
+		if (!err && resp == UPIU_TASK_MANAGEMENT_FUNC_SUCCEEDED) {
+			/* cmd pending in the device */
+			break;
+		} else if (!err && resp == UPIU_TASK_MANAGEMENT_FUNC_COMPL) {
+			u32 reg;
+
+			/*
+			 * cmd not pending in the device, check if it is
+			 * in transition.
+			 */
+			reg = ufshcd_readl(hba, REG_UTP_TRANSFER_REQ_DOOR_BELL);
+			if (reg & (1 << tag)) {
+				/* sleep for max. 200us to stabilize */
+				usleep_range(100, 200);
+				continue;
+			}
+			/* command completed already */
+			goto out;
+		} else {
+			if (!err)
+				err = resp; /* service response error */
+			goto out;
+		}
+	}
+
+	if (!poll_cnt) {
+		err = -EBUSY;
 		goto out;
 	}
-	spin_unlock_irqrestore(host->host_lock, flags);
 
-	lrbp = &hba->lrb[tag];
 	err = ufshcd_issue_tm_cmd(hba, lrbp->lun, lrbp->task_tag,
 			UFS_ABORT_TASK, &resp);
 	if (err || resp != UPIU_TASK_MANAGEMENT_FUNC_COMPL) {
-		err = FAILED;
+		if (!err)
+			err = resp; /* service response error */
 		goto out;
-	} else {
-		err = SUCCESS;
 	}
 
+	err = ufshcd_clear_cmd(hba, tag);
+	if (err)
+		goto out;
+
 	scsi_dma_unmap(cmd);
 
 	spin_lock_irqsave(host->host_lock, flags);
-
-	/* clear the respective UTRLCLR register bit */
-	ufshcd_utrl_clear(hba, tag);
-
 	__clear_bit(tag, &hba->outstanding_reqs);
 	hba->lrb[tag].cmd = NULL;
 	spin_unlock_irqrestore(host->host_lock, flags);
@@ -2535,6 +2568,13 @@ static int ufshcd_abort(struct scsi_cmnd *cmd)
 	clear_bit_unlock(tag, &hba->lrb_in_use);
 	wake_up(&hba->dev_cmd.tag_wq);
 out:
+	if (!err) {
+		err = SUCCESS;
+	} else {
+		dev_err(hba->dev, "%s: failed with err %d\n", __func__, err);
+		err = FAILED;
+	}
+
 	return err;
 }
 
-- 
1.8.3.4


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 3/4] scsi: ufs: Fix device and host reset methods
  2013-08-14 17:10 [PATCH 0/4] scsi: ufs: TM, fatal-error handling and other fixes Santosh Y
  2013-08-14 17:10 ` [PATCH 1/4] scsi: ufs: Fix broken task management command implementation Santosh Y
  2013-08-14 17:10 ` [PATCH 2/4] scsi: ufs: Fix hardware race conditions while aborting a command Santosh Y
@ 2013-08-14 17:10 ` Santosh Y
  2013-08-14 17:10 ` [PATCH 4/4] scsi: ufs: Improve UFS fatal error handling Santosh Y
  3 siblings, 0 replies; 12+ messages in thread
From: Santosh Y @ 2013-08-14 17:10 UTC (permalink / raw)
  To: james.bottomley; +Cc: linux-scsi, vinholikatti, Sujit Reddy Thumma, Santosh Y

From: Sujit Reddy Thumma <sthumma@codeaurora.org>

As of now SCSI initiated error handling is broken because,
the reset APIs don't try to bring back the device initialized and
ready for further transfers.

In case of timeouts, the scsi error handler takes care of handling aborts
and resets. Improve the error handling in such scenario by resetting the
device and host and re-initializing them in proper manner.

Signed-off-by: Sujit Reddy Thumma <sthumma@codeaurora.org>
Reviewed-by: Yaniv Gardi <ygardi@codeaurora.org>
Tested-by: Dolev Raviv <draviv@codeaurora.org>
Signed-off-by: Santosh Y <santoshsy@gmail.com>

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 24c5ab3..dfa61be 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -69,9 +69,14 @@ enum {
 
 /* UFSHCD states */
 enum {
-	UFSHCD_STATE_OPERATIONAL,
 	UFSHCD_STATE_RESET,
 	UFSHCD_STATE_ERROR,
+	UFSHCD_STATE_OPERATIONAL,
+};
+
+/* UFSHCD error handling flags */
+enum {
+	UFSHCD_EH_IN_PROGRESS = (1 << 0),
 };
 
 /* Interrupt configuration options */
@@ -87,6 +92,16 @@ enum {
 	INT_AGGR_CONFIG,
 };
 
+#define ufshcd_set_eh_in_progress(h) \
+	(h->eh_flags |= UFSHCD_EH_IN_PROGRESS)
+#define ufshcd_eh_in_progress(h) \
+	(h->eh_flags & UFSHCD_EH_IN_PROGRESS)
+#define ufshcd_clear_eh_in_progress(h) \
+	(h->eh_flags &= ~UFSHCD_EH_IN_PROGRESS)
+
+static void ufshcd_tmc_handler(struct ufs_hba *hba);
+static void ufshcd_async_scan(void *data, async_cookie_t cookie);
+
 /*
  * ufshcd_wait_for_register - wait for register value to change
  * @hba - per-adapter interface
@@ -840,10 +855,25 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 
 	tag = cmd->request->tag;
 
-	if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL) {
+	spin_lock_irqsave(hba->host->host_lock, flags);
+	switch (hba->ufshcd_state) {
+	case UFSHCD_STATE_OPERATIONAL:
+		break;
+	case UFSHCD_STATE_RESET:
 		err = SCSI_MLQUEUE_HOST_BUSY;
-		goto out;
+		goto out_unlock;
+	case UFSHCD_STATE_ERROR:
+		set_host_byte(cmd, DID_ERROR);
+		cmd->scsi_done(cmd);
+		goto out_unlock;
+	default:
+		dev_WARN_ONCE(hba->dev, 1, "%s: invalid state %d\n",
+				__func__, hba->ufshcd_state);
+		set_host_byte(cmd, DID_BAD_TARGET);
+		cmd->scsi_done(cmd);
+		goto out_unlock;
 	}
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
 
 	/* acquire the tag to make sure device cmds don't use it */
 	if (test_and_set_bit_lock(tag, &hba->lrb_in_use)) {
@@ -880,6 +910,7 @@ static int ufshcd_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *cmd)
 	/* issue command to the controller */
 	spin_lock_irqsave(hba->host->host_lock, flags);
 	ufshcd_send_command(hba, tag);
+out_unlock:
 	spin_unlock_irqrestore(hba->host->host_lock, flags);
 out:
 	return err;
@@ -1495,8 +1526,6 @@ static int ufshcd_make_hba_operational(struct ufs_hba *hba)
 	if (hba->ufshcd_state == UFSHCD_STATE_RESET)
 		scsi_unblock_requests(hba->host);
 
-	hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
-
 out:
 	return err;
 }
@@ -2245,8 +2274,12 @@ static void ufshcd_err_handler(struct ufs_hba *hba)
 	}
 	return;
 fatal_eh:
-	hba->ufshcd_state = UFSHCD_STATE_ERROR;
-	schedule_work(&hba->feh_workq);
+	/* handle fatal errors only when link is functional */
+	if (hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) {
+		/* block commands at driver layer until error is handled */
+		hba->ufshcd_state = UFSHCD_STATE_ERROR;
+		schedule_work(&hba->feh_workq);
+	}
 }
 
 /**
@@ -2411,12 +2444,13 @@ static int ufshcd_issue_tm_cmd(struct ufs_hba *hba, int lun_id, int task_id,
 }
 
 /**
- * ufshcd_device_reset - reset device and abort all the pending commands
+ * ufshcd_eh_device_reset_handler - device reset handler registered to
+ *                                    scsi layer.
  * @cmd: SCSI command pointer
  *
  * Returns SUCCESS/FAILED
  */
-static int ufshcd_device_reset(struct scsi_cmnd *cmd)
+static int ufshcd_eh_device_reset_handler(struct scsi_cmnd *cmd)
 {
 	struct Scsi_Host *host;
 	struct ufs_hba *hba;
@@ -2425,6 +2459,7 @@ static int ufshcd_device_reset(struct scsi_cmnd *cmd)
 	int err;
 	u8 resp = 0xF;
 	struct ufshcd_lrb *lrbp;
+	unsigned long flags;
 
 	host = cmd->device->host;
 	hba = shost_priv(host);
@@ -2433,55 +2468,33 @@ static int ufshcd_device_reset(struct scsi_cmnd *cmd)
 	lrbp = &hba->lrb[tag];
 	err = ufshcd_issue_tm_cmd(hba, lrbp->lun, 0, UFS_LOGICAL_RESET, &resp);
 	if (err || resp != UPIU_TASK_MANAGEMENT_FUNC_COMPL) {
-		err = FAILED;
+		if (!err)
+			err = resp;
 		goto out;
-	} else {
-		err = SUCCESS;
 	}
 
-	for (pos = 0; pos < hba->nutrs; pos++) {
-		if (test_bit(pos, &hba->outstanding_reqs) &&
-		    (hba->lrb[tag].lun == hba->lrb[pos].lun)) {
-
-			/* clear the respective UTRLCLR register bit */
-			ufshcd_utrl_clear(hba, pos);
-
-			clear_bit(pos, &hba->outstanding_reqs);
-
-			if (hba->lrb[pos].cmd) {
-				scsi_dma_unmap(hba->lrb[pos].cmd);
-				hba->lrb[pos].cmd->result =
-					DID_ABORT << 16;
-				hba->lrb[pos].cmd->scsi_done(cmd);
-				hba->lrb[pos].cmd = NULL;
-				clear_bit_unlock(pos, &hba->lrb_in_use);
-				wake_up(&hba->dev_cmd.tag_wq);
-			}
+	/* clear the commands that were pending for corresponding LUN */
+	for_each_set_bit(pos, &hba->outstanding_reqs, hba->nutrs) {
+		if (hba->lrb[pos].lun == lrbp->lun) {
+			err = ufshcd_clear_cmd(hba, pos);
+			if (err)
+				break;
 		}
-	} /* end of for */
+	}
+	spin_lock_irqsave(host->host_lock, flags);
+	ufshcd_transfer_req_compl(hba);
+	spin_unlock_irqrestore(host->host_lock, flags);
 out:
+	if (!err) {
+		err = SUCCESS;
+	} else {
+		dev_err(hba->dev, "%s: failed with err %d\n", __func__, err);
+		err = FAILED;
+	}
 	return err;
 }
 
 /**
- * ufshcd_host_reset - Main reset function registered with scsi layer
- * @cmd: SCSI command pointer
- *
- * Returns SUCCESS/FAILED
- */
-static int ufshcd_host_reset(struct scsi_cmnd *cmd)
-{
-	struct ufs_hba *hba;
-
-	hba = shost_priv(cmd->device->host);
-
-	if (hba->ufshcd_state == UFSHCD_STATE_RESET)
-		return SUCCESS;
-
-	return ufshcd_do_reset(hba);
-}
-
-/**
  * ufshcd_abort - abort a specific command
  * @cmd: SCSI command pointer
  *
@@ -2579,6 +2592,122 @@ out:
 }
 
 /**
+ * ufshcd_host_reset_and_restore - reset and restore host controller
+ * @hba: per-adapter instance
+ *
+ * Note that host controller reset may issue DME_RESET to
+ * local and remote (device) Uni-Pro stack and the attributes
+ * are reset to default state.
+ *
+ * Returns zero on success, non-zero on failure
+ */
+static int ufshcd_host_reset_and_restore(struct ufs_hba *hba)
+{
+	int err;
+	async_cookie_t cookie;
+	unsigned long flags;
+
+	/* Reset the host controller */
+	spin_lock_irqsave(hba->host->host_lock, flags);
+	ufshcd_hba_stop(hba);
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+
+	err = ufshcd_hba_enable(hba);
+	if (err)
+		goto out;
+
+	/* Establish the link again and restore the device */
+	cookie = async_schedule(ufshcd_async_scan, hba);
+	/* wait for async scan to be completed */
+	async_synchronize_cookie(++cookie);
+	if (hba->ufshcd_state != UFSHCD_STATE_OPERATIONAL)
+		err = -EIO;
+out:
+	if (err)
+		dev_err(hba->dev, "%s: Host init failed %d\n", __func__, err);
+
+	return err;
+}
+
+/**
+ * ufshcd_reset_and_restore - reset and re-initialize host/device
+ * @hba: per-adapter instance
+ *
+ * Reset and recover device, host and re-establish link. This
+ * is helpful to recover the communication in fatal error conditions.
+ *
+ * Returns zero on success, non-zero on failure
+ */
+static int ufshcd_reset_and_restore(struct ufs_hba *hba)
+{
+	int err = 0;
+	unsigned long flags;
+
+	err = ufshcd_host_reset_and_restore(hba);
+
+	/*
+	 * After reset the door-bell might be cleared, complete
+	 * outstanding requests in s/w here.
+	 */
+	spin_lock_irqsave(hba->host->host_lock, flags);
+	ufshcd_transfer_req_compl(hba);
+	ufshcd_tmc_handler(hba);
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+
+	return err;
+}
+
+/**
+ * ufshcd_eh_host_reset_handler - host reset handler registered to scsi layer
+ * @cmd - SCSI command pointer
+ *
+ * Returns SUCCESS/FAILED
+ */
+static int ufshcd_eh_host_reset_handler(struct scsi_cmnd *cmd)
+{
+	int err;
+	unsigned long flags;
+	struct ufs_hba *hba;
+
+	hba = shost_priv(cmd->device->host);
+
+	/*
+	 * Check if there is any race with fatal error handling.
+	 * If so, wait for it to complete. Even though fatal error
+	 * handling does reset and restore in some cases, don't assume
+	 * anything out of it. We are just avoiding race here.
+	 */
+	do {
+		spin_lock_irqsave(hba->host->host_lock, flags);
+		if (!(work_pending(&hba->feh_workq) ||
+				hba->ufshcd_state == UFSHCD_STATE_RESET))
+			break;
+		spin_unlock_irqrestore(hba->host->host_lock, flags);
+		dev_dbg(hba->dev, "%s: reset in progress\n", __func__);
+		flush_work_sync(&hba->feh_workq);
+	} while (1);
+
+	hba->ufshcd_state = UFSHCD_STATE_RESET;
+	ufshcd_set_eh_in_progress(hba);
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+
+	err = ufshcd_reset_and_restore(hba);
+
+	spin_lock_irqsave(hba->host->host_lock, flags);
+	if (!err) {
+		err = SUCCESS;
+		hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
+	} else {
+		err = FAILED;
+		hba->ufshcd_state = UFSHCD_STATE_ERROR;
+	}
+	ufshcd_clear_eh_in_progress(hba);
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+
+	return err;
+}
+
+/**
  * ufshcd_async_scan - asynchronous execution for link startup
  * @data: data pointer to pass to this function
  * @cookie: cookie data
@@ -2601,8 +2730,13 @@ static void ufshcd_async_scan(void *data, async_cookie_t cookie)
 		goto out;
 
 	ufshcd_force_reset_auto_bkops(hba);
-	scsi_scan_host(hba->host);
-	pm_runtime_put_sync(hba->dev);
+	hba->ufshcd_state = UFSHCD_STATE_OPERATIONAL;
+
+	/* If we are in error handling context no need to scan the host */
+	if (!ufshcd_eh_in_progress(hba)) {
+		scsi_scan_host(hba->host);
+		pm_runtime_put_sync(hba->dev);
+	}
 out:
 	return;
 }
@@ -2615,8 +2749,8 @@ static struct scsi_host_template ufshcd_driver_template = {
 	.slave_alloc		= ufshcd_slave_alloc,
 	.slave_destroy		= ufshcd_slave_destroy,
 	.eh_abort_handler	= ufshcd_abort,
-	.eh_device_reset_handler = ufshcd_device_reset,
-	.eh_host_reset_handler	= ufshcd_host_reset,
+	.eh_device_reset_handler = ufshcd_eh_device_reset_handler,
+	.eh_host_reset_handler   = ufshcd_eh_host_reset_handler,
 	.this_id		= -1,
 	.sg_tablesize		= SG_ALL,
 	.cmd_per_lun		= UFSHCD_CMD_PER_LUN,
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index fe7c947..1e0585c 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -179,6 +179,7 @@ struct ufs_dev_cmd {
  * @tm_condition: condition variable for task management
  * @tm_slots_in_use: bit map of task management request slots in use
  * @ufshcd_state: UFSHCD states
+ * @eh_flags: Error handling flags
  * @intr_mask: Interrupt Mask Bits
  * @ee_ctrl_mask: Exception event control mask
  * @feh_workq: Work queue for fatal controller error handling
@@ -224,6 +225,7 @@ struct ufs_hba {
 	unsigned long tm_slots_in_use;
 
 	u32 ufshcd_state;
+	u32 eh_flags;
 	u32 intr_mask;
 	u16 ee_ctrl_mask;
 
-- 
1.8.3.4


^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [PATCH 4/4] scsi: ufs: Improve UFS fatal error handling
  2013-08-14 17:10 [PATCH 0/4] scsi: ufs: TM, fatal-error handling and other fixes Santosh Y
                   ` (2 preceding siblings ...)
  2013-08-14 17:10 ` [PATCH 3/4] scsi: ufs: Fix device and host reset methods Santosh Y
@ 2013-08-14 17:10 ` Santosh Y
  3 siblings, 0 replies; 12+ messages in thread
From: Santosh Y @ 2013-08-14 17:10 UTC (permalink / raw)
  To: james.bottomley; +Cc: linux-scsi, vinholikatti, Sujit Reddy Thumma, Santosh Y

From: Sujit Reddy Thumma <sthumma@codeaurora.org>

Error handling in UFS driver is broken and resets the host controller
for fatal errors without re-initialization. Correct the fatal error
handling sequence according to UFS Host Controller Interface (HCI)
v1.1 specification.

o Processed requests which are completed w/wo error are reported to
  SCSI layer and any pending commands that are not started are aborted
  in the controller and re-queued into scsi mid-layer queue.

o Upon determining fatal error condition the host controller may hang
  forever until a reset is applied. Block SCSI layer for sending new
  requests and apply reset in a separate error handling work.

o SCSI is informed about the expected Unit-Attention exception from the
  device for the immediate command after a reset so that the SCSI layer
  take necessary steps to establish communication with the device.

Signed-off-by: Sujit Reddy Thumma <sthumma@codeaurora.org>
Reviewed-by: Yaniv Gardi <ygardi@codeaurora.org>
Tested-by: Dolev Raviv <draviv@codeaurora.org>
Signed-off-by: Santosh Y <santoshsy@gmail.com>

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index dfa61be..a0f5ac2 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -79,6 +79,14 @@ enum {
 	UFSHCD_EH_IN_PROGRESS = (1 << 0),
 };
 
+/* UFSHCD UIC layer error flags */
+enum {
+	UFSHCD_UIC_DL_PA_INIT_ERROR = (1 << 0), /* Data link layer error */
+	UFSHCD_UIC_NL_ERROR = (1 << 1), /* Network layer error */
+	UFSHCD_UIC_TL_ERROR = (1 << 2), /* Transport Layer error */
+	UFSHCD_UIC_DME_ERROR = (1 << 3), /* DME error */
+};
+
 /* Interrupt configuration options */
 enum {
 	UFSHCD_INT_DISABLE,
@@ -101,6 +109,8 @@ enum {
 
 static void ufshcd_tmc_handler(struct ufs_hba *hba);
 static void ufshcd_async_scan(void *data, async_cookie_t cookie);
+static int ufshcd_reset_and_restore(struct ufs_hba *hba);
+static int ufshcd_clear_tm_cmd(struct ufs_hba *hba, int tag);
 
 /*
  * ufshcd_wait_for_register - wait for register value to change
@@ -1523,9 +1533,6 @@ static int ufshcd_make_hba_operational(struct ufs_hba *hba)
 		goto out;
 	}
 
-	if (hba->ufshcd_state == UFSHCD_STATE_RESET)
-		scsi_unblock_requests(hba->host);
-
 out:
 	return err;
 }
@@ -1651,66 +1658,6 @@ static int ufshcd_verify_dev_init(struct ufs_hba *hba)
 }
 
 /**
- * ufshcd_do_reset - reset the host controller
- * @hba: per adapter instance
- *
- * Returns SUCCESS/FAILED
- */
-static int ufshcd_do_reset(struct ufs_hba *hba)
-{
-	struct ufshcd_lrb *lrbp;
-	unsigned long flags;
-	int tag;
-
-	/* block commands from midlayer */
-	scsi_block_requests(hba->host);
-
-	spin_lock_irqsave(hba->host->host_lock, flags);
-	hba->ufshcd_state = UFSHCD_STATE_RESET;
-
-	/* send controller to reset state */
-	ufshcd_hba_stop(hba);
-	spin_unlock_irqrestore(hba->host->host_lock, flags);
-
-	/* abort outstanding commands */
-	for (tag = 0; tag < hba->nutrs; tag++) {
-		if (test_bit(tag, &hba->outstanding_reqs)) {
-			lrbp = &hba->lrb[tag];
-			if (lrbp->cmd) {
-				scsi_dma_unmap(lrbp->cmd);
-				lrbp->cmd->result = DID_RESET << 16;
-				lrbp->cmd->scsi_done(lrbp->cmd);
-				lrbp->cmd = NULL;
-				clear_bit_unlock(tag, &hba->lrb_in_use);
-			}
-		}
-	}
-
-	/* complete device management command */
-	if (hba->dev_cmd.complete)
-		complete(hba->dev_cmd.complete);
-
-	/* clear outstanding request/task bit maps */
-	hba->outstanding_reqs = 0;
-	hba->outstanding_tasks = 0;
-
-	/* Host controller enable */
-	if (ufshcd_hba_enable(hba)) {
-		dev_err(hba->dev,
-			"Reset: Controller initialization failed\n");
-		return FAILED;
-	}
-
-	if (ufshcd_link_startup(hba)) {
-		dev_err(hba->dev,
-			"Reset: Link start-up failed\n");
-		return FAILED;
-	}
-
-	return SUCCESS;
-}
-
-/**
  * ufshcd_slave_alloc - handle initial SCSI device configurations
  * @sdev: pointer to SCSI device
  *
@@ -1727,6 +1674,9 @@ static int ufshcd_slave_alloc(struct scsi_device *sdev)
 	sdev->use_10_for_ms = 1;
 	scsi_set_tag_type(sdev, MSG_SIMPLE_TAG);
 
+	/* allow SCSI layer to restart the device in case of errors */
+	sdev->allow_restart = 1;
+
 	/*
 	 * Inform SCSI Midlayer that the LUN queue depth is same as the
 	 * controller queue depth. If a LUN queue depth is less than the
@@ -1930,6 +1880,9 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
 	case OCS_ABORTED:
 		result |= DID_ABORT << 16;
 		break;
+	case OCS_INVALID_COMMAND_STATUS:
+		result |= DID_REQUEUE << 16;
+		break;
 	case OCS_INVALID_CMD_TABLE_ATTR:
 	case OCS_INVALID_PRDT_ATTR:
 	case OCS_MISMATCH_DATA_BUF_SIZE:
@@ -2241,45 +2194,145 @@ out:
 }
 
 /**
- * ufshcd_fatal_err_handler - handle fatal errors
- * @hba: per adapter instance
+ * ufshcd_err_handler - handle UFS errors that require s/w attention
+ * @work: pointer to work structure
  */
-static void ufshcd_fatal_err_handler(struct work_struct *work)
+static void ufshcd_err_handler(struct work_struct *work)
 {
 	struct ufs_hba *hba;
-	hba = container_of(work, struct ufs_hba, feh_workq);
+	unsigned long flags;
+	u32 err_xfer = 0;
+	u32 err_tm = 0;
+	int err = 0;
+	int tag;
+
+	hba = container_of(work, struct ufs_hba, eh_work);
 
 	pm_runtime_get_sync(hba->dev);
-	/* check if reset is already in progress */
-	if (hba->ufshcd_state != UFSHCD_STATE_RESET)
-		ufshcd_do_reset(hba);
+
+	spin_lock_irqsave(hba->host->host_lock, flags);
+	if (hba->ufshcd_state == UFSHCD_STATE_RESET) {
+		spin_unlock_irqrestore(hba->host->host_lock, flags);
+		goto out;
+	}
+
+	hba->ufshcd_state = UFSHCD_STATE_RESET;
+	ufshcd_set_eh_in_progress(hba);
+
+	/* Complete requests that have door-bell cleared by h/w */
+	ufshcd_transfer_req_compl(hba);
+	ufshcd_tmc_handler(hba);
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+
+	/* Clear pending transfer requests */
+	for_each_set_bit(tag, &hba->outstanding_reqs, hba->nutrs)
+		if (ufshcd_clear_cmd(hba, tag))
+			err_xfer |= 1 << tag;
+
+	/* Clear pending task management requests */
+	for_each_set_bit(tag, &hba->outstanding_tasks, hba->nutmrs)
+		if (ufshcd_clear_tm_cmd(hba, tag))
+			err_tm |= 1 << tag;
+
+	/* Complete the requests that are cleared by s/w */
+	spin_lock_irqsave(hba->host->host_lock, flags);
+	ufshcd_transfer_req_compl(hba);
+	ufshcd_tmc_handler(hba);
+	spin_unlock_irqrestore(hba->host->host_lock, flags);
+
+	/* Fatal errors need reset */
+	if (err_xfer || err_tm || (hba->saved_err & INT_FATAL_ERRORS) ||
+			((hba->saved_err & UIC_ERROR) &&
+			 (hba->saved_uic_err & UFSHCD_UIC_DL_PA_INIT_ERROR))) {
+		err = ufshcd_reset_and_restore(hba);
+		if (err) {
+			dev_err(hba->dev, "%s: reset and restore failed\n",
+					__func__);
+			hba->ufshcd_state = UFSHCD_STATE_ERROR;
+		}
+		/*
+		 * Inform scsi mid-layer that we did reset and allow to handle
+		 * Unit Attention properly.
+		 */
+		scsi_report_bus_reset(hba->host, 0);
+		hba->saved_err = 0;
+		hba->saved_uic_err = 0;
+	}
+	ufshcd_clear_eh_in_progress(hba);
+
+out:
+	scsi_unblock_requests(hba->host);
 	pm_runtime_put_sync(hba->dev);
 }
 
 /**
- * ufshcd_err_handler - Check for fatal errors
- * @work: pointer to a work queue structure
+ * ufshcd_update_uic_error - check and set fatal UIC error flags.
+ * @hba: per-adapter instance
  */
-static void ufshcd_err_handler(struct ufs_hba *hba)
+static void ufshcd_update_uic_error(struct ufs_hba *hba)
 {
 	u32 reg;
 
+	/* PA_INIT_ERROR is fatal and needs UIC reset */
+	reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_DATA_LINK_LAYER);
+	if (reg & UIC_DATA_LINK_LAYER_ERROR_PA_INIT)
+		hba->uic_error |= UFSHCD_UIC_DL_PA_INIT_ERROR;
+
+	/* UIC NL/TL/DME errors needs software retry */
+	reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_NETWORK_LAYER);
+	if (reg)
+		hba->uic_error |= UFSHCD_UIC_NL_ERROR;
+
+	reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_TRANSPORT_LAYER);
+	if (reg)
+		hba->uic_error |= UFSHCD_UIC_TL_ERROR;
+
+	reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_DME);
+	if (reg)
+		hba->uic_error |= UFSHCD_UIC_DME_ERROR;
+
+	dev_dbg(hba->dev, "%s: UIC error flags = 0x%08x\n",
+			__func__, hba->uic_error);
+}
+
+/**
+ * ufshcd_check_errors - Check for errors that need s/w attention
+ * @hba: per-adapter instance
+ */
+static void ufshcd_check_errors(struct ufs_hba *hba)
+{
+	bool queue_eh_work = false;
+
 	if (hba->errors & INT_FATAL_ERRORS)
-		goto fatal_eh;
+		queue_eh_work = true;
 
 	if (hba->errors & UIC_ERROR) {
-		reg = ufshcd_readl(hba, REG_UIC_ERROR_CODE_DATA_LINK_LAYER);
-		if (reg & UIC_DATA_LINK_LAYER_ERROR_PA_INIT)
-			goto fatal_eh;
+		hba->uic_error = 0;
+		ufshcd_update_uic_error(hba);
+		if (hba->uic_error)
+			queue_eh_work = true;
 	}
-	return;
-fatal_eh:
-	/* handle fatal errors only when link is functional */
-	if (hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) {
-		/* block commands at driver layer until error is handled */
-		hba->ufshcd_state = UFSHCD_STATE_ERROR;
-		schedule_work(&hba->feh_workq);
+
+	if (queue_eh_work) {
+		/* handle fatal errors only when link is functional */
+		if (hba->ufshcd_state == UFSHCD_STATE_OPERATIONAL) {
+			/* block commands from scsi mid-layer */
+			scsi_block_requests(hba->host);
+
+			/* transfer error masks to sticky bits */
+			hba->saved_err |= hba->errors;
+			hba->saved_uic_err |= hba->uic_error;
+
+			hba->ufshcd_state = UFSHCD_STATE_ERROR;
+			schedule_work(&hba->eh_work);
+		}
 	}
+	/*
+	 * if (!queue_eh_work) -
+	 * Other errors are either non-fatal where host recovers
+	 * itself without s/w intervention or errors that will be
+	 * handled by the SCSI core layer.
+	 */
 }
 
 /**
@@ -2304,7 +2357,7 @@ static void ufshcd_sl_intr(struct ufs_hba *hba, u32 intr_status)
 {
 	hba->errors = UFSHCD_ERROR_MASK & intr_status;
 	if (hba->errors)
-		ufshcd_err_handler(hba);
+		ufshcd_check_errors(hba);
 
 	if (intr_status & UIC_COMMAND_COMPL)
 		ufshcd_uic_cmd_compl(hba);
@@ -2679,12 +2732,12 @@ static int ufshcd_eh_host_reset_handler(struct scsi_cmnd *cmd)
 	 */
 	do {
 		spin_lock_irqsave(hba->host->host_lock, flags);
-		if (!(work_pending(&hba->feh_workq) ||
+		if (!(work_pending(&hba->eh_work) ||
 				hba->ufshcd_state == UFSHCD_STATE_RESET))
 			break;
 		spin_unlock_irqrestore(hba->host->host_lock, flags);
 		dev_dbg(hba->dev, "%s: reset in progress\n", __func__);
-		flush_work_sync(&hba->feh_workq);
+		flush_work_sync(&hba->eh_work);
 	} while (1);
 
 	hba->ufshcd_state = UFSHCD_STATE_RESET;
@@ -2918,7 +2971,7 @@ int ufshcd_init(struct device *dev, struct ufs_hba **hba_handle,
 	init_waitqueue_head(&hba->tm_tag_wq);
 
 	/* Initialize work queues */
-	INIT_WORK(&hba->feh_workq, ufshcd_fatal_err_handler);
+	INIT_WORK(&hba->eh_work, ufshcd_err_handler);
 	INIT_WORK(&hba->eeh_work, ufshcd_exception_event_handler);
 
 	/* Initialize UIC command mutex */
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index 1e0585c..8f5624e 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -182,9 +182,12 @@ struct ufs_dev_cmd {
  * @eh_flags: Error handling flags
  * @intr_mask: Interrupt Mask Bits
  * @ee_ctrl_mask: Exception event control mask
- * @feh_workq: Work queue for fatal controller error handling
+ * @eh_work: Worker to handle UFS errors that require s/w attention
  * @eeh_work: Worker to handle exception events
  * @errors: HBA errors
+ * @uic_error: UFS interconnect layer error status
+ * @saved_err: sticky error mask
+ * @saved_uic_err: sticky UIC error mask
  * @dev_cmd: ufs device management command information
  * @auto_bkops_enabled: to track whether bkops is enabled in device
  */
@@ -230,11 +233,14 @@ struct ufs_hba {
 	u16 ee_ctrl_mask;
 
 	/* Work Queues */
-	struct work_struct feh_workq;
+	struct work_struct eh_work;
 	struct work_struct eeh_work;
 
 	/* HBA Errors */
 	u32 errors;
+	u32 uic_error;
+	u32 saved_err;
+	u32 saved_uic_err;
 
 	/* Device management request data */
 	struct ufs_dev_cmd dev_cmd;
-- 
1.8.3.4


^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2013-08-14 17:11 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-08-14 17:10 [PATCH 0/4] scsi: ufs: TM, fatal-error handling and other fixes Santosh Y
2013-08-14 17:10 ` [PATCH 1/4] scsi: ufs: Fix broken task management command implementation Santosh Y
2013-08-14 17:10 ` [PATCH 2/4] scsi: ufs: Fix hardware race conditions while aborting a command Santosh Y
2013-08-14 17:10 ` [PATCH 3/4] scsi: ufs: Fix device and host reset methods Santosh Y
2013-08-14 17:10 ` [PATCH 4/4] scsi: ufs: Improve UFS fatal error handling Santosh Y
  -- strict thread matches above, loose matches on Subject: below --
2013-06-13 14:30 [PATCH 0/4] scsi: ufs: Improve UFS " Sujit Reddy Thumma
2013-06-13 14:30 ` [PATCH 1/4] scsi: ufs: Fix broken task management command implementation Sujit Reddy Thumma
2013-06-27 11:19   ` Santosh Y
2013-06-28 11:32     ` Sujit Reddy Thumma
2013-07-02 15:51       ` Santosh Y
2013-07-03 15:52         ` Sujit Reddy Thumma
2013-07-03 16:23           ` Santosh Y
2013-07-03 16:42             ` Sujit Reddy Thumma

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).