public inbox for linux-scsi@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH] scsi_lib: Allow the ALUA transitioning state enough time
@ 2022-07-29 21:41 Brian Bunker
  2022-08-08 17:03 ` Martin Wilck
  2022-08-12  2:01 ` Martin K. Petersen
  0 siblings, 2 replies; 3+ messages in thread
From: Brian Bunker @ 2022-07-29 21:41 UTC (permalink / raw)
  To: linux-scsi; +Cc: Brian Bunker, Krishna Kant, Seamus Connor

The error path for the SCSI check condition of not ready, target in
ALUA state transition, will result in the failure of that path after
the retries are exhausted. In most cases that is well ahead of the
transition timeout established in the SCSI ALUA device handler.

Instead, reprep the command and re-add it to the queue after a 1 second
delay. This will allow the handler to take care of the timeout and
only fail the path if the target has exceeded the transition expiry
timeout (default 60 seconds). If the expiry timeout is exceeded, the
handler will change the path state from transitioning to standby
leading to a path failure eliminating the potential of this re-prep
to continue endlessly. In most cases the target will exit the
transitioning state well before the expiry timeout but after the
retries are exhausted as mentioned.

Additionally remove the scsi_io_completion_reprep function which
provides little value.

Acked-by: Krishna Kant <krishna.kant@purestorage.com>
Acked-by: Seamus Connor <sconnor@purestorage.com>
Signed-off-by: Brian Bunker <brian@purestorage.com>
---
 drivers/scsi/scsi_lib.c | 44 +++++++++++++++++++++++------------------
 1 file changed, 25 insertions(+), 19 deletions(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 6ffc9e4258a8..1afb267ff9a2 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -118,7 +118,7 @@ scsi_set_blocked(struct scsi_cmnd *cmd, int reason)
 	}
 }
 
-static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd)
+static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd, unsigned long msecs)
 {
 	struct request *rq = scsi_cmd_to_rq(cmd);
 
@@ -128,7 +128,12 @@ static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd)
 	} else {
 		WARN_ON_ONCE(true);
 	}
-	blk_mq_requeue_request(rq, true);
+
+	if (msecs) {
+		blk_mq_requeue_request(rq, false);
+		blk_mq_delay_kick_requeue_list(rq->q, msecs);
+	} else
+		blk_mq_requeue_request(rq, true);
 }
 
 /**
@@ -658,14 +663,6 @@ static unsigned int scsi_rq_err_bytes(const struct request *rq)
 	return bytes;
 }
 
-/* Helper for scsi_io_completion() when "reprep" action required. */
-static void scsi_io_completion_reprep(struct scsi_cmnd *cmd,
-				      struct request_queue *q)
-{
-	/* A new command will be prepared and issued. */
-	scsi_mq_requeue_cmd(cmd);
-}
-
 static bool scsi_cmd_runtime_exceeced(struct scsi_cmnd *cmd)
 {
 	struct request *req = scsi_cmd_to_rq(cmd);
@@ -683,14 +680,21 @@ static bool scsi_cmd_runtime_exceeced(struct scsi_cmnd *cmd)
 	return false;
 }
 
+/*
+ * When ALUA transition state is returned, reprep the cmd to
+ * use the ALUA handlers transition timeout. Delay the reprep
+ * 1 sec to avoid aggressive retries of the target in that
+ * state.
+ */
+#define ALUA_TRANSITION_REPREP_DELAY	1000
+
 /* Helper for scsi_io_completion() when special action required. */
 static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
 {
-	struct request_queue *q = cmd->device->request_queue;
 	struct request *req = scsi_cmd_to_rq(cmd);
 	int level = 0;
-	enum {ACTION_FAIL, ACTION_REPREP, ACTION_RETRY,
-	      ACTION_DELAYED_RETRY} action;
+	enum {ACTION_FAIL, ACTION_REPREP, ACTION_DELAYED_REPREP,
+	      ACTION_RETRY, ACTION_DELAYED_RETRY} action;
 	struct scsi_sense_hdr sshdr;
 	bool sense_valid;
 	bool sense_current = true;      /* false implies "deferred sense" */
@@ -779,8 +783,8 @@ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
 					action = ACTION_DELAYED_RETRY;
 					break;
 				case 0x0a: /* ALUA state transition */
-					blk_stat = BLK_STS_TRANSPORT;
-					fallthrough;
+					action = ACTION_DELAYED_REPREP;
+					break;
 				default:
 					action = ACTION_FAIL;
 					break;
@@ -839,7 +843,10 @@ static void scsi_io_completion_action(struct scsi_cmnd *cmd, int result)
 			return;
 		fallthrough;
 	case ACTION_REPREP:
-		scsi_io_completion_reprep(cmd, q);
+		scsi_mq_requeue_cmd(cmd, 0);
+		break;
+	case ACTION_DELAYED_REPREP:
+		scsi_mq_requeue_cmd(cmd, ALUA_TRANSITION_REPREP_DELAY);
 		break;
 	case ACTION_RETRY:
 		/* Retry the same command immediately */
@@ -933,7 +940,7 @@ static int scsi_io_completion_nz_result(struct scsi_cmnd *cmd, int result,
  * command block will be released and the queue function will be goosed. If we
  * are not done then we have to figure out what to do next:
  *
- *   a) We can call scsi_io_completion_reprep().  The request will be
+ *   a) We can call scsi_mq_requeue_cmd().  The request will be
  *	unprepared and put back on the queue.  Then a new command will
  *	be created for it.  This should be used if we made forward
  *	progress, or if we want to switch from READ(10) to READ(6) for
@@ -949,7 +956,6 @@ static int scsi_io_completion_nz_result(struct scsi_cmnd *cmd, int result,
 void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 {
 	int result = cmd->result;
-	struct request_queue *q = cmd->device->request_queue;
 	struct request *req = scsi_cmd_to_rq(cmd);
 	blk_status_t blk_stat = BLK_STS_OK;
 
@@ -986,7 +992,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes)
 	 * request just queue the command up again.
 	 */
 	if (likely(result == 0))
-		scsi_io_completion_reprep(cmd, q);
+		scsi_mq_requeue_cmd(cmd, 0);
 	else
 		scsi_io_completion_action(cmd, result);
 }
-- 
2.33.1


^ permalink raw reply related	[flat|nested] 3+ messages in thread

* Re: [PATCH] scsi_lib: Allow the ALUA transitioning state enough time
  2022-07-29 21:41 [PATCH] scsi_lib: Allow the ALUA transitioning state enough time Brian Bunker
@ 2022-08-08 17:03 ` Martin Wilck
  2022-08-12  2:01 ` Martin K. Petersen
  1 sibling, 0 replies; 3+ messages in thread
From: Martin Wilck @ 2022-08-08 17:03 UTC (permalink / raw)
  To: Brian Bunker, linux-scsi, Martin K. Petersen; +Cc: Krishna Kant, Seamus Connor

On Fri, 2022-07-29 at 14:41 -0700, Brian Bunker wrote:
> The error path for the SCSI check condition of not ready, target in
> ALUA state transition, will result in the failure of that path after
> the retries are exhausted. In most cases that is well ahead of the
> transition timeout established in the SCSI ALUA device handler.
> 
> Instead, reprep the command and re-add it to the queue after a 1
> second
> delay. This will allow the handler to take care of the timeout and
> only fail the path if the target has exceeded the transition expiry
> timeout (default 60 seconds). If the expiry timeout is exceeded, the
> handler will change the path state from transitioning to standby
> leading to a path failure eliminating the potential of this re-prep
> to continue endlessly. In most cases the target will exit the
> transitioning state well before the expiry timeout but after the
> retries are exhausted as mentioned.
> 
> Additionally remove the scsi_io_completion_reprep function which
> provides little value.
> 
> Acked-by: Krishna Kant <krishna.kant@purestorage.com>
> Acked-by: Seamus Connor <sconnor@purestorage.com>
> Signed-off-by: Brian Bunker <brian@purestorage.com>

Reviewed-by: Martin Wilck <mwilck@suse.com>

> ---
>  drivers/scsi/scsi_lib.c | 44 +++++++++++++++++++++++----------------
> --
>  1 file changed, 25 insertions(+), 19 deletions(-)
> 
> diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
> index 6ffc9e4258a8..1afb267ff9a2 100644
> --- a/drivers/scsi/scsi_lib.c
> +++ b/drivers/scsi/scsi_lib.c
> @@ -118,7 +118,7 @@ scsi_set_blocked(struct scsi_cmnd *cmd, int
> reason)
>         }
>  }
>  
> -static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd)
> +static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd, unsigned long
> msecs)
>  {
>         struct request *rq = scsi_cmd_to_rq(cmd);
>  
> @@ -128,7 +128,12 @@ static void scsi_mq_requeue_cmd(struct scsi_cmnd
> *cmd)
>         } else {
>                 WARN_ON_ONCE(true);
>         }
> -       blk_mq_requeue_request(rq, true);
> +
> +       if (msecs) {
> +               blk_mq_requeue_request(rq, false);
> +               blk_mq_delay_kick_requeue_list(rq->q, msecs);
> +       } else
> +               blk_mq_requeue_request(rq, true);
>  }
>  
>  /**
> @@ -658,14 +663,6 @@ static unsigned int scsi_rq_err_bytes(const
> struct request *rq)
>         return bytes;
>  }
>  
> -/* Helper for scsi_io_completion() when "reprep" action required. */
> -static void scsi_io_completion_reprep(struct scsi_cmnd *cmd,
> -                                     struct request_queue *q)
> -{
> -       /* A new command will be prepared and issued. */
> -       scsi_mq_requeue_cmd(cmd);
> -}
> -
>  static bool scsi_cmd_runtime_exceeced(struct scsi_cmnd *cmd)
>  {
>         struct request *req = scsi_cmd_to_rq(cmd);
> @@ -683,14 +680,21 @@ static bool scsi_cmd_runtime_exceeced(struct
> scsi_cmnd *cmd)
>         return false;
>  }
>  
> +/*
> + * When ALUA transition state is returned, reprep the cmd to
> + * use the ALUA handlers transition timeout. Delay the reprep
> + * 1 sec to avoid aggressive retries of the target in that
> + * state.
> + */
> +#define ALUA_TRANSITION_REPREP_DELAY   1000
> +
>  /* Helper for scsi_io_completion() when special action required. */
>  static void scsi_io_completion_action(struct scsi_cmnd *cmd, int
> result)
>  {
> -       struct request_queue *q = cmd->device->request_queue;
>         struct request *req = scsi_cmd_to_rq(cmd);
>         int level = 0;
> -       enum {ACTION_FAIL, ACTION_REPREP, ACTION_RETRY,
> -             ACTION_DELAYED_RETRY} action;
> +       enum {ACTION_FAIL, ACTION_REPREP, ACTION_DELAYED_REPREP,
> +             ACTION_RETRY, ACTION_DELAYED_RETRY} action;
>         struct scsi_sense_hdr sshdr;
>         bool sense_valid;
>         bool sense_current = true;      /* false implies "deferred
> sense" */
> @@ -779,8 +783,8 @@ static void scsi_io_completion_action(struct
> scsi_cmnd *cmd, int result)
>                                         action =
> ACTION_DELAYED_RETRY;
>                                         break;
>                                 case 0x0a: /* ALUA state transition
> */
> -                                       blk_stat = BLK_STS_TRANSPORT;
> -                                       fallthrough;
> +                                       action =
> ACTION_DELAYED_REPREP;
> +                                       break;
>                                 default:
>                                         action = ACTION_FAIL;
>                                         break;
> @@ -839,7 +843,10 @@ static void scsi_io_completion_action(struct
> scsi_cmnd *cmd, int result)
>                         return;
>                 fallthrough;
>         case ACTION_REPREP:
> -               scsi_io_completion_reprep(cmd, q);
> +               scsi_mq_requeue_cmd(cmd, 0);
> +               break;
> +       case ACTION_DELAYED_REPREP:
> +               scsi_mq_requeue_cmd(cmd,
> ALUA_TRANSITION_REPREP_DELAY);
>                 break;
>         case ACTION_RETRY:
>                 /* Retry the same command immediately */
> @@ -933,7 +940,7 @@ static int scsi_io_completion_nz_result(struct
> scsi_cmnd *cmd, int result,
>   * command block will be released and the queue function will be
> goosed. If we
>   * are not done then we have to figure out what to do next:
>   *
> - *   a) We can call scsi_io_completion_reprep().  The request will
> be
> + *   a) We can call scsi_mq_requeue_cmd().  The request will be
>   *     unprepared and put back on the queue.  Then a new command
> will
>   *     be created for it.  This should be used if we made forward
>   *     progress, or if we want to switch from READ(10) to READ(6)
> for
> @@ -949,7 +956,6 @@ static int scsi_io_completion_nz_result(struct
> scsi_cmnd *cmd, int result,
>  void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int
> good_bytes)
>  {
>         int result = cmd->result;
> -       struct request_queue *q = cmd->device->request_queue;
>         struct request *req = scsi_cmd_to_rq(cmd);
>         blk_status_t blk_stat = BLK_STS_OK;
>  
> @@ -986,7 +992,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd,
> unsigned int good_bytes)
>          * request just queue the command up again.
>          */
>         if (likely(result == 0))
> -               scsi_io_completion_reprep(cmd, q);
> +               scsi_mq_requeue_cmd(cmd, 0);
>         else
>                 scsi_io_completion_action(cmd, result);
>  }


^ permalink raw reply	[flat|nested] 3+ messages in thread

* Re: [PATCH] scsi_lib: Allow the ALUA transitioning state enough time
  2022-07-29 21:41 [PATCH] scsi_lib: Allow the ALUA transitioning state enough time Brian Bunker
  2022-08-08 17:03 ` Martin Wilck
@ 2022-08-12  2:01 ` Martin K. Petersen
  1 sibling, 0 replies; 3+ messages in thread
From: Martin K. Petersen @ 2022-08-12  2:01 UTC (permalink / raw)
  To: Brian Bunker; +Cc: linux-scsi, Krishna Kant, Seamus Connor


Brian,

> The error path for the SCSI check condition of not ready, target in
> ALUA state transition, will result in the failure of that path after
> the retries are exhausted. In most cases that is well ahead of the
> transition timeout established in the SCSI ALUA device handler.

Applied to 5.20/scsi-staging, thanks!

-- 
Martin K. Petersen	Oracle Linux Engineering

^ permalink raw reply	[flat|nested] 3+ messages in thread

end of thread, other threads:[~2022-08-12  2:01 UTC | newest]

Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2022-07-29 21:41 [PATCH] scsi_lib: Allow the ALUA transitioning state enough time Brian Bunker
2022-08-08 17:03 ` Martin Wilck
2022-08-12  2:01 ` Martin K. Petersen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox