Re: [PATCH] scsi_lib: Don't fail the path in ALUA transition state

From: Martin Wilck
Date: Wed Jun 08 2022 - 13:22:55 EST


On Tue, 2022-06-07 at 12:58 -0700, Brian Bunker wrote:
> The error path for the SCSI check condition of not ready, target in
> ALUA state transition, will result in the failure of that path after
> the retries are exhausted. In most cases that is well ahead of the
> transition timeout established in the SCSI ALUA device handler.
>
> Instead, reprep the command and re-add it to the queue after a 1
> second
> delay. This will allow the handler to take care of the timeout and
> only fail the path in the transition state if the target has exceeded
> the transition timeout (default 60 seconds).
>
> Acked-by: Krishna Kant <krishna.kant@xxxxxxxxxxxxxxx>
> Acked-by: Seamus Connor <sconnor@xxxxxxxxxxxxxxx>
> Signed-off-by: Brian Bunker <brian@xxxxxxxxxxxxxxx>

Reviewed-by: Martin Wilck <mwilck@xxxxxxxx>

> ---
>  drivers/scsi/scsi_lib.c | 44 +++++++++++++++++++++++----------------
> --
>  1 file changed, 25 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
> index 6ffc9e4258a8..1afb267ff9a2 100644
> --- a/drivers/scsi/scsi_lib.c
> +++ b/drivers/scsi/scsi_lib.c
> @@ -118,7 +118,7 @@ scsi_set_blocked(struct scsi_cmnd *cmd, int
> reason)
>         }
>  }
>  
> -static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd)
> +static void scsi_mq_requeue_cmd(struct scsi_cmnd *cmd, unsigned long
> msecs)
>  {
>         struct request *rq = scsi_cmd_to_rq(cmd);
>  
> @@ -128,7 +128,12 @@ static void scsi_mq_requeue_cmd(struct scsi_cmnd
> *cmd)
>         } else {
>                 WARN_ON_ONCE(true);
>         }
> -       blk_mq_requeue_request(rq, true);
> +
> +       if (msecs) {
> +               blk_mq_requeue_request(rq, false);
> +               blk_mq_delay_kick_requeue_list(rq->q, msecs);
> +       } else
> +               blk_mq_requeue_request(rq, true);
>  }
>  
>  /**
> @@ -658,14 +663,6 @@ static unsigned int scsi_rq_err_bytes(const
> struct request *rq)
>         return bytes;
>  }
>  
> -/* Helper for scsi_io_completion() when "reprep" action required. */
> -static void scsi_io_completion_reprep(struct scsi_cmnd *cmd,
> -                                     struct request_queue *q)
> -{
> -       /* A new command will be prepared and issued. */
> -       scsi_mq_requeue_cmd(cmd);
> -}
> -
>  static bool scsi_cmd_runtime_exceeced(struct scsi_cmnd *cmd)
>  {
>         struct request *req = scsi_cmd_to_rq(cmd);
> @@ -683,14 +680,21 @@ static bool scsi_cmd_runtime_exceeced(struct
> scsi_cmnd *cmd)
>         return false;
>  }
>  
> +/*
> + * When ALUA transition state is returned, reprep the cmd to
> + * use the ALUA handlers transition timeout. Delay the reprep
> + * 1 sec to avoid aggressive retries of the target in that
> + * state.
> + */
> +#define ALUA_TRANSITION_REPREP_DELAY   1000
> +
>  /* Helper for scsi_io_completion() when special action required. */
>  static void scsi_io_completion_action(struct scsi_cmnd *cmd, int
> result)
>  {
> -       struct request_queue *q = cmd->device->request_queue;
>         struct request *req = scsi_cmd_to_rq(cmd);
>         int level = 0;
> -       enum {ACTION_FAIL, ACTION_REPREP, ACTION_RETRY,
> -             ACTION_DELAYED_RETRY} action;
> +       enum {ACTION_FAIL, ACTION_REPREP, ACTION_DELAYED_REPREP,
> +             ACTION_RETRY, ACTION_DELAYED_RETRY} action;
>         struct scsi_sense_hdr sshdr;
>         bool sense_valid;
>         bool sense_current = true;      /* false implies "deferred
> sense" */
> @@ -779,8 +783,8 @@ static void scsi_io_completion_action(struct
> scsi_cmnd *cmd, int result)
>                                         action =
> ACTION_DELAYED_RETRY;
>                                         break;
>                                 case 0x0a: /* ALUA state transition
> */
> -                                       blk_stat = BLK_STS_TRANSPORT;
> -                                       fallthrough;
> +                                       action =
> ACTION_DELAYED_REPREP;
> +                                       break;
>                                 default:
>                                         action = ACTION_FAIL;
>                                         break;
> @@ -839,7 +843,10 @@ static void scsi_io_completion_action(struct
> scsi_cmnd *cmd, int result)
>                         return;
>                 fallthrough;
>         case ACTION_REPREP:
> -               scsi_io_completion_reprep(cmd, q);
> +               scsi_mq_requeue_cmd(cmd, 0);
> +               break;
> +       case ACTION_DELAYED_REPREP:
> +               scsi_mq_requeue_cmd(cmd,
> ALUA_TRANSITION_REPREP_DELAY);
>                 break;
>         case ACTION_RETRY:
>                 /* Retry the same command immediately */
> @@ -933,7 +940,7 @@ static int scsi_io_completion_nz_result(struct
> scsi_cmnd *cmd, int result,
>   * command block will be released and the queue function will be
> goosed. If we
>   * are not done then we have to figure out what to do next:
>   *
> - *   a) We can call scsi_io_completion_reprep().  The request will
> be
> + *   a) We can call scsi_mq_requeue_cmd().  The request will be
>   *     unprepared and put back on the queue.  Then a new command
> will
>   *     be created for it.  This should be used if we made forward
>   *     progress, or if we want to switch from READ(10) to READ(6)
> for
> @@ -949,7 +956,6 @@ static int scsi_io_completion_nz_result(struct
> scsi_cmnd *cmd, int result,
>  void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int
> good_bytes)
>  {
>         int result = cmd->result;
> -       struct request_queue *q = cmd->device->request_queue;
>         struct request *req = scsi_cmd_to_rq(cmd);
>         blk_status_t blk_stat = BLK_STS_OK;
>  
> @@ -986,7 +992,7 @@ void scsi_io_completion(struct scsi_cmnd *cmd,
> unsigned int good_bytes)
>          * request just queue the command up again.
>          */
>         if (likely(result == 0))
> -               scsi_io_completion_reprep(cmd, q);
> +               scsi_mq_requeue_cmd(cmd, 0);
>         else
>                 scsi_io_completion_action(cmd, result);
>  }