All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ming Lei <ming.lei@redhat.com>
To: Uday Shankar <ushankar@purestorage.com>
Cc: Jens Axboe <axboe@kernel.dk>,
	linux-block@vger.kernel.org, ming.lei@redhat.com
Subject: Re: [PATCH 4/4] ublk: support device recovery without I/O queueing
Date: Tue, 2 Jul 2024 21:46:00 +0800	[thread overview]
Message-ID: <ZoQEmCuPIq0thaON@fedora> (raw)
In-Reply-To: <20240617194451.435445-5-ushankar@purestorage.com>

On Mon, Jun 17, 2024 at 01:44:51PM -0600, Uday Shankar wrote:
> ublk currently supports the following behaviors on ublk server exit:
> 
> A: outstanding I/Os get errors, subsequently issued I/Os get errors
> B: outstanding I/Os get errors, subsequently issued I/Os queue
> C: outstanding I/Os get reissued, subsequently issued I/Os queue
> 
> and the following behaviors for recovery of preexisting block devices by
> a future incarnation of the ublk server:
> 
> 1: ublk devices stopped on ublk server exit (no recovery possible)
> 2: ublk devices are recoverable using start/end_recovery commands
> 
> The userspace interface allows selection of combinations of these
> behaviors using flags specified at device creation time, namely:
> 
> default behavior: A + 1
> UBLK_F_USER_RECOVERY: B + 2
> UBLK_F_USER_RECOVERY|UBLK_F_USER_RECOVERY_REISSUE: C + 2
> 
> The behavior A + 2 is currently unsupported. Add support for this
> behavior under the new flag UBLK_F_USER_RECOVERY_NOQUEUE.
> 
> Signed-off-by: Uday Shankar <ushankar@purestorage.com>
> ---
>  drivers/block/ublk_drv.c      | 53 +++++++++++++++++++++++++++--------
>  include/uapi/linux/ublk_cmd.h | 18 ++++++++++++
>  2 files changed, 60 insertions(+), 11 deletions(-)
> 
> diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
> index 0496fa372cc1..4fec8b48d30e 100644
> --- a/drivers/block/ublk_drv.c
> +++ b/drivers/block/ublk_drv.c
> @@ -57,10 +57,12 @@
>  		| UBLK_F_UNPRIVILEGED_DEV \
>  		| UBLK_F_CMD_IOCTL_ENCODE \
>  		| UBLK_F_USER_COPY \
> -		| UBLK_F_ZONED)
> +		| UBLK_F_ZONED \
> +		| UBLK_F_USER_RECOVERY_NOQUEUE)
>  
>  #define UBLK_F_ALL_RECOVERY_FLAGS (UBLK_F_USER_RECOVERY \
> -		| UBLK_F_USER_RECOVERY_REISSUE)
> +		| UBLK_F_USER_RECOVERY_REISSUE \
> +		| UBLK_F_USER_RECOVERY_NOQUEUE)
>  
>  /* All UBLK_PARAM_TYPE_* should be included here */
>  #define UBLK_PARAM_TYPE_ALL                                \
> @@ -679,7 +681,14 @@ static inline bool ublk_nosrv_should_queue_io(struct ublk_device *ub)
>  static inline bool ublk_nosrv_should_stop_dev(struct ublk_device *ub)
>  {
>  	return (!(ub->dev_info.flags & UBLK_F_USER_RECOVERY)) &&
> -	       (!(ub->dev_info.flags & UBLK_F_USER_RECOVERY_REISSUE));
> +	       (!(ub->dev_info.flags & UBLK_F_USER_RECOVERY_REISSUE)) &&
> +	       (!(ub->dev_info.flags & UBLK_F_USER_RECOVERY_NOQUEUE));
> +}
> +
> +static inline bool ublk_dev_in_recoverable_state(struct ublk_device *ub)
> +{
> +	return ub->dev_info.state == UBLK_S_DEV_QUIESCED ||
> +	       ub->dev_info.state == UBLK_S_DEV_FAIL_IO;
>  }
>  
>  static void ublk_free_disk(struct gendisk *disk)
> @@ -1243,6 +1252,11 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
>  	struct request *rq = bd->rq;
>  	blk_status_t res;
>  
> +	if (ubq->dev->dev_info.state == UBLK_S_DEV_FAIL_IO) {
> +		return BLK_STS_TARGET;
> +	}
> +	WARN_ON_ONCE(ubq->dev->dev_info.state != UBLK_S_DEV_LIVE);
> +

I'd suggest to add one per-ublk-queue flag for this purpose instead of
device state, then fetching device can be avoided in fast io path, please see
similar example of `->force_abort`.

>  	/* fill iod to slot in io cmd buffer */
>  	res = ublk_setup_iod(ubq, rq);
>  	if (unlikely(res != BLK_STS_OK))
> @@ -1602,7 +1616,15 @@ static void ublk_nosrv_work(struct work_struct *work)
>  	mutex_lock(&ub->mutex);
>  	if (ub->dev_info.state != UBLK_S_DEV_LIVE)
>  		goto unlock;
> -	__ublk_quiesce_dev(ub);
> +
> +	if (ublk_nosrv_should_queue_io(ub)) {

Here ublk_nosrv_should_queue_io() doesn't cover UBLK_F_USER_RECOVERY_REISSUE.

> +		__ublk_quiesce_dev(ub);
> +	} else {
> +		blk_mq_quiesce_queue(ub->ub_disk->queue);
> +		ub->dev_info.state = UBLK_S_DEV_FAIL_IO;
> +		blk_mq_unquiesce_queue(ub->ub_disk->queue);
> +	}

If the above extra device state is saved, the whole change can be simplified,
and __ublk_quiesce_dev() still can be called for
UBLK_F_USER_RECOVERY_NOQUEUE, and UBLK_S_DEV_QUIESCED can cover this new flag,
meantime setting one per-ublk-queue flag, such as, ->fail_io_in_recovery.

> +
>   unlock:
>  	mutex_unlock(&ub->mutex);
>  	ublk_cancel_dev(ub);
> @@ -2351,6 +2373,7 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
>  	case 0:
>  	case UBLK_F_USER_RECOVERY:
>  	case (UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_REISSUE):
> +	case UBLK_F_USER_RECOVERY_NOQUEUE:
>  		break;
>  	default:
>  		pr_warn("%s: invalid recovery flags %llx\n", __func__,
> @@ -2682,14 +2705,18 @@ static int ublk_ctrl_start_recovery(struct ublk_device *ub,
>  	 *     and related io_uring ctx is freed so file struct of /dev/ublkcX is
>  	 *     released.
>  	 *
> +	 * and one of the following holds
> +	 *
>  	 * (2) UBLK_S_DEV_QUIESCED is set, which means the quiesce_work:
>  	 *     (a)has quiesced request queue
>  	 *     (b)has requeued every inflight rqs whose io_flags is ACTIVE
>  	 *     (c)has requeued/aborted every inflight rqs whose io_flags is NOT ACTIVE
>  	 *     (d)has completed/camceled all ioucmds owned by ther dying process
> +	 *
> +	 * (3) UBLK_S_DEV_FAIL_IO is set, which means the queue is not
> +	 *     quiesced, but all I/O is being immediately errored
>  	 */
> -	if (test_bit(UB_STATE_OPEN, &ub->state) ||
> -			ub->dev_info.state != UBLK_S_DEV_QUIESCED) {
> +	if (test_bit(UB_STATE_OPEN, &ub->state) || !ublk_dev_in_recoverable_state(ub)) {
>  		ret = -EBUSY;
>  		goto out_unlock;
>  	}
> @@ -2727,18 +2754,22 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub,
>  	if (ublk_nosrv_should_stop_dev(ub))
>  		goto out_unlock;
>  
> -	if (ub->dev_info.state != UBLK_S_DEV_QUIESCED) {
> +	if (!ublk_dev_in_recoverable_state(ub)) {
>  		ret = -EBUSY;
>  		goto out_unlock;
>  	}
>  	ub->dev_info.ublksrv_pid = ublksrv_pid;
>  	pr_devel("%s: new ublksrv_pid %d, dev id %d\n",
>  			__func__, ublksrv_pid, header->dev_id);
> -	blk_mq_unquiesce_queue(ub->ub_disk->queue);
> -	pr_devel("%s: queue unquiesced, dev id %d.\n",
> -			__func__, header->dev_id);
> -	blk_mq_kick_requeue_list(ub->ub_disk->queue);
> +
>  	ub->dev_info.state = UBLK_S_DEV_LIVE;
> +	if (ublk_nosrv_should_queue_io(ub)) {
> +		blk_mq_unquiesce_queue(ub->ub_disk->queue);
> +		pr_devel("%s: queue unquiesced, dev id %d.\n",
> +				__func__, header->dev_id);
> +		blk_mq_kick_requeue_list(ub->ub_disk->queue);
> +	}
> +
>  	ret = 0;
>   out_unlock:
>  	mutex_unlock(&ub->mutex);
> diff --git a/include/uapi/linux/ublk_cmd.h b/include/uapi/linux/ublk_cmd.h
> index c8dc5f8ea699..c4512b3a3c52 100644
> --- a/include/uapi/linux/ublk_cmd.h
> +++ b/include/uapi/linux/ublk_cmd.h
> @@ -147,8 +147,18 @@
>   */
>  #define UBLK_F_NEED_GET_DATA (1UL << 2)
>  
> +/*
> + * - Block devices are recoverable if ublk server exits and restarts
> + * - Outstanding I/O when ublk server exits is met with errors
> + * - I/O issued while there is no ublk server queues
> + */
>  #define UBLK_F_USER_RECOVERY	(1UL << 3)
>  
> +/*
> + * - Block devices are recoverable if ublk server exits and restarts
> + * - Outstanding I/O when ublk server exits is reissued
> + * - I/O issued while there is no ublk server queues
> + */
>  #define UBLK_F_USER_RECOVERY_REISSUE	(1UL << 4)
>  
>  /*
> @@ -184,10 +194,18 @@
>   */
>  #define UBLK_F_ZONED (1ULL << 8)
>  
> +/*
> + * - Block devices are recoverable if ublk server exits and restarts
> + * - Outstanding I/O when ublk server exits is met with errors
> + * - I/O issued while there is no ublk server is met with errors
> + */
> +#define UBLK_F_USER_RECOVERY_NOQUEUE (1ULL << 9)

Maybe UBLK_F_USER_RECOVERY_FAIL_IO is more readable?


Thanks,
Ming


  reply	other threads:[~2024-07-02 13:46 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-06-17 19:44 [PATCH 0/4] ublk: support device recovery without I/O queueing Uday Shankar
2024-06-17 19:44 ` [PATCH 1/4] ublk: check recovery flags for validity Uday Shankar
2024-06-18  2:00   ` Ming Lei
2024-07-23 19:32     ` Uday Shankar
2024-06-17 19:44 ` [PATCH 2/4] ublk: refactor recovery configuration flag helpers Uday Shankar
2024-06-18  2:11   ` Ming Lei
2024-06-26 17:22     ` Uday Shankar
2024-06-27  1:17       ` Ming Lei
2024-06-27 17:09         ` Uday Shankar
2024-06-30 13:56           ` Ming Lei
2024-07-01 21:02             ` Uday Shankar
2024-07-02  4:07               ` Ming Lei
2024-07-02 11:09   ` Ming Lei
2024-09-17  0:26     ` Uday Shankar
2024-06-17 19:44 ` [PATCH 3/4] ublk: merge stop_work and quiesce_work Uday Shankar
2024-07-02 13:31   ` Ming Lei
2024-06-17 19:44 ` [PATCH 4/4] ublk: support device recovery without I/O queueing Uday Shankar
2024-07-02 13:46   ` Ming Lei [this message]
2024-09-17  0:29     ` Uday Shankar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ZoQEmCuPIq0thaON@fedora \
    --to=ming.lei@redhat.com \
    --cc=axboe@kernel.dk \
    --cc=linux-block@vger.kernel.org \
    --cc=ushankar@purestorage.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.