From: Ming Lei <ming.lei@redhat.com>
To: Uday Shankar <ushankar@purestorage.com>
Cc: Jens Axboe <axboe@kernel.dk>, linux-block@vger.kernel.org
Subject: Re: [PATCH v2 4/4] ublk: support device recovery without I/O queueing
Date: Wed, 25 Sep 2024 18:58:30 +0800 [thread overview]
Message-ID: <ZvPs1h1byAkpYMGC@fedora> (raw)
In-Reply-To: <20240917002155.2044225-5-ushankar@purestorage.com>
On Mon, Sep 16, 2024 at 06:21:55PM -0600, Uday Shankar wrote:
> ublk currently supports the following behaviors on ublk server exit:
>
> A: outstanding I/Os get errors, subsequently issued I/Os get errors
> B: outstanding I/Os get errors, subsequently issued I/Os queue
> C: outstanding I/Os get reissued, subsequently issued I/Os queue
>
> and the following behaviors for recovery of preexisting block devices by
> a future incarnation of the ublk server:
>
> 1: ublk devices stopped on ublk server exit (no recovery possible)
> 2: ublk devices are recoverable using start/end_recovery commands
>
> The userspace interface allows selection of combinations of these
> behaviors using flags specified at device creation time, namely:
>
> default behavior: A + 1
> UBLK_F_USER_RECOVERY: B + 2
> UBLK_F_USER_RECOVERY|UBLK_F_USER_RECOVERY_REISSUE: C + 2
>
> The behavior A + 2 is currently unsupported. Add support for this
> behavior under the new flag combination
> UBLK_F_USER_RECOVERY|UBLK_F_USER_RECOVERY_FAIL_IO.
>
> Signed-off-by: Uday Shankar <ushankar@purestorage.com>
> ---
> Changes since v1 (https://lore.kernel.org/linux-block/20240617194451.435445-5-ushankar@purestorage.com/):
> - Change flag name from UBLK_F_USER_RECOVERY_NOQUEUE to
> UBLK_F_USER_RECOVERY_FAIL_IO
> - Require UBLK_F_USER_RECOVERY to be set along with the new flag for it
> to be effective. This makes more sense, as UBLK_F_USER_RECOVERY
> essentially selects behavior 2 above (and not setting
> UBLK_F_USER_RECOVERY selects behavior 1).
> - Add per-ublk-queue flag which is true iff device state is
> UBLK_S_DEV_FAIL_IO. This lets us avoid fetching the device in the fast
> path.
>
> drivers/block/ublk_drv.c | 75 ++++++++++++++++++++++++++++-------
> include/uapi/linux/ublk_cmd.h | 18 +++++++++
> 2 files changed, 79 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
> index c7a0493b3545..548043eeefb9 100644
> --- a/drivers/block/ublk_drv.c
> +++ b/drivers/block/ublk_drv.c
> @@ -60,10 +60,12 @@
> | UBLK_F_UNPRIVILEGED_DEV \
> | UBLK_F_CMD_IOCTL_ENCODE \
> | UBLK_F_USER_COPY \
> - | UBLK_F_ZONED)
> + | UBLK_F_ZONED \
> + | UBLK_F_USER_RECOVERY_FAIL_IO)
>
> #define UBLK_F_ALL_RECOVERY_FLAGS (UBLK_F_USER_RECOVERY \
> - | UBLK_F_USER_RECOVERY_REISSUE)
> + | UBLK_F_USER_RECOVERY_REISSUE \
> + | UBLK_F_USER_RECOVERY_FAIL_IO)
>
> /* All UBLK_PARAM_TYPE_* should be included here */
> #define UBLK_PARAM_TYPE_ALL \
> @@ -146,6 +148,7 @@ struct ublk_queue {
> bool force_abort;
> bool timeout;
> bool canceling;
> + bool fail_io; /* copy of dev->state == UBLK_S_DEV_FAIL_IO */
> unsigned short nr_io_ready; /* how many ios setup */
> spinlock_t cancel_lock;
> struct ublk_device *dev;
> @@ -690,7 +693,8 @@ static inline bool ublk_nosrv_should_reissue_outstanding(struct ublk_device *ub)
> */
> static inline bool ublk_nosrv_dev_should_queue_io(struct ublk_device *ub)
> {
> - return ub->dev_info.flags & UBLK_F_USER_RECOVERY;
> + return (ub->dev_info.flags & UBLK_F_USER_RECOVERY) &&
> + !(ub->dev_info.flags & UBLK_F_USER_RECOVERY_FAIL_IO);
> }
>
> /*
> @@ -700,7 +704,8 @@ static inline bool ublk_nosrv_dev_should_queue_io(struct ublk_device *ub)
> */
> static inline bool ublk_nosrv_should_queue_io(struct ublk_queue *ubq)
> {
> - return ubq->flags & UBLK_F_USER_RECOVERY;
> + return (ubq->flags & UBLK_F_USER_RECOVERY) &&
> + !(ubq->flags & UBLK_F_USER_RECOVERY_FAIL_IO);
> }
>
> /*
> @@ -712,7 +717,14 @@ static inline bool ublk_nosrv_should_queue_io(struct ublk_queue *ubq)
> static inline bool ublk_nosrv_should_stop_dev(struct ublk_device *ub)
> {
> return (!(ub->dev_info.flags & UBLK_F_USER_RECOVERY)) &&
> - (!(ub->dev_info.flags & UBLK_F_USER_RECOVERY_REISSUE));
> + (!(ub->dev_info.flags & UBLK_F_USER_RECOVERY_REISSUE)) &&
> + (!(ub->dev_info.flags & UBLK_F_USER_RECOVERY_FAIL_IO));
> +}
> +
> +static inline bool ublk_dev_in_recoverable_state(struct ublk_device *ub)
> +{
> + return ub->dev_info.state == UBLK_S_DEV_QUIESCED ||
> + ub->dev_info.state == UBLK_S_DEV_FAIL_IO;
> }
>
> static void ublk_free_disk(struct gendisk *disk)
> @@ -1276,6 +1288,10 @@ static blk_status_t ublk_queue_rq(struct blk_mq_hw_ctx *hctx,
> struct request *rq = bd->rq;
> blk_status_t res;
>
> + if (unlikely(ubq->fail_io)) {
> + return BLK_STS_TARGET;
> + }
> +
> /* fill iod to slot in io cmd buffer */
> res = ublk_setup_iod(ubq, rq);
> if (unlikely(res != BLK_STS_OK))
> @@ -1626,6 +1642,7 @@ static void ublk_nosrv_work(struct work_struct *work)
> {
> struct ublk_device *ub =
> container_of(work, struct ublk_device, nosrv_work);
> + int i;
>
> if (ublk_nosrv_should_stop_dev(ub)) {
> ublk_stop_dev(ub);
> @@ -1635,7 +1652,18 @@ static void ublk_nosrv_work(struct work_struct *work)
> mutex_lock(&ub->mutex);
> if (ub->dev_info.state != UBLK_S_DEV_LIVE)
> goto unlock;
> - __ublk_quiesce_dev(ub);
> +
> + if (ublk_nosrv_dev_should_queue_io(ub)) {
> + __ublk_quiesce_dev(ub);
> + } else {
> + blk_mq_quiesce_queue(ub->ub_disk->queue);
> + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
> + ublk_get_queue(ub, i)->fail_io = true;
> + }
> + blk_mq_unquiesce_queue(ub->ub_disk->queue);
> + ub->dev_info.state = UBLK_S_DEV_FAIL_IO;
> + }
> +
> unlock:
> mutex_unlock(&ub->mutex);
> ublk_cancel_dev(ub);
> @@ -2389,8 +2417,13 @@ static int ublk_ctrl_add_dev(struct io_uring_cmd *cmd)
> return -EPERM;
>
> /* forbid nonsense combinations of recovery flags */
> - if ((info.flags & UBLK_F_USER_RECOVERY_REISSUE) &&
> - !(info.flags & UBLK_F_USER_RECOVERY)) {
> + switch (info.flags & UBLK_F_ALL_RECOVERY_FLAGS) {
> + case 0:
> + case UBLK_F_USER_RECOVERY:
> + case (UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_REISSUE):
> + case (UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_FAIL_IO):
> + break;
> + default:
> pr_warn("%s: invalid recovery flags %llx\n", __func__,
> info.flags & UBLK_F_ALL_RECOVERY_FLAGS);
> return -EINVAL;
> @@ -2722,14 +2755,18 @@ static int ublk_ctrl_start_recovery(struct ublk_device *ub,
> * and related io_uring ctx is freed so file struct of /dev/ublkcX is
> * released.
> *
> + * and one of the following holds
> + *
> * (2) UBLK_S_DEV_QUIESCED is set, which means the quiesce_work:
> * (a)has quiesced request queue
> * (b)has requeued every inflight rqs whose io_flags is ACTIVE
> * (c)has requeued/aborted every inflight rqs whose io_flags is NOT ACTIVE
> * (d)has completed/camceled all ioucmds owned by ther dying process
> + *
> + * (3) UBLK_S_DEV_FAIL_IO is set, which means the queue is not
> + * quiesced, but all I/O is being immediately errored
> */
> - if (test_bit(UB_STATE_OPEN, &ub->state) ||
> - ub->dev_info.state != UBLK_S_DEV_QUIESCED) {
> + if (test_bit(UB_STATE_OPEN, &ub->state) || !ublk_dev_in_recoverable_state(ub)) {
> ret = -EBUSY;
> goto out_unlock;
> }
> @@ -2753,6 +2790,7 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub,
> const struct ublksrv_ctrl_cmd *header = io_uring_sqe_cmd(cmd->sqe);
> int ublksrv_pid = (int)header->data[0];
> int ret = -EINVAL;
> + int i;
>
> pr_devel("%s: Waiting for new ubq_daemons(nr: %d) are ready, dev id %d...\n",
> __func__, ub->dev_info.nr_hw_queues, header->dev_id);
> @@ -2767,18 +2805,27 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub,
> if (ublk_nosrv_should_stop_dev(ub))
> goto out_unlock;
>
> - if (ub->dev_info.state != UBLK_S_DEV_QUIESCED) {
> + if (!ublk_dev_in_recoverable_state(ub)) {
> ret = -EBUSY;
> goto out_unlock;
> }
> ub->dev_info.ublksrv_pid = ublksrv_pid;
> pr_devel("%s: new ublksrv_pid %d, dev id %d\n",
> __func__, ublksrv_pid, header->dev_id);
> +
> + blk_mq_quiesce_queue(ub->ub_disk->queue);
> + for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
> + ublk_get_queue(ub, i)->fail_io = false;
> + }
> blk_mq_unquiesce_queue(ub->ub_disk->queue);
> - pr_devel("%s: queue unquiesced, dev id %d.\n",
> - __func__, header->dev_id);
> - blk_mq_kick_requeue_list(ub->ub_disk->queue);
> ub->dev_info.state = UBLK_S_DEV_LIVE;
> + if (ublk_nosrv_dev_should_queue_io(ub)) {
> + blk_mq_unquiesce_queue(ub->ub_disk->queue);
> + pr_devel("%s: queue unquiesced, dev id %d.\n",
> + __func__, header->dev_id);
> + blk_mq_kick_requeue_list(ub->ub_disk->queue);
> + }
I'd suggest to change the above into the following:
if (ublk_nosrv_dev_should_queue_io(ub)) {
ub->dev_info.state = UBLK_S_DEV_LIVE;
blk_mq_unquiesce_queue(ub->ub_disk->queue);
pr_devel("%s: queue unquiesced, dev id %d.\n",
__func__, header->dev_id);
blk_mq_kick_requeue_list(ub->ub_disk->queue);
} else {
blk_mq_quiesce_queue(ub->ub_disk->queue);
ub->dev_info.state = UBLK_S_DEV_LIVE;
for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
ublk_get_queue(ub, i)->fail_io = false;
blk_mq_unquiesce_queue(ub->ub_disk->queue);
}
- one more quiesce is avoided
- ub->dev_info.state is only updated if request queue is quiesced.
Otherwise, this patch looks fine.
Thanks,
Ming
prev parent reply other threads:[~2024-09-25 10:58 UTC|newest]
Thread overview: 8+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-09-17 0:21 [PATCH v2 0/4] ublk: support device recovery without I/O queueing Uday Shankar
2024-09-17 0:21 ` [PATCH v2 1/4] ublk: check recovery flags for validity Uday Shankar
2024-09-25 3:43 ` Ming Lei
2024-09-17 0:21 ` [PATCH v2 2/4] ublk: refactor recovery configuration flag helpers Uday Shankar
2024-09-25 3:54 ` Ming Lei
2024-09-17 0:21 ` [PATCH v2 3/4] ublk: merge stop_work and quiesce_work Uday Shankar
2024-09-17 0:21 ` [PATCH v2 4/4] ublk: support device recovery without I/O queueing Uday Shankar
2024-09-25 10:58 ` Ming Lei [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=ZvPs1h1byAkpYMGC@fedora \
--to=ming.lei@redhat.com \
--cc=axboe@kernel.dk \
--cc=linux-block@vger.kernel.org \
--cc=ushankar@purestorage.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.