All of lore.kernel.org
 help / color / mirror / Atom feed
From: Uday Shankar <ushankar@purestorage.com>
To: Ming Lei <ming.lei@redhat.com>
Cc: Jens Axboe <axboe@kernel.dk>,
	linux-block@vger.kernel.org,
	Caleb Sander Mateos <csander@purestorage.com>
Subject: Re: [PATCH 5/9] ublk: move device reset into ublk_ch_release()
Date: Mon, 14 Apr 2025 14:29:48 -0600	[thread overview]
Message-ID: <Z/1wPCiGOlFgcrpq@dev-ushankar.dev.purestorage.com> (raw)
In-Reply-To: <20250414112554.3025113-6-ming.lei@redhat.com>

On Mon, Apr 14, 2025 at 07:25:46PM +0800, Ming Lei wrote:
> ublk_ch_release() is called after the ublk char device is closed, when all
> uring_cmds are done, so it is perfectly fine to move the ublk device reset
> from ublk_ctrl_start_recovery() to ublk_ch_release().
> 
> This avoids holding a reference to the exiting daemon's task_struct for too long.

Nice, I had noticed this leak too, where we keep the task struct ref
until the new daemon comes around. Thanks for the fix!

> 
> However, the reset of the following ublk IO flags has to be deferred until
> the ublk io_uring queues are ready:
> 
> - ubq->canceling
> 
> For requeuing IO in case of ublk_nosrv_dev_should_queue_io() before device
> is recovered
> 
> - ubq->fail_io
> 
> For failing IO in case of UBLK_F_USER_RECOVERY_FAIL_IO before device is
> recovered
> 
> - ublk_io->flags
> 
> To prevent io->cmd from being used
> 
> This way, recovery is simplified a lot.
> 
> Signed-off-by: Ming Lei <ming.lei@redhat.com>
> ---
>  drivers/block/ublk_drv.c | 121 +++++++++++++++++++++++----------------
>  1 file changed, 72 insertions(+), 49 deletions(-)
> 
> diff --git a/drivers/block/ublk_drv.c b/drivers/block/ublk_drv.c
> index e0213222e3cf..b68bd4172fa8 100644
> --- a/drivers/block/ublk_drv.c
> +++ b/drivers/block/ublk_drv.c
> @@ -1074,7 +1074,7 @@ static inline struct ublk_uring_cmd_pdu *ublk_get_uring_cmd_pdu(
>  
>  static inline bool ubq_daemon_is_dying(struct ublk_queue *ubq)
>  {
> -	return ubq->ubq_daemon->flags & PF_EXITING;
> +	return !ubq->ubq_daemon || ubq->ubq_daemon->flags & PF_EXITING;
>  }
>  
>  /* todo: handle partial completion */
> @@ -1470,6 +1470,37 @@ static const struct blk_mq_ops ublk_mq_ops = {
>  	.timeout	= ublk_timeout,
>  };
>  
> +static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq)
> +{
> +	int i;
> +
> +	/* All old ioucmds have to be completed */
> +	ubq->nr_io_ready = 0;
> +
> +	/*
> +	 * old daemon is PF_EXITING, put it now
> +	 *
> +	 * It could be NULL when closing a quiesced device.
> +	 */
> +	if (ubq->ubq_daemon)
> +		put_task_struct(ubq->ubq_daemon);
> +	/* We have to reset it to NULL, otherwise ub won't accept new FETCH_REQ */
> +	ubq->ubq_daemon = NULL;
> +	ubq->timeout = false;
> +
> +	for (i = 0; i < ubq->q_depth; i++) {
> +		struct ublk_io *io = &ubq->ios[i];
> +
> +		/*
> +		 * UBLK_IO_FLAG_CANCELED is kept to avoid touching
> +		 * io->cmd
> +		 */
> +		io->flags &= UBLK_IO_FLAG_CANCELED;
> +		io->cmd = NULL;
> +		io->addr = 0;
> +	}
> +}
> +
>  static int ublk_ch_open(struct inode *inode, struct file *filp)
>  {
>  	struct ublk_device *ub = container_of(inode->i_cdev,
> @@ -1481,10 +1512,26 @@ static int ublk_ch_open(struct inode *inode, struct file *filp)
>  	return 0;
>  }
>  
> +static void ublk_reset_ch_dev(struct ublk_device *ub)
> +{
> +	int i;
> +
> +	for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
> +		ublk_queue_reinit(ub, ublk_get_queue(ub, i));
> +
> +	/* set to NULL, otherwise new ubq_daemon cannot mmap the io_cmd_buf */
> +	ub->mm = NULL;
> +	ub->nr_queues_ready = 0;
> +	ub->nr_privileged_daemon = 0;
> +}
> +
>  static int ublk_ch_release(struct inode *inode, struct file *filp)
>  {
>  	struct ublk_device *ub = filp->private_data;
>  
> +	/* all uring_cmd has been done now, reset device & ubq */
> +	ublk_reset_ch_dev(ub);
> +
>  	clear_bit(UB_STATE_OPEN, &ub->state);
>  	return 0;
>  }
> @@ -1831,6 +1878,24 @@ static void ublk_nosrv_work(struct work_struct *work)
>  	ublk_cancel_dev(ub);
>  }
>  
> +/* reset ublk io_uring queue & io flags */
> +static void ublk_reset_io_flags(struct ublk_device *ub)
> +{
> +	int i, j;
> +
> +	for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
> +		struct ublk_queue *ubq = ublk_get_queue(ub, i);
> +
> +		/* UBLK_IO_FLAG_CANCELED can be cleared now */
> +		spin_lock(&ubq->cancel_lock);

Do we need this? I think at this point there shouldn't be any concurrent
activity we need to protect against.

> +		for (j = 0; j < ubq->q_depth; j++)
> +			ubq->ios[j].flags &= ~UBLK_IO_FLAG_CANCELED;
> +		spin_unlock(&ubq->cancel_lock);
> +		ubq->canceling = false;
> +		ubq->fail_io = false;
> +	}
> +}
> +
>  /* device can only be started after all IOs are ready */
>  static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
>  	__must_hold(&ub->mutex)
> @@ -1844,8 +1909,12 @@ static void ublk_mark_io_ready(struct ublk_device *ub, struct ublk_queue *ubq)
>  		if (capable(CAP_SYS_ADMIN))
>  			ub->nr_privileged_daemon++;
>  	}
> -	if (ub->nr_queues_ready == ub->dev_info.nr_hw_queues)
> +
> +	if (ub->nr_queues_ready == ub->dev_info.nr_hw_queues) {
> +		/* now we are ready for handling ublk io request */
> +		ublk_reset_io_flags(ub);
>  		complete_all(&ub->completion);
> +	}
>  }
>  
>  static void ublk_handle_need_get_data(struct ublk_device *ub, int q_id,
> @@ -2943,41 +3012,14 @@ static int ublk_ctrl_set_params(struct ublk_device *ub,
>  	return ret;
>  }
>  
> -static void ublk_queue_reinit(struct ublk_device *ub, struct ublk_queue *ubq)
> -{
> -	int i;
> -
> -	WARN_ON_ONCE(!(ubq->ubq_daemon && ubq_daemon_is_dying(ubq)));
> -
> -	/* All old ioucmds have to be completed */
> -	ubq->nr_io_ready = 0;
> -	/* old daemon is PF_EXITING, put it now */
> -	put_task_struct(ubq->ubq_daemon);
> -	/* We have to reset it to NULL, otherwise ub won't accept new FETCH_REQ */
> -	ubq->ubq_daemon = NULL;
> -	ubq->timeout = false;
> -
> -	for (i = 0; i < ubq->q_depth; i++) {
> -		struct ublk_io *io = &ubq->ios[i];
> -
> -		/* forget everything now and be ready for new FETCH_REQ */
> -		io->flags = 0;
> -		io->cmd = NULL;
> -		io->addr = 0;
> -	}
> -}
> -
>  static int ublk_ctrl_start_recovery(struct ublk_device *ub,
>  		const struct ublksrv_ctrl_cmd *header)
>  {
>  	int ret = -EINVAL;
> -	int i;
>  
>  	mutex_lock(&ub->mutex);
>  	if (ublk_nosrv_should_stop_dev(ub))
>  		goto out_unlock;
> -	if (!ub->nr_queues_ready)
> -		goto out_unlock;
>  	/*
>  	 * START_RECOVERY is only allowd after:
>  	 *
> @@ -3001,12 +3043,6 @@ static int ublk_ctrl_start_recovery(struct ublk_device *ub,
>  		goto out_unlock;
>  	}
>  	pr_devel("%s: start recovery for dev id %d.\n", __func__, header->dev_id);
> -	for (i = 0; i < ub->dev_info.nr_hw_queues; i++)
> -		ublk_queue_reinit(ub, ublk_get_queue(ub, i));
> -	/* set to NULL, otherwise new ubq_daemon cannot mmap the io_cmd_buf */
> -	ub->mm = NULL;
> -	ub->nr_queues_ready = 0;
> -	ub->nr_privileged_daemon = 0;
>  	init_completion(&ub->completion);
>  	ret = 0;
>   out_unlock:
> @@ -3019,7 +3055,6 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub,
>  {
>  	int ublksrv_pid = (int)header->data[0];
>  	int ret = -EINVAL;
> -	int i;
>  
>  	pr_devel("%s: Waiting for new ubq_daemons(nr: %d) are ready, dev id %d...\n",
>  			__func__, ub->dev_info.nr_hw_queues, header->dev_id);
> @@ -3039,22 +3074,10 @@ static int ublk_ctrl_end_recovery(struct ublk_device *ub,
>  		goto out_unlock;
>  	}
>  	ub->dev_info.ublksrv_pid = ublksrv_pid;
> +	ub->dev_info.state = UBLK_S_DEV_LIVE;
>  	pr_devel("%s: new ublksrv_pid %d, dev id %d\n",
>  			__func__, ublksrv_pid, header->dev_id);
> -
> -	blk_mq_quiesce_queue(ub->ub_disk->queue);
> -	ub->dev_info.state = UBLK_S_DEV_LIVE;
> -	for (i = 0; i < ub->dev_info.nr_hw_queues; i++) {
> -		struct ublk_queue *ubq = ublk_get_queue(ub, i);
> -
> -		ubq->canceling = false;
> -		ubq->fail_io = false;
> -	}
> -	blk_mq_unquiesce_queue(ub->ub_disk->queue);
> -	pr_devel("%s: queue unquiesced, dev id %d.\n",
> -			__func__, header->dev_id);
>  	blk_mq_kick_requeue_list(ub->ub_disk->queue);
> -
>  	ret = 0;
>   out_unlock:
>  	mutex_unlock(&ub->mutex);
> -- 
> 2.47.0
> 

  reply	other threads:[~2025-04-14 20:29 UTC|newest]

Thread overview: 32+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-04-14 11:25 [PATCH 0/9] ublk: simplify & improve IO canceling Ming Lei
2025-04-14 11:25 ` [PATCH 1/9] ublk: don't try to stop disk if ->ub_disk is NULL Ming Lei
2025-04-14 19:44   ` Uday Shankar
2025-04-15  1:32     ` Ming Lei
2025-04-14 11:25 ` [PATCH 2/9] ublk: properly serialize all FETCH_REQs Ming Lei
2025-04-14 19:58   ` Uday Shankar
2025-04-14 20:39     ` Jens Axboe
2025-04-14 20:52       ` Uday Shankar
2025-04-14 21:00         ` Jens Axboe
2025-04-15  1:40         ` Ming Lei
2025-04-16  1:13       ` Ming Lei
2025-04-16  1:17         ` Jens Axboe
2025-04-16  2:04           ` Ming Lei
2025-04-16  1:04     ` Uday Shankar
2025-04-14 11:25 ` [PATCH 3/9] ublk: add ublk_force_abort_dev() Ming Lei
2025-04-14 20:06   ` Uday Shankar
2025-04-14 11:25 ` [PATCH 4/9] ublk: rely on ->canceling for dealing with ublk_nosrv_dev_should_queue_io Ming Lei
2025-04-14 20:15   ` Uday Shankar
2025-04-15  1:48     ` Ming Lei
2025-04-14 11:25 ` [PATCH 5/9] ublk: move device reset into ublk_ch_release() Ming Lei
2025-04-14 20:29   ` Uday Shankar [this message]
2025-04-15  1:50     ` Ming Lei
2025-04-14 11:25 ` [PATCH 6/9] ublk: improve detection and handling of ublk server exit Ming Lei
2025-04-14 20:36   ` Uday Shankar
2025-04-15  1:54     ` Ming Lei
2025-04-14 11:25 ` [PATCH 7/9] ublk: remove __ublk_quiesce_dev() Ming Lei
2025-04-14 20:37   ` Uday Shankar
2025-04-14 11:25 ` [PATCH 8/9] ublk: simplify aborting ublk request Ming Lei
2025-04-14 20:42   ` Uday Shankar
2025-04-14 11:25 ` [PATCH 9/9] selftests: ublk: add generic_06 for covering fault inject Ming Lei
2025-04-14 20:44   ` Uday Shankar
2025-04-15  1:57     ` Ming Lei

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Z/1wPCiGOlFgcrpq@dev-ushankar.dev.purestorage.com \
    --to=ushankar@purestorage.com \
    --cc=axboe@kernel.dk \
    --cc=csander@purestorage.com \
    --cc=linux-block@vger.kernel.org \
    --cc=ming.lei@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.