Linux block layer
 help / color / mirror / Atom feed
From: Yu Kuai <yukuai1@huaweicloud.com>
To: Ming Lei <ming.lei@redhat.com>, Jens Axboe <axboe@kernel.dk>,
	linux-block@vger.kernel.org
Cc: Nilay Shroff <nilay@linux.ibm.com>, "yukuai (C)" <yukuai3@huawei.com>
Subject: Re: [PATCH] blk-mq: fix lockdep warning in __blk_mq_update_nr_hw_queues
Date: Fri, 15 Aug 2025 17:15:14 +0800	[thread overview]
Message-ID: <ff5639d3-9a63-e26c-a062-cb8a23c0ed5d@huaweicloud.com> (raw)
In-Reply-To: <20250815075636.304660-1-ming.lei@redhat.com>

Hi,

在 2025/08/15 15:56, Ming Lei 写道:
> Commit 5989bfe6ac6b ("block: restore two stage elevator switch while
> running nr_hw_queue update") reintroduced a lockdep warning by calling
> blk_mq_freeze_queue_nomemsave() before switching the I/O scheduler.
> 
> The function blk_mq_elv_switch_none() calls elevator_change_done().
> Running this while the queue is frozen causes a lockdep warning.
> 
> Fix this by reordering the operations: first, switch the I/O scheduler
> to 'none', and then freeze the queue. This ensures that elevator_change_done()
> is not called on an already frozen queue. And this way is safe because
> elevator_set_none() does freeze queue before switching to none.
> 
> Also we still have to rely on blk_mq_elv_switch_back() for switching
> back, and it has to cover unfrozen queue case.
> 
> Cc: Nilay Shroff <nilay@linux.ibm.com>
> Cc: Yu Kuai <yukuai3@huawei.com>
> Fixes: 5989bfe6ac6b ("block: restore two stage elevator switch while running nr_hw_queue update")
> Signed-off-by: Ming Lei <ming.lei@redhat.com>
> ---
>   block/blk-mq.c   | 13 +++++++------
>   block/blk.h      |  2 +-
>   block/elevator.c | 12 +++++++++---
>   3 files changed, 17 insertions(+), 10 deletions(-)
> 
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index b67d6c02eceb..9c62781c6b8c 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -4974,13 +4974,13 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
>    * Switch back to the elevator type stored in the xarray.
>    */
>   static void blk_mq_elv_switch_back(struct request_queue *q,
> -		struct xarray *elv_tbl, struct xarray *et_tbl)
> +		struct xarray *elv_tbl, struct xarray *et_tbl, bool frozen)
>   {
>   	struct elevator_type *e = xa_load(elv_tbl, q->id);
>   	struct elevator_tags *t = xa_load(et_tbl, q->id);
>   
>   	/* The elv_update_nr_hw_queues unfreezes the queue. */
> -	elv_update_nr_hw_queues(q, e, t);
> +	elv_update_nr_hw_queues(q, e, t, frozen);
>   
>   	/* Drop the reference acquired in blk_mq_elv_switch_none. */
>   	if (e)
> @@ -5033,6 +5033,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
>   	unsigned int memflags;
>   	int i;
>   	struct xarray elv_tbl, et_tbl;
> +	bool queues_frozen = false;
>   
>   	lockdep_assert_held(&set->tag_list_lock);
>   
> @@ -5056,9 +5057,6 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
>   		blk_mq_sysfs_unregister_hctxs(q);
>   	}
>   
> -	list_for_each_entry(q, &set->tag_list, tag_set_list)
> -		blk_mq_freeze_queue_nomemsave(q);
> -
>   	/*
>   	 * Switch IO scheduler to 'none', cleaning up the data associated
>   	 * with the previous scheduler. We will switch back once we are done
> @@ -5068,6 +5066,9 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
>   		if (blk_mq_elv_switch_none(q, &elv_tbl))
>   			goto switch_back;
>   
> +	list_for_each_entry(q, &set->tag_list, tag_set_list)
> +		blk_mq_freeze_queue_nomemsave(q);
> +	queues_frozen = true;
>   	if (blk_mq_realloc_tag_set_tags(set, nr_hw_queues) < 0)
>   		goto switch_back;
>   
Would it be simpler to move blk_mq_freeze_queue_nomemsave() into
blk_mq_elv_switch_none(), so that the queue is frozen only after the
elevator has been successfully switched to none?

Later, in blk_mq_elv_switch_back(), if xa_load() returns a valid
elevator_type, we know that the related queue is already frozen.

Thanks,
Kuai

> @@ -5092,7 +5093,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
>   switch_back:
>   	/* The blk_mq_elv_switch_back unfreezes queue for us. */
>   	list_for_each_entry(q, &set->tag_list, tag_set_list)
> -		blk_mq_elv_switch_back(q, &elv_tbl, &et_tbl);
> +		blk_mq_elv_switch_back(q, &elv_tbl, &et_tbl, queues_frozen);
>   
>   	list_for_each_entry(q, &set->tag_list, tag_set_list) {
>   		blk_mq_sysfs_register_hctxs(q);
> diff --git a/block/blk.h b/block/blk.h
> index 0a2eccf28ca4..601db258c00d 100644
> --- a/block/blk.h
> +++ b/block/blk.h
> @@ -332,7 +332,7 @@ bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
>   bool blk_insert_flush(struct request *rq);
>   
>   void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e,
> -		struct elevator_tags *t);
> +		struct elevator_tags *t, bool frozen);
>   void elevator_set_default(struct request_queue *q);
>   void elevator_set_none(struct request_queue *q);
>   
> diff --git a/block/elevator.c b/block/elevator.c
> index fe96c6f4753c..0644b2d35ecb 100644
> --- a/block/elevator.c
> +++ b/block/elevator.c
> @@ -706,24 +706,30 @@ static int elevator_change(struct request_queue *q, struct elv_change_ctx *ctx)
>    * reattachment when nr_hw_queues changes.
>    */
>   void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e,
> -		struct elevator_tags *t)
> +		struct elevator_tags *t, bool frozen)
>   {
>   	struct blk_mq_tag_set *set = q->tag_set;
>   	struct elv_change_ctx ctx = {};
>   	int ret = -ENODEV;
>   
> -	WARN_ON_ONCE(q->mq_freeze_depth == 0);
> +	WARN_ON_ONCE(frozen == (q->mq_freeze_depth == 0));
>   
>   	if (e && !blk_queue_dying(q) && blk_queue_registered(q)) {
>   		ctx.name = e->elevator_name;
>   		ctx.et = t;
>   
> +		/* elevator switch requires queue to be frozen */
> +		if (!frozen) {
> +			blk_mq_freeze_queue_nomemsave(q);
> +			frozen = true;
> +		}
>   		mutex_lock(&q->elevator_lock);
>   		/* force to reattach elevator after nr_hw_queue is updated */
>   		ret = elevator_switch(q, &ctx);
>   		mutex_unlock(&q->elevator_lock);
>   	}
> -	blk_mq_unfreeze_queue_nomemrestore(q);
> +	if (frozen)
> +		blk_mq_unfreeze_queue_nomemrestore(q);
>   	if (!ret)
>   		WARN_ON_ONCE(elevator_change_done(q, &ctx));
>   	/*
> 


  reply	other threads:[~2025-08-15  9:15 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-08-15  7:56 [PATCH] blk-mq: fix lockdep warning in __blk_mq_update_nr_hw_queues Ming Lei
2025-08-15  9:15 ` Yu Kuai [this message]
2025-08-15  9:34   ` Yu Kuai
2025-08-15  9:38     ` Ming Lei
2025-08-15 10:06       ` Nilay Shroff
2025-08-15 12:58         ` Ming Lei

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ff5639d3-9a63-e26c-a062-cb8a23c0ed5d@huaweicloud.com \
    --to=yukuai1@huaweicloud.com \
    --cc=axboe@kernel.dk \
    --cc=linux-block@vger.kernel.org \
    --cc=ming.lei@redhat.com \
    --cc=nilay@linux.ibm.com \
    --cc=yukuai3@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox