Linux block layer
 help / color / mirror / Atom feed
* [PATCH] blk-mq: fix lockdep warning in __blk_mq_update_nr_hw_queues
@ 2025-08-15  7:56 Ming Lei
  2025-08-15  9:15 ` Yu Kuai
  0 siblings, 1 reply; 6+ messages in thread
From: Ming Lei @ 2025-08-15  7:56 UTC (permalink / raw)
  To: Jens Axboe, linux-block; +Cc: Ming Lei, Nilay Shroff, Yu Kuai

Commit 5989bfe6ac6b ("block: restore two stage elevator switch while
running nr_hw_queue update") reintroduced a lockdep warning by calling
blk_mq_freeze_queue_nomemsave() before switching the I/O scheduler.

The function blk_mq_elv_switch_none() calls elevator_change_done().
Running this while the queue is frozen causes a lockdep warning.

Fix this by reordering the operations: first, switch the I/O scheduler
to 'none', and then freeze the queue. This ensures that elevator_change_done()
is not called on an already frozen queue. The reordering is safe because
elevator_set_none() freezes the queue itself before switching to 'none'.

We still have to rely on blk_mq_elv_switch_back() for switching back, so
it also has to handle the case where the queue has not been frozen yet.

Cc: Nilay Shroff <nilay@linux.ibm.com>
Cc: Yu Kuai <yukuai3@huawei.com>
Fixes: 5989bfe6ac6b ("block: restore two stage elevator switch while running nr_hw_queue update")
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 block/blk-mq.c   | 13 +++++++------
 block/blk.h      |  2 +-
 block/elevator.c | 12 +++++++++---
 3 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index b67d6c02eceb..9c62781c6b8c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -4974,13 +4974,13 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
  * Switch back to the elevator type stored in the xarray.
  */
 static void blk_mq_elv_switch_back(struct request_queue *q,
-		struct xarray *elv_tbl, struct xarray *et_tbl)
+		struct xarray *elv_tbl, struct xarray *et_tbl, bool frozen)
 {
 	struct elevator_type *e = xa_load(elv_tbl, q->id);
 	struct elevator_tags *t = xa_load(et_tbl, q->id);
 
 	/* The elv_update_nr_hw_queues unfreezes the queue. */
-	elv_update_nr_hw_queues(q, e, t);
+	elv_update_nr_hw_queues(q, e, t, frozen);
 
 	/* Drop the reference acquired in blk_mq_elv_switch_none. */
 	if (e)
@@ -5033,6 +5033,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 	unsigned int memflags;
 	int i;
 	struct xarray elv_tbl, et_tbl;
+	bool queues_frozen = false;
 
 	lockdep_assert_held(&set->tag_list_lock);
 
@@ -5056,9 +5057,6 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 		blk_mq_sysfs_unregister_hctxs(q);
 	}
 
-	list_for_each_entry(q, &set->tag_list, tag_set_list)
-		blk_mq_freeze_queue_nomemsave(q);
-
 	/*
 	 * Switch IO scheduler to 'none', cleaning up the data associated
 	 * with the previous scheduler. We will switch back once we are done
@@ -5068,6 +5066,9 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 		if (blk_mq_elv_switch_none(q, &elv_tbl))
 			goto switch_back;
 
+	list_for_each_entry(q, &set->tag_list, tag_set_list)
+		blk_mq_freeze_queue_nomemsave(q);
+	queues_frozen = true;
 	if (blk_mq_realloc_tag_set_tags(set, nr_hw_queues) < 0)
 		goto switch_back;
 
@@ -5092,7 +5093,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 switch_back:
 	/* The blk_mq_elv_switch_back unfreezes queue for us. */
 	list_for_each_entry(q, &set->tag_list, tag_set_list)
-		blk_mq_elv_switch_back(q, &elv_tbl, &et_tbl);
+		blk_mq_elv_switch_back(q, &elv_tbl, &et_tbl, queues_frozen);
 
 	list_for_each_entry(q, &set->tag_list, tag_set_list) {
 		blk_mq_sysfs_register_hctxs(q);
diff --git a/block/blk.h b/block/blk.h
index 0a2eccf28ca4..601db258c00d 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -332,7 +332,7 @@ bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
 bool blk_insert_flush(struct request *rq);
 
 void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e,
-		struct elevator_tags *t);
+		struct elevator_tags *t, bool frozen);
 void elevator_set_default(struct request_queue *q);
 void elevator_set_none(struct request_queue *q);
 
diff --git a/block/elevator.c b/block/elevator.c
index fe96c6f4753c..0644b2d35ecb 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -706,24 +706,30 @@ static int elevator_change(struct request_queue *q, struct elv_change_ctx *ctx)
  * reattachment when nr_hw_queues changes.
  */
 void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e,
-		struct elevator_tags *t)
+		struct elevator_tags *t, bool frozen)
 {
 	struct blk_mq_tag_set *set = q->tag_set;
 	struct elv_change_ctx ctx = {};
 	int ret = -ENODEV;
 
-	WARN_ON_ONCE(q->mq_freeze_depth == 0);
+	WARN_ON_ONCE(frozen == (q->mq_freeze_depth == 0));
 
 	if (e && !blk_queue_dying(q) && blk_queue_registered(q)) {
 		ctx.name = e->elevator_name;
 		ctx.et = t;
 
+		/* elevator switch requires queue to be frozen */
+		if (!frozen) {
+			blk_mq_freeze_queue_nomemsave(q);
+			frozen = true;
+		}
 		mutex_lock(&q->elevator_lock);
 		/* force to reattach elevator after nr_hw_queue is updated */
 		ret = elevator_switch(q, &ctx);
 		mutex_unlock(&q->elevator_lock);
 	}
-	blk_mq_unfreeze_queue_nomemrestore(q);
+	if (frozen)
+		blk_mq_unfreeze_queue_nomemrestore(q);
 	if (!ret)
 		WARN_ON_ONCE(elevator_change_done(q, &ctx));
 	/*
-- 
2.50.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] blk-mq: fix lockdep warning in __blk_mq_update_nr_hw_queues
  2025-08-15  7:56 [PATCH] blk-mq: fix lockdep warning in __blk_mq_update_nr_hw_queues Ming Lei
@ 2025-08-15  9:15 ` Yu Kuai
  2025-08-15  9:34   ` Yu Kuai
  0 siblings, 1 reply; 6+ messages in thread
From: Yu Kuai @ 2025-08-15  9:15 UTC (permalink / raw)
  To: Ming Lei, Jens Axboe, linux-block; +Cc: Nilay Shroff, yukuai (C)

Hi,

在 2025/08/15 15:56, Ming Lei 写道:
> Commit 5989bfe6ac6b ("block: restore two stage elevator switch while
> running nr_hw_queue update") reintroduced a lockdep warning by calling
> blk_mq_freeze_queue_nomemsave() before switching the I/O scheduler.
> 
> The function blk_mq_elv_switch_none() calls elevator_change_done().
> Running this while the queue is frozen causes a lockdep warning.
> 
> Fix this by reordering the operations: first, switch the I/O scheduler
> to 'none', and then freeze the queue. This ensures that elevator_change_done()
> is not called on an already frozen queue. And this way is safe because
> elevator_set_none() does freeze queue before switching to none.
> 
> Also we still have to rely on blk_mq_elv_switch_back() for switching
> back, and it has to cover unfrozen queue case.
> 
> Cc: Nilay Shroff <nilay@linux.ibm.com>
> Cc: Yu Kuai <yukuai3@huawei.com>
> Fixes: 5989bfe6ac6b ("block: restore two stage elevator switch while running nr_hw_queue update")
> Signed-off-by: Ming Lei <ming.lei@redhat.com>
> ---
>   block/blk-mq.c   | 13 +++++++------
>   block/blk.h      |  2 +-
>   block/elevator.c | 12 +++++++++---
>   3 files changed, 17 insertions(+), 10 deletions(-)
> 
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index b67d6c02eceb..9c62781c6b8c 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -4974,13 +4974,13 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
>    * Switch back to the elevator type stored in the xarray.
>    */
>   static void blk_mq_elv_switch_back(struct request_queue *q,
> -		struct xarray *elv_tbl, struct xarray *et_tbl)
> +		struct xarray *elv_tbl, struct xarray *et_tbl, bool frozen)
>   {
>   	struct elevator_type *e = xa_load(elv_tbl, q->id);
>   	struct elevator_tags *t = xa_load(et_tbl, q->id);
>   
>   	/* The elv_update_nr_hw_queues unfreezes the queue. */
> -	elv_update_nr_hw_queues(q, e, t);
> +	elv_update_nr_hw_queues(q, e, t, frozen);
>   
>   	/* Drop the reference acquired in blk_mq_elv_switch_none. */
>   	if (e)
> @@ -5033,6 +5033,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
>   	unsigned int memflags;
>   	int i;
>   	struct xarray elv_tbl, et_tbl;
> +	bool queues_frozen = false;
>   
>   	lockdep_assert_held(&set->tag_list_lock);
>   
> @@ -5056,9 +5057,6 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
>   		blk_mq_sysfs_unregister_hctxs(q);
>   	}
>   
> -	list_for_each_entry(q, &set->tag_list, tag_set_list)
> -		blk_mq_freeze_queue_nomemsave(q);
> -
>   	/*
>   	 * Switch IO scheduler to 'none', cleaning up the data associated
>   	 * with the previous scheduler. We will switch back once we are done
> @@ -5068,6 +5066,9 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
>   		if (blk_mq_elv_switch_none(q, &elv_tbl))
>   			goto switch_back;
>   
> +	list_for_each_entry(q, &set->tag_list, tag_set_list)
> +		blk_mq_freeze_queue_nomemsave(q);
> +	queues_frozen = true;
>   	if (blk_mq_realloc_tag_set_tags(set, nr_hw_queues) < 0)
>   		goto switch_back;
>   
Would it be simpler to move blk_mq_freeze_queue_nomemsave() into
blk_mq_elv_switch_none(), so that the queue is frozen right after the
elevator has been successfully switched to none?

Then, later in blk_mq_elv_switch_back(), if xa_load() returns a valid
elevator_type, we know the related queue is already frozen.

Thanks,
Kuai

> @@ -5092,7 +5093,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
>   switch_back:
>   	/* The blk_mq_elv_switch_back unfreezes queue for us. */
>   	list_for_each_entry(q, &set->tag_list, tag_set_list)
> -		blk_mq_elv_switch_back(q, &elv_tbl, &et_tbl);
> +		blk_mq_elv_switch_back(q, &elv_tbl, &et_tbl, queues_frozen);
>   
>   	list_for_each_entry(q, &set->tag_list, tag_set_list) {
>   		blk_mq_sysfs_register_hctxs(q);
> diff --git a/block/blk.h b/block/blk.h
> index 0a2eccf28ca4..601db258c00d 100644
> --- a/block/blk.h
> +++ b/block/blk.h
> @@ -332,7 +332,7 @@ bool blk_bio_list_merge(struct request_queue *q, struct list_head *list,
>   bool blk_insert_flush(struct request *rq);
>   
>   void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e,
> -		struct elevator_tags *t);
> +		struct elevator_tags *t, bool frozen);
>   void elevator_set_default(struct request_queue *q);
>   void elevator_set_none(struct request_queue *q);
>   
> diff --git a/block/elevator.c b/block/elevator.c
> index fe96c6f4753c..0644b2d35ecb 100644
> --- a/block/elevator.c
> +++ b/block/elevator.c
> @@ -706,24 +706,30 @@ static int elevator_change(struct request_queue *q, struct elv_change_ctx *ctx)
>    * reattachment when nr_hw_queues changes.
>    */
>   void elv_update_nr_hw_queues(struct request_queue *q, struct elevator_type *e,
> -		struct elevator_tags *t)
> +		struct elevator_tags *t, bool frozen)
>   {
>   	struct blk_mq_tag_set *set = q->tag_set;
>   	struct elv_change_ctx ctx = {};
>   	int ret = -ENODEV;
>   
> -	WARN_ON_ONCE(q->mq_freeze_depth == 0);
> +	WARN_ON_ONCE(frozen == (q->mq_freeze_depth == 0));
>   
>   	if (e && !blk_queue_dying(q) && blk_queue_registered(q)) {
>   		ctx.name = e->elevator_name;
>   		ctx.et = t;
>   
> +		/* elevator switch requires queue to be frozen */
> +		if (!frozen) {
> +			blk_mq_freeze_queue_nomemsave(q);
> +			frozen = true;
> +		}
>   		mutex_lock(&q->elevator_lock);
>   		/* force to reattach elevator after nr_hw_queue is updated */
>   		ret = elevator_switch(q, &ctx);
>   		mutex_unlock(&q->elevator_lock);
>   	}
> -	blk_mq_unfreeze_queue_nomemrestore(q);
> +	if (frozen)
> +		blk_mq_unfreeze_queue_nomemrestore(q);
>   	if (!ret)
>   		WARN_ON_ONCE(elevator_change_done(q, &ctx));
>   	/*
> 


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] blk-mq: fix lockdep warning in __blk_mq_update_nr_hw_queues
  2025-08-15  9:15 ` Yu Kuai
@ 2025-08-15  9:34   ` Yu Kuai
  2025-08-15  9:38     ` Ming Lei
  0 siblings, 1 reply; 6+ messages in thread
From: Yu Kuai @ 2025-08-15  9:34 UTC (permalink / raw)
  To: Yu Kuai, Ming Lei, Jens Axboe, linux-block; +Cc: Nilay Shroff, yukuai (C)

Hi,

在 2025/08/15 17:15, Yu Kuai 写道:
> Will it be simpler if we move blk_mq_freeze_queue_nomemsave() into
> blk_mq_elv_switch_none(), after elevator is succeed switching to none
> then freeze the queue.
> 
> Later in blk_mq_elv_switch_back we'll know if xa_load() return valid
> elevator_type, related queue is already freezed.

Like following:

diff --git a/block/blk-mq.c b/block/blk-mq.c
index e9f037a25fe3..3640fae5707b 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -5010,7 +5010,13 @@ static int blk_mq_elv_switch_none(struct 
request_queue *q,
                 __elevator_get(q->elevator->type);

                 elevator_set_none(q);
+       } else {
+               ret = xa_insert(elv_tbl, q->id, xa_mk_value(1), GFP_KERNEL);
+               if (WARN_ON_ONCE(ret))
+                       return ret;
         }
+
+       blk_mq_freeze_queue_nomemsave(q);
         return ret;
  }

@@ -5045,9 +5051,6 @@ static void __blk_mq_update_nr_hw_queues(struct 
blk_mq_tag_set *set,
                 blk_mq_sysfs_unregister_hctxs(q);
         }

-       list_for_each_entry(q, &set->tag_list, tag_set_list)
-               blk_mq_freeze_queue_nomemsave(q);
-
         /*
          * Switch IO scheduler to 'none', cleaning up the data associated
          * with the previous scheduler. We will switch back once we are 
done
diff --git a/block/elevator.c b/block/elevator.c
index e2ebfbf107b3..9400ea9ec024 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -715,16 +715,21 @@ void elv_update_nr_hw_queues(struct request_queue 
*q, struct elevator_type *e,

         WARN_ON_ONCE(q->mq_freeze_depth == 0);

-       if (e && !blk_queue_dying(q) && blk_queue_registered(q)) {
-               ctx.name = e->elevator_name;
-               ctx.et = t;
-
-               mutex_lock(&q->elevator_lock);
-               /* force to reattach elevator after nr_hw_queue is 
updated */
-               ret = elevator_switch(q, &ctx);
-               mutex_unlock(&q->elevator_lock);
+       if (e) {
+               if (!xa_is_value(e) && !blk_queue_dying(q) &&
+                   blk_queue_registered(q)) {
+                       ctx.name = e->elevator_name;
+                       ctx.et = t;
+
+                       mutex_lock(&q->elevator_lock);
+                       /* force to reattach elevator after nr_hw_queue 
is updated */
+                       ret = elevator_switch(q, &ctx);
+                       mutex_unlock(&q->elevator_lock);
+               }
+
+               blk_mq_unfreeze_queue_nomemrestore(q);
         }
-       blk_mq_unfreeze_queue_nomemrestore(q);
+
         if (!ret)
                 WARN_ON_ONCE(elevator_change_done(q, &ctx));
         /*


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH] blk-mq: fix lockdep warning in __blk_mq_update_nr_hw_queues
  2025-08-15  9:34   ` Yu Kuai
@ 2025-08-15  9:38     ` Ming Lei
  2025-08-15 10:06       ` Nilay Shroff
  0 siblings, 1 reply; 6+ messages in thread
From: Ming Lei @ 2025-08-15  9:38 UTC (permalink / raw)
  To: Yu Kuai; +Cc: Jens Axboe, linux-block, Nilay Shroff, yukuai (C)

On Fri, Aug 15, 2025 at 05:34:23PM +0800, Yu Kuai wrote:
> Hi,
> 
> 在 2025/08/15 17:15, Yu Kuai 写道:
> > Will it be simpler if we move blk_mq_freeze_queue_nomemsave() into
> > blk_mq_elv_switch_none(), after elevator is succeed switching to none
> > then freeze the queue.
> > 
> > Later in blk_mq_elv_switch_back we'll know if xa_load() return valid
> > elevator_type, related queue is already freezed.
> 
> Like following:
> 
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index e9f037a25fe3..3640fae5707b 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -5010,7 +5010,13 @@ static int blk_mq_elv_switch_none(struct
> request_queue *q,
>                 __elevator_get(q->elevator->type);
> 
>                 elevator_set_none(q);
> +       } else {
> +               ret = xa_insert(elv_tbl, q->id, xa_mk_value(1), GFP_KERNEL);
> +               if (WARN_ON_ONCE(ret))
> +                       return ret;
>         }
> +
> +       blk_mq_freeze_queue_nomemsave(q);
>         return ret;
>  }
> 
> @@ -5045,9 +5051,6 @@ static void __blk_mq_update_nr_hw_queues(struct
> blk_mq_tag_set *set,
>                 blk_mq_sysfs_unregister_hctxs(q);
>         }
> 
> -       list_for_each_entry(q, &set->tag_list, tag_set_list)
> -               blk_mq_freeze_queue_nomemsave(q);
> -
>         /*
>          * Switch IO scheduler to 'none', cleaning up the data associated
>          * with the previous scheduler. We will switch back once we are done
> diff --git a/block/elevator.c b/block/elevator.c
> index e2ebfbf107b3..9400ea9ec024 100644
> --- a/block/elevator.c
> +++ b/block/elevator.c
> @@ -715,16 +715,21 @@ void elv_update_nr_hw_queues(struct request_queue *q,
> struct elevator_type *e,
> 
>         WARN_ON_ONCE(q->mq_freeze_depth == 0);
> 
> -       if (e && !blk_queue_dying(q) && blk_queue_registered(q)) {
> -               ctx.name = e->elevator_name;
> -               ctx.et = t;
> -
> -               mutex_lock(&q->elevator_lock);
> -               /* force to reattach elevator after nr_hw_queue is updated
> */
> -               ret = elevator_switch(q, &ctx);
> -               mutex_unlock(&q->elevator_lock);
> +       if (e) {
> +               if (!xa_is_value(e) && !blk_queue_dying(q) &&
> +                   blk_queue_registered(q)) {
> +                       ctx.name = e->elevator_name;
> +                       ctx.et = t;
> +
> +                       mutex_lock(&q->elevator_lock);
> +                       /* force to reattach elevator after nr_hw_queue is
> updated */
> +                       ret = elevator_switch(q, &ctx);
> +                       mutex_unlock(&q->elevator_lock);
> +               }
> +
> +               blk_mq_unfreeze_queue_nomemrestore(q);
>         }
> -       blk_mq_unfreeze_queue_nomemrestore(q);
> +

I feel it doesn't become simpler, :-(

However, we can still avoid the change in elv_update_nr_hw_queues() by moving
the queue freeze/unfreeze to blk_mq_elv_switch_back(), which looks more readable.



Thanks, 
Ming


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] blk-mq: fix lockdep warning in __blk_mq_update_nr_hw_queues
  2025-08-15  9:38     ` Ming Lei
@ 2025-08-15 10:06       ` Nilay Shroff
  2025-08-15 12:58         ` Ming Lei
  0 siblings, 1 reply; 6+ messages in thread
From: Nilay Shroff @ 2025-08-15 10:06 UTC (permalink / raw)
  To: Ming Lei, Yu Kuai; +Cc: Jens Axboe, linux-block, yukuai (C)



On 8/15/25 3:08 PM, Ming Lei wrote:
> On Fri, Aug 15, 2025 at 05:34:23PM +0800, Yu Kuai wrote:
>> Hi,
>>
>> 在 2025/08/15 17:15, Yu Kuai 写道:
>>> Will it be simpler if we move blk_mq_freeze_queue_nomemsave() into
>>> blk_mq_elv_switch_none(), after elevator is succeed switching to none
>>> then freeze the queue.
>>>
>>> Later in blk_mq_elv_switch_back we'll know if xa_load() return valid
>>> elevator_type, related queue is already freezed.
>>
>> Like following:
>>
>> diff --git a/block/blk-mq.c b/block/blk-mq.c
>> index e9f037a25fe3..3640fae5707b 100644
>> --- a/block/blk-mq.c
>> +++ b/block/blk-mq.c
>> @@ -5010,7 +5010,13 @@ static int blk_mq_elv_switch_none(struct
>> request_queue *q,
>>                 __elevator_get(q->elevator->type);
>>
>>                 elevator_set_none(q);
>> +       } else {
>> +               ret = xa_insert(elv_tbl, q->id, xa_mk_value(1), GFP_KERNEL);
>> +               if (WARN_ON_ONCE(ret))
>> +                       return ret;
>>         }
>> +
>> +       blk_mq_freeze_queue_nomemsave(q);
>>         return ret;
>>  }
>>
>> @@ -5045,9 +5051,6 @@ static void __blk_mq_update_nr_hw_queues(struct
>> blk_mq_tag_set *set,
>>                 blk_mq_sysfs_unregister_hctxs(q);
>>         }
>>
>> -       list_for_each_entry(q, &set->tag_list, tag_set_list)
>> -               blk_mq_freeze_queue_nomemsave(q);
>> -
>>         /*
>>          * Switch IO scheduler to 'none', cleaning up the data associated
>>          * with the previous scheduler. We will switch back once we are done
>> diff --git a/block/elevator.c b/block/elevator.c
>> index e2ebfbf107b3..9400ea9ec024 100644
>> --- a/block/elevator.c
>> +++ b/block/elevator.c
>> @@ -715,16 +715,21 @@ void elv_update_nr_hw_queues(struct request_queue *q,
>> struct elevator_type *e,
>>
>>         WARN_ON_ONCE(q->mq_freeze_depth == 0);
>>
>> -       if (e && !blk_queue_dying(q) && blk_queue_registered(q)) {
>> -               ctx.name = e->elevator_name;
>> -               ctx.et = t;
>> -
>> -               mutex_lock(&q->elevator_lock);
>> -               /* force to reattach elevator after nr_hw_queue is updated
>> */
>> -               ret = elevator_switch(q, &ctx);
>> -               mutex_unlock(&q->elevator_lock);
>> +       if (e) {
>> +               if (!xa_is_value(e) && !blk_queue_dying(q) &&
>> +                   blk_queue_registered(q)) {
>> +                       ctx.name = e->elevator_name;
>> +                       ctx.et = t;
>> +
>> +                       mutex_lock(&q->elevator_lock);
>> +                       /* force to reattach elevator after nr_hw_queue is
>> updated */
>> +                       ret = elevator_switch(q, &ctx);
>> +                       mutex_unlock(&q->elevator_lock);
>> +               }
>> +
>> +               blk_mq_unfreeze_queue_nomemrestore(q);
>>         }
>> -       blk_mq_unfreeze_queue_nomemrestore(q);
>> +
> 
> I feel it doesn't become simpler, :-(
> 
> However we still can avoid the change in elv_update_nr_hw_queues() by moving
> freeze/unfree queue to blk_mq_elv_switch_back(), which looks more readable.
> 
Yes, I think that seems reasonable, but then we would also need to move
elevator_change_done() and blk_mq_free_sched_tags() from
elv_update_nr_hw_queues() to blk_mq_elv_switch_back(). As you know,
both of these functions (elevator_change_done and blk_mq_free_sched_tags)
have to be called after we unfreeze the queue.

Thanks,
--Nilay


^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH] blk-mq: fix lockdep warning in __blk_mq_update_nr_hw_queues
  2025-08-15 10:06       ` Nilay Shroff
@ 2025-08-15 12:58         ` Ming Lei
  0 siblings, 0 replies; 6+ messages in thread
From: Ming Lei @ 2025-08-15 12:58 UTC (permalink / raw)
  To: Nilay Shroff; +Cc: Yu Kuai, Jens Axboe, linux-block, yukuai (C)

On Fri, Aug 15, 2025 at 03:36:02PM +0530, Nilay Shroff wrote:
> 
> 
> On 8/15/25 3:08 PM, Ming Lei wrote:
> > On Fri, Aug 15, 2025 at 05:34:23PM +0800, Yu Kuai wrote:
> >> Hi,
> >>
> >> 在 2025/08/15 17:15, Yu Kuai 写道:
> >>> Will it be simpler if we move blk_mq_freeze_queue_nomemsave() into
> >>> blk_mq_elv_switch_none(), after elevator is succeed switching to none
> >>> then freeze the queue.
> >>>
> >>> Later in blk_mq_elv_switch_back we'll know if xa_load() return valid
> >>> elevator_type, related queue is already freezed.
> >>
> >> Like following:
> >>
> >> diff --git a/block/blk-mq.c b/block/blk-mq.c
> >> index e9f037a25fe3..3640fae5707b 100644
> >> --- a/block/blk-mq.c
> >> +++ b/block/blk-mq.c
> >> @@ -5010,7 +5010,13 @@ static int blk_mq_elv_switch_none(struct
> >> request_queue *q,
> >>                 __elevator_get(q->elevator->type);
> >>
> >>                 elevator_set_none(q);
> >> +       } else {
> >> +               ret = xa_insert(elv_tbl, q->id, xa_mk_value(1), GFP_KERNEL);
> >> +               if (WARN_ON_ONCE(ret))
> >> +                       return ret;
> >>         }
> >> +
> >> +       blk_mq_freeze_queue_nomemsave(q);
> >>         return ret;
> >>  }
> >>
> >> @@ -5045,9 +5051,6 @@ static void __blk_mq_update_nr_hw_queues(struct
> >> blk_mq_tag_set *set,
> >>                 blk_mq_sysfs_unregister_hctxs(q);
> >>         }
> >>
> >> -       list_for_each_entry(q, &set->tag_list, tag_set_list)
> >> -               blk_mq_freeze_queue_nomemsave(q);
> >> -
> >>         /*
> >>          * Switch IO scheduler to 'none', cleaning up the data associated
> >>          * with the previous scheduler. We will switch back once we are done
> >> diff --git a/block/elevator.c b/block/elevator.c
> >> index e2ebfbf107b3..9400ea9ec024 100644
> >> --- a/block/elevator.c
> >> +++ b/block/elevator.c
> >> @@ -715,16 +715,21 @@ void elv_update_nr_hw_queues(struct request_queue *q,
> >> struct elevator_type *e,
> >>
> >>         WARN_ON_ONCE(q->mq_freeze_depth == 0);
> >>
> >> -       if (e && !blk_queue_dying(q) && blk_queue_registered(q)) {
> >> -               ctx.name = e->elevator_name;
> >> -               ctx.et = t;
> >> -
> >> -               mutex_lock(&q->elevator_lock);
> >> -               /* force to reattach elevator after nr_hw_queue is updated
> >> */
> >> -               ret = elevator_switch(q, &ctx);
> >> -               mutex_unlock(&q->elevator_lock);
> >> +       if (e) {
> >> +               if (!xa_is_value(e) && !blk_queue_dying(q) &&
> >> +                   blk_queue_registered(q)) {
> >> +                       ctx.name = e->elevator_name;
> >> +                       ctx.et = t;
> >> +
> >> +                       mutex_lock(&q->elevator_lock);
> >> +                       /* force to reattach elevator after nr_hw_queue is
> >> updated */
> >> +                       ret = elevator_switch(q, &ctx);
> >> +                       mutex_unlock(&q->elevator_lock);
> >> +               }
> >> +
> >> +               blk_mq_unfreeze_queue_nomemrestore(q);
> >>         }
> >> -       blk_mq_unfreeze_queue_nomemrestore(q);
> >> +
> > 
> > I feel it doesn't become simpler, :-(
> > 
> > However we still can avoid the change in elv_update_nr_hw_queues() by moving
> > freeze/unfree queue to blk_mq_elv_switch_back(), which looks more readable.
> > 
> I think yes that seems reasonable but then we also need to move 
> elevator_change_done() and blk_mq_free_sched_tags() from 
> elv_update_nr_hw_queues() to blk_mq_elv_switch_back(). As you know
> both these functions (elevator_change_done and blk_mq_free_sched_tags)
> have to be called after we unfreeze the queue.

It can be done in an easier way:


diff --git a/block/blk-mq.c b/block/blk-mq.c
index b67d6c02eceb..69949929dfbb 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -4974,11 +4974,15 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
  * Switch back to the elevator type stored in the xarray.
  */
 static void blk_mq_elv_switch_back(struct request_queue *q,
-		struct xarray *elv_tbl, struct xarray *et_tbl)
+		struct xarray *elv_tbl, struct xarray *et_tbl, bool frozen)
 {
 	struct elevator_type *e = xa_load(elv_tbl, q->id);
 	struct elevator_tags *t = xa_load(et_tbl, q->id);
 
+	/* elv_update_nr_hw_queues() expects queue to be frozen */
+	if (!frozen)
+		blk_mq_freeze_queue_nomemsave(q);
+
 	/* The elv_update_nr_hw_queues unfreezes the queue. */
 	elv_update_nr_hw_queues(q, e, t);
 
@@ -5033,6 +5037,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 	unsigned int memflags;
 	int i;
 	struct xarray elv_tbl, et_tbl;
+	bool queues_frozen = false;
 
 	lockdep_assert_held(&set->tag_list_lock);
 
@@ -5056,9 +5061,6 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 		blk_mq_sysfs_unregister_hctxs(q);
 	}
 
-	list_for_each_entry(q, &set->tag_list, tag_set_list)
-		blk_mq_freeze_queue_nomemsave(q);
-
 	/*
 	 * Switch IO scheduler to 'none', cleaning up the data associated
 	 * with the previous scheduler. We will switch back once we are done
@@ -5068,6 +5070,9 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 		if (blk_mq_elv_switch_none(q, &elv_tbl))
 			goto switch_back;
 
+	list_for_each_entry(q, &set->tag_list, tag_set_list)
+		blk_mq_freeze_queue_nomemsave(q);
+	queues_frozen = true;
 	if (blk_mq_realloc_tag_set_tags(set, nr_hw_queues) < 0)
 		goto switch_back;
 
@@ -5092,7 +5097,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 switch_back:
 	/* The blk_mq_elv_switch_back unfreezes queue for us. */
 	list_for_each_entry(q, &set->tag_list, tag_set_list)
-		blk_mq_elv_switch_back(q, &elv_tbl, &et_tbl);
+		blk_mq_elv_switch_back(q, &elv_tbl, &et_tbl, queues_frozen);
 
 	list_for_each_entry(q, &set->tag_list, tag_set_list) {
 		blk_mq_sysfs_register_hctxs(q);

Thanks,
Ming


^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2025-08-15 12:58 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2025-08-15  7:56 [PATCH] blk-mq: fix lockdep warning in __blk_mq_update_nr_hw_queues Ming Lei
2025-08-15  9:15 ` Yu Kuai
2025-08-15  9:34   ` Yu Kuai
2025-08-15  9:38     ` Ming Lei
2025-08-15 10:06       ` Nilay Shroff
2025-08-15 12:58         ` Ming Lei

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox