Linux block layer
 help / color / mirror / Atom feed
From: Ming Lei <ming.lei@redhat.com>
To: Nilay Shroff <nilay@linux.ibm.com>
Cc: Yu Kuai <yukuai1@huaweicloud.com>, Jens Axboe <axboe@kernel.dk>,
	linux-block@vger.kernel.org, "yukuai (C)" <yukuai3@huawei.com>
Subject: Re: [PATCH] blk-mq: fix lockdep warning in __blk_mq_update_nr_hw_queues
Date: Fri, 15 Aug 2025 20:58:22 +0800	[thread overview]
Message-ID: <aJ8u7nI4GtyOECF9@fedora> (raw)
In-Reply-To: <5c57a6a5-d9a4-4c21-86cd-784e4f3876fd@linux.ibm.com>

On Fri, Aug 15, 2025 at 03:36:02PM +0530, Nilay Shroff wrote:
> 
> 
> On 8/15/25 3:08 PM, Ming Lei wrote:
> > On Fri, Aug 15, 2025 at 05:34:23PM +0800, Yu Kuai wrote:
> >> Hi,
> >>
> >> 在 2025/08/15 17:15, Yu Kuai 写道:
> >>> Will it be simpler if we move blk_mq_freeze_queue_nomemsave() into
> >>> blk_mq_elv_switch_none(), after elevator is succeed switching to none
> >>> then freeze the queue.
> >>>
> >>> Later in blk_mq_elv_switch_back we'll know if xa_load() return valid
> >>> elevator_type, related queue is already freezed.
> >>
> >> Like following:
> >>
> >> diff --git a/block/blk-mq.c b/block/blk-mq.c
> >> index e9f037a25fe3..3640fae5707b 100644
> >> --- a/block/blk-mq.c
> >> +++ b/block/blk-mq.c
> >> @@ -5010,7 +5010,13 @@ static int blk_mq_elv_switch_none(struct
> >> request_queue *q,
> >>                 __elevator_get(q->elevator->type);
> >>
> >>                 elevator_set_none(q);
> >> +       } else {
> >> +               ret = xa_insert(elv_tbl, q->id, xa_mk_value(1), GFP_KERNEL);
> >> +               if (WARN_ON_ONCE(ret))
> >> +                       return ret;
> >>         }
> >> +
> >> +       blk_mq_freeze_queue_nomemsave(q);
> >>         return ret;
> >>  }
> >>
> >> @@ -5045,9 +5051,6 @@ static void __blk_mq_update_nr_hw_queues(struct
> >> blk_mq_tag_set *set,
> >>                 blk_mq_sysfs_unregister_hctxs(q);
> >>         }
> >>
> >> -       list_for_each_entry(q, &set->tag_list, tag_set_list)
> >> -               blk_mq_freeze_queue_nomemsave(q);
> >> -
> >>         /*
> >>          * Switch IO scheduler to 'none', cleaning up the data associated
> >>          * with the previous scheduler. We will switch back once we are done
> >> diff --git a/block/elevator.c b/block/elevator.c
> >> index e2ebfbf107b3..9400ea9ec024 100644
> >> --- a/block/elevator.c
> >> +++ b/block/elevator.c
> >> @@ -715,16 +715,21 @@ void elv_update_nr_hw_queues(struct request_queue *q,
> >> struct elevator_type *e,
> >>
> >>         WARN_ON_ONCE(q->mq_freeze_depth == 0);
> >>
> >> -       if (e && !blk_queue_dying(q) && blk_queue_registered(q)) {
> >> -               ctx.name = e->elevator_name;
> >> -               ctx.et = t;
> >> -
> >> -               mutex_lock(&q->elevator_lock);
> >> -               /* force to reattach elevator after nr_hw_queue is updated
> >> */
> >> -               ret = elevator_switch(q, &ctx);
> >> -               mutex_unlock(&q->elevator_lock);
> >> +       if (e) {
> >> +               if (!xa_is_value(e) && !blk_queue_dying(q) &&
> >> +                   blk_queue_registered(q)) {
> >> +                       ctx.name = e->elevator_name;
> >> +                       ctx.et = t;
> >> +
> >> +                       mutex_lock(&q->elevator_lock);
> >> +                       /* force to reattach elevator after nr_hw_queue is
> >> updated */
> >> +                       ret = elevator_switch(q, &ctx);
> >> +                       mutex_unlock(&q->elevator_lock);
> >> +               }
> >> +
> >> +               blk_mq_unfreeze_queue_nomemrestore(q);
> >>         }
> >> -       blk_mq_unfreeze_queue_nomemrestore(q);
> >> +
> > 
> > I feel it doesn't become simpler, :-(
> > 
> > However we still can avoid the change in elv_update_nr_hw_queues() by moving
> > freeze/unfree queue to blk_mq_elv_switch_back(), which looks more readable.
> > 
> I think yes that seems reasonable but then we also need to move 
> elevator_change_done() and blk_mq_free_sched_tags() from 
> elv_update_nr_hw_queues() to blk_mq_elv_switch_back(). As you know
> both these functions (elevator_change_done and blk_mq_free_sched_tags)
> have to be called after we unfreeze the queue.

It can be done in easier way:


diff --git a/block/blk-mq.c b/block/blk-mq.c
index b67d6c02eceb..69949929dfbb 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -4974,11 +4974,15 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
  * Switch back to the elevator type stored in the xarray.
  */
 static void blk_mq_elv_switch_back(struct request_queue *q,
-		struct xarray *elv_tbl, struct xarray *et_tbl)
+		struct xarray *elv_tbl, struct xarray *et_tbl, bool frozen)
 {
 	struct elevator_type *e = xa_load(elv_tbl, q->id);
 	struct elevator_tags *t = xa_load(et_tbl, q->id);
 
+	/* elv_update_nr_hw_queues() expects queue to be frozen */
+	if (!frozen)
+		blk_mq_freeze_queue_nomemsave(q);
+
 	/* The elv_update_nr_hw_queues unfreezes the queue. */
 	elv_update_nr_hw_queues(q, e, t);
 
@@ -5033,6 +5037,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 	unsigned int memflags;
 	int i;
 	struct xarray elv_tbl, et_tbl;
+	bool queues_frozen = false;
 
 	lockdep_assert_held(&set->tag_list_lock);
 
@@ -5056,9 +5061,6 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 		blk_mq_sysfs_unregister_hctxs(q);
 	}
 
-	list_for_each_entry(q, &set->tag_list, tag_set_list)
-		blk_mq_freeze_queue_nomemsave(q);
-
 	/*
 	 * Switch IO scheduler to 'none', cleaning up the data associated
 	 * with the previous scheduler. We will switch back once we are done
@@ -5068,6 +5070,9 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 		if (blk_mq_elv_switch_none(q, &elv_tbl))
 			goto switch_back;
 
+	list_for_each_entry(q, &set->tag_list, tag_set_list)
+		blk_mq_freeze_queue_nomemsave(q);
+	queues_frozen = true;
 	if (blk_mq_realloc_tag_set_tags(set, nr_hw_queues) < 0)
 		goto switch_back;
 
@@ -5092,7 +5097,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 switch_back:
 	/* The blk_mq_elv_switch_back unfreezes queue for us. */
 	list_for_each_entry(q, &set->tag_list, tag_set_list)
-		blk_mq_elv_switch_back(q, &elv_tbl, &et_tbl);
+		blk_mq_elv_switch_back(q, &elv_tbl, &et_tbl, queues_frozen);
 
 	list_for_each_entry(q, &set->tag_list, tag_set_list) {
 		blk_mq_sysfs_register_hctxs(q);

Thanks,
Ming


      reply	other threads:[~2025-08-15 12:58 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-08-15  7:56 [PATCH] blk-mq: fix lockdep warning in __blk_mq_update_nr_hw_queues Ming Lei
2025-08-15  9:15 ` Yu Kuai
2025-08-15  9:34   ` Yu Kuai
2025-08-15  9:38     ` Ming Lei
2025-08-15 10:06       ` Nilay Shroff
2025-08-15 12:58         ` Ming Lei [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=aJ8u7nI4GtyOECF9@fedora \
    --to=ming.lei@redhat.com \
    --cc=axboe@kernel.dk \
    --cc=linux-block@vger.kernel.org \
    --cc=nilay@linux.ibm.com \
    --cc=yukuai1@huaweicloud.com \
    --cc=yukuai3@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox