All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ming Lei <ming.lei@redhat.com>
To: Nilay Shroff <nilay@linux.ibm.com>
Cc: Yu Kuai <yukuai1@huaweicloud.com>, Jens Axboe <axboe@kernel.dk>,
	linux-block@vger.kernel.org, "yukuai (C)" <yukuai3@huawei.com>
Subject: Re: [PATCH] blk-mq: fix lockdep warning in __blk_mq_update_nr_hw_queues
Date: Fri, 15 Aug 2025 20:58:22 +0800	[thread overview]
Message-ID: <aJ8u7nI4GtyOECF9@fedora> (raw)
In-Reply-To: <5c57a6a5-d9a4-4c21-86cd-784e4f3876fd@linux.ibm.com>

On Fri, Aug 15, 2025 at 03:36:02PM +0530, Nilay Shroff wrote:
> 
> 
> On 8/15/25 3:08 PM, Ming Lei wrote:
> > On Fri, Aug 15, 2025 at 05:34:23PM +0800, Yu Kuai wrote:
> >> Hi,
> >>
> >> 在 2025/08/15 17:15, Yu Kuai 写道:
> >>> Will it be simpler if we move blk_mq_freeze_queue_nomemsave() into
> >>> blk_mq_elv_switch_none(), after elevator is succeed switching to none
> >>> then freeze the queue.
> >>>
> >>> Later in blk_mq_elv_switch_back we'll know if xa_load() return valid
> >>> elevator_type, related queue is already freezed.
> >>
> >> Like following:
> >>
> >> diff --git a/block/blk-mq.c b/block/blk-mq.c
> >> index e9f037a25fe3..3640fae5707b 100644
> >> --- a/block/blk-mq.c
> >> +++ b/block/blk-mq.c
> >> @@ -5010,7 +5010,13 @@ static int blk_mq_elv_switch_none(struct
> >> request_queue *q,
> >>                 __elevator_get(q->elevator->type);
> >>
> >>                 elevator_set_none(q);
> >> +       } else {
> >> +               ret = xa_insert(elv_tbl, q->id, xa_mk_value(1), GFP_KERNEL);
> >> +               if (WARN_ON_ONCE(ret))
> >> +                       return ret;
> >>         }
> >> +
> >> +       blk_mq_freeze_queue_nomemsave(q);
> >>         return ret;
> >>  }
> >>
> >> @@ -5045,9 +5051,6 @@ static void __blk_mq_update_nr_hw_queues(struct
> >> blk_mq_tag_set *set,
> >>                 blk_mq_sysfs_unregister_hctxs(q);
> >>         }
> >>
> >> -       list_for_each_entry(q, &set->tag_list, tag_set_list)
> >> -               blk_mq_freeze_queue_nomemsave(q);
> >> -
> >>         /*
> >>          * Switch IO scheduler to 'none', cleaning up the data associated
> >>          * with the previous scheduler. We will switch back once we are done
> >> diff --git a/block/elevator.c b/block/elevator.c
> >> index e2ebfbf107b3..9400ea9ec024 100644
> >> --- a/block/elevator.c
> >> +++ b/block/elevator.c
> >> @@ -715,16 +715,21 @@ void elv_update_nr_hw_queues(struct request_queue *q,
> >> struct elevator_type *e,
> >>
> >>         WARN_ON_ONCE(q->mq_freeze_depth == 0);
> >>
> >> -       if (e && !blk_queue_dying(q) && blk_queue_registered(q)) {
> >> -               ctx.name = e->elevator_name;
> >> -               ctx.et = t;
> >> -
> >> -               mutex_lock(&q->elevator_lock);
> >> -               /* force to reattach elevator after nr_hw_queue is updated
> >> */
> >> -               ret = elevator_switch(q, &ctx);
> >> -               mutex_unlock(&q->elevator_lock);
> >> +       if (e) {
> >> +               if (!xa_is_value(e) && !blk_queue_dying(q) &&
> >> +                   blk_queue_registered(q)) {
> >> +                       ctx.name = e->elevator_name;
> >> +                       ctx.et = t;
> >> +
> >> +                       mutex_lock(&q->elevator_lock);
> >> +                       /* force to reattach elevator after nr_hw_queue is
> >> updated */
> >> +                       ret = elevator_switch(q, &ctx);
> >> +                       mutex_unlock(&q->elevator_lock);
> >> +               }
> >> +
> >> +               blk_mq_unfreeze_queue_nomemrestore(q);
> >>         }
> >> -       blk_mq_unfreeze_queue_nomemrestore(q);
> >> +
> > 
> > I feel it doesn't become simpler, :-(
> > 
> > However we still can avoid the change in elv_update_nr_hw_queues() by moving
> > freeze/unfree queue to blk_mq_elv_switch_back(), which looks more readable.
> > 
> I think yes that seems reasonable but then we also need to move 
> elevator_change_done() and blk_mq_free_sched_tags() from 
> elv_update_nr_hw_queues() to blk_mq_elv_switch_back(). As you know
> both these functions (elevator_change_done and blk_mq_free_sched_tags)
> have to be called after we unfreeze the queue.

It can be done in easier way:


diff --git a/block/blk-mq.c b/block/blk-mq.c
index b67d6c02eceb..69949929dfbb 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -4974,11 +4974,15 @@ int blk_mq_update_nr_requests(struct request_queue *q, unsigned int nr)
  * Switch back to the elevator type stored in the xarray.
  */
 static void blk_mq_elv_switch_back(struct request_queue *q,
-		struct xarray *elv_tbl, struct xarray *et_tbl)
+		struct xarray *elv_tbl, struct xarray *et_tbl, bool frozen)
 {
 	struct elevator_type *e = xa_load(elv_tbl, q->id);
 	struct elevator_tags *t = xa_load(et_tbl, q->id);
 
+	/* elv_update_nr_hw_queues() expects queue to be frozen */
+	if (!frozen)
+		blk_mq_freeze_queue_nomemsave(q);
+
 	/* The elv_update_nr_hw_queues unfreezes the queue. */
 	elv_update_nr_hw_queues(q, e, t);
 
@@ -5033,6 +5037,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 	unsigned int memflags;
 	int i;
 	struct xarray elv_tbl, et_tbl;
+	bool queues_frozen = false;
 
 	lockdep_assert_held(&set->tag_list_lock);
 
@@ -5056,9 +5061,6 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 		blk_mq_sysfs_unregister_hctxs(q);
 	}
 
-	list_for_each_entry(q, &set->tag_list, tag_set_list)
-		blk_mq_freeze_queue_nomemsave(q);
-
 	/*
 	 * Switch IO scheduler to 'none', cleaning up the data associated
 	 * with the previous scheduler. We will switch back once we are done
@@ -5068,6 +5070,9 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 		if (blk_mq_elv_switch_none(q, &elv_tbl))
 			goto switch_back;
 
+	list_for_each_entry(q, &set->tag_list, tag_set_list)
+		blk_mq_freeze_queue_nomemsave(q);
+	queues_frozen = true;
 	if (blk_mq_realloc_tag_set_tags(set, nr_hw_queues) < 0)
 		goto switch_back;
 
@@ -5092,7 +5097,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
 switch_back:
 	/* The blk_mq_elv_switch_back unfreezes queue for us. */
 	list_for_each_entry(q, &set->tag_list, tag_set_list)
-		blk_mq_elv_switch_back(q, &elv_tbl, &et_tbl);
+		blk_mq_elv_switch_back(q, &elv_tbl, &et_tbl, queues_frozen);
 
 	list_for_each_entry(q, &set->tag_list, tag_set_list) {
 		blk_mq_sysfs_register_hctxs(q);

Thanks,
Ming


      reply	other threads:[~2025-08-15 12:58 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-08-15  7:56 [PATCH] blk-mq: fix lockdep warning in __blk_mq_update_nr_hw_queues Ming Lei
2025-08-15  9:15 ` Yu Kuai
2025-08-15  9:34   ` Yu Kuai
2025-08-15  9:38     ` Ming Lei
2025-08-15 10:06       ` Nilay Shroff
2025-08-15 12:58         ` Ming Lei [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=aJ8u7nI4GtyOECF9@fedora \
    --to=ming.lei@redhat.com \
    --cc=axboe@kernel.dk \
    --cc=linux-block@vger.kernel.org \
    --cc=nilay@linux.ibm.com \
    --cc=yukuai1@huaweicloud.com \
    --cc=yukuai3@huawei.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.