From: Nilay Shroff <nilay@linux.ibm.com>
To: linux-block@vger.kernel.org
Cc: axboe@kernel.dk, hch@lst.de, ming.lei@redhat.com,
yukuai@fnnas.com, yi.zhang@redhat.com,
shinichiro.kawasaki@wdc.com, gjoyce@ibm.com,
Nilay Shroff <nilay@linux.ibm.com>
Subject: [PATCH] block: break pcpu_alloc_mutex dependency on freeze_lock
Date: Wed, 25 Feb 2026 18:14:16 +0530 [thread overview]
Message-ID: <20260225124556.1152510-1-nilay@linux.ibm.com> (raw)
While the nr_hw_queue update allocates tagset tags it acquires ->pcpu_alloc_mutex
after ->freeze_lock is acquired or the queue is frozen. This potentially
creates a circular dependency involving ->fs_reclaim if reclaim is
triggered simultaneously in a code path which first acquires ->pcpu_
alloc_mutex. As the queue is already frozen while the nr_hw_queue update
allocates tagsets, reclaim can't make forward progress and thus it could
cause a potential deadlock as reported in the lockdep splat[1].
Fix this by pre-allocating tagset tags before we freeze the queue during
the nr_hw_queue update. The allocated tagset tags can then be safely
installed and used after the queue is frozen.
Reported-by: Yi Zhang <yi.zhang@redhat.com>
Closes: https://lore.kernel.org/all/CAHj4cs8F=OV9s3La2kEQ34YndgfZP-B5PHS4Z8_b9euKG6J4mw@mail.gmail.com/ [1]
Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
---
block/blk-mq.c | 53 +++++++++++++++++++++++++++++++++++---------------
1 file changed, 37 insertions(+), 16 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index d5602ff62c7c..687fd47786a6 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -4793,10 +4793,10 @@ static void blk_mq_update_queue_map(struct blk_mq_tag_set *set)
}
}
-static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set,
- int new_nr_hw_queues)
+static int blk_mq_prealloc_tag_set_tags(struct blk_mq_tag_set *set,
+ int new_nr_hw_queues, struct blk_mq_tags ***tags)
{
- struct blk_mq_tags **new_tags;
+ struct blk_mq_tags **new_tags = NULL;
int i;
if (set->nr_hw_queues >= new_nr_hw_queues)
@@ -4807,24 +4807,42 @@ static int blk_mq_realloc_tag_set_tags(struct blk_mq_tag_set *set,
if (!new_tags)
return -ENOMEM;
- if (set->tags)
- memcpy(new_tags, set->tags, set->nr_hw_queues *
- sizeof(*set->tags));
- kfree(set->tags);
- set->tags = new_tags;
-
for (i = set->nr_hw_queues; i < new_nr_hw_queues; i++) {
- if (!__blk_mq_alloc_map_and_rqs(set, i)) {
- while (--i >= set->nr_hw_queues)
- __blk_mq_free_map_and_rqs(set, i);
- return -ENOMEM;
+ if (blk_mq_is_shared_tags(set->flags))
+ new_tags[i] = set->shared_tags;
+ else {
+ new_tags[i] = blk_mq_alloc_map_and_rqs(set, i,
+ set->queue_depth);
+ if (!new_tags[i])
+ goto out_unwind;
}
cond_resched();
}
done:
- set->nr_hw_queues = new_nr_hw_queues;
+ *tags = new_tags;
return 0;
+out_unwind:
+ while (--i >= set->nr_hw_queues) {
+ if (!blk_mq_is_shared_tags(set->flags))
+ blk_mq_free_map_and_rqs(set, new_tags[i], i);
+ }
+ return -ENOMEM;
+}
+
+static void blk_mq_init_tag_set_tags(struct blk_mq_tag_set *set,
+ int new_nr_hw_queues, struct blk_mq_tags **new_tags)
+{
+ if (set->nr_hw_queues >= new_nr_hw_queues)
+ goto done;
+
+ if (set->tags)
+ memcpy(new_tags, set->tags, set->nr_hw_queues *
+ sizeof(*set->tags));
+ kfree(set->tags);
+ set->tags = new_tags;
+done:
+ set->nr_hw_queues = new_nr_hw_queues;
}
/*
@@ -5113,6 +5131,7 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
unsigned int memflags;
int i;
struct xarray elv_tbl;
+ struct blk_mq_tags **new_tags;
bool queues_frozen = false;
lockdep_assert_held(&set->tag_list_lock);
@@ -5147,11 +5166,13 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
if (blk_mq_elv_switch_none(q, &elv_tbl))
goto switch_back;
+ if (blk_mq_prealloc_tag_set_tags(set, nr_hw_queues, &new_tags) < 0)
+ goto switch_back;
+
list_for_each_entry(q, &set->tag_list, tag_set_list)
blk_mq_freeze_queue_nomemsave(q);
queues_frozen = true;
- if (blk_mq_realloc_tag_set_tags(set, nr_hw_queues) < 0)
- goto switch_back;
+ blk_mq_init_tag_set_tags(set, nr_hw_queues, new_tags);
fallback:
blk_mq_update_queue_map(set);
--
2.52.0
next reply other threads:[~2026-02-25 12:46 UTC|newest]
Thread overview: 4+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-02-25 12:44 Nilay Shroff [this message]
2026-02-25 15:50 ` [PATCH] block: break pcpu_alloc_mutex dependency on freeze_lock Yu Kuai
2026-02-26 15:37 ` Nilay Shroff
[not found] ` <CO1PR15MB496953C277EC8E15ED965D7FCE72A@CO1PR15MB4969.namprd15.prod.outlook.com>
2026-02-27 8:09 ` Nilay Shroff
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260225124556.1152510-1-nilay@linux.ibm.com \
--to=nilay@linux.ibm.com \
--cc=axboe@kernel.dk \
--cc=gjoyce@ibm.com \
--cc=hch@lst.de \
--cc=linux-block@vger.kernel.org \
--cc=ming.lei@redhat.com \
--cc=shinichiro.kawasaki@wdc.com \
--cc=yi.zhang@redhat.com \
--cc=yukuai@fnnas.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox