From: Nilay Shroff <nilay@linux.ibm.com>
To: linux-block@vger.kernel.org
Cc: axboe@kernel.dk, kch@nvidia.com, shinichiro.kawasaki@wdc.com,
hch@lst.de, ming.lei@redhat.com, gjoyce@ibm.com
Subject: [PATCH 1/2] block: avoid cpu_hotplug_lock depedency on freeze_lock
Date: Mon, 4 Aug 2025 17:51:10 +0530 [thread overview]
Message-ID: <20250804122125.3271397-2-nilay@linux.ibm.com> (raw)
In-Reply-To: <20250804122125.3271397-1-nilay@linux.ibm.com>
A recent lockdep[1] splat observed while running blktest block/005
reveals a potential deadlock caused by the cpu_hotplug_lock dependency
on ->freeze_lock. This dependency was introduced by commit 033b667a823e
("block: blk-rq-qos: guard rq-qos helpers by static key").
That change added a static key to avoid fetching q->rq_qos when neither
blk-wbt nor blk-iolatency is configured. The static key dynamically
patches kernel text to a NOP when disabled, eliminating overhead of
fetching q->rq_qos in the I/O hot path. However, enabling a static key
at runtime requires acquiring both cpu_hotplug_lock and jump_label_mutex.
When this happens after the queue has already been frozen (i.e., while
holding ->freeze_lock), it creates a locking dependency from cpu_hotplug_
lock/jump_label_mutex to ->freeze_lock, which leads to a potential deadlock
reported by lockdep[1].
To resolve this, replace the static key mechanism with q->queue_flags:
QUEUE_FLAG_QOS_ENABLED. This flag is evaluated in the fast path before
accessing q->rq_qos. If the flag is set, we proceed to fetch q->rq_qos;
otherwise, the access is skipped.
Since q->queue_flags is commonly accessed in IO hotpath and resides in
the first cacheline of struct request_queue, checking it imposes minimal
overhead while eliminating the deadlock risk.
This change avoids the lockdep splat without introducing performance
regressions.
[1] https://lore.kernel.org/linux-block/4fdm37so3o4xricdgfosgmohn63aa7wj3ua4e5vpihoamwg3ui@fq42f5q5t5ic/
Fixes: 033b667a823e ("block: blk-rq-qos: guard rq-qos helpers by static key")
Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
---
block/blk-mq-debugfs.c | 1 +
block/blk-rq-qos.c | 6 ++----
block/blk-rq-qos.h | 45 +++++++++++++++++++++++++-----------------
include/linux/blkdev.h | 1 +
4 files changed, 31 insertions(+), 22 deletions(-)
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 7ed3e71f2fc0..32c65efdda46 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -95,6 +95,7 @@ static const char *const blk_queue_flag_name[] = {
QUEUE_FLAG_NAME(SQ_SCHED),
QUEUE_FLAG_NAME(DISABLE_WBT_DEF),
QUEUE_FLAG_NAME(NO_ELV_SWITCH),
+ QUEUE_FLAG_NAME(QOS_ENABLED),
};
#undef QUEUE_FLAG_NAME
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index 848591fb3c57..460c04715321 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -2,8 +2,6 @@
#include "blk-rq-qos.h"
-__read_mostly DEFINE_STATIC_KEY_FALSE(block_rq_qos);
-
/*
* Increment 'v', if 'v' is below 'below'. Returns true if we succeeded,
* false if 'v' + 1 would be bigger than 'below'.
@@ -319,8 +317,8 @@ void rq_qos_exit(struct request_queue *q)
struct rq_qos *rqos = q->rq_qos;
q->rq_qos = rqos->next;
rqos->ops->exit(rqos);
- static_branch_dec(&block_rq_qos);
}
+ blk_queue_flag_clear(QUEUE_FLAG_QOS_ENABLED, q);
mutex_unlock(&q->rq_qos_mutex);
}
@@ -346,7 +344,7 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
goto ebusy;
rqos->next = q->rq_qos;
q->rq_qos = rqos;
- static_branch_inc(&block_rq_qos);
+ blk_queue_flag_set(QUEUE_FLAG_QOS_ENABLED, q);
blk_mq_unfreeze_queue(q, memflags);
diff --git a/block/blk-rq-qos.h b/block/blk-rq-qos.h
index 39749f4066fb..0eeb5beb1f4d 100644
--- a/block/blk-rq-qos.h
+++ b/block/blk-rq-qos.h
@@ -12,7 +12,6 @@
#include "blk-mq-debugfs.h"
struct blk_mq_debugfs_attr;
-extern struct static_key_false block_rq_qos;
enum rq_qos_id {
RQ_QOS_WBT,
@@ -113,43 +112,50 @@ void __rq_qos_queue_depth_changed(struct rq_qos *rqos);
static inline void rq_qos_cleanup(struct request_queue *q, struct bio *bio)
{
- if (static_branch_unlikely(&block_rq_qos) && q->rq_qos)
+ if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) &&
+ q->rq_qos)
__rq_qos_cleanup(q->rq_qos, bio);
}
static inline void rq_qos_done(struct request_queue *q, struct request *rq)
{
- if (static_branch_unlikely(&block_rq_qos) && q->rq_qos &&
- !blk_rq_is_passthrough(rq))
+ if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) &&
+ q->rq_qos && !blk_rq_is_passthrough(rq))
__rq_qos_done(q->rq_qos, rq);
}
-static inline void rq_qos_issue(struct request_queue *q, struct request *rq)
+static __maybe_unused noinline void rq_qos_issue(struct request_queue *q, struct request *rq)
{
- if (static_branch_unlikely(&block_rq_qos) && q->rq_qos)
+ if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) &&
+ q->rq_qos)
__rq_qos_issue(q->rq_qos, rq);
}
static inline void rq_qos_requeue(struct request_queue *q, struct request *rq)
{
- if (static_branch_unlikely(&block_rq_qos) && q->rq_qos)
+ if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) &&
+ q->rq_qos)
__rq_qos_requeue(q->rq_qos, rq);
}
static inline void rq_qos_done_bio(struct bio *bio)
{
- if (static_branch_unlikely(&block_rq_qos) &&
- bio->bi_bdev && (bio_flagged(bio, BIO_QOS_THROTTLED) ||
- bio_flagged(bio, BIO_QOS_MERGED))) {
- struct request_queue *q = bdev_get_queue(bio->bi_bdev);
- if (q->rq_qos)
- __rq_qos_done_bio(q->rq_qos, bio);
- }
+ struct request_queue *q;
+
+ if (!bio->bi_bdev || (!bio_flagged(bio, BIO_QOS_THROTTLED) &&
+ !bio_flagged(bio, BIO_QOS_MERGED)))
+ return;
+
+ q = bdev_get_queue(bio->bi_bdev);
+ if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) &&
+ q->rq_qos)
+ __rq_qos_done_bio(q->rq_qos, bio);
}
static inline void rq_qos_throttle(struct request_queue *q, struct bio *bio)
{
- if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) {
+ if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) &&
+ q->rq_qos) {
bio_set_flag(bio, BIO_QOS_THROTTLED);
__rq_qos_throttle(q->rq_qos, bio);
}
@@ -158,14 +164,16 @@ static inline void rq_qos_throttle(struct request_queue *q, struct bio *bio)
static inline void rq_qos_track(struct request_queue *q, struct request *rq,
struct bio *bio)
{
- if (static_branch_unlikely(&block_rq_qos) && q->rq_qos)
+ if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) &&
+ q->rq_qos)
__rq_qos_track(q->rq_qos, rq, bio);
}
static inline void rq_qos_merge(struct request_queue *q, struct request *rq,
struct bio *bio)
{
- if (static_branch_unlikely(&block_rq_qos) && q->rq_qos) {
+ if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) &&
+ q->rq_qos) {
bio_set_flag(bio, BIO_QOS_MERGED);
__rq_qos_merge(q->rq_qos, rq, bio);
}
@@ -173,7 +181,8 @@ static inline void rq_qos_merge(struct request_queue *q, struct request *rq,
static inline void rq_qos_queue_depth_changed(struct request_queue *q)
{
- if (static_branch_unlikely(&block_rq_qos) && q->rq_qos)
+ if (unlikely(test_bit(QUEUE_FLAG_QOS_ENABLED, &q->queue_flags)) &&
+ q->rq_qos)
__rq_qos_queue_depth_changed(q->rq_qos);
}
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 95886b404b16..fe1797bbec42 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -656,6 +656,7 @@ enum {
QUEUE_FLAG_SQ_SCHED, /* single queue style io dispatch */
QUEUE_FLAG_DISABLE_WBT_DEF, /* for sched to disable/enable wbt */
QUEUE_FLAG_NO_ELV_SWITCH, /* can't switch elevator any more */
+ QUEUE_FLAG_QOS_ENABLED, /* qos is enabled */
QUEUE_FLAG_MAX
};
--
2.50.1
next prev parent reply other threads:[~2025-08-04 12:21 UTC|newest]
Thread overview: 16+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-08-04 12:21 [PATCH 0/2] block: blk-rq-qos: replace static key with atomic bitop Nilay Shroff
2025-08-04 12:21 ` Nilay Shroff [this message]
2025-08-04 12:21 ` [PATCH 2/2] block: clear QUEUE_FLAG_QOS_ENABLED in rq_qos_del() Nilay Shroff
2025-08-04 13:42 ` [PATCH 0/2] block: blk-rq-qos: replace static key with atomic bitop Ming Lei
2025-08-05 4:58 ` Nilay Shroff
2025-08-05 12:44 ` Ming Lei
2025-08-05 17:05 ` Nilay Shroff
2025-08-06 7:21 ` Ming Lei
2025-08-06 1:28 ` Jens Axboe
2025-08-06 1:44 ` Yu Kuai
2025-08-13 11:20 ` Nilay Shroff
2025-08-13 12:16 ` Jens Axboe
2025-08-13 15:01 ` Nilay Shroff
2025-08-06 5:13 ` Nilay Shroff
2025-08-05 9:28 ` Yu Kuai
2025-08-05 12:14 ` Nilay Shroff
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250804122125.3271397-2-nilay@linux.ibm.com \
--to=nilay@linux.ibm.com \
--cc=axboe@kernel.dk \
--cc=gjoyce@ibm.com \
--cc=hch@lst.de \
--cc=kch@nvidia.com \
--cc=linux-block@vger.kernel.org \
--cc=ming.lei@redhat.com \
--cc=shinichiro.kawasaki@wdc.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox