* [PATCH v9 0/8] blk-mq: fix possible deadlocks
@ 2026-02-02 8:05 Yu Kuai
2026-02-02 8:05 ` [PATCH v9 1/8] blk-wbt: factor out a helper wbt_set_lat() Yu Kuai
` (8 more replies)
0 siblings, 9 replies; 11+ messages in thread
From: Yu Kuai @ 2026-02-02 8:05 UTC (permalink / raw)
To: Jens Axboe
Cc: linux-block, linux-kernel, Ming Lei, Nilay Shroff,
Hannes Reinecke, yukuai
Fix deadlocks:
- patch 1-2, pcpu_alloc_mutex under q_usage_counter in blk-wbt;
- patch 3-8, debugfs_mutex under q_usage_counter;
Yu Kuai (8):
blk-wbt: factor out a helper wbt_set_lat()
blk-wbt: fix possible deadlock to nest pcpu_alloc_mutex under
q_usage_counter
blk-mq-debugfs: factor out a helper to register debugfs for all rq_qos
blk-rq-qos: fix possible debugfs_mutex deadlock
blk-mq-debugfs: make blk_mq_debugfs_register_rqos() static
blk-mq-debugfs: remove blk_mq_debugfs_unregister_rqos()
blk-mq-debugfs: add missing debugfs_mutex in
blk_mq_debugfs_register_hctxs()
blk-mq-debugfs: warn about possible deadlock
block/blk-mq-debugfs.c | 68 ++++++++++++-------
block/blk-mq-debugfs.h | 8 +--
block/blk-rq-qos.c | 11 ---
block/blk-sysfs.c | 39 +----------
block/blk-wbt.c | 149 ++++++++++++++++++++++++++++++-----------
block/blk-wbt.h | 7 +-
6 files changed, 159 insertions(+), 123 deletions(-)
--
2.51.0
^ permalink raw reply [flat|nested] 11+ messages in thread
* [PATCH v9 1/8] blk-wbt: factor out a helper wbt_set_lat()
2026-02-02 8:05 [PATCH v9 0/8] blk-mq: fix possible deadlocks Yu Kuai
@ 2026-02-02 8:05 ` Yu Kuai
2026-02-02 8:05 ` [PATCH v9 2/8] blk-wbt: fix possible deadlock to nest pcpu_alloc_mutex under q_usage_counter Yu Kuai
` (7 subsequent siblings)
8 siblings, 0 replies; 11+ messages in thread
From: Yu Kuai @ 2026-02-02 8:05 UTC (permalink / raw)
To: Jens Axboe
Cc: linux-block, linux-kernel, Ming Lei, Nilay Shroff,
Hannes Reinecke, yukuai
Move implementation details inside blk-wbt.c, in preparation for fixing
a possible deadlock caused by calling wbt_init() while the queue is
frozen in the next patch.
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
Signed-off-by: Yu Kuai <yukuai@fnnas.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
---
block/blk-sysfs.c | 39 ++----------------------------------
block/blk-wbt.c | 50 ++++++++++++++++++++++++++++++++++++++++++++---
block/blk-wbt.h | 7 ++-----
3 files changed, 51 insertions(+), 45 deletions(-)
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index e0a70d26972b..a580688c3ad5 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -636,11 +636,8 @@ static ssize_t queue_wb_lat_show(struct gendisk *disk, char *page)
static ssize_t queue_wb_lat_store(struct gendisk *disk, const char *page,
size_t count)
{
- struct request_queue *q = disk->queue;
- struct rq_qos *rqos;
ssize_t ret;
s64 val;
- unsigned int memflags;
ret = queue_var_store64(&val, page);
if (ret < 0)
@@ -648,40 +645,8 @@ static ssize_t queue_wb_lat_store(struct gendisk *disk, const char *page,
if (val < -1)
return -EINVAL;
- /*
- * Ensure that the queue is idled, in case the latency update
- * ends up either enabling or disabling wbt completely. We can't
- * have IO inflight if that happens.
- */
- memflags = blk_mq_freeze_queue(q);
-
- rqos = wbt_rq_qos(q);
- if (!rqos) {
- ret = wbt_init(disk);
- if (ret)
- goto out;
- }
-
- ret = count;
- if (val == -1)
- val = wbt_default_latency_nsec(q);
- else if (val >= 0)
- val *= 1000ULL;
-
- if (wbt_get_min_lat(q) == val)
- goto out;
-
- blk_mq_quiesce_queue(q);
-
- mutex_lock(&disk->rqos_state_mutex);
- wbt_set_min_lat(q, val);
- mutex_unlock(&disk->rqos_state_mutex);
-
- blk_mq_unquiesce_queue(q);
-out:
- blk_mq_unfreeze_queue(q, memflags);
-
- return ret;
+ ret = wbt_set_lat(disk, val);
+ return ret ? ret : count;
}
QUEUE_RW_ENTRY(queue_wb_lat, "wbt_lat_usec");
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 8e025834f2fb..0a37d97bda75 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -93,6 +93,8 @@ struct rq_wb {
struct rq_depth rq_depth;
};
+static int wbt_init(struct gendisk *disk);
+
static inline struct rq_wb *RQWB(struct rq_qos *rqos)
{
return container_of(rqos, struct rq_wb, rqos);
@@ -506,7 +508,7 @@ u64 wbt_get_min_lat(struct request_queue *q)
return RQWB(rqos)->min_lat_nsec;
}
-void wbt_set_min_lat(struct request_queue *q, u64 val)
+static void wbt_set_min_lat(struct request_queue *q, u64 val)
{
struct rq_qos *rqos = wbt_rq_qos(q);
if (!rqos)
@@ -741,7 +743,7 @@ void wbt_init_enable_default(struct gendisk *disk)
WARN_ON_ONCE(wbt_init(disk));
}
-u64 wbt_default_latency_nsec(struct request_queue *q)
+static u64 wbt_default_latency_nsec(struct request_queue *q)
{
/*
* We default to 2msec for non-rotational storage, and 75msec
@@ -901,7 +903,7 @@ static const struct rq_qos_ops wbt_rqos_ops = {
#endif
};
-int wbt_init(struct gendisk *disk)
+static int wbt_init(struct gendisk *disk)
{
struct request_queue *q = disk->queue;
struct rq_wb *rwb;
@@ -948,3 +950,45 @@ int wbt_init(struct gendisk *disk)
return ret;
}
+
+int wbt_set_lat(struct gendisk *disk, s64 val)
+{
+ struct request_queue *q = disk->queue;
+ unsigned int memflags;
+ struct rq_qos *rqos;
+ int ret = 0;
+
+ /*
+ * Ensure that the queue is idled, in case the latency update
+ * ends up either enabling or disabling wbt completely. We can't
+ * have IO inflight if that happens.
+ */
+ memflags = blk_mq_freeze_queue(q);
+
+ rqos = wbt_rq_qos(q);
+ if (!rqos) {
+ ret = wbt_init(disk);
+ if (ret)
+ goto out;
+ }
+
+ if (val == -1)
+ val = wbt_default_latency_nsec(q);
+ else if (val >= 0)
+ val *= 1000ULL;
+
+ if (wbt_get_min_lat(q) == val)
+ goto out;
+
+ blk_mq_quiesce_queue(q);
+
+ mutex_lock(&disk->rqos_state_mutex);
+ wbt_set_min_lat(q, val);
+ mutex_unlock(&disk->rqos_state_mutex);
+
+ blk_mq_unquiesce_queue(q);
+out:
+ blk_mq_unfreeze_queue(q, memflags);
+
+ return ret;
+}
diff --git a/block/blk-wbt.h b/block/blk-wbt.h
index 925f22475738..6e39da17218b 100644
--- a/block/blk-wbt.h
+++ b/block/blk-wbt.h
@@ -4,16 +4,13 @@
#ifdef CONFIG_BLK_WBT
-int wbt_init(struct gendisk *disk);
void wbt_init_enable_default(struct gendisk *disk);
void wbt_disable_default(struct gendisk *disk);
void wbt_enable_default(struct gendisk *disk);
u64 wbt_get_min_lat(struct request_queue *q);
-void wbt_set_min_lat(struct request_queue *q, u64 val);
-bool wbt_disabled(struct request_queue *);
-
-u64 wbt_default_latency_nsec(struct request_queue *);
+bool wbt_disabled(struct request_queue *q);
+int wbt_set_lat(struct gendisk *disk, s64 val);
#else
--
2.51.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v9 2/8] blk-wbt: fix possible deadlock to nest pcpu_alloc_mutex under q_usage_counter
2026-02-02 8:05 [PATCH v9 0/8] blk-mq: fix possible deadlocks Yu Kuai
2026-02-02 8:05 ` [PATCH v9 1/8] blk-wbt: factor out a helper wbt_set_lat() Yu Kuai
@ 2026-02-02 8:05 ` Yu Kuai
2026-02-02 8:05 ` [PATCH v9 3/8] blk-mq-debugfs: factor out a helper to register debugfs for all rq_qos Yu Kuai
` (6 subsequent siblings)
8 siblings, 0 replies; 11+ messages in thread
From: Yu Kuai @ 2026-02-02 8:05 UTC (permalink / raw)
To: Jens Axboe
Cc: linux-block, linux-kernel, Ming Lei, Nilay Shroff,
Hannes Reinecke, yukuai
If wbt is disabled by default and a user configures wbt via sysfs, the
queue will be frozen first and then pcpu_alloc_mutex will be held in
blk_stat_alloc_callback().
Fix this problem by allocating memory before the queue is frozen.
Signed-off-by: Yu Kuai <yukuai@fnnas.com>
Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
---
block/blk-wbt.c | 108 ++++++++++++++++++++++++++++--------------------
1 file changed, 63 insertions(+), 45 deletions(-)
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 0a37d97bda75..665760274e60 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -93,7 +93,7 @@ struct rq_wb {
struct rq_depth rq_depth;
};
-static int wbt_init(struct gendisk *disk);
+static int wbt_init(struct gendisk *disk, struct rq_wb *rwb);
static inline struct rq_wb *RQWB(struct rq_qos *rqos)
{
@@ -698,6 +698,41 @@ static void wbt_requeue(struct rq_qos *rqos, struct request *rq)
}
}
+static int wbt_data_dir(const struct request *rq)
+{
+ const enum req_op op = req_op(rq);
+
+ if (op == REQ_OP_READ)
+ return READ;
+ else if (op_is_write(op))
+ return WRITE;
+
+ /* don't account */
+ return -1;
+}
+
+static struct rq_wb *wbt_alloc(void)
+{
+ struct rq_wb *rwb = kzalloc(sizeof(*rwb), GFP_KERNEL);
+
+ if (!rwb)
+ return NULL;
+
+ rwb->cb = blk_stat_alloc_callback(wb_timer_fn, wbt_data_dir, 2, rwb);
+ if (!rwb->cb) {
+ kfree(rwb);
+ return NULL;
+ }
+
+ return rwb;
+}
+
+static void wbt_free(struct rq_wb *rwb)
+{
+ blk_stat_free_callback(rwb->cb);
+ kfree(rwb);
+}
+
/*
* Enable wbt if defaults are configured that way
*/
@@ -739,8 +774,17 @@ EXPORT_SYMBOL_GPL(wbt_enable_default);
void wbt_init_enable_default(struct gendisk *disk)
{
- if (__wbt_enable_default(disk))
- WARN_ON_ONCE(wbt_init(disk));
+ struct rq_wb *rwb;
+
+ if (!__wbt_enable_default(disk))
+ return;
+
+ rwb = wbt_alloc();
+ if (WARN_ON_ONCE(!rwb))
+ return;
+
+ if (WARN_ON_ONCE(wbt_init(disk, rwb)))
+ wbt_free(rwb);
}
static u64 wbt_default_latency_nsec(struct request_queue *q)
@@ -754,19 +798,6 @@ static u64 wbt_default_latency_nsec(struct request_queue *q)
return 2000000ULL;
}
-static int wbt_data_dir(const struct request *rq)
-{
- const enum req_op op = req_op(rq);
-
- if (op == REQ_OP_READ)
- return READ;
- else if (op_is_write(op))
- return WRITE;
-
- /* don't account */
- return -1;
-}
-
static void wbt_queue_depth_changed(struct rq_qos *rqos)
{
RQWB(rqos)->rq_depth.queue_depth = blk_queue_depth(rqos->disk->queue);
@@ -778,8 +809,7 @@ static void wbt_exit(struct rq_qos *rqos)
struct rq_wb *rwb = RQWB(rqos);
blk_stat_remove_callback(rqos->disk->queue, rwb->cb);
- blk_stat_free_callback(rwb->cb);
- kfree(rwb);
+ wbt_free(rwb);
}
/*
@@ -903,22 +933,11 @@ static const struct rq_qos_ops wbt_rqos_ops = {
#endif
};
-static int wbt_init(struct gendisk *disk)
+static int wbt_init(struct gendisk *disk, struct rq_wb *rwb)
{
struct request_queue *q = disk->queue;
- struct rq_wb *rwb;
- int i;
int ret;
-
- rwb = kzalloc(sizeof(*rwb), GFP_KERNEL);
- if (!rwb)
- return -ENOMEM;
-
- rwb->cb = blk_stat_alloc_callback(wb_timer_fn, wbt_data_dir, 2, rwb);
- if (!rwb->cb) {
- kfree(rwb);
- return -ENOMEM;
- }
+ int i;
for (i = 0; i < WBT_NUM_RWQ; i++)
rq_wait_init(&rwb->rq_wait[i]);
@@ -938,38 +957,38 @@ static int wbt_init(struct gendisk *disk)
ret = rq_qos_add(&rwb->rqos, disk, RQ_QOS_WBT, &wbt_rqos_ops);
mutex_unlock(&q->rq_qos_mutex);
if (ret)
- goto err_free;
+ return ret;
blk_stat_add_callback(q, rwb->cb);
-
return 0;
-
-err_free:
- blk_stat_free_callback(rwb->cb);
- kfree(rwb);
- return ret;
-
}
int wbt_set_lat(struct gendisk *disk, s64 val)
{
struct request_queue *q = disk->queue;
+ struct rq_qos *rqos = wbt_rq_qos(q);
+ struct rq_wb *rwb = NULL;
unsigned int memflags;
- struct rq_qos *rqos;
int ret = 0;
+ if (!rqos) {
+ rwb = wbt_alloc();
+ if (!rwb)
+ return -ENOMEM;
+ }
+
/*
* Ensure that the queue is idled, in case the latency update
* ends up either enabling or disabling wbt completely. We can't
* have IO inflight if that happens.
*/
memflags = blk_mq_freeze_queue(q);
-
- rqos = wbt_rq_qos(q);
if (!rqos) {
- ret = wbt_init(disk);
- if (ret)
+ ret = wbt_init(disk, rwb);
+ if (ret) {
+ wbt_free(rwb);
goto out;
+ }
}
if (val == -1)
@@ -989,6 +1008,5 @@ int wbt_set_lat(struct gendisk *disk, s64 val)
blk_mq_unquiesce_queue(q);
out:
blk_mq_unfreeze_queue(q, memflags);
-
return ret;
}
--
2.51.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v9 3/8] blk-mq-debugfs: factor out a helper to register debugfs for all rq_qos
2026-02-02 8:05 [PATCH v9 0/8] blk-mq: fix possible deadlocks Yu Kuai
2026-02-02 8:05 ` [PATCH v9 1/8] blk-wbt: factor out a helper wbt_set_lat() Yu Kuai
2026-02-02 8:05 ` [PATCH v9 2/8] blk-wbt: fix possible deadlock to nest pcpu_alloc_mutex under q_usage_counter Yu Kuai
@ 2026-02-02 8:05 ` Yu Kuai
2026-02-02 8:05 ` [PATCH v9 4/8] blk-rq-qos: fix possible debugfs_mutex deadlock Yu Kuai
` (5 subsequent siblings)
8 siblings, 0 replies; 11+ messages in thread
From: Yu Kuai @ 2026-02-02 8:05 UTC (permalink / raw)
To: Jens Axboe
Cc: linux-block, linux-kernel, Ming Lei, Nilay Shroff,
Hannes Reinecke, yukuai
There is already a helper blk_mq_debugfs_register_rqos() to register
one rqos, however this helper is called synchronously when the rqos is
created with queue frozen.
Prepare to fix possible deadlock to create blk-mq debugfs entries while
queue is still frozen.
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
Signed-off-by: Yu Kuai <yukuai@fnnas.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
---
block/blk-mq-debugfs.c | 23 +++++++++++++++--------
block/blk-mq-debugfs.h | 5 +++++
2 files changed, 20 insertions(+), 8 deletions(-)
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 4896525b1c05..4fe164b6d648 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -631,14 +631,7 @@ void blk_mq_debugfs_register(struct request_queue *q)
blk_mq_debugfs_register_hctx(q, hctx);
}
- if (q->rq_qos) {
- struct rq_qos *rqos = q->rq_qos;
-
- while (rqos) {
- blk_mq_debugfs_register_rqos(rqos);
- rqos = rqos->next;
- }
- }
+ blk_mq_debugfs_register_rq_qos(q);
}
static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx,
@@ -769,6 +762,20 @@ void blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
debugfs_create_files(rqos->debugfs_dir, rqos, rqos->ops->debugfs_attrs);
}
+void blk_mq_debugfs_register_rq_qos(struct request_queue *q)
+{
+ lockdep_assert_held(&q->debugfs_mutex);
+
+ if (q->rq_qos) {
+ struct rq_qos *rqos = q->rq_qos;
+
+ while (rqos) {
+ blk_mq_debugfs_register_rqos(rqos);
+ rqos = rqos->next;
+ }
+ }
+}
+
void blk_mq_debugfs_register_sched_hctx(struct request_queue *q,
struct blk_mq_hw_ctx *hctx)
{
diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h
index c80e453e3014..54948a266889 100644
--- a/block/blk-mq-debugfs.h
+++ b/block/blk-mq-debugfs.h
@@ -33,6 +33,7 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q,
struct blk_mq_hw_ctx *hctx);
void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx);
+void blk_mq_debugfs_register_rq_qos(struct request_queue *q);
void blk_mq_debugfs_register_rqos(struct rq_qos *rqos);
void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos);
#else
@@ -78,6 +79,10 @@ static inline void blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
{
}
+static inline void blk_mq_debugfs_register_rq_qos(struct request_queue *q)
+{
+}
+
static inline void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos)
{
}
--
2.51.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v9 4/8] blk-rq-qos: fix possible debugfs_mutex deadlock
2026-02-02 8:05 [PATCH v9 0/8] blk-mq: fix possible deadlocks Yu Kuai
` (2 preceding siblings ...)
2026-02-02 8:05 ` [PATCH v9 3/8] blk-mq-debugfs: factor out a helper to register debugfs for all rq_qos Yu Kuai
@ 2026-02-02 8:05 ` Yu Kuai
2026-02-02 8:05 ` [PATCH v9 5/8] blk-mq-debugfs: make blk_mq_debugfs_register_rqos() static Yu Kuai
` (4 subsequent siblings)
8 siblings, 0 replies; 11+ messages in thread
From: Yu Kuai @ 2026-02-02 8:05 UTC (permalink / raw)
To: Jens Axboe
Cc: linux-block, linux-kernel, Ming Lei, Nilay Shroff,
Hannes Reinecke, yukuai
Currently rq-qos debugfs entries are created from rq_qos_add(), while
rq_qos_add() can be called while the queue is still frozen. This can
deadlock because creating new entries can trigger fs reclaim.
Fix this problem by delaying creation of rq-qos debugfs entries until
after the queue is unfrozen.
- For wbt, 1) it can be initialized by default, fix it by calling new
helper after wbt_init() from wbt_init_enable_default(); 2) it can be
initialized by sysfs, fix it by calling new helper after queue is
unfrozen from wbt_set_lat().
- For iocost and iolatency, they can only be initialized by blkcg
configuration, however, they don't have debugfs entries for now, hence
they are not handled yet.
Signed-off-by: Yu Kuai <yukuai@fnnas.com>
Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
---
block/blk-rq-qos.c | 7 -------
block/blk-wbt.c | 13 ++++++++++++-
2 files changed, 12 insertions(+), 8 deletions(-)
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index 654478dfbc20..d7ce99ce2e80 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -347,13 +347,6 @@ int rq_qos_add(struct rq_qos *rqos, struct gendisk *disk, enum rq_qos_id id,
blk_queue_flag_set(QUEUE_FLAG_QOS_ENABLED, q);
blk_mq_unfreeze_queue(q, memflags);
-
- if (rqos->ops->debugfs_attrs) {
- mutex_lock(&q->debugfs_mutex);
- blk_mq_debugfs_register_rqos(rqos);
- mutex_unlock(&q->debugfs_mutex);
- }
-
return 0;
ebusy:
blk_mq_unfreeze_queue(q, memflags);
diff --git a/block/blk-wbt.c b/block/blk-wbt.c
index 665760274e60..1415f2bf8611 100644
--- a/block/blk-wbt.c
+++ b/block/blk-wbt.c
@@ -774,6 +774,7 @@ EXPORT_SYMBOL_GPL(wbt_enable_default);
void wbt_init_enable_default(struct gendisk *disk)
{
+ struct request_queue *q = disk->queue;
struct rq_wb *rwb;
if (!__wbt_enable_default(disk))
@@ -783,8 +784,14 @@ void wbt_init_enable_default(struct gendisk *disk)
if (WARN_ON_ONCE(!rwb))
return;
- if (WARN_ON_ONCE(wbt_init(disk, rwb)))
+ if (WARN_ON_ONCE(wbt_init(disk, rwb))) {
wbt_free(rwb);
+ return;
+ }
+
+ mutex_lock(&q->debugfs_mutex);
+ blk_mq_debugfs_register_rq_qos(q);
+ mutex_unlock(&q->debugfs_mutex);
}
static u64 wbt_default_latency_nsec(struct request_queue *q)
@@ -1008,5 +1015,9 @@ int wbt_set_lat(struct gendisk *disk, s64 val)
blk_mq_unquiesce_queue(q);
out:
blk_mq_unfreeze_queue(q, memflags);
+ mutex_lock(&q->debugfs_mutex);
+ blk_mq_debugfs_register_rq_qos(q);
+ mutex_unlock(&q->debugfs_mutex);
+
return ret;
}
--
2.51.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v9 5/8] blk-mq-debugfs: make blk_mq_debugfs_register_rqos() static
2026-02-02 8:05 [PATCH v9 0/8] blk-mq: fix possible deadlocks Yu Kuai
` (3 preceding siblings ...)
2026-02-02 8:05 ` [PATCH v9 4/8] blk-rq-qos: fix possible debugfs_mutex deadlock Yu Kuai
@ 2026-02-02 8:05 ` Yu Kuai
2026-02-02 8:05 ` [PATCH v9 6/8] blk-mq-debugfs: remove blk_mq_debugfs_unregister_rqos() Yu Kuai
` (3 subsequent siblings)
8 siblings, 0 replies; 11+ messages in thread
From: Yu Kuai @ 2026-02-02 8:05 UTC (permalink / raw)
To: Jens Axboe
Cc: linux-block, linux-kernel, Ming Lei, Nilay Shroff,
Hannes Reinecke, yukuai
Because it's only used inside blk-mq-debugfs.c now.
Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
Signed-off-by: Yu Kuai <yukuai@fnnas.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
---
block/blk-mq-debugfs.c | 2 +-
block/blk-mq-debugfs.h | 5 -----
2 files changed, 1 insertion(+), 6 deletions(-)
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 4fe164b6d648..11f00a868541 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -744,7 +744,7 @@ void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos)
rqos->debugfs_dir = NULL;
}
-void blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
+static void blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
{
struct request_queue *q = rqos->disk->queue;
const char *dir_name = rq_qos_id_to_name(rqos->id);
diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h
index 54948a266889..d94daa66556b 100644
--- a/block/blk-mq-debugfs.h
+++ b/block/blk-mq-debugfs.h
@@ -34,7 +34,6 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q,
void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx);
void blk_mq_debugfs_register_rq_qos(struct request_queue *q);
-void blk_mq_debugfs_register_rqos(struct rq_qos *rqos);
void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos);
#else
static inline void blk_mq_debugfs_register(struct request_queue *q)
@@ -75,10 +74,6 @@ static inline void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hc
{
}
-static inline void blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
-{
-}
-
static inline void blk_mq_debugfs_register_rq_qos(struct request_queue *q)
{
}
--
2.51.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v9 6/8] blk-mq-debugfs: remove blk_mq_debugfs_unregister_rqos()
2026-02-02 8:05 [PATCH v9 0/8] blk-mq: fix possible deadlocks Yu Kuai
` (4 preceding siblings ...)
2026-02-02 8:05 ` [PATCH v9 5/8] blk-mq-debugfs: make blk_mq_debugfs_register_rqos() static Yu Kuai
@ 2026-02-02 8:05 ` Yu Kuai
2026-02-02 8:05 ` [PATCH v9 7/8] blk-mq-debugfs: add missing debugfs_mutex in blk_mq_debugfs_register_hctxs() Yu Kuai
` (2 subsequent siblings)
8 siblings, 0 replies; 11+ messages in thread
From: Yu Kuai @ 2026-02-02 8:05 UTC (permalink / raw)
To: Jens Axboe
Cc: linux-block, linux-kernel, Ming Lei, Nilay Shroff,
Hannes Reinecke, yukuai
This helper is only used by iocost and iolatency, which don't have
debugfs entries.
Signed-off-by: Yu Kuai <yukuai@fnnas.com>
Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
---
block/blk-mq-debugfs.c | 10 ----------
block/blk-mq-debugfs.h | 4 ----
block/blk-rq-qos.c | 4 ----
3 files changed, 18 deletions(-)
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 11f00a868541..22c182b40bc3 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -734,16 +734,6 @@ static const char *rq_qos_id_to_name(enum rq_qos_id id)
return "unknown";
}
-void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos)
-{
- lockdep_assert_held(&rqos->disk->queue->debugfs_mutex);
-
- if (!rqos->disk->queue->debugfs_dir)
- return;
- debugfs_remove_recursive(rqos->debugfs_dir);
- rqos->debugfs_dir = NULL;
-}
-
static void blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
{
struct request_queue *q = rqos->disk->queue;
diff --git a/block/blk-mq-debugfs.h b/block/blk-mq-debugfs.h
index d94daa66556b..49bb1aaa83dc 100644
--- a/block/blk-mq-debugfs.h
+++ b/block/blk-mq-debugfs.h
@@ -34,7 +34,6 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q,
void blk_mq_debugfs_unregister_sched_hctx(struct blk_mq_hw_ctx *hctx);
void blk_mq_debugfs_register_rq_qos(struct request_queue *q);
-void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos);
#else
static inline void blk_mq_debugfs_register(struct request_queue *q)
{
@@ -78,9 +77,6 @@ static inline void blk_mq_debugfs_register_rq_qos(struct request_queue *q)
{
}
-static inline void blk_mq_debugfs_unregister_rqos(struct rq_qos *rqos)
-{
-}
#endif
#if defined(CONFIG_BLK_DEV_ZONED) && defined(CONFIG_BLK_DEBUG_FS)
diff --git a/block/blk-rq-qos.c b/block/blk-rq-qos.c
index d7ce99ce2e80..85cf74402a09 100644
--- a/block/blk-rq-qos.c
+++ b/block/blk-rq-qos.c
@@ -371,8 +371,4 @@ void rq_qos_del(struct rq_qos *rqos)
if (!q->rq_qos)
blk_queue_flag_clear(QUEUE_FLAG_QOS_ENABLED, q);
blk_mq_unfreeze_queue(q, memflags);
-
- mutex_lock(&q->debugfs_mutex);
- blk_mq_debugfs_unregister_rqos(rqos);
- mutex_unlock(&q->debugfs_mutex);
}
--
2.51.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v9 7/8] blk-mq-debugfs: add missing debugfs_mutex in blk_mq_debugfs_register_hctxs()
2026-02-02 8:05 [PATCH v9 0/8] blk-mq: fix possible deadlocks Yu Kuai
` (5 preceding siblings ...)
2026-02-02 8:05 ` [PATCH v9 6/8] blk-mq-debugfs: remove blk_mq_debugfs_unregister_rqos() Yu Kuai
@ 2026-02-02 8:05 ` Yu Kuai
2026-02-02 8:05 ` [PATCH v9 8/8] blk-mq-debugfs: warn about possible deadlock Yu Kuai
2026-02-02 15:21 ` [PATCH v9 0/8] blk-mq: fix possible deadlocks Jens Axboe
8 siblings, 0 replies; 11+ messages in thread
From: Yu Kuai @ 2026-02-02 8:05 UTC (permalink / raw)
To: Jens Axboe
Cc: linux-block, linux-kernel, Ming Lei, Nilay Shroff,
Hannes Reinecke, yukuai
In blk_mq_update_nr_hw_queues(), debugfs_mutex is not held while
creating debugfs entries for hctxs. Hence add debugfs_mutex there;
this is safe because the queue is not frozen.
Signed-off-by: Yu Kuai <yukuai@fnnas.com>
Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
---
block/blk-mq-debugfs.c | 2 ++
1 file changed, 2 insertions(+)
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 22c182b40bc3..5c7cadf51a88 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -679,8 +679,10 @@ void blk_mq_debugfs_register_hctxs(struct request_queue *q)
struct blk_mq_hw_ctx *hctx;
unsigned long i;
+ mutex_lock(&q->debugfs_mutex);
queue_for_each_hw_ctx(q, hctx, i)
blk_mq_debugfs_register_hctx(q, hctx);
+ mutex_unlock(&q->debugfs_mutex);
}
void blk_mq_debugfs_unregister_hctxs(struct request_queue *q)
--
2.51.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* [PATCH v9 8/8] blk-mq-debugfs: warn about possible deadlock
2026-02-02 8:05 [PATCH v9 0/8] blk-mq: fix possible deadlocks Yu Kuai
` (6 preceding siblings ...)
2026-02-02 8:05 ` [PATCH v9 7/8] blk-mq-debugfs: add missing debugfs_mutex in blk_mq_debugfs_register_hctxs() Yu Kuai
@ 2026-02-02 8:05 ` Yu Kuai
2026-02-06 9:26 ` kernel test robot
2026-02-02 15:21 ` [PATCH v9 0/8] blk-mq: fix possible deadlocks Jens Axboe
8 siblings, 1 reply; 11+ messages in thread
From: Yu Kuai @ 2026-02-02 8:05 UTC (permalink / raw)
To: Jens Axboe
Cc: linux-block, linux-kernel, Ming Lei, Nilay Shroff,
Hannes Reinecke, yukuai
Creating new debugfs entries can trigger fs reclaim, hence we can't do
this with the queue frozen; likewise, other locks that can be held while
the queue is frozen should not be held either.
Signed-off-by: Yu Kuai <yukuai@fnnas.com>
Reviewed-by: Nilay Shroff <nilay@linux.ibm.com>
Reviewed-by: Ming Lei <ming.lei@redhat.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
---
block/blk-mq-debugfs.c | 31 ++++++++++++++++++++++++-------
1 file changed, 24 insertions(+), 7 deletions(-)
diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 5c7cadf51a88..faeaa1fc86a7 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -608,9 +608,23 @@ static const struct blk_mq_debugfs_attr blk_mq_debugfs_ctx_attrs[] = {
{},
};
-static void debugfs_create_files(struct dentry *parent, void *data,
+static void debugfs_create_files(struct request_queue *q, struct dentry *parent,
+ void *data,
const struct blk_mq_debugfs_attr *attr)
{
+ lockdep_assert_held(&q->debugfs_mutex);
+ /*
+ * Creating new debugfs entries with the queue frozen has the risk of
+ * deadlock.
+ */
+ WARN_ON_ONCE(q->mq_freeze_depth != 0);
+ /*
+ * debugfs_mutex should not be nested under other locks that can be
+ * grabbed while queue is frozen.
+ */
+ lockdep_assert_not_held(&q->elevator_lock);
+ lockdep_assert_not_held(&q->rq_qos_mutex);
+
if (IS_ERR_OR_NULL(parent))
return;
@@ -624,7 +638,7 @@ void blk_mq_debugfs_register(struct request_queue *q)
struct blk_mq_hw_ctx *hctx;
unsigned long i;
- debugfs_create_files(q->debugfs_dir, q, blk_mq_debugfs_queue_attrs);
+ debugfs_create_files(q, q->debugfs_dir, q, blk_mq_debugfs_queue_attrs);
queue_for_each_hw_ctx(q, hctx, i) {
if (!hctx->debugfs_dir)
@@ -643,7 +657,8 @@ static void blk_mq_debugfs_register_ctx(struct blk_mq_hw_ctx *hctx,
snprintf(name, sizeof(name), "cpu%u", ctx->cpu);
ctx_dir = debugfs_create_dir(name, hctx->debugfs_dir);
- debugfs_create_files(ctx_dir, ctx, blk_mq_debugfs_ctx_attrs);
+ debugfs_create_files(hctx->queue, ctx_dir, ctx,
+ blk_mq_debugfs_ctx_attrs);
}
void blk_mq_debugfs_register_hctx(struct request_queue *q,
@@ -659,7 +674,8 @@ void blk_mq_debugfs_register_hctx(struct request_queue *q,
snprintf(name, sizeof(name), "hctx%u", hctx->queue_num);
hctx->debugfs_dir = debugfs_create_dir(name, q->debugfs_dir);
- debugfs_create_files(hctx->debugfs_dir, hctx, blk_mq_debugfs_hctx_attrs);
+ debugfs_create_files(q, hctx->debugfs_dir, hctx,
+ blk_mq_debugfs_hctx_attrs);
hctx_for_each_ctx(hctx, ctx, i)
blk_mq_debugfs_register_ctx(hctx, ctx);
@@ -712,7 +728,7 @@ void blk_mq_debugfs_register_sched(struct request_queue *q)
q->sched_debugfs_dir = debugfs_create_dir("sched", q->debugfs_dir);
- debugfs_create_files(q->sched_debugfs_dir, q, e->queue_debugfs_attrs);
+ debugfs_create_files(q, q->sched_debugfs_dir, q, e->queue_debugfs_attrs);
}
void blk_mq_debugfs_unregister_sched(struct request_queue *q)
@@ -751,7 +767,8 @@ static void blk_mq_debugfs_register_rqos(struct rq_qos *rqos)
q->debugfs_dir);
rqos->debugfs_dir = debugfs_create_dir(dir_name, q->rqos_debugfs_dir);
- debugfs_create_files(rqos->debugfs_dir, rqos, rqos->ops->debugfs_attrs);
+ debugfs_create_files(q, rqos->debugfs_dir, rqos,
+ rqos->ops->debugfs_attrs);
}
void blk_mq_debugfs_register_rq_qos(struct request_queue *q)
@@ -788,7 +805,7 @@ void blk_mq_debugfs_register_sched_hctx(struct request_queue *q,
hctx->sched_debugfs_dir = debugfs_create_dir("sched",
hctx->debugfs_dir);
- debugfs_create_files(hctx->sched_debugfs_dir, hctx,
+ debugfs_create_files(q, hctx->sched_debugfs_dir, hctx,
e->hctx_debugfs_attrs);
}
--
2.51.0
^ permalink raw reply related [flat|nested] 11+ messages in thread
* Re: [PATCH v9 0/8] blk-mq: fix possible deadlocks
2026-02-02 8:05 [PATCH v9 0/8] blk-mq: fix possible deadlocks Yu Kuai
` (7 preceding siblings ...)
2026-02-02 8:05 ` [PATCH v9 8/8] blk-mq-debugfs: warn about possible deadlock Yu Kuai
@ 2026-02-02 15:21 ` Jens Axboe
8 siblings, 0 replies; 11+ messages in thread
From: Jens Axboe @ 2026-02-02 15:21 UTC (permalink / raw)
To: Yu Kuai; +Cc: linux-block, linux-kernel, Ming Lei, Nilay Shroff,
Hannes Reinecke
On Mon, 02 Feb 2026 16:05:15 +0800, Yu Kuai wrote:
> Fix deadlocks:
> - patch 1-2, pcpu_alloc_mutex under q_usage_counter in blk-wbt;
> - patch 3-8, debugfs_mutex under q_usage_counter;
>
> Yu Kuai (8):
> blk-wbt: factor out a helper wbt_set_lat()
> blk-wbt: fix possible deadlock to nest pcpu_alloc_mutex under
> q_usage_counter
> blk-mq-debugfs: factor out a helper to register debugfs for all rq_qos
> blk-rq-qos: fix possible debugfs_mutex deadlock
> blk-mq-debugfs: make blk_mq_debugfs_register_rqos() static
> blk-mq-debugfs: remove blk_mq_debugfs_unregister_rqos()
> blk-mq-debugfs: add missing debugfs_mutex in
> blk_mq_debugfs_register_hctxs()
> blk-mq-debugfs: warn about possible deadlock
>
> [...]
Applied, thanks!
[1/8] blk-wbt: factor out a helper wbt_set_lat()
commit: 2751b90051a0211ed7c78f26eb2a9b7038804b9b
[2/8] blk-wbt: fix possible deadlock to nest pcpu_alloc_mutex under q_usage_counter
commit: 41afaeeda5099d9cd07eaa7dc6c3d20c6f1dd9e9
[3/8] blk-mq-debugfs: factor out a helper to register debugfs for all rq_qos
commit: 3f0bea9f3b9e7d9bdc3794103575168ef007d309
[4/8] blk-rq-qos: fix possible debugfs_mutex deadlock
commit: 3c17a346ffc613615f48c6f1ed30cdf328bab805
[5/8] blk-mq-debugfs: make blk_mq_debugfs_register_rqos() static
commit: 70bafa5e31ff979c4c38ac9838cc960a32c04f49
[6/8] blk-mq-debugfs: remove blk_mq_debugfs_unregister_rqos()
commit: 5ae4b12ee6422a816efca4ede8411e4d5503b5ac
[7/8] blk-mq-debugfs: add missing debugfs_mutex in blk_mq_debugfs_register_hctxs()
commit: 9d20fd6ce1ba9733cd5ac96fcab32faa9fc404dd
[8/8] blk-mq-debugfs: warn about possible deadlock
commit: 65d466b6298470ce21ab21ebfdb51309d515737d
Best regards,
--
Jens Axboe
^ permalink raw reply [flat|nested] 11+ messages in thread
* Re: [PATCH v9 8/8] blk-mq-debugfs: warn about possible deadlock
2026-02-02 8:05 ` [PATCH v9 8/8] blk-mq-debugfs: warn about possible deadlock Yu Kuai
@ 2026-02-06 9:26 ` kernel test robot
0 siblings, 0 replies; 11+ messages in thread
From: kernel test robot @ 2026-02-06 9:26 UTC (permalink / raw)
To: Yu Kuai
Cc: oe-lkp, lkp, Nilay Shroff, Ming Lei, Hannes Reinecke, linux-block,
Jens Axboe, linux-kernel, yukuai, oliver.sang
Hello,
kernel test robot noticed "blktests.nvme/040.fail" on:
commit: a228828b6a29e3787c2d4f30b966b4e723436491 ("[PATCH v9 8/8] blk-mq-debugfs: warn about possible deadlock")
url: https://github.com/intel-lab-lkp/linux/commits/Yu-Kuai/blk-wbt-factor-out-a-helper-wbt_set_lat/20260202-161435
base: https://git.kernel.org/cgit/linux/kernel/git/axboe/linux.git for-next
patch link: https://lore.kernel.org/all/20260202080523.3947504-9-yukuai@fnnas.com/
patch subject: [PATCH v9 8/8] blk-mq-debugfs: warn about possible deadlock
in testcase: blktests
version: blktests-x86_64-5885dee-1_20260203
with following parameters:
disk: 1SSD
test: nvme-040
nvme_trtype: rdma
use_siw: true
config: x86_64-rhel-9.4-func
compiler: gcc-14
test machine: 224 threads 2 sockets Intel(R) Xeon(R) Platinum 8480+ (Sapphire Rapids) with 256G memory
(please refer to attached dmesg/kmsg for entire log/backtrace)
If you fix the issue in a separate patch/commit (i.e. not just a new version of
the same patch/commit), kindly add following tags
| Reported-by: kernel test robot <oliver.sang@intel.com>
| Closes: https://lore.kernel.org/oe-lkp/202602061756.96736e8f-lkp@intel.com
2026-02-04 16:08:35 cd /lkp/benchmarks/blktests
2026-02-04 16:08:35 mkdir -p /mnt/nvme-040
2026-02-04 16:08:35 mount /dev/nvme0n1p1 /mnt/nvme-040
2026-02-04 16:08:35 echo nvme/040
2026-02-04 16:08:35 ./check -o /mnt/nvme-040 nvme/040
nvme/040 (tr=rdma) (test nvme fabrics controller reset/disconnect operation during I/O)
runtime 8.568s ...
nvme/040 (tr=rdma) (test nvme fabrics controller reset/disconnect operation during I/O) [failed]
runtime 8.568s ... 8.988s
something found in dmesg:
[ 161.504240] [ T3651] run blktests nvme/040 at 2026-02-04 16:08:36
[ 161.567237] [ T4057] loop0: detected capacity change from 0 to 2097152
[ 161.581589] [ T4062] nvmet: adding nsid 1 to subsystem blktests-subsystem-1
[ 161.598986] [ T4067] iwpm_register_pid: Unable to send a nlmsg (client = 2)
[ 161.608814] [ T4067] nvmet_rdma: enabling port 0 (192.168.3.121:4420)
[ 161.658405] [ T1863] nvmet: Created nvm controller 1 for subsystem blktests-subsystem-1 for NQN nqn.2014-08.org.nvmexpress:uuid:0f01fb42-9f7f-4856-b0b3-51e60b8de349.
[ 161.686065] [ T4074] nvme nvme2: creating 128 I/O queues.
[ 162.191928] [ T4074] nvme nvme2: mapped 128/0/0 default/read/poll queues.
[ 162.263779] [ T4074] nvme nvme2: new ctrl: NQN "blktests-subsystem-1", addr 192.168.3.121:4420, hostnqn: nqn.2014-08.org.nvmexpress:uuid:0f01fb42-9f7f-4856-b0b3-51e60b8de349
[ 162.358452] [ T4108] block nvme0n1: No UUID available providing old NGUID
...
(See '/mnt/nvme-040/nodev_tr_rdma/nvme/040.dmesg' for the entire message)
The kernel config and materials to reproduce are available at:
https://download.01.org/0day-ci/archive/20260206/202602061756.96736e8f-lkp@intel.com
--
0-DAY CI Kernel Test Service
https://github.com/intel/lkp-tests/wiki
^ permalink raw reply [flat|nested] 11+ messages in thread
end of thread, other threads:[~2026-02-06 9:27 UTC | newest]
Thread overview: 11+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2026-02-02 8:05 [PATCH v9 0/8] blk-mq: fix possible deadlocks Yu Kuai
2026-02-02 8:05 ` [PATCH v9 1/8] blk-wbt: factor out a helper wbt_set_lat() Yu Kuai
2026-02-02 8:05 ` [PATCH v9 2/8] blk-wbt: fix possible deadlock to nest pcpu_alloc_mutex under q_usage_counter Yu Kuai
2026-02-02 8:05 ` [PATCH v9 3/8] blk-mq-debugfs: factor out a helper to register debugfs for all rq_qos Yu Kuai
2026-02-02 8:05 ` [PATCH v9 4/8] blk-rq-qos: fix possible debugfs_mutex deadlock Yu Kuai
2026-02-02 8:05 ` [PATCH v9 5/8] blk-mq-debugfs: make blk_mq_debugfs_register_rqos() static Yu Kuai
2026-02-02 8:05 ` [PATCH v9 6/8] blk-mq-debugfs: remove blk_mq_debugfs_unregister_rqos() Yu Kuai
2026-02-02 8:05 ` [PATCH v9 7/8] blk-mq-debugfs: add missing debugfs_mutex in blk_mq_debugfs_register_hctxs() Yu Kuai
2026-02-02 8:05 ` [PATCH v9 8/8] blk-mq-debugfs: warn about possible deadlock Yu Kuai
2026-02-06 9:26 ` kernel test robot
2026-02-02 15:21 ` [PATCH v9 0/8] blk-mq: fix possible deadlocks Jens Axboe
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox