From: Ming Lei <ming.lei@redhat.com>
To: Jens Axboe <axboe@kernel.dk>, linux-block@vger.kernel.org
Cc: "Nilay Shroff" <nilay@linux.ibm.com>,
"Shinichiro Kawasaki" <shinichiro.kawasaki@wdc.com>,
"Thomas Hellström" <thomas.hellstrom@linux.intel.com>,
"Christoph Hellwig" <hch@lst.de>,
"Ming Lei" <ming.lei@redhat.com>,
"Hannes Reinecke" <hare@suse.de>
Subject: [PATCH V5 21/25] block: move elv_register[unregister]_queue out of elevator_lock
Date: Mon, 5 May 2025 22:17:59 +0800 [thread overview]
Message-ID: <20250505141805.2751237-22-ming.lei@redhat.com> (raw)
In-Reply-To: <20250505141805.2751237-1-ming.lei@redhat.com>
Move elv_register[unregister]_queue out of ->elevator_lock & queue freezing,
so we can kill many lockdep warnings.
elv_register[unregister]_queue() is serialized and only deals with sysfs/
debugfs things, so it does not need to be done with the queue frozen:
- when it is called from adding disk, elevator switch isn't possible
because ->queue_kobj isn't added yet
- when it is called from deleting disk, disable_elv_switch() is
responsible for preventing new elevator switch and draining old
elevator switch.
- when it is called from blk_mq_update_nr_hw_queues(), adding/removing
disk and elevator switch are neither allowed nor in progress
With this change, the elevator's ->exit() is called before calling
elv_unregister_queue(), so the user may still call into ->show()/->store() of the
elevator's sysfs attributes; this case is already covered by `ELEVATOR_FLAG_DYING`.
For blk-mq debugfs, hctx->sched_tags is always checked with ->elevator_lock held by
the debugfs code, and hctx->sched_tags is also updated with ->elevator_lock held, so
there is no such issue.
Reviewed-by: Hannes Reinecke <hare@suse.de>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
block/blk-mq.c | 3 +--
block/elevator.c | 68 ++++++++++++++++++++++++++++++++++++------------
2 files changed, 53 insertions(+), 18 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 29f67b0e1fd5..d1b5b75840eb 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -5043,11 +5043,10 @@ static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
blk_mq_debugfs_register_hctxs(q);
}
+ /* elv_update_nr_hw_queues() unfreezes the queue for us */
list_for_each_entry(q, &set->tag_list, tag_set_list)
elv_update_nr_hw_queues(q);
- list_for_each_entry(q, &set->tag_list, tag_set_list)
- blk_mq_unfreeze_queue_nomemrestore(q);
memalloc_noio_restore(memflags);
/* Free the excess tags when nr_hw_queues shrink. */
diff --git a/block/elevator.c b/block/elevator.c
index f7e333abefe3..8578b969e173 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -49,6 +49,11 @@
struct elv_change_ctx {
const char *name;
bool no_uevent;
+
+ /* for unregistering old elevator */
+ struct elevator_queue *old;
+ /* for registering new elevator */
+ struct elevator_queue *new;
};
static DEFINE_SPINLOCK(elv_list_lock);
@@ -158,14 +163,14 @@ static void elevator_exit(struct request_queue *q)
{
struct elevator_queue *e = q->elevator;
+ lockdep_assert_held(&q->elevator_lock);
+
ioc_clear_queue(q);
blk_mq_sched_free_rqs(q);
mutex_lock(&e->sysfs_lock);
blk_mq_exit_sched(q, e);
mutex_unlock(&e->sysfs_lock);
-
- kobject_put(&e->kobj);
}
static inline void __elv_rqhash_del(struct request *rq)
@@ -466,8 +471,6 @@ static int elv_register_queue(struct request_queue *q,
{
int error;
- lockdep_assert_held(&q->elevator_lock);
-
error = kobject_add(&e->kobj, &q->disk->queue_kobj, "iosched");
if (!error) {
const struct elv_fs_entry *attr = e->type->elevator_attrs;
@@ -494,8 +497,6 @@ static int elv_register_queue(struct request_queue *q,
static void elv_unregister_queue(struct request_queue *q,
struct elevator_queue *e)
{
- lockdep_assert_held(&q->elevator_lock);
-
if (e && test_and_clear_bit(ELEVATOR_FLAG_REGISTERED, &e->flags)) {
kobject_uevent(&e->kobj, KOBJ_REMOVE);
kobject_del(&e->kobj);
@@ -586,7 +587,7 @@ static int elevator_switch(struct request_queue *q, struct elv_change_ctx *ctx)
blk_mq_quiesce_queue(q);
if (q->elevator) {
- elv_unregister_queue(q, q->elevator);
+ ctx->old = q->elevator;
elevator_exit(q);
}
@@ -594,11 +595,7 @@ static int elevator_switch(struct request_queue *q, struct elv_change_ctx *ctx)
ret = blk_mq_init_sched(q, new_e);
if (ret)
goto out_unfreeze;
- ret = elv_register_queue(q, q->elevator, !ctx->no_uevent);
- if (ret) {
- elevator_exit(q);
- goto out_unfreeze;
- }
+ ctx->new = q->elevator;
} else {
blk_queue_flag_clear(QUEUE_FLAG_SQ_SCHED, q);
q->elevator = NULL;
@@ -619,6 +616,38 @@ static int elevator_switch(struct request_queue *q, struct elv_change_ctx *ctx)
return ret;
}
+static void elv_exit_and_release(struct request_queue *q)
+{
+ struct elevator_queue *e;
+ unsigned memflags;
+
+ memflags = blk_mq_freeze_queue(q);
+ mutex_lock(&q->elevator_lock);
+ e = q->elevator;
+ elevator_exit(q);
+ mutex_unlock(&q->elevator_lock);
+ blk_mq_unfreeze_queue(q, memflags);
+ if (e)
+ kobject_put(&e->kobj);
+}
+
+static int elevator_change_done(struct request_queue *q,
+ struct elv_change_ctx *ctx)
+{
+ int ret = 0;
+
+ if (ctx->old) {
+ elv_unregister_queue(q, ctx->old);
+ kobject_put(&ctx->old->kobj);
+ }
+ if (ctx->new) {
+ ret = elv_register_queue(q, ctx->new, !ctx->no_uevent);
+ if (ret)
+ elv_exit_and_release(q);
+ }
+ return ret;
+}
+
/*
* Switch this queue to the given IO scheduler.
*/
@@ -645,6 +674,9 @@ static int elevator_change(struct request_queue *q, struct elv_change_ctx *ctx)
ret = elevator_switch(q, ctx);
mutex_unlock(&q->elevator_lock);
blk_mq_unfreeze_queue(q, memflags);
+ if (!ret)
+ ret = elevator_change_done(q, ctx);
+
return ret;
}
@@ -654,18 +686,22 @@ static int elevator_change(struct request_queue *q, struct elv_change_ctx *ctx)
*/
void elv_update_nr_hw_queues(struct request_queue *q)
{
+ struct elv_change_ctx ctx = {};
+ int ret = -ENODEV;
+
WARN_ON_ONCE(q->mq_freeze_depth == 0);
mutex_lock(&q->elevator_lock);
if (q->elevator && !blk_queue_dying(q) && !blk_queue_registered(q)) {
- struct elv_change_ctx ctx = {
- .name = q->elevator->type->elevator_name,
- };
+ ctx.name = q->elevator->type->elevator_name;
/* force to reattach elevator after nr_hw_queue is updated */
- elevator_switch(q, &ctx);
+ ret = elevator_switch(q, &ctx);
}
mutex_unlock(&q->elevator_lock);
+ blk_mq_unfreeze_queue_nomemrestore(q);
+ if (!ret)
+ WARN_ON_ONCE(elevator_change_done(q, &ctx));
}
/*
--
2.47.0
next prev parent reply other threads:[~2025-05-05 14:19 UTC|newest]
Thread overview: 42+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-05-05 14:17 [PATCH V5 00/25] block: unify elevator changing and fix lockdep warning Ming Lei
2025-05-05 14:17 ` [PATCH V5 01/25] block: move blk_mq_add_queue_tag_set() after blk_mq_map_swqueue() Ming Lei
2025-05-05 14:17 ` [PATCH V5 02/25] block: move ELEVATOR_FLAG_DISABLE_WBT a request queue flag Ming Lei
2025-05-05 14:17 ` [PATCH V5 03/25] block: don't call freeze queue in elevator_switch() and elevator_disable() Ming Lei
2025-05-05 14:17 ` [PATCH V5 04/25] block: use q->elevator with ->elevator_lock held in elv_iosched_show() Ming Lei
2025-05-05 14:17 ` [PATCH V5 05/25] block: add two helpers for registering/un-registering sched debugfs Ming Lei
2025-05-05 14:17 ` [PATCH V5 06/25] block: move sched debugfs register into elvevator_register_queue Ming Lei
2025-05-05 14:17 ` [PATCH V5 07/25] block: add helper add_disk_final() Ming Lei
2025-05-06 4:40 ` Christoph Hellwig
2025-05-06 7:43 ` Nilay Shroff
2025-05-06 11:02 ` Hannes Reinecke
2025-05-05 14:17 ` [PATCH V5 08/25] block: prevent adding/deleting disk during updating nr_hw_queues Ming Lei
2025-05-05 14:17 ` [PATCH V5 09/25] block: don't allow to switch elevator if updating nr_hw_queues is in-progress Ming Lei
2025-05-06 4:41 ` Christoph Hellwig
2025-05-06 6:26 ` Nilay Shroff
2025-05-05 14:17 ` [PATCH V5 10/25] block: look up the elevator type in elevator_switch Ming Lei
2025-05-05 14:17 ` [PATCH V5 11/25] block: fold elevator_disable into elevator_switch Ming Lei
2025-05-05 14:17 ` [PATCH V5 12/25] block: move blk_queue_registered() check into elv_iosched_store() Ming Lei
2025-05-06 4:41 ` Christoph Hellwig
2025-05-06 7:47 ` Nilay Shroff
2025-05-05 14:17 ` [PATCH V5 13/25] block: simplify elevator reattachment for updating nr_hw_queues Ming Lei
2025-05-05 14:17 ` [PATCH V5 14/25] block: move queue freezing & elevator_lock into elevator_change() Ming Lei
2025-05-05 14:17 ` [PATCH V5 15/25] block: add `struct elv_change_ctx` for unifying elevator change Ming Lei
2025-05-06 4:42 ` Christoph Hellwig
2025-05-05 14:17 ` [PATCH V5 16/25] block: " Ming Lei
2025-05-05 14:17 ` [PATCH V5 17/25] block: pass elevator_queue to elv_register_queue & unregister_queue Ming Lei
2025-05-05 14:17 ` [PATCH V5 18/25] block: remove elevator queue's type check in elv_attr_show/store() Ming Lei
2025-05-05 14:17 ` [PATCH V5 19/25] block: fail to show/store elevator sysfs attribute if elevator is dying Ming Lei
2025-05-06 11:09 ` Hannes Reinecke
2025-05-05 14:17 ` [PATCH V5 20/25] block: add new helper for disabling elevator switch when deleting disk Ming Lei
2025-05-06 6:32 ` Nilay Shroff
2025-05-05 14:17 ` Ming Lei [this message]
2025-05-06 4:43 ` [PATCH V5 21/25] block: move elv_register[unregister]_queue out of elevator_lock Christoph Hellwig
2025-05-06 6:36 ` Nilay Shroff
2025-05-05 14:18 ` [PATCH V5 22/25] block: move hctx debugfs/sysfs registering out of freezing queue Ming Lei
2025-05-05 14:18 ` [PATCH V5 23/25] block: don't acquire ->elevator_lock in blk_mq_map_swqueue and blk_mq_realloc_hw_ctxs Ming Lei
2025-05-06 4:44 ` Christoph Hellwig
2025-05-05 14:18 ` [PATCH V5 24/25] block: move hctx cpuhp add/del out of queue freezing Ming Lei
2025-05-06 4:44 ` Christoph Hellwig
2025-05-05 14:18 ` [PATCH V5 25/25] block: move wbt_enable_default() out of queue freezing from sched ->exit() Ming Lei
2025-05-06 4:44 ` Christoph Hellwig
2025-05-06 13:48 ` [PATCH V5 00/25] block: unify elevator changing and fix lockdep warning Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250505141805.2751237-22-ming.lei@redhat.com \
--to=ming.lei@redhat.com \
--cc=axboe@kernel.dk \
--cc=hare@suse.de \
--cc=hch@lst.de \
--cc=linux-block@vger.kernel.org \
--cc=nilay@linux.ibm.com \
--cc=shinichiro.kawasaki@wdc.com \
--cc=thomas.hellstrom@linux.intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox