From: Nilay Shroff <nilay@linux.ibm.com>
To: linux-block@vger.kernel.org
Cc: hch@lst.de, ming.lei@redhat.com, dlemoal@kernel.org,
hare@suse.de, axboe@kernel.dk, gjoyce@ibm.com
Subject: [PATCHv3 4/7] block: Introduce a dedicated lock for protecting queue elevator updates
Date: Mon, 24 Feb 2025 19:00:55 +0530 [thread overview]
Message-ID: <20250224133102.1240146-5-nilay@linux.ibm.com> (raw)
In-Reply-To: <20250224133102.1240146-1-nilay@linux.ibm.com>
A queue's elevator can be updated either when modifying nr_hw_queues
or through the sysfs scheduler attribute. Currently, elevator switching/
updating is protected using q->sysfs_lock, but this has led to lockdep
splats[1] due to inconsistent lock ordering between q->sysfs_lock and
the freeze-lock in multiple block layer call sites.
As the scope of q->sysfs_lock is not well-defined, its (mis)use has
resulted in numerous lockdep warnings. To address this, introduce a new
q->elevator_lock, dedicated specifically for protecting elevator
switches/updates. From now on, use q->elevator_lock instead of
q->sysfs_lock to protect elevator switches/updates.
While at it, make elv_iosched_load_module() a static function, as it is
only called from elv_iosched_store(). Also, remove the redundant parameters
from the elv_iosched_load_module() function signature.
[1] https://lore.kernel.org/all/67637e70.050a0220.3157ee.000c.GAE@google.com/
Signed-off-by: Nilay Shroff <nilay@linux.ibm.com>
---
block/blk-core.c | 1 +
block/blk-mq.c | 15 +++++++--------
block/blk-sysfs.c | 32 ++++++++++++++++++++++----------
block/elevator.c | 35 ++++++++++++++++-------------------
block/elevator.h | 2 --
block/genhd.c | 9 ++++++---
include/linux/blkdev.h | 5 +++++
7 files changed, 57 insertions(+), 42 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index d6c4fa3943b5..362d0a55b07a 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -429,6 +429,7 @@ struct request_queue *blk_alloc_queue(struct queue_limits *lim, int node_id)
refcount_set(&q->refs, 1);
mutex_init(&q->debugfs_mutex);
+ mutex_init(&q->elevator_lock);
mutex_init(&q->sysfs_lock);
mutex_init(&q->limits_lock);
mutex_init(&q->rq_qos_mutex);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 40490ac88045..474beae6cff2 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -4467,7 +4467,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
unsigned long i, j;
/* protect against switching io scheduler */
- mutex_lock(&q->sysfs_lock);
+ mutex_lock(&q->elevator_lock);
for (i = 0; i < set->nr_hw_queues; i++) {
int old_node;
int node = blk_mq_get_hctx_node(set, i);
@@ -4500,7 +4500,7 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
xa_for_each_start(&q->hctx_table, j, hctx, j)
blk_mq_exit_hctx(q, set, hctx, j);
- mutex_unlock(&q->sysfs_lock);
+ mutex_unlock(&q->elevator_lock);
/* unregister cpuhp callbacks for exited hctxs */
blk_mq_remove_hw_queues_cpuhp(q);
@@ -4933,10 +4933,9 @@ static bool blk_mq_elv_switch_none(struct list_head *head,
if (!qe)
return false;
- /* q->elevator needs protection from ->sysfs_lock */
- mutex_lock(&q->sysfs_lock);
+ /* accessing q->elevator needs protection from ->elevator_lock */
+ mutex_lock(&q->elevator_lock);
- /* the check has to be done with holding sysfs_lock */
if (!q->elevator) {
kfree(qe);
goto unlock;
@@ -4950,7 +4949,7 @@ static bool blk_mq_elv_switch_none(struct list_head *head,
list_add(&qe->node, head);
elevator_disable(q);
unlock:
- mutex_unlock(&q->sysfs_lock);
+ mutex_unlock(&q->elevator_lock);
return true;
}
@@ -4980,11 +4979,11 @@ static void blk_mq_elv_switch_back(struct list_head *head,
list_del(&qe->node);
kfree(qe);
- mutex_lock(&q->sysfs_lock);
+ mutex_lock(&q->elevator_lock);
elevator_switch(q, t);
/* drop the reference acquired in blk_mq_elv_switch_none */
elevator_put(t);
- mutex_unlock(&q->sysfs_lock);
+ mutex_unlock(&q->elevator_lock);
}
static void __blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set,
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index 83f78d2e1cd3..148b127e7f04 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -693,10 +693,15 @@ static struct attribute *blk_mq_queue_attrs[] = {
* attributes protected with q->sysfs_lock
*/
&queue_requests_entry.attr,
- &elv_iosched_entry.attr,
#ifdef CONFIG_BLK_WBT
&queue_wb_lat_entry.attr,
#endif
+ /*
+ * attributes which require some form of locking
+ * other than q->sysfs_lock
+ */
+ &elv_iosched_entry.attr,
+
/*
* attributes which don't require locking
*/
@@ -865,15 +870,19 @@ int blk_register_queue(struct gendisk *disk)
if (ret)
goto out_debugfs_remove;
+ ret = blk_crypto_sysfs_register(disk);
+ if (ret)
+ goto out_unregister_ia_ranges;
+
+ mutex_lock(&q->elevator_lock);
if (q->elevator) {
ret = elv_register_queue(q, false);
- if (ret)
- goto out_unregister_ia_ranges;
+ if (ret) {
+ mutex_unlock(&q->elevator_lock);
+ goto out_crypto_sysfs_unregister;
+ }
}
-
- ret = blk_crypto_sysfs_register(disk);
- if (ret)
- goto out_elv_unregister;
+ mutex_unlock(&q->elevator_lock);
blk_queue_flag_set(QUEUE_FLAG_REGISTERED, q);
wbt_enable_default(disk);
@@ -898,8 +907,8 @@ int blk_register_queue(struct gendisk *disk)
return ret;
-out_elv_unregister:
- elv_unregister_queue(q);
+out_crypto_sysfs_unregister:
+ blk_crypto_sysfs_unregister(disk);
out_unregister_ia_ranges:
disk_unregister_independent_access_ranges(disk);
out_debugfs_remove:
@@ -945,8 +954,11 @@ void blk_unregister_queue(struct gendisk *disk)
blk_mq_sysfs_unregister(disk);
blk_crypto_sysfs_unregister(disk);
- mutex_lock(&q->sysfs_lock);
+ mutex_lock(&q->elevator_lock);
elv_unregister_queue(q);
+ mutex_unlock(&q->elevator_lock);
+
+ mutex_lock(&q->sysfs_lock);
disk_unregister_independent_access_ranges(disk);
mutex_unlock(&q->sysfs_lock);
diff --git a/block/elevator.c b/block/elevator.c
index 041f1d983bc7..b4d08026b02c 100644
--- a/block/elevator.c
+++ b/block/elevator.c
@@ -457,7 +457,7 @@ int elv_register_queue(struct request_queue *q, bool uevent)
struct elevator_queue *e = q->elevator;
int error;
- lockdep_assert_held(&q->sysfs_lock);
+ lockdep_assert_held(&q->elevator_lock);
error = kobject_add(&e->kobj, &q->disk->queue_kobj, "iosched");
if (!error) {
@@ -481,7 +481,7 @@ void elv_unregister_queue(struct request_queue *q)
{
struct elevator_queue *e = q->elevator;
- lockdep_assert_held(&q->sysfs_lock);
+ lockdep_assert_held(&q->elevator_lock);
if (e && test_and_clear_bit(ELEVATOR_FLAG_REGISTERED, &e->flags)) {
kobject_uevent(&e->kobj, KOBJ_REMOVE);
@@ -618,7 +618,7 @@ int elevator_switch(struct request_queue *q, struct elevator_type *new_e)
unsigned int memflags;
int ret;
- lockdep_assert_held(&q->sysfs_lock);
+ lockdep_assert_held(&q->elevator_lock);
memflags = blk_mq_freeze_queue(q);
blk_mq_quiesce_queue(q);
@@ -655,7 +655,7 @@ void elevator_disable(struct request_queue *q)
{
unsigned int memflags;
- lockdep_assert_held(&q->sysfs_lock);
+ lockdep_assert_held(&q->elevator_lock);
memflags = blk_mq_freeze_queue(q);
blk_mq_quiesce_queue(q);
@@ -700,28 +700,23 @@ static int elevator_change(struct request_queue *q, const char *elevator_name)
return ret;
}
-void elv_iosched_load_module(struct gendisk *disk, const char *buf,
- size_t count)
+static void elv_iosched_load_module(char *elevator_name)
{
- char elevator_name[ELV_NAME_MAX];
struct elevator_type *found;
- const char *name;
-
- strscpy(elevator_name, buf, sizeof(elevator_name));
- name = strstrip(elevator_name);
spin_lock(&elv_list_lock);
- found = __elevator_find(name);
+ found = __elevator_find(elevator_name);
spin_unlock(&elv_list_lock);
if (!found)
- request_module("%s-iosched", name);
+ request_module("%s-iosched", elevator_name);
}
ssize_t elv_iosched_store(struct gendisk *disk, const char *buf,
size_t count)
{
char elevator_name[ELV_NAME_MAX];
+ char *name;
int ret;
unsigned int memflags;
struct request_queue *q = disk->queue;
@@ -731,16 +726,18 @@ ssize_t elv_iosched_store(struct gendisk *disk, const char *buf,
* queue to ensure that the module file can be read when the request
* queue is the one for the device storing the module file.
*/
- elv_iosched_load_module(disk, buf, count);
strscpy(elevator_name, buf, sizeof(elevator_name));
+ name = strstrip(elevator_name);
+
+ elv_iosched_load_module(name);
- mutex_lock(&q->sysfs_lock);
memflags = blk_mq_freeze_queue(q);
- ret = elevator_change(q, strstrip(elevator_name));
+ mutex_lock(&q->elevator_lock);
+ ret = elevator_change(q, name);
if (!ret)
ret = count;
+ mutex_unlock(&q->elevator_lock);
blk_mq_unfreeze_queue(q, memflags);
- mutex_unlock(&q->sysfs_lock);
return ret;
}
@@ -751,7 +748,7 @@ ssize_t elv_iosched_show(struct gendisk *disk, char *name)
struct elevator_type *cur = NULL, *e;
int len = 0;
- mutex_lock(&q->sysfs_lock);
+ mutex_lock(&q->elevator_lock);
if (!q->elevator) {
len += sprintf(name+len, "[none] ");
} else {
@@ -769,7 +766,7 @@ ssize_t elv_iosched_show(struct gendisk *disk, char *name)
spin_unlock(&elv_list_lock);
len += sprintf(name+len, "\n");
- mutex_unlock(&q->sysfs_lock);
+ mutex_unlock(&q->elevator_lock);
return len;
}
diff --git a/block/elevator.h b/block/elevator.h
index e526662c5dbb..e4e44dfac503 100644
--- a/block/elevator.h
+++ b/block/elevator.h
@@ -148,8 +148,6 @@ extern void elv_unregister(struct elevator_type *);
* io scheduler sysfs switching
*/
ssize_t elv_iosched_show(struct gendisk *disk, char *page);
-void elv_iosched_load_module(struct gendisk *disk, const char *page,
- size_t count);
ssize_t elv_iosched_store(struct gendisk *disk, const char *page, size_t count);
extern bool elv_bio_merge_ok(struct request *, struct bio *);
diff --git a/block/genhd.c b/block/genhd.c
index e9375e20d866..c2bd86cd09de 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -565,8 +565,11 @@ int __must_check add_disk_fwnode(struct device *parent, struct gendisk *disk,
if (disk->major == BLOCK_EXT_MAJOR)
blk_free_ext_minor(disk->first_minor);
out_exit_elevator:
- if (disk->queue->elevator)
+ if (disk->queue->elevator) {
+ mutex_lock(&disk->queue->elevator_lock);
elevator_exit(disk->queue);
+ mutex_unlock(&disk->queue->elevator_lock);
+ }
return ret;
}
EXPORT_SYMBOL_GPL(add_disk_fwnode);
@@ -742,9 +745,9 @@ void del_gendisk(struct gendisk *disk)
blk_mq_quiesce_queue(q);
if (q->elevator) {
- mutex_lock(&q->sysfs_lock);
+ mutex_lock(&q->elevator_lock);
elevator_exit(q);
- mutex_unlock(&q->sysfs_lock);
+ mutex_unlock(&q->elevator_lock);
}
rq_qos_exit(q);
blk_mq_unquiesce_queue(q);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 248416ecd01c..22f4d3a700ae 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -560,6 +560,11 @@ struct request_queue {
struct blk_flush_queue *fq;
struct list_head flush_list;
+ /*
+ * protects elevator switch/update
+ */
+ struct mutex elevator_lock;
+
struct mutex sysfs_lock;
struct mutex limits_lock;
--
2.47.1
next prev parent reply other threads:[~2025-02-24 13:33 UTC|newest]
Thread overview: 21+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-02-24 13:30 [PATCHv3 0/7] block: fix lock order and remove redundant locking Nilay Shroff
2025-02-24 13:30 ` [PATCHv3 1/7] block: acquire q->limits_lock while reading sysfs attributes Nilay Shroff
2025-02-25 7:38 ` Hannes Reinecke
2025-02-24 13:30 ` [PATCHv3 2/7] block: move q->sysfs_lock and queue-freeze under show/store method Nilay Shroff
2025-02-24 16:31 ` Christoph Hellwig
2025-02-25 7:41 ` Hannes Reinecke
2025-02-24 13:30 ` [PATCHv3 3/7] block: remove q->sysfs_lock for attributes which don't need it Nilay Shroff
2025-02-25 7:46 ` Hannes Reinecke
2025-02-24 13:30 ` Nilay Shroff [this message]
2025-02-24 16:33 ` [PATCHv3 4/7] block: Introduce a dedicated lock for protecting queue elevator updates Christoph Hellwig
2025-02-25 13:28 ` Nilay Shroff
2025-02-25 7:49 ` Hannes Reinecke
2025-02-24 13:30 ` [PATCHv3 5/7] block: protect nr_requests update using q->elevator_lock Nilay Shroff
2025-02-25 7:50 ` Hannes Reinecke
2025-02-24 13:30 ` [PATCHv3 6/7] block: protect wbt_lat_usec " Nilay Shroff
2025-02-25 7:53 ` Hannes Reinecke
2025-02-25 10:05 ` Nilay Shroff
2025-02-24 13:30 ` [PATCHv3 7/7] block: protect read_ahead_kb using q->limits_lock Nilay Shroff
2025-02-25 7:58 ` Hannes Reinecke
2025-02-25 10:18 ` Nilay Shroff
2025-02-25 11:43 ` Hannes Reinecke
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250224133102.1240146-5-nilay@linux.ibm.com \
--to=nilay@linux.ibm.com \
--cc=axboe@kernel.dk \
--cc=dlemoal@kernel.org \
--cc=gjoyce@ibm.com \
--cc=hare@suse.de \
--cc=hch@lst.de \
--cc=linux-block@vger.kernel.org \
--cc=ming.lei@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox