From: Christoph Hellwig <hch@lst.de>
To: Jens Axboe <axboe@kernel.dk>, Keith Busch <kbusch@kernel.org>,
Sagi Grimberg <sagi@grimberg.me>, Chao Leng <lengchao@huawei.com>
Cc: Ming Lei <ming.lei@redhat.com>,
linux-nvme@lists.infradead.org, linux-block@vger.kernel.org
Subject: [PATCH 3/8] blk-mq: move the srcu_struct used for quiescing to the tagset
Date: Thu, 20 Oct 2022 12:56:03 +0200 [thread overview]
Message-ID: <20221020105608.1581940-4-hch@lst.de> (raw)
In-Reply-To: <20221020105608.1581940-1-hch@lst.de>
All I/O submissions have fairly similar latencies, and a tagset-wide
quiesce is a fairly common operation. Because there are a lot fewer
tagsets, there is also no need for the variable size allocation trick.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
block/blk-core.c | 27 +++++----------------------
block/blk-mq.c | 25 +++++++++++++++++--------
block/blk-mq.h | 14 +++++++-------
block/blk-sysfs.c | 9 ++-------
block/blk.h | 9 +--------
block/genhd.c | 2 +-
include/linux/blk-mq.h | 4 ++++
include/linux/blkdev.h | 9 ---------
8 files changed, 37 insertions(+), 62 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 17667159482e0..3a2ed8dadf738 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -65,7 +65,6 @@ DEFINE_IDA(blk_queue_ida);
* For queue allocation
*/
struct kmem_cache *blk_requestq_cachep;
-struct kmem_cache *blk_requestq_srcu_cachep;
/*
* Controlling structure to kblockd
@@ -373,26 +372,20 @@ static void blk_timeout_work(struct work_struct *work)
{
}
-struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu)
+struct request_queue *blk_alloc_queue(int node_id)
{
struct request_queue *q;
- q = kmem_cache_alloc_node(blk_get_queue_kmem_cache(alloc_srcu),
- GFP_KERNEL | __GFP_ZERO, node_id);
+ q = kmem_cache_alloc_node(blk_requestq_cachep, GFP_KERNEL | __GFP_ZERO,
+ node_id);
if (!q)
return NULL;
- if (alloc_srcu) {
- blk_queue_flag_set(QUEUE_FLAG_HAS_SRCU, q);
- if (init_srcu_struct(q->srcu) != 0)
- goto fail_q;
- }
-
q->last_merge = NULL;
q->id = ida_alloc(&blk_queue_ida, GFP_KERNEL);
if (q->id < 0)
- goto fail_srcu;
+ goto fail_q;
q->stats = blk_alloc_queue_stats();
if (!q->stats)
@@ -435,11 +428,8 @@ struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu)
blk_free_queue_stats(q->stats);
fail_id:
ida_free(&blk_queue_ida, q->id);
-fail_srcu:
- if (alloc_srcu)
- cleanup_srcu_struct(q->srcu);
fail_q:
- kmem_cache_free(blk_get_queue_kmem_cache(alloc_srcu), q);
+ kmem_cache_free(blk_requestq_cachep, q);
return NULL;
}
@@ -1184,9 +1174,6 @@ int __init blk_dev_init(void)
sizeof_field(struct request, cmd_flags));
BUILD_BUG_ON(REQ_OP_BITS + REQ_FLAG_BITS > 8 *
sizeof_field(struct bio, bi_opf));
- BUILD_BUG_ON(ALIGN(offsetof(struct request_queue, srcu),
- __alignof__(struct request_queue)) !=
- sizeof(struct request_queue));
/* used for unplugging and affects IO latency/throughput - HIGHPRI */
kblockd_workqueue = alloc_workqueue("kblockd",
@@ -1197,10 +1184,6 @@ int __init blk_dev_init(void)
blk_requestq_cachep = kmem_cache_create("request_queue",
sizeof(struct request_queue), 0, SLAB_PANIC, NULL);
- blk_requestq_srcu_cachep = kmem_cache_create("request_queue_srcu",
- sizeof(struct request_queue) +
- sizeof(struct srcu_struct), 0, SLAB_PANIC, NULL);
-
blk_debugfs_root = debugfs_create_dir("block", NULL);
return 0;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index df967c8af9fee..4a81a2da43328 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -261,8 +261,8 @@ EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue_nowait);
*/
void blk_mq_wait_quiesce_done(struct request_queue *q)
{
- if (blk_queue_has_srcu(q))
- synchronize_srcu(q->srcu);
+ if (q->tag_set->flags & BLK_MQ_F_BLOCKING)
+ synchronize_srcu(&q->tag_set->srcu);
else
synchronize_rcu();
}
@@ -3971,7 +3971,7 @@ static struct request_queue *blk_mq_init_queue_data(struct blk_mq_tag_set *set,
struct request_queue *q;
int ret;
- q = blk_alloc_queue(set->numa_node, set->flags & BLK_MQ_F_BLOCKING);
+ q = blk_alloc_queue(set->numa_node);
if (!q)
return ERR_PTR(-ENOMEM);
q->queuedata = queuedata;
@@ -4138,9 +4138,6 @@ static void blk_mq_update_poll_flag(struct request_queue *q)
int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
struct request_queue *q)
{
- WARN_ON_ONCE(blk_queue_has_srcu(q) !=
- !!(set->flags & BLK_MQ_F_BLOCKING));
-
/* mark the queue as mq asap */
q->mq_ops = set->ops;
@@ -4398,9 +4395,16 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
*/
if (set->nr_maps == 1 && set->nr_hw_queues > nr_cpu_ids)
set->nr_hw_queues = nr_cpu_ids;
+
+ if (set->flags & BLK_MQ_F_BLOCKING) {
+ ret = init_srcu_struct(&set->srcu);
+ if (ret)
+ return ret;
+ }
- if (blk_mq_alloc_tag_set_tags(set, set->nr_hw_queues) < 0)
- return -ENOMEM;
+ ret = blk_mq_alloc_tag_set_tags(set, set->nr_hw_queues);
+ if (ret)
+ goto out_free_srcu;
ret = -ENOMEM;
for (i = 0; i < set->nr_maps; i++) {
@@ -4430,6 +4434,9 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
}
kfree(set->tags);
set->tags = NULL;
+out_free_srcu:
+ if (set->flags & BLK_MQ_F_BLOCKING)
+ cleanup_srcu_struct(&set->srcu);
return ret;
}
EXPORT_SYMBOL(blk_mq_alloc_tag_set);
@@ -4469,6 +4476,8 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
kfree(set->tags);
set->tags = NULL;
+ if (set->flags & BLK_MQ_F_BLOCKING)
+ cleanup_srcu_struct(&set->srcu);
}
EXPORT_SYMBOL(blk_mq_free_tag_set);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 0b2870839cdd6..06eb46d1d7a76 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -377,17 +377,17 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
/* run the code block in @dispatch_ops with rcu/srcu read lock held */
#define __blk_mq_run_dispatch_ops(q, check_sleep, dispatch_ops) \
do { \
- if (!blk_queue_has_srcu(q)) { \
- rcu_read_lock(); \
- (dispatch_ops); \
- rcu_read_unlock(); \
- } else { \
+ if ((q)->tag_set->flags & BLK_MQ_F_BLOCKING) { \
int srcu_idx; \
\
might_sleep_if(check_sleep); \
- srcu_idx = srcu_read_lock((q)->srcu); \
+ srcu_idx = srcu_read_lock(&((q)->tag_set->srcu)); \
(dispatch_ops); \
- srcu_read_unlock((q)->srcu, srcu_idx); \
+ srcu_read_unlock(&((q)->tag_set->srcu), srcu_idx); \
+ } else { \
+ rcu_read_lock(); \
+ (dispatch_ops); \
+ rcu_read_unlock(); \
} \
} while (0)
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index e71b3b43927c0..e7871665825a3 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -739,10 +739,8 @@ queue_attr_store(struct kobject *kobj, struct attribute *attr,
static void blk_free_queue_rcu(struct rcu_head *rcu_head)
{
- struct request_queue *q = container_of(rcu_head, struct request_queue,
- rcu_head);
-
- kmem_cache_free(blk_get_queue_kmem_cache(blk_queue_has_srcu(q)), q);
+ kmem_cache_free(blk_requestq_cachep,
+ container_of(rcu_head, struct request_queue, rcu_head));
}
/**
@@ -779,9 +777,6 @@ static void blk_release_queue(struct kobject *kobj)
if (queue_is_mq(q))
blk_mq_release(q);
- if (blk_queue_has_srcu(q))
- cleanup_srcu_struct(q->srcu);
-
ida_free(&blk_queue_ida, q->id);
call_rcu(&q->rcu_head, blk_free_queue_rcu);
}
diff --git a/block/blk.h b/block/blk.h
index 5350bf363035e..b25e2d22f3725 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -27,7 +27,6 @@ struct blk_flush_queue {
};
extern struct kmem_cache *blk_requestq_cachep;
-extern struct kmem_cache *blk_requestq_srcu_cachep;
extern struct kobj_type blk_queue_ktype;
extern struct ida blk_queue_ida;
@@ -420,13 +419,7 @@ int bio_add_hw_page(struct request_queue *q, struct bio *bio,
struct page *page, unsigned int len, unsigned int offset,
unsigned int max_sectors, bool *same_page);
-static inline struct kmem_cache *blk_get_queue_kmem_cache(bool srcu)
-{
- if (srcu)
- return blk_requestq_srcu_cachep;
- return blk_requestq_cachep;
-}
-struct request_queue *blk_alloc_queue(int node_id, bool alloc_srcu);
+struct request_queue *blk_alloc_queue(int node_id);
int disk_scan_partitions(struct gendisk *disk, fmode_t mode);
diff --git a/block/genhd.c b/block/genhd.c
index 2877b5f905579..fd0b13d6175a3 100644
--- a/block/genhd.c
+++ b/block/genhd.c
@@ -1410,7 +1410,7 @@ struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass)
struct request_queue *q;
struct gendisk *disk;
- q = blk_alloc_queue(node, false);
+ q = blk_alloc_queue(node);
if (!q)
return NULL;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index ba18e9bdb799b..f040a7cab5dbf 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -7,6 +7,7 @@
#include <linux/lockdep.h>
#include <linux/scatterlist.h>
#include <linux/prefetch.h>
+#include <linux/srcu.h>
struct blk_mq_tags;
struct blk_flush_queue;
@@ -501,6 +502,8 @@ enum hctx_type {
* @tag_list_lock: Serializes tag_list accesses.
* @tag_list: List of the request queues that use this tag set. See also
* request_queue.tag_set_list.
+ * @srcu: Use as lock when type of the request queue is blocking
+ * (BLK_MQ_F_BLOCKING).
*/
struct blk_mq_tag_set {
struct blk_mq_queue_map map[HCTX_MAX_TYPES];
@@ -521,6 +524,7 @@ struct blk_mq_tag_set {
struct mutex tag_list_lock;
struct list_head tag_list;
+ struct srcu_struct srcu;
};
/**
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 50e358a19d986..b15b6a011c028 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -22,7 +22,6 @@
#include <linux/blkzoned.h>
#include <linux/sched.h>
#include <linux/sbitmap.h>
-#include <linux/srcu.h>
#include <linux/uuid.h>
#include <linux/xarray.h>
@@ -543,18 +542,11 @@ struct request_queue {
struct mutex debugfs_mutex;
bool mq_sysfs_init_done;
-
- /**
- * @srcu: Sleepable RCU. Use as lock when type of the request queue
- * is blocking (BLK_MQ_F_BLOCKING). Must be the last member
- */
- struct srcu_struct srcu[];
};
/* Keep blk_queue_flag_name[] in sync with the definitions below */
#define QUEUE_FLAG_STOPPED 0 /* queue is stopped */
#define QUEUE_FLAG_DYING 1 /* queue being torn down */
-#define QUEUE_FLAG_HAS_SRCU 2 /* SRCU is allocated */
#define QUEUE_FLAG_NOMERGES 3 /* disable merge attempts */
#define QUEUE_FLAG_SAME_COMP 4 /* complete on same CPU-group */
#define QUEUE_FLAG_FAIL_IO 5 /* fake timeout */
@@ -590,7 +582,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q);
#define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags)
#define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags)
-#define blk_queue_has_srcu(q) test_bit(QUEUE_FLAG_HAS_SRCU, &(q)->queue_flags)
#define blk_queue_init_done(q) test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags)
#define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags)
#define blk_queue_noxmerges(q) \
--
2.30.2
next prev parent reply other threads:[~2022-10-20 12:00 UTC|newest]
Thread overview: 61+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-10-20 10:56 per-tagset SRCU struct and quiesce Christoph Hellwig
2022-10-20 10:56 ` [PATCH 1/8] block: set the disk capacity to 0 in blk_mark_disk_dead Christoph Hellwig
2022-10-20 13:16 ` Sagi Grimberg
2022-10-21 1:09 ` Ming Lei
2022-10-21 13:11 ` Christoph Hellwig
2022-10-21 1:53 ` Chao Leng
2022-10-21 6:49 ` Hannes Reinecke
2022-10-21 13:13 ` Christoph Hellwig
2022-10-21 21:12 ` Bart Van Assche
2022-10-20 10:56 ` [PATCH 2/8] blk-mq: skip non-mq queues in blk_mq_quiesce_queue Christoph Hellwig
2022-10-20 13:16 ` Sagi Grimberg
2022-10-21 1:13 ` Ming Lei
2022-10-21 13:19 ` Christoph Hellwig
2022-10-21 15:08 ` Ming Lei
2022-10-21 2:47 ` Chao Leng
2022-10-21 3:16 ` Chao Leng
2022-10-21 6:49 ` Hannes Reinecke
2022-10-20 10:56 ` Christoph Hellwig [this message]
2022-10-20 13:23 ` [PATCH 3/8] blk-mq: move the srcu_struct used for quiescing to the tagset Sagi Grimberg
2022-10-20 17:26 ` Keith Busch
2022-10-21 13:20 ` Christoph Hellwig
2022-10-21 1:41 ` Ming Lei
2022-10-21 2:49 ` Chao Leng
2022-10-21 6:50 ` Hannes Reinecke
2022-10-21 7:16 ` Chao Leng
2022-10-21 13:22 ` Christoph Hellwig
2022-10-20 10:56 ` [PATCH 4/8] blk-mq: pass a tagset to blk_mq_wait_quiesce_done Christoph Hellwig
2022-10-20 13:23 ` Sagi Grimberg
2022-10-21 1:46 ` Ming Lei
2022-10-21 13:23 ` Christoph Hellwig
2022-10-21 2:49 ` Chao Leng
2022-10-21 6:50 ` Hannes Reinecke
2022-10-21 21:18 ` Bart Van Assche
2022-10-20 10:56 ` [PATCH 5/8] blk-mq: add tagset quiesce interface Christoph Hellwig
2022-10-20 13:24 ` Sagi Grimberg
2022-10-21 1:53 ` Ming Lei
2022-10-21 2:49 ` Chao Leng
2022-10-21 6:51 ` Hannes Reinecke
2022-10-21 21:22 ` Bart Van Assche
2022-10-24 1:57 ` Chao Leng
2022-10-24 13:35 ` Bart Van Assche
2022-10-25 1:38 ` Chao Leng
2022-10-20 10:56 ` [PATCH 6/8] nvme: move the NS_DEAD flag to the controller Christoph Hellwig
2022-10-20 13:30 ` Sagi Grimberg
2022-10-21 13:28 ` Christoph Hellwig
2022-10-24 8:43 ` Sagi Grimberg
2022-10-24 8:50 ` Sagi Grimberg
2022-10-21 2:49 ` Chao Leng
2022-10-21 6:51 ` Hannes Reinecke
2022-10-20 10:56 ` [PATCH 7/8] nvme: remove nvme_set_queue_dying Christoph Hellwig
2022-10-20 13:10 ` Sagi Grimberg
2022-10-21 13:29 ` Christoph Hellwig
2022-10-24 8:48 ` Sagi Grimberg
2022-10-21 2:50 ` Chao Leng
2022-10-21 6:52 ` Hannes Reinecke
2022-10-20 10:56 ` [PATCH 8/8] nvme: use blk_mq_[un]quiesce_tagset Christoph Hellwig
2022-10-20 13:35 ` Sagi Grimberg
2022-10-21 2:50 ` Chao Leng
2022-10-21 6:52 ` Hannes Reinecke
2022-10-20 13:16 ` per-tagset SRCU struct and quiesce Sagi Grimberg
2022-10-21 18:06 ` Keith Busch
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221020105608.1581940-4-hch@lst.de \
--to=hch@lst.de \
--cc=axboe@kernel.dk \
--cc=kbusch@kernel.org \
--cc=lengchao@huawei.com \
--cc=linux-block@vger.kernel.org \
--cc=linux-nvme@lists.infradead.org \
--cc=ming.lei@redhat.com \
--cc=sagi@grimberg.me \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox