From: Jens Axboe <axboe@kernel.dk>
To: linux-block@vger.kernel.org
Cc: Bart Van Assche <bvanassche@acm.org>, Jens Axboe <axboe@kernel.dk>
Subject: [PATCH 4/8] block/mq-deadline: use separate insertion lists
Date: Tue, 23 Jan 2024 10:34:16 -0700 [thread overview]
Message-ID: <20240123174021.1967461-5-axboe@kernel.dk> (raw)
In-Reply-To: <20240123174021.1967461-1-axboe@kernel.dk>
From: Bart Van Assche <bvanassche@acm.org>
Reduce lock contention on dd->lock by calling dd_insert_request() from
inside the dispatch callback instead of from the insert callback. This
patch is inspired by a patch from Jens.
With the previous dispatch and merge optimizations, this drastically
reduces contention for a sample case of 32 threads doing IO to devices.
The test case looks as follows:
fio --bs=512 --group_reporting=1 --gtod_reduce=1 --invalidate=1 \
--ioengine=io_uring --norandommap --runtime=60 --rw=randread \
--thread --time_based=1 --buffered=0 --fixedbufs=1 --numjobs=32 \
--iodepth=4 --iodepth_batch_submit=4 --iodepth_batch_complete=4 \
--name=scaletest --filename=/dev/$DEV
Before:
Device      IOPS     sys     contention   diff
====================================================
null_blk    879K     89%     93.6%
nvme0n1     901K     86%     94.5%
and after this and the previous two patches:
Device      IOPS     sys     contention   diff
====================================================
null_blk    2867K    11.1%   ~6.0%        +226%
nvme0n1     3162K    9.9%    ~5.0%        +250%
which basically eliminates all of the lock contention; it's down to
more normal levels. The throughput increases show that nicely, with more
than a 200% improvement for both cases.
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
[axboe: expand commit message with more details and perf results]
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
block/mq-deadline.c | 66 ++++++++++++++++++++++++++++++++++++---------
1 file changed, 53 insertions(+), 13 deletions(-)
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 740b94f36cac..1b0de4fc3958 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -89,11 +89,15 @@ struct deadline_data {
*/
struct {
spinlock_t lock;
+ spinlock_t insert_lock;
spinlock_t zone_lock;
} ____cacheline_aligned_in_smp;
unsigned long run_state;
+ struct list_head at_head;
+ struct list_head at_tail;
+
struct dd_per_prio per_prio[DD_PRIO_COUNT];
/* Data direction of latest dispatched request. */
@@ -120,6 +124,9 @@ static const enum dd_prio ioprio_class_to_prio[] = {
[IOPRIO_CLASS_IDLE] = DD_IDLE_PRIO,
};
+static void dd_insert_request(struct request_queue *q, struct request *rq,
+ blk_insert_t flags, struct list_head *free);
+
static inline struct rb_root *
deadline_rb_root(struct dd_per_prio *per_prio, struct request *rq)
{
@@ -592,6 +599,33 @@ static struct request *dd_dispatch_prio_aged_requests(struct deadline_data *dd,
return NULL;
}
+static void __dd_do_insert(struct request_queue *q, blk_insert_t flags,
+ struct list_head *list, struct list_head *free)
+{
+ while (!list_empty(list)) {
+ struct request *rq;
+
+ rq = list_first_entry(list, struct request, queuelist);
+ list_del_init(&rq->queuelist);
+ dd_insert_request(q, rq, flags, free);
+ }
+}
+
+static void dd_do_insert(struct request_queue *q, struct list_head *free)
+{
+ struct deadline_data *dd = q->elevator->elevator_data;
+ LIST_HEAD(at_head);
+ LIST_HEAD(at_tail);
+
+ spin_lock(&dd->insert_lock);
+ list_splice_init(&dd->at_head, &at_head);
+ list_splice_init(&dd->at_tail, &at_tail);
+ spin_unlock(&dd->insert_lock);
+
+ __dd_do_insert(q, BLK_MQ_INSERT_AT_HEAD, &at_head, free);
+ __dd_do_insert(q, 0, &at_tail, free);
+}
+
/*
* Called from blk_mq_run_hw_queue() -> __blk_mq_sched_dispatch_requests().
*
@@ -602,10 +636,12 @@ static struct request *dd_dispatch_prio_aged_requests(struct deadline_data *dd,
*/
static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
{
- struct deadline_data *dd = hctx->queue->elevator->elevator_data;
+ struct request_queue *q = hctx->queue;
+ struct deadline_data *dd = q->elevator->elevator_data;
const unsigned long now = jiffies;
struct request *rq;
enum dd_prio prio;
+ LIST_HEAD(free);
/*
* If someone else is already dispatching, skip this one. This will
@@ -620,6 +656,7 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
return NULL;
spin_lock(&dd->lock);
+ dd_do_insert(q, &free);
rq = dd_dispatch_prio_aged_requests(dd, now);
if (rq)
goto unlock;
@@ -638,6 +675,7 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
clear_bit_unlock(DD_DISPATCHING, &dd->run_state);
spin_unlock(&dd->lock);
+ blk_mq_free_requests(&free);
return rq;
}
@@ -727,8 +765,12 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
eq->elevator_data = dd;
spin_lock_init(&dd->lock);
+ spin_lock_init(&dd->insert_lock);
spin_lock_init(&dd->zone_lock);
+ INIT_LIST_HEAD(&dd->at_head);
+ INIT_LIST_HEAD(&dd->at_tail);
+
for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
struct dd_per_prio *per_prio = &dd->per_prio[prio];
@@ -899,19 +941,13 @@ static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
{
struct request_queue *q = hctx->queue;
struct deadline_data *dd = q->elevator->elevator_data;
- LIST_HEAD(free);
-
- spin_lock(&dd->lock);
- while (!list_empty(list)) {
- struct request *rq;
- rq = list_first_entry(list, struct request, queuelist);
- list_del_init(&rq->queuelist);
- dd_insert_request(q, rq, flags, &free);
- }
- spin_unlock(&dd->lock);
-
- blk_mq_free_requests(&free);
+ spin_lock(&dd->insert_lock);
+ if (flags & BLK_MQ_INSERT_AT_HEAD)
+ list_splice_init(list, &dd->at_head);
+ else
+ list_splice_init(list, &dd->at_tail);
+ spin_unlock(&dd->insert_lock);
}
/* Callback from inside blk_mq_rq_ctx_init(). */
@@ -990,6 +1026,10 @@ static bool dd_has_work(struct blk_mq_hw_ctx *hctx)
struct deadline_data *dd = hctx->queue->elevator->elevator_data;
enum dd_prio prio;
+ if (!list_empty_careful(&dd->at_head) ||
+ !list_empty_careful(&dd->at_tail))
+ return true;
+
for (prio = 0; prio <= DD_PRIO_MAX; prio++)
if (dd_has_work_for_prio(&dd->per_prio[prio]))
return true;
--
2.43.0
next prev parent reply other threads:[~2024-01-23 17:40 UTC|newest]
Thread overview: 30+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-01-23 17:34 [PATCHSET v3] mq-deadline and BFQ scalability improvements Jens Axboe
2024-01-23 17:34 ` [PATCH 1/8] block/mq-deadline: pass in queue directly to dd_insert_request() Jens Axboe
2024-01-24 9:21 ` Johannes Thumshirn
2024-01-23 17:34 ` [PATCH 2/8] block/mq-deadline: serialize request dispatching Jens Axboe
2024-01-23 18:36 ` Bart Van Assche
2024-01-23 19:13 ` Jens Axboe
2024-01-24 9:31 ` Christoph Hellwig
2024-01-24 15:00 ` Jens Axboe
2024-01-24 9:29 ` Johannes Thumshirn
2024-01-23 17:34 ` [PATCH 3/8] block/mq-deadline: skip expensive merge lookups if contended Jens Axboe
2024-01-24 9:31 ` Johannes Thumshirn
2024-01-24 9:32 ` Christoph Hellwig
2024-01-24 15:02 ` Jens Axboe
2024-01-23 17:34 ` Jens Axboe [this message]
2024-01-23 18:37 ` [PATCH 4/8] block/mq-deadline: use separate insertion lists Bart Van Assche
2024-01-24 9:42 ` Johannes Thumshirn
2024-01-23 17:34 ` [PATCH 5/8] block/bfq: pass in queue directly to bfq_insert_request() Jens Axboe
2024-01-23 18:38 ` Bart Van Assche
2024-01-24 9:46 ` Johannes Thumshirn
2024-01-23 17:34 ` [PATCH 6/8] block/bfq: serialize request dispatching Jens Axboe
2024-01-23 18:40 ` Bart Van Assche
2024-01-23 19:14 ` Jens Axboe
2024-01-23 17:34 ` [PATCH 7/8] block/bfq: skip expensive merge lookups if contended Jens Axboe
2024-01-23 18:44 ` Bart Van Assche
2024-01-23 19:14 ` Jens Axboe
2024-01-23 17:34 ` [PATCH 8/8] block/bfq: use separate insertion lists Jens Axboe
2024-01-23 18:47 ` Bart Van Assche
2024-01-23 19:18 ` Jens Axboe
2024-01-23 20:03 ` [PATCHSET v3] mq-deadline and BFQ scalability improvements Oleksandr Natalenko
2024-01-23 22:14 ` Jens Axboe
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20240123174021.1967461-5-axboe@kernel.dk \
--to=axboe@kernel.dk \
--cc=bvanassche@acm.org \
--cc=linux-block@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.