public inbox for linux-block@vger.kernel.org
 help / color / mirror / Atom feed
From: Jens Axboe <axboe@kernel.dk>
To: linux-block@vger.kernel.org
Cc: bvanassche@acm.org, Jens Axboe <axboe@kernel.dk>
Subject: [PATCH 2/2] block/mq-deadline: fallback to per-cpu insertion buckets under contention
Date: Thu, 18 Jan 2024 11:04:57 -0700	[thread overview]
Message-ID: <20240118180541.930783-3-axboe@kernel.dk> (raw)
In-Reply-To: <20240118180541.930783-1-axboe@kernel.dk>

If we attempt to insert a list of requests but someone else is already
running an insertion, then fall back to queueing it internally and let
the existing inserter finish the operation.

Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 block/mq-deadline.c | 118 +++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 110 insertions(+), 8 deletions(-)

diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 9e0ab3ea728a..eeeaaff189e1 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -81,8 +81,17 @@ struct dd_per_prio {
 
 enum {
 	DD_DISPATCHING	= 0,
+	DD_INSERTING	= 1,
 };
 
+#define DD_CPU_BUCKETS		32
+#define DD_CPU_BUCKETS_MASK	(DD_CPU_BUCKETS - 1)
+
+struct dd_bucket_list {
+	struct list_head list;
+	spinlock_t lock;
+} ____cacheline_aligned_in_smp;
+
 struct deadline_data {
 	/*
 	 * run time data
@@ -94,6 +103,9 @@ struct deadline_data {
 
 	unsigned long run_state;
 
+	atomic_t insert_seq;
+	struct dd_bucket_list bucket_lists[DD_CPU_BUCKETS];
+
 	struct dd_per_prio per_prio[DD_PRIO_COUNT];
 
 	/* Data direction of latest dispatched request. */
@@ -711,7 +723,7 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
 	struct deadline_data *dd;
 	struct elevator_queue *eq;
 	enum dd_prio prio;
-	int ret = -ENOMEM;
+	int i, ret = -ENOMEM;
 
 	eq = elevator_alloc(q, e);
 	if (!eq)
@@ -725,6 +737,12 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
 
 	spin_lock_init(&dd->lock);
 	spin_lock_init(&dd->zone_lock);
+	atomic_set(&dd->insert_seq, 0);
+
+	for (i = 0; i < DD_CPU_BUCKETS; i++) {
+		INIT_LIST_HEAD(&dd->bucket_lists[i].list);
+		spin_lock_init(&dd->bucket_lists[i].lock);
+	}
 
 	for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
 		struct dd_per_prio *per_prio = &dd->per_prio[prio];
@@ -876,6 +894,67 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 	}
 }
 
+static void dd_dispatch_from_buckets(struct deadline_data *dd,
+				     struct list_head *list)
+{
+	int i;
+
+	for (i = 0; i < DD_CPU_BUCKETS; i++) {
+		struct dd_bucket_list *bucket = &dd->bucket_lists[i];
+
+		if (list_empty_careful(&bucket->list))
+			continue;
+		spin_lock(&bucket->lock);
+		list_splice_init(&bucket->list, list);
+		spin_unlock(&bucket->lock);
+	}
+}
+
+/*
+ * If we can grab the dd->lock, then just return and do the insertion as per
+ * usual. If not, add to one of our internal buckets, and afterwards recheck
+ * if we should retry.
+ */
+static bool dd_insert_to_bucket(struct deadline_data *dd,
+				struct list_head *list, int *seq)
+	__acquires(&dd->lock)
+{
+	struct dd_bucket_list *bucket;
+	int next_seq;
+
+	*seq = atomic_read(&dd->insert_seq);
+
+	if (spin_trylock(&dd->lock))
+		return false;
+	if (!test_bit(DD_INSERTING, &dd->run_state)) {
+		spin_lock(&dd->lock);
+		return false;
+	}
+
+	*seq = atomic_inc_return(&dd->insert_seq);
+
+	bucket = &dd->bucket_lists[get_cpu() & DD_CPU_BUCKETS_MASK];
+	spin_lock(&bucket->lock);
+	list_splice_init(list, &bucket->list);
+	spin_unlock(&bucket->lock);
+	put_cpu();
+
+	/*
+	 * If seq still matches, we should be safe to just exit with the
+	 * pending requests queued in a bucket.
+	 */
+	next_seq = atomic_inc_return(&dd->insert_seq);
+	if (next_seq == *seq + 1)
+		return true;
+
+	/*
+	 * Seq changed, be safe and grab the lock and insert. Don't update
+	 * sequence, so that we flush the buckets too.
+	 */
+	spin_lock(&dd->lock);
+	return false;
+}
+
 /*
  * Called from blk_mq_insert_request() or blk_mq_dispatch_plug_list().
  */
@@ -886,16 +965,39 @@ static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
 	struct request_queue *q = hctx->queue;
 	struct deadline_data *dd = q->elevator->elevator_data;
 	LIST_HEAD(free);
+	int seq;
 
-	spin_lock(&dd->lock);
-	while (!list_empty(list)) {
-		struct request *rq;
+	/*
+	 * If another inserter is busy and we ended up adding to our internal
+	 * bucket, then we're done for now.
+	 */
+	if (dd_insert_to_bucket(dd, list, &seq))
+		return;
+
+	set_bit(DD_INSERTING, &dd->run_state);
+	do {
+		int next_seq;
+
+		while (!list_empty(list)) {
+			struct request *rq;
+
+			rq = list_first_entry(list, struct request, queuelist);
+			list_del_init(&rq->queuelist);
+			dd_insert_request(hctx, rq, flags, &free);
+		}
+
+		/*
+		 * If sequence changed, flush internal buckets
+		 */
+		next_seq = atomic_inc_return(&dd->insert_seq);
+		if (next_seq == seq + 1)
+			break;
+		seq = next_seq;
+		dd_dispatch_from_buckets(dd, list);
+	} while (1);
 
-		rq = list_first_entry(list, struct request, queuelist);
-		list_del_init(&rq->queuelist);
-		dd_insert_request(hctx, rq, flags, &free);
-	}
 	spin_unlock(&dd->lock);
+	clear_bit(DD_INSERTING, &dd->run_state);
 
 	blk_mq_free_requests(&free);
 }
-- 
2.43.0


  parent reply	other threads:[~2024-01-18 18:05 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-01-18 18:04 [PATCHSET RFC 0/2] mq-deadline scalability improvements Jens Axboe
2024-01-18 18:04 ` [PATCH 1/2] block/mq-deadline: serialize request dispatching Jens Axboe
2024-01-18 18:24   ` Bart Van Assche
2024-01-18 18:45     ` Jens Axboe
2024-01-18 18:51       ` Bart Van Assche
2024-01-18 18:55         ` Jens Axboe
2024-01-19  2:40   ` Ming Lei
2024-01-19 15:49     ` Jens Axboe
2024-01-18 18:04 ` Jens Axboe [this message]
2024-01-18 18:25   ` [PATCH 2/2] block/mq-deadline: fallback to per-cpu insertion buckets under contention Keith Busch
2024-01-18 18:28     ` Jens Axboe
2024-01-18 18:31   ` Bart Van Assche
2024-01-18 18:33     ` Jens Axboe
2024-01-18 18:53       ` Bart Van Assche
2024-01-18 18:56         ` Jens Axboe
2024-01-18 20:46           ` Bart Van Assche
2024-01-18 20:52             ` Jens Axboe
2024-01-19 23:11               ` Bart Van Assche
2024-01-18 19:29 ` [PATCHSET RFC 0/2] mq-deadline scalability improvements Jens Axboe
2024-01-18 20:22   ` Jens Axboe

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20240118180541.930783-3-axboe@kernel.dk \
    --to=axboe@kernel.dk \
    --cc=bvanassche@acm.org \
    --cc=linux-block@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox