[PATCH v2 3/4] block/mq-deadline: Stop using per-CPU counters

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Bart Van Assche <bvanassche@acm.org>
To: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org, Christoph Hellwig <hch@lst.de>,
	Jaegeuk Kim <jaegeuk@kernel.org>,
	Bart Van Assche <bvanassche@acm.org>,
	Damien Le Moal <damien.lemoal@wdc.com>,
	Niklas Cassel <Niklas.Cassel@wdc.com>,
	Hannes Reinecke <hare@suse.de>
Subject: [PATCH v2 3/4] block/mq-deadline: Stop using per-CPU counters
Date: Mon, 27 Sep 2021 15:03:27 -0700	[thread overview]
Message-ID: <20210927220328.1410161-4-bvanassche@acm.org> (raw)
In-Reply-To: <20210927220328.1410161-1-bvanassche@acm.org>

Calculating the sum over all CPUs of per-CPU counters frequently is
inefficient. Hence switch from per-CPU to individual counters. Three
counters are protected by the mq-deadline spinlock since these are
only accessed from contexts that already hold that spinlock. The fourth
counter is atomic because protecting it with the mq-deadline spinlock
would trigger lock contention.

Reviewed-by: Damien Le Moal <damien.lemoal@wdc.com>
Reviewed-by: Niklas Cassel <Niklas.Cassel@wdc.com>
Cc: Hannes Reinecke <hare@suse.de>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 block/mq-deadline.c | 124 ++++++++++++++++++++------------------------
 1 file changed, 56 insertions(+), 68 deletions(-)

diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 2586b3f8c7e9..b262f40f32c0 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -51,17 +51,16 @@ enum dd_prio {
 
 enum { DD_PRIO_COUNT = 3 };
 
-/* I/O statistics per I/O priority. */
+/*
+ * I/O statistics per I/O priority. It is fine if these counters overflow.
+ * What matters is that these counters are at least as wide as
+ * log2(max_outstanding_requests).
+ */
 struct io_stats_per_prio {
-	local_t inserted;
-	local_t merged;
-	local_t dispatched;
-	local_t completed;
-};
-
-/* I/O statistics for all I/O priorities (enum dd_prio). */
-struct io_stats {
-	struct io_stats_per_prio stats[DD_PRIO_COUNT];
+	uint32_t inserted;
+	uint32_t merged;
+	uint32_t dispatched;
+	atomic_t completed;
 };
 
 /*
@@ -74,6 +73,7 @@ struct dd_per_prio {
 	struct list_head fifo_list[DD_DIR_COUNT];
 	/* Next request in FIFO order. Read, write or both are NULL. */
 	struct request *next_rq[DD_DIR_COUNT];
+	struct io_stats_per_prio stats;
 };
 
 struct deadline_data {
@@ -88,8 +88,6 @@ struct deadline_data {
 	unsigned int batching;		/* number of sequential requests made */
 	unsigned int starved;		/* times reads have starved writes */
 
-	struct io_stats __percpu *stats;
-
 	/*
 	 * settings that change how the i/o scheduler behaves
 	 */
@@ -103,33 +101,6 @@ struct deadline_data {
 	spinlock_t zone_lock;
 };
 
-/* Count one event of type 'event_type' and with I/O priority 'prio' */
-#define dd_count(dd, event_type, prio) do {				\
-	struct io_stats *io_stats = get_cpu_ptr((dd)->stats);		\
-									\
-	BUILD_BUG_ON(!__same_type((dd), struct deadline_data *));	\
-	BUILD_BUG_ON(!__same_type((prio), enum dd_prio));		\
-	local_inc(&io_stats->stats[(prio)].event_type);			\
-	put_cpu_ptr(io_stats);						\
-} while (0)
-
-/*
- * Returns the total number of dd_count(dd, event_type, prio) calls across all
- * CPUs. No locking or barriers since it is fine if the returned sum is slightly
- * outdated.
- */
-#define dd_sum(dd, event_type, prio) ({					\
-	unsigned int cpu;						\
-	u32 sum = 0;							\
-									\
-	BUILD_BUG_ON(!__same_type((dd), struct deadline_data *));	\
-	BUILD_BUG_ON(!__same_type((prio), enum dd_prio));		\
-	for_each_present_cpu(cpu)					\
-		sum += local_read(&per_cpu_ptr((dd)->stats, cpu)->	\
-				  stats[(prio)].event_type);		\
-	sum;								\
-})
-
 /* Maps an I/O priority class to a deadline scheduler priority. */
 static const enum dd_prio ioprio_class_to_prio[] = {
 	[IOPRIO_CLASS_NONE]	= DD_BE_PRIO,
@@ -233,7 +204,9 @@ static void dd_merged_requests(struct request_queue *q, struct request *req,
 	const u8 ioprio_class = dd_rq_ioclass(next);
 	const enum dd_prio prio = ioprio_class_to_prio[ioprio_class];
 
-	dd_count(dd, merged, prio);
+	lockdep_assert_held(&dd->lock);
+
+	dd->per_prio[prio].stats.merged++;
 
 	/*
 	 * if next expires before rq, assign its expire time to rq
@@ -273,7 +246,11 @@ deadline_move_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
 /* Number of requests queued for a given priority level. */
 static u32 dd_queued(struct deadline_data *dd, enum dd_prio prio)
 {
-	return dd_sum(dd, inserted, prio) - dd_sum(dd, completed, prio);
+	const struct io_stats_per_prio *stats = &dd->per_prio[prio].stats;
+
+	lockdep_assert_held(&dd->lock);
+
+	return stats->inserted - atomic_read(&stats->completed);
 }
 
 /*
@@ -463,7 +440,7 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
 done:
 	ioprio_class = dd_rq_ioclass(rq);
 	prio = ioprio_class_to_prio[ioprio_class];
-	dd_count(dd, dispatched, prio);
+	dd->per_prio[prio].stats.dispatched++;
 	/*
 	 * If the request needs its target zone locked, do it.
 	 */
@@ -542,19 +519,22 @@ static void dd_exit_sched(struct elevator_queue *e)
 
 	for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
 		struct dd_per_prio *per_prio = &dd->per_prio[prio];
+		const struct io_stats_per_prio *stats = &per_prio->stats;
+		uint32_t queued;
 
 		WARN_ON_ONCE(!list_empty(&per_prio->fifo_list[DD_READ]));
 		WARN_ON_ONCE(!list_empty(&per_prio->fifo_list[DD_WRITE]));
-		WARN_ONCE(dd_queued(dd, prio) != 0,
+
+		spin_lock(&dd->lock);
+		queued = dd_queued(dd, prio);
+		spin_unlock(&dd->lock);
+
+		WARN_ONCE(queued != 0,
 			  "statistics for priority %d: i %u m %u d %u c %u\n",
-			  prio, dd_sum(dd, inserted, prio),
-			  dd_sum(dd, merged, prio),
-			  dd_sum(dd, dispatched, prio),
-			  dd_sum(dd, completed, prio));
+			  prio, stats->inserted, stats->merged,
+			  stats->dispatched, atomic_read(&stats->completed));
 	}
 
-	free_percpu(dd->stats);
-
 	kfree(dd);
 }
 
@@ -578,11 +558,6 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
 
 	eq->elevator_data = dd;
 
-	dd->stats = alloc_percpu_gfp(typeof(*dd->stats),
-				     GFP_KERNEL | __GFP_ZERO);
-	if (!dd->stats)
-		goto free_dd;
-
 	for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
 		struct dd_per_prio *per_prio = &dd->per_prio[prio];
 
@@ -604,9 +579,6 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
 	q->elevator = eq;
 	return 0;
 
-free_dd:
-	kfree(dd);
-
 put_eq:
 	kobject_put(&eq->kobj);
 	return ret;
@@ -689,8 +661,9 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 	blk_req_zone_write_unlock(rq);
 
 	prio = ioprio_class_to_prio[ioprio_class];
+	per_prio = &dd->per_prio[prio];
 	if (!rq->elv.priv[0]) {
-		dd_count(dd, inserted, prio);
+		per_prio->stats.inserted++;
 		rq->elv.priv[0] = (void *)(uintptr_t)1;
 	}
 
@@ -701,7 +674,6 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 
 	trace_block_rq_insert(rq);
 
-	per_prio = &dd->per_prio[prio];
 	if (at_head) {
 		list_add(&rq->queuelist, &per_prio->dispatch);
 	} else {
@@ -779,7 +751,7 @@ static void dd_finish_request(struct request *rq)
 	if (!rq->elv.priv[0])
 		return;
 
-	dd_count(dd, completed, prio);
+	atomic_inc(&per_prio->stats.completed);
 
 	if (blk_queue_is_zoned(q)) {
 		unsigned long flags;
@@ -966,28 +938,44 @@ static int dd_queued_show(void *data, struct seq_file *m)
 {
 	struct request_queue *q = data;
 	struct deadline_data *dd = q->elevator->elevator_data;
+	u32 rt, be, idle;
+
+	spin_lock(&dd->lock);
+	rt = dd_queued(dd, DD_RT_PRIO);
+	be = dd_queued(dd, DD_BE_PRIO);
+	idle = dd_queued(dd, DD_IDLE_PRIO);
+	spin_unlock(&dd->lock);
+
+	seq_printf(m, "%u %u %u\n", rt, be, idle);
 
-	seq_printf(m, "%u %u %u\n", dd_queued(dd, DD_RT_PRIO),
-		   dd_queued(dd, DD_BE_PRIO),
-		   dd_queued(dd, DD_IDLE_PRIO));
 	return 0;
 }
 
 /* Number of requests owned by the block driver for a given priority. */
 static u32 dd_owned_by_driver(struct deadline_data *dd, enum dd_prio prio)
 {
-	return dd_sum(dd, dispatched, prio) + dd_sum(dd, merged, prio)
-		- dd_sum(dd, completed, prio);
+	const struct io_stats_per_prio *stats = &dd->per_prio[prio].stats;
+
+	lockdep_assert_held(&dd->lock);
+
+	return stats->dispatched + stats->merged -
+		atomic_read(&stats->completed);
 }
 
 static int dd_owned_by_driver_show(void *data, struct seq_file *m)
 {
 	struct request_queue *q = data;
 	struct deadline_data *dd = q->elevator->elevator_data;
+	u32 rt, be, idle;
+
+	spin_lock(&dd->lock);
+	rt = dd_owned_by_driver(dd, DD_RT_PRIO);
+	be = dd_owned_by_driver(dd, DD_BE_PRIO);
+	idle = dd_owned_by_driver(dd, DD_IDLE_PRIO);
+	spin_unlock(&dd->lock);
+
+	seq_printf(m, "%u %u %u\n", rt, be, idle);
 
-	seq_printf(m, "%u %u %u\n", dd_owned_by_driver(dd, DD_RT_PRIO),
-		   dd_owned_by_driver(dd, DD_BE_PRIO),
-		   dd_owned_by_driver(dd, DD_IDLE_PRIO));
 	return 0;
 }

next prev parent reply	other threads:[~2021-09-27 22:03 UTC|newest]

Thread overview: 21+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-09-27 22:03 [PATCH v2 0/4] Restore I/O priority support in the mq-deadline scheduler Bart Van Assche
2021-09-27 22:03 ` [PATCH v2 1/4] block/mq-deadline: Improve request accounting further Bart Van Assche
2021-09-28  5:45   ` Hannes Reinecke
2021-09-27 22:03 ` [PATCH v2 2/4] block/mq-deadline: Add an invariant check Bart Van Assche
2021-09-28  5:47   ` Hannes Reinecke
2021-09-27 22:03 ` Bart Van Assche [this message]
2021-09-28  5:51   ` [PATCH v2 3/4] block/mq-deadline: Stop using per-CPU counters Hannes Reinecke
2021-09-28 17:35     ` Bart Van Assche
2021-09-27 22:03 ` [PATCH v2 4/4] block/mq-deadline: Prioritize high-priority requests Bart Van Assche
2021-09-27 22:53   ` Damien Le Moal
2021-09-27 23:12     ` Bart Van Assche
2021-09-28  5:53   ` Hannes Reinecke
2021-09-28 10:36   ` Niklas Cassel
2022-03-25 12:33   ` Oleksij Rempel
2022-03-25 13:07     ` Oleksij Rempel
2022-03-25 17:05     ` Bart Van Assche
2022-03-26  8:57       ` Oleksij Rempel
2022-03-28  2:20         ` Bart Van Assche
2022-03-28 22:24           ` Bart Van Assche
2022-03-29  4:49           ` Bart Van Assche
2021-09-27 23:14 ` [PATCH v2 0/4] Restore I/O priority support in the mq-deadline scheduler Jens Axboe

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:2586b3f8c7e dfblob:b262f40f32c )
 OR (
bs:"[PATCH v2 3/4] block/mq-deadline: Stop using per-CPU counters" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20210927220328.1410161-4-bvanassche@acm.org \
    --to=bvanassche@acm.org \
    --cc=Niklas.Cassel@wdc.com \
    --cc=axboe@kernel.dk \
    --cc=damien.lemoal@wdc.com \
    --cc=hare@suse.de \
    --cc=hch@lst.de \
    --cc=jaegeuk@kernel.org \
    --cc=linux-block@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.