linux-block.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Bart Van Assche <bvanassche@acm.org>
To: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org, Christoph Hellwig <hch@lst.de>,
	Damien Le Moal <damien.lemoal@wdc.com>,
	"Martin K . Petersen" <martin.petersen@oracle.com>,
	Khazhy Kumykov <khazhy@google.com>,
	Jaegeuk Kim <jaegeuk@kernel.org>,
	Bart Van Assche <bvanassche@acm.org>
Subject: [PATCH 5/5] block/mq-deadline: Remove zone locking
Date: Tue, 14 Jun 2022 10:49:43 -0700	[thread overview]
Message-ID: <20220614174943.611369-6-bvanassche@acm.org> (raw)
In-Reply-To: <20220614174943.611369-1-bvanassche@acm.org>

Measurements have shown that limiting the queue depth to one has a
significant negative performance impact on zoned UFS devices. Hence this
patch that removes zone locking from the mq-deadline scheduler. This
patch is based on the following assumptions:
- Applications submit write requests to sequential write required zones
  in order.
- If such write requests get reordered by the software or hardware queue
  mechanism, nr_hw_queues * nr_requests - 1 retries are sufficient to
  reorder the write requests.
- It happens infrequently that zoned write requests are reordered by the
  block layer.
- Either no I/O scheduler is used or an I/O scheduler is used that
  submits write requests per zone in LBA order.

DD_BE_PRIO is selected for sequential writes to preserve the LBA order.

See also commit 5700f69178e9 ("mq-deadline: Introduce zone locking
support").

Cc: Damien Le Moal <damien.lemoal@wdc.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 block/mq-deadline.c | 74 ++++-----------------------------------------
 1 file changed, 6 insertions(+), 68 deletions(-)

diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 6ed602b2f80a..e168fc9a980a 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -104,7 +104,6 @@ struct deadline_data {
 	int prio_aging_expire;
 
 	spinlock_t lock;
-	spinlock_t zone_lock;
 };
 
 /* Maps an I/O priority class to a deadline scheduler priority. */
@@ -285,30 +284,10 @@ static struct request *
 deadline_fifo_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
 		      enum dd_data_dir data_dir)
 {
-	struct request *rq;
-	unsigned long flags;
-
 	if (list_empty(&per_prio->fifo_list[data_dir]))
 		return NULL;
 
-	rq = rq_entry_fifo(per_prio->fifo_list[data_dir].next);
-	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
-		return rq;
-
-	/*
-	 * Look for a write request that can be dispatched, that is one with
-	 * an unlocked target zone.
-	 */
-	spin_lock_irqsave(&dd->zone_lock, flags);
-	list_for_each_entry(rq, &per_prio->fifo_list[DD_WRITE], queuelist) {
-		if (blk_req_can_dispatch_to_zone(rq))
-			goto out;
-	}
-	rq = NULL;
-out:
-	spin_unlock_irqrestore(&dd->zone_lock, flags);
-
-	return rq;
+	return rq_entry_fifo(per_prio->fifo_list[data_dir].next);
 }
 
 /*
@@ -319,29 +298,7 @@ static struct request *
 deadline_next_request(struct deadline_data *dd, struct dd_per_prio *per_prio,
 		      enum dd_data_dir data_dir)
 {
-	struct request *rq;
-	unsigned long flags;
-
-	rq = per_prio->next_rq[data_dir];
-	if (!rq)
-		return NULL;
-
-	if (data_dir == DD_READ || !blk_queue_is_zoned(rq->q))
-		return rq;
-
-	/*
-	 * Look for a write request that can be dispatched, that is one with
-	 * an unlocked target zone.
-	 */
-	spin_lock_irqsave(&dd->zone_lock, flags);
-	while (rq) {
-		if (blk_req_can_dispatch_to_zone(rq))
-			break;
-		rq = deadline_latter_request(rq);
-	}
-	spin_unlock_irqrestore(&dd->zone_lock, flags);
-
-	return rq;
+	return per_prio->next_rq[data_dir];
 }
 
 /*
@@ -467,10 +424,6 @@ static struct request *__dd_dispatch_request(struct deadline_data *dd,
 	ioprio_class = dd_rq_ioclass(rq);
 	prio = ioprio_class_to_prio[ioprio_class];
 	dd->per_prio[prio].stats.dispatched++;
-	/*
-	 * If the request needs its target zone locked, do it.
-	 */
-	blk_req_zone_write_lock(rq);
 	rq->rq_flags |= RQF_STARTED;
 	return rq;
 }
@@ -640,7 +593,6 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
 	dd->fifo_batch = fifo_batch;
 	dd->prio_aging_expire = prio_aging_expire;
 	spin_lock_init(&dd->lock);
-	spin_lock_init(&dd->zone_lock);
 
 	q->elevator = eq;
 	return 0;
@@ -716,17 +668,13 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 	u8 ioprio_class = IOPRIO_PRIO_CLASS(ioprio);
 	struct dd_per_prio *per_prio;
 	enum dd_prio prio;
+	bool seq_write = blk_rq_is_seq_write(rq);
 	LIST_HEAD(free);
 
 	lockdep_assert_held(&dd->lock);
 
-	/*
-	 * This may be a requeue of a write request that has locked its
-	 * target zone. If it is the case, this releases the zone lock.
-	 */
-	blk_req_zone_write_unlock(rq);
-
-	prio = ioprio_class_to_prio[ioprio_class];
+	prio = seq_write ? DD_BE_PRIO :
+		ioprio_class_to_prio[ioprio_class];
 	per_prio = &dd->per_prio[prio];
 	if (!rq->elv.priv[0]) {
 		per_prio->stats.inserted++;
@@ -740,7 +688,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 
 	trace_block_rq_insert(rq);
 
-	if (at_head) {
+	if (at_head && !seq_write) {
 		list_add(&rq->queuelist, &per_prio->dispatch);
 		rq->fifo_time = jiffies;
 	} else {
@@ -819,16 +767,6 @@ static void dd_finish_request(struct request *rq)
 		return;
 
 	atomic_inc(&per_prio->stats.completed);
-
-	if (blk_queue_is_zoned(q)) {
-		unsigned long flags;
-
-		spin_lock_irqsave(&dd->zone_lock, flags);
-		blk_req_zone_write_unlock(rq);
-		if (!list_empty(&per_prio->fifo_list[DD_WRITE]))
-			blk_mq_sched_mark_restart_hctx(rq->mq_hctx);
-		spin_unlock_irqrestore(&dd->zone_lock, flags);
-	}
 }
 
 static bool dd_has_work_for_prio(struct dd_per_prio *per_prio)

  parent reply	other threads:[~2022-06-14 17:50 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-06-14 17:49 [PATCH 0/5] Improve zoned storage write performance Bart Van Assche
2022-06-14 17:49 ` [PATCH 1/5] block: Introduce the blk_rq_is_seq_write() function Bart Van Assche
2022-06-16 20:41   ` Jens Axboe
2022-06-16 21:16     ` Bart Van Assche
2022-06-14 17:49 ` [PATCH 2/5] scsi: Retry unaligned zoned writes Bart Van Assche
2022-06-14 21:47   ` Khazhy Kumykov
2022-06-14 22:39     ` Bart Van Assche
2022-06-14 23:50       ` Damien Le Moal
2022-06-14 23:54         ` Bart Van Assche
2022-06-15  0:55           ` Damien Le Moal
2022-06-14 23:29   ` Damien Le Moal
2022-06-14 23:56     ` Bart Van Assche
2022-06-15  1:09       ` Damien Le Moal
2022-06-15  5:49       ` Christoph Hellwig
2022-06-15  7:21         ` Damien Le Moal
2022-06-15 19:38           ` Bart Van Assche
2022-06-16  0:14             ` Damien Le Moal
2022-06-15 19:42         ` Bart Van Assche
2022-06-16 18:36         ` Bart Van Assche
2022-06-14 17:49 ` [PATCH 3/5] nvme: Make the number of retries request specific Bart Van Assche
2022-06-14 17:49 ` [PATCH 4/5] nvme: Increase the number of retries for zoned writes Bart Van Assche
2022-06-14 18:03   ` Keith Busch
2022-06-14 17:49 ` Bart Van Assche [this message]
2022-06-14 23:40   ` [PATCH 5/5] block/mq-deadline: Remove zone locking Damien Le Moal

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220614174943.611369-6-bvanassche@acm.org \
    --to=bvanassche@acm.org \
    --cc=axboe@kernel.dk \
    --cc=damien.lemoal@wdc.com \
    --cc=hch@lst.de \
    --cc=jaegeuk@kernel.org \
    --cc=khazhy@google.com \
    --cc=linux-block@vger.kernel.org \
    --cc=martin.petersen@oracle.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).