Linux block layer
 help / color / mirror / Atom feed
* [PATCH] block: Improve shared tag set performance
@ 2023-10-18 18:00 Bart Van Assche
  2023-10-20  4:41 ` Christoph Hellwig
                   ` (2 more replies)
  0 siblings, 3 replies; 15+ messages in thread
From: Bart Van Assche @ 2023-10-18 18:00 UTC (permalink / raw)
  To: Jens Axboe
  Cc: linux-block, Bart Van Assche, Christoph Hellwig,
	Martin K . Petersen, Ming Lei, Keith Busch, Damien Le Moal,
	Yu Kuai, Ed Tsai

Remove the code for fair tag sharing because it significantly hurts
performance for UFS devices. Removing this code is safe because the
legacy block layer worked fine without any equivalent fairness
algorithm.

This algorithm hurts performance for UFS devices because UFS devices
have multiple logical units. One of these logical units (WLUN) is used
to submit control commands, e.g. START STOP UNIT. If any request is
submitted to the WLUN, the queue depth is reduced from 31 to 15 or
lower for data LUNs.

See also https://lore.kernel.org/linux-scsi/20221229030645.11558-1-ed.tsai@mediatek.com/

Note: it has been attempted to rework this algorithm. See also "[PATCH
RFC 0/7] blk-mq: improve tag fair sharing"
(https://lore.kernel.org/linux-block/20230618160738.54385-1-yukuai1@huaweicloud.com/).
Given the complexity of that patch series, I do not expect that patch
series to be merged.

Cc: Christoph Hellwig <hch@lst.de>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Keith Busch <kbusch@kernel.org>
Cc: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Cc: Yu Kuai <yukuai1@huaweicloud.com>
Cc: Ed Tsai <ed.tsai@mediatek.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 block/blk-mq-tag.c |  4 ----
 block/blk-mq.c     |  3 ---
 block/blk-mq.h     | 39 ---------------------------------------
 3 files changed, 46 deletions(-)

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index cc57e2dd9a0b..25334bfcabf8 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -105,10 +105,6 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 static int __blk_mq_get_tag(struct blk_mq_alloc_data *data,
 			    struct sbitmap_queue *bt)
 {
-	if (!data->q->elevator && !(data->flags & BLK_MQ_REQ_RESERVED) &&
-			!hctx_may_queue(data->hctx, bt))
-		return BLK_MQ_NO_TAG;
-
 	if (data->shallow_depth)
 		return sbitmap_queue_get_shallow(bt, data->shallow_depth);
 	else
diff --git a/block/blk-mq.c b/block/blk-mq.c
index e2d11183f62e..502dafa76716 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1760,9 +1760,6 @@ bool __blk_mq_alloc_driver_tag(struct request *rq)
 	if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) {
 		bt = &rq->mq_hctx->tags->breserved_tags;
 		tag_offset = 0;
-	} else {
-		if (!hctx_may_queue(rq->mq_hctx, bt))
-			return false;
 	}
 
 	tag = __sbitmap_queue_get(bt);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index f75a9ecfebde..14a22f6d3fdf 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -407,45 +407,6 @@ static inline void blk_mq_free_requests(struct list_head *list)
 	}
 }
 
-/*
- * For shared tag users, we track the number of currently active users
- * and attempt to provide a fair share of the tag depth for each of them.
- */
-static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
-				  struct sbitmap_queue *bt)
-{
-	unsigned int depth, users;
-
-	if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED))
-		return true;
-
-	/*
-	 * Don't try dividing an ant
-	 */
-	if (bt->sb.depth == 1)
-		return true;
-
-	if (blk_mq_is_shared_tags(hctx->flags)) {
-		struct request_queue *q = hctx->queue;
-
-		if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
-			return true;
-	} else {
-		if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
-			return true;
-	}
-
-	users = READ_ONCE(hctx->tags->active_queues);
-	if (!users)
-		return true;
-
-	/*
-	 * Allow at least some tags
-	 */
-	depth = max((bt->sb.depth + users - 1) / users, 4U);
-	return __blk_mq_active_requests(hctx) < depth;
-}
-
 /* run the code block in @dispatch_ops with rcu/srcu read lock held */
 #define __blk_mq_run_dispatch_ops(q, check_sleep, dispatch_ops)	\
 do {								\

^ permalink raw reply related	[flat|nested] 15+ messages in thread
* [PATCH] block: Improve shared tag set performance
@ 2023-01-02 17:39 Bart Van Assche
  0 siblings, 0 replies; 15+ messages in thread
From: Bart Van Assche @ 2023-01-02 17:39 UTC (permalink / raw)
  To: Jens Axboe
  Cc: linux-block, Bart Van Assche, Christoph Hellwig,
	Martin K . Petersen, Ming Lei, Keith Busch, Damien Le Moal,
	Ed Tsai

Remove the code for fair tag sharing because it significantly hurts
performance for UFS devices. Removing this code is safe because the
legacy block layer worked fine without any equivalent fairness
algorithm.

This algorithm hurts performance for UFS devices because UFS devices
have multiple logical units. One of these logical units (WLUN) is used
to submit control commands, e.g. START STOP UNIT. If any request is
submitted to the WLUN, the queue depth is reduced from 31 to 15 or
lower for data LUNs.

See also https://lore.kernel.org/linux-scsi/20221229030645.11558-1-ed.tsai@mediatek.com/

Cc: Christoph Hellwig <hch@lst.de>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Ming Lei <ming.lei@redhat.com>
Cc: Keith Busch <kbusch@kernel.org>
Cc: Damien Le Moal <damien.lemoal@opensource.wdc.com>
Cc: Ed Tsai <ed.tsai@mediatek.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
 block/blk-mq-debugfs.c |  2 --
 block/blk-mq-tag.c     |  8 --------
 block/blk-mq.c         |  3 ---
 block/blk-mq.h         | 40 ----------------------------------------
 4 files changed, 53 deletions(-)

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index bd942341b638..00af8d2c8816 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -426,8 +426,6 @@ static void blk_mq_debugfs_tags_show(struct seq_file *m,
 {
 	seq_printf(m, "nr_tags=%u\n", tags->nr_tags);
 	seq_printf(m, "nr_reserved_tags=%u\n", tags->nr_reserved_tags);
-	seq_printf(m, "active_queues=%d\n",
-		   atomic_read(&tags->active_queues));
 
 	seq_puts(m, "\nbitmap_tags:\n");
 	sbitmap_queue_show(&tags->bitmap_tags, m);
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 9eb968e14d31..e9243cae41c7 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -53,8 +53,6 @@ void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
 		set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state);
 	}
 
-	users = atomic_inc_return(&hctx->tags->active_queues);
-
 	blk_mq_update_wake_batch(hctx->tags, users);
 }
 
@@ -88,8 +86,6 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 			return;
 	}
 
-	users = atomic_dec_return(&tags->active_queues);
-
 	blk_mq_update_wake_batch(tags, users);
 
 	blk_mq_tag_wakeup_all(tags, false);
@@ -98,10 +94,6 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 static int __blk_mq_get_tag(struct blk_mq_alloc_data *data,
 			    struct sbitmap_queue *bt)
 {
-	if (!data->q->elevator && !(data->flags & BLK_MQ_REQ_RESERVED) &&
-			!hctx_may_queue(data->hctx, bt))
-		return BLK_MQ_NO_TAG;
-
 	if (data->shallow_depth)
 		return sbitmap_queue_get_shallow(bt, data->shallow_depth);
 	else
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 4a7805390cf8..c570f134850a 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1698,9 +1698,6 @@ static bool __blk_mq_alloc_driver_tag(struct request *rq)
 	if (blk_mq_tag_is_reserved(rq->mq_hctx->sched_tags, rq->internal_tag)) {
 		bt = &rq->mq_hctx->tags->breserved_tags;
 		tag_offset = 0;
-	} else {
-		if (!hctx_may_queue(rq->mq_hctx, bt))
-			return false;
 	}
 
 	tag = __sbitmap_queue_get(bt);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 0b2870839cdd..908f830931f0 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -334,46 +334,6 @@ static inline void blk_mq_free_requests(struct list_head *list)
 	}
 }
 
-/*
- * For shared tag users, we track the number of currently active users
- * and attempt to provide a fair share of the tag depth for each of them.
- */
-static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
-				  struct sbitmap_queue *bt)
-{
-	unsigned int depth, users;
-
-	if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED))
-		return true;
-
-	/*
-	 * Don't try dividing an ant
-	 */
-	if (bt->sb.depth == 1)
-		return true;
-
-	if (blk_mq_is_shared_tags(hctx->flags)) {
-		struct request_queue *q = hctx->queue;
-
-		if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
-			return true;
-	} else {
-		if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
-			return true;
-	}
-
-	users = atomic_read(&hctx->tags->active_queues);
-
-	if (!users)
-		return true;
-
-	/*
-	 * Allow at least some tags
-	 */
-	depth = max((bt->sb.depth + users - 1) / users, 4U);
-	return __blk_mq_active_requests(hctx) < depth;
-}
-
 /* run the code block in @dispatch_ops with rcu/srcu read lock held */
 #define __blk_mq_run_dispatch_ops(q, check_sleep, dispatch_ops)	\
 do {								\

^ permalink raw reply related	[flat|nested] 15+ messages in thread

end of thread, other threads:[~2023-10-23  3:45 UTC | newest]

Thread overview: 15+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-10-18 18:00 [PATCH] block: Improve shared tag set performance Bart Van Assche
2023-10-20  4:41 ` Christoph Hellwig
2023-10-20 16:17   ` Bart Van Assche
2023-10-20 16:25     ` Keith Busch
2023-10-20 16:45       ` Bart Van Assche
2023-10-20 17:09         ` Keith Busch
2023-10-20 17:54           ` Bart Van Assche
2023-10-21  1:31             ` Ming Lei
2023-10-21 16:13               ` Bart Van Assche
2023-10-23  3:44                 ` Ming Lei
2023-10-20 19:11 ` Bart Van Assche
2023-10-21  7:32 ` Yu Kuai
2023-10-21 16:21   ` Bart Van Assche
2023-10-23  1:11     ` Yu Kuai
  -- strict thread matches above, loose matches on Subject: below --
2023-01-02 17:39 Bart Van Assche

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox