public inbox for linux-block@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH RFC 0/7] blk-mq: improve tag fair sharing
@ 2023-06-18 16:07 Yu Kuai
  2023-06-18 16:07 ` [PATCH RFC 1/7] blk-mq: factor out a structure from blk_mq_tags to control tag sharing Yu Kuai
                   ` (7 more replies)
  0 siblings, 8 replies; 21+ messages in thread
From: Yu Kuai @ 2023-06-18 16:07 UTC (permalink / raw)
  To: bvanassche, axboe
  Cc: linux-block, linux-kernel, yukuai3, yukuai1, yi.zhang, yangerkun

From: Yu Kuai <yukuai3@huawei.com>

This is not a formal version and not fully tested, I send this RFC
because I want to make sure if people think doing this is meaningful,
before I spend too much time on this.

This patchset tries to refactor how tag is shared:
 - patch 2 delay tag sharing from issue io to when getting a driver tag failed;
 - patch 3 support to access which queues/hctxs is sharing tags through
 blk_mq_tags;
 - patch 4 move the calculation of how many tags are available from
 hctx_may_queue() to when a queue/hctx starts or stops sharing tags.
 - patch 5 record new information that how many times fail to get driver
 tag recently; And patch 7 use this to adjust available tags for each
 shared queue.

Yu Kuai (7):
  blk-mq: factor out a structure from blk_mq_tags to control tag sharing
  blk-mq: delay tag fair sharing until fail to get driver tag
  blk-mq: support to track active queues from blk_mq_tags
  blk-mq: precalculate available tags for hctx_may_queue()
  blk-mq: record the number of times fail to get driver tag while
    sharing tags
  blk-mq: move active request counter to struct tag_sharing
  blk-mq: allow shared queue to get more driver tags

 block/blk-core.c       |   2 -
 block/blk-mq-debugfs.c |   6 +-
 block/blk-mq-tag.c     | 154 ++++++++++++++++++++++++++++++++++++++---
 block/blk-mq.c         |  10 ++-
 block/blk-mq.h         |  39 ++++++-----
 include/linux/blk-mq.h |  24 ++++---
 include/linux/blkdev.h |  13 +++-
 7 files changed, 201 insertions(+), 47 deletions(-)

-- 
2.39.2


^ permalink raw reply	[flat|nested] 21+ messages in thread

* [PATCH RFC 1/7] blk-mq: factor out a structure from blk_mq_tags to control tag sharing
  2023-06-18 16:07 [PATCH RFC 0/7] blk-mq: improve tag fair sharing Yu Kuai
@ 2023-06-18 16:07 ` Yu Kuai
  2023-07-06 17:43   ` Bart Van Assche
  2023-06-18 16:07 ` [PATCH RFC 2/7] blk-mq: delay tag fair sharing until fail to get driver tag Yu Kuai
                   ` (6 subsequent siblings)
  7 siblings, 1 reply; 21+ messages in thread
From: Yu Kuai @ 2023-06-18 16:07 UTC (permalink / raw)
  To: bvanassche, axboe
  Cc: linux-block, linux-kernel, yukuai3, yukuai1, yi.zhang, yangerkun

From: Yu Kuai <yukuai3@huawei.com>

Currently tags are fairly shared, and the new structure contains only one
field, active_queues. There are no functional changes; this prepares to
refactor tag sharing.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 block/blk-mq-debugfs.c |  2 +-
 block/blk-mq-tag.c     |  8 ++++----
 block/blk-mq.h         |  2 +-
 include/linux/blk-mq.h | 10 ++++++++--
 4 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index c3b5930106b2..431aaa3eb181 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -401,7 +401,7 @@ static void blk_mq_debugfs_tags_show(struct seq_file *m,
 	seq_printf(m, "nr_tags=%u\n", tags->nr_tags);
 	seq_printf(m, "nr_reserved_tags=%u\n", tags->nr_reserved_tags);
 	seq_printf(m, "active_queues=%d\n",
-		   READ_ONCE(tags->active_queues));
+		   READ_ONCE(tags->ctl.active_queues));
 
 	seq_puts(m, "\nbitmap_tags:\n");
 	sbitmap_queue_show(&tags->bitmap_tags, m);
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index cc57e2dd9a0b..fe41a0d34fc0 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -57,8 +57,8 @@ void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
 	}
 
 	spin_lock_irq(&tags->lock);
-	users = tags->active_queues + 1;
-	WRITE_ONCE(tags->active_queues, users);
+	users = tags->ctl.active_queues + 1;
+	WRITE_ONCE(tags->ctl.active_queues, users);
 	blk_mq_update_wake_batch(tags, users);
 	spin_unlock_irq(&tags->lock);
 }
@@ -94,8 +94,8 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 	}
 
 	spin_lock_irq(&tags->lock);
-	users = tags->active_queues - 1;
-	WRITE_ONCE(tags->active_queues, users);
+	users = tags->ctl.active_queues - 1;
+	WRITE_ONCE(tags->ctl.active_queues, users);
 	blk_mq_update_wake_batch(tags, users);
 	spin_unlock_irq(&tags->lock);
 
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 1743857e0b01..ca1c13127868 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -412,7 +412,7 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
 			return true;
 	}
 
-	users = READ_ONCE(hctx->tags->active_queues);
+	users = READ_ONCE(hctx->tags->ctl.active_queues);
 	if (!users)
 		return true;
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index f401067ac03a..8d2cd6b9d305 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -733,13 +733,16 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
 		blk_opf_t opf, blk_mq_req_flags_t flags,
 		unsigned int hctx_idx);
 
+struct tag_sharing_ctl {
+	unsigned int active_queues;
+};
+
 /*
  * Tag address space map.
  */
 struct blk_mq_tags {
 	unsigned int nr_tags;
 	unsigned int nr_reserved_tags;
-	unsigned int active_queues;
 
 	struct sbitmap_queue bitmap_tags;
 	struct sbitmap_queue breserved_tags;
@@ -750,9 +753,12 @@ struct blk_mq_tags {
 
 	/*
 	 * used to clear request reference in rqs[] before freeing one
-	 * request pool
+	 * request pool, and to protect tag_sharing_ctl.
 	 */
 	spinlock_t lock;
+
+	/* used when tags is shared for multiple request_queue or hctx. */
+	struct tag_sharing_ctl ctl;
 };
 
 static inline struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags,
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH RFC 2/7] blk-mq: delay tag fair sharing until fail to get driver tag
  2023-06-18 16:07 [PATCH RFC 0/7] blk-mq: improve tag fair sharing Yu Kuai
  2023-06-18 16:07 ` [PATCH RFC 1/7] blk-mq: factor out a structure from blk_mq_tags to control tag sharing Yu Kuai
@ 2023-06-18 16:07 ` Yu Kuai
  2023-06-19  5:55   ` Hannes Reinecke
  2023-07-06 18:00   ` Bart Van Assche
  2023-06-18 16:07 ` [PATCH RFC 3/7] blk-mq: support to track active queues from blk_mq_tags Yu Kuai
                   ` (5 subsequent siblings)
  7 siblings, 2 replies; 21+ messages in thread
From: Yu Kuai @ 2023-06-18 16:07 UTC (permalink / raw)
  To: bvanassche, axboe
  Cc: linux-block, linux-kernel, yukuai3, yukuai1, yi.zhang, yangerkun

From: Yu Kuai <yukuai3@huawei.com>

Starting tag fair sharing when a device starts to issue io will waste
resources: the same number of tags will be assigned to each disk/hctx,
and such tags can't be used for other disks/hctxs, which means a disk/hctx
can't use more than its assigned tags even if there are still lots of tags
assigned to other disks that are unused.

Add a new api blk_mq_driver_tag_busy(), it will be called when get
driver tag failed, and move tag sharing from blk_mq_tag_busy() to
blk_mq_driver_tag_busy().

This approach will work well if total tags are not exhausted, and follow
up patches will try to refactor how tags are shared to handle this case.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 block/blk-mq-debugfs.c |  4 ++-
 block/blk-mq-tag.c     | 60 ++++++++++++++++++++++++++++++++++--------
 block/blk-mq.c         |  4 ++-
 block/blk-mq.h         | 13 ++++++---
 include/linux/blk-mq.h |  6 +++--
 include/linux/blkdev.h |  1 +
 6 files changed, 70 insertions(+), 18 deletions(-)

diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
index 431aaa3eb181..de5a911b07c2 100644
--- a/block/blk-mq-debugfs.c
+++ b/block/blk-mq-debugfs.c
@@ -400,8 +400,10 @@ static void blk_mq_debugfs_tags_show(struct seq_file *m,
 {
 	seq_printf(m, "nr_tags=%u\n", tags->nr_tags);
 	seq_printf(m, "nr_reserved_tags=%u\n", tags->nr_reserved_tags);
-	seq_printf(m, "active_queues=%d\n",
+	seq_printf(m, "active_queues=%u\n",
 		   READ_ONCE(tags->ctl.active_queues));
+	seq_printf(m, "share_queues=%u\n",
+		   READ_ONCE(tags->ctl.share_queues));
 
 	seq_puts(m, "\nbitmap_tags:\n");
 	sbitmap_queue_show(&tags->bitmap_tags, m);
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index fe41a0d34fc0..1c2bde917195 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -29,6 +29,32 @@ static void blk_mq_update_wake_batch(struct blk_mq_tags *tags,
 			users);
 }
 
+void __blk_mq_driver_tag_busy(struct blk_mq_hw_ctx *hctx)
+{
+	struct blk_mq_tags *tags = hctx->tags;
+
+	/*
+	 * calling test_bit() prior to test_and_set_bit() is intentional,
+	 * it avoids dirtying the cacheline if the queue is already active.
+	 */
+	if (blk_mq_is_shared_tags(hctx->flags)) {
+		struct request_queue *q = hctx->queue;
+
+		if (test_bit(QUEUE_FLAG_HCTX_BUSY, &q->queue_flags) ||
+		    test_and_set_bit(QUEUE_FLAG_HCTX_BUSY, &q->queue_flags))
+			return;
+	} else {
+		if (test_bit(BLK_MQ_S_DTAG_BUSY, &hctx->state) ||
+		    test_and_set_bit(BLK_MQ_S_DTAG_BUSY, &hctx->state))
+			return;
+	}
+
+	spin_lock_irq(&tags->lock);
+	WRITE_ONCE(tags->ctl.share_queues, tags->ctl.active_queues);
+	blk_mq_update_wake_batch(tags, tags->ctl.share_queues);
+	spin_unlock_irq(&tags->lock);
+}
+
 /*
  * If a previously inactive queue goes active, bump the active user count.
  * We need to do this before try to allocate driver tag, then even if fail
@@ -37,7 +63,6 @@ static void blk_mq_update_wake_batch(struct blk_mq_tags *tags,
  */
 void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
 {
-	unsigned int users;
 	struct blk_mq_tags *tags = hctx->tags;
 
 	/*
@@ -57,9 +82,7 @@ void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
 	}
 
 	spin_lock_irq(&tags->lock);
-	users = tags->ctl.active_queues + 1;
-	WRITE_ONCE(tags->ctl.active_queues, users);
-	blk_mq_update_wake_batch(tags, users);
+	WRITE_ONCE(tags->ctl.active_queues, tags->ctl.active_queues + 1);
 	spin_unlock_irq(&tags->lock);
 }
 
@@ -73,6 +96,14 @@ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
 		sbitmap_queue_wake_all(&tags->breserved_tags);
 }
 
+static void __blk_mq_driver_tag_idle(struct blk_mq_hw_ctx *hctx)
+{
+	if (blk_mq_is_shared_tags(hctx->flags))
+		clear_bit(QUEUE_FLAG_HCTX_BUSY, &hctx->queue->queue_flags);
+	else
+		clear_bit(BLK_MQ_S_DTAG_BUSY, &hctx->state);
+}
+
 /*
  * If a previously busy queue goes inactive, potential waiters could now
  * be allowed to queue. Wake them up and check.
@@ -80,7 +111,6 @@ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
 void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 {
 	struct blk_mq_tags *tags = hctx->tags;
-	unsigned int users;
 
 	if (blk_mq_is_shared_tags(hctx->flags)) {
 		struct request_queue *q = hctx->queue;
@@ -94,9 +124,10 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 	}
 
 	spin_lock_irq(&tags->lock);
-	users = tags->ctl.active_queues - 1;
-	WRITE_ONCE(tags->ctl.active_queues, users);
-	blk_mq_update_wake_batch(tags, users);
+	__blk_mq_driver_tag_idle(hctx);
+	WRITE_ONCE(tags->ctl.active_queues, tags->ctl.active_queues - 1);
+	WRITE_ONCE(tags->ctl.share_queues, tags->ctl.active_queues);
+	blk_mq_update_wake_batch(tags, tags->ctl.share_queues);
 	spin_unlock_irq(&tags->lock);
 
 	blk_mq_tag_wakeup_all(tags, false);
@@ -105,14 +136,21 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 static int __blk_mq_get_tag(struct blk_mq_alloc_data *data,
 			    struct sbitmap_queue *bt)
 {
+	int ret = BLK_MQ_NO_TAG;
+
 	if (!data->q->elevator && !(data->flags & BLK_MQ_REQ_RESERVED) &&
 			!hctx_may_queue(data->hctx, bt))
-		return BLK_MQ_NO_TAG;
+		goto out;
 
+	/* shallow_depth is only used for elevator */
 	if (data->shallow_depth)
 		return sbitmap_queue_get_shallow(bt, data->shallow_depth);
-	else
-		return __sbitmap_queue_get(bt);
+
+	ret = __sbitmap_queue_get(bt);
+out:
+	if (ret == BLK_MQ_NO_TAG && !(data->rq_flags & RQF_SCHED_TAGS))
+		blk_mq_driver_tag_busy(data->hctx);
+	return ret;
 }
 
 unsigned long blk_mq_get_tags(struct blk_mq_alloc_data *data, int nr_tags,
diff --git a/block/blk-mq.c b/block/blk-mq.c
index da650a2c4ca1..171ee4ac97ef 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1753,8 +1753,10 @@ static bool __blk_mq_alloc_driver_tag(struct request *rq)
 
 bool __blk_mq_get_driver_tag(struct blk_mq_hw_ctx *hctx, struct request *rq)
 {
-	if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_alloc_driver_tag(rq))
+	if (rq->tag == BLK_MQ_NO_TAG && !__blk_mq_alloc_driver_tag(rq)) {
+		blk_mq_driver_tag_busy(hctx);
 		return false;
+	}
 
 	if ((hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED) &&
 			!(rq->rq_flags & RQF_MQ_INFLIGHT)) {
diff --git a/block/blk-mq.h b/block/blk-mq.h
index ca1c13127868..01441a5e9910 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -193,8 +193,9 @@ static inline struct sbq_wait_state *bt_wait_ptr(struct sbitmap_queue *bt,
 	return sbq_wait_ptr(bt, &hctx->wait_index);
 }
 
-void __blk_mq_tag_busy(struct blk_mq_hw_ctx *);
-void __blk_mq_tag_idle(struct blk_mq_hw_ctx *);
+void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx);
+void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx);
+void __blk_mq_driver_tag_busy(struct blk_mq_hw_ctx *hctx);
 
 static inline void blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
 {
@@ -208,6 +209,12 @@ static inline void blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 		__blk_mq_tag_idle(hctx);
 }
 
+static inline void blk_mq_driver_tag_busy(struct blk_mq_hw_ctx *hctx)
+{
+	if (hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED)
+		__blk_mq_driver_tag_busy(hctx);
+}
+
 static inline bool blk_mq_tag_is_reserved(struct blk_mq_tags *tags,
 					  unsigned int tag)
 {
@@ -412,7 +419,7 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
 			return true;
 	}
 
-	users = READ_ONCE(hctx->tags->ctl.active_queues);
+	users = READ_ONCE(hctx->tags->ctl.share_queues);
 	if (!users)
 		return true;
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 8d2cd6b9d305..bc3ac22edb07 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -675,10 +675,11 @@ enum {
 
 	BLK_MQ_S_STOPPED	= 0,
 	BLK_MQ_S_TAG_ACTIVE	= 1,
-	BLK_MQ_S_SCHED_RESTART	= 2,
+	BLK_MQ_S_DTAG_BUSY	= 2,
+	BLK_MQ_S_SCHED_RESTART	= 3,
 
 	/* hw queue is inactive after all its CPUs become offline */
-	BLK_MQ_S_INACTIVE	= 3,
+	BLK_MQ_S_INACTIVE	= 4,
 
 	BLK_MQ_MAX_DEPTH	= 10240,
 
@@ -735,6 +736,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
 
 struct tag_sharing_ctl {
 	unsigned int active_queues;
+	unsigned int share_queues;
 };
 
 /*
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index ed44a997f629..0994707f6a68 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -546,6 +546,7 @@ struct request_queue {
 #define QUEUE_FLAG_DAX		19	/* device supports DAX */
 #define QUEUE_FLAG_STATS	20	/* track IO start and completion times */
 #define QUEUE_FLAG_REGISTERED	22	/* queue has been registered to a disk */
+#define QUEUE_FLAG_HCTX_BUSY	23	/* at least one blk-mq hctx failed to get driver tag */
 #define QUEUE_FLAG_QUIESCED	24	/* queue has been quiesced */
 #define QUEUE_FLAG_PCI_P2PDMA	25	/* device supports PCI p2p requests */
 #define QUEUE_FLAG_ZONE_RESETALL 26	/* supports Zone Reset All */
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH RFC 3/7] blk-mq: support to track active queues from blk_mq_tags
  2023-06-18 16:07 [PATCH RFC 0/7] blk-mq: improve tag fair sharing Yu Kuai
  2023-06-18 16:07 ` [PATCH RFC 1/7] blk-mq: factor out a structure from blk_mq_tags to control tag sharing Yu Kuai
  2023-06-18 16:07 ` [PATCH RFC 2/7] blk-mq: delay tag fair sharing until fail to get driver tag Yu Kuai
@ 2023-06-18 16:07 ` Yu Kuai
  2023-07-06 18:01   ` Bart Van Assche
  2023-06-18 16:07 ` [PATCH RFC 4/7] blk-mq: precalculate available tags for hctx_may_queue() Yu Kuai
                   ` (4 subsequent siblings)
  7 siblings, 1 reply; 21+ messages in thread
From: Yu Kuai @ 2023-06-18 16:07 UTC (permalink / raw)
  To: bvanassche, axboe
  Cc: linux-block, linux-kernel, yukuai3, yukuai1, yi.zhang, yangerkun

From: Yu Kuai <yukuai3@huawei.com>

In order to refactor how tags are shared, it's necessary to acquire some
information for each disk/hctx, so that more tags can be assigned to the
one with higher pressure.

Prepare to refactor tag sharing.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 block/blk-mq-tag.c     | 13 +++++++++++++
 include/linux/blk-mq.h |  2 ++
 include/linux/blkdev.h |  5 +++++
 3 files changed, 20 insertions(+)

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 1c2bde917195..8c527e68d4e4 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -64,6 +64,7 @@ void __blk_mq_driver_tag_busy(struct blk_mq_hw_ctx *hctx)
 void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
 {
 	struct blk_mq_tags *tags = hctx->tags;
+	struct tag_sharing *tag_sharing;
 
 	/*
 	 * calling test_bit() prior to test_and_set_bit() is intentional,
@@ -75,13 +76,18 @@ void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
 		if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) ||
 		    test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
 			return;
+
+		tag_sharing = &q->tag_sharing;
 	} else {
 		if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) ||
 		    test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
 			return;
+
+		tag_sharing = &hctx->tag_sharing;
 	}
 
 	spin_lock_irq(&tags->lock);
+	list_add(&tag_sharing->node, &tags->ctl.head);
 	WRITE_ONCE(tags->ctl.active_queues, tags->ctl.active_queues + 1);
 	spin_unlock_irq(&tags->lock);
 }
@@ -111,6 +117,7 @@ static void __blk_mq_driver_tag_idle(struct blk_mq_hw_ctx *hctx)
 void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 {
 	struct blk_mq_tags *tags = hctx->tags;
+	struct tag_sharing *tag_sharing;
 
 	if (blk_mq_is_shared_tags(hctx->flags)) {
 		struct request_queue *q = hctx->queue;
@@ -118,12 +125,17 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 		if (!test_and_clear_bit(QUEUE_FLAG_HCTX_ACTIVE,
 					&q->queue_flags))
 			return;
+
+		tag_sharing = &q->tag_sharing;
 	} else {
 		if (!test_and_clear_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
 			return;
+
+		tag_sharing = &hctx->tag_sharing;
 	}
 
 	spin_lock_irq(&tags->lock);
+	list_del_init(&tag_sharing->node);
 	__blk_mq_driver_tag_idle(hctx);
 	WRITE_ONCE(tags->ctl.active_queues, tags->ctl.active_queues - 1);
 	WRITE_ONCE(tags->ctl.share_queues, tags->ctl.active_queues);
@@ -619,6 +631,7 @@ struct blk_mq_tags *blk_mq_init_tags(unsigned int total_tags,
 	tags->nr_tags = total_tags;
 	tags->nr_reserved_tags = reserved_tags;
 	spin_lock_init(&tags->lock);
+	INIT_LIST_HEAD(&tags->ctl.head);
 
 	if (blk_mq_init_bitmaps(&tags->bitmap_tags, &tags->breserved_tags,
 				total_tags, reserved_tags, node,
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index bc3ac22edb07..639d618e6ca8 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -390,6 +390,7 @@ struct blk_mq_hw_ctx {
 	 * assigned when a request is dispatched from a hardware queue.
 	 */
 	struct blk_mq_tags	*tags;
+	struct tag_sharing	tag_sharing;
 	/**
 	 * @sched_tags: Tags owned by I/O scheduler. If there is an I/O
 	 * scheduler associated with a request queue, a tag is assigned when
@@ -737,6 +738,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
 struct tag_sharing_ctl {
 	unsigned int active_queues;
 	unsigned int share_queues;
+	struct list_head head;
 };
 
 /*
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 0994707f6a68..62f8fcc20c30 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -375,6 +375,10 @@ struct blk_independent_access_ranges {
 	struct blk_independent_access_range	ia_range[];
 };
 
+struct tag_sharing {
+	struct list_head node;
+};
+
 struct request_queue {
 	struct request		*last_merge;
 	struct elevator_queue	*elevator;
@@ -513,6 +517,7 @@ struct request_queue {
 
 	struct blk_mq_tag_set	*tag_set;
 	struct list_head	tag_set_list;
+	struct tag_sharing	tag_sharing;
 
 	struct dentry		*debugfs_dir;
 	struct dentry		*sched_debugfs_dir;
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH RFC 4/7] blk-mq: precalculate available tags for hctx_may_queue()
  2023-06-18 16:07 [PATCH RFC 0/7] blk-mq: improve tag fair sharing Yu Kuai
                   ` (2 preceding siblings ...)
  2023-06-18 16:07 ` [PATCH RFC 3/7] blk-mq: support to track active queues from blk_mq_tags Yu Kuai
@ 2023-06-18 16:07 ` Yu Kuai
  2023-07-06 18:13   ` Bart Van Assche
  2023-06-18 16:07 ` [PATCH RFC 5/7] blk-mq: record the number of times fail to get driver tag while sharing tags Yu Kuai
                   ` (3 subsequent siblings)
  7 siblings, 1 reply; 21+ messages in thread
From: Yu Kuai @ 2023-06-18 16:07 UTC (permalink / raw)
  To: bvanassche, axboe
  Cc: linux-block, linux-kernel, yukuai3, yukuai1, yi.zhang, yangerkun

From: Yu Kuai <yukuai3@huawei.com>

Currently, hctx_may_queue() only needs to know how many queues are sharing
tags, then calculates how many tags are available for each queue by fair
sharing. In order to refactor how tags are shared, the calculation will be
more complicated; however, hctx_may_queue() is the fast path, hence
precalculate available tags and prepare to refactor tag sharing.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 block/blk-mq-tag.c     | 19 +++++++++++++++++++
 block/blk-mq.c         |  3 +++
 block/blk-mq.h         | 14 +++++---------
 include/linux/blkdev.h |  3 ++-
 4 files changed, 29 insertions(+), 10 deletions(-)

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 8c527e68d4e4..e0137206c02b 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -14,6 +14,22 @@
 #include "blk-mq.h"
 #include "blk-mq-sched.h"
 
+static void blk_mq_update_available_driver_tags(struct blk_mq_hw_ctx *hctx)
+{
+	struct blk_mq_tags *tags = hctx->tags;
+	unsigned int nr_tags;
+	struct tag_sharing *tag_sharing;
+
+	if (tags->ctl.share_queues <= 1)
+		nr_tags = tags->nr_tags;
+	else
+		nr_tags = max((tags->nr_tags + tags->ctl.share_queues - 1) /
+			       tags->ctl.share_queues, 4U);
+
+	list_for_each_entry(tag_sharing, &tags->ctl.head, node)
+		tag_sharing->available_tags = nr_tags;
+}
+
 /*
  * Recalculate wakeup batch when tag is shared by hctx.
  */
@@ -51,6 +67,7 @@ void __blk_mq_driver_tag_busy(struct blk_mq_hw_ctx *hctx)
 
 	spin_lock_irq(&tags->lock);
 	WRITE_ONCE(tags->ctl.share_queues, tags->ctl.active_queues);
+	blk_mq_update_available_driver_tags(hctx);
 	blk_mq_update_wake_batch(tags, tags->ctl.share_queues);
 	spin_unlock_irq(&tags->lock);
 }
@@ -136,9 +153,11 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 
 	spin_lock_irq(&tags->lock);
 	list_del_init(&tag_sharing->node);
+	tag_sharing->available_tags = tags->nr_tags;
 	__blk_mq_driver_tag_idle(hctx);
 	WRITE_ONCE(tags->ctl.active_queues, tags->ctl.active_queues - 1);
 	WRITE_ONCE(tags->ctl.share_queues, tags->ctl.active_queues);
+	blk_mq_update_available_driver_tags(hctx);
 	blk_mq_update_wake_batch(tags, tags->ctl.share_queues);
 	spin_unlock_irq(&tags->lock);
 
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 171ee4ac97ef..771802ff1d45 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3621,6 +3621,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
 	cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD, &hctx->cpuhp_dead);
 
 	hctx->tags = set->tags[hctx_idx];
+	hctx->tag_sharing.available_tags = hctx->tags->nr_tags;
 
 	if (set->ops->init_hctx &&
 	    set->ops->init_hctx(hctx, set->driver_data, hctx_idx))
@@ -3881,6 +3882,7 @@ static void blk_mq_map_swqueue(struct request_queue *q)
 		}
 
 		hctx->tags = set->tags[i];
+		hctx->tag_sharing.available_tags = hctx->tags->nr_tags;
 		WARN_ON(!hctx->tags);
 
 		/*
@@ -4234,6 +4236,7 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 	spin_lock_init(&q->requeue_lock);
 
 	q->nr_requests = set->queue_depth;
+	q->tag_sharing.available_tags = set->queue_depth;
 
 	blk_mq_init_cpu_queues(q, set->nr_hw_queues);
 	blk_mq_add_queue_tag_set(set, q);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 01441a5e9910..fcfb040efbbd 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -398,7 +398,7 @@ static inline void blk_mq_free_requests(struct list_head *list)
 static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
 				  struct sbitmap_queue *bt)
 {
-	unsigned int depth, users;
+	unsigned int depth;
 
 	if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED))
 		return true;
@@ -414,19 +414,15 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
 
 		if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
 			return true;
+
+		depth = READ_ONCE(q->tag_sharing.available_tags);
 	} else {
 		if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
 			return true;
-	}
 
-	users = READ_ONCE(hctx->tags->ctl.share_queues);
-	if (!users)
-		return true;
+		depth = READ_ONCE(hctx->tag_sharing.available_tags);
+	}
 
-	/*
-	 * Allow at least some tags
-	 */
-	depth = max((bt->sb.depth + users - 1) / users, 4U);
 	return __blk_mq_active_requests(hctx) < depth;
 }
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 62f8fcc20c30..e5111bedfd8d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -376,7 +376,8 @@ struct blk_independent_access_ranges {
 };
 
 struct tag_sharing {
-	struct list_head node;
+	struct list_head	node;
+	unsigned int		available_tags;
 };
 
 struct request_queue {
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH RFC 5/7] blk-mq: record the number of times fail to get driver tag while sharing tags
  2023-06-18 16:07 [PATCH RFC 0/7] blk-mq: improve tag fair sharing Yu Kuai
                   ` (3 preceding siblings ...)
  2023-06-18 16:07 ` [PATCH RFC 4/7] blk-mq: precalculate available tags for hctx_may_queue() Yu Kuai
@ 2023-06-18 16:07 ` Yu Kuai
  2023-07-06 18:18   ` Bart Van Assche
  2023-06-18 16:07 ` [PATCH RFC 6/7] blk-mq: move active request counter to struct tag_sharing Yu Kuai
                   ` (2 subsequent siblings)
  7 siblings, 1 reply; 21+ messages in thread
From: Yu Kuai @ 2023-06-18 16:07 UTC (permalink / raw)
  To: bvanassche, axboe
  Cc: linux-block, linux-kernel, yukuai3, yukuai1, yi.zhang, yangerkun

From: Yu Kuai <yukuai3@huawei.com>

Add an atomic counter to record such failures; this counter will be used to
adjust the number of tags assigned to active queues. The counter decays
each second so that it only represents recent io pressure.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 block/blk-mq-tag.c     | 22 ++++++++++++++++++++--
 include/linux/blkdev.h |  2 ++
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index e0137206c02b..5e5742c7277a 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -45,6 +45,17 @@ static void blk_mq_update_wake_batch(struct blk_mq_tags *tags,
 			users);
 }
 
+static void update_tag_sharing_busy(struct tag_sharing *tag_sharing)
+{
+	unsigned int count = atomic_inc_return(&tag_sharing->fail_count);
+	unsigned long last_period = READ_ONCE(tag_sharing->period);
+
+	if (time_after(jiffies, last_period + HZ) &&
+	    cmpxchg_relaxed(&tag_sharing->period, last_period, jiffies) ==
+			    last_period)
+		atomic_sub(count / 2, &tag_sharing->fail_count);
+}
+
 void __blk_mq_driver_tag_busy(struct blk_mq_hw_ctx *hctx)
 {
 	struct blk_mq_tags *tags = hctx->tags;
@@ -57,12 +68,16 @@ void __blk_mq_driver_tag_busy(struct blk_mq_hw_ctx *hctx)
 		struct request_queue *q = hctx->queue;
 
 		if (test_bit(QUEUE_FLAG_HCTX_BUSY, &q->queue_flags) ||
-		    test_and_set_bit(QUEUE_FLAG_HCTX_BUSY, &q->queue_flags))
+		    test_and_set_bit(QUEUE_FLAG_HCTX_BUSY, &q->queue_flags)) {
+			update_tag_sharing_busy(&q->tag_sharing);
 			return;
+		}
 	} else {
 		if (test_bit(BLK_MQ_S_DTAG_BUSY, &hctx->state) ||
-		    test_and_set_bit(BLK_MQ_S_DTAG_BUSY, &hctx->state))
+		    test_and_set_bit(BLK_MQ_S_DTAG_BUSY, &hctx->state)) {
+			update_tag_sharing_busy(&hctx->tag_sharing);
 			return;
+		}
 	}
 
 	spin_lock_irq(&tags->lock);
@@ -152,8 +167,11 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 	}
 
 	spin_lock_irq(&tags->lock);
+
 	list_del_init(&tag_sharing->node);
 	tag_sharing->available_tags = tags->nr_tags;
+	atomic_set(&tag_sharing->fail_count, 0);
+
 	__blk_mq_driver_tag_idle(hctx);
 	WRITE_ONCE(tags->ctl.active_queues, tags->ctl.active_queues - 1);
 	WRITE_ONCE(tags->ctl.share_queues, tags->ctl.active_queues);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index e5111bedfd8d..f3faaf5f6504 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -378,6 +378,8 @@ struct blk_independent_access_ranges {
 struct tag_sharing {
 	struct list_head	node;
 	unsigned int		available_tags;
+	atomic_t		fail_count;
+	unsigned long		period;
 };
 
 struct request_queue {
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH RFC 6/7] blk-mq: move active request counter to struct tag_sharing
  2023-06-18 16:07 [PATCH RFC 0/7] blk-mq: improve tag fair sharing Yu Kuai
                   ` (4 preceding siblings ...)
  2023-06-18 16:07 ` [PATCH RFC 5/7] blk-mq: record the number of times fail to get driver tag while sharing tags Yu Kuai
@ 2023-06-18 16:07 ` Yu Kuai
  2023-06-18 16:07 ` [PATCH RFC 7/7] blk-mq: allow shared queue to get more driver tags Yu Kuai
  2023-06-20 15:20 ` [PATCH RFC 0/7] blk-mq: improve tag fair sharing Bart Van Assche
  7 siblings, 0 replies; 21+ messages in thread
From: Yu Kuai @ 2023-06-18 16:07 UTC (permalink / raw)
  To: bvanassche, axboe
  Cc: linux-block, linux-kernel, yukuai3, yukuai1, yi.zhang, yangerkun

From: Yu Kuai <yukuai3@huawei.com>

Now that there is a separate structure to control tag sharing, it makes
sense to move the active request counter used for tag sharing into this
structure. There are no functional changes.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 block/blk-core.c       |  2 --
 block/blk-mq.c         |  3 ++-
 block/blk-mq.h         | 22 +++++++++++-----------
 include/linux/blk-mq.h |  6 ------
 include/linux/blkdev.h |  3 +--
 5 files changed, 14 insertions(+), 22 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 99d8b9812b18..f2077ee32a99 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -413,8 +413,6 @@ struct request_queue *blk_alloc_queue(int node_id)
 
 	q->node = node_id;
 
-	atomic_set(&q->nr_active_requests_shared_tags, 0);
-
 	timer_setup(&q->timeout, blk_rq_timed_out_timer, 0);
 	INIT_WORK(&q->timeout_work, blk_timeout_work);
 	INIT_LIST_HEAD(&q->icq_list);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 771802ff1d45..91020cd2f6bf 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -3661,7 +3661,7 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set,
 	if (!zalloc_cpumask_var_node(&hctx->cpumask, gfp, node))
 		goto free_hctx;
 
-	atomic_set(&hctx->nr_active, 0);
+	atomic_set(&hctx->tag_sharing.active_tags, 0);
 	if (node == NUMA_NO_NODE)
 		node = set->numa_node;
 	hctx->numa_node = node;
@@ -4237,6 +4237,7 @@ int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 
 	q->nr_requests = set->queue_depth;
 	q->tag_sharing.available_tags = set->queue_depth;
+	atomic_set(&q->tag_sharing.active_tags, 0);
 
 	blk_mq_init_cpu_queues(q, set->nr_hw_queues);
 	blk_mq_add_queue_tag_set(set, q);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index fcfb040efbbd..c8923a8565b5 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -281,18 +281,18 @@ static inline int blk_mq_get_rq_budget_token(struct request *rq)
 static inline void __blk_mq_inc_active_requests(struct blk_mq_hw_ctx *hctx)
 {
 	if (blk_mq_is_shared_tags(hctx->flags))
-		atomic_inc(&hctx->queue->nr_active_requests_shared_tags);
+		atomic_inc(&hctx->queue->tag_sharing.active_tags);
 	else
-		atomic_inc(&hctx->nr_active);
+		atomic_inc(&hctx->tag_sharing.active_tags);
 }
 
 static inline void __blk_mq_sub_active_requests(struct blk_mq_hw_ctx *hctx,
 		int val)
 {
 	if (blk_mq_is_shared_tags(hctx->flags))
-		atomic_sub(val, &hctx->queue->nr_active_requests_shared_tags);
+		atomic_sub(val, &hctx->queue->tag_sharing.active_tags);
 	else
-		atomic_sub(val, &hctx->nr_active);
+		atomic_sub(val, &hctx->tag_sharing.active_tags);
 }
 
 static inline void __blk_mq_dec_active_requests(struct blk_mq_hw_ctx *hctx)
@@ -303,8 +303,8 @@ static inline void __blk_mq_dec_active_requests(struct blk_mq_hw_ctx *hctx)
 static inline int __blk_mq_active_requests(struct blk_mq_hw_ctx *hctx)
 {
 	if (blk_mq_is_shared_tags(hctx->flags))
-		return atomic_read(&hctx->queue->nr_active_requests_shared_tags);
-	return atomic_read(&hctx->nr_active);
+		return atomic_read(&hctx->queue->tag_sharing.active_tags);
+	return atomic_read(&hctx->tag_sharing.active_tags);
 }
 static inline void __blk_mq_put_driver_tag(struct blk_mq_hw_ctx *hctx,
 					   struct request *rq)
@@ -398,7 +398,7 @@ static inline void blk_mq_free_requests(struct list_head *list)
 static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
 				  struct sbitmap_queue *bt)
 {
-	unsigned int depth;
+	struct tag_sharing *tag_sharing;
 
 	if (!hctx || !(hctx->flags & BLK_MQ_F_TAG_QUEUE_SHARED))
 		return true;
@@ -415,15 +415,15 @@ static inline bool hctx_may_queue(struct blk_mq_hw_ctx *hctx,
 		if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
 			return true;
 
-		depth = READ_ONCE(q->tag_sharing.available_tags);
+		tag_sharing = &q->tag_sharing;
 	} else {
 		if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
 			return true;
-
-		depth = READ_ONCE(hctx->tag_sharing.available_tags);
+		tag_sharing = &hctx->tag_sharing;
 	}
 
-	return __blk_mq_active_requests(hctx) < depth;
+	return atomic_read(&tag_sharing->active_tags) <
+	       READ_ONCE(tag_sharing->available_tags);
 }
 
 /* run the code block in @dispatch_ops with rcu/srcu read lock held */
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 639d618e6ca8..fdfa63b76136 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -408,12 +408,6 @@ struct blk_mq_hw_ctx {
 	/** @queue_num: Index of this hardware queue. */
 	unsigned int		queue_num;
 
-	/**
-	 * @nr_active: Number of active requests. Only used when a tag set is
-	 * shared across request queues.
-	 */
-	atomic_t		nr_active;
-
 	/** @cpuhp_online: List to store request if CPU is going to die */
 	struct hlist_node	cpuhp_online;
 	/** @cpuhp_dead: List to store request if some CPU die. */
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index f3faaf5f6504..0d25e7d2a94c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -378,6 +378,7 @@ struct blk_independent_access_ranges {
 struct tag_sharing {
 	struct list_head	node;
 	unsigned int		available_tags;
+	atomic_t		active_tags;
 	atomic_t		fail_count;
 	unsigned long		period;
 };
@@ -462,8 +463,6 @@ struct request_queue {
 	struct timer_list	timeout;
 	struct work_struct	timeout_work;
 
-	atomic_t		nr_active_requests_shared_tags;
-
 	struct blk_mq_tags	*sched_shared_tags;
 
 	struct list_head	icq_list;
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* [PATCH RFC 7/7] blk-mq: allow shared queue to get more driver tags
  2023-06-18 16:07 [PATCH RFC 0/7] blk-mq: improve tag fair sharing Yu Kuai
                   ` (5 preceding siblings ...)
  2023-06-18 16:07 ` [PATCH RFC 6/7] blk-mq: move active request counter to struct tag_sharing Yu Kuai
@ 2023-06-18 16:07 ` Yu Kuai
  2023-06-20 15:20 ` [PATCH RFC 0/7] blk-mq: improve tag fair sharing Bart Van Assche
  7 siblings, 0 replies; 21+ messages in thread
From: Yu Kuai @ 2023-06-18 16:07 UTC (permalink / raw)
  To: bvanassche, axboe
  Cc: linux-block, linux-kernel, yukuai3, yukuai1, yi.zhang, yangerkun

From: Yu Kuai <yukuai3@huawei.com>

If a queue fails to get driver tags frequently, and other queues
don't, try to borrow some shared tags from those queues.

Currently, borrowed tags will not be given back until this queue is
idle.

Signed-off-by: Yu Kuai <yukuai3@huawei.com>
---
 block/blk-mq-tag.c     | 52 ++++++++++++++++++++++++++++++++++++++----
 include/linux/blkdev.h |  1 +
 2 files changed, 49 insertions(+), 4 deletions(-)

diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index 5e5742c7277a..aafcc131e3e6 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -45,7 +45,44 @@ static void blk_mq_update_wake_batch(struct blk_mq_tags *tags,
 			users);
 }
 
-static void update_tag_sharing_busy(struct tag_sharing *tag_sharing)
+static void try_to_increase_available_tags(struct blk_mq_tags *tags,
+					   struct tag_sharing *tag_sharing)
+{
+	unsigned int users = tags->ctl.share_queues;
+	unsigned int free_tags = 0;
+	unsigned int borrowed_tags = 0;
+	unsigned int nr_tags;
+	struct tag_sharing *tmp;
+
+	if (users <= 1)
+		return;
+
+	nr_tags = max((tags->nr_tags + tags->ctl.share_queues - 1) /
+		       tags->ctl.share_queues, 4U);
+
+	list_for_each_entry(tmp, &tags->ctl.head, node) {
+		if (tmp == tag_sharing)
+			continue;
+
+		if (tmp->available_tags > nr_tags)
+			borrowed_tags += tmp->available_tags - nr_tags;
+		else if (atomic_read(&tmp->fail_count) <= nr_tags / 2)
+			free_tags += tmp->available_tags -
+				atomic_read(&tmp->active_tags);
+	}
+
+	/* can't borrow more tags */
+	if (free_tags <= borrowed_tags) {
+		WRITE_ONCE(tag_sharing->suspend, jiffies + HZ);
+		return;
+	}
+
+	/* try to borrow half of free tags */
+	tag_sharing->available_tags += (free_tags - borrowed_tags) / 2;
+}
+
+static void update_tag_sharing_busy(struct blk_mq_tags *tags,
+				    struct tag_sharing *tag_sharing)
 {
 	unsigned int count = atomic_inc_return(&tag_sharing->fail_count);
 	unsigned long last_period = READ_ONCE(tag_sharing->period);
@@ -53,7 +90,14 @@ static void update_tag_sharing_busy(struct tag_sharing *tag_sharing)
 	if (time_after(jiffies, last_period + HZ) &&
 	    cmpxchg_relaxed(&tag_sharing->period, last_period, jiffies) ==
 			    last_period)
-		atomic_sub(count / 2, &tag_sharing->fail_count);
+		count = atomic_sub_return(count / 2, &tag_sharing->fail_count);
+
+	if (count >= tags->nr_tags &&
+	    time_after(jiffies, READ_ONCE(tag_sharing->suspend))) {
+		spin_lock_irq(&tags->lock);
+		try_to_increase_available_tags(tags, tag_sharing);
+		spin_unlock_irq(&tags->lock);
+	}
 }
 
 void __blk_mq_driver_tag_busy(struct blk_mq_hw_ctx *hctx)
@@ -69,13 +113,13 @@ void __blk_mq_driver_tag_busy(struct blk_mq_hw_ctx *hctx)
 
 		if (test_bit(QUEUE_FLAG_HCTX_BUSY, &q->queue_flags) ||
 		    test_and_set_bit(QUEUE_FLAG_HCTX_BUSY, &q->queue_flags)) {
-			update_tag_sharing_busy(&q->tag_sharing);
+			update_tag_sharing_busy(tags, &q->tag_sharing);
 			return;
 		}
 	} else {
 		if (test_bit(BLK_MQ_S_DTAG_BUSY, &hctx->state) ||
 		    test_and_set_bit(BLK_MQ_S_DTAG_BUSY, &hctx->state)) {
-			update_tag_sharing_busy(&hctx->tag_sharing);
+			update_tag_sharing_busy(tags, &hctx->tag_sharing);
 			return;
 		}
 	}
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 0d25e7d2a94c..3528bdc96a17 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -381,6 +381,7 @@ struct tag_sharing {
 	atomic_t		active_tags;
 	atomic_t		fail_count;
 	unsigned long		period;
+	unsigned long		suspend;
 };
 
 struct request_queue {
-- 
2.39.2


^ permalink raw reply related	[flat|nested] 21+ messages in thread

* Re: [PATCH RFC 2/7] blk-mq: delay tag fair sharing until fail to get driver tag
  2023-06-18 16:07 ` [PATCH RFC 2/7] blk-mq: delay tag fair sharing until fail to get driver tag Yu Kuai
@ 2023-06-19  5:55   ` Hannes Reinecke
  2023-06-19  6:07     ` Yu Kuai
  2023-07-06 18:00   ` Bart Van Assche
  1 sibling, 1 reply; 21+ messages in thread
From: Hannes Reinecke @ 2023-06-19  5:55 UTC (permalink / raw)
  To: Yu Kuai, bvanassche, axboe
  Cc: linux-block, linux-kernel, yukuai3, yi.zhang, yangerkun

On 6/18/23 18:07, Yu Kuai wrote:
> From: Yu Kuai <yukuai3@huawei.com>
> 
> Start tag fair sharing when a device start to issue io will waste
> resources, same number of tags will be assigned to each disk/hctx,
> and such tags can't be used for other disk/hctx, which means a disk/hctx
> can't use more than assinged tags even if there are still lots of tags
> that is assinged to other disks are unused.
> 
> Add a new api blk_mq_driver_tag_busy(), it will be called when get
> driver tag failed, and move tag sharing from blk_mq_tag_busy() to
> blk_mq_driver_tag_busy().
> 
> This approch will work well if total tags are not exhausted, and follow
> up patches will try to refactor how tag is shared to handle this case.
> 
> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
> ---
>   block/blk-mq-debugfs.c |  4 ++-
>   block/blk-mq-tag.c     | 60 ++++++++++++++++++++++++++++++++++--------
>   block/blk-mq.c         |  4 ++-
>   block/blk-mq.h         | 13 ++++++---
>   include/linux/blk-mq.h |  6 +++--
>   include/linux/blkdev.h |  1 +
>   6 files changed, 70 insertions(+), 18 deletions(-)
> 
> diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
> index 431aaa3eb181..de5a911b07c2 100644
> --- a/block/blk-mq-debugfs.c
> +++ b/block/blk-mq-debugfs.c
> @@ -400,8 +400,10 @@ static void blk_mq_debugfs_tags_show(struct seq_file *m,
>   {
>   	seq_printf(m, "nr_tags=%u\n", tags->nr_tags);
>   	seq_printf(m, "nr_reserved_tags=%u\n", tags->nr_reserved_tags);
> -	seq_printf(m, "active_queues=%d\n",
> +	seq_printf(m, "active_queues=%u\n",
>   		   READ_ONCE(tags->ctl.active_queues));
> +	seq_printf(m, "share_queues=%u\n",
> +		   READ_ONCE(tags->ctl.share_queues));
>   
>   	seq_puts(m, "\nbitmap_tags:\n");
>   	sbitmap_queue_show(&tags->bitmap_tags, m);
> diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
> index fe41a0d34fc0..1c2bde917195 100644
> --- a/block/blk-mq-tag.c
> +++ b/block/blk-mq-tag.c
> @@ -29,6 +29,32 @@ static void blk_mq_update_wake_batch(struct blk_mq_tags *tags,
>   			users);
>   }
>   
> +void __blk_mq_driver_tag_busy(struct blk_mq_hw_ctx *hctx)
> +{
> +	struct blk_mq_tags *tags = hctx->tags;
> +
> +	/*
> +	 * calling test_bit() prior to test_and_set_bit() is intentional,
> +	 * it avoids dirtying the cacheline if the queue is already active.
> +	 */
> +	if (blk_mq_is_shared_tags(hctx->flags)) {
> +		struct request_queue *q = hctx->queue;
> +
> +		if (test_bit(QUEUE_FLAG_HCTX_BUSY, &q->queue_flags) ||
> +		    test_and_set_bit(QUEUE_FLAG_HCTX_BUSY, &q->queue_flags))
> +			return;
> +	} else {
> +		if (test_bit(BLK_MQ_S_DTAG_BUSY, &hctx->state) ||
> +		    test_and_set_bit(BLK_MQ_S_DTAG_BUSY, &hctx->state))
> +			return;
> +	}
> +
> +	spin_lock_irq(&tags->lock);
> +	WRITE_ONCE(tags->ctl.share_queues, tags->ctl.active_queues);
> +	blk_mq_update_wake_batch(tags, tags->ctl.share_queues);
> +	spin_unlock_irq(&tags->lock);
> +}
> +
>   /*
>    * If a previously inactive queue goes active, bump the active user count.
>    * We need to do this before try to allocate driver tag, then even if fail
> @@ -37,7 +63,6 @@ static void blk_mq_update_wake_batch(struct blk_mq_tags *tags,
>    */
>   void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
>   {
> -	unsigned int users;
>   	struct blk_mq_tags *tags = hctx->tags;
>   
>   	/*
> @@ -57,9 +82,7 @@ void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
>   	}
>   
>   	spin_lock_irq(&tags->lock);
> -	users = tags->ctl.active_queues + 1;
> -	WRITE_ONCE(tags->ctl.active_queues, users);
> -	blk_mq_update_wake_batch(tags, users);
> +	WRITE_ONCE(tags->ctl.active_queues, tags->ctl.active_queues + 1);

Why did you remove the call to blk_mq_update_wake_batch() here?

Cheers,

Hannes
-- 
Dr. Hannes Reinecke                Kernel Storage Architect
hare@suse.de                              +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Ivo Totev, Andrew
Myers, Andrew McDonald, Martje Boudien Moerman


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH RFC 2/7] blk-mq: delay tag fair sharing until fail to get driver tag
  2023-06-19  5:55   ` Hannes Reinecke
@ 2023-06-19  6:07     ` Yu Kuai
  0 siblings, 0 replies; 21+ messages in thread
From: Yu Kuai @ 2023-06-19  6:07 UTC (permalink / raw)
  To: Hannes Reinecke, Yu Kuai, bvanassche, axboe
  Cc: linux-block, linux-kernel, yi.zhang, yangerkun, yukuai (C)

Hi,

在 2023/06/19 13:55, Hannes Reinecke 写道:
> On 6/18/23 18:07, Yu Kuai wrote:
>> From: Yu Kuai <yukuai3@huawei.com>
>>
>> Start tag fair sharing when a device start to issue io will waste
>> resources, same number of tags will be assigned to each disk/hctx,
>> and such tags can't be used for other disk/hctx, which means a disk/hctx
>> can't use more than assinged tags even if there are still lots of tags
>> that is assinged to other disks are unused.
>>
>> Add a new api blk_mq_driver_tag_busy(), it will be called when get
>> driver tag failed, and move tag sharing from blk_mq_tag_busy() to
>> blk_mq_driver_tag_busy().
>>
>> This approch will work well if total tags are not exhausted, and follow
>> up patches will try to refactor how tag is shared to handle this case.
>>
>> Signed-off-by: Yu Kuai <yukuai3@huawei.com>
>> ---
>>   block/blk-mq-debugfs.c |  4 ++-
>>   block/blk-mq-tag.c     | 60 ++++++++++++++++++++++++++++++++++--------
>>   block/blk-mq.c         |  4 ++-
>>   block/blk-mq.h         | 13 ++++++---
>>   include/linux/blk-mq.h |  6 +++--
>>   include/linux/blkdev.h |  1 +
>>   6 files changed, 70 insertions(+), 18 deletions(-)
>>
>> diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c
>> index 431aaa3eb181..de5a911b07c2 100644
>> --- a/block/blk-mq-debugfs.c
>> +++ b/block/blk-mq-debugfs.c
>> @@ -400,8 +400,10 @@ static void blk_mq_debugfs_tags_show(struct 
>> seq_file *m,
>>   {
>>       seq_printf(m, "nr_tags=%u\n", tags->nr_tags);
>>       seq_printf(m, "nr_reserved_tags=%u\n", tags->nr_reserved_tags);
>> -    seq_printf(m, "active_queues=%d\n",
>> +    seq_printf(m, "active_queues=%u\n",
>>              READ_ONCE(tags->ctl.active_queues));
>> +    seq_printf(m, "share_queues=%u\n",
>> +           READ_ONCE(tags->ctl.share_queues));
>>       seq_puts(m, "\nbitmap_tags:\n");
>>       sbitmap_queue_show(&tags->bitmap_tags, m);
>> diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
>> index fe41a0d34fc0..1c2bde917195 100644
>> --- a/block/blk-mq-tag.c
>> +++ b/block/blk-mq-tag.c
>> @@ -29,6 +29,32 @@ static void blk_mq_update_wake_batch(struct 
>> blk_mq_tags *tags,
>>               users);
>>   }
>> +void __blk_mq_driver_tag_busy(struct blk_mq_hw_ctx *hctx)
>> +{
>> +    struct blk_mq_tags *tags = hctx->tags;
>> +
>> +    /*
>> +     * calling test_bit() prior to test_and_set_bit() is intentional,
>> +     * it avoids dirtying the cacheline if the queue is already active.
>> +     */
>> +    if (blk_mq_is_shared_tags(hctx->flags)) {
>> +        struct request_queue *q = hctx->queue;
>> +
>> +        if (test_bit(QUEUE_FLAG_HCTX_BUSY, &q->queue_flags) ||
>> +            test_and_set_bit(QUEUE_FLAG_HCTX_BUSY, &q->queue_flags))
>> +            return;
>> +    } else {
>> +        if (test_bit(BLK_MQ_S_DTAG_BUSY, &hctx->state) ||
>> +            test_and_set_bit(BLK_MQ_S_DTAG_BUSY, &hctx->state))
>> +            return;
>> +    }
>> +
>> +    spin_lock_irq(&tags->lock);
>> +    WRITE_ONCE(tags->ctl.share_queues, tags->ctl.active_queues);
>> +    blk_mq_update_wake_batch(tags, tags->ctl.share_queues);
>> +    spin_unlock_irq(&tags->lock);
>> +}
>> +
>>   /*
>>    * If a previously inactive queue goes active, bump the active user 
>> count.
>>    * We need to do this before try to allocate driver tag, then even 
>> if fail
>> @@ -37,7 +63,6 @@ static void blk_mq_update_wake_batch(struct 
>> blk_mq_tags *tags,
>>    */
>>   void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
>>   {
>> -    unsigned int users;
>>       struct blk_mq_tags *tags = hctx->tags;
>>       /*
>> @@ -57,9 +82,7 @@ void __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
>>       }
>>       spin_lock_irq(&tags->lock);
>> -    users = tags->ctl.active_queues + 1;
>> -    WRITE_ONCE(tags->ctl.active_queues, users);
>> -    blk_mq_update_wake_batch(tags, users);
>> +    WRITE_ONCE(tags->ctl.active_queues, tags->ctl.active_queues + 1);
> 
> Why did you remove the call to blk_mq_update_wake_batch() here?

blk_mq_update_wake_batch() should be called when the number of
available tags changes; however, active_queues is no longer used by
hctx_may_queue() to calculate available tags. share_queues is used
instead, and it's updated in the new helper blk_mq_driver_tag_busy().

Thanks,
Kuai
> 
> Cheers,
> 
> Hannes


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH RFC 0/7] blk-mq: improve tag fair sharing
  2023-06-18 16:07 [PATCH RFC 0/7] blk-mq: improve tag fair sharing Yu Kuai
                   ` (6 preceding siblings ...)
  2023-06-18 16:07 ` [PATCH RFC 7/7] blk-mq: allow shared queue to get more driver tags Yu Kuai
@ 2023-06-20 15:20 ` Bart Van Assche
  2023-07-03 13:29   ` Yu Kuai
  7 siblings, 1 reply; 21+ messages in thread
From: Bart Van Assche @ 2023-06-20 15:20 UTC (permalink / raw)
  To: Yu Kuai, axboe
  Cc: linux-block, linux-kernel, yukuai3, yi.zhang, yangerkun,
	Christoph Hellwig

On 6/18/23 09:07, Yu Kuai wrote:
> This is not a formal version and not fully tested, I send this RFC
> because I want to make sure if people think doing this is meaningful,
> before I spend too much time on this.
The approach looks good to me but I'd like to hear from Jens and 
Christoph what their opinion is about the approach of this patch series 
before doing an in-depth review.

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH RFC 0/7] blk-mq: improve tag fair sharing
  2023-06-20 15:20 ` [PATCH RFC 0/7] blk-mq: improve tag fair sharing Bart Van Assche
@ 2023-07-03 13:29   ` Yu Kuai
  2023-07-03 18:08     ` Bart Van Assche
  0 siblings, 1 reply; 21+ messages in thread
From: Yu Kuai @ 2023-07-03 13:29 UTC (permalink / raw)
  To: Bart Van Assche, Yu Kuai, axboe
  Cc: linux-block, linux-kernel, yi.zhang, yangerkun, Christoph Hellwig,
	yukuai (C)

Hi, Christoph and Jens and anyone who's interested

在 2023/06/20 23:20, Bart Van Assche 写道:
> On 6/18/23 09:07, Yu Kuai wrote:
>> This is not a formal version and not fully tested, I send this RFC
>> because I want to make sure if people think doing this is meaningful,
>> before I spend too much time on this.
> The approach looks good to me but I'd like to hear from Jens and 
> Christoph what their opinion is about the approach of this patch series 
> before doing an in-depth review.
> 
Any suggestions on this topic? It would be great to hear whether anyone
thinks it's meaningful to refactor tag fair sharing.

Thanks,
Kuai
> Thanks,
> 
> Bart.
> 
> .
> 


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH RFC 0/7] blk-mq: improve tag fair sharing
  2023-07-03 13:29   ` Yu Kuai
@ 2023-07-03 18:08     ` Bart Van Assche
  2023-07-05  3:17       ` Yu Kuai
  0 siblings, 1 reply; 21+ messages in thread
From: Bart Van Assche @ 2023-07-03 18:08 UTC (permalink / raw)
  To: Yu Kuai, axboe
  Cc: linux-block, linux-kernel, yi.zhang, yangerkun, Christoph Hellwig,
	yukuai (C)

On 7/3/23 06:29, Yu Kuai wrote:
> 在 2023/06/20 23:20, Bart Van Assche 写道:
>> On 6/18/23 09:07, Yu Kuai wrote:
>>> This is not a formal version and not fully tested, I send this RFC
>>> because I want to make sure if people think doing this is meaningful,
>>> before I spend too much time on this.
>> The approach looks good to me but I'd like to hear from Jens and 
>> Christoph what their opinion is about the approach of this patch 
>> series before doing an in-depth review.
>>
> Any suggestions on this topic? It'll be great to hear that if anyone
> thinks it's meaningful to refactor tag fair sharing.

The cover letter of this patch series says "This is not a formal version 
and not fully tested". If a fully tested version will be posted, I will 
help with an in-depth review.

Thanks,

Bart.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH RFC 0/7] blk-mq: improve tag fair sharing
  2023-07-03 18:08     ` Bart Van Assche
@ 2023-07-05  3:17       ` Yu Kuai
  2023-07-06 18:43         ` Bart Van Assche
  0 siblings, 1 reply; 21+ messages in thread
From: Yu Kuai @ 2023-07-05  3:17 UTC (permalink / raw)
  To: Bart Van Assche, Yu Kuai, axboe
  Cc: linux-block, linux-kernel, yi.zhang, yangerkun, Christoph Hellwig,
	yukuai (C)

Hi,

在 2023/07/04 2:08, Bart Van Assche 写道:
> On 7/3/23 06:29, Yu Kuai wrote:
>> 在 2023/06/20 23:20, Bart Van Assche 写道:
>>> On 6/18/23 09:07, Yu Kuai wrote:
>>>> This is not a formal version and not fully tested, I send this RFC
>>>> because I want to make sure if people think doing this is meaningful,
>>>> before I spend too much time on this.
>>> The approach looks good to me but I'd like to hear from Jens and 
>>> Christoph what their opinion is about the approach of this patch 
>>> series before doing an in-depth review.
>>>
>> Any suggestions on this topic? It'll be great to hear that if anyone
>> thinks it's meaningful to refactor tag fair sharing.
> 
> The cover letter of this patch series says "This is not a formal version 
> and not fully tested". If a fully tested version will be posted, I will 
> help with an in-depth review.

Thanks for taking time on this patchset, do you think I need to do
following for the formal version, or these improvements can be done
later?

- the current algorithm to borrow tags in patch 7 is very simple and
straightforward; it should work fine in simple cases like the one you
reported for UFS, but this algorithm should be improved for more
complicated cases.
- currently borrowed tags will never be returned until the queue is
idle; I should figure out a way to return borrowed tags if this queue
is not busy, so that other queues can borrow tags from this queue.

Thanks,
Kuai

> 
> Thanks,
> 
> Bart.
> 
> 
> .
> 


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH RFC 1/7] blk-mq: factor out a structure from blk_mq_tags to control tag sharing
  2023-06-18 16:07 ` [PATCH RFC 1/7] blk-mq: factor out a structure from blk_mq_tags to control tag sharing Yu Kuai
@ 2023-07-06 17:43   ` Bart Van Assche
  0 siblings, 0 replies; 21+ messages in thread
From: Bart Van Assche @ 2023-07-06 17:43 UTC (permalink / raw)
  To: Yu Kuai, axboe; +Cc: linux-block, linux-kernel, yukuai3, yi.zhang, yangerkun

On 6/18/23 09:07, Yu Kuai wrote:
> Currently tags is fair shared, and the new structure contains only one
                  ^^^^^^^       ^^^^^^^^^
                  are fairly    .  The
> field active_queues. There are no functional changes and prepare to
                                                       ^^^^^^^^^^^^^^^
                                               . This patch prepares for
> refactor tag sharing.
   ^^^^^^^^
   refactoring

Please make the subject of this patch shorter, e.g. "Refactor struct
blk_mq_tags". Otherwise this patch looks good to me.

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH RFC 2/7] blk-mq: delay tag fair sharing until fail to get driver tag
  2023-06-18 16:07 ` [PATCH RFC 2/7] blk-mq: delay tag fair sharing until fail to get driver tag Yu Kuai
  2023-06-19  5:55   ` Hannes Reinecke
@ 2023-07-06 18:00   ` Bart Van Assche
  1 sibling, 0 replies; 21+ messages in thread
From: Bart Van Assche @ 2023-07-06 18:00 UTC (permalink / raw)
  To: Yu Kuai, axboe; +Cc: linux-block, linux-kernel, yukuai3, yi.zhang, yangerkun

On 6/18/23 09:07, Yu Kuai wrote:
> Start tag fair sharing when a device start to issue io will waste
> resources, same number of tags will be assigned to each disk/hctx,
> and such tags can't be used for other disk/hctx, which means a disk/hctx
> can't use more than assinged tags even if there are still lots of tags
                       ^^^^^^^^
                       assigned
> that is assinged to other disks are unused.
           ^^^^^^^^
           assigned
> Add a new api blk_mq_driver_tag_busy(), it will be called when get
> driver tag failed, and move tag sharing from blk_mq_tag_busy() to
> blk_mq_driver_tag_busy().


> +	spin_lock_irq(&tags->lock);
> +	WRITE_ONCE(tags->ctl.share_queues, tags->ctl.active_queues);
> +	blk_mq_update_wake_batch(tags, tags->ctl.share_queues);
> +	spin_unlock_irq(&tags->lock);
> +}

Are all tags->ctl.share_queues changes protected by tags->lock? If so, please
use a regular assignment to update that member variable instead of using
WRITE_ONCE().

> @@ -735,6 +736,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
>   
>   struct tag_sharing_ctl {
>   	unsigned int active_queues;
> +	unsigned int share_queues;
>   };

Please rename "share_queues" into "shared_queues" such that the names of
both struct members start with an adjective.

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH RFC 3/7] blk-mq: support to track active queues from blk_mq_tags
  2023-06-18 16:07 ` [PATCH RFC 3/7] blk-mq: support to track active queues from blk_mq_tags Yu Kuai
@ 2023-07-06 18:01   ` Bart Van Assche
  0 siblings, 0 replies; 21+ messages in thread
From: Bart Van Assche @ 2023-07-06 18:01 UTC (permalink / raw)
  To: Yu Kuai, axboe; +Cc: linux-block, linux-kernel, yukuai3, yi.zhang, yangerkun

On 6/18/23 09:07, Yu Kuai wrote:
> @@ -737,6 +738,7 @@ struct request *blk_mq_alloc_request_hctx(struct request_queue *q,
>   struct tag_sharing_ctl {
>   	unsigned int active_queues;
>   	unsigned int share_queues;
> +	struct list_head head;
>   };

Please add a comment that explains that 'head' is the head of a list with
tag_sharing.node entries.

> +struct tag_sharing {
> +	struct list_head node;
> +};

Data structure names typically are a noun. Above I see a name that ends with a
verb. Would "shared_tag_info" or "tag_sharing_info" perhaps be a better name?

Thanks,

Bart.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH RFC 4/7] blk-mq: precalculate available tags for hctx_may_queue()
  2023-06-18 16:07 ` [PATCH RFC 4/7] blk-mq: precalculate available tags for hctx_may_queue() Yu Kuai
@ 2023-07-06 18:13   ` Bart Van Assche
  0 siblings, 0 replies; 21+ messages in thread
From: Bart Van Assche @ 2023-07-06 18:13 UTC (permalink / raw)
  To: Yu Kuai, axboe; +Cc: linux-block, linux-kernel, yukuai3, yi.zhang, yangerkun

On 6/18/23 09:07, Yu Kuai wrote:
> +static void blk_mq_update_available_driver_tags(struct blk_mq_hw_ctx *hctx)
> +{
> +	struct blk_mq_tags *tags = hctx->tags;
> +	unsigned int nr_tags;
> +	struct tag_sharing *tag_sharing;
> +
> +	if (tags->ctl.share_queues <= 1)
> +		nr_tags = tags->nr_tags;
> +	else
> +		nr_tags = max((tags->nr_tags + tags->ctl.share_queues - 1) /
> +			       tags->ctl.share_queues, 4U);
> +
> +	list_for_each_entry(tag_sharing, &tags->ctl.head, node)
> +		tag_sharing->available_tags = nr_tags;
> +}

Since READ_ONCE() is used to read the available_tags member, WRITE_ONCE()
should be used to update that member, even if a spinlock is held around
the update.

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH RFC 5/7] blk-mq: record the number of times fail to get driver tag while sharing tags
  2023-06-18 16:07 ` [PATCH RFC 5/7] blk-mq: record the number of times fail to get driver tag while sharing tags Yu Kuai
@ 2023-07-06 18:18   ` Bart Van Assche
  0 siblings, 0 replies; 21+ messages in thread
From: Bart Van Assche @ 2023-07-06 18:18 UTC (permalink / raw)
  To: Yu Kuai, axboe; +Cc: linux-block, linux-kernel, yukuai3, yi.zhang, yangerkun

On 6/18/23 09:07, Yu Kuai wrote:
> +static void update_tag_sharing_busy(struct tag_sharing *tag_sharing)
> +{
> +	unsigned int count = atomic_inc_return(&tag_sharing->fail_count);
> +	unsigned long last_period = READ_ONCE(tag_sharing->period);
> +
> +	if (time_after(jiffies, last_period + HZ) &&
> +	    cmpxchg_relaxed(&tag_sharing->period, last_period, jiffies) ==
> +			    last_period)
> +		atomic_sub(count / 2, &tag_sharing->fail_count);
> +}

For new code, try_cmpxchg_relaxed() is preferred over cmpxchg_relaxed().

>   struct tag_sharing {
>   	struct list_head	node;
>   	unsigned int		available_tags;
> +	atomic_t		fail_count;
> +	unsigned long		period;
>   };

Please consider renaming "period" into "latest_reduction" or any other name
that make the purpose of this member clear.

Thanks,

Bart.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH RFC 0/7] blk-mq: improve tag fair sharing
  2023-07-05  3:17       ` Yu Kuai
@ 2023-07-06 18:43         ` Bart Van Assche
  2023-07-07  1:15           ` Yu Kuai
  0 siblings, 1 reply; 21+ messages in thread
From: Bart Van Assche @ 2023-07-06 18:43 UTC (permalink / raw)
  To: Yu Kuai, axboe
  Cc: linux-block, linux-kernel, yi.zhang, yangerkun, Christoph Hellwig,
	yukuai (C)

On 7/4/23 20:17, Yu Kuai wrote:
> - currently borrowed tags will never be returned untill queue is idle,
> I should figure out a way to return borrowed tags if this queue is not
> busy, so that other queues can borrow tag from this queue.

At least for UFS this is a significant disadvantage. If e.g. one SCSI
command is sent to the UFS WLUN every 20 seconds and the request queue
timeout is 30 seconds then borrowed tags will never be returned.

The complexity of this patch series is a concern to me. The complexity
of this patch series may be a barrier towards merging this patch series
in the upstream kernel.

Thanks,

Bart.


^ permalink raw reply	[flat|nested] 21+ messages in thread

* Re: [PATCH RFC 0/7] blk-mq: improve tag fair sharing
  2023-07-06 18:43         ` Bart Van Assche
@ 2023-07-07  1:15           ` Yu Kuai
  0 siblings, 0 replies; 21+ messages in thread
From: Yu Kuai @ 2023-07-07  1:15 UTC (permalink / raw)
  To: Bart Van Assche, Yu Kuai, axboe
  Cc: linux-block, linux-kernel, yi.zhang, yangerkun, Christoph Hellwig,
	yukuai (C)

Hi,

在 2023/07/07 2:43, Bart Van Assche 写道:
> On 7/4/23 20:17, Yu Kuai wrote:
>> - currently borrowed tags will never be returned until the queue is idle,
>> I should figure out a way to return borrowed tags if this queue is not
>> busy, so that other queues can borrow tags from this queue.
> 
> At least for UFS this is a significant disadvantage. If e.g. one SCSI
> command is sent to the UFS WLUN every 20 seconds and the request queue
> timeout is 30 seconds then borrowed tags will never be returned.

Ok, thanks for the notice.
> 
> The complexity of this patch series is a concern to me. The complexity
> of this patch series may be a barrier towards merging this patch series
> in the upstream kernel.

Yeah, I see. Thanks for the review! However, all I have in mind will end
up making this patch series more complicated; I'll try my best to make the
code more readable in the next version.

Thanks,
Kuai
> 
> Thanks,
> 
> Bart.
> 
> 
> .
> 


^ permalink raw reply	[flat|nested] 21+ messages in thread

end of thread, other threads:[~2023-07-07  1:15 UTC | newest]

Thread overview: 21+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2023-06-18 16:07 [PATCH RFC 0/7] blk-mq: improve tag fair sharing Yu Kuai
2023-06-18 16:07 ` [PATCH RFC 1/7] blk-mq: factor out a structure from blk_mq_tags to control tag sharing Yu Kuai
2023-07-06 17:43   ` Bart Van Assche
2023-06-18 16:07 ` [PATCH RFC 2/7] blk-mq: delay tag fair sharing until fail to get driver tag Yu Kuai
2023-06-19  5:55   ` Hannes Reinecke
2023-06-19  6:07     ` Yu Kuai
2023-07-06 18:00   ` Bart Van Assche
2023-06-18 16:07 ` [PATCH RFC 3/7] blk-mq: support to track active queues from blk_mq_tags Yu Kuai
2023-07-06 18:01   ` Bart Van Assche
2023-06-18 16:07 ` [PATCH RFC 4/7] blk-mq: precalculate available tags for hctx_may_queue() Yu Kuai
2023-07-06 18:13   ` Bart Van Assche
2023-06-18 16:07 ` [PATCH RFC 5/7] blk-mq: record the number of times fail to get driver tag while sharing tags Yu Kuai
2023-07-06 18:18   ` Bart Van Assche
2023-06-18 16:07 ` [PATCH RFC 6/7] blk-mq: move active request counter to struct tag_sharing Yu Kuai
2023-06-18 16:07 ` [PATCH RFC 7/7] blk-mq: allow shared queue to get more driver tags Yu Kuai
2023-06-20 15:20 ` [PATCH RFC 0/7] blk-mq: improve tag fair sharing Bart Van Assche
2023-07-03 13:29   ` Yu Kuai
2023-07-03 18:08     ` Bart Van Assche
2023-07-05  3:17       ` Yu Kuai
2023-07-06 18:43         ` Bart Van Assche
2023-07-07  1:15           ` Yu Kuai

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox