* [PATCH] block/mq-deadline: Optimize request insertion
@ 2024-01-22 23:53 Bart Van Assche
2024-01-23 2:30 ` Damien Le Moal
0 siblings, 1 reply; 4+ messages in thread
From: Bart Van Assche @ 2024-01-22 23:53 UTC (permalink / raw)
To: Jens Axboe
Cc: linux-block, Damien Le Moal, Christoph Hellwig, Bart Van Assche
Reduce lock contention on dd->lock by calling dd_insert_request() from
inside the dispatch callback instead of from the insert callback. This
patch is inspired by a patch from Jens.
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
---
block/mq-deadline.c | 70 +++++++++++++++++++++++++++++++++++++--------
1 file changed, 58 insertions(+), 12 deletions(-)
diff --git a/block/mq-deadline.c b/block/mq-deadline.c
index 83bc21801226..d11b8604f046 100644
--- a/block/mq-deadline.c
+++ b/block/mq-deadline.c
@@ -89,11 +89,15 @@ struct deadline_data {
*/
struct {
spinlock_t lock;
+ spinlock_t insert_lock;
spinlock_t zone_lock;
} ____cacheline_aligned_in_smp;
unsigned long run_state;
+ struct list_head at_head;
+ struct list_head at_tail;
+
struct dd_per_prio per_prio[DD_PRIO_COUNT];
/* Data direction of latest dispatched request. */
@@ -120,6 +124,9 @@ static const enum dd_prio ioprio_class_to_prio[] = {
[IOPRIO_CLASS_IDLE] = DD_IDLE_PRIO,
};
+static void dd_insert_request(struct request_queue *q, struct request *rq,
+ blk_insert_t flags, struct list_head *free);
+
static inline struct rb_root *
deadline_rb_root(struct dd_per_prio *per_prio, struct request *rq)
{
@@ -592,6 +599,35 @@ static struct request *dd_dispatch_prio_aged_requests(struct deadline_data *dd,
return NULL;
}
+static void __dd_do_insert(struct request_queue *q, blk_insert_t flags,
+ struct list_head *list, struct list_head *free)
+{
+ while (!list_empty(list)) {
+ struct request *rq;
+
+ rq = list_first_entry(list, struct request, queuelist);
+ list_del_init(&rq->queuelist);
+ dd_insert_request(q, rq, flags, free);
+ }
+}
+
+static void dd_do_insert(struct request_queue *q, struct list_head *free)
+{
+ struct deadline_data *dd = q->elevator->elevator_data;
+ LIST_HEAD(at_head);
+ LIST_HEAD(at_tail);
+
+ lockdep_assert_held(&dd->lock);
+
+ spin_lock(&dd->insert_lock);
+ list_splice_init(&dd->at_head, &at_head);
+ list_splice_init(&dd->at_tail, &at_tail);
+ spin_unlock(&dd->insert_lock);
+
+ __dd_do_insert(q, BLK_MQ_INSERT_AT_HEAD, &at_head, free);
+ __dd_do_insert(q, 0, &at_tail, free);
+}
+
/*
* Called from blk_mq_run_hw_queue() -> __blk_mq_sched_dispatch_requests().
*
@@ -606,6 +642,7 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
const unsigned long now = jiffies;
struct request *rq;
enum dd_prio prio;
+ LIST_HEAD(free);
/*
* If someone else is already dispatching, skip this one. This will
@@ -620,6 +657,11 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
return NULL;
spin_lock(&dd->lock);
+ /*
+ * Request insertion happens from inside the dispatch callback instead
+ * of inside the insert callback to minimize contention on dd->lock.
+ */
+ dd_do_insert(hctx->queue, &free);
rq = dd_dispatch_prio_aged_requests(dd, now);
if (rq)
goto unlock;
@@ -638,6 +680,8 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
clear_bit(DD_DISPATCHING, &dd->run_state);
spin_unlock(&dd->lock);
+ blk_mq_free_requests(&free);
+
return rq;
}
@@ -727,8 +771,12 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
eq->elevator_data = dd;
spin_lock_init(&dd->lock);
+ spin_lock_init(&dd->insert_lock);
spin_lock_init(&dd->zone_lock);
+ INIT_LIST_HEAD(&dd->at_head);
+ INIT_LIST_HEAD(&dd->at_tail);
+
for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
struct dd_per_prio *per_prio = &dd->per_prio[prio];
@@ -899,19 +947,13 @@ static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
{
struct request_queue *q = hctx->queue;
struct deadline_data *dd = q->elevator->elevator_data;
- LIST_HEAD(free);
-
- spin_lock(&dd->lock);
- while (!list_empty(list)) {
- struct request *rq;
- rq = list_first_entry(list, struct request, queuelist);
- list_del_init(&rq->queuelist);
- dd_insert_request(q, rq, flags, &free);
- }
- spin_unlock(&dd->lock);
-
- blk_mq_free_requests(&free);
+ spin_lock(&dd->insert_lock);
+ if (flags & BLK_MQ_INSERT_AT_HEAD)
+ list_splice_init(list, &dd->at_head);
+ else
+ list_splice_init(list, &dd->at_tail);
+ spin_unlock(&dd->insert_lock);
}
/* Callback from inside blk_mq_rq_ctx_init(). */
@@ -990,6 +1032,10 @@ static bool dd_has_work(struct blk_mq_hw_ctx *hctx)
struct deadline_data *dd = hctx->queue->elevator->elevator_data;
enum dd_prio prio;
+ if (!list_empty_careful(&dd->at_head) ||
+ !list_empty_careful(&dd->at_tail))
+ return true;
+
for (prio = 0; prio <= DD_PRIO_MAX; prio++)
if (dd_has_work_for_prio(&dd->per_prio[prio]))
return true;
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH] block/mq-deadline: Optimize request insertion
2024-01-22 23:53 [PATCH] block/mq-deadline: Optimize request insertion Bart Van Assche
@ 2024-01-23 2:30 ` Damien Le Moal
2024-01-23 14:52 ` Jens Axboe
2024-01-23 15:09 ` Bart Van Assche
0 siblings, 2 replies; 4+ messages in thread
From: Damien Le Moal @ 2024-01-23 2:30 UTC (permalink / raw)
To: Bart Van Assche, Jens Axboe; +Cc: linux-block, Christoph Hellwig
On 1/23/24 08:53, Bart Van Assche wrote:
> Reduce lock contention on dd->lock by calling dd_insert_request() from
> inside the dispatch callback instead of from the insert callback. This
> patch is inspired by a patch from Jens.
I suppose this is a followup of the performance discussion with Jens. If so,
can you add performance numbers here to justify the change?
Otherwise, it is hard to figure out the effect of the patch and thus why you
are making this change.
>
> Signed-off-by: Bart Van Assche <bvanassche@acm.org>
> ---
> block/mq-deadline.c | 70 +++++++++++++++++++++++++++++++++++++--------
> 1 file changed, 58 insertions(+), 12 deletions(-)
>
> diff --git a/block/mq-deadline.c b/block/mq-deadline.c
> index 83bc21801226..d11b8604f046 100644
> --- a/block/mq-deadline.c
> +++ b/block/mq-deadline.c
> @@ -89,11 +89,15 @@ struct deadline_data {
> */
> struct {
> spinlock_t lock;
> + spinlock_t insert_lock;
> spinlock_t zone_lock;
> } ____cacheline_aligned_in_smp;
>
> unsigned long run_state;
>
> + struct list_head at_head;
> + struct list_head at_tail;
> +
> struct dd_per_prio per_prio[DD_PRIO_COUNT];
>
> /* Data direction of latest dispatched request. */
> @@ -120,6 +124,9 @@ static const enum dd_prio ioprio_class_to_prio[] = {
> [IOPRIO_CLASS_IDLE] = DD_IDLE_PRIO,
> };
>
> +static void dd_insert_request(struct request_queue *q, struct request *rq,
> + blk_insert_t flags, struct list_head *free);
> +
> static inline struct rb_root *
> deadline_rb_root(struct dd_per_prio *per_prio, struct request *rq)
> {
> @@ -592,6 +599,35 @@ static struct request *dd_dispatch_prio_aged_requests(struct deadline_data *dd,
> return NULL;
> }
>
> +static void __dd_do_insert(struct request_queue *q, blk_insert_t flags,
> + struct list_head *list, struct list_head *free)
> +{
> + while (!list_empty(list)) {
> + struct request *rq;
> +
> + rq = list_first_entry(list, struct request, queuelist);
> + list_del_init(&rq->queuelist);
> + dd_insert_request(q, rq, flags, free);
> + }
> +}
> +
> +static void dd_do_insert(struct request_queue *q, struct list_head *free)
> +{
> + struct deadline_data *dd = q->elevator->elevator_data;
> + LIST_HEAD(at_head);
> + LIST_HEAD(at_tail);
> +
> + lockdep_assert_held(&dd->lock);
> +
> + spin_lock(&dd->insert_lock);
> + list_splice_init(&dd->at_head, &at_head);
> + list_splice_init(&dd->at_tail, &at_tail);
> + spin_unlock(&dd->insert_lock);
> +
> + __dd_do_insert(q, BLK_MQ_INSERT_AT_HEAD, &at_head, free);
> + __dd_do_insert(q, 0, &at_tail, free);
> +}
> +
> /*
> * Called from blk_mq_run_hw_queue() -> __blk_mq_sched_dispatch_requests().
> *
> @@ -606,6 +642,7 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
> const unsigned long now = jiffies;
> struct request *rq;
> enum dd_prio prio;
> + LIST_HEAD(free);
>
> /*
> * If someone else is already dispatching, skip this one. This will
> @@ -620,6 +657,11 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
> return NULL;
>
> spin_lock(&dd->lock);
> + /*
> + * Request insertion happens from inside the dispatch callback instead
> + * of inside the insert callback to minimize contention on dd->lock.
> + */
> + dd_do_insert(hctx->queue, &free);
> rq = dd_dispatch_prio_aged_requests(dd, now);
> if (rq)
> goto unlock;
> @@ -638,6 +680,8 @@ static struct request *dd_dispatch_request(struct blk_mq_hw_ctx *hctx)
> clear_bit(DD_DISPATCHING, &dd->run_state);
> spin_unlock(&dd->lock);
>
> + blk_mq_free_requests(&free);
> +
> return rq;
> }
>
> @@ -727,8 +771,12 @@ static int dd_init_sched(struct request_queue *q, struct elevator_type *e)
> eq->elevator_data = dd;
>
> spin_lock_init(&dd->lock);
> + spin_lock_init(&dd->insert_lock);
> spin_lock_init(&dd->zone_lock);
>
> + INIT_LIST_HEAD(&dd->at_head);
> + INIT_LIST_HEAD(&dd->at_tail);
> +
> for (prio = 0; prio <= DD_PRIO_MAX; prio++) {
> struct dd_per_prio *per_prio = &dd->per_prio[prio];
>
> @@ -899,19 +947,13 @@ static void dd_insert_requests(struct blk_mq_hw_ctx *hctx,
> {
> struct request_queue *q = hctx->queue;
> struct deadline_data *dd = q->elevator->elevator_data;
> - LIST_HEAD(free);
> -
> - spin_lock(&dd->lock);
> - while (!list_empty(list)) {
> - struct request *rq;
>
> - rq = list_first_entry(list, struct request, queuelist);
> - list_del_init(&rq->queuelist);
> - dd_insert_request(q, rq, flags, &free);
> - }
> - spin_unlock(&dd->lock);
> -
> - blk_mq_free_requests(&free);
> + spin_lock(&dd->insert_lock);
> + if (flags & BLK_MQ_INSERT_AT_HEAD)
> + list_splice_init(list, &dd->at_head);
> + else
> + list_splice_init(list, &dd->at_tail);
> + spin_unlock(&dd->insert_lock);
> }
>
> /* Callback from inside blk_mq_rq_ctx_init(). */
> @@ -990,6 +1032,10 @@ static bool dd_has_work(struct blk_mq_hw_ctx *hctx)
> struct deadline_data *dd = hctx->queue->elevator->elevator_data;
> enum dd_prio prio;
>
> + if (!list_empty_careful(&dd->at_head) ||
> + !list_empty_careful(&dd->at_tail))
> + return true;
> +
> for (prio = 0; prio <= DD_PRIO_MAX; prio++)
> if (dd_has_work_for_prio(&dd->per_prio[prio]))
> return true;
--
Damien Le Moal
Western Digital Research
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] block/mq-deadline: Optimize request insertion
2024-01-23 2:30 ` Damien Le Moal
@ 2024-01-23 14:52 ` Jens Axboe
2024-01-23 15:09 ` Bart Van Assche
1 sibling, 0 replies; 4+ messages in thread
From: Jens Axboe @ 2024-01-23 14:52 UTC (permalink / raw)
To: Damien Le Moal, Bart Van Assche; +Cc: linux-block, Christoph Hellwig
On 1/22/24 7:30 PM, Damien Le Moal wrote:
> On 1/23/24 08:53, Bart Van Assche wrote:
>> Reduce lock contention on dd->lock by calling dd_insert_request() from
>> inside the dispatch callback instead of from the insert callback. This
>> patch is inspired by a patch from Jens.
>
> I supposed this is a followup of the performance discussion with Jens. If so,
> can you add performance numbers here and so justifying the change ?
> Otherwise, it is hard to figure out the effect of the patch and so why you are
> making this change.
Numbers are in here [1], I will fold them into the commit message. It's worth
noting, as mentioned in my commit message, that this also relies on the
previous patches to get there. It just leaves the insertion side contention as
the dominant one until this patch is applied.
Before:
Device IOPS sys contention diff
====================================================
null_blk 879K 89% 93.6%
nvme0n1 901K 86% 94.5%
and after this and the previous two patches:
Device IOPS sys contention diff
====================================================
null_blk 2867K 11.1% ~6.0% +226%
nvme0n1 3162K 9.9% ~5.0% +250%
[1] https://git.kernel.dk/cgit/linux/commit/?h=block-deadline&id=678abafc92bae49e6f4a92cea05dcd5f39928054
--
Jens Axboe
^ permalink raw reply [flat|nested] 4+ messages in thread
* Re: [PATCH] block/mq-deadline: Optimize request insertion
2024-01-23 2:30 ` Damien Le Moal
2024-01-23 14:52 ` Jens Axboe
@ 2024-01-23 15:09 ` Bart Van Assche
1 sibling, 0 replies; 4+ messages in thread
From: Bart Van Assche @ 2024-01-23 15:09 UTC (permalink / raw)
To: Damien Le Moal, Jens Axboe; +Cc: linux-block, Christoph Hellwig
On 1/22/24 18:30, Damien Le Moal wrote:
> On 1/23/24 08:53, Bart Van Assche wrote:
>> Reduce lock contention on dd->lock by calling dd_insert_request() from
>> inside the dispatch callback instead of from the insert callback. This
>> patch is inspired by a patch from Jens.
>
> I supposed this is a followup of the performance discussion with Jens. If so,
> can you add performance numbers here and so justifying the change ?
> Otherwise, it is hard to figure out the effect of the patch and so why you are
> making this change.
Hi Damien,
If I replace Jens' patch 3/4 with this patch I achieve 20% higher IOPS
for 1..4 CPU cores (null_blk + fio in an x86 VM).
Bart.
^ permalink raw reply [flat|nested] 4+ messages in thread
end of thread, other threads:[~2024-01-23 15:09 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-01-22 23:53 [PATCH] block/mq-deadline: Optimize request insertion Bart Van Assche
2024-01-23 2:30 ` Damien Le Moal
2024-01-23 14:52 ` Jens Axboe
2024-01-23 15:09 ` Bart Van Assche
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox