public inbox for linux-block@vger.kernel.org
 help / color / mirror / Atom feed
* [for-4.16 PATCH v5 0/3] blk-mq: improve DM's blk-mq IO merging via blk_insert_cloned_request feedback
@ 2018-01-17  4:33 Mike Snitzer
  2018-01-17  4:33 ` [for-4.16 PATCH v5 1/3] blk-mq: factor out a few helpers from __blk_mq_try_issue_directly Mike Snitzer
                   ` (3 more replies)
  0 siblings, 4 replies; 7+ messages in thread
From: Mike Snitzer @ 2018-01-17  4:33 UTC (permalink / raw)
  To: axboe; +Cc: Ming Lei, hch, dm-devel, linux-block

Hi Jens,

I spent a decent amount of time going over this and am happy with it.
Hopefully you'll be too.

Thanks,
Mike

Mike Snitzer (2):
  blk-mq: factor out a few helpers from __blk_mq_try_issue_directly
  blk-mq-sched: remove unused 'can_block' arg from blk_mq_sched_insert_request

Ming Lei (1):
  blk-mq: improve DM's blk-mq IO merging via blk_insert_cloned_request feedback

 block/blk-core.c     |   3 +-
 block/blk-exec.c     |   2 +-
 block/blk-mq-sched.c |   2 +-
 block/blk-mq-sched.h |   2 +-
 block/blk-mq.c       | 109 ++++++++++++++++++++++++++++++++++++---------------
 block/blk-mq.h       |   3 ++
 drivers/md/dm-rq.c   |  19 +++++++--
 7 files changed, 101 insertions(+), 39 deletions(-)

-- 
2.15.0

^ permalink raw reply	[flat|nested] 7+ messages in thread

* [for-4.16 PATCH v5 1/3] blk-mq: factor out a few helpers from __blk_mq_try_issue_directly
  2018-01-17  4:33 [for-4.16 PATCH v5 0/3] blk-mq: improve DM's blk-mq IO merging via blk_insert_cloned_request feedback Mike Snitzer
@ 2018-01-17  4:33 ` Mike Snitzer
  2018-01-17  4:33 ` [for-4.16 PATCH v5 2/3] blk-mq: improve DM's blk-mq IO merging performance Mike Snitzer
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 7+ messages in thread
From: Mike Snitzer @ 2018-01-17  4:33 UTC (permalink / raw)
  To: axboe; +Cc: Ming Lei, hch, dm-devel, linux-block

No functional change.  Just makes code flow more logically.

In the following commit, __blk_mq_try_issue_directly() will be used to
return the dispatch result (blk_status_t) to DM.  DM needs this
information to improve IO merging.

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 block/blk-mq.c | 79 ++++++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 52 insertions(+), 27 deletions(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index c8f62e6be6b6..c117c2baf2c9 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1694,9 +1694,9 @@ static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq)
 	return blk_tag_to_qc_t(rq->internal_tag, hctx->queue_num, true);
 }
 
-static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
-					struct request *rq,
-					blk_qc_t *cookie)
+static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
+					    struct request *rq,
+					    blk_qc_t *cookie)
 {
 	struct request_queue *q = rq->q;
 	struct blk_mq_queue_data bd = {
@@ -1705,6 +1705,43 @@ static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 	};
 	blk_qc_t new_cookie;
 	blk_status_t ret;
+
+	new_cookie = request_to_qc_t(hctx, rq);
+
+	/*
+	 * For OK queue, we are done. For error, caller may kill it.
+	 * Any other error (busy), just add it to our list as we
+	 * previously would have done.
+	 */
+	ret = q->mq_ops->queue_rq(hctx, &bd);
+	switch (ret) {
+	case BLK_STS_OK:
+		*cookie = new_cookie;
+		break;
+	case BLK_STS_RESOURCE:
+		__blk_mq_requeue_request(rq);
+		break;
+	default:
+		*cookie = BLK_QC_T_NONE;
+		break;
+	}
+
+	return ret;
+}
+
+static void __blk_mq_fallback_to_insert(struct blk_mq_hw_ctx *hctx,
+					struct request *rq,
+					bool run_queue)
+{
+	blk_mq_sched_insert_request(rq, false, run_queue, false,
+					hctx->flags & BLK_MQ_F_BLOCKING);
+}
+
+static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
+						struct request *rq,
+						blk_qc_t *cookie)
+{
+	struct request_queue *q = rq->q;
 	bool run_queue = true;
 
 	/* RCU or SRCU read lock is needed before checking quiesced flag */
@@ -1724,41 +1761,29 @@ static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 		goto insert;
 	}
 
-	new_cookie = request_to_qc_t(hctx, rq);
-
-	/*
-	 * For OK queue, we are done. For error, kill it. Any other
-	 * error (busy), just add it to our list as we previously
-	 * would have done
-	 */
-	ret = q->mq_ops->queue_rq(hctx, &bd);
-	switch (ret) {
-	case BLK_STS_OK:
-		*cookie = new_cookie;
-		return;
-	case BLK_STS_RESOURCE:
-		__blk_mq_requeue_request(rq);
-		goto insert;
-	default:
-		*cookie = BLK_QC_T_NONE;
-		blk_mq_end_request(rq, ret);
-		return;
-	}
-
+	return __blk_mq_issue_directly(hctx, rq, cookie);
 insert:
-	blk_mq_sched_insert_request(rq, false, run_queue, false,
-					hctx->flags & BLK_MQ_F_BLOCKING);
+	__blk_mq_fallback_to_insert(hctx, rq, run_queue);
+
+	return BLK_STS_OK;
 }
 
 static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 		struct request *rq, blk_qc_t *cookie)
 {
+	blk_status_t ret;
 	int srcu_idx;
 
 	might_sleep_if(hctx->flags & BLK_MQ_F_BLOCKING);
 
 	hctx_lock(hctx, &srcu_idx);
-	__blk_mq_try_issue_directly(hctx, rq, cookie);
+
+	ret = __blk_mq_try_issue_directly(hctx, rq, cookie);
+	if (ret == BLK_STS_RESOURCE)
+		__blk_mq_fallback_to_insert(hctx, rq, true);
+	else if (ret != BLK_STS_OK)
+		blk_mq_end_request(rq, ret);
+
 	hctx_unlock(hctx, srcu_idx);
 }
 
-- 
2.15.0

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [for-4.16 PATCH v5 2/3] blk-mq: improve DM's blk-mq IO merging performance
  2018-01-17  4:33 [for-4.16 PATCH v5 0/3] blk-mq: improve DM's blk-mq IO merging via blk_insert_cloned_request feedback Mike Snitzer
  2018-01-17  4:33 ` [for-4.16 PATCH v5 1/3] blk-mq: factor out a few helpers from __blk_mq_try_issue_directly Mike Snitzer
@ 2018-01-17  4:33 ` Mike Snitzer
  2018-01-17  4:39   ` Mike Snitzer
  2018-01-17  4:33 ` [for-4.16 PATCH v5 2/3] blk-mq: improve DM's blk-mq IO merging via blk_insert_cloned_request feedback Mike Snitzer
  2018-01-17  4:33 ` [for-4.16 PATCH v5 3/3] blk-mq-sched: remove unused 'can_block' arg from blk_mq_sched_insert_request Mike Snitzer
  3 siblings, 1 reply; 7+ messages in thread
From: Mike Snitzer @ 2018-01-17  4:33 UTC (permalink / raw)
  To: axboe; +Cc: Ming Lei, hch, dm-devel, linux-block

From: Ming Lei <ming.lei@redhat.com>

blk_insert_cloned_request() is called in the fast path of a dm-rq driver
(e.g. blk-mq request-based DM mpath).  blk_insert_cloned_request() uses
blk_mq_request_bypass_insert() to directly append the request to the
blk-mq hctx->dispatch_list of the underlying queue.

1) This way isn't efficient enough because the hctx spinlock is always
used.

2) With blk_insert_cloned_request(), we completely bypass underlying
queue's elevator and depend on the upper-level dm-rq driver's elevator
to schedule IO.  But dm-rq currently can't get the underlying queue's
dispatch feedback at all.  Without knowing whether a request was issued
or not (e.g. due to underlying queue being busy) the dm-rq elevator will
not be able to provide effective IO merging (as a side-effect of dm-rq
currently blindly destaging a request from its elevator only to requeue
it after a delay, which kills any opportunity for merging).  This
obviously causes very bad sequential IO performance.

Fix this by updating blk_insert_cloned_request() to use
blk_mq_request_direct_issue().  blk_mq_request_direct_issue() allows a
request to be issued directly to the underlying queue and returns the
dispatch feedback (blk_status_t).  If blk_mq_request_direct_issue()
returns BLK_STS_RESOURCE, the dm-rq driver will now use DM_MAPIO_REQUEUE
to _not_ destage the request, thereby preserving the opportunity to
merge IO.

With this, request-based DM's blk-mq sequential IO performance is vastly
improved (as much as 3X in mpath/virtio-scsi testing).

Signed-off-by: Ming Lei <ming.lei@redhat.com>
[based _heavily_ on Ming Lei's initial solution, but blk-mq.c changes
were refactored to make them less fragile and easier to read/review]
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 block/blk-core.c   |  3 +--
 block/blk-mq.c     | 42 +++++++++++++++++++++++++++++++++---------
 block/blk-mq.h     |  3 +++
 drivers/md/dm-rq.c | 19 ++++++++++++++++---
 4 files changed, 53 insertions(+), 14 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 7ba607527487..55f338020254 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2500,8 +2500,7 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *
 		 * bypass a potential scheduler on the bottom device for
 		 * insert.
 		 */
-		blk_mq_request_bypass_insert(rq, true);
-		return BLK_STS_OK;
+		return blk_mq_request_direct_issue(rq);
 	}
 
 	spin_lock_irqsave(q->queue_lock, flags);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index f30e34a22a6c..81ee3f9124dc 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1706,7 +1706,8 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
 	blk_qc_t new_cookie;
 	blk_status_t ret;
 
-	new_cookie = request_to_qc_t(hctx, rq);
+	if (cookie)
+		new_cookie = request_to_qc_t(hctx, rq);
 
 	/*
 	 * For OK queue, we are done. For error, caller may kill it.
@@ -1716,13 +1717,15 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
 	ret = q->mq_ops->queue_rq(hctx, &bd);
 	switch (ret) {
 	case BLK_STS_OK:
-		*cookie = new_cookie;
+		if (cookie)
+			*cookie = new_cookie;
 		break;
 	case BLK_STS_RESOURCE:
 		__blk_mq_requeue_request(rq);
 		break;
 	default:
-		*cookie = BLK_QC_T_NONE;
+		if (cookie)
+			*cookie = BLK_QC_T_NONE;
 		break;
 	}
 
@@ -1731,15 +1734,20 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
 
 static void __blk_mq_fallback_to_insert(struct blk_mq_hw_ctx *hctx,
 					struct request *rq,
-					bool run_queue)
+					bool run_queue, bool bypass_insert)
 {
+	if (bypass_insert) {
+		blk_mq_request_bypass_insert(rq, run_queue);
+		return;
+	}
 	blk_mq_sched_insert_request(rq, false, run_queue, false,
 					hctx->flags & BLK_MQ_F_BLOCKING);
 }
 
 static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 						struct request *rq,
-						blk_qc_t *cookie)
+						blk_qc_t *cookie,
+						bool bypass_insert)
 {
 	struct request_queue *q = rq->q;
 	bool run_queue = true;
@@ -1750,7 +1758,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 		goto insert;
 	}
 
-	if (q->elevator)
+	if (q->elevator && !bypass_insert)
 		goto insert;
 
 	if (!blk_mq_get_driver_tag(rq, NULL, false))
@@ -1763,7 +1771,9 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 
 	return __blk_mq_issue_directly(hctx, rq, cookie);
 insert:
-	__blk_mq_fallback_to_insert(hctx. rq, run_queue);
+	__blk_mq_fallback_to_insert(hctx. rq, run_queue, bypass_insert);
+	if (bypass_insert)
+		return BLK_STS_RESOURCE;
 
 	return BLK_STS_OK;
 }
@@ -1778,15 +1788,29 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 
 	hctx_lock(hctx, &srcu_idx);
 
-	ret = __blk_mq_try_issue_directly(hctx, rq, cookie);
+	ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false);
 	if (ret == BLK_STS_RESOURCE)
-		__blk_mq_fallback_to_insert(hctx. rq, true);
+		__blk_mq_fallback_to_insert(hctx. rq, true, false);
 	else if (ret != BLK_STS_OK)
 		blk_mq_end_request(rq, ret);
 
 	hctx_unlock(hctx, srcu_idx);
 }
 
+blk_status_t blk_mq_request_direct_issue(struct request *rq)
+{
+	blk_status_t ret;
+	int srcu_idx;
+	struct blk_mq_ctx *ctx = rq->mq_ctx;
+	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(rq->q, ctx->cpu);
+
+	hctx_lock(hctx, &srcu_idx);
+	ret = __blk_mq_try_issue_directly(hctx, rq, NULL, true);
+	hctx_unlock(hctx, srcu_idx);
+
+	return ret;
+}
+
 static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 {
 	const int is_sync = op_is_sync(bio->bi_opf);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 8591a54d989b..e3ebc93646ca 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -74,6 +74,9 @@ void blk_mq_request_bypass_insert(struct request *rq, bool run_queue);
 void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
 				struct list_head *list);
 
+/* Used by blk_insert_cloned_request() to issue request directly */
+blk_status_t blk_mq_request_direct_issue(struct request *rq);
+
 /*
  * CPU -> queue mappings
  */
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index c28357f5cb0e..b7d175e94a02 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -395,7 +395,7 @@ static void end_clone_request(struct request *clone, blk_status_t error)
 	dm_complete_request(tio->orig, error);
 }
 
-static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
+static blk_status_t dm_dispatch_clone_request(struct request *clone, struct request *rq)
 {
 	blk_status_t r;
 
@@ -404,9 +404,10 @@ static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
 
 	clone->start_time = jiffies;
 	r = blk_insert_cloned_request(clone->q, clone);
-	if (r)
+	if (r != BLK_STS_OK && r != BLK_STS_RESOURCE)
 		/* must complete clone in terms of original request */
 		dm_complete_request(rq, r);
+	return r;
 }
 
 static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
@@ -476,8 +477,10 @@ static int map_request(struct dm_rq_target_io *tio)
 	struct mapped_device *md = tio->md;
 	struct request *rq = tio->orig;
 	struct request *clone = NULL;
+	blk_status_t ret;
 
 	r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
+check_again:
 	switch (r) {
 	case DM_MAPIO_SUBMITTED:
 		/* The target has taken the I/O to submit by itself later */
@@ -492,7 +495,17 @@ static int map_request(struct dm_rq_target_io *tio)
 		/* The target has remapped the I/O so dispatch it */
 		trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),
 				     blk_rq_pos(rq));
-		dm_dispatch_clone_request(clone, rq);
+		ret = dm_dispatch_clone_request(clone, rq);
+		if (ret == BLK_STS_RESOURCE) {
+			blk_rq_unprep_clone(clone);
+			tio->ti->type->release_clone_rq(clone);
+			tio->clone = NULL;
+			if (!rq->q->mq_ops)
+				r = DM_MAPIO_DELAY_REQUEUE;
+			else
+				r = DM_MAPIO_REQUEUE;
+			goto check_again;
+		}
 		break;
 	case DM_MAPIO_REQUEUE:
 		/* The target wants to requeue the I/O */
-- 
2.15.0

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [for-4.16 PATCH v5 2/3] blk-mq: improve DM's blk-mq IO merging via blk_insert_cloned_request feedback
  2018-01-17  4:33 [for-4.16 PATCH v5 0/3] blk-mq: improve DM's blk-mq IO merging via blk_insert_cloned_request feedback Mike Snitzer
  2018-01-17  4:33 ` [for-4.16 PATCH v5 1/3] blk-mq: factor out a few helpers from __blk_mq_try_issue_directly Mike Snitzer
  2018-01-17  4:33 ` [for-4.16 PATCH v5 2/3] blk-mq: improve DM's blk-mq IO merging performance Mike Snitzer
@ 2018-01-17  4:33 ` Mike Snitzer
  2018-01-17 15:34   ` Jens Axboe
  2018-01-17  4:33 ` [for-4.16 PATCH v5 3/3] blk-mq-sched: remove unused 'can_block' arg from blk_mq_sched_insert_request Mike Snitzer
  3 siblings, 1 reply; 7+ messages in thread
From: Mike Snitzer @ 2018-01-17  4:33 UTC (permalink / raw)
  To: axboe; +Cc: Ming Lei, hch, dm-devel, linux-block

From: Ming Lei <ming.lei@redhat.com>

blk_insert_cloned_request() is called in the fast path of a dm-rq driver
(e.g. blk-mq request-based DM mpath).  blk_insert_cloned_request() uses
blk_mq_request_bypass_insert() to directly append the request to the
blk-mq hctx->dispatch_list of the underlying queue.

1) This way isn't efficient enough because the hctx spinlock is always
used.

2) With blk_insert_cloned_request(), we completely bypass underlying
queue's elevator and depend on the upper-level dm-rq driver's elevator
to schedule IO.  But dm-rq currently can't get the underlying queue's
dispatch feedback at all.  Without knowing whether a request was issued
or not (e.g. due to underlying queue being busy) the dm-rq elevator will
not be able to provide effective IO merging (as a side-effect of dm-rq
currently blindly destaging a request from its elevator only to requeue
it after a delay, which kills any opportunity for merging).  This
obviously causes very bad sequential IO performance.

Fix this by updating blk_insert_cloned_request() to use
blk_mq_request_direct_issue().  blk_mq_request_direct_issue() allows a
request to be issued directly to the underlying queue and returns the
dispatch feedback (blk_status_t).  If blk_mq_request_direct_issue()
returns BLK_STS_RESOURCE, the dm-rq driver will now use DM_MAPIO_REQUEUE
to _not_ destage the request, thereby preserving the opportunity to
merge IO.

With this, request-based DM's blk-mq sequential IO performance is vastly
improved (as much as 3X in mpath/virtio-scsi testing).

Signed-off-by: Ming Lei <ming.lei@redhat.com>
[blk-mq.c changes heavily influenced by Ming Lei's initial solution, but
they were refactored to make them less fragile and easier to read/review]
Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 block/blk-core.c   |  3 +--
 block/blk-mq.c     | 42 +++++++++++++++++++++++++++++++++---------
 block/blk-mq.h     |  3 +++
 drivers/md/dm-rq.c | 19 ++++++++++++++++---
 4 files changed, 53 insertions(+), 14 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 7ba607527487..55f338020254 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -2500,8 +2500,7 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *
 		 * bypass a potential scheduler on the bottom device for
 		 * insert.
 		 */
-		blk_mq_request_bypass_insert(rq, true);
-		return BLK_STS_OK;
+		return blk_mq_request_direct_issue(rq);
 	}
 
 	spin_lock_irqsave(q->queue_lock, flags);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index c117c2baf2c9..0b64f7210a89 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1706,7 +1706,8 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
 	blk_qc_t new_cookie;
 	blk_status_t ret;
 
-	new_cookie = request_to_qc_t(hctx, rq);
+	if (cookie)
+		new_cookie = request_to_qc_t(hctx, rq);
 
 	/*
 	 * For OK queue, we are done. For error, caller may kill it.
@@ -1716,13 +1717,15 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
 	ret = q->mq_ops->queue_rq(hctx, &bd);
 	switch (ret) {
 	case BLK_STS_OK:
-		*cookie = new_cookie;
+		if (cookie)
+			*cookie = new_cookie;
 		break;
 	case BLK_STS_RESOURCE:
 		__blk_mq_requeue_request(rq);
 		break;
 	default:
-		*cookie = BLK_QC_T_NONE;
+		if (cookie)
+			*cookie = BLK_QC_T_NONE;
 		break;
 	}
 
@@ -1731,15 +1734,20 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
 
 static void __blk_mq_fallback_to_insert(struct blk_mq_hw_ctx *hctx,
 					struct request *rq,
-					bool run_queue)
+					bool run_queue, bool bypass_insert)
 {
+	if (bypass_insert) {
+		blk_mq_request_bypass_insert(rq, run_queue);
+		return;
+	}
 	blk_mq_sched_insert_request(rq, false, run_queue, false,
 					hctx->flags & BLK_MQ_F_BLOCKING);
 }
 
 static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 						struct request *rq,
-						blk_qc_t *cookie)
+						blk_qc_t *cookie,
+						bool bypass_insert)
 {
 	struct request_queue *q = rq->q;
 	bool run_queue = true;
@@ -1750,7 +1758,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 		goto insert;
 	}
 
-	if (q->elevator)
+	if (q->elevator && !bypass_insert)
 		goto insert;
 
 	if (!blk_mq_get_driver_tag(rq, NULL, false))
@@ -1763,7 +1771,9 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 
 	return __blk_mq_issue_directly(hctx, rq, cookie);
 insert:
-	__blk_mq_fallback_to_insert(hctx, rq, run_queue);
+	__blk_mq_fallback_to_insert(hctx, rq, run_queue, bypass_insert);
+	if (bypass_insert)
+		return BLK_STS_RESOURCE;
 
 	return BLK_STS_OK;
 }
@@ -1778,15 +1788,29 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 
 	hctx_lock(hctx, &srcu_idx);
 
-	ret = __blk_mq_try_issue_directly(hctx, rq, cookie);
+	ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false);
 	if (ret == BLK_STS_RESOURCE)
-		__blk_mq_fallback_to_insert(hctx, rq, true);
+		__blk_mq_fallback_to_insert(hctx, rq, true, false);
 	else if (ret != BLK_STS_OK)
 		blk_mq_end_request(rq, ret);
 
 	hctx_unlock(hctx, srcu_idx);
 }
 
+blk_status_t blk_mq_request_direct_issue(struct request *rq)
+{
+	blk_status_t ret;
+	int srcu_idx;
+	struct blk_mq_ctx *ctx = rq->mq_ctx;
+	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(rq->q, ctx->cpu);
+
+	hctx_lock(hctx, &srcu_idx);
+	ret = __blk_mq_try_issue_directly(hctx, rq, NULL, true);
+	hctx_unlock(hctx, srcu_idx);
+
+	return ret;
+}
+
 static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 {
 	const int is_sync = op_is_sync(bio->bi_opf);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 8591a54d989b..e3ebc93646ca 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -74,6 +74,9 @@ void blk_mq_request_bypass_insert(struct request *rq, bool run_queue);
 void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
 				struct list_head *list);
 
+/* Used by blk_insert_cloned_request() to issue request directly */
+blk_status_t blk_mq_request_direct_issue(struct request *rq);
+
 /*
  * CPU -> queue mappings
  */
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index c28357f5cb0e..b7d175e94a02 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -395,7 +395,7 @@ static void end_clone_request(struct request *clone, blk_status_t error)
 	dm_complete_request(tio->orig, error);
 }
 
-static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
+static blk_status_t dm_dispatch_clone_request(struct request *clone, struct request *rq)
 {
 	blk_status_t r;
 
@@ -404,9 +404,10 @@ static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
 
 	clone->start_time = jiffies;
 	r = blk_insert_cloned_request(clone->q, clone);
-	if (r)
+	if (r != BLK_STS_OK && r != BLK_STS_RESOURCE)
 		/* must complete clone in terms of original request */
 		dm_complete_request(rq, r);
+	return r;
 }
 
 static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
@@ -476,8 +477,10 @@ static int map_request(struct dm_rq_target_io *tio)
 	struct mapped_device *md = tio->md;
 	struct request *rq = tio->orig;
 	struct request *clone = NULL;
+	blk_status_t ret;
 
 	r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
+check_again:
 	switch (r) {
 	case DM_MAPIO_SUBMITTED:
 		/* The target has taken the I/O to submit by itself later */
@@ -492,7 +495,17 @@ static int map_request(struct dm_rq_target_io *tio)
 		/* The target has remapped the I/O so dispatch it */
 		trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),
 				     blk_rq_pos(rq));
-		dm_dispatch_clone_request(clone, rq);
+		ret = dm_dispatch_clone_request(clone, rq);
+		if (ret == BLK_STS_RESOURCE) {
+			blk_rq_unprep_clone(clone);
+			tio->ti->type->release_clone_rq(clone);
+			tio->clone = NULL;
+			if (!rq->q->mq_ops)
+				r = DM_MAPIO_DELAY_REQUEUE;
+			else
+				r = DM_MAPIO_REQUEUE;
+			goto check_again;
+		}
 		break;
 	case DM_MAPIO_REQUEUE:
 		/* The target wants to requeue the I/O */
-- 
2.15.0

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* [for-4.16 PATCH v5 3/3] blk-mq-sched: remove unused 'can_block' arg from blk_mq_sched_insert_request
  2018-01-17  4:33 [for-4.16 PATCH v5 0/3] blk-mq: improve DM's blk-mq IO merging via blk_insert_cloned_request feedback Mike Snitzer
                   ` (2 preceding siblings ...)
  2018-01-17  4:33 ` [for-4.16 PATCH v5 2/3] blk-mq: improve DM's blk-mq IO merging via blk_insert_cloned_request feedback Mike Snitzer
@ 2018-01-17  4:33 ` Mike Snitzer
  3 siblings, 0 replies; 7+ messages in thread
From: Mike Snitzer @ 2018-01-17  4:33 UTC (permalink / raw)
  To: axboe; +Cc: Ming Lei, hch, dm-devel, linux-block

Signed-off-by: Mike Snitzer <snitzer@redhat.com>
---
 block/blk-exec.c     |  2 +-
 block/blk-mq-sched.c |  2 +-
 block/blk-mq-sched.h |  2 +-
 block/blk-mq.c       | 16 +++++++---------
 4 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/block/blk-exec.c b/block/blk-exec.c
index 5c0f3dc446dc..f7b292f12449 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -61,7 +61,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 	 * be reused after dying flag is set
 	 */
 	if (q->mq_ops) {
-		blk_mq_sched_insert_request(rq, at_head, true, false, false);
+		blk_mq_sched_insert_request(rq, at_head, true, false);
 		return;
 	}
 
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index 2ff7cf0cbf73..55c0a745b427 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -427,7 +427,7 @@ void blk_mq_sched_restart(struct blk_mq_hw_ctx *const hctx)
 }
 
 void blk_mq_sched_insert_request(struct request *rq, bool at_head,
-				 bool run_queue, bool async, bool can_block)
+				 bool run_queue, bool async)
 {
 	struct request_queue *q = rq->q;
 	struct elevator_queue *e = q->elevator;
diff --git a/block/blk-mq-sched.h b/block/blk-mq-sched.h
index ba1d1418a96d..1e9c9018ace1 100644
--- a/block/blk-mq-sched.h
+++ b/block/blk-mq-sched.h
@@ -18,7 +18,7 @@ bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq);
 void blk_mq_sched_restart(struct blk_mq_hw_ctx *hctx);
 
 void blk_mq_sched_insert_request(struct request *rq, bool at_head,
-				 bool run_queue, bool async, bool can_block);
+				 bool run_queue, bool async);
 void blk_mq_sched_insert_requests(struct request_queue *q,
 				  struct blk_mq_ctx *ctx,
 				  struct list_head *list, bool run_queue_async);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 0b64f7210a89..06ef6a7cc29c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -745,13 +745,13 @@ static void blk_mq_requeue_work(struct work_struct *work)
 
 		rq->rq_flags &= ~RQF_SOFTBARRIER;
 		list_del_init(&rq->queuelist);
-		blk_mq_sched_insert_request(rq, true, false, false, true);
+		blk_mq_sched_insert_request(rq, true, false, false);
 	}
 
 	while (!list_empty(&rq_list)) {
 		rq = list_entry(rq_list.next, struct request, queuelist);
 		list_del_init(&rq->queuelist);
-		blk_mq_sched_insert_request(rq, false, false, false, true);
+		blk_mq_sched_insert_request(rq, false, false, false);
 	}
 
 	blk_mq_run_hw_queues(q, false);
@@ -1732,16 +1732,14 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
 	return ret;
 }
 
-static void __blk_mq_fallback_to_insert(struct blk_mq_hw_ctx *hctx,
-					struct request *rq,
+static void __blk_mq_fallback_to_insert(struct request *rq,
 					bool run_queue, bool bypass_insert)
 {
 	if (bypass_insert) {
 		blk_mq_request_bypass_insert(rq, run_queue);
 		return;
 	}
-	blk_mq_sched_insert_request(rq, false, run_queue, false,
-					hctx->flags & BLK_MQ_F_BLOCKING);
+	blk_mq_sched_insert_request(rq, false, run_queue, false);
 }
 
 static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
@@ -1771,7 +1769,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 
 	return __blk_mq_issue_directly(hctx, rq, cookie);
 insert:
-	__blk_mq_fallback_to_insert(hctx, rq, run_queue, bypass_insert);
+	__blk_mq_fallback_to_insert(rq, run_queue, bypass_insert);
 	if (bypass_insert)
 		return BLK_STS_RESOURCE;
 
@@ -1790,7 +1788,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
 
 	ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false);
 	if (ret == BLK_STS_RESOURCE)
-		__blk_mq_fallback_to_insert(hctx, rq, true, false);
+		__blk_mq_fallback_to_insert(rq, true, false);
 	else if (ret != BLK_STS_OK)
 		blk_mq_end_request(rq, ret);
 
@@ -1919,7 +1917,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
 	} else if (q->elevator) {
 		blk_mq_put_ctx(data.ctx);
 		blk_mq_bio_to_request(rq, bio);
-		blk_mq_sched_insert_request(rq, false, true, true, true);
+		blk_mq_sched_insert_request(rq, false, true, true);
 	} else {
 		blk_mq_put_ctx(data.ctx);
 		blk_mq_bio_to_request(rq, bio);
-- 
2.15.0

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [for-4.16 PATCH v5 2/3] blk-mq: improve DM's blk-mq IO merging performance
  2018-01-17  4:33 ` [for-4.16 PATCH v5 2/3] blk-mq: improve DM's blk-mq IO merging performance Mike Snitzer
@ 2018-01-17  4:39   ` Mike Snitzer
  0 siblings, 0 replies; 7+ messages in thread
From: Mike Snitzer @ 2018-01-17  4:39 UTC (permalink / raw)
  To: axboe; +Cc: linux-block, Ming Lei, dm-devel, hch

This one is redundant and should be dropped.  I ran git-format-patch
twice after a quick rebase to tweak the subject and header.

Sorry for the confusion.

On Tue, Jan 16 2018 at 11:33pm -0500,
Mike Snitzer <snitzer@redhat.com> wrote:

> From: Ming Lei <ming.lei@redhat.com>
> 
> blk_insert_cloned_request() is called in the fast path of a dm-rq driver
> (e.g. blk-mq request-based DM mpath).  blk_insert_cloned_request() uses
> blk_mq_request_bypass_insert() to directly append the request to the
> blk-mq hctx->dispatch_list of the underlying queue.
> 
> 1) This way isn't efficient enough because the hctx spinlock is always
> used.
> 
> 2) With blk_insert_cloned_request(), we completely bypass underlying
> queue's elevator and depend on the upper-level dm-rq driver's elevator
> to schedule IO.  But dm-rq currently can't get the underlying queue's
> dispatch feedback at all.  Without knowing whether a request was issued
> or not (e.g. due to underlying queue being busy) the dm-rq elevator will
> not be able to provide effective IO merging (as a side-effect of dm-rq
> currently blindly destaging a request from its elevator only to requeue
> it after a delay, which kills any opportunity for merging).  This
> obviously causes very bad sequential IO performance.
> 
> Fix this by updating blk_insert_cloned_request() to use
> blk_mq_request_direct_issue().  blk_mq_request_direct_issue() allows a
> request to be issued directly to the underlying queue and returns the
> dispatch feedback (blk_status_t).  If blk_mq_request_direct_issue()
> returns BLK_STS_RESOURCE the dm-rq driver will now use DM_MAPIO_REQUEUE
> to _not_ destage the request, thereby preserving the opportunity to
> merge IO.
> 
> With this, request-based DM's blk-mq sequential IO performance is vastly
> improved (as much as 3X in mpath/virtio-scsi testing).
> 
> Signed-off-by: Ming Lei <ming.lei@redhat.com>
> [based _heavily_ on Ming Lei's initial solution, but blk-mq.c changes
> were refactored to make them less fragile and easier to read/review]
> Signed-off-by: Mike Snitzer <snitzer@redhat.com>
> ---
>  block/blk-core.c   |  3 +--
>  block/blk-mq.c     | 42 +++++++++++++++++++++++++++++++++---------
>  block/blk-mq.h     |  3 +++
>  drivers/md/dm-rq.c | 19 ++++++++++++++++---
>  4 files changed, 53 insertions(+), 14 deletions(-)
> 
> diff --git a/block/blk-core.c b/block/blk-core.c
> index 7ba607527487..55f338020254 100644
> --- a/block/blk-core.c
> +++ b/block/blk-core.c
> @@ -2500,8 +2500,7 @@ blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *
>  		 * bypass a potential scheduler on the bottom device for
>  		 * insert.
>  		 */
> -		blk_mq_request_bypass_insert(rq, true);
> -		return BLK_STS_OK;
> +		return blk_mq_request_direct_issue(rq);
>  	}
>  
>  	spin_lock_irqsave(q->queue_lock, flags);
> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index f30e34a22a6c..81ee3f9124dc 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -1706,7 +1706,8 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
>  	blk_qc_t new_cookie;
>  	blk_status_t ret;
>  
> -	new_cookie = request_to_qc_t(hctx, rq);
> +	if (cookie)
> +		new_cookie = request_to_qc_t(hctx, rq);
>  
>  	/*
>  	 * For OK queue, we are done. For error, caller may kill it.
> @@ -1716,13 +1717,15 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
>  	ret = q->mq_ops->queue_rq(hctx, &bd);
>  	switch (ret) {
>  	case BLK_STS_OK:
> -		*cookie = new_cookie;
> +		if (cookie)
> +			*cookie = new_cookie;
>  		break;
>  	case BLK_STS_RESOURCE:
>  		__blk_mq_requeue_request(rq);
>  		break;
>  	default:
> -		*cookie = BLK_QC_T_NONE;
> +		if (cookie)
> +			*cookie = BLK_QC_T_NONE;
>  		break;
>  	}
>  
> @@ -1731,15 +1734,20 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
>  
>  static void __blk_mq_fallback_to_insert(struct blk_mq_hw_ctx *hctx,
>  					struct request *rq,
> -					bool run_queue)
> +					bool run_queue, bool bypass_insert)
>  {
> +	if (bypass_insert) {
> +		blk_mq_request_bypass_insert(rq, run_queue);
> +		return;
> +	}
>  	blk_mq_sched_insert_request(rq, false, run_queue, false,
>  					hctx->flags & BLK_MQ_F_BLOCKING);
>  }
>  
>  static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
>  						struct request *rq,
> -						blk_qc_t *cookie)
> +						blk_qc_t *cookie,
> +						bool bypass_insert)
>  {
>  	struct request_queue *q = rq->q;
>  	bool run_queue = true;
> @@ -1750,7 +1758,7 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
>  		goto insert;
>  	}
>  
> -	if (q->elevator)
> +	if (q->elevator && !bypass_insert)
>  		goto insert;
>  
>  	if (!blk_mq_get_driver_tag(rq, NULL, false))
> @@ -1763,7 +1771,9 @@ static blk_status_t __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
>  
>  	return __blk_mq_issue_directly(hctx, rq, cookie);
>  insert:
> -	__blk_mq_fallback_to_insert(hctx. rq, run_queue);
> +	__blk_mq_fallback_to_insert(hctx. rq, run_queue, bypass_insert);
> +	if (bypass_insert)
> +		return BLK_STS_RESOURCE;
>  
>  	return BLK_STS_OK;
>  }
> @@ -1778,15 +1788,29 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
>  
>  	hctx_lock(hctx, &srcu_idx);
>  
> -	ret = __blk_mq_try_issue_directly(hctx, rq, cookie);
> +	ret = __blk_mq_try_issue_directly(hctx, rq, cookie, false);
>  	if (ret == BLK_STS_RESOURCE)
> -		__blk_mq_fallback_to_insert(hctx. rq, true);
> +		__blk_mq_fallback_to_insert(hctx. rq, true, false);
>  	else if (ret != BLK_STS_OK)
>  		blk_mq_end_request(rq, ret);
>  
>  	hctx_unlock(hctx, srcu_idx);
>  }
>  
> +blk_status_t blk_mq_request_direct_issue(struct request *rq)
> +{
> +	blk_status_t ret;
> +	int srcu_idx;
> +	struct blk_mq_ctx *ctx = rq->mq_ctx;
> +	struct blk_mq_hw_ctx *hctx = blk_mq_map_queue(rq->q, ctx->cpu);
> +
> +	hctx_lock(hctx, &srcu_idx);
> +	ret = __blk_mq_try_issue_directly(hctx, rq, NULL, true);
> +	hctx_unlock(hctx, srcu_idx);
> +
> +	return ret;
> +}
> +
>  static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
>  {
>  	const int is_sync = op_is_sync(bio->bi_opf);
> diff --git a/block/blk-mq.h b/block/blk-mq.h
> index 8591a54d989b..e3ebc93646ca 100644
> --- a/block/blk-mq.h
> +++ b/block/blk-mq.h
> @@ -74,6 +74,9 @@ void blk_mq_request_bypass_insert(struct request *rq, bool run_queue);
>  void blk_mq_insert_requests(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx,
>  				struct list_head *list);
>  
> +/* Used by blk_insert_cloned_request() to issue request directly */
> +blk_status_t blk_mq_request_direct_issue(struct request *rq);
> +
>  /*
>   * CPU -> queue mappings
>   */
> diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
> index c28357f5cb0e..b7d175e94a02 100644
> --- a/drivers/md/dm-rq.c
> +++ b/drivers/md/dm-rq.c
> @@ -395,7 +395,7 @@ static void end_clone_request(struct request *clone, blk_status_t error)
>  	dm_complete_request(tio->orig, error);
>  }
>  
> -static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
> +static blk_status_t dm_dispatch_clone_request(struct request *clone, struct request *rq)
>  {
>  	blk_status_t r;
>  
> @@ -404,9 +404,10 @@ static void dm_dispatch_clone_request(struct request *clone, struct request *rq)
>  
>  	clone->start_time = jiffies;
>  	r = blk_insert_cloned_request(clone->q, clone);
> -	if (r)
> +	if (r != BLK_STS_OK && r != BLK_STS_RESOURCE)
>  		/* must complete clone in terms of original request */
>  		dm_complete_request(rq, r);
> +	return r;
>  }
>  
>  static int dm_rq_bio_constructor(struct bio *bio, struct bio *bio_orig,
> @@ -476,8 +477,10 @@ static int map_request(struct dm_rq_target_io *tio)
>  	struct mapped_device *md = tio->md;
>  	struct request *rq = tio->orig;
>  	struct request *clone = NULL;
> +	blk_status_t ret;
>  
>  	r = ti->type->clone_and_map_rq(ti, rq, &tio->info, &clone);
> +check_again:
>  	switch (r) {
>  	case DM_MAPIO_SUBMITTED:
>  		/* The target has taken the I/O to submit by itself later */
> @@ -492,7 +495,17 @@ static int map_request(struct dm_rq_target_io *tio)
>  		/* The target has remapped the I/O so dispatch it */
>  		trace_block_rq_remap(clone->q, clone, disk_devt(dm_disk(md)),
>  				     blk_rq_pos(rq));
> -		dm_dispatch_clone_request(clone, rq);
> +		ret = dm_dispatch_clone_request(clone, rq);
> +		if (ret == BLK_STS_RESOURCE) {
> +			blk_rq_unprep_clone(clone);
> +			tio->ti->type->release_clone_rq(clone);
> +			tio->clone = NULL;
> +			if (!rq->q->mq_ops)
> +				r = DM_MAPIO_DELAY_REQUEUE;
> +			else
> +				r = DM_MAPIO_REQUEUE;
> +			goto check_again;
> +		}
>  		break;
>  	case DM_MAPIO_REQUEUE:
>  		/* The target wants to requeue the I/O */
> -- 
> 2.15.0
> 
> --
> dm-devel mailing list
> dm-devel@redhat.com
> https://www.redhat.com/mailman/listinfo/dm-devel

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [for-4.16 PATCH v5 2/3] blk-mq: improve DM's blk-mq IO merging via blk_insert_cloned_request feedback
  2018-01-17  4:33 ` [for-4.16 PATCH v5 2/3] blk-mq: improve DM's blk-mq IO merging via blk_insert_cloned_request feedback Mike Snitzer
@ 2018-01-17 15:34   ` Jens Axboe
  0 siblings, 0 replies; 7+ messages in thread
From: Jens Axboe @ 2018-01-17 15:34 UTC (permalink / raw)
  To: Mike Snitzer; +Cc: Ming Lei, hch, dm-devel, linux-block

On 1/16/18 9:33 PM, Mike Snitzer wrote:
> From: Ming Lei <ming.lei@redhat.com>
> 
> blk_insert_cloned_request() is called in the fast path of a dm-rq driver
> (e.g. blk-mq request-based DM mpath).  blk_insert_cloned_request() uses
> blk_mq_request_bypass_insert() to directly append the request to the
> blk-mq hctx->dispatch_list of the underlying queue.
> 
> 1) This way isn't efficient enough because the hctx spinlock is always
> used.
> 
> 2) With blk_insert_cloned_request(), we completely bypass underlying
> queue's elevator and depend on the upper-level dm-rq driver's elevator
> to schedule IO.  But dm-rq currently can't get the underlying queue's
> dispatch feedback at all.  Without knowing whether a request was issued
> or not (e.g. due to underlying queue being busy) the dm-rq elevator will
> not be able to provide effective IO merging (as a side-effect of dm-rq
> currently blindly destaging a request from its elevator only to requeue
> it after a delay, which kills any opportunity for merging).  This
> obviously causes very bad sequential IO performance.
> 
> Fix this by updating blk_insert_cloned_request() to use
> blk_mq_request_direct_issue().  blk_mq_request_direct_issue() allows a
> request to be issued directly to the underlying queue and returns the
> dispatch feedback (blk_status_t).  If blk_mq_request_direct_issue()
> returns BLK_STS_RESOURCE the dm-rq driver will now use DM_MAPIO_REQUEUE
> to _not_ destage the request, thereby preserving the opportunity to
> merge IO.
> 
> With this, request-based DM's blk-mq sequential IO performance is vastly
> improved (as much as 3X in mpath/virtio-scsi testing).

This looks better. Two minor nit picks:

> diff --git a/block/blk-mq.c b/block/blk-mq.c
> index c117c2baf2c9..0b64f7210a89 100644
> --- a/block/blk-mq.c
> +++ b/block/blk-mq.c
> @@ -1706,7 +1706,8 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
>  	blk_qc_t new_cookie;
>  	blk_status_t ret;
>  
> -	new_cookie = request_to_qc_t(hctx, rq);
> +	if (cookie)
> +		new_cookie = request_to_qc_t(hctx, rq);
>  
>  	/*
>  	 * For OK queue, we are done. For error, caller may kill it.
> @@ -1716,13 +1717,15 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
>  	ret = q->mq_ops->queue_rq(hctx, &bd);
>  	switch (ret) {
>  	case BLK_STS_OK:
> -		*cookie = new_cookie;
> +		if (cookie)
> +			*cookie = new_cookie;
>  		break;
>  	case BLK_STS_RESOURCE:
>  		__blk_mq_requeue_request(rq);
>  		break;
>  	default:
> -		*cookie = BLK_QC_T_NONE;
> +		if (cookie)
> +			*cookie = BLK_QC_T_NONE;
>  		break;
>  	}

Instead of adding these three conditions, always pass in a valid pointer
to a cookie and get rid of them.

> @@ -1731,15 +1734,20 @@ static blk_status_t __blk_mq_issue_directly(struct blk_mq_hw_ctx *hctx,
>  
>  static void __blk_mq_fallback_to_insert(struct blk_mq_hw_ctx *hctx,
>  					struct request *rq,
> -					bool run_queue)
> +					bool run_queue, bool bypass_insert)
>  {
> +	if (bypass_insert) {
> +		blk_mq_request_bypass_insert(rq, run_queue);
> +		return;
> +	}
>  	blk_mq_sched_insert_request(rq, false, run_queue, false,
>  					hctx->flags & BLK_MQ_F_BLOCKING);
>  }

Lose the return and just make it an if/else.

-- 
Jens Axboe

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2018-01-17 15:34 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-01-17  4:33 [for-4.16 PATCH v5 0/3] blk-mq: improve DM's blk-mq IO merging via blk_insert_cloned_request feedback Mike Snitzer
2018-01-17  4:33 ` [for-4.16 PATCH v5 1/3] blk-mq: factor out a few helpers from __blk_mq_try_issue_directly Mike Snitzer
2018-01-17  4:33 ` [for-4.16 PATCH v5 2/3] blk-mq: improve DM's blk-mq IO merging performance Mike Snitzer
2018-01-17  4:39   ` Mike Snitzer
2018-01-17  4:33 ` [for-4.16 PATCH v5 2/3] blk-mq: improve DM's blk-mq IO merging via blk_insert_cloned_request feedback Mike Snitzer
2018-01-17 15:34   ` Jens Axboe
2018-01-17  4:33 ` [for-4.16 PATCH v5 3/3] blk-mq-sched: remove unused 'can_block' arg from blk_mq_sched_insert_request Mike Snitzer

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox