[PATCH 5/5] dm-mpath: use blk_mq_alloc_request_notify for allocating blk-mq req

dm-devel.redhat.com archive mirror
 help / color / mirror / Atom feed

From: Ming Lei <ming.lei@redhat.com>
To: Jens Axboe <axboe@kernel.dk>,
	linux-block@vger.kernel.org,
	Christoph Hellwig <hch@infradead.org>,
	Mike Snitzer <snitzer@redhat.com>
Cc: dm-devel@redhat.com, Ming Lei <ming.lei@redhat.com>,
	Laurence Oberman <loberman@redhat.com>,
	Bart Van Assche <bart.vanassche@sandisk.com>
Subject: [PATCH 5/5] dm-mpath: use blk_mq_alloc_request_notify for allocating blk-mq req
Date: Mon, 22 Jan 2018 11:35:50 +0800	[thread overview]
Message-ID: <20180122033550.27855-6-ming.lei@redhat.com> (raw)
In-Reply-To: <20180122033550.27855-1-ming.lei@redhat.com>

When blk_get_request() fails to allocate a request, there is no way for
the underlying queue's RESTART to restart DM's queue. This patch applies
the newly introduced blk_mq_alloc_request_notify() to deal with this
issue.

The following issues can be addressed:

1) When blk_get_request() fails, dm-rq will finally return
BLK_STS_RESOURCE to blk-mq, and blk-mq will then run the queue after 10ms.
This delay may degrade dm-rq performance a lot when any underlying device
attached to the same host is accessed directly.

2) There is no way to figure out a perfect delay for running the queue
in situation 1), and if the delay is too small, requests can be dequeued
from the DM queue too quickly, which hurts sequential IO performance.

This approach was suggested by Jens, and has been used in the blk-mq
dispatch path for a while; see blk_mq_mark_tag_wait().

Suggested-by: Jens Axboe <axboe@kernel.dk>
Cc: Mike Snitzer <snitzer@redhat.com>
Cc: Laurence Oberman <loberman@redhat.com>
Cc: Bart Van Assche <bart.vanassche@sandisk.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
 drivers/md/dm-mpath.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++++-
 drivers/md/dm.c       |  1 +
 2 files changed, 50 insertions(+), 1 deletion(-)

diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 7d3e572072f5..f7753a2e9229 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -11,6 +11,7 @@
 #include "dm-bio-record.h"
 #include "dm-path-selector.h"
 #include "dm-uevent.h"
+#include "dm.h"
 
 #include <linux/blkdev.h>
 #include <linux/ctype.h>
@@ -30,6 +31,15 @@
 #define DM_PG_INIT_DELAY_MSECS 2000
 #define DM_PG_INIT_DELAY_DEFAULT ((unsigned) -1)
 
+/*
+ * When running out of requests from the underlying queue, use this
+ * structure to get notified by blk-mq.
+ */
+struct underlying_notifier {
+	struct request_queue *q;
+	wait_queue_entry_t wait;
+};
+
 /* Path properties */
 struct pgpath {
 	struct list_head list;
@@ -40,6 +50,7 @@ struct pgpath {
 	struct dm_path path;
 	struct delayed_work activate_path;
 
+	struct underlying_notifier notifier;
 	bool is_active:1;		/* Path status */
 };
 
@@ -125,6 +136,17 @@ static void process_queued_bios(struct work_struct *work);
  * Allocation routines
  *-----------------------------------------------*/
 
+static int req_notify(wait_queue_entry_t *wait, unsigned mode, int flags,
+		void *key)
+{
+	struct underlying_notifier *notifier;
+
+	notifier = container_of(wait, struct underlying_notifier, wait);
+	list_del_init(&notifier->wait.entry);
+	blk_mq_run_hw_queues(notifier->q, true);
+	return 1;
+}
+
 static struct pgpath *alloc_pgpath(void)
 {
 	struct pgpath *pgpath = kzalloc(sizeof(*pgpath), GFP_KERNEL);
@@ -490,6 +512,27 @@ static bool must_push_back_bio(struct multipath *m)
 	return __must_push_back(m, flags);
 }
 
+static struct request *
+multipath_get_req(struct pgpath *pgpath, struct request_queue *q,
+		  struct request *rq)
+{
+	if (!q->mq_ops)
+		return blk_get_request(q, rq->cmd_flags | REQ_NOMERGE,
+				GFP_ATOMIC);
+
+	/*
+	 * Even though BLK_MQ_REQ_ALLOC_NOTIFY is passed, we can't return
+	 * BLK_STS_DEV_RESOURCE to blk-mq if this allocation fails, because
+	 * the notification can come before dm's request is added to the
+	 * dispatch list. So we still need to return BLK_STS_RESOURCE to
+	 * blk-mq, and the notification will run the queue and cancel the
+	 * delay caused by BLK_STS_RESOURCE immediately.
+	 */
+	return blk_mq_alloc_request_notify(q, rq->cmd_flags | REQ_NOMERGE,
+                                BLK_MQ_REQ_NOWAIT | BLK_MQ_REQ_ALLOC_NOTIFY,
+				&pgpath->notifier.wait);
+}
+
 /*
  * Map cloned requests (request-based multipath)
  */
@@ -526,7 +569,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq,
 
 	bdev = pgpath->path.dev->bdev;
 	q = bdev_get_queue(bdev);
-	clone = blk_get_request(q, rq->cmd_flags | REQ_NOMERGE, GFP_ATOMIC);
+	clone = multipath_get_req(pgpath, q, rq);
 	if (IS_ERR(clone)) {
 		/* EBUSY, ENODEV or EWOULDBLOCK: requeue */
 		if (blk_queue_dying(q)) {
@@ -873,6 +916,7 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps
 	int r;
 	struct pgpath *p;
 	struct multipath *m = ti->private;
+	struct mapped_device *md = dm_table_get_md((m)->ti->table);
 
 	/* we need at least a path arg */
 	if (as->argc < 1) {
@@ -906,6 +950,10 @@ static struct pgpath *parse_path(struct dm_arg_set *as, struct path_selector *ps
 		goto bad;
 	}
 
+	p->notifier.q = dm_get_md_queue(md);
+	init_waitqueue_func_entry(&p->notifier.wait, req_notify);
+	INIT_LIST_HEAD(&p->notifier.wait.entry);
+
 	return p;
  bad:
 	free_pgpath(p);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index d6de00f367ef..ff06efcaf36e 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -445,6 +445,7 @@ struct request_queue *dm_get_md_queue(struct mapped_device *md)
 {
 	return md->queue;
 }
+EXPORT_SYMBOL_GPL(dm_get_md_queue);
 
 struct dm_stats *dm_get_stats(struct mapped_device *md)
 {
-- 
2.9.5

     prev parent reply	other threads:[~2018-01-22  3:35 UTC|newest]

Thread overview: 23+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-01-22  3:35 [PATCH 0/5] blk-mq & dm: fix IO hang and deal with one performance issue Ming Lei
2018-01-22  3:35 ` [PATCH 1/5] blk-mq: introduce BLK_STS_DEV_RESOURCE Ming Lei
2018-01-22 16:32   ` Christoph Hellwig
2018-01-22 16:49   ` Bart Van Assche
2018-01-23  0:57     ` Ming Lei
2018-01-23 16:17       ` Bart Van Assche
2018-01-23 16:26         ` Ming Lei
2018-01-23 16:37           ` Bart Van Assche
2018-01-23 16:41             ` Ming Lei
2018-01-23 16:47               ` Bart Van Assche
2018-01-23 16:49                 ` Ming Lei
2018-01-23 16:54                   ` Bart Van Assche
2018-01-23 16:59                     ` Ming Lei
2018-01-23 22:01                       ` Bart Van Assche
2018-01-24  2:31                         ` Ming Lei
2018-01-22  3:35 ` [PATCH 2/5] dm-rq: handle dispatch exception in dm_dispatch_clone_request() Ming Lei
2018-01-22  3:35 ` [PATCH 3/5] dm-rq: return BLK_STS_* from map_request() Ming Lei
2018-01-22  5:35   ` Ming Lei
2018-01-22  3:35 ` [PATCH 4/5] blk-mq: introduce blk_get_request_notify Ming Lei
2018-01-22 10:19   ` Ming Lei
2018-01-22 17:13   ` Bart Van Assche
2018-01-23  1:29     ` Ming Lei
2018-01-22  3:35 ` Ming Lei [this message]

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:7d3e572072f dfblob:f7753a2e922 dfblob:d6de00f367e
dfblob:ff06efcaf36 )
 OR (
bs:"[PATCH 5/5] dm-mpath: use blk_mq_alloc_request_notify for allocating blk-mq req" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180122033550.27855-6-ming.lei@redhat.com \
    --to=ming.lei@redhat.com \
    --cc=axboe@kernel.dk \
    --cc=bart.vanassche@sandisk.com \
    --cc=dm-devel@redhat.com \
    --cc=hch@infradead.org \
    --cc=linux-block@vger.kernel.org \
    --cc=loberman@redhat.com \
    --cc=snitzer@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).