From: Bart Van Assche <bart.vanassche@sandisk.com>
To: Jens Axboe <axboe@fb.com>
Cc: Christoph Hellwig <hch@lst.de>,
James Bottomley <jejb@linux.vnet.ibm.com>,
"Martin K. Petersen" <martin.petersen@oracle.com>,
Mike Snitzer <snitzer@redhat.com>,
Doug Ledford <dledford@redhat.com>,
Keith Busch <keith.busch@intel.com>,
"linux-block@vger.kernel.org" <linux-block@vger.kernel.org>,
"linux-scsi@vger.kernel.org" <linux-scsi@vger.kernel.org>,
"linux-rdma@vger.kernel.org" <linux-rdma@vger.kernel.org>,
"linux-nvme@lists.infradead.org" <linux-nvme@lists.infradead.org>
Subject: [PATCH v2 4/7] blk-mq: Introduce blk_quiesce_queue() and blk_resume_queue()
Date: Wed, 28 Sep 2016 16:59:46 -0700 [thread overview]
Message-ID: <a9eb4e95-6d77-e527-c42a-fd2e6f104bed@sandisk.com> (raw)
In-Reply-To: <f153ee47-e599-c333-92b0-d5da475012f3@sandisk.com>
blk_quiesce_queue() prevents new queue_rq() invocations from
occurring and waits until ongoing invocations have finished. This
function does *not* wait until all outstanding requests have
finished (i.e., it does not wait for invocation of request.end_io()).
blk_resume_queue() resumes normal I/O processing.
Signed-off-by: Bart Van Assche <bart.vanassche@sandisk.com>
Cc: Hannes Reinecke <hare@suse.com>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
---
block/blk-mq.c | 137 ++++++++++++++++++++++++++++++++++++++++++++-----
include/linux/blk-mq.h | 2 +
include/linux/blkdev.h | 5 ++
3 files changed, 131 insertions(+), 13 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index d8c45de..f5c71ad 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -58,15 +58,23 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx,
sbitmap_clear_bit(&hctx->ctx_map, ctx->index_hw);
}
-void blk_mq_freeze_queue_start(struct request_queue *q)
+static bool __blk_mq_freeze_queue_start(struct request_queue *q,
+ bool kill_percpu_ref)
{
int freeze_depth;
freeze_depth = atomic_inc_return(&q->mq_freeze_depth);
if (freeze_depth == 1) {
- percpu_ref_kill(&q->q_usage_counter);
+ if (kill_percpu_ref)
+ percpu_ref_kill(&q->q_usage_counter);
blk_mq_run_hw_queues(q, false);
}
+ return freeze_depth == 1;
+}
+
+void blk_mq_freeze_queue_start(struct request_queue *q)
+{
+ __blk_mq_freeze_queue_start(q, true);
}
EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_start);
@@ -102,19 +110,90 @@ void blk_mq_freeze_queue(struct request_queue *q)
}
EXPORT_SYMBOL_GPL(blk_mq_freeze_queue);
-void blk_mq_unfreeze_queue(struct request_queue *q)
+static bool __blk_mq_unfreeze_queue(struct request_queue *q,
+ bool reinit_percpu_ref)
{
int freeze_depth;
freeze_depth = atomic_dec_return(&q->mq_freeze_depth);
WARN_ON_ONCE(freeze_depth < 0);
if (!freeze_depth) {
- percpu_ref_reinit(&q->q_usage_counter);
+ if (reinit_percpu_ref)
+ percpu_ref_reinit(&q->q_usage_counter);
wake_up_all(&q->mq_freeze_wq);
}
+ return freeze_depth == 0;
+}
+
+void blk_mq_unfreeze_queue(struct request_queue *q)
+{
+ __blk_mq_unfreeze_queue(q, true);
}
EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
+/**
+ * blk_mq_quiesce_queue() - wait until all pending queue_rq calls have finished
+ *
+ * Prevent that new I/O requests are queued and wait until all pending
+ * queue_rq() calls have finished. Must not be called if the queue has already
+ * been frozen. Additionally, freezing the queue after having quiesced the
+ * queue and before resuming the queue is not allowed.
+ *
+ * Note: this function does not prevent that the struct request end_io()
+ * callback function is invoked.
+ */
+void blk_mq_quiesce_queue(struct request_queue *q)
+{
+ struct blk_mq_hw_ctx *hctx;
+ unsigned int i;
+ bool res, rcu = false;
+
+ spin_lock_irq(q->queue_lock);
+ WARN_ON_ONCE(blk_queue_quiescing(q));
+ queue_flag_set(QUEUE_FLAG_QUIESCING, q);
+ spin_unlock_irq(q->queue_lock);
+
+ res = __blk_mq_freeze_queue_start(q, false);
+ WARN_ON_ONCE(!res);
+ queue_for_each_hw_ctx(q, hctx, i) {
+ if (hctx->flags & BLK_MQ_F_BLOCKING) {
+ mutex_lock(&hctx->queue_rq_mutex);
+ mutex_unlock(&hctx->queue_rq_mutex);
+ } else {
+ rcu = true;
+ }
+ }
+ if (rcu)
+ synchronize_rcu();
+
+ spin_lock_irq(q->queue_lock);
+ WARN_ON_ONCE(!blk_queue_quiescing(q));
+ queue_flag_clear(QUEUE_FLAG_QUIESCING, q);
+ spin_unlock_irq(q->queue_lock);
+}
+EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
+
+/**
+ * blk_mq_resume_queue() - resume request processing
+ *
+ * Resume blk_queue_enter() calls that have been suspended by
+ * blk_mq_quiesce_queue().
+ *
+ * The caller is responsible for serializing blk_mq_quiesce_queue() and
+ * blk_mq_resume_queue().
+ */
+void blk_mq_resume_queue(struct request_queue *q)
+{
+ bool res;
+
+ res = __blk_mq_unfreeze_queue(q, false);
+ WARN_ON_ONCE(!res);
+ WARN_ON_ONCE(blk_queue_quiescing(q));
+
+ blk_mq_run_hw_queues(q, false);
+}
+EXPORT_SYMBOL_GPL(blk_mq_resume_queue);
+
void blk_mq_wake_waiters(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
@@ -488,6 +567,9 @@ static void blk_mq_requeue_work(struct work_struct *work)
struct request *rq, *next;
unsigned long flags;
+ if (blk_queue_quiescing(q))
+ return;
+
spin_lock_irqsave(&q->requeue_lock, flags);
list_splice_init(&q->requeue_list, &rq_list);
spin_unlock_irqrestore(&q->requeue_lock, flags);
@@ -782,7 +864,7 @@ static inline unsigned int queued_to_index(unsigned int queued)
* of IO. In particular, we'd like FIFO behaviour on handling existing
* items on the hctx->dispatch list. Ignore that for now.
*/
-static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
+static void blk_mq_process_rq_list(struct blk_mq_hw_ctx *hctx)
{
struct request_queue *q = hctx->queue;
struct request *rq;
@@ -791,9 +873,6 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
struct list_head *dptr;
int queued;
- if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
- return;
-
WARN_ON(!cpumask_test_cpu(raw_smp_processor_id(), hctx->cpumask) &&
cpu_online(hctx->next_cpu));
@@ -883,7 +962,24 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
*
* blk_mq_run_hw_queue() already checks the STOPPED bit
**/
- blk_mq_run_hw_queue(hctx, true);
+ if (!blk_queue_quiescing(q))
+ blk_mq_run_hw_queue(hctx, true);
+ }
+}
+
+static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
+{
+ if (unlikely(test_bit(BLK_MQ_S_STOPPED, &hctx->state)))
+ return;
+
+ if (hctx->flags & BLK_MQ_F_BLOCKING) {
+ mutex_lock(&hctx->queue_rq_mutex);
+ blk_mq_process_rq_list(hctx);
+ mutex_unlock(&hctx->queue_rq_mutex);
+ } else {
+ rcu_read_lock();
+ blk_mq_process_rq_list(hctx);
+ rcu_read_unlock();
}
}
@@ -1341,7 +1437,7 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
blk_mq_bio_to_request(rq, bio);
/*
- * We do limited pluging. If the bio can be merged, do that.
+ * We do limited plugging. If the bio can be merged, do that.
* Otherwise the existing request in the plug list will be
* issued. So the plug list will have one request at most
*/
@@ -1361,9 +1457,23 @@ static blk_qc_t blk_mq_make_request(struct request_queue *q, struct bio *bio)
blk_mq_put_ctx(data.ctx);
if (!old_rq)
goto done;
- if (!blk_mq_direct_issue_request(old_rq, &cookie))
- goto done;
- blk_mq_insert_request(old_rq, false, true, true);
+
+ if (data.hctx->flags & BLK_MQ_F_BLOCKING) {
+ mutex_lock(&data.hctx->queue_rq_mutex);
+ if (blk_queue_quiescing(q) ||
+ blk_mq_direct_issue_request(old_rq, &cookie) != 0)
+ blk_mq_insert_request(old_rq, false, true,
+ true);
+ mutex_unlock(&data.hctx->queue_rq_mutex);
+ } else {
+ rcu_read_lock();
+ if (blk_queue_quiescing(q) ||
+ blk_mq_direct_issue_request(old_rq, &cookie) != 0)
+ blk_mq_insert_request(old_rq, false, true,
+ true);
+ rcu_read_unlock();
+ }
+
goto done;
}
@@ -1702,6 +1812,7 @@ static int blk_mq_init_hctx(struct request_queue *q,
INIT_DELAYED_WORK(&hctx->delay_work, blk_mq_delay_work_fn);
spin_lock_init(&hctx->lock);
INIT_LIST_HEAD(&hctx->dispatch);
+ mutex_init(&hctx->queue_rq_mutex);
hctx->queue = q;
hctx->queue_num = hctx_idx;
hctx->flags = set->flags & ~BLK_MQ_F_TAG_SHARED;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 368c460d..4b970f1 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -41,6 +41,8 @@ struct blk_mq_hw_ctx {
struct blk_mq_tags *tags;
+ struct mutex queue_rq_mutex;
+
unsigned long queued;
unsigned long run;
#define BLK_MQ_MAX_DISPATCH_ORDER 7
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c47c358..d365cdf 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -505,6 +505,7 @@ struct request_queue {
#define QUEUE_FLAG_FUA 24 /* device supports FUA writes */
#define QUEUE_FLAG_FLUSH_NQ 25 /* flush not queueuable */
#define QUEUE_FLAG_DAX 26 /* device supports DAX */
+#define QUEUE_FLAG_QUIESCING 27
#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
(1 << QUEUE_FLAG_STACKABLE) | \
@@ -595,6 +596,8 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
#define blk_queue_secure_erase(q) \
(test_bit(QUEUE_FLAG_SECERASE, &(q)->queue_flags))
#define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags)
+#define blk_queue_quiescing(q) test_bit(QUEUE_FLAG_QUIESCING, \
+ &(q)->queue_flags)
#define blk_noretry_request(rq) \
((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
@@ -824,6 +827,8 @@ extern void __blk_run_queue(struct request_queue *q);
extern void __blk_run_queue_uncond(struct request_queue *q);
extern void blk_run_queue(struct request_queue *);
extern void blk_run_queue_async(struct request_queue *q);
+extern void blk_mq_quiesce_queue(struct request_queue *q);
+extern void blk_mq_resume_queue(struct request_queue *q);
extern int blk_rq_map_user(struct request_queue *, struct request *,
struct rq_map_data *, void __user *, unsigned long,
gfp_t);
--
2.10.0
next prev parent reply other threads:[~2016-09-28 23:59 UTC|newest]
Thread overview: 32+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-09-28 23:57 [PATCH v2 0/7] Introduce blk_quiesce_queue() and blk_resume_queue() Bart Van Assche
2016-09-28 23:57 ` [PATCH v2 1/7] blk-mq: Introduce blk_mq_queue_stopped() Bart Van Assche
2016-10-05 17:50 ` Sagi Grimberg
2016-10-11 16:40 ` Christoph Hellwig
2016-09-28 23:59 ` Bart Van Assche [this message]
[not found] ` <a9eb4e95-6d77-e527-c42a-fd2e6f104bed-XdAiOPVOjttBDgjK7y7TUQ@public.gmane.org>
2016-09-29 5:52 ` [PATCH v2 4/7] blk-mq: Introduce blk_quiesce_queue() and blk_resume_queue() Hannes Reinecke
2016-09-29 21:51 ` Ming Lei
2016-09-30 15:55 ` Bart Van Assche
[not found] ` <9fb67745-954c-a70f-a89a-d184c99ae9aa-XdAiOPVOjttBDgjK7y7TUQ@public.gmane.org>
2016-10-01 22:56 ` Ming Lei
2016-10-05 4:16 ` Bart Van Assche
[not found] ` <a11bb09a-6207-f4a8-cfd4-4fe5627c1700-XdAiOPVOjttBDgjK7y7TUQ@public.gmane.org>
2016-10-05 4:32 ` Ming Lei
2016-10-05 14:46 ` Bart Van Assche
2016-10-05 16:11 ` Ming Lei
2016-10-05 18:14 ` Sagi Grimberg
2016-10-05 19:05 ` Bart Van Assche
2016-10-05 19:10 ` Sagi Grimberg
[not found] ` <732bf5d6-d7a8-6ff0-6501-8a3d46f12fab-NQWnxTmZq1alnMjI0IkVqw@public.gmane.org>
2016-10-05 21:08 ` Bart Van Assche
[not found] ` <07628c2f-3a4e-b12e-663a-b0711ce20cc9-XdAiOPVOjttBDgjK7y7TUQ@public.gmane.org>
2016-10-05 22:49 ` Ming Lei
2016-10-05 23:00 ` Bart Van Assche
[not found] ` <f153ee47-e599-c333-92b0-d5da475012f3-XdAiOPVOjttBDgjK7y7TUQ@public.gmane.org>
2016-09-29 0:00 ` [PATCH v2 5/7] dm: Fix a race condition related to stopping and starting queues Bart Van Assche
2016-09-29 0:01 ` [PATCH v2 6/7] SRP transport: Port srp_wait_for_queuecommand() to scsi-mq Bart Van Assche
2016-09-29 5:54 ` Hannes Reinecke
[not found] ` <bedf4b45-00b0-6844-047d-f1e05bb1092f-XdAiOPVOjttBDgjK7y7TUQ@public.gmane.org>
2016-10-05 17:38 ` Sagi Grimberg
2016-10-05 21:51 ` Bart Van Assche
2016-10-11 16:44 ` Christoph Hellwig
2016-09-29 0:01 ` [PATCH v2 7/7] [RFC] nvme: Fix a race condition Bart Van Assche
2016-10-05 17:40 ` Sagi Grimberg
2016-10-11 16:46 ` Christoph Hellwig
2016-10-12 0:41 ` Bart Van Assche
2016-09-29 0:02 ` [PATCH v2 2/7] dm: Use BLK_MQ_S_STOPPED instead of QUEUE_FLAG_STOPPED in blk-mq code Bart Van Assche
2016-09-29 0:02 ` [PATCH v2 3/7] [RFC] nvme: " Bart Van Assche
[not found] ` <9faa7607-eb3e-ce41-b09e-1e10492a5f45-XdAiOPVOjttBDgjK7y7TUQ@public.gmane.org>
2016-10-05 17:43 ` Sagi Grimberg
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=a9eb4e95-6d77-e527-c42a-fd2e6f104bed@sandisk.com \
--to=bart.vanassche@sandisk.com \
--cc=axboe@fb.com \
--cc=dledford@redhat.com \
--cc=hch@lst.de \
--cc=jejb@linux.vnet.ibm.com \
--cc=keith.busch@intel.com \
--cc=linux-block@vger.kernel.org \
--cc=linux-nvme@lists.infradead.org \
--cc=linux-rdma@vger.kernel.org \
--cc=linux-scsi@vger.kernel.org \
--cc=martin.petersen@oracle.com \
--cc=snitzer@redhat.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox