* [PATCH 0/3] blk-mq: rework I/O completions
@ 2014-02-10 11:24 Christoph Hellwig
2014-02-10 11:24 ` [PATCH 1/3] " Christoph Hellwig
` (3 more replies)
0 siblings, 4 replies; 5+ messages in thread
From: Christoph Hellwig @ 2014-02-10 11:24 UTC (permalink / raw)
To: Jens Axboe; +Cc: linux-kernel
This series reworks how the blk-mq core handles I/O completion. The
prime motivation was that the current code does not work when using
a timeout handler that needs to complete requests after doing error
handling. A fallout from fixing that by making the completions more
similar to the old request model is that we now can use the remote
IPIs for driver-internal completions as well instead of just the final
completion and thus get rid of duplicate code both in null_blk and
the pending scsi work.
^ permalink raw reply [flat|nested] 5+ messages in thread
* [PATCH 1/3] blk-mq: rework I/O completions
2014-02-10 11:24 [PATCH 0/3] blk-mq: rework I/O completions Christoph Hellwig
@ 2014-02-10 11:24 ` Christoph Hellwig
2014-02-10 11:24 ` [PATCH 2/3] virtio_blk: use blk_mq_complete_request Christoph Hellwig
` (2 subsequent siblings)
3 siblings, 0 replies; 5+ messages in thread
From: Christoph Hellwig @ 2014-02-10 11:24 UTC (permalink / raw)
To: Jens Axboe; +Cc: linux-kernel
[-- Attachment #1: 0001-blk-mq-rework-I-O-completions.patch --]
[-- Type: text/plain, Size: 5005 bytes --]
Rework I/O completions to work more like the old code path. blk_mq_end_io
now stays out of the business of deferring completions to other CPUs
and calling blk_mark_rq_complete. The latter is very important to allow
completing requests that have timed out and thus are already marked completed,
the former allows using the IPI callout even for driver specific completions
instead of having to reimplement them.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
block/blk-mq.c | 52 +++++++++++++++++++++++++++++-------------------
block/blk-mq.h | 3 +--
block/blk-timeout.c | 2 +-
include/linux/blk-mq.h | 4 ++++
4 files changed, 37 insertions(+), 24 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index cee9623..14c8f35 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -326,7 +326,7 @@ static void blk_mq_bio_endio(struct request *rq, struct bio *bio, int error)
bio_endio(bio, error);
}
-void blk_mq_complete_request(struct request *rq, int error)
+void blk_mq_end_io(struct request *rq, int error)
{
struct bio *bio = rq->bio;
unsigned int bytes = 0;
@@ -351,46 +351,53 @@ void blk_mq_complete_request(struct request *rq, int error)
else
blk_mq_free_request(rq);
}
+EXPORT_SYMBOL(blk_mq_end_io);
-void __blk_mq_end_io(struct request *rq, int error)
-{
- if (!blk_mark_rq_complete(rq))
- blk_mq_complete_request(rq, error);
-}
-
-static void blk_mq_end_io_remote(void *data)
+static void __blk_mq_complete_request_remote(void *data)
{
struct request *rq = data;
- __blk_mq_end_io(rq, rq->errors);
+ rq->q->softirq_done_fn(rq);
}
-/*
- * End IO on this request on a multiqueue enabled driver. We'll either do
- * it directly inline, or punt to a local IPI handler on the matching
- * remote CPU.
- */
-void blk_mq_end_io(struct request *rq, int error)
+void __blk_mq_complete_request(struct request *rq)
{
struct blk_mq_ctx *ctx = rq->mq_ctx;
int cpu;
- if (!ctx->ipi_redirect)
- return __blk_mq_end_io(rq, error);
+ if (!ctx->ipi_redirect) {
+ rq->q->softirq_done_fn(rq);
+ return;
+ }
cpu = get_cpu();
if (cpu != ctx->cpu && cpu_online(ctx->cpu)) {
- rq->errors = error;
- rq->csd.func = blk_mq_end_io_remote;
+ rq->csd.func = __blk_mq_complete_request_remote;
rq->csd.info = rq;
rq->csd.flags = 0;
__smp_call_function_single(ctx->cpu, &rq->csd, 0);
} else {
- __blk_mq_end_io(rq, error);
+ rq->q->softirq_done_fn(rq);
}
put_cpu();
}
-EXPORT_SYMBOL(blk_mq_end_io);
+
+/**
+ * blk_mq_complete_request - end I/O on a request
+ * @rq: the request being processed
+ *
+ * Description:
+ * Ends all I/O on a request. It does not handle partial completions.
+ * The actual completion happens out-of-order, through a IPI handler.
+ **/
+void blk_mq_complete_request(struct request *rq)
+{
+ if (unlikely(blk_should_fake_timeout(rq->q)))
+ return;
+ if (!blk_mark_rq_complete(rq))
+ __blk_mq_complete_request(rq);
+}
+EXPORT_SYMBOL(blk_mq_complete_request);
static void blk_mq_start_request(struct request *rq)
{
@@ -1399,6 +1406,9 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_reg *reg,
if (reg->timeout)
blk_queue_rq_timeout(q, reg->timeout);
+ if (reg->ops->complete)
+ blk_queue_softirq_done(q, reg->ops->complete);
+
blk_mq_init_flush(q);
blk_mq_init_cpu_queues(q, reg->nr_hw_queues);
diff --git a/block/blk-mq.h b/block/blk-mq.h
index 5c39179..f29b645 100644
--- a/block/blk-mq.h
+++ b/block/blk-mq.h
@@ -22,8 +22,7 @@ struct blk_mq_ctx {
struct kobject kobj;
};
-void __blk_mq_end_io(struct request *rq, int error);
-void blk_mq_complete_request(struct request *rq, int error);
+void __blk_mq_complete_request(struct request *rq);
void blk_mq_run_request(struct request *rq, bool run_queue, bool async);
void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_init_flush(struct request_queue *q);
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index bba81c9..d96f706 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -91,7 +91,7 @@ static void blk_rq_timed_out(struct request *req)
case BLK_EH_HANDLED:
/* Can we use req->errors here? */
if (q->mq_ops)
- blk_mq_complete_request(req, req->errors);
+ __blk_mq_complete_request(req);
else
__blk_complete_request(req);
break;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index b7638be..468be242d 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -86,6 +86,8 @@ struct blk_mq_ops {
*/
rq_timed_out_fn *timeout;
+ softirq_done_fn *complete;
+
/*
* Override for hctx allocations (should probably go)
*/
@@ -137,6 +139,8 @@ void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *, unsigned int);
void blk_mq_end_io(struct request *rq, int error);
+void blk_mq_complete_request(struct request *rq);
+
void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx);
void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx);
void blk_mq_stop_hw_queues(struct request_queue *q);
--
1.7.10.4
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 2/3] virtio_blk: use blk_mq_complete_request
2014-02-10 11:24 [PATCH 0/3] blk-mq: rework I/O completions Christoph Hellwig
2014-02-10 11:24 ` [PATCH 1/3] " Christoph Hellwig
@ 2014-02-10 11:24 ` Christoph Hellwig
2014-02-10 11:24 ` [PATCH 3/3] null_blk: use blk_complete_request and blk_mq_complete_request Christoph Hellwig
2014-02-10 19:49 ` [PATCH 0/3] blk-mq: rework I/O completions Jens Axboe
3 siblings, 0 replies; 5+ messages in thread
From: Christoph Hellwig @ 2014-02-10 11:24 UTC (permalink / raw)
To: Jens Axboe; +Cc: linux-kernel
[-- Attachment #1: 0002-virtio_blk-use-blk_mq_complete_request.patch --]
[-- Type: text/plain, Size: 1480 bytes --]
Make sure to complete requests on the submitting CPU. Previously this
was done in blk_mq_end_io, but the responsibility shifted to the drivers.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/block/virtio_blk.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 6a680d4..b1cb3f4 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -110,9 +110,9 @@ static int __virtblk_add_req(struct virtqueue *vq,
return virtqueue_add_sgs(vq, sgs, num_out, num_in, vbr, GFP_ATOMIC);
}
-static inline void virtblk_request_done(struct virtblk_req *vbr)
+static inline void virtblk_request_done(struct request *req)
{
- struct request *req = vbr->req;
+ struct virtblk_req *vbr = req->special;
int error = virtblk_result(vbr);
if (req->cmd_type == REQ_TYPE_BLOCK_PC) {
@@ -138,7 +138,7 @@ static void virtblk_done(struct virtqueue *vq)
do {
virtqueue_disable_cb(vq);
while ((vbr = virtqueue_get_buf(vblk->vq, &len)) != NULL) {
- virtblk_request_done(vbr);
+ blk_mq_complete_request(vbr->req);
req_done = true;
}
if (unlikely(virtqueue_is_broken(vq)))
@@ -479,6 +479,7 @@ static struct blk_mq_ops virtio_mq_ops = {
.map_queue = blk_mq_map_queue,
.alloc_hctx = blk_mq_alloc_single_hw_queue,
.free_hctx = blk_mq_free_single_hw_queue,
+ .complete = virtblk_request_done,
};
static struct blk_mq_reg virtio_mq_reg = {
--
1.7.10.4
^ permalink raw reply related [flat|nested] 5+ messages in thread
* [PATCH 3/3] null_blk: use blk_complete_request and blk_mq_complete_request
2014-02-10 11:24 [PATCH 0/3] blk-mq: rework I/O completions Christoph Hellwig
2014-02-10 11:24 ` [PATCH 1/3] " Christoph Hellwig
2014-02-10 11:24 ` [PATCH 2/3] virtio_blk: use blk_mq_complete_request Christoph Hellwig
@ 2014-02-10 11:24 ` Christoph Hellwig
2014-02-10 19:49 ` [PATCH 0/3] blk-mq: rework I/O completions Jens Axboe
3 siblings, 0 replies; 5+ messages in thread
From: Christoph Hellwig @ 2014-02-10 11:24 UTC (permalink / raw)
To: Jens Axboe; +Cc: linux-kernel
[-- Attachment #1: 0003-null_blk-use-blk_complete_request-and-blk_mq_complet.patch --]
[-- Type: text/plain, Size: 3741 bytes --]
Use the block layer helpers for CPU-local completions instead of
reimplementing them locally.
Signed-off-by: Christoph Hellwig <hch@lst.de>
---
drivers/block/null_blk.c | 97 +++++++++++++++-------------------------------
1 file changed, 32 insertions(+), 65 deletions(-)
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index cc801cd..091b9ea 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -60,7 +60,9 @@ enum {
NULL_IRQ_NONE = 0,
NULL_IRQ_SOFTIRQ = 1,
NULL_IRQ_TIMER = 2,
+};
+enum {
NULL_Q_BIO = 0,
NULL_Q_RQ = 1,
NULL_Q_MQ = 2,
@@ -172,18 +174,20 @@ static struct nullb_cmd *alloc_cmd(struct nullb_queue *nq, int can_wait)
static void end_cmd(struct nullb_cmd *cmd)
{
- if (cmd->rq) {
- if (queue_mode == NULL_Q_MQ)
- blk_mq_end_io(cmd->rq, 0);
- else {
- INIT_LIST_HEAD(&cmd->rq->queuelist);
- blk_end_request_all(cmd->rq, 0);
- }
- } else if (cmd->bio)
+ switch (queue_mode) {
+ case NULL_Q_MQ:
+ blk_mq_end_io(cmd->rq, 0);
+ return;
+ case NULL_Q_RQ:
+ INIT_LIST_HEAD(&cmd->rq->queuelist);
+ blk_end_request_all(cmd->rq, 0);
+ break;
+ case NULL_Q_BIO:
bio_endio(cmd->bio, 0);
+ break;
+ }
- if (queue_mode != NULL_Q_MQ)
- free_cmd(cmd);
+ free_cmd(cmd);
}
static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer)
@@ -222,62 +226,31 @@ static void null_cmd_end_timer(struct nullb_cmd *cmd)
static void null_softirq_done_fn(struct request *rq)
{
- blk_end_request_all(rq, 0);
-}
-
-#ifdef CONFIG_SMP
-
-static void null_ipi_cmd_end_io(void *data)
-{
- struct completion_queue *cq;
- struct llist_node *entry, *next;
- struct nullb_cmd *cmd;
-
- cq = &per_cpu(completion_queues, smp_processor_id());
-
- entry = llist_del_all(&cq->list);
- entry = llist_reverse_order(entry);
-
- while (entry) {
- next = entry->next;
- cmd = llist_entry(entry, struct nullb_cmd, ll_list);
- end_cmd(cmd);
- entry = next;
- }
-}
-
-static void null_cmd_end_ipi(struct nullb_cmd *cmd)
-{
- struct call_single_data *data = &cmd->csd;
- int cpu = get_cpu();
- struct completion_queue *cq = &per_cpu(completion_queues, cpu);
-
- cmd->ll_list.next = NULL;
-
- if (llist_add(&cmd->ll_list, &cq->list)) {
- data->func = null_ipi_cmd_end_io;
- data->flags = 0;
- __smp_call_function_single(cpu, data, 0);
- }
-
- put_cpu();
+ end_cmd(rq->special);
}
-#endif /* CONFIG_SMP */
-
static inline void null_handle_cmd(struct nullb_cmd *cmd)
{
/* Complete IO by inline, softirq or timer */
switch (irqmode) {
- case NULL_IRQ_NONE:
- end_cmd(cmd);
- break;
case NULL_IRQ_SOFTIRQ:
-#ifdef CONFIG_SMP
- null_cmd_end_ipi(cmd);
-#else
+ switch (queue_mode) {
+ case NULL_Q_MQ:
+ blk_mq_complete_request(cmd->rq);
+ break;
+ case NULL_Q_RQ:
+ blk_complete_request(cmd->rq);
+ break;
+ case NULL_Q_BIO:
+ /*
+ * XXX: no proper submitting cpu information available.
+ */
+ end_cmd(cmd);
+ break;
+ }
+ break;
+ case NULL_IRQ_NONE:
end_cmd(cmd);
-#endif
break;
case NULL_IRQ_TIMER:
null_cmd_end_timer(cmd);
@@ -413,6 +386,7 @@ static struct blk_mq_ops null_mq_ops = {
.queue_rq = null_queue_rq,
.map_queue = blk_mq_map_queue,
.init_hctx = null_init_hctx,
+ .complete = null_softirq_done_fn,
};
static struct blk_mq_reg null_mq_reg = {
@@ -611,13 +585,6 @@ static int __init null_init(void)
{
unsigned int i;
-#if !defined(CONFIG_SMP)
- if (irqmode == NULL_IRQ_SOFTIRQ) {
- pr_warn("null_blk: softirq completions not available.\n");
- pr_warn("null_blk: using direct completions.\n");
- irqmode = NULL_IRQ_NONE;
- }
-#endif
if (bs > PAGE_SIZE) {
pr_warn("null_blk: invalid block size\n");
pr_warn("null_blk: defaults block size to %lu\n", PAGE_SIZE);
--
1.7.10.4
^ permalink raw reply related [flat|nested] 5+ messages in thread
* Re: [PATCH 0/3] blk-mq: rework I/O completions
2014-02-10 11:24 [PATCH 0/3] blk-mq: rework I/O completions Christoph Hellwig
` (2 preceding siblings ...)
2014-02-10 11:24 ` [PATCH 3/3] null_blk: use blk_complete_request and blk_mq_complete_request Christoph Hellwig
@ 2014-02-10 19:49 ` Jens Axboe
3 siblings, 0 replies; 5+ messages in thread
From: Jens Axboe @ 2014-02-10 19:49 UTC (permalink / raw)
To: Christoph Hellwig; +Cc: linux-kernel
On Mon, Feb 10 2014, Christoph Hellwig wrote:
> This series reworks how the blk-mq core handles I/O completion. The
> prime motivation was that the current code does not work when using
> a timeout handler that needs to complete requests after doing error
> handling. A fallout from fixing that by making the completions more
> similar to the old request model is that we now can use the remote
> IPIs for driver-internal completions as well instead of just the final
> completion and thus get rid of duplicate code both in null_blk and
> the pending scsi work.
Thanks Christoph, applied all 3.
--
Jens Axboe
^ permalink raw reply [flat|nested] 5+ messages in thread
end of thread, other threads:[~2014-02-10 19:49 UTC | newest]
Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2014-02-10 11:24 [PATCH 0/3] blk-mq: rework I/O completions Christoph Hellwig
2014-02-10 11:24 ` [PATCH 1/3] " Christoph Hellwig
2014-02-10 11:24 ` [PATCH 2/3] virtio_blk: use blk_mq_complete_request Christoph Hellwig
2014-02-10 11:24 ` [PATCH 3/3] null_blk: use blk_complete_request and blk_mq_complete_request Christoph Hellwig
2014-02-10 19:49 ` [PATCH 0/3] blk-mq: rework I/O completions Jens Axboe
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox