Re: lk 3.17-rc4 blk_mq large write problems

public inbox for linux-scsi@vger.kernel.org
 help / color / mirror / Atom feed

From: Christoph Hellwig <hch@infradead.org>
To: Douglas Gilbert <dgilbert@interlog.com>
Cc: SCSI development list <linux-scsi@vger.kernel.org>,
	Jens Axboe <axboe@kernel.dk>
Subject: Re: lk 3.17-rc4 blk_mq large write problems
Date: Wed, 10 Sep 2014 08:41:44 -0700	[thread overview]
Message-ID: <20140910154144.GA22296@infradead.org> (raw)
In-Reply-To: <540FCB96.8000606@interlog.com>

While it might not help with a blown stack, can you give the patch below
a try?  I tries to solve a problem where the timeout handler hits
before we've fully set up a command.  While I'd like to understand the
root cause of why we're hitting it as well, I'd also really to fix that
race. It would also be good to get a gdb listing of the exact area in
scsi_times_out listed in the oops.

---
From: Christoph Hellwig <hch@lst.de>
Subject: blk-mq: call blk_mq_start_request from ->queue_rq

When we call blk_mq_start_request from the core blk-mq code before calling into
->queue_rq there is a racy window where the timeout handler can hit before we've
fully set up the driver specific part of the command.

Move the call to blk_mq_start_request into the driver so the driver can start
the request only once it is fully set up.

Signed-off-by: Christoph Hellwig <hch@lst.de>

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 5189cb1..db9990b 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -374,7 +374,7 @@ void blk_mq_complete_request(struct request *rq)
 }
 EXPORT_SYMBOL(blk_mq_complete_request);
 
-static void blk_mq_start_request(struct request *rq, bool last)
+void blk_mq_start_request(struct request *rq)
 {
 	struct request_queue *q = rq->q;
 
@@ -405,29 +405,18 @@ static void blk_mq_start_request(struct request *rq, bool last)
 		 */
 		rq->nr_phys_segments++;
 	}
-
-	/*
-	 * Flag the last request in the series so that drivers know when IO
-	 * should be kicked off, if they don't do it on a per-request basis.
-	 *
-	 * Note: the flag isn't the only condition drivers should do kick off.
-	 * If drive is busy, the last request might not have the bit set.
-	 */
-	if (last)
-		rq->cmd_flags |= REQ_END;
 }
+EXPORT_SYMBOL(blk_mq_start_request);
 
 static void __blk_mq_requeue_request(struct request *rq)
 {
 	struct request_queue *q = rq->q;
 
 	trace_block_rq_requeue(q, rq);
-	clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags);
-
-	rq->cmd_flags &= ~REQ_END;
-
-	if (q->dma_drain_size && blk_rq_bytes(rq))
-		rq->nr_phys_segments--;
+	if (test_and_clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags)) {
+		if (q->dma_drain_size && blk_rq_bytes(rq))
+			rq->nr_phys_segments--;
+	}
 }
 
 void blk_mq_requeue_request(struct request *rq)
@@ -735,9 +724,7 @@ static void __blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx)
 		rq = list_first_entry(&rq_list, struct request, queuelist);
 		list_del_init(&rq->queuelist);
 
-		blk_mq_start_request(rq, list_empty(&rq_list));
-
-		ret = q->mq_ops->queue_rq(hctx, rq);
+		ret = q->mq_ops->queue_rq(hctx, rq, list_empty(&rq_list));
 		switch (ret) {
 		case BLK_MQ_RQ_QUEUE_OK:
 			queued++;
@@ -1177,14 +1164,13 @@ static void blk_mq_make_request(struct request_queue *q, struct bio *bio)
 		int ret;
 
 		blk_mq_bio_to_request(rq, bio);
-		blk_mq_start_request(rq, true);
 
 		/*
 		 * For OK queue, we are done. For error, kill it. Any other
 		 * error (busy), just add it to our list as we previously
 		 * would have done
 		 */
-		ret = q->mq_ops->queue_rq(data.hctx, rq);
+		ret = q->mq_ops->queue_rq(data.hctx, rq, true);
 		if (ret == BLK_MQ_RQ_QUEUE_OK)
 			goto done;
 		else {
diff --git a/drivers/block/mtip32xx/mtip32xx.c b/drivers/block/mtip32xx/mtip32xx.c
index db1e956..9b0127a 100644
--- a/drivers/block/mtip32xx/mtip32xx.c
+++ b/drivers/block/mtip32xx/mtip32xx.c
@@ -3775,13 +3775,16 @@ static bool mtip_check_unal_depth(struct blk_mq_hw_ctx *hctx,
 	return false;
 }
 
-static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)
+static int mtip_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq,
+		bool last)
 {
 	int ret;
 
 	if (unlikely(mtip_check_unal_depth(hctx, rq)))
 		return BLK_MQ_RQ_QUEUE_BUSY;
 
+	blk_mq_start_request(rq);
+
 	ret = mtip_submit_request(hctx, rq);
 	if (likely(!ret))
 		return BLK_MQ_RQ_QUEUE_OK;
diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index a3b042c..d098adfbb 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -313,10 +313,13 @@ static void null_request_fn(struct request_queue *q)
 	}
 }
 
-static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq)
+static int null_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *rq,
+		bool last)
 {
 	struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
 
+	blk_mq_start_request(rq);
+
 	cmd->rq = rq;
 	cmd->nq = hctx->driver_data;
 
diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c
index 0a58140..4b08906 100644
--- a/drivers/block/virtio_blk.c
+++ b/drivers/block/virtio_blk.c
@@ -164,14 +164,14 @@ static void virtblk_done(struct virtqueue *vq)
 	spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags);
 }
 
-static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
+static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req,
+		bool last)
 {
 	struct virtio_blk *vblk = hctx->queue->queuedata;
 	struct virtblk_req *vbr = blk_mq_rq_to_pdu(req);
 	unsigned long flags;
 	unsigned int num;
 	int qid = hctx->queue_num;
-	const bool last = (req->cmd_flags & REQ_END) != 0;
 	int err;
 	bool notify = false;
 
@@ -213,6 +213,8 @@ static int virtio_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
 			vbr->out_hdr.type |= VIRTIO_BLK_T_IN;
 	}
 
+	blk_mq_start_request(req);
+
 	spin_lock_irqsave(&vblk->vqs[qid].lock, flags);
 	err = __virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num);
 	if (err) {
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 9c44392..dd6e912 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1856,7 +1856,8 @@ static void scsi_mq_done(struct scsi_cmnd *cmd)
 	blk_mq_complete_request(cmd->request);
 }
 
-static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
+static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req,
+		bool last)
 {
 	struct request_queue *q = req->q;
 	struct scsi_device *sdev = q->queuedata;
@@ -1890,6 +1891,8 @@ static int scsi_queue_rq(struct blk_mq_hw_ctx *hctx, struct request *req)
 	scsi_init_cmd_errh(cmd);
 	cmd->scsi_done = scsi_mq_done;
 
+	blk_mq_start_request(req);
+
 	reason = scsi_dispatch_cmd(cmd);
 	if (reason) {
 		scsi_set_blocked(cmd, reason);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index eb726b9..aed92d5 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -77,7 +77,7 @@ struct blk_mq_tag_set {
 	struct list_head	tag_list;
 };
 
-typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *);
+typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *, bool);
 typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int);
 typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int);
 typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int);
@@ -160,6 +160,7 @@ struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag);
 struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index);
 struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int);
 
+void blk_mq_start_request(struct request *rq);
 void blk_mq_end_io(struct request *rq, int error);
 void __blk_mq_end_io(struct request *rq, int error);
 
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 66c2167..bb7d664 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -188,7 +188,6 @@ enum rq_flag_bits {
 	__REQ_MIXED_MERGE,	/* merge of different types, fail separately */
 	__REQ_KERNEL, 		/* direct IO to kernel pages */
 	__REQ_PM,		/* runtime pm request */
-	__REQ_END,		/* last of chain of requests */
 	__REQ_HASHED,		/* on IO scheduler merge hash */
 	__REQ_MQ_INFLIGHT,	/* track inflight for MQ */
 	__REQ_NR_BITS,		/* stops here */
@@ -242,7 +241,6 @@ enum rq_flag_bits {
 #define REQ_SECURE		(1ULL << __REQ_SECURE)
 #define REQ_KERNEL		(1ULL << __REQ_KERNEL)
 #define REQ_PM			(1ULL << __REQ_PM)
-#define REQ_END			(1ULL << __REQ_END)
 #define REQ_HASHED		(1ULL << __REQ_HASHED)
 #define REQ_MQ_INFLIGHT		(1ULL << __REQ_MQ_INFLIGHT)

next prev parent reply	other threads:[~2014-09-10 15:41 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-09-10  3:55 lk 3.17-rc4 blk_mq large write problems Douglas Gilbert
2014-09-10 15:41 ` Christoph Hellwig [this message]
2014-09-10 16:47   ` Jens Axboe
2014-09-10 18:09     ` Christoph Hellwig
2014-09-10 18:26       ` Jens Axboe
2014-09-10 18:40         ` Christoph Hellwig
2014-09-10 18:42           ` Jens Axboe
2014-09-11  0:58   ` Douglas Gilbert
2014-09-11  2:00     ` Jens Axboe
2014-09-11  3:48       ` Elliott, Robert (Server Storage)
2014-09-11  5:37         ` Douglas Gilbert
2014-09-17 17:04           ` Christoph Hellwig
2014-09-22 23:14 ` Douglas Gilbert

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:5189cb1 dfblob:db9990b dfblob:db1e956 dfblob:9b0127a
dfblob:a3b042c dfblob:d098adfb dfblob:0a58140 dfblob:4b08906
dfblob:9c44392 dfblob:dd6e912 dfblob:eb726b9 dfblob:aed92d5
dfblob:66c2167 dfblob:bb7d664 )
 OR (
bs:"Re: lk 3.17-rc4 blk_mq large write problems" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20140910154144.GA22296@infradead.org \
    --to=hch@infradead.org \
    --cc=axboe@kernel.dk \
    --cc=dgilbert@interlog.com \
    --cc=linux-scsi@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox