Re: [PATCH v4] blk-mq: Fix race conditions in request timeout handling

stable.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: "tj@kernel.org" <tj@kernel.org>
To: Bart Van Assche <Bart.VanAssche@wdc.com>
Cc: "ming.lei@redhat.com" <ming.lei@redhat.com>,
	"hch@lst.de" <hch@lst.de>,
	"maxg@mellanox.com" <maxg@mellanox.com>,
	"israelr@mellanox.com" <israelr@mellanox.com>,
	"linux-block@vger.kernel.org" <linux-block@vger.kernel.org>,
	"stable@vger.kernel.org" <stable@vger.kernel.org>,
	"sagi@grimberg.me" <sagi@grimberg.me>,
	"axboe@kernel.dk" <axboe@kernel.dk>
Subject: Re: [PATCH v4] blk-mq: Fix race conditions in request timeout handling
Date: Tue, 10 Apr 2018 14:54:56 -0700	[thread overview]
Message-ID: <20180410215456.GD793541@devbig577.frc2.facebook.com> (raw)
In-Reply-To: <d88becef654cde4b5382c1ebddb0aeba963c0578.camel@wdc.com>

On Tue, Apr 10, 2018 at 09:46:23PM +0000, Bart Van Assche wrote:
> On Tue, 2018-04-10 at 14:33 -0700, tj@kernel.org wrote:
> > +       else
> > +               rq->missed_completion = true;
> 
> In this patch I found code that sets rq->missed_completion but no code that
> clears it. Did I perhaps miss something?

Ah, yeah, I was moving it out of add_timer but forgot to actully add
it to the issue path.  Fixed patch below.

BTW, no matter what we do w/ the request handover between normal and
timeout paths, we'd need something similar.  Otherwise, while we can
reduce the window, we can't get rid of it.

Thanks.

---
 block/blk-mq.c         |   45 ++++++++++++++++++++++++++++++++-------------
 include/linux/blkdev.h |    2 ++
 2 files changed, 34 insertions(+), 13 deletions(-)

--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -642,6 +642,8 @@ void blk_mq_complete_request(struct requ
 	hctx_lock(hctx, &srcu_idx);
 	if (blk_mq_rq_aborted_gstate(rq) != rq->gstate)
 		__blk_mq_complete_request(rq);
+	else
+		rq->missed_completion = true;
 	hctx_unlock(hctx, srcu_idx);
 }
 EXPORT_SYMBOL(blk_mq_complete_request);
@@ -684,6 +686,7 @@ void blk_mq_start_request(struct request
 
 	blk_mq_rq_update_state(rq, MQ_RQ_IN_FLIGHT);
 	blk_add_timer(rq);
+	rq->missed_completion = false;
 
 	write_seqcount_end(&rq->gstate_seq);
 	preempt_enable();
@@ -881,7 +884,7 @@ static void blk_mq_check_expired(struct
 	}
 }
 
-static void blk_mq_timeout_sync_rcu(struct request_queue *q)
+static void blk_mq_timeout_sync_rcu(struct request_queue *q, bool clear)
 {
 	struct blk_mq_hw_ctx *hctx;
 	bool has_rcu = false;
@@ -896,7 +899,8 @@ static void blk_mq_timeout_sync_rcu(stru
 		else
 			synchronize_srcu(hctx->srcu);
 
-		hctx->need_sync_rcu = false;
+		if (clear)
+			hctx->need_sync_rcu = false;
 	}
 	if (has_rcu)
 		synchronize_rcu();
@@ -917,25 +921,37 @@ static void blk_mq_terminate_expired(str
 		blk_mq_rq_timed_out(hctx, rq, priv, reserved);
 }
 
-static void blk_mq_finish_timeout_reset(struct blk_mq_hw_ctx *hctx,
+static void blk_mq_timeout_reset_return(struct blk_mq_hw_ctx *hctx,
 		struct request *rq, void *priv, bool reserved)
 {
 	/*
 	 * @rq's timer reset has gone through rcu synchronization and is
 	 * visible now.  Allow normal completions again by resetting
 	 * ->aborted_gstate.  Don't clear RQF_MQ_TIMEOUT_RESET here as
-	 * there's no memory ordering around ->aborted_gstate making it the
-	 * only field safe to update.  Let blk_add_timer() clear it later
-	 * when the request is recycled or times out again.
-	 *
-	 * As nothing prevents from completion happening while
-	 * ->aborted_gstate is set, this may lead to ignored completions
-	 * and further spurious timeouts.
+	 * blk_mq_timeout_reset_cleanup() needs it again and there's no
+	 * memory ordering around ->aborted_gstate making it the only field
+	 * safe to update.  Let blk_add_timer() clear it later when the
+	 * request is recycled or times out again.
 	 */
 	if (rq->rq_flags & RQF_MQ_TIMEOUT_RESET)
 		blk_mq_rq_update_aborted_gstate(rq, 0);
 }
 
+static void blk_mq_timeout_reset_cleanup(struct blk_mq_hw_ctx *hctx,
+		struct request *rq, void *priv, bool reserved)
+{
+	/*
+	 * @rq is now fully returned to the normal path.  If normal
+	 * completion raced timeout handling, execute the missed completion
+	 * here.  This is safe because 1. ->missed_completion can no longer
+	 * be asserted because nothing is timing out right now and 2. if
+	 * ->missed_completion is set, @rq is safe from recycling because
+	 * nobody could have completed it.
+	 */
+	if ((rq->rq_flags & RQF_MQ_TIMEOUT_RESET) && rq->missed_completion)
+		blk_mq_complete_request(rq);
+}
+
 static void blk_mq_timeout_work(struct work_struct *work)
 {
 	struct request_queue *q =
@@ -976,7 +992,7 @@ static void blk_mq_timeout_work(struct w
 		 * becomes a problem, we can add per-hw_ctx rcu_head and
 		 * wait in parallel.
 		 */
-		blk_mq_timeout_sync_rcu(q);
+		blk_mq_timeout_sync_rcu(q, true);
 
 		/* terminate the ones we won */
 		blk_mq_queue_tag_busy_iter(q, blk_mq_terminate_expired,
@@ -988,9 +1004,12 @@ static void blk_mq_timeout_work(struct w
 		 * reset racing against recycling.
 		 */
 		if (nr_resets) {
-			blk_mq_timeout_sync_rcu(q);
+			blk_mq_timeout_sync_rcu(q, false);
+			blk_mq_queue_tag_busy_iter(q,
+					blk_mq_timeout_reset_return, NULL);
+			blk_mq_timeout_sync_rcu(q, true);
 			blk_mq_queue_tag_busy_iter(q,
-					blk_mq_finish_timeout_reset, NULL);
+					blk_mq_timeout_reset_cleanup, NULL);
 		}
 	}
 
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -227,6 +227,8 @@ struct request {
 
 	unsigned int extra_len;	/* length of alignment and padding */
 
+	bool missed_completion;
+
 	/*
 	 * On blk-mq, the lower bits of ->gstate (generation number and
 	 * state) carry the MQ_RQ_* state value and the upper bits the

next prev parent reply	other threads:[~2018-04-10 21:55 UTC|newest]

Thread overview: 38+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-04-10  1:34 [PATCH v4] blk-mq: Fix race conditions in request timeout handling Bart Van Assche
2018-04-10  7:59 ` jianchao.wang
2018-04-10 10:04   ` Ming Lei
2018-04-10 12:04     ` Shan Hai
2018-04-10 13:01   ` Bart Van Assche
2018-04-10 14:32     ` jianchao.wang
2018-04-10  8:41 ` Ming Lei
2018-04-10 12:58   ` Bart Van Assche
2018-04-10 13:55     ` Ming Lei
2018-04-10 14:09       ` Bart Van Assche
2018-04-10 14:30         ` Ming Lei
2018-04-10 15:02           ` Bart Van Assche
2018-04-10 15:25             ` Ming Lei
2018-04-10 15:30               ` tj
2018-04-10 15:38                 ` Ming Lei
2018-04-10 15:40                   ` tj
2018-04-10 21:33                     ` tj
2018-04-10 21:46                       ` Bart Van Assche
2018-04-10 21:54                         ` tj [this message]
2018-04-11 12:50                           ` Bart Van Assche
2018-04-11 14:16                             ` tj
2018-04-11 18:38                             ` Martin Steigerwald
2018-04-11 14:24                           ` Sagi Grimberg
2018-04-11 14:43                             ` tj
2018-04-11 16:16                             ` Israel Rukshin
2018-04-11 17:07                               ` tj
2018-04-11 21:31                                 ` tj
2018-04-12  8:59                                   ` Israel Rukshin
2018-04-12 13:35                                     ` tj
2018-04-15 12:28                                       ` Israel Rukshin
2018-04-18 16:34                           ` Bart Van Assche
2018-04-10  9:55 ` Christoph Hellwig
2018-04-10 13:26   ` Bart Van Assche
2018-04-10 14:50     ` hch
2018-04-10 14:41   ` Jens Axboe
2018-04-10 14:20 ` Tejun Heo
2018-04-10 14:30   ` Bart Van Assche
2018-04-10 14:33     ` tj

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20180410215456.GD793541@devbig577.frc2.facebook.com \
    --to=tj@kernel.org \
    --cc=Bart.VanAssche@wdc.com \
    --cc=axboe@kernel.dk \
    --cc=hch@lst.de \
    --cc=israelr@mellanox.com \
    --cc=linux-block@vger.kernel.org \
    --cc=maxg@mellanox.com \
    --cc=ming.lei@redhat.com \
    --cc=sagi@grimberg.me \
    --cc=stable@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).