From: Tejun Heo <tj@kernel.org>
To: Jens Axboe <axboe@kernel.dk>
Cc: linux-block@vger.kernel.org, linux-kernel@vger.kernel.org,
kernel-team@fb.com
Subject: [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution
Date: Mon, 2 Apr 2018 12:01:20 -0700 [thread overview]
Message-ID: <20180402190120.GD388343@devbig577.frc2.facebook.com> (raw)
In-Reply-To: <20180402190053.GC388343@devbig577.frc2.facebook.com>
When a request is handed over from normal execution to timeout, we
synchronize using ->aborted_gstate and RCU grace periods; however,
when a request is being returned from timeout handling to normal
execution for BLK_EH_RESET_TIMER, we were skipping the same
synchronization.
This means that it is theoretically possible for a returned request's
completion and recycling to compete against the reordered and delayed
writes from the timeout path.
This patch adds an equivalent synchronization when a request is
returned from timeout path to normal completion path.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Bart Van Assche <Bart.VanAssche@wdc.com>
---
block/blk-mq.c | 49 ++++++++++++++++++++++++++++++++++++++++---------
block/blk-timeout.c | 2 +-
include/linux/blkdev.h | 4 +++-
3 files changed, 44 insertions(+), 11 deletions(-)
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -818,7 +818,8 @@ struct blk_mq_timeout_data {
unsigned int nr_expired;
};
-static void blk_mq_rq_timed_out(struct request *req, bool reserved)
+static void blk_mq_rq_timed_out(struct blk_mq_hw_ctx *hctx, struct request *req,
+ int *nr_resets, bool reserved)
{
const struct blk_mq_ops *ops = req->q->mq_ops;
enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER;
@@ -833,13 +834,10 @@ static void blk_mq_rq_timed_out(struct r
__blk_mq_complete_request(req);
break;
case BLK_EH_RESET_TIMER:
- /*
- * As nothing prevents from completion happening while
- * ->aborted_gstate is set, this may lead to ignored
- * completions and further spurious timeouts.
- */
- blk_mq_rq_update_aborted_gstate(req, 0);
blk_add_timer(req);
+ req->rq_flags |= RQF_MQ_TIMEOUT_RESET;
+ (*nr_resets)++;
+ hctx->need_sync_rcu = true;
break;
case BLK_EH_NOT_HANDLED:
break;
@@ -916,7 +914,26 @@ static void blk_mq_terminate_expired(str
*/
if (!(rq->rq_flags & RQF_MQ_TIMEOUT_EXPIRED) &&
READ_ONCE(rq->gstate) == rq->aborted_gstate)
- blk_mq_rq_timed_out(rq, reserved);
+ blk_mq_rq_timed_out(hctx, rq, priv, reserved);
+}
+
+static void blk_mq_finish_timeout_reset(struct blk_mq_hw_ctx *hctx,
+ struct request *rq, void *priv, bool reserved)
+{
+ /*
+ * @rq's timer reset has gone through rcu synchronization and is
+ * visible now. Allow normal completions again by resetting
+ * ->aborted_gstate. Don't clear RQF_MQ_TIMEOUT_RESET here as
+ * there's no memory ordering around ->aborted_gstate making it the
+ * only field safe to update. Let blk_add_timer() clear it later
+ * when the request is recycled or times out again.
+ *
+ * As nothing prevents from completion happening while
+ * ->aborted_gstate is set, this may lead to ignored completions
+ * and further spurious timeouts.
+ */
+ if (rq->rq_flags & RQF_MQ_TIMEOUT_RESET)
+ blk_mq_rq_update_aborted_gstate(rq, 0);
}
static void blk_mq_timeout_work(struct work_struct *work)
@@ -951,6 +968,8 @@ static void blk_mq_timeout_work(struct w
blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &data);
if (data.nr_expired) {
+ int nr_resets = 0;
+
/*
* Wait till everyone sees ->aborted_gstate. The
* sequential waits for SRCUs aren't ideal. If this ever
@@ -960,7 +979,19 @@ static void blk_mq_timeout_work(struct w
blk_mq_timeout_sync_rcu(q);
/* terminate the ones we won */
- blk_mq_queue_tag_busy_iter(q, blk_mq_terminate_expired, NULL);
+ blk_mq_queue_tag_busy_iter(q, blk_mq_terminate_expired,
+ &nr_resets);
+
+ /*
+ * For BLK_EH_RESET_TIMER, release the requests after
+ * blk_add_timer() from above is visible to avoid timer
+ * reset racing against recycling.
+ */
+ if (nr_resets) {
+ blk_mq_timeout_sync_rcu(q);
+ blk_mq_queue_tag_busy_iter(q,
+ blk_mq_finish_timeout_reset, NULL);
+ }
}
if (data.next_set) {
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -216,7 +216,7 @@ void blk_add_timer(struct request *req)
req->timeout = q->rq_timeout;
blk_rq_set_deadline(req, jiffies + req->timeout);
- req->rq_flags &= ~RQF_MQ_TIMEOUT_EXPIRED;
+ req->rq_flags &= ~(RQF_MQ_TIMEOUT_EXPIRED | RQF_MQ_TIMEOUT_RESET);
/*
* Only the non-mq case needs to add the request to a protected list.
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -127,8 +127,10 @@ typedef __u32 __bitwise req_flags_t;
#define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19))
/* timeout is expired */
#define RQF_MQ_TIMEOUT_EXPIRED ((__force req_flags_t)(1 << 20))
+/* timer was reset by the timeout handler (BLK_EH_RESET_TIMER) */
+#define RQF_MQ_TIMEOUT_RESET ((__force req_flags_t)(1 << 21))
/* already slept for hybrid poll */
-#define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 21))
+#define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 22))
/* flags that prevent us from merging requests: */
#define RQF_NOMERGE_FLAGS \
next prev parent reply other threads:[~2018-04-02 19:01 UTC|newest]
Thread overview: 12+ messages / expand[flat|nested] mbox.gz Atom feed top
2018-04-02 19:00 [PATCH 1/2] blk-mq: Factor out [s]rcu synchronization Tejun Heo
2018-04-02 19:01 ` Tejun Heo [this message]
2018-04-02 21:08 ` [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution Bart Van Assche
2018-04-02 21:10 ` Tejun Heo
2018-04-02 21:31 ` Bart Van Assche
2018-04-02 21:39 ` tj
2018-04-02 21:56 ` Bart Van Assche
2018-04-02 22:01 ` tj
2018-04-02 22:09 ` Bart Van Assche
2018-04-02 22:16 ` tj
2018-04-02 22:49 ` Bart Van Assche
2018-04-02 20:48 ` [PATCH 1/2] blk-mq: Factor out [s]rcu synchronization Bart Van Assche
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20180402190120.GD388343@devbig577.frc2.facebook.com \
--to=tj@kernel.org \
--cc=axboe@kernel.dk \
--cc=kernel-team@fb.com \
--cc=linux-block@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).