[PATCH 1/2] blk-mq: Factor out [s]rcu synchronization

All of lore.kernel.org
 help / color / mirror / Atom feed

* [PATCH 1/2] blk-mq: Factor out [s]rcu synchronization
@ 2018-04-02 19:00 Tejun Heo
  2018-04-02 19:01 ` [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution Tejun Heo
  2018-04-02 20:48 ` [PATCH 1/2] blk-mq: Factor out [s]rcu synchronization Bart Van Assche
  0 siblings, 2 replies; 16+ messages in thread
From: Tejun Heo @ 2018-04-02 19:00 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, linux-kernel, kernel-team

Factor out [s]rcu synchronization in blk_mq_timeout_work() into
blk_mq_timeout_sync_rcu().  This is to add another user in the future
and doesn't cause any functional changes.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Bart Van Assche <Bart.VanAssche@wdc.com>
---
Hello,

We were tracking this down in the following thread

  http://lkml.kernel.org/r/20180207011133.25957-1-bart.vanassche@wdc.com

but lost the reproducer in the middle and couldn't fully verify these
two patches fix the problem; however, the identified race is real and
a bug, so I think it'd be best to apply these two patches.

Given the lack of further reports on this front, I don't think it's
necessary to rush these patches.  I think it'd be best to apply these
once the merge window closes.  If we need to backport these to
-stable, we can tag them later.

Thanks.

 block/blk-mq.c         |   39 +++++++++++++++++++++++----------------
 include/linux/blk-mq.h |    2 +-
 2 files changed, 24 insertions(+), 17 deletions(-)

--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -876,13 +876,34 @@ static void blk_mq_check_expired(struct
 	    time_after_eq(jiffies, deadline)) {
 		blk_mq_rq_update_aborted_gstate(rq, gstate);
 		data->nr_expired++;
-		hctx->nr_expired++;
+		hctx->need_sync_rcu = true;
 	} else if (!data->next_set || time_after(data->next, deadline)) {
 		data->next = deadline;
 		data->next_set = 1;
 	}
 }
 
+static void blk_mq_timeout_sync_rcu(struct request_queue *q)
+{
+	struct blk_mq_hw_ctx *hctx;
+	bool has_rcu = false;
+	int i;
+
+	queue_for_each_hw_ctx(q, hctx, i) {
+		if (!hctx->need_sync_rcu)
+			continue;
+
+		if (!(hctx->flags & BLK_MQ_F_BLOCKING))
+			has_rcu = true;
+		else
+			synchronize_srcu(hctx->srcu);
+
+		hctx->need_sync_rcu = false;
+	}
+	if (has_rcu)
+		synchronize_rcu();
+}
+
 static void blk_mq_terminate_expired(struct blk_mq_hw_ctx *hctx,
 		struct request *rq, void *priv, bool reserved)
 {
@@ -930,27 +951,13 @@ static void blk_mq_timeout_work(struct w
 	blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &data);
 
 	if (data.nr_expired) {
-		bool has_rcu = false;
-
 		/*
 		 * Wait till everyone sees ->aborted_gstate.  The
 		 * sequential waits for SRCUs aren't ideal.  If this ever
 		 * becomes a problem, we can add per-hw_ctx rcu_head and
 		 * wait in parallel.
 		 */
-		queue_for_each_hw_ctx(q, hctx, i) {
-			if (!hctx->nr_expired)
-				continue;
-
-			if (!(hctx->flags & BLK_MQ_F_BLOCKING))
-				has_rcu = true;
-			else
-				synchronize_srcu(hctx->srcu);
-
-			hctx->nr_expired = 0;
-		}
-		if (has_rcu)
-			synchronize_rcu();
+		blk_mq_timeout_sync_rcu(q);
 
 		/* terminate the ones we won */
 		blk_mq_queue_tag_busy_iter(q, blk_mq_terminate_expired, NULL);
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -51,7 +51,7 @@ struct blk_mq_hw_ctx {
 	unsigned int		queue_num;
 
 	atomic_t		nr_active;
-	unsigned int		nr_expired;
+	bool			need_sync_rcu;
 
 	struct hlist_node	cpuhp_dead;
 	struct kobject		kobj;

^ permalink raw reply	[flat|nested] 16+ messages in thread

* [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution
  2018-04-02 19:00 [PATCH 1/2] blk-mq: Factor out [s]rcu synchronization Tejun Heo
@ 2018-04-02 19:01 ` Tejun Heo
  2018-04-02 21:08   ` Bart Van Assche
  2018-04-02 20:48 ` [PATCH 1/2] blk-mq: Factor out [s]rcu synchronization Bart Van Assche
  1 sibling, 1 reply; 16+ messages in thread
From: Tejun Heo @ 2018-04-02 19:01 UTC (permalink / raw)
  To: Jens Axboe; +Cc: linux-block, linux-kernel, kernel-team

When a request is handed over from normal execution to timeout, we
synchronize using ->aborted_gstate and RCU grace periods; however,
when a request is being returned from timeout handling to normal
execution for BLK_EH_RESET_TIMER, we were skipping the same
synchronization.

This means that it theoretically is possible for a returned request's
completion and recycling compete against the reordered and delayed
writes from timeout path.

This patch adds an equivalent synchronization when a request is
returned from timeout path to normal completion path.

Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Bart Van Assche <Bart.VanAssche@wdc.com>
---
 block/blk-mq.c         |   49 ++++++++++++++++++++++++++++++++++++++++---------
 block/blk-timeout.c    |    2 +-
 include/linux/blkdev.h |    4 +++-
 3 files changed, 44 insertions(+), 11 deletions(-)

--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -818,7 +818,8 @@ struct blk_mq_timeout_data {
 	unsigned int nr_expired;
 };
 
-static void blk_mq_rq_timed_out(struct request *req, bool reserved)
+static void blk_mq_rq_timed_out(struct blk_mq_hw_ctx *hctx, struct request *req,
+				int *nr_resets, bool reserved)
 {
 	const struct blk_mq_ops *ops = req->q->mq_ops;
 	enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER;
@@ -833,13 +834,10 @@ static void blk_mq_rq_timed_out(struct r
 		__blk_mq_complete_request(req);
 		break;
 	case BLK_EH_RESET_TIMER:
-		/*
-		 * As nothing prevents from completion happening while
-		 * ->aborted_gstate is set, this may lead to ignored
-		 * completions and further spurious timeouts.
-		 */
-		blk_mq_rq_update_aborted_gstate(req, 0);
 		blk_add_timer(req);
+		req->rq_flags |= RQF_MQ_TIMEOUT_RESET;
+		(*nr_resets)++;
+		hctx->need_sync_rcu = true;
 		break;
 	case BLK_EH_NOT_HANDLED:
 		break;
@@ -916,7 +914,26 @@ static void blk_mq_terminate_expired(str
 	 */
 	if (!(rq->rq_flags & RQF_MQ_TIMEOUT_EXPIRED) &&
 	    READ_ONCE(rq->gstate) == rq->aborted_gstate)
-		blk_mq_rq_timed_out(rq, reserved);
+		blk_mq_rq_timed_out(hctx, rq, priv, reserved);
+}
+
+static void blk_mq_finish_timeout_reset(struct blk_mq_hw_ctx *hctx,
+		struct request *rq, void *priv, bool reserved)
+{
+	/*
+	 * @rq's timer reset has gone through rcu synchronization and is
+	 * visible now.  Allow normal completions again by resetting
+	 * ->aborted_gstate.  Don't clear RQF_MQ_TIMEOUT_RESET here as
+	 * there's no memory ordering around ->aborted_gstate making it the
+	 * only field safe to update.  Let blk_add_timer() clear it later
+	 * when the request is recycled or times out again.
+	 *
+	 * As nothing prevents from completion happening while
+	 * ->aborted_gstate is set, this may lead to ignored completions
+	 * and further spurious timeouts.
+	 */
+	if (rq->rq_flags & RQF_MQ_TIMEOUT_RESET)
+		blk_mq_rq_update_aborted_gstate(rq, 0);
 }
 
 static void blk_mq_timeout_work(struct work_struct *work)
@@ -951,6 +968,8 @@ static void blk_mq_timeout_work(struct w
 	blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &data);
 
 	if (data.nr_expired) {
+		int nr_resets = 0;
+
 		/*
 		 * Wait till everyone sees ->aborted_gstate.  The
 		 * sequential waits for SRCUs aren't ideal.  If this ever
@@ -960,7 +979,19 @@ static void blk_mq_timeout_work(struct w
 		blk_mq_timeout_sync_rcu(q);
 
 		/* terminate the ones we won */
-		blk_mq_queue_tag_busy_iter(q, blk_mq_terminate_expired, NULL);
+		blk_mq_queue_tag_busy_iter(q, blk_mq_terminate_expired,
+					   &nr_resets);
+
+		/*
+		 * For BLK_EH_RESET_TIMER, release the requests after
+		 * blk_add_timer() from above is visible to avoid timer
+		 * reset racing against recycling.
+		 */
+		if (nr_resets) {
+			blk_mq_timeout_sync_rcu(q);
+			blk_mq_queue_tag_busy_iter(q,
+					blk_mq_finish_timeout_reset, NULL);
+		}
 	}
 
 	if (data.next_set) {
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -216,7 +216,7 @@ void blk_add_timer(struct request *req)
 		req->timeout = q->rq_timeout;
 
 	blk_rq_set_deadline(req, jiffies + req->timeout);
-	req->rq_flags &= ~RQF_MQ_TIMEOUT_EXPIRED;
+	req->rq_flags &= ~(RQF_MQ_TIMEOUT_EXPIRED | RQF_MQ_TIMEOUT_RESET);
 
 	/*
 	 * Only the non-mq case needs to add the request to a protected list.
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -127,8 +127,10 @@ typedef __u32 __bitwise req_flags_t;
 #define RQF_ZONE_WRITE_LOCKED	((__force req_flags_t)(1 << 19))
 /* timeout is expired */
 #define RQF_MQ_TIMEOUT_EXPIRED	((__force req_flags_t)(1 << 20))
+/* timeout is expired */
+#define RQF_MQ_TIMEOUT_RESET	((__force req_flags_t)(1 << 21))
 /* already slept for hybrid poll */
-#define RQF_MQ_POLL_SLEPT	((__force req_flags_t)(1 << 21))
+#define RQF_MQ_POLL_SLEPT	((__force req_flags_t)(1 << 22))
 
 /* flags that prevent us from merging requests: */
 #define RQF_NOMERGE_FLAGS \

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution
  2018-04-02 19:01 ` [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution Tejun Heo
@ 2018-04-02 21:08   ` Bart Van Assche
  2018-04-02 21:10     ` Tejun Heo
  0 siblings, 1 reply; 16+ messages in thread
From: Bart Van Assche @ 2018-04-02 21:08 UTC (permalink / raw)
  To: Tejun Heo, Jens Axboe; +Cc: linux-block, linux-kernel, kernel-team

On 04/02/18 12:01, Tejun Heo wrote:
> +	 * As nothing prevents from completion happening while
> +	 * ->aborted_gstate is set, this may lead to ignored completions
> +	 * and further spurious timeouts.
> +	 */
> +	if (rq->rq_flags & RQF_MQ_TIMEOUT_RESET)
> +		blk_mq_rq_update_aborted_gstate(rq, 0);

Hello Tejun,

Since this patch fixes one race but introduces another race, is this 
patch really an improvement?

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution
  2018-04-02 21:08   ` Bart Van Assche
@ 2018-04-02 21:10     ` Tejun Heo
  2018-04-02 21:31         ` Bart Van Assche
  2018-04-02 21:56         ` Bart Van Assche
  0 siblings, 2 replies; 16+ messages in thread
From: Tejun Heo @ 2018-04-02 21:10 UTC (permalink / raw)
  To: Bart Van Assche; +Cc: Jens Axboe, linux-block, linux-kernel, kernel-team

On Mon, Apr 02, 2018 at 02:08:37PM -0700, Bart Van Assche wrote:
> On 04/02/18 12:01, Tejun Heo wrote:
> >+	 * As nothing prevents from completion happening while
> >+	 * ->aborted_gstate is set, this may lead to ignored completions
> >+	 * and further spurious timeouts.
> >+	 */
> >+	if (rq->rq_flags & RQF_MQ_TIMEOUT_RESET)
> >+		blk_mq_rq_update_aborted_gstate(rq, 0);
> 
> Hello Tejun,
> 
> Since this patch fixes one race but introduces another race, is this
> patch really an improvement?

Oh, that's not a new race.  That's the same non-critical race which
always existed.  It's just being documented.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution
  2018-04-02 21:10     ` Tejun Heo
@ 2018-04-02 21:31         ` Bart Van Assche
  2018-04-02 21:56         ` Bart Van Assche
  1 sibling, 0 replies; 16+ messages in thread
From: Bart Van Assche @ 2018-04-02 21:31 UTC (permalink / raw)
  To: tj@kernel.org
  Cc: kernel-team@fb.com, linux-kernel@vger.kernel.org,
	linux-block@vger.kernel.org, axboe@kernel.dk

T24gTW9uLCAyMDE4LTA0LTAyIGF0IDE0OjEwIC0wNzAwLCBUZWp1biBIZW8gd3JvdGU6DQo+IE9u
IE1vbiwgQXByIDAyLCAyMDE4IGF0IDAyOjA4OjM3UE0gLTA3MDAsIEJhcnQgVmFuIEFzc2NoZSB3
cm90ZToNCj4gPiBPbiAwNC8wMi8xOCAxMjowMSwgVGVqdW4gSGVvIHdyb3RlOg0KPiA+ID4gKwkg
KiBBcyBub3RoaW5nIHByZXZlbnRzIGZyb20gY29tcGxldGlvbiBoYXBwZW5pbmcgd2hpbGUNCj4g
PiA+ICsJICogLT5hYm9ydGVkX2dzdGF0ZSBpcyBzZXQsIHRoaXMgbWF5IGxlYWQgdG8gaWdub3Jl
ZCBjb21wbGV0aW9ucw0KPiA+ID4gKwkgKiBhbmQgZnVydGhlciBzcHVyaW91cyB0aW1lb3V0cy4N
Cj4gPiA+ICsJICovDQo+ID4gPiArCWlmIChycS0+cnFfZmxhZ3MgJiBSUUZfTVFfVElNRU9VVF9S
RVNFVCkNCj4gPiA+ICsJCWJsa19tcV9ycV91cGRhdGVfYWJvcnRlZF9nc3RhdGUocnEsIDApOw0K
PiA+IA0KPiA+IEhlbGxvIFRlanVuLA0KPiA+IA0KPiA+IFNpbmNlIHRoaXMgcGF0Y2ggZml4ZXMg
b25lIHJhY2UgYnV0IGludHJvZHVjZXMgYW5vdGhlciByYWNlLCBpcyB0aGlzDQo+ID4gcGF0Y2gg
cmVhbGx5IGFuIGltcHJvdmVtZW50Pw0KPiANCj4gT2gsIHRoYXQncyBub3QgYSBuZXcgcmFjZS4g
IFRoYXQncyB0aGUgc2FtZSBub24tY3JpdGljYWwgcmFjZSB3aGljaA0KPiBhbHdheXMgZXhpc3Rl
ZC4gIEl0J3MganVzdCBiZWluZyBkb2N1bWVudGVkLg0KDQpIZWxsbyBUZWp1biwNCg0KSSB0aGlu
ayBpdCBjYW4gaGFwcGVuIHRoYXQgdGhlIGFib3ZlIGNvZGUgc2VlcyB0aGF0IChycS0+cnFfZmxh
Z3MgJg0KUlFGX01RX1RJTUVPVVRfUkVTRVQpICE9IDAsIHRoYXQgYmxrX21xX3N0YXJ0X3JlcXVl
c3QoKSBleGVjdXRlcyB0aGUNCmZvbGxvd2luZyBjb2RlOg0KDQoJYmxrX21xX3JxX3VwZGF0ZV9z
dGF0ZShycSwgTVFfUlFfSU5fRkxJR0hUKTsNCglibGtfYWRkX3RpbWVyKHJxKTsNCg0KYW5kIHRo
YXQgc3Vic2VxdWVudGx5IGJsa19tcV9ycV91cGRhdGVfYWJvcnRlZF9nc3RhdGUocnEsIDApIGlz
IGNhbGxlZCwNCndoaWNoIHdpbGwgY2F1c2UgdGhlIG5leHQgY29tcGxldGlvbiB0byBiZSBsb3N0
LiBJcyBmaXhpbmcgb25lIG9jY3VycmVuY2UNCm9mIGEgcmFjZSBhbmQgcmVpbnRyb2R1Y2luZyBp
dCBpbiBhbm90aGVyIGNvZGUgcGF0aCByZWFsbHkgYW4gaW1wcm92ZW1lbnQ/DQoNClRoYW5rcywN
Cg0KQmFydC4NCg0KDQoNCg==

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution
@ 2018-04-02 21:31         ` Bart Van Assche
  0 siblings, 0 replies; 16+ messages in thread
From: Bart Van Assche @ 2018-04-02 21:31 UTC (permalink / raw)
  To: tj@kernel.org
  Cc: kernel-team@fb.com, linux-kernel@vger.kernel.org,
	linux-block@vger.kernel.org, axboe@kernel.dk

On Mon, 2018-04-02 at 14:10 -0700, Tejun Heo wrote:
> On Mon, Apr 02, 2018 at 02:08:37PM -0700, Bart Van Assche wrote:
> > On 04/02/18 12:01, Tejun Heo wrote:
> > > +	 * As nothing prevents from completion happening while
> > > +	 * ->aborted_gstate is set, this may lead to ignored completions
> > > +	 * and further spurious timeouts.
> > > +	 */
> > > +	if (rq->rq_flags & RQF_MQ_TIMEOUT_RESET)
> > > +		blk_mq_rq_update_aborted_gstate(rq, 0);
> > 
> > Hello Tejun,
> > 
> > Since this patch fixes one race but introduces another race, is this
> > patch really an improvement?
> 
> Oh, that's not a new race.  That's the same non-critical race which
> always existed.  It's just being documented.

Hello Tejun,

I think it can happen that the above code sees that (rq->rq_flags &
RQF_MQ_TIMEOUT_RESET) != 0, that blk_mq_start_request() executes the
following code:

	blk_mq_rq_update_state(rq, MQ_RQ_IN_FLIGHT);
	blk_add_timer(rq);

and that subsequently blk_mq_rq_update_aborted_gstate(rq, 0) is called,
which will cause the next completion to be lost. Is fixing one occurrence
of a race and reintroducing it in another code path really an improvement?

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution
  2018-04-02 21:31         ` Bart Van Assche
  (?)
@ 2018-04-02 21:39         ` tj
  -1 siblings, 0 replies; 16+ messages in thread
From: tj @ 2018-04-02 21:39 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: kernel-team@fb.com, linux-kernel@vger.kernel.org,
	linux-block@vger.kernel.org, axboe@kernel.dk

Hello,

On Mon, Apr 02, 2018 at 09:31:34PM +0000, Bart Van Assche wrote:
> > > > +	 * As nothing prevents from completion happening while
> > > > +	 * ->aborted_gstate is set, this may lead to ignored completions
> > > > +	 * and further spurious timeouts.
> > > > +	 */
> > > > +	if (rq->rq_flags & RQF_MQ_TIMEOUT_RESET)
> > > > +		blk_mq_rq_update_aborted_gstate(rq, 0);
...
> I think it can happen that the above code sees that (rq->rq_flags &
> RQF_MQ_TIMEOUT_RESET) != 0, that blk_mq_start_request() executes the
> following code:
> 
> 	blk_mq_rq_update_state(rq, MQ_RQ_IN_FLIGHT);
> 	blk_add_timer(rq);
> 
> and that subsequently blk_mq_rq_update_aborted_gstate(rq, 0) is called,
> which will cause the next completion to be lost. Is fixing one occurrence
> of a race and reintroducing it in another code path really an improvement?

I'm not following at all.  How would blk_mq_start_request() get called
on the request while it's still owned by the timeout handler?  That
gstate clearing is what transfers the ownership back to the
non-timeout path.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution
  2018-04-02 21:10     ` Tejun Heo
@ 2018-04-02 21:56         ` Bart Van Assche
  2018-04-02 21:56         ` Bart Van Assche
  1 sibling, 0 replies; 16+ messages in thread
From: Bart Van Assche @ 2018-04-02 21:56 UTC (permalink / raw)
  To: tj@kernel.org
  Cc: kernel-team@fb.com, linux-kernel@vger.kernel.org,
	linux-block@vger.kernel.org, axboe@kernel.dk

T24gTW9uLCAyMDE4LTA0LTAyIGF0IDE0OjEwIC0wNzAwLCBUZWp1biBIZW8gd3JvdGU6DQo+IE9u
IE1vbiwgQXByIDAyLCAyMDE4IGF0IDAyOjA4OjM3UE0gLTA3MDAsIEJhcnQgVmFuIEFzc2NoZSB3
cm90ZToNCj4gPiBPbiAwNC8wMi8xOCAxMjowMSwgVGVqdW4gSGVvIHdyb3RlOg0KPiA+ID4gKwkg
KiBBcyBub3RoaW5nIHByZXZlbnRzIGZyb20gY29tcGxldGlvbiBoYXBwZW5pbmcgd2hpbGUNCj4g
PiA+ICsJICogLT5hYm9ydGVkX2dzdGF0ZSBpcyBzZXQsIHRoaXMgbWF5IGxlYWQgdG8gaWdub3Jl
ZCBjb21wbGV0aW9ucw0KPiA+ID4gKwkgKiBhbmQgZnVydGhlciBzcHVyaW91cyB0aW1lb3V0cy4N
Cj4gPiA+ICsJICovDQo+ID4gPiArCWlmIChycS0+cnFfZmxhZ3MgJiBSUUZfTVFfVElNRU9VVF9S
RVNFVCkNCj4gPiA+ICsJCWJsa19tcV9ycV91cGRhdGVfYWJvcnRlZF9nc3RhdGUocnEsIDApOw0K
PiA+IA0KPiA+IEhlbGxvIFRlanVuLA0KPiA+IA0KPiA+IFNpbmNlIHRoaXMgcGF0Y2ggZml4ZXMg
b25lIHJhY2UgYnV0IGludHJvZHVjZXMgYW5vdGhlciByYWNlLCBpcyB0aGlzDQo+ID4gcGF0Y2gg
cmVhbGx5IGFuIGltcHJvdmVtZW50Pw0KPiANCj4gT2gsIHRoYXQncyBub3QgYSBuZXcgcmFjZS4g
IFRoYXQncyB0aGUgc2FtZSBub24tY3JpdGljYWwgcmFjZSB3aGljaA0KPiBhbHdheXMgZXhpc3Rl
ZC4gIEl0J3MganVzdCBiZWluZyBkb2N1bWVudGVkLg0KDQpUaGlzIHBhdGNoIGluY3JlYXNlcyB0
aGUgdGltZSBkdXJpbmcgd2hpY2ggLmFib3J0ZWRfZ3N0YXRlID09IC5nc3RhdGUgaWYgdGhlDQp0
aW1lb3V0IGlzIHJlc2V0LiBEb2VzIHRoYXQgaW5jcmVhc2UgdGhlIGNoYW5jZSB0aGF0IGEgY29t
cGxldGlvbiB3aWxsIGJlIG1pc3NlZA0KaWYgdGhlIHJlcXVlc3QgdGltZW91dCBpcyByZXNldD8N
Cg0KVGhhbmtzLA0KDQpCYXJ0Lg0KDQoNCg==

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution
@ 2018-04-02 21:56         ` Bart Van Assche
  0 siblings, 0 replies; 16+ messages in thread
From: Bart Van Assche @ 2018-04-02 21:56 UTC (permalink / raw)
  To: tj@kernel.org
  Cc: kernel-team@fb.com, linux-kernel@vger.kernel.org,
	linux-block@vger.kernel.org, axboe@kernel.dk

On Mon, 2018-04-02 at 14:10 -0700, Tejun Heo wrote:
> On Mon, Apr 02, 2018 at 02:08:37PM -0700, Bart Van Assche wrote:
> > On 04/02/18 12:01, Tejun Heo wrote:
> > > +	 * As nothing prevents from completion happening while
> > > +	 * ->aborted_gstate is set, this may lead to ignored completions
> > > +	 * and further spurious timeouts.
> > > +	 */
> > > +	if (rq->rq_flags & RQF_MQ_TIMEOUT_RESET)
> > > +		blk_mq_rq_update_aborted_gstate(rq, 0);
> > 
> > Hello Tejun,
> > 
> > Since this patch fixes one race but introduces another race, is this
> > patch really an improvement?
> 
> Oh, that's not a new race.  That's the same non-critical race which
> always existed.  It's just being documented.

This patch increases the time during which .aborted_gstate == .gstate if the
timeout is reset. Does that increase the chance that a completion will be missed
if the request timeout is reset?

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution
  2018-04-02 21:56         ` Bart Van Assche
  (?)
@ 2018-04-02 22:01         ` tj
  2018-04-02 22:09             ` Bart Van Assche
  -1 siblings, 1 reply; 16+ messages in thread
From: tj @ 2018-04-02 22:01 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: kernel-team@fb.com, linux-kernel@vger.kernel.org,
	linux-block@vger.kernel.org, axboe@kernel.dk

Hello,

On Mon, Apr 02, 2018 at 09:56:41PM +0000, Bart Van Assche wrote:
> This patch increases the time during which .aborted_gstate == .gstate if the
> timeout is reset. Does that increase the chance that a completion will be missed
> if the request timeout is reset?

It sure does, but we're comparing an outright kernel bug vs. an
inherently opportunistic mechanism being a bit more lossy.  I think
the answer is pretty clear.

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution
  2018-04-02 22:01         ` tj
@ 2018-04-02 22:09             ` Bart Van Assche
  0 siblings, 0 replies; 16+ messages in thread
From: Bart Van Assche @ 2018-04-02 22:09 UTC (permalink / raw)
  To: tj@kernel.org
  Cc: kernel-team@fb.com, linux-kernel@vger.kernel.org,
	linux-block@vger.kernel.org, axboe@kernel.dk

T24gTW9uLCAyMDE4LTA0LTAyIGF0IDE1OjAxIC0wNzAwLCB0akBrZXJuZWwub3JnIHdyb3RlOg0K
PiBPbiBNb24sIEFwciAwMiwgMjAxOCBhdCAwOTo1Njo0MVBNICswMDAwLCBCYXJ0IFZhbiBBc3Nj
aGUgd3JvdGU6DQo+ID4gVGhpcyBwYXRjaCBpbmNyZWFzZXMgdGhlIHRpbWUgZHVyaW5nIHdoaWNo
IC5hYm9ydGVkX2dzdGF0ZSA9PSAuZ3N0YXRlIGlmIHRoZQ0KPiA+IHRpbWVvdXQgaXMgcmVzZXQu
IERvZXMgdGhhdCBpbmNyZWFzZSB0aGUgY2hhbmNlIHRoYXQgYSBjb21wbGV0aW9uIHdpbGwgYmUg
bWlzc2VkDQo+ID4gaWYgdGhlIHJlcXVlc3QgdGltZW91dCBpcyByZXNldD8NCj4gDQo+IEl0IHN1
cmUgZG9lcywgYnV0IHdlJ3JlIGNvbXBhcmluZyBhbiBvdXRyaWdodCBrZXJuZWwgYnVnIHZzLiBh
bg0KPiBpbmhlcmVudGx5IG9wcG9ydHVuaXN0aWMgbWVjaGFuaXNtIGJlaW5nIGEgYml0IG1vcmUg
bG9zc3kuICBJIHRoaW5rDQo+IHRoZSBhbnN3ZXIgaXMgcHJldHR5IGNsZWFyLg0KDQpIZWxsbyBU
ZWp1biwNCg0KUGxlYXNlIGVsYWJvcmF0ZSB3aGF0IHlvdXIgbG9uZy10ZXJtIGdvYWwgaXMgZm9y
IHRoZSBibGstbXEgdGltZW91dCBoYW5kbGVyLg0KVGhlIGxlZ2FjeSBibG9jayBsYXllciBzdXNw
ZW5kcyByZXF1ZXN0IHN0YXRlIGNoYW5nZXMgd2hpbGUgYSB0aW1lb3V0IGlzDQpiZWluZyBwcm9j
ZXNzZWQgYnkgaG9sZGluZyB0aGUgcmVxdWVzdCBxdWV1ZSBsb2NrIHdoaWxlIHJlcXVlc3RzIGFy
ZSBiZWluZw0KcHJvY2Vzc2VkLCB3aGlsZSBwcm9jZXNzaW5nIGEgdGltZW91dCBhbmQgd2hpbGUg
Y2FsbGluZyBxLT5ycV90aW1lZF9vdXRfZm4ocnEpLg0KRG8geW91IHRoaW5rIGl0IGlzIHBvc3Np
YmxlIHRvIG1ha2UgdGhlIGJsay1tcSBjb3JlIHN1c3BlbmQgcmVxdWVzdCBwcm9jZXNzaW5nDQp3
aGlsZSBwcm9jZXNzaW5nIGEgdGltZW91dCB3aXRob3V0IGludHJvZHVjaW5nIGxvY2tpbmcgaW4N
CmJsa19tcV9jb21wbGV0ZV9yZXF1ZXN0KCk/IElmIHlvdSBkbyBub3QgcGxhbiB0byBhZGQgbG9j
a2luZyBpbg0KYmxrX21xX2NvbXBsZXRlX3JlcXVlc3QoKSwgZG8geW91IHRoaW5rIGl0IGlzIHBv
c3NpYmxlIHRvIGZpeCBhbGwgdGhlIHJhY2VzIHdlDQpkaXNjdXNzZWQgaW4gcHJldmlvdXMgZS1t
YWlscz8NCg0KVGhhbmtzLA0KDQpCYXJ0Lg0KDQoNCg==

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution
@ 2018-04-02 22:09             ` Bart Van Assche
  0 siblings, 0 replies; 16+ messages in thread
From: Bart Van Assche @ 2018-04-02 22:09 UTC (permalink / raw)
  To: tj@kernel.org
  Cc: kernel-team@fb.com, linux-kernel@vger.kernel.org,
	linux-block@vger.kernel.org, axboe@kernel.dk

On Mon, 2018-04-02 at 15:01 -0700, tj@kernel.org wrote:
> On Mon, Apr 02, 2018 at 09:56:41PM +0000, Bart Van Assche wrote:
> > This patch increases the time during which .aborted_gstate == .gstate if the
> > timeout is reset. Does that increase the chance that a completion will be missed
> > if the request timeout is reset?
> 
> It sure does, but we're comparing an outright kernel bug vs. an
> inherently opportunistic mechanism being a bit more lossy.  I think
> the answer is pretty clear.

Hello Tejun,

Please elaborate what your long-term goal is for the blk-mq timeout handler.
The legacy block layer suspends request state changes while a timeout is
being processed by holding the request queue lock while requests are being
processed, while processing a timeout and while calling q->rq_timed_out_fn(rq).
Do you think it is possible to make the blk-mq core suspend request processing
while processing a timeout without introducing locking in
blk_mq_complete_request()? If you do not plan to add locking in
blk_mq_complete_request(), do you think it is possible to fix all the races we
discussed in previous e-mails?

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution
  2018-04-02 22:09             ` Bart Van Assche
  (?)
@ 2018-04-02 22:16             ` tj
  2018-04-02 22:49                 ` Bart Van Assche
  -1 siblings, 1 reply; 16+ messages in thread
From: tj @ 2018-04-02 22:16 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: kernel-team@fb.com, linux-kernel@vger.kernel.org,
	linux-block@vger.kernel.org, axboe@kernel.dk

Hello,

On Mon, Apr 02, 2018 at 10:09:18PM +0000, Bart Van Assche wrote:
> Please elaborate what your long-term goal is for the blk-mq timeout handler.

Hmm... I don't really have any plans beyond what's been posted.

> The legacy block layer suspends request state changes while a timeout is
> being processed by holding the request queue lock while requests are being
> processed, while processing a timeout and while calling q->rq_timed_out_fn(rq).
> Do you think it is possible to make the blk-mq core suspend request processing
> while processing a timeout without introducing locking in
> blk_mq_complete_request()? If you do not plan to add locking in
> blk_mq_complete_request(), do you think it is possible to fix all the races we
> discussed in previous e-mails?

I don't know of multiple race conditions.  What am I missing?  AFAIK,
there's one non-critical race condition which has always been there.
We have a larger race window for that case but don't yet know whether
that's problematic or not.  If that actually is problematic, we can
figure out a way to solve that but such effort / added complexity
doesn't seem justified yet.  No?

Thanks.

-- 
tejun

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution
  2018-04-02 22:16             ` tj
@ 2018-04-02 22:49                 ` Bart Van Assche
  0 siblings, 0 replies; 16+ messages in thread
From: Bart Van Assche @ 2018-04-02 22:49 UTC (permalink / raw)
  To: tj@kernel.org
  Cc: kernel-team@fb.com, linux-kernel@vger.kernel.org,
	linux-block@vger.kernel.org, axboe@kernel.dk

T24gTW9uLCAyMDE4LTA0LTAyIGF0IDE1OjE2IC0wNzAwLCB0akBrZXJuZWwub3JnIHdyb3RlOg0K
PiBBRkFJSywNCj4gdGhlcmUncyBvbmUgbm9uLWNyaXRpY2FsIHJhY2UgY29uZGl0aW9uIHdoaWNo
IGhhcyBhbHdheXMgYmVlbiB0aGVyZS4NCj4gV2UgaGF2ZSBhIGxhcmdlciByYWNlIHdpbmRvdyBm
b3IgdGhhdCBjYXNlIGJ1dCBkb24ndCB5ZXQga25vdyB3aGV0aGVyDQo+IHRoYXQncyBwcm9ibGVt
YXRpYyBvciBub3QuICBJZiB0aGF0IGFjdHVhbGx5IGlzIHByb2JsZW1hdGljLCB3ZSBjYW4NCj4g
ZmlndXJlIG91dCBhIHdheSB0byBzb2x2ZSB0aGF0IGJ1dCBzdWNoIGVmZm9ydCAvIGFkZGVkIGNv
bXBsZXhpdHkNCj4gZG9lc24ndCBzZWVtIGp1c3RpZmllZCB5ZXQuICBObz8NCg0KSGVsbG8gVGVq
dW4sDQoNClNvbWUgaW1wb3J0YW50IGJsb2NrIGRyaXZlcnMgc3lzdGVtYXRpY2FsbHkgcmV0dXJu
IEJMS19FSF9SRVNFVF9USU1FUg0KZnJvbSB0aGVpciB0aW1lb3V0IGhhbmRsZXIsIGUuZy4gdGhl
IHZpcnRpby1zY3NpIGluaXRpYXRvciBkcml2ZXIuIFdoYXQNCndpbGwgdGhlIGNvbnNlcXVlbmNl
IGJlIGZvciB0aGVzZSBkcml2ZXJzIGlmIGEgYmxrX21xX2NvbXBsZXRlX3JlcXVlc3QoKQ0KY2Fs
bCBpcyBpZ25vcmVkPyBXaWxsIHRoZSByZXF1ZXN0IGZvciB3aGljaCB0aGUgY29tcGxldGlvbiB3
YXMgaWdub3JlZA0KZ2V0IHN0dWNrPw0KDQpUaGFua3MsDQoNCkJhcnQuDQoNCg0KDQo=

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution
@ 2018-04-02 22:49                 ` Bart Van Assche
  0 siblings, 0 replies; 16+ messages in thread
From: Bart Van Assche @ 2018-04-02 22:49 UTC (permalink / raw)
  To: tj@kernel.org
  Cc: kernel-team@fb.com, linux-kernel@vger.kernel.org,
	linux-block@vger.kernel.org, axboe@kernel.dk

On Mon, 2018-04-02 at 15:16 -0700, tj@kernel.org wrote:
> AFAIK,
> there's one non-critical race condition which has always been there.
> We have a larger race window for that case but don't yet know whether
> that's problematic or not.  If that actually is problematic, we can
> figure out a way to solve that but such effort / added complexity
> doesn't seem justified yet.  No?

Hello Tejun,

Some important block drivers systematically return BLK_EH_RESET_TIMER
from their timeout handler, e.g. the virtio-scsi initiator driver. What
will the consequence be for these drivers if a blk_mq_complete_request()
call is ignored? Will the request for which the completion was ignored
get stuck?

Thanks,

Bart.

^ permalink raw reply	[flat|nested] 16+ messages in thread

* Re: [PATCH 1/2] blk-mq: Factor out [s]rcu synchronization
  2018-04-02 19:00 [PATCH 1/2] blk-mq: Factor out [s]rcu synchronization Tejun Heo
  2018-04-02 19:01 ` [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution Tejun Heo
@ 2018-04-02 20:48 ` Bart Van Assche
  1 sibling, 0 replies; 16+ messages in thread
From: Bart Van Assche @ 2018-04-02 20:48 UTC (permalink / raw)
  To: Tejun Heo, Jens Axboe; +Cc: linux-block, linux-kernel, kernel-team

On 04/02/18 12:00, Tejun Heo wrote:
> Factor out [s]rcu synchronization in blk_mq_timeout_work() into
> blk_mq_timeout_sync_rcu().  This is to add another user in the future
> and doesn't cause any functional changes.

Reviewed-by: Bart Van Assche <bart.vanassche@wdc.com>

^ permalink raw reply	[flat|nested] 16+ messages in thread

end of thread, other threads:[~2018-04-02 22:49 UTC | newest]

Thread overview: 16+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2018-04-02 19:00 [PATCH 1/2] blk-mq: Factor out [s]rcu synchronization Tejun Heo
2018-04-02 19:01 ` [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution Tejun Heo
2018-04-02 21:08   ` Bart Van Assche
2018-04-02 21:10     ` Tejun Heo
2018-04-02 21:31       ` Bart Van Assche
2018-04-02 21:31         ` Bart Van Assche
2018-04-02 21:39         ` tj
2018-04-02 21:56       ` Bart Van Assche
2018-04-02 21:56         ` Bart Van Assche
2018-04-02 22:01         ` tj
2018-04-02 22:09           ` Bart Van Assche
2018-04-02 22:09             ` Bart Van Assche
2018-04-02 22:16             ` tj
2018-04-02 22:49               ` Bart Van Assche
2018-04-02 22:49                 ` Bart Van Assche
2018-04-02 20:48 ` [PATCH 1/2] blk-mq: Factor out [s]rcu synchronization Bart Van Assche

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.