* [PATCH 1/2] blk-mq: Factor out [s]rcu synchronization
@ 2018-04-02 19:00 Tejun Heo
2018-04-02 19:01 ` [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution Tejun Heo
2018-04-02 20:48 ` [PATCH 1/2] blk-mq: Factor out [s]rcu synchronization Bart Van Assche
0 siblings, 2 replies; 16+ messages in thread
From: Tejun Heo @ 2018-04-02 19:00 UTC (permalink / raw)
To: Jens Axboe; +Cc: linux-block, linux-kernel, kernel-team
Factor out [s]rcu synchronization in blk_mq_timeout_work() into
blk_mq_timeout_sync_rcu(). This is to add another user in the future
and doesn't cause any functional changes.
Signed-off-by: Tejun Heo <tj@kernel.org>
Cc: Bart Van Assche <Bart.VanAssche@wdc.com>
---
Hello,
We were tracking this down in the following thread
http://lkml.kernel.org/r/20180207011133.25957-1-bart.vanassche@wdc.com
but lost the reproducer in the middle and couldn't fully verify these
two patches fix the problem; however, the identified race is real and
a bug, so I think it'd be best to apply these two patches.
Given the lack of further reports on this front, I don't think it's
necessary to rush these patches. I think it'd be best to apply these
once the merge window closes. If we need to backport these to
-stable, we can tag them later.
Thanks.
block/blk-mq.c | 39 +++++++++++++++++++++++----------------
include/linux/blk-mq.h | 2 +-
2 files changed, 24 insertions(+), 17 deletions(-)
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -876,13 +876,34 @@ static void blk_mq_check_expired(struct
time_after_eq(jiffies, deadline)) {
blk_mq_rq_update_aborted_gstate(rq, gstate);
data->nr_expired++;
- hctx->nr_expired++;
+ hctx->need_sync_rcu = true;
} else if (!data->next_set || time_after(data->next, deadline)) {
data->next = deadline;
data->next_set = 1;
}
}
+static void blk_mq_timeout_sync_rcu(struct request_queue *q)
+{
+ struct blk_mq_hw_ctx *hctx;
+ bool has_rcu = false;
+ int i;
+
+ queue_for_each_hw_ctx(q, hctx, i) {
+ if (!hctx->need_sync_rcu)
+ continue;
+
+ if (!(hctx->flags & BLK_MQ_F_BLOCKING))
+ has_rcu = true;
+ else
+ synchronize_srcu(hctx->srcu);
+
+ hctx->need_sync_rcu = false;
+ }
+ if (has_rcu)
+ synchronize_rcu();
+}
+
static void blk_mq_terminate_expired(struct blk_mq_hw_ctx *hctx,
struct request *rq, void *priv, bool reserved)
{
@@ -930,27 +951,13 @@ static void blk_mq_timeout_work(struct w
blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &data);
if (data.nr_expired) {
- bool has_rcu = false;
-
/*
* Wait till everyone sees ->aborted_gstate. The
* sequential waits for SRCUs aren't ideal. If this ever
* becomes a problem, we can add per-hw_ctx rcu_head and
* wait in parallel.
*/
- queue_for_each_hw_ctx(q, hctx, i) {
- if (!hctx->nr_expired)
- continue;
-
- if (!(hctx->flags & BLK_MQ_F_BLOCKING))
- has_rcu = true;
- else
- synchronize_srcu(hctx->srcu);
-
- hctx->nr_expired = 0;
- }
- if (has_rcu)
- synchronize_rcu();
+ blk_mq_timeout_sync_rcu(q);
/* terminate the ones we won */
blk_mq_queue_tag_busy_iter(q, blk_mq_terminate_expired, NULL);
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -51,7 +51,7 @@ struct blk_mq_hw_ctx {
unsigned int queue_num;
atomic_t nr_active;
- unsigned int nr_expired;
+ bool need_sync_rcu;
struct hlist_node cpuhp_dead;
struct kobject kobj;
^ permalink raw reply [flat|nested] 16+ messages in thread* [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution 2018-04-02 19:00 [PATCH 1/2] blk-mq: Factor out [s]rcu synchronization Tejun Heo @ 2018-04-02 19:01 ` Tejun Heo 2018-04-02 21:08 ` Bart Van Assche 2018-04-02 20:48 ` [PATCH 1/2] blk-mq: Factor out [s]rcu synchronization Bart Van Assche 1 sibling, 1 reply; 16+ messages in thread From: Tejun Heo @ 2018-04-02 19:01 UTC (permalink / raw) To: Jens Axboe; +Cc: linux-block, linux-kernel, kernel-team When a request is handed over from normal execution to timeout, we synchronize using ->aborted_gstate and RCU grace periods; however, when a request is being returned from timeout handling to normal execution for BLK_EH_RESET_TIMER, we were skipping the same synchronization. This means that it theoretically is possible for a returned request's completion and recycling compete against the reordered and delayed writes from timeout path. This patch adds an equivalent synchronization when a request is returned from timeout path to normal completion path. Signed-off-by: Tejun Heo <tj@kernel.org> Cc: Bart Van Assche <Bart.VanAssche@wdc.com> --- block/blk-mq.c | 49 ++++++++++++++++++++++++++++++++++++++++--------- block/blk-timeout.c | 2 +- include/linux/blkdev.h | 4 +++- 3 files changed, 44 insertions(+), 11 deletions(-) --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -818,7 +818,8 @@ struct blk_mq_timeout_data { unsigned int nr_expired; }; -static void blk_mq_rq_timed_out(struct request *req, bool reserved) +static void blk_mq_rq_timed_out(struct blk_mq_hw_ctx *hctx, struct request *req, + int *nr_resets, bool reserved) { const struct blk_mq_ops *ops = req->q->mq_ops; enum blk_eh_timer_return ret = BLK_EH_RESET_TIMER; @@ -833,13 +834,10 @@ static void blk_mq_rq_timed_out(struct r __blk_mq_complete_request(req); break; case BLK_EH_RESET_TIMER: - /* - * As nothing prevents from completion happening while - * ->aborted_gstate is set, this may lead to ignored - * completions and further spurious timeouts. - */ - blk_mq_rq_update_aborted_gstate(req, 0); blk_add_timer(req); + req->rq_flags |= RQF_MQ_TIMEOUT_RESET; + (*nr_resets)++; + hctx->need_sync_rcu = true; break; case BLK_EH_NOT_HANDLED: break; @@ -916,7 +914,26 @@ static void blk_mq_terminate_expired(str */ if (!(rq->rq_flags & RQF_MQ_TIMEOUT_EXPIRED) && READ_ONCE(rq->gstate) == rq->aborted_gstate) - blk_mq_rq_timed_out(rq, reserved); + blk_mq_rq_timed_out(hctx, rq, priv, reserved); +} + +static void blk_mq_finish_timeout_reset(struct blk_mq_hw_ctx *hctx, + struct request *rq, void *priv, bool reserved) +{ + /* + * @rq's timer reset has gone through rcu synchronization and is + * visible now. Allow normal completions again by resetting + * ->aborted_gstate. Don't clear RQF_MQ_TIMEOUT_RESET here as + * there's no memory ordering around ->aborted_gstate making it the + * only field safe to update. Let blk_add_timer() clear it later + * when the request is recycled or times out again. + * + * As nothing prevents from completion happening while + * ->aborted_gstate is set, this may lead to ignored completions + * and further spurious timeouts. + */ + if (rq->rq_flags & RQF_MQ_TIMEOUT_RESET) + blk_mq_rq_update_aborted_gstate(rq, 0); } static void blk_mq_timeout_work(struct work_struct *work) @@ -951,6 +968,8 @@ static void blk_mq_timeout_work(struct w blk_mq_queue_tag_busy_iter(q, blk_mq_check_expired, &data); if (data.nr_expired) { + int nr_resets = 0; + /* * Wait till everyone sees ->aborted_gstate. The * sequential waits for SRCUs aren't ideal. If this ever @@ -960,7 +979,19 @@ static void blk_mq_timeout_work(struct w blk_mq_timeout_sync_rcu(q); /* terminate the ones we won */ - blk_mq_queue_tag_busy_iter(q, blk_mq_terminate_expired, NULL); + blk_mq_queue_tag_busy_iter(q, blk_mq_terminate_expired, + &nr_resets); + + /* + * For BLK_EH_RESET_TIMER, release the requests after + * blk_add_timer() from above is visible to avoid timer + * reset racing against recycling. + */ + if (nr_resets) { + blk_mq_timeout_sync_rcu(q); + blk_mq_queue_tag_busy_iter(q, + blk_mq_finish_timeout_reset, NULL); + } } if (data.next_set) { --- a/block/blk-timeout.c +++ b/block/blk-timeout.c @@ -216,7 +216,7 @@ void blk_add_timer(struct request *req) req->timeout = q->rq_timeout; blk_rq_set_deadline(req, jiffies + req->timeout); - req->rq_flags &= ~RQF_MQ_TIMEOUT_EXPIRED; + req->rq_flags &= ~(RQF_MQ_TIMEOUT_EXPIRED | RQF_MQ_TIMEOUT_RESET); /* * Only the non-mq case needs to add the request to a protected list. --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -127,8 +127,10 @@ typedef __u32 __bitwise req_flags_t; #define RQF_ZONE_WRITE_LOCKED ((__force req_flags_t)(1 << 19)) /* timeout is expired */ #define RQF_MQ_TIMEOUT_EXPIRED ((__force req_flags_t)(1 << 20)) +/* timeout is expired */ +#define RQF_MQ_TIMEOUT_RESET ((__force req_flags_t)(1 << 21)) /* already slept for hybrid poll */ -#define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 21)) +#define RQF_MQ_POLL_SLEPT ((__force req_flags_t)(1 << 22)) /* flags that prevent us from merging requests: */ #define RQF_NOMERGE_FLAGS \ ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution 2018-04-02 19:01 ` [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution Tejun Heo @ 2018-04-02 21:08 ` Bart Van Assche 2018-04-02 21:10 ` Tejun Heo 0 siblings, 1 reply; 16+ messages in thread From: Bart Van Assche @ 2018-04-02 21:08 UTC (permalink / raw) To: Tejun Heo, Jens Axboe; +Cc: linux-block, linux-kernel, kernel-team On 04/02/18 12:01, Tejun Heo wrote: > + * As nothing prevents from completion happening while > + * ->aborted_gstate is set, this may lead to ignored completions > + * and further spurious timeouts. > + */ > + if (rq->rq_flags & RQF_MQ_TIMEOUT_RESET) > + blk_mq_rq_update_aborted_gstate(rq, 0); Hello Tejun, Since this patch fixes one race but introduces another race, is this patch really an improvement? Thanks, Bart. ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution 2018-04-02 21:08 ` Bart Van Assche @ 2018-04-02 21:10 ` Tejun Heo 2018-04-02 21:31 ` Bart Van Assche 2018-04-02 21:56 ` Bart Van Assche 0 siblings, 2 replies; 16+ messages in thread From: Tejun Heo @ 2018-04-02 21:10 UTC (permalink / raw) To: Bart Van Assche; +Cc: Jens Axboe, linux-block, linux-kernel, kernel-team On Mon, Apr 02, 2018 at 02:08:37PM -0700, Bart Van Assche wrote: > On 04/02/18 12:01, Tejun Heo wrote: > >+ * As nothing prevents from completion happening while > >+ * ->aborted_gstate is set, this may lead to ignored completions > >+ * and further spurious timeouts. > >+ */ > >+ if (rq->rq_flags & RQF_MQ_TIMEOUT_RESET) > >+ blk_mq_rq_update_aborted_gstate(rq, 0); > > Hello Tejun, > > Since this patch fixes one race but introduces another race, is this > patch really an improvement? Oh, that's not a new race. That's the same non-critical race which always existed. It's just being documented. Thanks. -- tejun ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution 2018-04-02 21:10 ` Tejun Heo @ 2018-04-02 21:31 ` Bart Van Assche 2018-04-02 21:56 ` Bart Van Assche 1 sibling, 0 replies; 16+ messages in thread From: Bart Van Assche @ 2018-04-02 21:31 UTC (permalink / raw) To: tj@kernel.org Cc: kernel-team@fb.com, linux-kernel@vger.kernel.org, linux-block@vger.kernel.org, axboe@kernel.dk T24gTW9uLCAyMDE4LTA0LTAyIGF0IDE0OjEwIC0wNzAwLCBUZWp1biBIZW8gd3JvdGU6DQo+IE9u IE1vbiwgQXByIDAyLCAyMDE4IGF0IDAyOjA4OjM3UE0gLTA3MDAsIEJhcnQgVmFuIEFzc2NoZSB3 cm90ZToNCj4gPiBPbiAwNC8wMi8xOCAxMjowMSwgVGVqdW4gSGVvIHdyb3RlOg0KPiA+ID4gKwkg KiBBcyBub3RoaW5nIHByZXZlbnRzIGZyb20gY29tcGxldGlvbiBoYXBwZW5pbmcgd2hpbGUNCj4g PiA+ICsJICogLT5hYm9ydGVkX2dzdGF0ZSBpcyBzZXQsIHRoaXMgbWF5IGxlYWQgdG8gaWdub3Jl ZCBjb21wbGV0aW9ucw0KPiA+ID4gKwkgKiBhbmQgZnVydGhlciBzcHVyaW91cyB0aW1lb3V0cy4N Cj4gPiA+ICsJICovDQo+ID4gPiArCWlmIChycS0+cnFfZmxhZ3MgJiBSUUZfTVFfVElNRU9VVF9S RVNFVCkNCj4gPiA+ICsJCWJsa19tcV9ycV91cGRhdGVfYWJvcnRlZF9nc3RhdGUocnEsIDApOw0K PiA+IA0KPiA+IEhlbGxvIFRlanVuLA0KPiA+IA0KPiA+IFNpbmNlIHRoaXMgcGF0Y2ggZml4ZXMg b25lIHJhY2UgYnV0IGludHJvZHVjZXMgYW5vdGhlciByYWNlLCBpcyB0aGlzDQo+ID4gcGF0Y2gg cmVhbGx5IGFuIGltcHJvdmVtZW50Pw0KPiANCj4gT2gsIHRoYXQncyBub3QgYSBuZXcgcmFjZS4g IFRoYXQncyB0aGUgc2FtZSBub24tY3JpdGljYWwgcmFjZSB3aGljaA0KPiBhbHdheXMgZXhpc3Rl ZC4gIEl0J3MganVzdCBiZWluZyBkb2N1bWVudGVkLg0KDQpIZWxsbyBUZWp1biwNCg0KSSB0aGlu ayBpdCBjYW4gaGFwcGVuIHRoYXQgdGhlIGFib3ZlIGNvZGUgc2VlcyB0aGF0IChycS0+cnFfZmxh Z3MgJg0KUlFGX01RX1RJTUVPVVRfUkVTRVQpICE9IDAsIHRoYXQgYmxrX21xX3N0YXJ0X3JlcXVl c3QoKSBleGVjdXRlcyB0aGUNCmZvbGxvd2luZyBjb2RlOg0KDQoJYmxrX21xX3JxX3VwZGF0ZV9z dGF0ZShycSwgTVFfUlFfSU5fRkxJR0hUKTsNCglibGtfYWRkX3RpbWVyKHJxKTsNCg0KYW5kIHRo YXQgc3Vic2VxdWVudGx5IGJsa19tcV9ycV91cGRhdGVfYWJvcnRlZF9nc3RhdGUocnEsIDApIGlz IGNhbGxlZCwNCndoaWNoIHdpbGwgY2F1c2UgdGhlIG5leHQgY29tcGxldGlvbiB0byBiZSBsb3N0 LiBJcyBmaXhpbmcgb25lIG9jY3VycmVuY2UNCm9mIGEgcmFjZSBhbmQgcmVpbnRyb2R1Y2luZyBp dCBpbiBhbm90aGVyIGNvZGUgcGF0aCByZWFsbHkgYW4gaW1wcm92ZW1lbnQ/DQoNClRoYW5rcywN Cg0KQmFydC4NCg0KDQoNCg== ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution @ 2018-04-02 21:31 ` Bart Van Assche 0 siblings, 0 replies; 16+ messages in thread From: Bart Van Assche @ 2018-04-02 21:31 UTC (permalink / raw) To: tj@kernel.org Cc: kernel-team@fb.com, linux-kernel@vger.kernel.org, linux-block@vger.kernel.org, axboe@kernel.dk On Mon, 2018-04-02 at 14:10 -0700, Tejun Heo wrote: > On Mon, Apr 02, 2018 at 02:08:37PM -0700, Bart Van Assche wrote: > > On 04/02/18 12:01, Tejun Heo wrote: > > > + * As nothing prevents from completion happening while > > > + * ->aborted_gstate is set, this may lead to ignored completions > > > + * and further spurious timeouts. > > > + */ > > > + if (rq->rq_flags & RQF_MQ_TIMEOUT_RESET) > > > + blk_mq_rq_update_aborted_gstate(rq, 0); > > > > Hello Tejun, > > > > Since this patch fixes one race but introduces another race, is this > > patch really an improvement? > > Oh, that's not a new race. That's the same non-critical race which > always existed. It's just being documented. Hello Tejun, I think it can happen that the above code sees that (rq->rq_flags & RQF_MQ_TIMEOUT_RESET) != 0, that blk_mq_start_request() executes the following code: blk_mq_rq_update_state(rq, MQ_RQ_IN_FLIGHT); blk_add_timer(rq); and that subsequently blk_mq_rq_update_aborted_gstate(rq, 0) is called, which will cause the next completion to be lost. Is fixing one occurrence of a race and reintroducing it in another code path really an improvement? Thanks, Bart. ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution 2018-04-02 21:31 ` Bart Van Assche (?) @ 2018-04-02 21:39 ` tj -1 siblings, 0 replies; 16+ messages in thread From: tj @ 2018-04-02 21:39 UTC (permalink / raw) To: Bart Van Assche Cc: kernel-team@fb.com, linux-kernel@vger.kernel.org, linux-block@vger.kernel.org, axboe@kernel.dk Hello, On Mon, Apr 02, 2018 at 09:31:34PM +0000, Bart Van Assche wrote: > > > > + * As nothing prevents from completion happening while > > > > + * ->aborted_gstate is set, this may lead to ignored completions > > > > + * and further spurious timeouts. > > > > + */ > > > > + if (rq->rq_flags & RQF_MQ_TIMEOUT_RESET) > > > > + blk_mq_rq_update_aborted_gstate(rq, 0); ... > I think it can happen that the above code sees that (rq->rq_flags & > RQF_MQ_TIMEOUT_RESET) != 0, that blk_mq_start_request() executes the > following code: > > blk_mq_rq_update_state(rq, MQ_RQ_IN_FLIGHT); > blk_add_timer(rq); > > and that subsequently blk_mq_rq_update_aborted_gstate(rq, 0) is called, > which will cause the next completion to be lost. Is fixing one occurrence > of a race and reintroducing it in another code path really an improvement? I'm not following at all. How would blk_mq_start_request() get called on the request while it's still owned by the timeout handler? That gstate clearing is what transfers the ownership back to the non-timeout path. Thanks. -- tejun ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution 2018-04-02 21:10 ` Tejun Heo @ 2018-04-02 21:56 ` Bart Van Assche 2018-04-02 21:56 ` Bart Van Assche 1 sibling, 0 replies; 16+ messages in thread From: Bart Van Assche @ 2018-04-02 21:56 UTC (permalink / raw) To: tj@kernel.org Cc: kernel-team@fb.com, linux-kernel@vger.kernel.org, linux-block@vger.kernel.org, axboe@kernel.dk T24gTW9uLCAyMDE4LTA0LTAyIGF0IDE0OjEwIC0wNzAwLCBUZWp1biBIZW8gd3JvdGU6DQo+IE9u IE1vbiwgQXByIDAyLCAyMDE4IGF0IDAyOjA4OjM3UE0gLTA3MDAsIEJhcnQgVmFuIEFzc2NoZSB3 cm90ZToNCj4gPiBPbiAwNC8wMi8xOCAxMjowMSwgVGVqdW4gSGVvIHdyb3RlOg0KPiA+ID4gKwkg KiBBcyBub3RoaW5nIHByZXZlbnRzIGZyb20gY29tcGxldGlvbiBoYXBwZW5pbmcgd2hpbGUNCj4g PiA+ICsJICogLT5hYm9ydGVkX2dzdGF0ZSBpcyBzZXQsIHRoaXMgbWF5IGxlYWQgdG8gaWdub3Jl ZCBjb21wbGV0aW9ucw0KPiA+ID4gKwkgKiBhbmQgZnVydGhlciBzcHVyaW91cyB0aW1lb3V0cy4N Cj4gPiA+ICsJICovDQo+ID4gPiArCWlmIChycS0+cnFfZmxhZ3MgJiBSUUZfTVFfVElNRU9VVF9S RVNFVCkNCj4gPiA+ICsJCWJsa19tcV9ycV91cGRhdGVfYWJvcnRlZF9nc3RhdGUocnEsIDApOw0K PiA+IA0KPiA+IEhlbGxvIFRlanVuLA0KPiA+IA0KPiA+IFNpbmNlIHRoaXMgcGF0Y2ggZml4ZXMg b25lIHJhY2UgYnV0IGludHJvZHVjZXMgYW5vdGhlciByYWNlLCBpcyB0aGlzDQo+ID4gcGF0Y2gg cmVhbGx5IGFuIGltcHJvdmVtZW50Pw0KPiANCj4gT2gsIHRoYXQncyBub3QgYSBuZXcgcmFjZS4g IFRoYXQncyB0aGUgc2FtZSBub24tY3JpdGljYWwgcmFjZSB3aGljaA0KPiBhbHdheXMgZXhpc3Rl ZC4gIEl0J3MganVzdCBiZWluZyBkb2N1bWVudGVkLg0KDQpUaGlzIHBhdGNoIGluY3JlYXNlcyB0 aGUgdGltZSBkdXJpbmcgd2hpY2ggLmFib3J0ZWRfZ3N0YXRlID09IC5nc3RhdGUgaWYgdGhlDQp0 aW1lb3V0IGlzIHJlc2V0LiBEb2VzIHRoYXQgaW5jcmVhc2UgdGhlIGNoYW5jZSB0aGF0IGEgY29t cGxldGlvbiB3aWxsIGJlIG1pc3NlZA0KaWYgdGhlIHJlcXVlc3QgdGltZW91dCBpcyByZXNldD8N Cg0KVGhhbmtzLA0KDQpCYXJ0Lg0KDQoNCg== ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution @ 2018-04-02 21:56 ` Bart Van Assche 0 siblings, 0 replies; 16+ messages in thread From: Bart Van Assche @ 2018-04-02 21:56 UTC (permalink / raw) To: tj@kernel.org Cc: kernel-team@fb.com, linux-kernel@vger.kernel.org, linux-block@vger.kernel.org, axboe@kernel.dk On Mon, 2018-04-02 at 14:10 -0700, Tejun Heo wrote: > On Mon, Apr 02, 2018 at 02:08:37PM -0700, Bart Van Assche wrote: > > On 04/02/18 12:01, Tejun Heo wrote: > > > + * As nothing prevents from completion happening while > > > + * ->aborted_gstate is set, this may lead to ignored completions > > > + * and further spurious timeouts. > > > + */ > > > + if (rq->rq_flags & RQF_MQ_TIMEOUT_RESET) > > > + blk_mq_rq_update_aborted_gstate(rq, 0); > > > > Hello Tejun, > > > > Since this patch fixes one race but introduces another race, is this > > patch really an improvement? > > Oh, that's not a new race. That's the same non-critical race which > always existed. It's just being documented. This patch increases the time during which .aborted_gstate == .gstate if the timeout is reset. Does that increase the chance that a completion will be missed if the request timeout is reset? Thanks, Bart. ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution 2018-04-02 21:56 ` Bart Van Assche (?) @ 2018-04-02 22:01 ` tj 2018-04-02 22:09 ` Bart Van Assche -1 siblings, 1 reply; 16+ messages in thread From: tj @ 2018-04-02 22:01 UTC (permalink / raw) To: Bart Van Assche Cc: kernel-team@fb.com, linux-kernel@vger.kernel.org, linux-block@vger.kernel.org, axboe@kernel.dk Hello, On Mon, Apr 02, 2018 at 09:56:41PM +0000, Bart Van Assche wrote: > This patch increases the time during which .aborted_gstate == .gstate if the > timeout is reset. Does that increase the chance that a completion will be missed > if the request timeout is reset? It sure does, but we're comparing an outright kernel bug vs. an inherently opportunistic mechanism being a bit more lossy. I think the answer is pretty clear. Thanks. -- tejun ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution 2018-04-02 22:01 ` tj @ 2018-04-02 22:09 ` Bart Van Assche 0 siblings, 0 replies; 16+ messages in thread From: Bart Van Assche @ 2018-04-02 22:09 UTC (permalink / raw) To: tj@kernel.org Cc: kernel-team@fb.com, linux-kernel@vger.kernel.org, linux-block@vger.kernel.org, axboe@kernel.dk T24gTW9uLCAyMDE4LTA0LTAyIGF0IDE1OjAxIC0wNzAwLCB0akBrZXJuZWwub3JnIHdyb3RlOg0K PiBPbiBNb24sIEFwciAwMiwgMjAxOCBhdCAwOTo1Njo0MVBNICswMDAwLCBCYXJ0IFZhbiBBc3Nj aGUgd3JvdGU6DQo+ID4gVGhpcyBwYXRjaCBpbmNyZWFzZXMgdGhlIHRpbWUgZHVyaW5nIHdoaWNo IC5hYm9ydGVkX2dzdGF0ZSA9PSAuZ3N0YXRlIGlmIHRoZQ0KPiA+IHRpbWVvdXQgaXMgcmVzZXQu IERvZXMgdGhhdCBpbmNyZWFzZSB0aGUgY2hhbmNlIHRoYXQgYSBjb21wbGV0aW9uIHdpbGwgYmUg bWlzc2VkDQo+ID4gaWYgdGhlIHJlcXVlc3QgdGltZW91dCBpcyByZXNldD8NCj4gDQo+IEl0IHN1 cmUgZG9lcywgYnV0IHdlJ3JlIGNvbXBhcmluZyBhbiBvdXRyaWdodCBrZXJuZWwgYnVnIHZzLiBh bg0KPiBpbmhlcmVudGx5IG9wcG9ydHVuaXN0aWMgbWVjaGFuaXNtIGJlaW5nIGEgYml0IG1vcmUg bG9zc3kuICBJIHRoaW5rDQo+IHRoZSBhbnN3ZXIgaXMgcHJldHR5IGNsZWFyLg0KDQpIZWxsbyBU ZWp1biwNCg0KUGxlYXNlIGVsYWJvcmF0ZSB3aGF0IHlvdXIgbG9uZy10ZXJtIGdvYWwgaXMgZm9y IHRoZSBibGstbXEgdGltZW91dCBoYW5kbGVyLg0KVGhlIGxlZ2FjeSBibG9jayBsYXllciBzdXNw ZW5kcyByZXF1ZXN0IHN0YXRlIGNoYW5nZXMgd2hpbGUgYSB0aW1lb3V0IGlzDQpiZWluZyBwcm9j ZXNzZWQgYnkgaG9sZGluZyB0aGUgcmVxdWVzdCBxdWV1ZSBsb2NrIHdoaWxlIHJlcXVlc3RzIGFy ZSBiZWluZw0KcHJvY2Vzc2VkLCB3aGlsZSBwcm9jZXNzaW5nIGEgdGltZW91dCBhbmQgd2hpbGUg Y2FsbGluZyBxLT5ycV90aW1lZF9vdXRfZm4ocnEpLg0KRG8geW91IHRoaW5rIGl0IGlzIHBvc3Np YmxlIHRvIG1ha2UgdGhlIGJsay1tcSBjb3JlIHN1c3BlbmQgcmVxdWVzdCBwcm9jZXNzaW5nDQp3 aGlsZSBwcm9jZXNzaW5nIGEgdGltZW91dCB3aXRob3V0IGludHJvZHVjaW5nIGxvY2tpbmcgaW4N CmJsa19tcV9jb21wbGV0ZV9yZXF1ZXN0KCk/IElmIHlvdSBkbyBub3QgcGxhbiB0byBhZGQgbG9j a2luZyBpbg0KYmxrX21xX2NvbXBsZXRlX3JlcXVlc3QoKSwgZG8geW91IHRoaW5rIGl0IGlzIHBv c3NpYmxlIHRvIGZpeCBhbGwgdGhlIHJhY2VzIHdlDQpkaXNjdXNzZWQgaW4gcHJldmlvdXMgZS1t YWlscz8NCg0KVGhhbmtzLA0KDQpCYXJ0Lg0KDQoNCg== ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution @ 2018-04-02 22:09 ` Bart Van Assche 0 siblings, 0 replies; 16+ messages in thread From: Bart Van Assche @ 2018-04-02 22:09 UTC (permalink / raw) To: tj@kernel.org Cc: kernel-team@fb.com, linux-kernel@vger.kernel.org, linux-block@vger.kernel.org, axboe@kernel.dk On Mon, 2018-04-02 at 15:01 -0700, tj@kernel.org wrote: > On Mon, Apr 02, 2018 at 09:56:41PM +0000, Bart Van Assche wrote: > > This patch increases the time during which .aborted_gstate == .gstate if the > > timeout is reset. Does that increase the chance that a completion will be missed > > if the request timeout is reset? > > It sure does, but we're comparing an outright kernel bug vs. an > inherently opportunistic mechanism being a bit more lossy. I think > the answer is pretty clear. Hello Tejun, Please elaborate what your long-term goal is for the blk-mq timeout handler. The legacy block layer suspends request state changes while a timeout is being processed by holding the request queue lock while requests are being processed, while processing a timeout and while calling q->rq_timed_out_fn(rq). Do you think it is possible to make the blk-mq core suspend request processing while processing a timeout without introducing locking in blk_mq_complete_request()? If you do not plan to add locking in blk_mq_complete_request(), do you think it is possible to fix all the races we discussed in previous e-mails? Thanks, Bart. ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution 2018-04-02 22:09 ` Bart Van Assche (?) @ 2018-04-02 22:16 ` tj 2018-04-02 22:49 ` Bart Van Assche -1 siblings, 1 reply; 16+ messages in thread From: tj @ 2018-04-02 22:16 UTC (permalink / raw) To: Bart Van Assche Cc: kernel-team@fb.com, linux-kernel@vger.kernel.org, linux-block@vger.kernel.org, axboe@kernel.dk Hello, On Mon, Apr 02, 2018 at 10:09:18PM +0000, Bart Van Assche wrote: > Please elaborate what your long-term goal is for the blk-mq timeout handler. Hmm... I don't really have any plans beyond what's been posted. > The legacy block layer suspends request state changes while a timeout is > being processed by holding the request queue lock while requests are being > processed, while processing a timeout and while calling q->rq_timed_out_fn(rq). > Do you think it is possible to make the blk-mq core suspend request processing > while processing a timeout without introducing locking in > blk_mq_complete_request()? If you do not plan to add locking in > blk_mq_complete_request(), do you think it is possible to fix all the races we > discussed in previous e-mails? I don't know of multiple race conditions. What am I missing? AFAIK, there's one non-critical race condition which has always been there. We have a larger race window for that case but don't yet know whether that's problematic or not. If that actually is problematic, we can figure out a way to solve that but such effort / added complexity doesn't seem justified yet. No? Thanks. -- tejun ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution 2018-04-02 22:16 ` tj @ 2018-04-02 22:49 ` Bart Van Assche 0 siblings, 0 replies; 16+ messages in thread From: Bart Van Assche @ 2018-04-02 22:49 UTC (permalink / raw) To: tj@kernel.org Cc: kernel-team@fb.com, linux-kernel@vger.kernel.org, linux-block@vger.kernel.org, axboe@kernel.dk T24gTW9uLCAyMDE4LTA0LTAyIGF0IDE1OjE2IC0wNzAwLCB0akBrZXJuZWwub3JnIHdyb3RlOg0K PiBBRkFJSywNCj4gdGhlcmUncyBvbmUgbm9uLWNyaXRpY2FsIHJhY2UgY29uZGl0aW9uIHdoaWNo IGhhcyBhbHdheXMgYmVlbiB0aGVyZS4NCj4gV2UgaGF2ZSBhIGxhcmdlciByYWNlIHdpbmRvdyBm b3IgdGhhdCBjYXNlIGJ1dCBkb24ndCB5ZXQga25vdyB3aGV0aGVyDQo+IHRoYXQncyBwcm9ibGVt YXRpYyBvciBub3QuICBJZiB0aGF0IGFjdHVhbGx5IGlzIHByb2JsZW1hdGljLCB3ZSBjYW4NCj4g ZmlndXJlIG91dCBhIHdheSB0byBzb2x2ZSB0aGF0IGJ1dCBzdWNoIGVmZm9ydCAvIGFkZGVkIGNv bXBsZXhpdHkNCj4gZG9lc24ndCBzZWVtIGp1c3RpZmllZCB5ZXQuICBObz8NCg0KSGVsbG8gVGVq dW4sDQoNClNvbWUgaW1wb3J0YW50IGJsb2NrIGRyaXZlcnMgc3lzdGVtYXRpY2FsbHkgcmV0dXJu IEJMS19FSF9SRVNFVF9USU1FUg0KZnJvbSB0aGVpciB0aW1lb3V0IGhhbmRsZXIsIGUuZy4gdGhl IHZpcnRpby1zY3NpIGluaXRpYXRvciBkcml2ZXIuIFdoYXQNCndpbGwgdGhlIGNvbnNlcXVlbmNl IGJlIGZvciB0aGVzZSBkcml2ZXJzIGlmIGEgYmxrX21xX2NvbXBsZXRlX3JlcXVlc3QoKQ0KY2Fs bCBpcyBpZ25vcmVkPyBXaWxsIHRoZSByZXF1ZXN0IGZvciB3aGljaCB0aGUgY29tcGxldGlvbiB3 YXMgaWdub3JlZA0KZ2V0IHN0dWNrPw0KDQpUaGFua3MsDQoNCkJhcnQuDQoNCg0KDQo= ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution @ 2018-04-02 22:49 ` Bart Van Assche 0 siblings, 0 replies; 16+ messages in thread From: Bart Van Assche @ 2018-04-02 22:49 UTC (permalink / raw) To: tj@kernel.org Cc: kernel-team@fb.com, linux-kernel@vger.kernel.org, linux-block@vger.kernel.org, axboe@kernel.dk On Mon, 2018-04-02 at 15:16 -0700, tj@kernel.org wrote: > AFAIK, > there's one non-critical race condition which has always been there. > We have a larger race window for that case but don't yet know whether > that's problematic or not. If that actually is problematic, we can > figure out a way to solve that but such effort / added complexity > doesn't seem justified yet. No? Hello Tejun, Some important block drivers systematically return BLK_EH_RESET_TIMER from their timeout handler, e.g. the virtio-scsi initiator driver. What will the consequence be for these drivers if a blk_mq_complete_request() call is ignored? Will the request for which the completion was ignored get stuck? Thanks, Bart. ^ permalink raw reply [flat|nested] 16+ messages in thread
* Re: [PATCH 1/2] blk-mq: Factor out [s]rcu synchronization 2018-04-02 19:00 [PATCH 1/2] blk-mq: Factor out [s]rcu synchronization Tejun Heo 2018-04-02 19:01 ` [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution Tejun Heo @ 2018-04-02 20:48 ` Bart Van Assche 1 sibling, 0 replies; 16+ messages in thread From: Bart Van Assche @ 2018-04-02 20:48 UTC (permalink / raw) To: Tejun Heo, Jens Axboe; +Cc: linux-block, linux-kernel, kernel-team On 04/02/18 12:00, Tejun Heo wrote: > Factor out [s]rcu synchronization in blk_mq_timeout_work() into > blk_mq_timeout_sync_rcu(). This is to add another user in the future > and doesn't cause any functional changes. Reviewed-by: Bart Van Assche <bart.vanassche@wdc.com> ^ permalink raw reply [flat|nested] 16+ messages in thread
end of thread, other threads:[~2018-04-02 22:49 UTC | newest] Thread overview: 16+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2018-04-02 19:00 [PATCH 1/2] blk-mq: Factor out [s]rcu synchronization Tejun Heo 2018-04-02 19:01 ` [PATCH 2/2] blk-mq: Fix request handover from timeout path to normal execution Tejun Heo 2018-04-02 21:08 ` Bart Van Assche 2018-04-02 21:10 ` Tejun Heo 2018-04-02 21:31 ` Bart Van Assche 2018-04-02 21:31 ` Bart Van Assche 2018-04-02 21:39 ` tj 2018-04-02 21:56 ` Bart Van Assche 2018-04-02 21:56 ` Bart Van Assche 2018-04-02 22:01 ` tj 2018-04-02 22:09 ` Bart Van Assche 2018-04-02 22:09 ` Bart Van Assche 2018-04-02 22:16 ` tj 2018-04-02 22:49 ` Bart Van Assche 2018-04-02 22:49 ` Bart Van Assche 2018-04-02 20:48 ` [PATCH 1/2] blk-mq: Factor out [s]rcu synchronization Bart Van Assche
This is an external index of several public inboxes, see mirroring instructions on how to clone and mirror all data and code used by this external index.