* [PATCH v4 01/11] blk-mq: fix direct issue
2017-06-05 15:59 [PATCH v4 00/11] blk-mq: fix & improve queue quiescing Ming Lei
@ 2017-06-05 15:59 ` Ming Lei
2017-06-05 21:25 ` Bart Van Assche
2017-06-05 15:59 ` [PATCH v4 02/11] blk-mq: move blk_mq_quiesce_queue() into include/linux/blk-mq.h Ming Lei
` (9 subsequent siblings)
10 siblings, 1 reply; 22+ messages in thread
From: Ming Lei @ 2017-06-05 15:59 UTC (permalink / raw)
To: Jens Axboe, linux-block, Christoph Hellwig
Cc: Bart Van Assche, Ming Lei, stable
If queue is stopped, we shouldn't dispatch request into driver and
hardware, unfortunately the check is removed in bd166ef183c2(blk-mq-sched:
add framework for MQ capable IO schedulers).
This patch fixes the issue by moving the check back into
__blk_mq_try_issue_directly().
This patch fixes request use-after-free[1][2] during canceling requests
of NVMe in nvme_dev_disable(), which can be triggered easily during
NVMe reset & remove test.
[1] oops kernel log when CONFIG_BLK_DEV_INTEGRITY is on
[ 103.412969] BUG: unable to handle kernel NULL pointer dereference at 000000000000000a
[ 103.412980] IP: bio_integrity_advance+0x48/0xf0
[ 103.412981] PGD 275a88067
[ 103.412981] P4D 275a88067
[ 103.412982] PUD 276c43067
[ 103.412983] PMD 0
[ 103.412984]
[ 103.412986] Oops: 0000 [#1] SMP
[ 103.412989] Modules linked in: vfat fat intel_rapl sb_edac x86_pkg_temp_thermal intel_powerclamp coretemp kvm_intel kvm irqbypass crct10dif_pclmul crc32_pclmul ghash_clmulni_intel pcbc aesni_intel crypto_simd cryptd ipmi_ssif iTCO_wdt iTCO_vendor_support mxm_wmi glue_helper dcdbas ipmi_si mei_me pcspkr mei sg ipmi_devintf lpc_ich ipmi_msghandler shpchp acpi_power_meter wmi nfsd auth_rpcgss nfs_acl lockd grace sunrpc ip_tables xfs libcrc32c sd_mod mgag200 i2c_algo_bit drm_kms_helper syscopyarea sysfillrect sysimgblt fb_sys_fops ttm drm crc32c_intel nvme ahci nvme_core libahci libata tg3 i2c_core megaraid_sas ptp pps_core dm_mirror dm_region_hash dm_log dm_mod
[ 103.413035] CPU: 0 PID: 102 Comm: kworker/0:2 Not tainted 4.11.0+ #1
[ 103.413036] Hardware name: Dell Inc. PowerEdge R730xd/072T6D, BIOS 2.2.5 09/06/2016
[ 103.413041] Workqueue: events nvme_remove_dead_ctrl_work [nvme]
[ 103.413043] task: ffff9cc8775c8000 task.stack: ffffc033c252c000
[ 103.413045] RIP: 0010:bio_integrity_advance+0x48/0xf0
[ 103.413046] RSP: 0018:ffffc033c252fc10 EFLAGS: 00010202
[ 103.413048] RAX: 0000000000000000 RBX: ffff9cc8720a8cc0 RCX: ffff9cca72958240
[ 103.413049] RDX: ffff9cca72958000 RSI: 0000000000000008 RDI: ffff9cc872537f00
[ 103.413049] RBP: ffffc033c252fc28 R08: 0000000000000000 R09: ffffffffb963a0d5
[ 103.413050] R10: 000000000000063e R11: 0000000000000000 R12: ffff9cc8720a8d18
[ 103.413051] R13: 0000000000001000 R14: ffff9cc872682e00 R15: 00000000fffffffb
[ 103.413053] FS: 0000000000000000(0000) GS:ffff9cc877c00000(0000) knlGS:0000000000000000
[ 103.413054] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[ 103.413055] CR2: 000000000000000a CR3: 0000000276c41000 CR4: 00000000001406f0
[ 103.413056] Call Trace:
[ 103.413063] bio_advance+0x2a/0xe0
[ 103.413067] blk_update_request+0x76/0x330
[ 103.413072] blk_mq_end_request+0x1a/0x70
[ 103.413074] blk_mq_dispatch_rq_list+0x370/0x410
[ 103.413076] ? blk_mq_flush_busy_ctxs+0x94/0xe0
[ 103.413080] blk_mq_sched_dispatch_requests+0x173/0x1a0
[ 103.413083] __blk_mq_run_hw_queue+0x8e/0xa0
[ 103.413085] __blk_mq_delay_run_hw_queue+0x9d/0xa0
[ 103.413088] blk_mq_start_hw_queue+0x17/0x20
[ 103.413090] blk_mq_start_hw_queues+0x32/0x50
[ 103.413095] nvme_kill_queues+0x54/0x80 [nvme_core]
[ 103.413097] nvme_remove_dead_ctrl_work+0x1f/0x40 [nvme]
[ 103.413103] process_one_work+0x149/0x360
[ 103.413105] worker_thread+0x4d/0x3c0
[ 103.413109] kthread+0x109/0x140
[ 103.413111] ? rescuer_thread+0x380/0x380
[ 103.413113] ? kthread_park+0x60/0x60
[ 103.413120] ret_from_fork+0x2c/0x40
[ 103.413121] Code: 08 4c 8b 63 50 48 8b 80 80 00 00 00 48 8b 90 d0 03 00 00 31 c0 48 83 ba 40 02 00 00 00 48 8d 8a 40 02 00 00 48 0f 45 c1 c1 ee 09 <0f> b6 48 0a 0f b6 40 09 41 89 f5 83 e9 09 41 d3 ed 44 0f af e8
[ 103.413145] RIP: bio_integrity_advance+0x48/0xf0 RSP: ffffc033c252fc10
[ 103.413146] CR2: 000000000000000a
[ 103.413157] ---[ end trace cd6875d16eb5a11e ]---
[ 103.455368] Kernel panic - not syncing: Fatal exception
[ 103.459826] Kernel Offset: 0x37600000 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffffbfffffff)
[ 103.850916] ---[ end Kernel panic - not syncing: Fatal exception
[ 103.857637] sched: Unexpected reschedule of offline CPU#1!
[ 103.863762] ------------[ cut here ]------------
[2] kernel hang in blk_mq_freeze_queue_wait() when CONFIG_BLK_DEV_INTEGRITY is off
[ 247.129825] INFO: task nvme-test:1772 blocked for more than 120 seconds.
[ 247.137311] Not tainted 4.12.0-rc2.upstream+ #4
[ 247.142954] "echo 0 > /proc/sys/kernel/hung_task_timeout_secs" disables this message.
[ 247.151704] Call Trace:
[ 247.154445] __schedule+0x28a/0x880
[ 247.158341] schedule+0x36/0x80
[ 247.161850] blk_mq_freeze_queue_wait+0x4b/0xb0
[ 247.166913] ? remove_wait_queue+0x60/0x60
[ 247.171485] blk_freeze_queue+0x1a/0x20
[ 247.175770] blk_cleanup_queue+0x7f/0x140
[ 247.180252] nvme_ns_remove+0xa3/0xb0 [nvme_core]
[ 247.185503] nvme_remove_namespaces+0x32/0x50 [nvme_core]
[ 247.191532] nvme_uninit_ctrl+0x2d/0xa0 [nvme_core]
[ 247.196977] nvme_remove+0x70/0x110 [nvme]
[ 247.201545] pci_device_remove+0x39/0xc0
[ 247.205927] device_release_driver_internal+0x141/0x200
[ 247.211761] device_release_driver+0x12/0x20
[ 247.216531] pci_stop_bus_device+0x8c/0xa0
[ 247.221104] pci_stop_and_remove_bus_device_locked+0x1a/0x30
[ 247.227420] remove_store+0x7c/0x90
[ 247.231320] dev_attr_store+0x18/0x30
[ 247.235409] sysfs_kf_write+0x3a/0x50
[ 247.239497] kernfs_fop_write+0xff/0x180
[ 247.243867] __vfs_write+0x37/0x160
[ 247.247757] ? selinux_file_permission+0xe5/0x120
[ 247.253011] ? security_file_permission+0x3b/0xc0
[ 247.258260] vfs_write+0xb2/0x1b0
[ 247.261964] ? syscall_trace_enter+0x1d0/0x2b0
[ 247.266924] SyS_write+0x55/0xc0
[ 247.270540] do_syscall_64+0x67/0x150
[ 247.274636] entry_SYSCALL64_slow_path+0x25/0x25
[ 247.279794] RIP: 0033:0x7f5c96740840
[ 247.283785] RSP: 002b:00007ffd00e87ee8 EFLAGS: 00000246 ORIG_RAX: 0000000000000001
[ 247.292238] RAX: ffffffffffffffda RBX: 0000000000000002 RCX: 00007f5c96740840
[ 247.300194] RDX: 0000000000000002 RSI: 00007f5c97060000 RDI: 0000000000000001
[ 247.308159] RBP: 00007f5c97060000 R08: 000000000000000a R09: 00007f5c97059740
[ 247.316123] R10: 0000000000000001 R11: 0000000000000246 R12: 00007f5c96a14400
[ 247.324087] R13: 0000000000000002 R14: 0000000000000001 R15: 0000000000000000
[ 370.016340] INFO: task nvme-test:1772 blocked for more than 120 seconds.
Fixes: bd166ef183c2(blk-mq-sched: add framework for MQ capable IO schedulers)
Cc: stable@vger.kernel.org
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
block/blk-mq.c | 20 +++++++++++++-------
1 file changed, 13 insertions(+), 7 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 58688205c8f4..2da2cb8d9ff4 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1400,22 +1400,28 @@ static blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, struct request *rq)
return blk_tag_to_qc_t(rq->internal_tag, hctx->queue_num, true);
}
-static void __blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie,
- bool may_sleep)
+static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
+ struct request *rq,
+ blk_qc_t *cookie, bool may_sleep)
{
struct request_queue *q = rq->q;
struct blk_mq_queue_data bd = {
.rq = rq,
.last = true,
};
- struct blk_mq_hw_ctx *hctx;
blk_qc_t new_cookie;
int ret;
+ bool run_queue = true;
+
+ if (blk_mq_hctx_stopped(hctx)) {
+ run_queue = false;
+ goto insert;
+ }
if (q->elevator)
goto insert;
- if (!blk_mq_get_driver_tag(rq, &hctx, false))
+ if (!blk_mq_get_driver_tag(rq, NULL, false))
goto insert;
new_cookie = request_to_qc_t(hctx, rq);
@@ -1439,7 +1445,7 @@ static void __blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie,
__blk_mq_requeue_request(rq);
insert:
- blk_mq_sched_insert_request(rq, false, true, false, may_sleep);
+ blk_mq_sched_insert_request(rq, false, run_queue, false, may_sleep);
}
static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
@@ -1447,7 +1453,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
{
if (!(hctx->flags & BLK_MQ_F_BLOCKING)) {
rcu_read_lock();
- __blk_mq_try_issue_directly(rq, cookie, false);
+ __blk_mq_try_issue_directly(hctx, rq, cookie, false);
rcu_read_unlock();
} else {
unsigned int srcu_idx;
@@ -1455,7 +1461,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
might_sleep();
srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu);
- __blk_mq_try_issue_directly(rq, cookie, true);
+ __blk_mq_try_issue_directly(hctx, rq, cookie, true);
srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx);
}
}
--
2.9.4
^ permalink raw reply related [flat|nested] 22+ messages in thread* Re: [PATCH v4 01/11] blk-mq: fix direct issue
2017-06-05 15:59 ` [PATCH v4 01/11] blk-mq: fix direct issue Ming Lei
@ 2017-06-05 21:25 ` Bart Van Assche
2017-06-06 4:21 ` Ming Lei
0 siblings, 1 reply; 22+ messages in thread
From: Bart Van Assche @ 2017-06-05 21:25 UTC (permalink / raw)
To: hch@infradead.org, linux-block@vger.kernel.org, axboe@fb.com,
ming.lei@redhat.com
On Mon, 2017-06-05 at 23:59 +0800, Ming Lei wrote:
> +static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
> + struct request *rq,
> + blk_qc_t *cookie, bool may_sleep)
> {
> struct request_queue *q =3D rq->q;
> struct blk_mq_queue_data bd =3D {
> .rq =3D rq,
> .last =3D true,
> };
> - struct blk_mq_hw_ctx *hctx;
> blk_qc_t new_cookie;
> int ret;
> + bool run_queue =3D true;
> +
> + if (blk_mq_hctx_stopped(hctx)) {
> + run_queue =3D false;
> + goto insert;
> + }
> =20
> if (q->elevator)
> goto insert;
> =20
> - if (!blk_mq_get_driver_tag(rq, &hctx, false))
> + if (!blk_mq_get_driver_tag(rq, NULL, false))
> goto insert;
> =20
> new_cookie =3D request_to_qc_t(hctx, rq);
> @@ -1439,7 +1445,7 @@ static void __blk_mq_try_issue_directly(struct requ=
est *rq, blk_qc_t *cookie,
> =20
> __blk_mq_requeue_request(rq);
> insert:
> - blk_mq_sched_insert_request(rq, false, true, false, may_sleep);
> + blk_mq_sched_insert_request(rq, false, run_queue, false, may_sleep);
> }
> =20
> static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
> @@ -1447,7 +1453,7 @@ static void blk_mq_try_issue_directly(struct blk_mq=
_hw_ctx *hctx,
> {
> if (!(hctx->flags & BLK_MQ_F_BLOCKING)) {
> rcu_read_lock();
> - __blk_mq_try_issue_directly(rq, cookie, false);
> + __blk_mq_try_issue_directly(hctx, rq, cookie, false);
> rcu_read_unlock();
> } else {
> unsigned int srcu_idx;
> @@ -1455,7 +1461,7 @@ static void blk_mq_try_issue_directly(struct blk_mq=
_hw_ctx *hctx,
> might_sleep();
> =20
> srcu_idx =3D srcu_read_lock(&hctx->queue_rq_srcu);
> - __blk_mq_try_issue_directly(rq, cookie, true);
> + __blk_mq_try_issue_directly(hctx, rq, cookie, true);
> srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx);
> }
> }
Hello Ming,
It seems like you are assuming that the hardware queue of the rq argument
passed to __blk_mq_try_issue_directly() matches the hctx argument? Sorry
but I think that's an incorrect assumption. Please have a look at the
following code in blk_mq_make_request():
if (same_queue_rq)
blk_mq_try_issue_directly(data.hctx, same_queue_rq,
&cookie);
Bart.=
^ permalink raw reply [flat|nested] 22+ messages in thread* Re: [PATCH v4 01/11] blk-mq: fix direct issue
2017-06-05 21:25 ` Bart Van Assche
@ 2017-06-06 4:21 ` Ming Lei
0 siblings, 0 replies; 22+ messages in thread
From: Ming Lei @ 2017-06-06 4:21 UTC (permalink / raw)
To: Bart Van Assche
Cc: hch@infradead.org, linux-block@vger.kernel.org, axboe@fb.com
On Mon, Jun 05, 2017 at 09:25:29PM +0000, Bart Van Assche wrote:
> On Mon, 2017-06-05 at 23:59 +0800, Ming Lei wrote:
> > +static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
> > + struct request *rq,
> > + blk_qc_t *cookie, bool may_sleep)
> > {
> > struct request_queue *q = rq->q;
> > struct blk_mq_queue_data bd = {
> > .rq = rq,
> > .last = true,
> > };
> > - struct blk_mq_hw_ctx *hctx;
> > blk_qc_t new_cookie;
> > int ret;
> > + bool run_queue = true;
> > +
> > + if (blk_mq_hctx_stopped(hctx)) {
> > + run_queue = false;
> > + goto insert;
> > + }
> >
> > if (q->elevator)
> > goto insert;
> >
> > - if (!blk_mq_get_driver_tag(rq, &hctx, false))
> > + if (!blk_mq_get_driver_tag(rq, NULL, false))
> > goto insert;
> >
> > new_cookie = request_to_qc_t(hctx, rq);
> > @@ -1439,7 +1445,7 @@ static void __blk_mq_try_issue_directly(struct request *rq, blk_qc_t *cookie,
> >
> > __blk_mq_requeue_request(rq);
> > insert:
> > - blk_mq_sched_insert_request(rq, false, true, false, may_sleep);
> > + blk_mq_sched_insert_request(rq, false, run_queue, false, may_sleep);
> > }
> >
> > static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
> > @@ -1447,7 +1453,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
> > {
> > if (!(hctx->flags & BLK_MQ_F_BLOCKING)) {
> > rcu_read_lock();
> > - __blk_mq_try_issue_directly(rq, cookie, false);
> > + __blk_mq_try_issue_directly(hctx, rq, cookie, false);
> > rcu_read_unlock();
> > } else {
> > unsigned int srcu_idx;
> > @@ -1455,7 +1461,7 @@ static void blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
> > might_sleep();
> >
> > srcu_idx = srcu_read_lock(&hctx->queue_rq_srcu);
> > - __blk_mq_try_issue_directly(rq, cookie, true);
> > + __blk_mq_try_issue_directly(hctx, rq, cookie, true);
> > srcu_read_unlock(&hctx->queue_rq_srcu, srcu_idx);
> > }
> > }
>
> Hello Ming,
>
> It seems like you are assuming that the hardware queue of the rq argument
> passed to __blk_mq_try_issue_directly() matches the hctx argument? Sorry
> but I think that's an incorrect assumption. Please have a look at the
> following code in blk_mq_make_request():
>
> if (same_queue_rq)
> blk_mq_try_issue_directly(data.hctx, same_queue_rq,
> &cookie);
IMO it is a bug which need to be fixed, since
blk_mq_try_issue_directly() may take a wrong SRCU lock
and performance can be hurt under this situation.
Thanks,
Ming
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH v4 02/11] blk-mq: move blk_mq_quiesce_queue() into include/linux/blk-mq.h
2017-06-05 15:59 [PATCH v4 00/11] blk-mq: fix & improve queue quiescing Ming Lei
2017-06-05 15:59 ` [PATCH v4 01/11] blk-mq: fix direct issue Ming Lei
@ 2017-06-05 15:59 ` Ming Lei
2017-06-05 15:59 ` [PATCH v4 03/11] blk-mq: introduce blk_mq_quiesce_queue_nowait() Ming Lei
` (8 subsequent siblings)
10 siblings, 0 replies; 22+ messages in thread
From: Ming Lei @ 2017-06-05 15:59 UTC (permalink / raw)
To: Jens Axboe, linux-block, Christoph Hellwig; +Cc: Bart Van Assche, Ming Lei
We usually put blk_mq_*() into include/linux/blk-mq.h, so
move this API into there.
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
include/linux/blk-mq.h | 1 +
include/linux/blkdev.h | 1 -
2 files changed, 1 insertion(+), 1 deletion(-)
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index fcd641032f8d..1e480015d172 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -247,6 +247,7 @@ void blk_mq_stop_hw_queues(struct request_queue *q);
void blk_mq_start_hw_queues(struct request_queue *q);
void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
+void blk_mq_quiesce_queue(struct request_queue *q);
void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_run_hw_queues(struct request_queue *q, bool async);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 019f18c65098..7178ad6805e3 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -966,7 +966,6 @@ extern void __blk_run_queue(struct request_queue *q);
extern void __blk_run_queue_uncond(struct request_queue *q);
extern void blk_run_queue(struct request_queue *);
extern void blk_run_queue_async(struct request_queue *q);
-extern void blk_mq_quiesce_queue(struct request_queue *q);
extern int blk_rq_map_user(struct request_queue *, struct request *,
struct rq_map_data *, void __user *, unsigned long,
gfp_t);
--
2.9.4
^ permalink raw reply related [flat|nested] 22+ messages in thread* [PATCH v4 03/11] blk-mq: introduce blk_mq_quiesce_queue_nowait()
2017-06-05 15:59 [PATCH v4 00/11] blk-mq: fix & improve queue quiescing Ming Lei
2017-06-05 15:59 ` [PATCH v4 01/11] blk-mq: fix direct issue Ming Lei
2017-06-05 15:59 ` [PATCH v4 02/11] blk-mq: move blk_mq_quiesce_queue() into include/linux/blk-mq.h Ming Lei
@ 2017-06-05 15:59 ` Ming Lei
2017-06-05 21:29 ` Bart Van Assche
2017-06-05 15:59 ` [PATCH v4 04/11] blk-mq: introduce blk_mq_unquiesce_queue Ming Lei
` (7 subsequent siblings)
10 siblings, 1 reply; 22+ messages in thread
From: Ming Lei @ 2017-06-05 15:59 UTC (permalink / raw)
To: Jens Axboe, linux-block, Christoph Hellwig; +Cc: Bart Van Assche, Ming Lei
This patch introduces blk_mq_quiesce_queue_nowait() so
that we can work around mpt3sas for quiescing its queue.
Once mpt3sas is fixed, we can remove this helper.
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
include/linux/blk-mq.h | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 1e480015d172..13d1196f492f 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -266,6 +266,14 @@ int blk_mq_map_queues(struct blk_mq_tag_set *set);
void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
/*
+ * FIXME: this helper is just for working around mpt3sas.
+ */
+static inline void blk_mq_quiesce_queue_nowait(struct request_queue *q)
+{
+ blk_mq_stop_hw_queues(q);
+}
+
+/*
* Driver command data is immediately after the request. So subtract request
* size to get back to the original request, add request size to get the PDU.
*/
--
2.9.4
^ permalink raw reply related [flat|nested] 22+ messages in thread* Re: [PATCH v4 03/11] blk-mq: introduce blk_mq_quiesce_queue_nowait()
2017-06-05 15:59 ` [PATCH v4 03/11] blk-mq: introduce blk_mq_quiesce_queue_nowait() Ming Lei
@ 2017-06-05 21:29 ` Bart Van Assche
0 siblings, 0 replies; 22+ messages in thread
From: Bart Van Assche @ 2017-06-05 21:29 UTC (permalink / raw)
To: hch@infradead.org, linux-block@vger.kernel.org, axboe@fb.com,
ming.lei@redhat.com
On Mon, 2017-06-05 at 23:59 +0800, Ming Lei wrote:
> This patch introduces blk_mq_quiesce_queue_nowait() so
> that we can workaround mpt3sas for quiescing its queue.
>=20
> Once mpt3sas is fixed, we can remove this helper.
>=20
> Signed-off-by: Ming Lei <ming.lei@redhat.com>
> ---
> include/linux/blk-mq.h | 8 ++++++++
> 1 file changed, 8 insertions(+)
>=20
> diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
> index 1e480015d172..13d1196f492f 100644
> --- a/include/linux/blk-mq.h
> +++ b/include/linux/blk-mq.h
> @@ -266,6 +266,14 @@ int blk_mq_map_queues(struct blk_mq_tag_set *set);
> void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_qu=
eues);
> =20
> /*
> + * FIXME: this helper is just for working around mpt3sas.
> + */
> +static inline void blk_mq_quiesce_queue_nowait(struct request_queue *q)
> +{
> + blk_mq_stop_hw_queues(q);
> +}
> +
> +/*
> * Driver command data is immediately after the request. So subtract req=
uest
> * size to get back to the original request, add request size to get the=
PDU.
> */
Reviewed-by: Bart Van Assche <Bart.VanAssche@sandisk.com>=
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH v4 04/11] blk-mq: introduce blk_mq_unquiesce_queue
2017-06-05 15:59 [PATCH v4 00/11] blk-mq: fix & improve queue quiescing Ming Lei
` (2 preceding siblings ...)
2017-06-05 15:59 ` [PATCH v4 03/11] blk-mq: introduce blk_mq_quiesce_queue_nowait() Ming Lei
@ 2017-06-05 15:59 ` Ming Lei
2017-06-05 21:31 ` Bart Van Assche
2017-06-05 15:59 ` [PATCH v4 05/11] blk-mq: use the introduced blk_mq_unquiesce_queue() Ming Lei
` (6 subsequent siblings)
10 siblings, 1 reply; 22+ messages in thread
From: Ming Lei @ 2017-06-05 15:59 UTC (permalink / raw)
To: Jens Axboe, linux-block, Christoph Hellwig; +Cc: Bart Van Assche, Ming Lei
blk_mq_start_stopped_hw_queues() is used implicitly
as counterpart of blk_mq_quiesce_queue() for unquiescing queue,
so we introduce blk_mq_unquiesce_queue() and make it
the explicit counterpart of blk_mq_quiesce_queue().
This function is for improving the current quiescing mechanism
in the following patches.
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
block/blk-mq.c | 13 +++++++++++++
include/linux/blk-mq.h | 1 +
2 files changed, 14 insertions(+)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 2da2cb8d9ff4..068962789dc4 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -181,6 +181,19 @@ void blk_mq_quiesce_queue(struct request_queue *q)
}
EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
+/*
+ * blk_mq_unquiesce_queue() - counterpart of blk_mq_quiesce_queue()
+ * @q: request queue.
+ *
+ * This function recovers queue into the state before quiescing
+ * which is done by blk_mq_quiesce_queue.
+ */
+void blk_mq_unquiesce_queue(struct request_queue *q)
+{
+ blk_mq_start_stopped_hw_queues(q, true);
+}
+EXPORT_SYMBOL_GPL(blk_mq_unquiesce_queue);
+
void blk_mq_wake_waiters(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 13d1196f492f..fc0fc45fc658 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -248,6 +248,7 @@ void blk_mq_start_hw_queues(struct request_queue *q);
void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
void blk_mq_quiesce_queue(struct request_queue *q);
+void blk_mq_unquiesce_queue(struct request_queue *q);
void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_run_hw_queues(struct request_queue *q, bool async);
--
2.9.4
^ permalink raw reply related [flat|nested] 22+ messages in thread* [PATCH v4 05/11] blk-mq: use the introduced blk_mq_unquiesce_queue()
2017-06-05 15:59 [PATCH v4 00/11] blk-mq: fix & improve queue quiescing Ming Lei
` (3 preceding siblings ...)
2017-06-05 15:59 ` [PATCH v4 04/11] blk-mq: introduce blk_mq_unquiesce_queue Ming Lei
@ 2017-06-05 15:59 ` Ming Lei
2017-06-05 21:31 ` Bart Van Assche
2017-06-05 15:59 ` [PATCH v4 06/11] nvme: host: unquiesce queue in nvme_kill_queues() Ming Lei
` (5 subsequent siblings)
10 siblings, 1 reply; 22+ messages in thread
From: Ming Lei @ 2017-06-05 15:59 UTC (permalink / raw)
To: Jens Axboe, linux-block, Christoph Hellwig
Cc: Bart Van Assche, Ming Lei, linux-nvme, linux-scsi, dm-devel
blk_mq_unquiesce_queue() is used for unquiescing the
queue explicitly, so replace blk_mq_start_stopped_hw_queues()
with it.
For the scsi part, this patch takes Bart's suggestion to
switch to block quiesce/unquiesce API completely.
Cc: linux-nvme@lists.infradead.org
Cc: linux-scsi@vger.kernel.org
Cc: dm-devel@redhat.com
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
drivers/md/dm-rq.c | 2 +-
drivers/nvme/host/core.c | 2 +-
drivers/scsi/scsi_lib.c | 4 ++--
3 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index b639fa7246ee..ea4029de077f 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -71,7 +71,7 @@ static void dm_old_start_queue(struct request_queue *q)
static void dm_mq_start_queue(struct request_queue *q)
{
- blk_mq_start_stopped_hw_queues(q, true);
+ blk_mq_unquiesce_queue(q);
blk_mq_kick_requeue_list(q);
}
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index a60926410438..c3f189e54d10 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2526,7 +2526,7 @@ void nvme_start_queues(struct nvme_ctrl *ctrl)
mutex_lock(&ctrl->namespaces_mutex);
list_for_each_entry(ns, &ctrl->namespaces, list) {
- blk_mq_start_stopped_hw_queues(ns->queue, true);
+ blk_mq_unquiesce_queue(ns->queue);
blk_mq_kick_requeue_list(ns->queue);
}
mutex_unlock(&ctrl->namespaces_mutex);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 884aaa84c2dd..d6df1dafdc9f 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -2979,7 +2979,7 @@ scsi_internal_device_block(struct scsi_device *sdev, bool wait)
if (wait)
blk_mq_quiesce_queue(q);
else
- blk_mq_stop_hw_queues(q);
+ blk_mq_quiesce_queue_nowait(q);
} else {
spin_lock_irqsave(q->queue_lock, flags);
blk_stop_queue(q);
@@ -3033,7 +3033,7 @@ scsi_internal_device_unblock(struct scsi_device *sdev,
return -EINVAL;
if (q->mq_ops) {
- blk_mq_start_stopped_hw_queues(q, false);
+ blk_mq_unquiesce_queue(q);
} else {
spin_lock_irqsave(q->queue_lock, flags);
blk_start_queue(q);
--
2.9.4
^ permalink raw reply related [flat|nested] 22+ messages in thread* [PATCH v4 06/11] nvme: host: unquiesce queue in nvme_kill_queues()
2017-06-05 15:59 [PATCH v4 00/11] blk-mq: fix & improve queue quiescing Ming Lei
` (4 preceding siblings ...)
2017-06-05 15:59 ` [PATCH v4 05/11] blk-mq: use the introduced blk_mq_unquiesce_queue() Ming Lei
@ 2017-06-05 15:59 ` Ming Lei
2017-06-05 15:59 ` [PATCH v4 07/11] blk-mq: use QUEUE_FLAG_QUIESCED to quiesce queue Ming Lei
` (4 subsequent siblings)
10 siblings, 0 replies; 22+ messages in thread
From: Ming Lei @ 2017-06-05 15:59 UTC (permalink / raw)
To: Jens Axboe, linux-block, Christoph Hellwig
Cc: Bart Van Assche, Ming Lei, linux-nvme
When nvme_kill_queues() is run, queues may be in
quiesced state, so we forcibly unquiesce queues to avoid
blocking dispatch, and I/O hang can be avoided in
remove path.
Previously we used blk_mq_start_stopped_hw_queues() as the
counterpart of blk_mq_quiesce_queue(), now we have
introduced blk_mq_unquiesce_queue(), so use it explicitly.
Cc: linux-nvme@lists.infradead.org
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
drivers/nvme/host/core.c | 6 ++++++
1 file changed, 6 insertions(+)
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index c3f189e54d10..d4e5fc95da4b 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2438,6 +2438,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
struct nvme_ns *ns;
mutex_lock(&ctrl->namespaces_mutex);
+
+ /* Forcibly unquiesce queues to avoid blocking dispatch */
+ blk_mq_unquiesce_queue(ctrl->admin_q);
list_for_each_entry(ns, &ctrl->namespaces, list) {
/*
* Revalidating a dead namespace sets capacity to 0. This will
@@ -2448,6 +2451,9 @@ void nvme_kill_queues(struct nvme_ctrl *ctrl)
revalidate_disk(ns->disk);
blk_set_queue_dying(ns->queue);
+ /* Forcibly unquiesce queues to avoid blocking dispatch */
+ blk_mq_unquiesce_queue(ns->queue);
+
/*
* Forcibly start all queues to avoid having stuck requests.
* Note that we must ensure the queues are not stopped
--
2.9.4
^ permalink raw reply related [flat|nested] 22+ messages in thread* [PATCH v4 07/11] blk-mq: use QUEUE_FLAG_QUIESCED to quiesce queue
2017-06-05 15:59 [PATCH v4 00/11] blk-mq: fix & improve queue quiescing Ming Lei
` (5 preceding siblings ...)
2017-06-05 15:59 ` [PATCH v4 06/11] nvme: host: unquiesce queue in nvme_kill_queues() Ming Lei
@ 2017-06-05 15:59 ` Ming Lei
2017-06-05 21:32 ` Bart Van Assche
2017-06-05 15:59 ` [PATCH v4 08/11] blk-mq: update comments on blk_mq_quiesce_queue() Ming Lei
` (3 subsequent siblings)
10 siblings, 1 reply; 22+ messages in thread
From: Ming Lei @ 2017-06-05 15:59 UTC (permalink / raw)
To: Jens Axboe, linux-block, Christoph Hellwig; +Cc: Bart Van Assche, Ming Lei
It is required that no dispatch can happen any more once
blk_mq_quiesce_queue() returns, and we don't have such requirement
on APIs of stopping queue.
But blk_mq_quiesce_queue() still may not block/drain dispatch in
the case of BLK_MQ_S_START_ON_RUN, so use the new introduced flag of
QUEUE_FLAG_QUIESCED and evaluate it inside RCU read-side critical
sections for fixing this issue.
Also blk_mq_quiesce_queue() is implemented via stopping queue, which
limits its uses, and easy to cause race, because any queue restart in
other paths may break blk_mq_quiesce_queue(). With the introduced
flag of QUEUE_FLAG_QUIESCED, we don't need to depend on stopping queue
for quiescing any more.
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
block/blk-mq-sched.c | 3 ++-
block/blk-mq.c | 11 ++++++++++-
include/linux/blk-mq.h | 4 ++++
include/linux/blkdev.h | 2 ++
4 files changed, 18 insertions(+), 2 deletions(-)
diff --git a/block/blk-mq-sched.c b/block/blk-mq-sched.c
index c4e2afb9d12d..ec9885df324c 100644
--- a/block/blk-mq-sched.c
+++ b/block/blk-mq-sched.c
@@ -141,7 +141,8 @@ void blk_mq_sched_dispatch_requests(struct blk_mq_hw_ctx *hctx)
bool did_work = false;
LIST_HEAD(rq_list);
- if (unlikely(blk_mq_hctx_stopped(hctx)))
+ /* RCU or SRCU read lock is needed before checking quiesced flag */
+ if (unlikely(blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)))
return;
hctx->run++;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 068962789dc4..be0b76408a73 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -170,6 +170,10 @@ void blk_mq_quiesce_queue(struct request_queue *q)
__blk_mq_stop_hw_queues(q, true);
+ spin_lock_irq(q->queue_lock);
+ queue_flag_set(QUEUE_FLAG_QUIESCED, q);
+ spin_unlock_irq(q->queue_lock);
+
queue_for_each_hw_ctx(q, hctx, i) {
if (hctx->flags & BLK_MQ_F_BLOCKING)
synchronize_srcu(&hctx->queue_rq_srcu);
@@ -190,6 +194,10 @@ EXPORT_SYMBOL_GPL(blk_mq_quiesce_queue);
*/
void blk_mq_unquiesce_queue(struct request_queue *q)
{
+ spin_lock_irq(q->queue_lock);
+ queue_flag_clear(QUEUE_FLAG_QUIESCED, q);
+ spin_unlock_irq(q->queue_lock);
+
blk_mq_start_stopped_hw_queues(q, true);
}
EXPORT_SYMBOL_GPL(blk_mq_unquiesce_queue);
@@ -1426,7 +1434,8 @@ static void __blk_mq_try_issue_directly(struct blk_mq_hw_ctx *hctx,
int ret;
bool run_queue = true;
- if (blk_mq_hctx_stopped(hctx)) {
+ /* RCU or SRCU read lock is needed before checking quiesced flag */
+ if (blk_mq_hctx_stopped(hctx) || blk_queue_quiesced(q)) {
run_queue = false;
goto insert;
}
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index fc0fc45fc658..dc96ce3f5425 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -271,6 +271,10 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues);
*/
static inline void blk_mq_quiesce_queue_nowait(struct request_queue *q)
{
+ spin_lock_irq(q->queue_lock);
+ queue_flag_set(QUEUE_FLAG_QUIESCED, q);
+ spin_unlock_irq(q->queue_lock);
+
blk_mq_stop_hw_queues(q);
}
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 7178ad6805e3..7da799a88244 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -619,6 +619,7 @@ struct request_queue {
#define QUEUE_FLAG_POLL_STATS 28 /* collecting stats for hybrid polling */
#define QUEUE_FLAG_REGISTERED 29 /* queue has been registered to a disk */
#define QUEUE_FLAG_SCSI_PASSTHROUGH 30 /* queue supports SCSI commands */
+#define QUEUE_FLAG_QUIESCED 31 /* queue has been quiesced */
#define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \
(1 << QUEUE_FLAG_STACKABLE) | \
@@ -715,6 +716,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q)
#define blk_noretry_request(rq) \
((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
REQ_FAILFAST_DRIVER))
+#define blk_queue_quiesced(q) test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags)
static inline bool blk_account_rq(struct request *rq)
{
--
2.9.4
^ permalink raw reply related [flat|nested] 22+ messages in thread* Re: [PATCH v4 07/11] blk-mq: use QUEUE_FLAG_QUIESCED to quiesce queue
2017-06-05 15:59 ` [PATCH v4 07/11] blk-mq: use QUEUE_FLAG_QUIESCED to quiesce queue Ming Lei
@ 2017-06-05 21:32 ` Bart Van Assche
0 siblings, 0 replies; 22+ messages in thread
From: Bart Van Assche @ 2017-06-05 21:32 UTC (permalink / raw)
To: hch@infradead.org, linux-block@vger.kernel.org, axboe@fb.com,
ming.lei@redhat.com
On Mon, 2017-06-05 at 23:59 +0800, Ming Lei wrote:
> It is required that no dispatch can happen any more once
> blk_mq_quiesce_queue() returns, and we don't have such a requirement
> on the APIs for stopping a queue.
>
> But blk_mq_quiesce_queue() still may not block/drain dispatch in the
> case of BLK_MQ_S_START_ON_RUN, so use the newly introduced flag of
> QUEUE_FLAG_QUIESCED and evaluate it inside RCU read-side critical
> sections for fixing this issue.
>
> Also blk_mq_quiesce_queue() is implemented via stopping the queue, which
> limits its uses and easily causes races, because any queue restart in
> other paths may break blk_mq_quiesce_queue(). With the introduced
> flag of QUEUE_FLAG_QUIESCED, we don't need to depend on stopping the
> queue for quiescing any more.
Hello Ming,
Since this patch depends on patch 1 I will wait with reviewing this patch
until there is agreement about patch 1 in this series.
Bart.
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH v4 08/11] blk-mq: update comments on blk_mq_quiesce_queue()
2017-06-05 15:59 [PATCH v4 00/11] blk-mq: fix & improve queue quiescing Ming Lei
` (6 preceding siblings ...)
2017-06-05 15:59 ` [PATCH v4 07/11] blk-mq: use QUEUE_FLAG_QUIESCED to quiesce queue Ming Lei
@ 2017-06-05 15:59 ` Ming Lei
2017-06-05 15:59 ` [PATCH v4 09/11] blk-mq: don't stop queue for quiescing Ming Lei
` (2 subsequent siblings)
10 siblings, 0 replies; 22+ messages in thread
From: Ming Lei @ 2017-06-05 15:59 UTC (permalink / raw)
To: Jens Axboe, linux-block, Christoph Hellwig; +Cc: Bart Van Assche, Ming Lei
Actually what we want to get from blk_mq_quiesce_queue()
isn't only to wait for completion of all ongoing .queue_rq().
In the typical context of canceling requests, we need to
make sure that the following is done in the dispatch path
before starting to cancel requests:
- failed dispatched request is finished
- busy dispatched request is requeued, and the STARTED
flag is cleared
So update the comment to keep the code, documentation and our expectation consistent.
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
block/blk-mq.c | 7 ++++---
1 file changed, 4 insertions(+), 3 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index be0b76408a73..c2177e8713ec 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -155,12 +155,13 @@ void blk_mq_unfreeze_queue(struct request_queue *q)
EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
/**
- * blk_mq_quiesce_queue() - wait until all ongoing queue_rq calls have finished
+ * blk_mq_quiesce_queue() - wait until all ongoing dispatches have finished
* @q: request queue.
*
* Note: this function does not prevent that the struct request end_io()
- * callback function is invoked. Additionally, it is not prevented that
- * new queue_rq() calls occur unless the queue has been stopped first.
+ * callback function is invoked. Once this function is returned, we make
+ * sure no dispatch can happen until the queue is unquiesced via
+ * blk_mq_unquiesce_queue().
*/
void blk_mq_quiesce_queue(struct request_queue *q)
{
--
2.9.4
^ permalink raw reply related [flat|nested] 22+ messages in thread* [PATCH v4 09/11] blk-mq: don't stop queue for quiescing
2017-06-05 15:59 [PATCH v4 00/11] blk-mq: fix & improve queue quiescing Ming Lei
` (7 preceding siblings ...)
2017-06-05 15:59 ` [PATCH v4 08/11] blk-mq: update comments on blk_mq_quiesce_queue() Ming Lei
@ 2017-06-05 15:59 ` Ming Lei
2017-06-05 21:36 ` Bart Van Assche
2017-06-05 15:59 ` [PATCH v4 10/11] blk-mq: clarify dispatch may not be drained/blocked by stopping queue Ming Lei
2017-06-05 15:59 ` [PATCH v4 11/11] Revert "blk-mq: don't use sync workqueue flushing from drivers" Ming Lei
10 siblings, 1 reply; 22+ messages in thread
From: Ming Lei @ 2017-06-05 15:59 UTC (permalink / raw)
To: Jens Axboe, linux-block, Christoph Hellwig; +Cc: Bart Van Assche, Ming Lei
A queue can be started by other blk-mq APIs and can be used in
different cases; this limits the uses of blk_mq_quiesce_queue()
if it is based on stopping the queue, and makes its usage very
difficult, especially since users have to use the stop-queue APIs
carefully to avoid breaking blk_mq_quiesce_queue().
We have applied the QUIESCED flag for draining and blocking
dispatch, so it isn't necessary to stop queue any more.
After stopping queue is removed, blk_mq_quiesce_queue() can
be used safely and easily, then users won't worry about queue
restarting during quiescing at all.
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
block/blk-mq.c | 9 +++------
include/linux/blk-mq.h | 2 --
2 files changed, 3 insertions(+), 8 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index c2177e8713ec..2788cacdaa1e 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -169,11 +169,7 @@ void blk_mq_quiesce_queue(struct request_queue *q)
unsigned int i;
bool rcu = false;
- __blk_mq_stop_hw_queues(q, true);
-
- spin_lock_irq(q->queue_lock);
- queue_flag_set(QUEUE_FLAG_QUIESCED, q);
- spin_unlock_irq(q->queue_lock);
+ blk_mq_quiesce_queue_nowait(q);
queue_for_each_hw_ctx(q, hctx, i) {
if (hctx->flags & BLK_MQ_F_BLOCKING)
@@ -199,7 +195,8 @@ void blk_mq_unquiesce_queue(struct request_queue *q)
queue_flag_clear(QUEUE_FLAG_QUIESCED, q);
spin_unlock_irq(q->queue_lock);
- blk_mq_start_stopped_hw_queues(q, true);
+ /* dispatch requests which are inserted during quiescing */
+ blk_mq_run_hw_queues(q, true);
}
EXPORT_SYMBOL_GPL(blk_mq_unquiesce_queue);
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index dc96ce3f5425..105d70630947 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -274,8 +274,6 @@ static inline void blk_mq_quiesce_queue_nowait(struct request_queue *q)
spin_lock_irq(q->queue_lock);
queue_flag_set(QUEUE_FLAG_QUIESCED, q);
spin_unlock_irq(q->queue_lock);
-
- blk_mq_stop_hw_queues(q);
}
/*
--
2.9.4
^ permalink raw reply related [flat|nested] 22+ messages in thread* Re: [PATCH v4 09/11] blk-mq: don't stop queue for quiescing
2017-06-05 15:59 ` [PATCH v4 09/11] blk-mq: don't stop queue for quiescing Ming Lei
@ 2017-06-05 21:36 ` Bart Van Assche
0 siblings, 0 replies; 22+ messages in thread
From: Bart Van Assche @ 2017-06-05 21:36 UTC (permalink / raw)
To: hch@infradead.org, linux-block@vger.kernel.org, axboe@fb.com,
ming.lei@redhat.com
On Mon, 2017-06-05 at 23:59 +0800, Ming Lei wrote:
> @@ -169,11 +169,7 @@ void blk_mq_quiesce_queue(struct request_queue *q)
> unsigned int i;
> bool rcu = false;
>
> - __blk_mq_stop_hw_queues(q, true);
> -
> - spin_lock_irq(q->queue_lock);
> - queue_flag_set(QUEUE_FLAG_QUIESCED, q);
> - spin_unlock_irq(q->queue_lock);
> + blk_mq_quiesce_queue_nowait(q);
>
Although I would have preferred to keep blk_mq_quiesce_nowait() open-coded in
this function,
Reviewed-by: Bart Van Assche <Bart.VanAssche@sandisk.com>
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH v4 10/11] blk-mq: clarify dispatch may not be drained/blocked by stopping queue
2017-06-05 15:59 [PATCH v4 00/11] blk-mq: fix & improve queue quiescing Ming Lei
` (8 preceding siblings ...)
2017-06-05 15:59 ` [PATCH v4 09/11] blk-mq: don't stop queue for quiescing Ming Lei
@ 2017-06-05 15:59 ` Ming Lei
2017-06-05 23:55 ` Bart Van Assche
2017-06-05 15:59 ` [PATCH v4 11/11] Revert "blk-mq: don't use sync workqueue flushing from drivers" Ming Lei
10 siblings, 1 reply; 22+ messages in thread
From: Ming Lei @ 2017-06-05 15:59 UTC (permalink / raw)
To: Jens Axboe, linux-block, Christoph Hellwig; +Cc: Bart Van Assche, Ming Lei
BLK_MQ_S_STOPPED may not be observed in other concurrent I/O paths,
so we can't guarantee that dispatching won't happen after returning
from the APIs for stopping the queue.
So clarify this fact to avoid potential misuse.
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
block/blk-mq.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 2788cacdaa1e..d3e24d600246 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -1186,6 +1186,11 @@ static void __blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx, bool sync)
set_bit(BLK_MQ_S_STOPPED, &hctx->state);
}
+/*
+ * We do not guarantee that dispatch can be drained or blocked
+ * after blk_mq_stop_hw_queue() returns. Please use
+ * blk_mq_quiesce_queue() for that requirement.
+ */
void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
{
__blk_mq_stop_hw_queue(hctx, false);
@@ -1201,6 +1206,11 @@ static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync)
__blk_mq_stop_hw_queue(hctx, sync);
}
+/*
+ * We do not guarantee that dispatch can be drained or blocked
+ * after blk_mq_stop_hw_queues() returns. Please use
+ * blk_mq_quiesce_queue() for that requirement.
+ */
void blk_mq_stop_hw_queues(struct request_queue *q)
{
__blk_mq_stop_hw_queues(q, false);
--
2.9.4
^ permalink raw reply related [flat|nested] 22+ messages in thread* Re: [PATCH v4 10/11] blk-mq: clarify dispatch may not be drained/blocked by stopping queue
2017-06-05 15:59 ` [PATCH v4 10/11] blk-mq: clarify dispatch may not be drained/blocked by stopping queue Ming Lei
@ 2017-06-05 23:55 ` Bart Van Assche
2017-06-06 12:48 ` Ming Lei
0 siblings, 1 reply; 22+ messages in thread
From: Bart Van Assche @ 2017-06-05 23:55 UTC (permalink / raw)
To: hch@infradead.org, linux-block@vger.kernel.org, axboe@fb.com,
ming.lei@redhat.com
On Mon, 2017-06-05 at 23:59 +0800, Ming Lei wrote:
> +/*
> + * We do not guarantee that dispatch can be drained or blocked
> + * after blk_mq_stop_hw_queue() returns. Please use
> + * blk_mq_quiesce_queue() for that requirement.
> + */
Hello Ming,
This comment explains what blk_mq_stop_hw_queue() should not be used for
and may leave the reader wondering what it is useful for. How about mentioning
first that this function is useful to pause .queue_rq() calls as long as the
block driver knows it will have to return BUSY if .queue_rq() is called?
Bart.
^ permalink raw reply [flat|nested] 22+ messages in thread
* Re: [PATCH v4 10/11] blk-mq: clarify dispatch may not be drained/blocked by stopping queue
2017-06-05 23:55 ` Bart Van Assche
@ 2017-06-06 12:48 ` Ming Lei
0 siblings, 0 replies; 22+ messages in thread
From: Ming Lei @ 2017-06-06 12:48 UTC (permalink / raw)
To: Bart Van Assche
Cc: hch@infradead.org, linux-block@vger.kernel.org, axboe@fb.com
On Mon, Jun 05, 2017 at 11:55:20PM +0000, Bart Van Assche wrote:
> On Mon, 2017-06-05 at 23:59 +0800, Ming Lei wrote:
> > +/*
> > + * We do not guarantee that dispatch can be drained or blocked
> > + * after blk_mq_stop_hw_queue() returns. Please use
> > + * blk_mq_quiesce_queue() for that requirement.
> > + */
>
> Hello Ming,
>
> This comment explains what blk_mq_stop_hw_queue() should not be used for
> and may leave the reader wondering what it is useful for. How about mentioning
> first that this function is useful to pause .queue_rq() calls as long as the
> block driver knows it will have to return BUSY if .queue_rq() is called?
OK, will make that as one example.
Thanks,
Ming
^ permalink raw reply [flat|nested] 22+ messages in thread
* [PATCH v4 11/11] Revert "blk-mq: don't use sync workqueue flushing from drivers"
2017-06-05 15:59 [PATCH v4 00/11] blk-mq: fix & improve queue quiescing Ming Lei
` (9 preceding siblings ...)
2017-06-05 15:59 ` [PATCH v4 10/11] blk-mq: clarify dispatch may not be drained/blocked by stopping queue Ming Lei
@ 2017-06-05 15:59 ` Ming Lei
2017-06-05 21:39 ` Bart Van Assche
10 siblings, 1 reply; 22+ messages in thread
From: Ming Lei @ 2017-06-05 15:59 UTC (permalink / raw)
To: Jens Axboe, linux-block, Christoph Hellwig; +Cc: Bart Van Assche, Ming Lei
This patch reverts commit 2719aa217e0d02 (blk-mq: don't use
sync workqueue flushing from drivers) because only
blk_mq_quiesce_queue() needs the sync flush, and now
we don't need to stop the queue any more, so revert it.
Also switch to cancel_delayed_work() in blk_mq_stop_hw_queue().
Signed-off-by: Ming Lei <ming.lei@redhat.com>
---
block/blk-mq.c | 30 ++++++++----------------------
1 file changed, 8 insertions(+), 22 deletions(-)
diff --git a/block/blk-mq.c b/block/blk-mq.c
index d3e24d600246..aa707ca75355 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -42,7 +42,6 @@ static LIST_HEAD(all_q_list);
static void blk_mq_poll_stats_start(struct request_queue *q);
static void blk_mq_poll_stats_fn(struct blk_stat_callback *cb);
-static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync);
static int blk_mq_poll_stats_bkt(const struct request *rq)
{
@@ -1176,16 +1175,6 @@ bool blk_mq_queue_stopped(struct request_queue *q)
}
EXPORT_SYMBOL(blk_mq_queue_stopped);
-static void __blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx, bool sync)
-{
- if (sync)
- cancel_delayed_work_sync(&hctx->run_work);
- else
- cancel_delayed_work(&hctx->run_work);
-
- set_bit(BLK_MQ_S_STOPPED, &hctx->state);
-}
-
/*
* We do not guarantee that dispatch can be drained or blocked
* after blk_mq_stop_hw_queue() returns. Please use
@@ -1193,18 +1182,11 @@ static void __blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx, bool sync)
*/
void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx)
{
- __blk_mq_stop_hw_queue(hctx, false);
-}
-EXPORT_SYMBOL(blk_mq_stop_hw_queue);
+ cancel_delayed_work(&hctx->run_work);
-static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync)
-{
- struct blk_mq_hw_ctx *hctx;
- int i;
-
- queue_for_each_hw_ctx(q, hctx, i)
- __blk_mq_stop_hw_queue(hctx, sync);
+ set_bit(BLK_MQ_S_STOPPED, &hctx->state);
}
+EXPORT_SYMBOL(blk_mq_stop_hw_queue);
/*
* We do not guarantee that dispatch can be drained or blocked
@@ -1213,7 +1195,11 @@ static void __blk_mq_stop_hw_queues(struct request_queue *q, bool sync)
*/
void blk_mq_stop_hw_queues(struct request_queue *q)
{
- __blk_mq_stop_hw_queues(q, false);
+ struct blk_mq_hw_ctx *hctx;
+ int i;
+
+ queue_for_each_hw_ctx(q, hctx, i)
+ blk_mq_stop_hw_queue(hctx);
}
EXPORT_SYMBOL(blk_mq_stop_hw_queues);
--
2.9.4
^ permalink raw reply related [flat|nested] 22+ messages in thread