* [PATCH v2 1/4] blk-mq: Rename request_queue.mq_freeze_wq into mq_wq
2018-01-10 18:18 [PATCH v2 0/4] Make SCSI transport recovery more robust Bart Van Assche
@ 2018-01-10 18:18 ` Bart Van Assche
2018-01-10 18:18 ` [PATCH v2 2/4] block: Introduce blk_start_wait_if_quiesced() and blk_finish_wait_if_quiesced() Bart Van Assche
` (2 subsequent siblings)
3 siblings, 0 replies; 10+ messages in thread
From: Bart Van Assche @ 2018-01-10 18:18 UTC (permalink / raw)
To: Jens Axboe
Cc: linux-block, Martin K . Petersen, Christoph Hellwig,
Hannes Reinecke, Jason Gunthorpe, Doug Ledford, linux-scsi,
linux-rdma, Bart Van Assche, Johannes Thumshirn, Ming Lei
Rename a waitqueue in struct request_queue since the next patch will
add code that uses this waitqueue outside the request queue freezing
implementation.
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Cc: Ming Lei <ming.lei@redhat.com>
---
block/blk-core.c | 10 +++++-----
block/blk-mq.c | 10 +++++-----
include/linux/blkdev.h | 2 +-
3 files changed, 11 insertions(+), 11 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index fa1cb95f7f6a..c10b4ce95248 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -377,7 +377,7 @@ void blk_clear_preempt_only(struct request_queue *q)
spin_lock_irqsave(q->queue_lock, flags);
queue_flag_clear(QUEUE_FLAG_PREEMPT_ONLY, q);
- wake_up_all(&q->mq_freeze_wq);
+ wake_up_all(&q->mq_wq);
spin_unlock_irqrestore(q->queue_lock, flags);
}
EXPORT_SYMBOL_GPL(blk_clear_preempt_only);
@@ -648,7 +648,7 @@ void blk_set_queue_dying(struct request_queue *q)
}
/* Make blk_queue_enter() reexamine the DYING flag. */
- wake_up_all(&q->mq_freeze_wq);
+ wake_up_all(&q->mq_wq);
}
EXPORT_SYMBOL_GPL(blk_set_queue_dying);
@@ -851,7 +851,7 @@ int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags)
*/
smp_rmb();
- ret = wait_event_interruptible(q->mq_freeze_wq,
+ ret = wait_event_interruptible(q->mq_wq,
(atomic_read(&q->mq_freeze_depth) == 0 &&
(preempt || !blk_queue_preempt_only(q))) ||
blk_queue_dying(q));
@@ -872,7 +872,7 @@ static void blk_queue_usage_counter_release(struct percpu_ref *ref)
struct request_queue *q =
container_of(ref, struct request_queue, q_usage_counter);
- wake_up_all(&q->mq_freeze_wq);
+ wake_up_all(&q->mq_wq);
}
static void blk_rq_timed_out_timer(struct timer_list *t)
@@ -948,7 +948,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
q->bypass_depth = 1;
__set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags);
- init_waitqueue_head(&q->mq_freeze_wq);
+ init_waitqueue_head(&q->mq_wq);
/*
* Init percpu_ref in atomic mode so that it's faster to shutdown.
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 29f140b4dbf7..a05ea7e9b415 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -137,16 +137,16 @@ EXPORT_SYMBOL_GPL(blk_freeze_queue_start);
void blk_mq_freeze_queue_wait(struct request_queue *q)
{
- wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->q_usage_counter));
+ wait_event(q->mq_wq, percpu_ref_is_zero(&q->q_usage_counter));
}
EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait);
int blk_mq_freeze_queue_wait_timeout(struct request_queue *q,
unsigned long timeout)
{
- return wait_event_timeout(q->mq_freeze_wq,
- percpu_ref_is_zero(&q->q_usage_counter),
- timeout);
+ return wait_event_timeout(q->mq_wq,
+ percpu_ref_is_zero(&q->q_usage_counter),
+ timeout);
}
EXPORT_SYMBOL_GPL(blk_mq_freeze_queue_wait_timeout);
@@ -185,7 +185,7 @@ void blk_mq_unfreeze_queue(struct request_queue *q)
WARN_ON_ONCE(freeze_depth < 0);
if (!freeze_depth) {
percpu_ref_reinit(&q->q_usage_counter);
- wake_up_all(&q->mq_freeze_wq);
+ wake_up_all(&q->mq_wq);
}
}
EXPORT_SYMBOL_GPL(blk_mq_unfreeze_queue);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 50fb1c18ec54..2c74c03a9d5f 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -638,7 +638,7 @@ struct request_queue {
struct throtl_data *td;
#endif
struct rcu_head rcu_head;
- wait_queue_head_t mq_freeze_wq;
+ wait_queue_head_t mq_wq;
struct percpu_ref q_usage_counter;
struct list_head all_q_node;
--
2.15.1
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH v2 2/4] block: Introduce blk_start_wait_if_quiesced() and blk_finish_wait_if_quiesced()
2018-01-10 18:18 [PATCH v2 0/4] Make SCSI transport recovery more robust Bart Van Assche
2018-01-10 18:18 ` [PATCH v2 1/4] blk-mq: Rename request_queue.mq_freeze_wq into mq_wq Bart Van Assche
@ 2018-01-10 18:18 ` Bart Van Assche
2018-01-10 18:18 ` [PATCH v2 3/4] scsi: Avoid that .queuecommand() gets called for a quiesced SCSI device Bart Van Assche
2018-01-10 18:18 ` [PATCH v2 4/4] IB/srp: Fix a sleep-in-invalid-context bug Bart Van Assche
3 siblings, 0 replies; 10+ messages in thread
From: Bart Van Assche @ 2018-01-10 18:18 UTC (permalink / raw)
To: Jens Axboe
Cc: linux-block, Martin K . Petersen, Christoph Hellwig,
Hannes Reinecke, Jason Gunthorpe, Doug Ledford, linux-scsi,
linux-rdma, Bart Van Assche, Hannes Reinecke, Johannes Thumshirn,
Ming Lei
Introduce functions that allow block drivers to wait while a request
queue is in the quiesced state (blk-mq) or in the stopped state (legacy
block layer). The next patch will add calls to these functions in the
SCSI core.
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Hannes Reinecke <hare@suse.de>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Cc: Ming Lei <ming.lei@redhat.com>
---
block/blk-core.c | 1 +
block/blk-mq.c | 64 ++++++++++++++++++++++++++++++++++++++++++++++++++
include/linux/blk-mq.h | 2 ++
3 files changed, 67 insertions(+)
diff --git a/block/blk-core.c b/block/blk-core.c
index c10b4ce95248..06eaea15bae9 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -287,6 +287,7 @@ void blk_start_queue(struct request_queue *q)
WARN_ON_ONCE(q->mq_ops);
queue_flag_clear(QUEUE_FLAG_STOPPED, q);
+ wake_up_all(&q->mq_wq);
__blk_run_queue(q);
}
EXPORT_SYMBOL(blk_start_queue);
diff --git a/block/blk-mq.c b/block/blk-mq.c
index a05ea7e9b415..87455977ad34 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -247,11 +247,75 @@ void blk_mq_unquiesce_queue(struct request_queue *q)
queue_flag_clear(QUEUE_FLAG_QUIESCED, q);
spin_unlock_irqrestore(q->queue_lock, flags);
+ wake_up_all(&q->mq_wq);
+
/* dispatch requests which are inserted during quiescing */
blk_mq_run_hw_queues(q, true);
}
EXPORT_SYMBOL_GPL(blk_mq_unquiesce_queue);
+/**
+ * blk_start_wait_if_quiesced() - wait if a queue is quiesced (blk-mq) or stopped (legacy block layer)
+ * @q: Request queue pointer.
+ *
+ * Some block drivers, e.g. the SCSI core, can bypass the block layer core
+ * request submission mechanism. Surround such code with
+ * blk_start_wait_if_quiesced() / blk_finish_wait_if_quiesced() to avoid that
+ * request submission can happen while a queue is quiesced or stopped.
+ *
+ * Returns with the RCU read lock held (blk-mq) or with q->queue_lock held
+ * (legacy block layer).
+ *
+ * Notes:
+ * - Every call of this function must be followed by a call of
+ * blk_finish_wait_if_quiesced().
+ * - This function does not support block drivers whose .queue_rq()
+ * implementation can sleep (BLK_MQ_F_BLOCKING).
+ */
+int blk_start_wait_if_quiesced(struct request_queue *q)
+{
+ struct blk_mq_hw_ctx *hctx;
+ unsigned int i;
+
+ might_sleep();
+
+ if (q->mq_ops) {
+ queue_for_each_hw_ctx(q, hctx, i)
+ WARN_ON(hctx->flags & BLK_MQ_F_BLOCKING);
+
+ rcu_read_lock();
+ while (!blk_queue_dying(q) && blk_queue_quiesced(q)) {
+ rcu_read_unlock();
+ wait_event(q->mq_wq, blk_queue_dying(q) ||
+ !blk_queue_quiesced(q));
+ rcu_read_lock();
+ }
+ } else {
+ spin_lock_irq(q->queue_lock);
+ wait_event_lock_irq(q->mq_wq,
+ blk_queue_dying(q) || !blk_queue_stopped(q),
+ *q->queue_lock);
+ q->request_fn_active++;
+ }
+ return blk_queue_dying(q) ? -ENODEV : 0;
+}
+EXPORT_SYMBOL(blk_start_wait_if_quiesced);
+
+/**
+ * blk_finish_wait_if_quiesced() - counterpart of blk_start_wait_if_quiesced()
+ * @q: Request queue pointer.
+ */
+void blk_finish_wait_if_quiesced(struct request_queue *q)
+{
+ if (q->mq_ops) {
+ rcu_read_unlock();
+ } else {
+ q->request_fn_active--;
+ spin_unlock_irq(q->queue_lock);
+ }
+}
+EXPORT_SYMBOL(blk_finish_wait_if_quiesced);
+
void blk_mq_wake_waiters(struct request_queue *q)
{
struct blk_mq_hw_ctx *hctx;
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 8efcf49796a3..15912cd348b5 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -267,6 +267,8 @@ void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async);
void blk_mq_quiesce_queue(struct request_queue *q);
void blk_mq_unquiesce_queue(struct request_queue *q);
+int blk_start_wait_if_quiesced(struct request_queue *q);
+void blk_finish_wait_if_quiesced(struct request_queue *q);
void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs);
bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async);
void blk_mq_run_hw_queues(struct request_queue *q, bool async);
--
2.15.1
^ permalink raw reply related [flat|nested] 10+ messages in thread* [PATCH v2 3/4] scsi: Avoid that .queuecommand() gets called for a quiesced SCSI device
2018-01-10 18:18 [PATCH v2 0/4] Make SCSI transport recovery more robust Bart Van Assche
2018-01-10 18:18 ` [PATCH v2 1/4] blk-mq: Rename request_queue.mq_freeze_wq into mq_wq Bart Van Assche
2018-01-10 18:18 ` [PATCH v2 2/4] block: Introduce blk_start_wait_if_quiesced() and blk_finish_wait_if_quiesced() Bart Van Assche
@ 2018-01-10 18:18 ` Bart Van Assche
2018-01-11 2:23 ` Ming Lei
2018-01-10 18:18 ` [PATCH v2 4/4] IB/srp: Fix a sleep-in-invalid-context bug Bart Van Assche
3 siblings, 1 reply; 10+ messages in thread
From: Bart Van Assche @ 2018-01-10 18:18 UTC (permalink / raw)
To: Jens Axboe
Cc: linux-block, Martin K . Petersen, Christoph Hellwig,
Hannes Reinecke, Jason Gunthorpe, Doug Ledford, linux-scsi,
linux-rdma, Bart Van Assche, Johannes Thumshirn, Ming Lei
Several SCSI transport and LLD drivers surround code that does not
tolerate concurrent calls of .queuecommand() with scsi_target_block() /
scsi_target_unblock(). These last two functions use
blk_mq_quiesce_queue() / blk_mq_unquiesce_queue() for scsi-mq request
queues to prevent concurrent .queuecommand() calls. However, that is
not sufficient to prevent .queuecommand() calls from scsi_send_eh_cmnd().
Hence surround the .queuecommand() call from the SCSI error handler with
blk_start_wait_if_quiesced() / blk_finish_wait_if_quiesced().
Note: converting the .queuecommand() call in scsi_send_eh_cmnd() into
code that calls blk_get_request(), e.g. scsi_execute_req(), is not an
option since scsi_send_eh_cmnd() can be called if all requests are
allocated and if no requests will make progress without aborting any
of these requests.
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.de>
Cc: Martin K. Petersen <martin.petersen@oracle.com>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Johannes Thumshirn <jthumshirn@suse.de>
Cc: Ming Lei <ming.lei@redhat.com>
---
drivers/scsi/scsi_error.c | 3 +++
1 file changed, 3 insertions(+)
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 62b56de38ae8..f7154ea86715 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -1016,6 +1016,7 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, unsigned char *cmnd,
{
struct scsi_device *sdev = scmd->device;
struct Scsi_Host *shost = sdev->host;
+ struct request_queue *q = sdev->request_queue;
DECLARE_COMPLETION_ONSTACK(done);
unsigned long timeleft = timeout;
struct scsi_eh_save ses;
@@ -1028,7 +1029,9 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, unsigned char *cmnd,
scsi_log_send(scmd);
scmd->scsi_done = scsi_eh_done;
+ blk_start_wait_if_quiesced(q);
rtn = shost->hostt->queuecommand(shost, scmd);
+ blk_finish_wait_if_quiesced(q);
if (rtn) {
if (timeleft > stall_for) {
scsi_eh_restore_cmnd(scmd, &ses);
--
2.15.1
^ permalink raw reply related [flat|nested] 10+ messages in thread* Re: [PATCH v2 3/4] scsi: Avoid that .queuecommand() gets called for a quiesced SCSI device
2018-01-10 18:18 ` [PATCH v2 3/4] scsi: Avoid that .queuecommand() gets called for a quiesced SCSI device Bart Van Assche
@ 2018-01-11 2:23 ` Ming Lei
2018-01-12 22:45 ` Bart Van Assche
0 siblings, 1 reply; 10+ messages in thread
From: Ming Lei @ 2018-01-11 2:23 UTC (permalink / raw)
To: Bart Van Assche
Cc: Jens Axboe, linux-block, Martin K . Petersen, Christoph Hellwig,
Hannes Reinecke, Jason Gunthorpe, Doug Ledford, linux-scsi,
linux-rdma, Johannes Thumshirn
On Wed, Jan 10, 2018 at 10:18:16AM -0800, Bart Van Assche wrote:
> Several SCSI transport and LLD drivers surround code that does not
> tolerate concurrent calls of .queuecommand() with scsi_target_block() /
> scsi_target_unblock(). These last two functions use
> blk_mq_quiesce_queue() / blk_mq_unquiesce_queue() for scsi-mq request
> queues to prevent concurrent .queuecommand() calls. However, that is
Actually blk_mq_quiesce_queue() is supposed to disable and drain dispatch,
not for prevent concurrent .queuecommand() calls.
> not sufficient to prevent .queuecommand() calls from scsi_send_eh_cmnd().
Given it is error handling, do we need to prevent the .queuecommand() call
in scsi_send_eh_cmnd()? Could you share us what the actual issue
observed is from user view?
> Hence surround the .queuecommand() call from the SCSI error handler with
> blk_start_wait_if_quiesced() / blk_finish_wait_if_quiesced().
>
> Note: converting the .queuecommand() call in scsi_send_eh_cmnd() into
> code that calls blk_get_request(), e.g. scsi_execute_req(), is not an
> option since scsi_send_eh_cmnd() can be called if all requests are
> allocated and if no requests will make progress without aborting any
> of these requests.
If we need to prevent the .queuecommand() in scsi_send_eh_cmnd(), what
do you think of the approach by requeuing the EH command via
scsi_mq_requeue_cmd()/scsi_requeue_cmd()? And the EH request will be
dispatched finally when the queue becomes unquiesced or the STOPPED
is cleared.
--
Ming
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v2 3/4] scsi: Avoid that .queuecommand() gets called for a quiesced SCSI device
2018-01-11 2:23 ` Ming Lei
@ 2018-01-12 22:45 ` Bart Van Assche
2018-01-13 15:54 ` Ming Lei
0 siblings, 1 reply; 10+ messages in thread
From: Bart Van Assche @ 2018-01-12 22:45 UTC (permalink / raw)
To: ming.lei@redhat.com
Cc: linux-block@vger.kernel.org, jthumshirn@suse.de,
linux-rdma@vger.kernel.org, jgg@mellanox.com, hch@lst.de,
martin.petersen@oracle.com, axboe@kernel.dk,
linux-scsi@vger.kernel.org, hare@suse.com, dledford@redhat.com
T24gVGh1LCAyMDE4LTAxLTExIGF0IDEwOjIzICswODAwLCBNaW5nIExlaSB3cm90ZToNCj4gPiBu
b3Qgc3VmZmljaWVudCB0byBwcmV2ZW50IC5xdWV1ZWNvbW1hbmQoKSBjYWxscyBmcm9tIHNjc2lf
c2VuZF9laF9jbW5kKCkuDQo+IA0KPiBHaXZlbiBpdCBpcyBlcnJvciBoYW5kbGluZywgZG8gd2Ug
bmVlZCB0byBwcmV2ZW50IHRoZSAucXVldWVjb21tYW5kKCkgY2FsbA0KPiBpbiBzY3NpX3NlbmRf
ZWhfY21uZCgpPyBDb3VsZCB5b3Ugc2hhcmUgdXMgd2hhdCB0aGUgYWN0dWFsIGlzc3VlDQo+IG9i
c2VydmVkIGlzIGZyb20gdXNlciB2aWV3Pw0KDQpQbGVhc2UgaGF2ZSBhIGxvb2sgYXQgdGhlIGtl
cm5lbCBidWcgcmVwb3J0IGluIHRoZSBkZXNjcmlwdGlvbiBvZiBwYXRjaCA0LzQgb2YNCnRoaXMg
c2VyaWVzLg0KDQo+ID4gSGVuY2Ugc3Vycm91bmQgdGhlIC5xdWV1ZWNvbW1hbmQoKSBjYWxsIGZy
b20gdGhlIFNDU0kgZXJyb3IgaGFuZGxlciB3aXRoDQo+ID4gYmxrX3N0YXJ0X3dhaXRfaWZfcXVp
ZXNjZWQoKSAvIGJsa19maW5pc2hfd2FpdF9pZl9xdWllc2NlZCgpLg0KPiA+IA0KPiA+IE5vdGU6
IGNvbnZlcnRpbmcgdGhlIC5xdWV1ZWNvbW1hbmQoKSBjYWxsIGluIHNjc2lfc2VuZF9laF9jbW5k
KCkgaW50bw0KPiA+IGNvZGUgdGhhdCBjYWxscyBibGtfZ2V0X3JlcXVlc3QoKSwgZS5nLiBzY3Np
X2V4ZWN1dGVfcmVxKCksIGlzIG5vdCBhbg0KPiA+IG9wdGlvbiBzaW5jZSBzY3NpX3NlbmRfZWhf
Y21uZCgpIGNhbiBiZSBjYWxsZWQgaWYgYWxsIHJlcXVlc3RzIGFyZQ0KPiA+IGFsbG9jYXRlZCBh
bmQgaWYgbm8gcmVxdWVzdHMgd2lsbCBtYWtlIHByb2dyZXNzIHdpdGhvdXQgYWJvcnRpbmcgYW55
DQo+ID4gb2YgdGhlc2UgcmVxdWVzdHMuDQo+IA0KPiBJZiB3ZSBuZWVkIHRvIHByZXZlbnQgdGhl
IC5xdWV1ZWNvbW1hbmQoKSBpbiBzY3NpX3NlbmRfZWhfY21uZCgpLCB3aGF0DQo+IGRvIHlvdSB0
aGluayBvZiB0aGUgYXBwcm9hY2ggYnkgcmVxdWV1aW5nIHRoZSBFSCBjb21tYW5kIHZpYQ0KPiBz
Y3NpX21xX3JlcXVldWVfY21kKCkvc2NzaV9yZXF1ZXVlX2NtZCgpPyBBbmQgdGhlIEVIIHJlcXVl
c3Qgd2lsbCBiZQ0KPiBkaXNwYXRjaGVkIGZpbmFsbHkgd2hlbiB0aGUgcXVldWUgYmVjb21lcyB1
bnF1aWVzY2VkIG9yIHRoZSBTVE9QUEVEDQo+IGlzIGNsZWFyZWQuDQoNCkNhbGxpbmcgc2NzaV9t
cV9yZXF1ZXVlX2NtZCgpIHdvdWxkIHRyaWdnZXIgc2NzaV9tcV91bmluaXRfY21kKCksDQpibGtf
bXFfcHV0X2RyaXZlcl90YWcoKSwgYmxrX21xX2dldF9kcml2ZXJfdGFnKCkgYW5kIHNjc2lfbXFf
cHJlcF9mbigpLg0KVGhhdCBjb3VsZCBjYXVzZSBzY3NpX2VoX3NjbWRfYWRkKCkgdG8gYmUgY2Fs
bGVkIG11bHRpcGxlIHRpbWVzIGZvciB0aGUNCnNhbWUgU0NTSSBjb21tYW5kLiBIb3dldmVyLCB0
aGF0IHdvdWxkIGJyZWFrIG9uZSBvZiB0aGUgYXNzdW1wdGlvbnMNCnNjc2lfZWhfc2NtZF9hZGQo
KSBpcyBiYXNlZCBvbiwgbmFtZWx5IHRoYXQgdGhhdCBmdW5jdGlvbiBnZXRzIGNhbGxlZA0Kb25s
eSBvbmNlIHBlciBTQ1NJIGNvbW1hbmQgdGhhdCBpcyBpbiBwcm9ncmVzcy4NCg0KQmFydC4g
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v2 3/4] scsi: Avoid that .queuecommand() gets called for a quiesced SCSI device
2018-01-12 22:45 ` Bart Van Assche
@ 2018-01-13 15:54 ` Ming Lei
2018-01-25 17:07 ` Bart Van Assche
0 siblings, 1 reply; 10+ messages in thread
From: Ming Lei @ 2018-01-13 15:54 UTC (permalink / raw)
To: Bart Van Assche
Cc: linux-block@vger.kernel.org, jthumshirn@suse.de,
linux-rdma@vger.kernel.org, jgg@mellanox.com, hch@lst.de,
martin.petersen@oracle.com, axboe@kernel.dk,
linux-scsi@vger.kernel.org, hare@suse.com, dledford@redhat.com
On Fri, Jan 12, 2018 at 10:45:57PM +0000, Bart Van Assche wrote:
> On Thu, 2018-01-11 at 10:23 +0800, Ming Lei wrote:
> > > not sufficient to prevent .queuecommand() calls from scsi_send_eh_cmnd().
> >
> > Given it is error handling, do we need to prevent the .queuecommand() call
> > in scsi_send_eh_cmnd()? Could you share us what the actual issue
> > observed is from user view?
>
> Please have a look at the kernel bug report in the description of patch 4/4 of
> this series.
Thanks for your mentioning, then I can find the following comment in
srp_queuecommand():
/*
* The SCSI EH thread is the only context from which srp_queuecommand()
* can get invoked for blocked devices (SDEV_BLOCK /
* SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
* locking the rport mutex if invoked from inside the SCSI EH.
*/
That means EH request is allowed to send to blocked device.
I also replied in patch 4/4, looks there is one simple one line change
which should address the issue of 'sleep in atomic context', please discuss that
in patch 4/4 thread.
--
Ming
^ permalink raw reply [flat|nested] 10+ messages in thread
* Re: [PATCH v2 3/4] scsi: Avoid that .queuecommand() gets called for a quiesced SCSI device
2018-01-13 15:54 ` Ming Lei
@ 2018-01-25 17:07 ` Bart Van Assche
0 siblings, 0 replies; 10+ messages in thread
From: Bart Van Assche @ 2018-01-25 17:07 UTC (permalink / raw)
To: ming.lei@redhat.com
Cc: linux-block@vger.kernel.org, jthumshirn@suse.de,
linux-rdma@vger.kernel.org, jgg@mellanox.com, hch@lst.de,
martin.petersen@oracle.com, axboe@kernel.dk,
linux-scsi@vger.kernel.org, hare@suse.com, dledford@redhat.com
T24gU2F0LCAyMDE4LTAxLTEzIGF0IDIzOjU0ICswODAwLCBNaW5nIExlaSB3cm90ZToNCj4gVGhh
bmtzIGZvciB5b3VyIG1lbnRpb25pbmcsIHRoZW4gSSBjYW4gZmluZCB0aGUgZm9sbG93aW5nIGNv
bW1lbnQgaW4NCj4gc3JwX3F1ZXVlY29tbWFuZCgpOg0KPiANCj4gCQkvKg0KPiAJCSAqIFRoZSBT
Q1NJIEVIIHRocmVhZCBpcyB0aGUgb25seSBjb250ZXh0IGZyb20gd2hpY2ggc3JwX3F1ZXVlY29t
bWFuZCgpDQo+IAkJICogY2FuIGdldCBpbnZva2VkIGZvciBibG9ja2VkIGRldmljZXMgKFNERVZf
QkxPQ0sgLw0KPiAJCSAqIFNERVZfQ1JFQVRFRF9CTE9DSykuIEF2b2lkIHJhY2luZyB3aXRoIHNy
cF9yZWNvbm5lY3RfcnBvcnQoKSBieQ0KPiAJCSAqIGxvY2tpbmcgdGhlIHJwb3J0IG11dGV4IGlm
IGludm9rZWQgZnJvbSBpbnNpZGUgdGhlIFNDU0kgRUguDQo+IAkJICovDQo+IA0KPiBUaGF0IG1l
YW5zIEVIIHJlcXVlc3QgaXMgYWxsb3dlZCB0byBzZW5kIHRvIGJsb2NrZWQgZGV2aWNlLg0KDQpO
byB3YXkuIFRoYXQncyBhIGJ1ZyBhbmQgYSBidWcgdGhhdCBuZWVkcyB0byBiZSBmaXhlZC4NCg0K
QmFydC4=
^ permalink raw reply [flat|nested] 10+ messages in thread
* [PATCH v2 4/4] IB/srp: Fix a sleep-in-invalid-context bug
2018-01-10 18:18 [PATCH v2 0/4] Make SCSI transport recovery more robust Bart Van Assche
` (2 preceding siblings ...)
2018-01-10 18:18 ` [PATCH v2 3/4] scsi: Avoid that .queuecommand() gets called for a quiesced SCSI device Bart Van Assche
@ 2018-01-10 18:18 ` Bart Van Assche
2018-01-13 15:42 ` Ming Lei
3 siblings, 1 reply; 10+ messages in thread
From: Bart Van Assche @ 2018-01-10 18:18 UTC (permalink / raw)
To: Jens Axboe
Cc: linux-block, Martin K . Petersen, Christoph Hellwig,
Hannes Reinecke, Jason Gunthorpe, Doug Ledford, linux-scsi,
linux-rdma, Bart Van Assche
The previous two patches guarantee that srp_queuecommand() does not get
invoked while reconnecting occurs. Hence remove the code from
srp_queuecommand() that prevents command queueing while reconnecting.
This patch avoids that the following can appear in the kernel log:
BUG: sleeping function called from invalid context at kernel/locking/mutex.c:747
in_atomic(): 1, irqs_disabled(): 0, pid: 5600, name: scsi_eh_9
1 lock held by scsi_eh_9/5600:
#0: (rcu_read_lock){....}, at: [<00000000cbb798c7>] __blk_mq_run_hw_queue+0xf1/0x1e0
Preemption disabled at:
[<00000000139badf2>] __blk_mq_delay_run_hw_queue+0x78/0xf0
CPU: 9 PID: 5600 Comm: scsi_eh_9 Tainted: G W 4.15.0-rc4-dbg+ #1
Hardware name: Dell Inc. PowerEdge R720/0VWT90, BIOS 2.5.4 01/22/2016
Call Trace:
dump_stack+0x67/0x99
___might_sleep+0x16a/0x250 [ib_srp]
__mutex_lock+0x46/0x9d0
srp_queuecommand+0x356/0x420 [ib_srp]
scsi_dispatch_cmd+0xf6/0x3f0
scsi_queue_rq+0x4a8/0x5f0
blk_mq_dispatch_rq_list+0x73/0x440
blk_mq_sched_dispatch_requests+0x109/0x1a0
__blk_mq_run_hw_queue+0x131/0x1e0
__blk_mq_delay_run_hw_queue+0x9a/0xf0
blk_mq_run_hw_queue+0xc0/0x1e0
blk_mq_start_hw_queues+0x2c/0x40
scsi_run_queue+0x18e/0x2d0
scsi_run_host_queues+0x22/0x40
scsi_error_handler+0x18d/0x5f0
kthread+0x11c/0x140
ret_from_fork+0x24/0x30
Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
Reviewed-by: Hannes Reinecke <hare@suse.com>
Cc: Jason Gunthorpe <jgg@mellanox.com>
Cc: Doug Ledford <dledford@redhat.com>
---
drivers/infiniband/ulp/srp/ib_srp.c | 21 ++-------------------
1 file changed, 2 insertions(+), 19 deletions(-)
diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
index 972d4b3c5223..670f187ecb91 100644
--- a/drivers/infiniband/ulp/srp/ib_srp.c
+++ b/drivers/infiniband/ulp/srp/ib_srp.c
@@ -2149,7 +2149,6 @@ static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
{
struct srp_target_port *target = host_to_target(shost);
- struct srp_rport *rport = target->rport;
struct srp_rdma_ch *ch;
struct srp_request *req;
struct srp_iu *iu;
@@ -2159,16 +2158,6 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
u32 tag;
u16 idx;
int len, ret;
- const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
-
- /*
- * The SCSI EH thread is the only context from which srp_queuecommand()
- * can get invoked for blocked devices (SDEV_BLOCK /
- * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
- * locking the rport mutex if invoked from inside the SCSI EH.
- */
- if (in_scsi_eh)
- mutex_lock(&rport->mutex);
scmnd->result = srp_chkready(target->rport);
if (unlikely(scmnd->result))
@@ -2230,13 +2219,7 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
goto err_unmap;
}
- ret = 0;
-
-unlock_rport:
- if (in_scsi_eh)
- mutex_unlock(&rport->mutex);
-
- return ret;
+ return 0;
err_unmap:
srp_unmap_data(scmnd, ch, req);
@@ -2258,7 +2241,7 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
ret = SCSI_MLQUEUE_HOST_BUSY;
}
- goto unlock_rport;
+ return ret;
}
/*
--
2.15.1
^ permalink raw reply related [flat|nested] 10+ messages in thread* Re: [PATCH v2 4/4] IB/srp: Fix a sleep-in-invalid-context bug
2018-01-10 18:18 ` [PATCH v2 4/4] IB/srp: Fix a sleep-in-invalid-context bug Bart Van Assche
@ 2018-01-13 15:42 ` Ming Lei
0 siblings, 0 replies; 10+ messages in thread
From: Ming Lei @ 2018-01-13 15:42 UTC (permalink / raw)
To: Bart Van Assche
Cc: Jens Axboe, linux-block, Martin K . Petersen, Christoph Hellwig,
Hannes Reinecke, Jason Gunthorpe, Doug Ledford, linux-scsi,
linux-rdma
On Wed, Jan 10, 2018 at 10:18:17AM -0800, Bart Van Assche wrote:
> The previous two patches guarantee that srp_queuecommand() does not get
> invoked while reconnecting occurs. Hence remove the code from
> srp_queuecommand() that prevents command queueing while reconnecting.
> This patch avoids that the following can appear in the kernel log:
>
> BUG: sleeping function called from invalid context at kernel/locking/mutex.c:747
> in_atomic(): 1, irqs_disabled(): 0, pid: 5600, name: scsi_eh_9
> 1 lock held by scsi_eh_9/5600:
> #0: (rcu_read_lock){....}, at: [<00000000cbb798c7>] __blk_mq_run_hw_queue+0xf1/0x1e0
> Preemption disabled at:
> [<00000000139badf2>] __blk_mq_delay_run_hw_queue+0x78/0xf0
> CPU: 9 PID: 5600 Comm: scsi_eh_9 Tainted: G W 4.15.0-rc4-dbg+ #1
> Hardware name: Dell Inc. PowerEdge R720/0VWT90, BIOS 2.5.4 01/22/2016
> Call Trace:
> dump_stack+0x67/0x99
> ___might_sleep+0x16a/0x250 [ib_srp]
> __mutex_lock+0x46/0x9d0
> srp_queuecommand+0x356/0x420 [ib_srp]
> scsi_dispatch_cmd+0xf6/0x3f0
> scsi_queue_rq+0x4a8/0x5f0
> blk_mq_dispatch_rq_list+0x73/0x440
> blk_mq_sched_dispatch_requests+0x109/0x1a0
> __blk_mq_run_hw_queue+0x131/0x1e0
> __blk_mq_delay_run_hw_queue+0x9a/0xf0
> blk_mq_run_hw_queue+0xc0/0x1e0
> blk_mq_start_hw_queues+0x2c/0x40
> scsi_run_queue+0x18e/0x2d0
> scsi_run_host_queues+0x22/0x40
> scsi_error_handler+0x18d/0x5f0
> kthread+0x11c/0x140
> ret_from_fork+0x24/0x30
>
> Signed-off-by: Bart Van Assche <bart.vanassche@wdc.com>
> Reviewed-by: Hannes Reinecke <hare@suse.com>
> Cc: Jason Gunthorpe <jgg@mellanox.com>
> Cc: Doug Ledford <dledford@redhat.com>
> ---
> drivers/infiniband/ulp/srp/ib_srp.c | 21 ++-------------------
> 1 file changed, 2 insertions(+), 19 deletions(-)
>
> diff --git a/drivers/infiniband/ulp/srp/ib_srp.c b/drivers/infiniband/ulp/srp/ib_srp.c
> index 972d4b3c5223..670f187ecb91 100644
> --- a/drivers/infiniband/ulp/srp/ib_srp.c
> +++ b/drivers/infiniband/ulp/srp/ib_srp.c
> @@ -2149,7 +2149,6 @@ static void srp_handle_qp_err(struct ib_cq *cq, struct ib_wc *wc,
> static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
> {
> struct srp_target_port *target = host_to_target(shost);
> - struct srp_rport *rport = target->rport;
> struct srp_rdma_ch *ch;
> struct srp_request *req;
> struct srp_iu *iu;
> @@ -2159,16 +2158,6 @@ static int srp_queuecommand(struct Scsi_Host *shost, struct scsi_cmnd *scmnd)
> u32 tag;
> u16 idx;
> int len, ret;
> - const bool in_scsi_eh = !in_interrupt() && current == shost->ehandler;
> -
> - /*
> - * The SCSI EH thread is the only context from which srp_queuecommand()
> - * can get invoked for blocked devices (SDEV_BLOCK /
> - * SDEV_CREATED_BLOCK). Avoid racing with srp_reconnect_rport() by
> - * locking the rport mutex if invoked from inside the SCSI EH.
> - */
> - if (in_scsi_eh)
> - mutex_lock(&rport->mutex);
This issue is triggered because that the above EH handler context detection is
wrong since scsi_run_host_queues() from scsi_error_handler() is for
handling normal requests, see the comment below:
/*
* finally we need to re-initiate requests that may be pending. we will
* have had everything blocked while error handling is taking place, and
* now that error recovery is done, we will need to ensure that these
* requests are started.
*/
scsi_run_host_queues(shost);
This issue should have been fixed by the following one line change simply:
in_scsi_eh = !!scmd->eh_action;
--
Ming
^ permalink raw reply [flat|nested] 10+ messages in thread