* [PATCH 0/3 v6] Fixes for SCSI device removal
@ 2012-05-04 15:00 Bart Van Assche
  2012-05-04 15:03 ` [PATCH 1/3] sd: Fix device removal NULL pointer dereference Bart Van Assche
                   ` (2 more replies)
  0 siblings, 3 replies; 21+ messages in thread
From: Bart Van Assche @ 2012-05-04 15:00 UTC (permalink / raw)
  To: linux-scsi
  Cc: James Bottomley, Jun'ichi Nomura, Mike Christie,
	Stefan Richter, Tomas Henzl, Mike Snitzer
This is version six of the SCSI device removal patches.
Changes compared to v5:
- Removed the function scsi_free_queue() and inlined that function
  in its callers.
- Added two additional patches.
^ permalink raw reply	[flat|nested] 21+ messages in thread
* [PATCH 1/3] sd: Fix device removal NULL pointer dereference
  2012-05-04 15:00 [PATCH 0/3 v6] Fixes for SCSI device removal Bart Van Assche
@ 2012-05-04 15:03 ` Bart Van Assche
  2012-05-04 15:06 ` [PATCH 2/3] Stop accepting SCSI requests before removing a device Bart Van Assche
  2012-05-04 15:07 ` [PATCH 3/3] Make scsi_free_queue() abort pending requests Bart Van Assche
  2 siblings, 0 replies; 21+ messages in thread
From: Bart Van Assche @ 2012-05-04 15:03 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: linux-scsi, James Bottomley, Jun'ichi Nomura, Mike Christie,
	Stefan Richter, Tomas Henzl, Mike Snitzer
Since scsi_prep_fn() may be invoked concurrently with
__scsi_remove_device(), keep the queuedata pointer in
__scsi_remove_device(). This patch fixes a kernel oops that
can be triggered by USB device removal. See also
http://www.spinics.net/lists/linux-scsi/msg56254.html.
This patch has been tested as follows:
- Verified that repeated removal (100.000 times) of a
  SCSI disk (sd) device went fine:
    $ target='id_ext=0002c9030005f34e,ioc_guid=0002c9030005f34e,dgid=fe800000000000000002c9030005f34f,pkey=ffff,service_id=0002c9030005f34e'; \
      for ((i=0;i<100000;i++)); do \
        echo "$target" >/sys/class/infiniband_srp/srp-mlx4_0-1/add_target; \
      done
- Verified that a fio data integrity test on a dm device ran
  fine. The dm device was configured to communicate via two
  different paths (iSCSI + SRP). The SRP initiator device node
  was removed and recreated repeatedly.
Reported-by: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Cc: James Bottomley <JBottomley@parallels.com>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Cc: Stefan Richter <stefanr@s5r6.in-berlin.de>
Cc: <stable@vger.kernel.org>
---
 drivers/scsi/hosts.c      |    8 +++++++-
 drivers/scsi/scsi_lib.c   |   34 +++++++---------------------------
 drivers/scsi/scsi_priv.h  |    1 -
 drivers/scsi/scsi_sysfs.c |    5 +----
 4 files changed, 15 insertions(+), 33 deletions(-)
diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index 351dc0b..ff1a0be 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -296,9 +296,15 @@ static void scsi_host_dev_release(struct device *dev)
 		destroy_workqueue(shost->work_q);
 	q = shost->uspace_req_q;
 	if (q) {
+		/*
+		 * Note: freeing queuedata before invoking blk_cleanup_queue()
+		 * is safe here because no request function is associated with
+		 * uspace_req_q. See also the __scsi_alloc_queue() call in
+		 * drivers/scsi/scsi_tgt_lib.c.
+		 */
 		kfree(q->queuedata);
 		q->queuedata = NULL;
-		scsi_free_queue(q);
+		blk_cleanup_queue(q);
 	}
 
 	scsi_destroy_command_freelist(shost);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 5dfd749..3d11485 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -406,10 +406,7 @@ static void scsi_run_queue(struct request_queue *q)
 	LIST_HEAD(starved_list);
 	unsigned long flags;
 
-	/* if the device is dead, sdev will be NULL, so no queue to run */
-	if (!sdev)
-		return;
-
+	BUG_ON(!sdev);
 	shost = sdev->host;
 	if (scsi_target(sdev)->single_lun)
 		scsi_single_lun_run(sdev);
@@ -1370,9 +1367,9 @@ static inline int scsi_host_queue_ready(struct request_queue *q,
  * may be changed after request stacking drivers call the function,
  * regardless of taking lock or not.
  *
- * When scsi can't dispatch I/Os anymore and needs to kill I/Os
- * (e.g. !sdev), scsi needs to return 'not busy'.
- * Otherwise, request stacking drivers may hold requests forever.
+ * When scsi can't dispatch I/Os anymore and needs to kill I/Os scsi
+ * needs to return 'not busy'. Otherwise, request stacking drivers
+ * may hold requests forever.
  */
 static int scsi_lld_busy(struct request_queue *q)
 {
@@ -1380,9 +1377,10 @@ static int scsi_lld_busy(struct request_queue *q)
 	struct Scsi_Host *shost;
 	struct scsi_target *starget;
 
-	if (!sdev)
+	if (blk_queue_dead(q))
 		return 0;
 
+	BUG_ON(!sdev);
 	shost = sdev->host;
 	starget = scsi_target(sdev);
 
@@ -1487,11 +1485,7 @@ static void scsi_request_fn(struct request_queue *q)
 	struct scsi_cmnd *cmd;
 	struct request *req;
 
-	if (!sdev) {
-		while ((req = blk_peek_request(q)) != NULL)
-			scsi_kill_request(req, q);
-		return;
-	}
+	BUG_ON(!sdev);
 
 	if(!get_device(&sdev->sdev_gendev))
 		/* We must be tearing the block queue down already */
@@ -1694,20 +1688,6 @@ struct request_queue *scsi_alloc_queue(struct scsi_device *sdev)
 	return q;
 }
 
-void scsi_free_queue(struct request_queue *q)
-{
-	unsigned long flags;
-
-	WARN_ON(q->queuedata);
-
-	/* cause scsi_request_fn() to kill all non-finished requests */
-	spin_lock_irqsave(q->queue_lock, flags);
-	q->request_fn(q);
-	spin_unlock_irqrestore(q->queue_lock, flags);
-
-	blk_cleanup_queue(q);
-}
-
 /*
  * Function:    scsi_block_requests()
  *
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index be4fa6d..fd9f57f 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -84,7 +84,6 @@ extern void scsi_next_command(struct scsi_cmnd *cmd);
 extern void scsi_io_completion(struct scsi_cmnd *, unsigned int);
 extern void scsi_run_host_queues(struct Scsi_Host *shost);
 extern struct request_queue *scsi_alloc_queue(struct scsi_device *sdev);
-extern void scsi_free_queue(struct request_queue *q);
 extern int scsi_init_queue(void);
 extern void scsi_exit_queue(void);
 struct request_queue;
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 04c2a27..42c35ff 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -971,11 +971,8 @@ void __scsi_remove_device(struct scsi_device *sdev)
 		sdev->host->hostt->slave_destroy(sdev);
 	transport_destroy_device(dev);
 
-	/* cause the request function to reject all I/O requests */
-	sdev->request_queue->queuedata = NULL;
-
 	/* Freeing the queue signals to block that we're done */
-	scsi_free_queue(sdev->request_queue);
+	blk_cleanup_queue(sdev->request_queue);
 	put_device(dev);
 }
 
-- 
1.7.7
^ permalink raw reply related	[flat|nested] 21+ messages in thread
* [PATCH 2/3] Stop accepting SCSI requests before removing a device
  2012-05-04 15:00 [PATCH 0/3 v6] Fixes for SCSI device removal Bart Van Assche
  2012-05-04 15:03 ` [PATCH 1/3] sd: Fix device removal NULL pointer dereference Bart Van Assche
@ 2012-05-04 15:06 ` Bart Van Assche
  2012-05-04 20:16   ` Mike Christie
  2012-05-04 15:07 ` [PATCH 3/3] Make scsi_free_queue() abort pending requests Bart Van Assche
  2 siblings, 1 reply; 21+ messages in thread
From: Bart Van Assche @ 2012-05-04 15:06 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: linux-scsi, James Bottomley, Jun'ichi Nomura, Mike Christie,
	Stefan Richter, Tomas Henzl, Mike Snitzer
Source code inspection of __scsi_remove_device() revealed a
race condition in this function: no new SCSI requests must
be accepted for a SCSI device once device removal starts.
Reported-by: Mike Christie <michaelc@cs.wisc.edu>
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Cc: James Bottomley <JBottomley@parallels.com>
Cc: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
---
 drivers/scsi/scsi_sysfs.c |   12 +++++++++---
 1 files changed, 9 insertions(+), 3 deletions(-)
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 42c35ff..f8fc240 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -955,12 +955,20 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev)
 void __scsi_remove_device(struct scsi_device *sdev)
 {
 	struct device *dev = &sdev->sdev_gendev;
+	struct request_queue *q = sdev->request_queue;
+
+	/*
+	 * Stop accepting new requests before tearing down the
+	 * device. Note: the actual queue deallocation happens in
+	 * scsi_device_dev_release_usercontext().
+	 */
+	blk_cleanup_queue(q);
 
 	if (sdev->is_visible) {
 		if (scsi_device_set_state(sdev, SDEV_CANCEL) != 0)
 			return;
 
-		bsg_unregister_queue(sdev->request_queue);
+		bsg_unregister_queue(q);
 		device_unregister(&sdev->sdev_dev);
 		transport_remove_device(dev);
 		device_del(dev);
@@ -971,8 +979,6 @@ void __scsi_remove_device(struct scsi_device *sdev)
 		sdev->host->hostt->slave_destroy(sdev);
 	transport_destroy_device(dev);
 
-	/* Freeing the queue signals to block that we're done */
-	blk_cleanup_queue(sdev->request_queue);
 	put_device(dev);
 }
 
^ permalink raw reply related	[flat|nested] 21+ messages in thread
* [PATCH 3/3] Make scsi_free_queue() abort pending requests
  2012-05-04 15:00 [PATCH 0/3 v6] Fixes for SCSI device removal Bart Van Assche
  2012-05-04 15:03 ` [PATCH 1/3] sd: Fix device removal NULL pointer dereference Bart Van Assche
  2012-05-04 15:06 ` [PATCH 2/3] Stop accepting SCSI requests before removing a device Bart Van Assche
@ 2012-05-04 15:07 ` Bart Van Assche
  2012-05-04 20:25   ` Mike Christie
  2 siblings, 1 reply; 21+ messages in thread
From: Bart Van Assche @ 2012-05-04 15:07 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: linux-scsi, James Bottomley, Jun'ichi Nomura, Mike Christie,
	Stefan Richter, Tomas Henzl, Mike Snitzer
It is possible that a SCSI LLD invokes scsi_remove_host() after a
request has been queued via scsi_host_template.queuecommand() but
before scsi_done() has been invoked for that request. This may
cause the SCSI error handler to start processing a timeout long
after scsi_remove_host() finished. This may even cause LLD error
handler functions to be invoked after the LLD kernel module has
been removed. Change this behavior such that the SCSI error
handler no longer invokes timeout processing after
scsi_remove_host() finished.
Note: blk_abort_queue() indirectly invokes the SCSI LLD
eh_abort_handler. This patch may cause that abort handler to be
invoked before the SCSI timeout elapsed. Hence follow-up
patches may be necessary to fix race conditions in LLDs that
expect that the abort handler is only invoked after the SCSI
timeout elapsed. Maybe this is why a similar patch has been
reverted about a year ago (see also commit
09c9d4c9b6a2b5909ae3c6265e4cd3820b636863).
Signed-off-by: Bart Van Assche <bvanassche@acm.org>
Cc: James Bottomley <jbottomley@parallels.com>
Cc: Mike Christie <michaelc@cs.wisc.edu>
Cc: Tomas Henzl <thenzl@redhat.com>
Cc: Mike Snitzer <snitzer@redhat.com>
Cc: Jun'ichi Nomura <j-nomura@ce.jp.nec.com>
---
 drivers/scsi/scsi_sysfs.c |    1 +
 1 files changed, 1 insertions(+), 0 deletions(-)
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index f8fc240..a61051d 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -963,6 +963,7 @@ void __scsi_remove_device(struct scsi_device *sdev)
 	 * scsi_device_dev_release_usercontext().
 	 */
 	blk_cleanup_queue(q);
+	blk_abort_queue(q);
 
 	if (sdev->is_visible) {
 		if (scsi_device_set_state(sdev, SDEV_CANCEL) != 0)
-- 
1.7.7
^ permalink raw reply related	[flat|nested] 21+ messages in thread
* Re: [PATCH 2/3] Stop accepting SCSI requests before removing a device
  2012-05-04 15:06 ` [PATCH 2/3] Stop accepting SCSI requests before removing a device Bart Van Assche
@ 2012-05-04 20:16   ` Mike Christie
  2012-05-04 20:30     ` Mike Christie
  0 siblings, 1 reply; 21+ messages in thread
From: Mike Christie @ 2012-05-04 20:16 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: linux-scsi, James Bottomley, Jun'ichi Nomura, Stefan Richter,
	Tomas Henzl, Mike Snitzer
On 05/04/2012 10:06 AM, Bart Van Assche wrote:
> diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
> index 42c35ff..f8fc240 100644
> --- a/drivers/scsi/scsi_sysfs.c
> +++ b/drivers/scsi/scsi_sysfs.c
> @@ -955,12 +955,20 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev)
>  void __scsi_remove_device(struct scsi_device *sdev)
>  {
>  	struct device *dev = &sdev->sdev_gendev;
> +	struct request_queue *q = sdev->request_queue;
> +
> +	/*
> +	 * Stop accepting new requests before tearing down the
> +	 * device. Note: the actual queue deallocation happens in
> +	 * scsi_device_dev_release_usercontext().
> +	 */
> +	blk_cleanup_queue(q);
>  
>  	if (sdev->is_visible) {
>  		if (scsi_device_set_state(sdev, SDEV_CANCEL) != 0)
>  			return;
>  
> -		bsg_unregister_queue(sdev->request_queue);
> +		bsg_unregister_queue(q);
>  		device_unregister(&sdev->sdev_dev);
>  		transport_remove_device(dev);
>  		device_del(dev);
> @@ -971,8 +979,6 @@ void __scsi_remove_device(struct scsi_device *sdev)
My fault. The blk_cleanup_queue call should be right before the
scsi_device_set_state(sdev, SDEV_DEL); call in this function.
It needs to be after the device_del call, because that triggers the SCSI
ULD removal which can send IO.
>  		sdev->host->hostt->slave_destroy(sdev);
>  	transport_destroy_device(dev);
>  
> -	/* Freeing the queue signals to block that we're done */
> -	blk_cleanup_queue(sdev->request_queue);
>  	put_device(dev);
>  }
>  
> --
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
^ permalink raw reply	[flat|nested] 21+ messages in thread
* Re: [PATCH 3/3] Make scsi_free_queue() abort pending requests
  2012-05-04 15:07 ` [PATCH 3/3] Make scsi_free_queue() abort pending requests Bart Van Assche
@ 2012-05-04 20:25   ` Mike Christie
  2012-05-04 20:32     ` Mike Christie
  2012-05-05 13:41     ` Bart Van Assche
  0 siblings, 2 replies; 21+ messages in thread
From: Mike Christie @ 2012-05-04 20:25 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: linux-scsi, James Bottomley, Jun'ichi Nomura, Stefan Richter,
	Tomas Henzl, Mike Snitzer
On 05/04/2012 10:07 AM, Bart Van Assche wrote:
> It is possible that a SCSI LLD invokes scsi_remove_host() after a
> request has been queued via scsi_host_template.queuecommand() but
> before scsi_done() has been invoked for that request. This may
If that happens won't we wait in blk_cleanup_queue->blk_drain_queue for
that IO to be completed (completed normally or timed out and processed
through that path)?
Is the point that once we call scsi_remove_host that the LLD is not
going to process any more IO so IO will timeout, so just call
blk_abort_queue to speed up that cleanup? If so I can see where you are
coming from.
I do not know if that is correct behavior though. Like it has been
discussed before the scsi ULD shutdown code runs from this path, so IO
is sent. All FC drivers are failing that IO as well as some other
drivers like you said, so I do not know if that is a bug in the FC
drivers or the SCSI ULD shutdown/remove code should not be sending IO???
I thought we were supposed to still execute that IO.
> ---
>  drivers/scsi/scsi_sysfs.c |    1 +
>  1 files changed, 1 insertions(+), 0 deletions(-)
> 
> diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
> index f8fc240..a61051d 100644
> --- a/drivers/scsi/scsi_sysfs.c
> +++ b/drivers/scsi/scsi_sysfs.c
> @@ -963,6 +963,7 @@ void __scsi_remove_device(struct scsi_device *sdev)
>  	 * scsi_device_dev_release_usercontext().
>  	 */
>  	blk_cleanup_queue(q);
> +	blk_abort_queue(q);
>  
>  	if (sdev->is_visible) {
>  		if (scsi_device_set_state(sdev, SDEV_CANCEL) != 0)
^ permalink raw reply	[flat|nested] 21+ messages in thread
* Re: [PATCH 2/3] Stop accepting SCSI requests before removing a device
  2012-05-04 20:16   ` Mike Christie
@ 2012-05-04 20:30     ` Mike Christie
  2012-05-05 13:04       ` Bart Van Assche
  0 siblings, 1 reply; 21+ messages in thread
From: Mike Christie @ 2012-05-04 20:30 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: linux-scsi, James Bottomley, Jun'ichi Nomura, Stefan Richter,
	Tomas Henzl, Mike Snitzer
[-- Attachment #1: Type: text/plain, Size: 1781 bytes --]
On 05/04/2012 03:16 PM, Mike Christie wrote:
> On 05/04/2012 10:06 AM, Bart Van Assche wrote:
>> diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
>> index 42c35ff..f8fc240 100644
>> --- a/drivers/scsi/scsi_sysfs.c
>> +++ b/drivers/scsi/scsi_sysfs.c
>> @@ -955,12 +955,20 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev)
>>  void __scsi_remove_device(struct scsi_device *sdev)
>>  {
>>  	struct device *dev = &sdev->sdev_gendev;
>> +	struct request_queue *q = sdev->request_queue;
>> +
>> +	/*
>> +	 * Stop accepting new requests before tearing down the
>> +	 * device. Note: the actual queue deallocation happens in
>> +	 * scsi_device_dev_release_usercontext().
>> +	 */
>> +	blk_cleanup_queue(q);
>>  
>>  	if (sdev->is_visible) {
>>  		if (scsi_device_set_state(sdev, SDEV_CANCEL) != 0)
>>  			return;
>>  
>> -		bsg_unregister_queue(sdev->request_queue);
>> +		bsg_unregister_queue(q);
>>  		device_unregister(&sdev->sdev_dev);
>>  		transport_remove_device(dev);
>>  		device_del(dev);
>> @@ -971,8 +979,6 @@ void __scsi_remove_device(struct scsi_device *sdev)
> 
> My fault. The blk_cleanup_queue call should be right before the
> scsi_device_set_state(sdev, SDEV_DEL); call in this function.
> 
> It needs to be after the device_del call, because that triggers the SCSI
> ULD removal which can send IO.
> 
I made the attached patch. It allows the scsi ULD removal IO to execute
ok. I tested with iscsi devices that have write caches.
With your patch I get:
Apr 27 19:09:51 noisyr6 kernel: sd 11:0:0:1: [sdc] Synchronizing SCSI cache
Apr 27 19:09:51 noisyr6 kernel: sd 11:0:0:1: [sdc] Result:
hostbyte=DID_TRANSPORT_FAILFAST driverbyte=DRIVER_OK
With my patch I just get:
May  4 16:29:27 noisyr6 kernel: sd 8:0:0:1: [sdc] Synchronizing SCSI cache
[-- Attachment #2: scsi-clean-queue-after-del.patch --]
[-- Type: text/x-patch, Size: 1165 bytes --]
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 42c35ff..11dc3ff 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -955,24 +955,30 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev)
 void __scsi_remove_device(struct scsi_device *sdev)
 {
 	struct device *dev = &sdev->sdev_gendev;
+	struct request_queue *q = sdev->request_queue;
 
 	if (sdev->is_visible) {
 		if (scsi_device_set_state(sdev, SDEV_CANCEL) != 0)
 			return;
 
-		bsg_unregister_queue(sdev->request_queue);
+		bsg_unregister_queue(q);
 		device_unregister(&sdev->sdev_dev);
 		transport_remove_device(dev);
 		device_del(dev);
 	} else
 		put_device(&sdev->sdev_dev);
+	/*
+	 * Stop accepting new requests before tearing down the
+	 * queue. Note: the actual queue deallocation happens in
+	 * scsi_device_dev_release_usercontext().
+	 */
+	blk_cleanup_queue(q);
+
 	scsi_device_set_state(sdev, SDEV_DEL);
 	if (sdev->host->hostt->slave_destroy)
 		sdev->host->hostt->slave_destroy(sdev);
 	transport_destroy_device(dev);
 
-	/* Freeing the queue signals to block that we're done */
-	blk_cleanup_queue(sdev->request_queue);
 	put_device(dev);
 }
 
^ permalink raw reply related	[flat|nested] 21+ messages in thread
* Re: [PATCH 3/3] Make scsi_free_queue() abort pending requests
  2012-05-04 20:25   ` Mike Christie
@ 2012-05-04 20:32     ` Mike Christie
  2012-05-05  6:07       ` Bart Van Assche
  2012-05-05 13:41     ` Bart Van Assche
  1 sibling, 1 reply; 21+ messages in thread
From: Mike Christie @ 2012-05-04 20:32 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: linux-scsi, James Bottomley, Jun'ichi Nomura, Stefan Richter,
	Tomas Henzl, Mike Snitzer
On 05/04/2012 03:25 PM, Mike Christie wrote:
> On 05/04/2012 10:07 AM, Bart Van Assche wrote:
>> It is possible that a SCSI LLD invokes scsi_remove_host() after a
>> request has been queued via scsi_host_template.queuecommand() but
>> before scsi_done() has been invoked for that request. This may
> 
> If that happens won't we wait in blk_cleanup_queue->blk_drain_queue for
> that IO to be completed (completed normally or timed out and processed
> through that path)?
> 
> Is the point that once we call scsi_remove_host that the LLD is not
> going to process any more IO so IO will timeout, so just call
> blk_abort_queue to speed up that cleanup? If so I can see where you are
> coming from.
> 
Oh not wait. I do not get the patch. After blk_cleanup_queue runs then
no IO should be running and no new IO can be queued can it?
>>  	 */
>>  	blk_cleanup_queue(q);
>> +	blk_abort_queue(q);
>>  
>>  	if (sdev->is_visible) {
>>  		if (scsi_device_set_state(sdev, SDEV_CANCEL) != 0)
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
^ permalink raw reply	[flat|nested] 21+ messages in thread
* Re: [PATCH 3/3] Make scsi_free_queue() abort pending requests
  2012-05-04 20:32     ` Mike Christie
@ 2012-05-05  6:07       ` Bart Van Assche
  2012-05-07  0:44         ` Mike Christie
  0 siblings, 1 reply; 21+ messages in thread
From: Bart Van Assche @ 2012-05-05  6:07 UTC (permalink / raw)
  To: Mike Christie
  Cc: linux-scsi, James Bottomley, Jun'ichi Nomura, Stefan Richter,
	Tomas Henzl, Mike Snitzer
On 05/04/12 20:32, Mike Christie wrote:
> Oh not wait. I do not get the patch. After blk_cleanup_queue runs then
> no IO should be running and no new IO can be queued can it?
> 
>>>  	 */
>>>  	blk_cleanup_queue(q);
>>> +	blk_abort_queue(q);
>>>  
>>>  	if (sdev->is_visible) {
>>>  		if (scsi_device_set_state(sdev, SDEV_CANCEL) != 0)
After blk_cleanup_queue() finished no new requests will be queued to a
SCSI LLD. However, that function doesn't wait for already queued
requests to finish. I have verified with ib_srp LLD that the
blk_abort_queue() call triggers the "SRP abort called" kernel log
message generated by ib_srp when srp_abort() is called.
Bart.
^ permalink raw reply	[flat|nested] 21+ messages in thread
* Re: [PATCH 2/3] Stop accepting SCSI requests before removing a device
  2012-05-04 20:30     ` Mike Christie
@ 2012-05-05 13:04       ` Bart Van Assche
  2012-05-29 15:00         ` Bart Van Assche
  0 siblings, 1 reply; 21+ messages in thread
From: Bart Van Assche @ 2012-05-05 13:04 UTC (permalink / raw)
  To: Mike Christie
  Cc: linux-scsi, James Bottomley, Jun'ichi Nomura, Stefan Richter,
	Tomas Henzl, Mike Snitzer
On 05/04/12 20:30, Mike Christie wrote:
> diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
> index 42c35ff..11dc3ff 100644
> --- a/drivers/scsi/scsi_sysfs.c
> +++ b/drivers/scsi/scsi_sysfs.c
> @@ -955,24 +955,30 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev)
>  void __scsi_remove_device(struct scsi_device *sdev)
>  {
>  	struct device *dev = &sdev->sdev_gendev;
> +	struct request_queue *q = sdev->request_queue;
>  
>  	if (sdev->is_visible) {
>  		if (scsi_device_set_state(sdev, SDEV_CANCEL) != 0)
>  			return;
>  
> -		bsg_unregister_queue(sdev->request_queue);
> +		bsg_unregister_queue(q);
>  		device_unregister(&sdev->sdev_dev);
>  		transport_remove_device(dev);
>  		device_del(dev);
>  	} else
>  		put_device(&sdev->sdev_dev);
> +	/*
> +	 * Stop accepting new requests before tearing down the
> +	 * queue. Note: the actual queue deallocation happens in
> +	 * scsi_device_dev_release_usercontext().
> +	 */
> +	blk_cleanup_queue(q);
> +
>  	scsi_device_set_state(sdev, SDEV_DEL);
>  	if (sdev->host->hostt->slave_destroy)
>  		sdev->host->hostt->slave_destroy(sdev);
>  	transport_destroy_device(dev);
>  
> -	/* Freeing the queue signals to block that we're done */
> -	blk_cleanup_queue(sdev->request_queue);
>  	put_device(dev);
>  }
Now that we're looking at potential device removal races: since the host
lock push down scsi_dispatch_cmd() is invoked while a reference on the
device is hold but without holding the host lock or the device queue lock.
Shouldn't we make sure that invoking the SCSI device tear down code only
occurs once it is sure that hostt->queuecommand won't be invoked anymore ?
Bart.
^ permalink raw reply	[flat|nested] 21+ messages in thread
* Re: [PATCH 3/3] Make scsi_free_queue() abort pending requests
  2012-05-04 20:25   ` Mike Christie
  2012-05-04 20:32     ` Mike Christie
@ 2012-05-05 13:41     ` Bart Van Assche
  1 sibling, 0 replies; 21+ messages in thread
From: Bart Van Assche @ 2012-05-05 13:41 UTC (permalink / raw)
  To: Mike Christie
  Cc: linux-scsi, James Bottomley, Jun'ichi Nomura, Stefan Richter,
	Tomas Henzl, Mike Snitzer
On 05/04/12 20:25, Mike Christie wrote:
> On 05/04/2012 10:07 AM, Bart Van Assche wrote:
>> It is possible that a SCSI LLD invokes scsi_remove_host() after a
>> request has been queued via scsi_host_template.queuecommand() but
>> before scsi_done() has been invoked for that request. This may
> 
> If that happens won't we wait in blk_cleanup_queue->blk_drain_queue for
> that IO to be completed (completed normally or timed out and processed
> through that path)?
As far as I understand the block layer requests that have been queued
but for which scsi_request_fn() has not yet been invoked are on the
request_queue.queue_head list. Requests that have been passed to the
SCSI LLD but for which scsi_done() has not yet been invoked are on the
request_queue.timeout_list list. blk_drain_queue() works on the former
list while blk_abort_queue() processes the latter.
Bart.
^ permalink raw reply	[flat|nested] 21+ messages in thread
* Re: [PATCH 3/3] Make scsi_free_queue() abort pending requests
  2012-05-05  6:07       ` Bart Van Assche
@ 2012-05-07  0:44         ` Mike Christie
  2012-05-07  1:15           ` Mike Christie
  2012-05-14 18:43           ` Bart Van Assche
  0 siblings, 2 replies; 21+ messages in thread
From: Mike Christie @ 2012-05-07  0:44 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: linux-scsi, James Bottomley, Jun'ichi Nomura, Stefan Richter,
	Tomas Henzl, Mike Snitzer
On 05/05/2012 01:07 AM, Bart Van Assche wrote:
> On 05/04/12 20:32, Mike Christie wrote:
> 
>> Oh not wait. I do not get the patch. After blk_cleanup_queue runs then
>> no IO should be running and no new IO can be queued can it?
>>
>>>>  	 */
>>>>  	blk_cleanup_queue(q);
>>>> +	blk_abort_queue(q);
>>>>  
>>>>  	if (sdev->is_visible) {
>>>>  		if (scsi_device_set_state(sdev, SDEV_CANCEL) != 0)
> 
> 
> After blk_cleanup_queue() finished no new requests will be queued to a
> SCSI LLD. However, that function doesn't wait for already queued
> requests to finish. I have verified with ib_srp LLD that the\
It does for me. It is supposed to isn't it? For BLOCK FS requests
blk_drain_queue will wait for the q->in_flight counters to go to zero.
They get incremented at scsi_request_fn-> blk_start_request ->
blk_dequeue_request time and decremented in scsi_end_request ->
blk_end_request ->  blk_end_bidi_request -> blk_finish_request ->
blk_put_request -> elv_completed_request. For BLOCK PC and other
requests there is the rq.count tracking.
I added some printk code in the blk_drain_queue loop, and if I force the
target to wait to complete a IO, then remove the device, blk_drain_queue
is looping until the IO is eventually completed.
> blk_abort_queue() call triggers the "SRP abort called" kernel log
> message generated by ib_srp when srp_abort() is called.
What type of IO is it?
^ permalink raw reply	[flat|nested] 21+ messages in thread
* Re: [PATCH 3/3] Make scsi_free_queue() abort pending requests
  2012-05-07  0:44         ` Mike Christie
@ 2012-05-07  1:15           ` Mike Christie
  2012-05-14 18:43           ` Bart Van Assche
  1 sibling, 0 replies; 21+ messages in thread
From: Mike Christie @ 2012-05-07  1:15 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: linux-scsi, James Bottomley, Jun'ichi Nomura, Stefan Richter,
	Tomas Henzl, Mike Snitzer
On 05/06/2012 07:44 PM, Mike Christie wrote:
> blk_put_request -> elv_completed_request. For BLOCK PC and other
> requests there is the rq.count tracking.
The rq.count stuff should also make sure we have a queue before the
prep/request fn code is run and for requeues from what I can tell.
^ permalink raw reply	[flat|nested] 21+ messages in thread
* Re: [PATCH 3/3] Make scsi_free_queue() abort pending requests
  2012-05-07  0:44         ` Mike Christie
  2012-05-07  1:15           ` Mike Christie
@ 2012-05-14 18:43           ` Bart Van Assche
  2012-05-29 14:56             ` Bart Van Assche
  1 sibling, 1 reply; 21+ messages in thread
From: Bart Van Assche @ 2012-05-14 18:43 UTC (permalink / raw)
  To: Mike Christie
  Cc: linux-scsi, James Bottomley, Jun'ichi Nomura, Stefan Richter,
	Tomas Henzl, Mike Snitzer, Jens Axboe
On 05/07/12 00:44, Mike Christie wrote:
> On 05/05/2012 01:07 AM, Bart Van Assche wrote:
>> On 05/04/12 20:32, Mike Christie wrote:
>>> Oh not wait. I do not get the patch. After blk_cleanup_queue runs then
>>> no IO should be running and no new IO can be queued can it?
>>>
>>>>>  	 */
>>>>>  	blk_cleanup_queue(q);
>>>>> +	blk_abort_queue(q);
>>>>>  
>>>>>  	if (sdev->is_visible) {
>>>>>  		if (scsi_device_set_state(sdev, SDEV_CANCEL) != 0)
>>
>> After blk_cleanup_queue() finished no new requests will be queued to a
>> SCSI LLD. However, that function doesn't wait for already queued
>> requests to finish. I have verified with ib_srp LLD that the\
> 
> It does for me. It is supposed to isn't it? For BLOCK FS requests
> blk_drain_queue will wait for the q->in_flight counters to go to zero.
> They get incremented at scsi_request_fn-> blk_start_request ->
> blk_dequeue_request time and decremented in scsi_end_request ->
> blk_end_request ->  blk_end_bidi_request -> blk_finish_request ->
> blk_put_request -> elv_completed_request. For BLOCK PC and other
> requests there is the rq.count tracking.
I've had a closer look at this and noticed that the q->in_flight[]
decrement in elv_completed_request() is non-atomic and also that that
function is called from one context with the queue lock held
(__blk_put_request()) but from another context without the queue lock
held (blk_execute_rq_nowait() -> flush_end_io() ->
elv_completed_request()). I'd appreciate it if someone who is more
familiar with the block layer than myself could comment on this.
Bart.
^ permalink raw reply	[flat|nested] 21+ messages in thread
* Re: [PATCH 3/3] Make scsi_free_queue() abort pending requests
  2012-05-14 18:43           ` Bart Van Assche
@ 2012-05-29 14:56             ` Bart Van Assche
  0 siblings, 0 replies; 21+ messages in thread
From: Bart Van Assche @ 2012-05-29 14:56 UTC (permalink / raw)
  To: Mike Christie
  Cc: linux-scsi, James Bottomley, Jun'ichi Nomura, Stefan Richter,
	Tomas Henzl, Mike Snitzer, Jens Axboe
On 05/14/12 18:43, Bart Van Assche wrote:
> On 05/07/12 00:44, Mike Christie wrote:
>> On 05/05/2012 01:07 AM, Bart Van Assche wrote:
>>> On 05/04/12 20:32, Mike Christie wrote:
>>>> Oh not wait. I do not get the patch. After blk_cleanup_queue runs then
>>>> no IO should be running and no new IO can be queued can it?
>>>>
>>>>>>  	 */
>>>>>>  	blk_cleanup_queue(q);
>>>>>> +	blk_abort_queue(q);
>>>>>>  
>>>>>>  	if (sdev->is_visible) {
>>>>>>  		if (scsi_device_set_state(sdev, SDEV_CANCEL) != 0)
>>>
>>> After blk_cleanup_queue() finished no new requests will be queued to a
>>> SCSI LLD. However, that function doesn't wait for already queued
>>> requests to finish. I have verified with ib_srp LLD that the\
>>
>> It does for me. It is supposed to isn't it? For BLOCK FS requests
>> blk_drain_queue will wait for the q->in_flight counters to go to zero.
>> They get incremented at scsi_request_fn-> blk_start_request ->
>> blk_dequeue_request time and decremented in scsi_end_request ->
>> blk_end_request ->  blk_end_bidi_request -> blk_finish_request ->
>> blk_put_request -> elv_completed_request. For BLOCK PC and other
>> requests there is the rq.count tracking.
> 
> I've had a closer look at this and noticed that the q->in_flight[]
> decrement in elv_completed_request() is non-atomic and also that that
> function is called from one context with the queue lock held
> (__blk_put_request()) but from another context without the queue lock
> held (blk_execute_rq_nowait() -> flush_end_io() ->
> elv_completed_request()). I'd appreciate it if someone who is more
> familiar with the block layer than myself could comment on this.
(replying to my own e-mail)
Does the patch below make sense ? It ensures that the queue lock
is held around all end_io invocations.
---
 block/blk-core.c  |    4 ++++
 block/blk-exec.c  |    2 +-
 block/blk-flush.c |    2 ++
 3 files changed, 7 insertions(+), 1 deletions(-)
diff --git a/block/blk-core.c b/block/blk-core.c
index 1f61b74..41ff2af 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -394,6 +394,8 @@ void blk_drain_queue(struct request_queue *q, bool drain_all)
 			}
 		}
 
+		WARN_ON(drain == 0 && !list_empty(&q->timeout_list));
+
 		spin_unlock_irq(q->queue_lock);
 
 		if (!drain)
@@ -2287,6 +2289,8 @@ EXPORT_SYMBOL_GPL(blk_unprep_request);
  */
 static void blk_finish_request(struct request *req, int error)
 {
+	lockdep_assert_held(req->q->queue_lock);
+
 	if (blk_rq_tagged(req))
 		blk_queue_end_tag(req->q, req);
 
diff --git a/block/blk-exec.c b/block/blk-exec.c
index fb2cbd5..6724fab 100644
--- a/block/blk-exec.c
+++ b/block/blk-exec.c
@@ -54,10 +54,10 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk,
 	spin_lock_irq(q->queue_lock);
 
 	if (unlikely(blk_queue_dead(q))) {
-		spin_unlock_irq(q->queue_lock);
 		rq->errors = -ENXIO;
 		if (rq->end_io)
 			rq->end_io(rq, rq->errors);
+		spin_unlock_irq(q->queue_lock);
 		return;
 	}
 
diff --git a/block/blk-flush.c b/block/blk-flush.c
index 720ad60..7bb8ed0 100644
--- a/block/blk-flush.c
+++ b/block/blk-flush.c
@@ -198,6 +198,8 @@ static void flush_end_io(struct request *flush_rq, int error)
 	bool queued = false;
 	struct request *rq, *n;
 
+	lockdep_assert_held(q->queue_lock);
+
 	BUG_ON(q->flush_pending_idx == q->flush_running_idx);
 
 	/* account completion of the flush request */
^ permalink raw reply related	[flat|nested] 21+ messages in thread
* Re: [PATCH 2/3] Stop accepting SCSI requests before removing a device
  2012-05-05 13:04       ` Bart Van Assche
@ 2012-05-29 15:00         ` Bart Van Assche
  2012-05-29 17:35           ` Mike Christie
  0 siblings, 1 reply; 21+ messages in thread
From: Bart Van Assche @ 2012-05-29 15:00 UTC (permalink / raw)
  To: Mike Christie
  Cc: linux-scsi, James Bottomley, Jun'ichi Nomura, Stefan Richter,
	Tomas Henzl, Mike Snitzer
On 05/05/12 13:04, Bart Van Assche wrote:
> Now that we're looking at potential device removal races: since the host
> lock push down scsi_dispatch_cmd() is invoked while a reference on the
> device is hold but without holding the host lock or the device queue lock.
> Shouldn't we make sure that invoking the SCSI device tear down code only
> occurs once it is sure that hostt->queuecommand won't be invoked anymore ?
(replying to my own e-mail)
The patch below makes sure that blk_drain_queue() and blk_cleanup_queue()
wait until all queuecommand invocations have finished and hence fixes a
race between the SCSI error handler and __scsi_remove_device(). Any feedback
is welcome.
---
 drivers/scsi/scsi_error.c |   14 +++++++++++++-
 1 files changed, 13 insertions(+), 1 deletions(-)
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 386f0c5..947f627 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -781,10 +781,17 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, unsigned char *cmnd,
 	struct scsi_device *sdev = scmd->device;
 	struct scsi_driver *sdrv = scsi_cmd_to_driver(scmd);
 	struct Scsi_Host *shost = sdev->host;
+	struct request_queue *q = sdev->request_queue;
 	DECLARE_COMPLETION_ONSTACK(done);
 	unsigned long timeleft;
 	struct scsi_eh_save ses;
-	int rtn;
+	int rtn = FAILED;
+
+	spin_lock_irq(q->queue_lock);
+	if (blk_queue_dead(q))
+		goto out_unlock;
+	q->rq.count[BLK_RW_SYNC]++;
+	spin_unlock_irq(q->queue_lock);
 
 	scsi_eh_prep_cmnd(scmd, &ses, cmnd, cmnd_size, sense_bytes);
 	shost->eh_action = &done;
@@ -838,6 +845,11 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, unsigned char *cmnd,
 	if (sdrv && sdrv->eh_action)
 		rtn = sdrv->eh_action(scmd, cmnd, cmnd_size, rtn);
 
+	spin_lock_irq(q->queue_lock);
+	q->rq.count[BLK_RW_SYNC]--;
+out_unlock:
+	spin_unlock_irq(q->queue_lock);
+
 	return rtn;
 }
 
^ permalink raw reply related	[flat|nested] 21+ messages in thread
* Re: [PATCH 2/3] Stop accepting SCSI requests before removing a device
  2012-05-29 15:00         ` Bart Van Assche
@ 2012-05-29 17:35           ` Mike Christie
  2012-05-30  6:56             ` Bart Van Assche
  0 siblings, 1 reply; 21+ messages in thread
From: Mike Christie @ 2012-05-29 17:35 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: linux-scsi, James Bottomley, Jun'ichi Nomura, Stefan Richter,
	Tomas Henzl, Mike Snitzer
On 05/29/2012 10:00 AM, Bart Van Assche wrote:
> On 05/05/12 13:04, Bart Van Assche wrote:
> 
>> Now that we're looking at potential device removal races: since the host
>> lock push down scsi_dispatch_cmd() is invoked while a reference on the
>> device is hold but without holding the host lock or the device queue lock.
>> Shouldn't we make sure that invoking the SCSI device tear down code only
>> occurs once it is sure that hostt->queuecommand won't be invoked anymore ?
> 
> 
> (replying to my own e-mail)
> 
> The patch below makes sure that blk_drain_queue() and blk_cleanup_queue()
> wait until all queuecommand invocations have finished and hence fixes a
> race between the SCSI error handler and __scsi_remove_device(). Any feedback
> is welcome.
> 
> ---
>  drivers/scsi/scsi_error.c |   14 +++++++++++++-
>  1 files changed, 13 insertions(+), 1 deletions(-)
> 
> diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
> index 386f0c5..947f627 100644
> --- a/drivers/scsi/scsi_error.c
> +++ b/drivers/scsi/scsi_error.c
> @@ -781,10 +781,17 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, unsigned char *cmnd,
>  	struct scsi_device *sdev = scmd->device;
>  	struct scsi_driver *sdrv = scsi_cmd_to_driver(scmd);
>  	struct Scsi_Host *shost = sdev->host;
> +	struct request_queue *q = sdev->request_queue;
>  	DECLARE_COMPLETION_ONSTACK(done);
>  	unsigned long timeleft;
>  	struct scsi_eh_save ses;
> -	int rtn;
> +	int rtn = FAILED;
> +
> +	spin_lock_irq(q->queue_lock);
> +	if (blk_queue_dead(q))
> +		goto out_unlock;
> +	q->rq.count[BLK_RW_SYNC]++;
> +	spin_unlock_irq(q->queue_lock);
Are you hitting a case where a scsi_cmnd does not have a request struct
that was allocated through the block layer functions like
blk_get_request, but is getting sent through this path? What code is
doing this?
Or, are you hitting a bug where somehow the request is freed (so the
rq.count is decremented) but the scsi eh is still working on a scsi_cmnd
that had a request struct allocated for it?
^ permalink raw reply	[flat|nested] 21+ messages in thread
* Re: [PATCH 2/3] Stop accepting SCSI requests before removing a device
  2012-05-29 17:35           ` Mike Christie
@ 2012-05-30  6:56             ` Bart Van Assche
  2012-05-30 17:27               ` Mike Christie
  0 siblings, 1 reply; 21+ messages in thread
From: Bart Van Assche @ 2012-05-30  6:56 UTC (permalink / raw)
  To: Mike Christie
  Cc: linux-scsi, James Bottomley, Jun'ichi Nomura, Stefan Richter,
	Tomas Henzl, Mike Snitzer
On 05/29/12 17:35, Mike Christie wrote:
> On 05/29/2012 10:00 AM, Bart Van Assche wrote:
>> The patch below makes sure that blk_drain_queue() and blk_cleanup_queue()
>> wait until all queuecommand invocations have finished and hence fixes a
>> race between the SCSI error handler and __scsi_remove_device(). Any feedback
>> is welcome.
>>
>> ---
>>  drivers/scsi/scsi_error.c |   14 +++++++++++++-
>>  1 files changed, 13 insertions(+), 1 deletions(-)
>>
>> diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
>> index 386f0c5..947f627 100644
>> --- a/drivers/scsi/scsi_error.c
>> +++ b/drivers/scsi/scsi_error.c
>> @@ -781,10 +781,17 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, unsigned char *cmnd,
>>  	struct scsi_device *sdev = scmd->device;
>>  	struct scsi_driver *sdrv = scsi_cmd_to_driver(scmd);
>>  	struct Scsi_Host *shost = sdev->host;
>> +	struct request_queue *q = sdev->request_queue;
>>  	DECLARE_COMPLETION_ONSTACK(done);
>>  	unsigned long timeleft;
>>  	struct scsi_eh_save ses;
>> -	int rtn;
>> +	int rtn = FAILED;
>> +
>> +	spin_lock_irq(q->queue_lock);
>> +	if (blk_queue_dead(q))
>> +		goto out_unlock;
>> +	q->rq.count[BLK_RW_SYNC]++;
>> +	spin_unlock_irq(q->queue_lock);
> 
> Are you hitting a case where a scsi_cmnd does not have a request struct
> that was allocated through the block layer functions like
> blk_get_request, but is getting sent through this path? What code is
> doing this?
> 
> Or, are you hitting a bug where somehow the request is freed (so the
> rq.count is decremented) but the scsi eh is still working on a scsi_cmnd
> that had a request struct allocated for it?
 
I haven't hit any such bugs. This patch is what I came up with after
analyzing what would be necessary to make sure that queuecommand isn't
called anymore after blk_cleanup_queue() finished and also to make sure
that blk_drain_queue() waits until all active queuecommand calls have
finished. The above patch was tested in combination with a patch you
posted about three weeks ago:
http://marc.info/?l=linux-scsi&m=133616359518771&w=2.
Bart.
^ permalink raw reply	[flat|nested] 21+ messages in thread
* Re: [PATCH 2/3] Stop accepting SCSI requests before removing a device
  2012-05-30  6:56             ` Bart Van Assche
@ 2012-05-30 17:27               ` Mike Christie
  2012-05-30 20:00                 ` Bart Van Assche
  0 siblings, 1 reply; 21+ messages in thread
From: Mike Christie @ 2012-05-30 17:27 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: linux-scsi, James Bottomley, Jun'ichi Nomura, Stefan Richter,
	Tomas Henzl, Mike Snitzer
On 05/30/2012 01:56 AM, Bart Van Assche wrote:
> On 05/29/12 17:35, Mike Christie wrote:
> 
>> On 05/29/2012 10:00 AM, Bart Van Assche wrote:
>>> The patch below makes sure that blk_drain_queue() and blk_cleanup_queue()
>>> wait until all queuecommand invocations have finished and hence fixes a
>>> race between the SCSI error handler and __scsi_remove_device(). Any feedback
>>> is welcome.
>>>
>>> ---
>>>  drivers/scsi/scsi_error.c |   14 +++++++++++++-
>>>  1 files changed, 13 insertions(+), 1 deletions(-)
>>>
>>> diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
>>> index 386f0c5..947f627 100644 
>>> --- a/drivers/scsi/scsi_error.c
>>> +++ b/drivers/scsi/scsi_error.c
>>> @@ -781,10 +781,17 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, unsigned char *cmnd,
>>>  	struct scsi_device *sdev = scmd->device;
>>>  	struct scsi_driver *sdrv = scsi_cmd_to_driver(scmd);
>>>  	struct Scsi_Host *shost = sdev->host;
>>> +	struct request_queue *q = sdev->request_queue;
>>>  	DECLARE_COMPLETION_ONSTACK(done);
>>>  	unsigned long timeleft;
>>>  	struct scsi_eh_save ses;
>>> -	int rtn;
>>> +	int rtn = FAILED;
>>> +
>>> +	spin_lock_irq(q->queue_lock);
>>> +	if (blk_queue_dead(q))
>>> +		goto out_unlock;
>>> +	q->rq.count[BLK_RW_SYNC]++;
>>> +	spin_unlock_irq(q->queue_lock);
>>
>> Are you hitting a case where a scsi_cmnd does not have a request struct
>> that was allocated through the block layer functions like
>> blk_get_request, but is getting sent through this path? What code is
>> doing this?
>>
>> Or, are you hitting a bug where somehow the request is freed (so the
>> rq.count is decremented) but the scsi eh is still working on a scsi_cmnd
>> that had a request struct allocated for it?
>  
> 
> I haven't hit any such bugs. This patch is what I came up with after
> analyzing what would be necessary to make sure that queuecommand isn't
> called anymore after blk_cleanup_queue() finished and also to make sure
> that blk_drain_queue() waits until all active queuecommand calls have
It should be waiting now if the scsi_cmnd has a request backing
shouldn't it? We will allocate a request struct with blk_get_request or
one of the other blk helpers for each scsi_cmnd, and that will increment
the q->rq.count. If we then go down the error path because a cmd timed
out or because scsi_decide_disposition returned FAILED, then we will
still have that request backing the scsi cmnd and the count should still
be incremented for it. When we call scsi_send_eh_cmnd for eh operations
the request is then still there and not freed yet. The request will get
freed later when scsi_eh_flush_done_q is called. In there we will either
retry or call scsi_finish_command which will go through the normal
completion process and eventually call __blk_put_request and freed_request.
^ permalink raw reply	[flat|nested] 21+ messages in thread
* Re: [PATCH 2/3] Stop accepting SCSI requests before removing a device
  2012-05-30 17:27               ` Mike Christie
@ 2012-05-30 20:00                 ` Bart Van Assche
  2012-06-01  3:13                   ` Mike Christie
  0 siblings, 1 reply; 21+ messages in thread
From: Bart Van Assche @ 2012-05-30 20:00 UTC (permalink / raw)
  To: Mike Christie
  Cc: linux-scsi, James Bottomley, Jun'ichi Nomura, Stefan Richter,
	Tomas Henzl, Mike Snitzer
On 05/30/12 17:27, Mike Christie wrote:
> It should be waiting now if the scsi_cmnd has a request backing
> shouldn't it? We will allocate a request struct with blk_get_request or
> one of the other blk helpers for each scsi_cmnd, and that will increment
> the q->rq.count. If we then go down the error path because a cmd timed
> out or because scsi_decide_disposition returned FAILED, then we will
> still have that request backing the scsi cmnd and the count should still
> be incremented for it. When we call scsi_send_eh_cmnd for eh operations
> the request is then still there and not freed yet. The request will get
> freed later when scsi_eh_flush_done_q is called. In there we will either
> retry or call scsi_finish_command which will go through the normal
> completion process and eventually call __blk_put_request and freed_request.
OK, that means that the counter manipulation code can be left out.
Skipping the queuecommand() call once device removal started is still
useful though since when not doing that scsi_remove_host() sometimes
takes much longer than expected. A call stack I obtained via echo w
>/proc/sysrq-trigger while scsi_remove_host() took longer than expected
is as follows:
 [<ffffffff81404799>] schedule+0x29/0x70
 [<ffffffff81063c55>] async_synchronize_cookie_domain+0x75/0x120
 [<ffffffff8105c940>] ? wake_up_bit+0x40/0x40
 [<ffffffff812c88dc>] ? __pm_runtime_resume+0x6c/0xa0
 [<ffffffff81063d15>] async_synchronize_cookie+0x15/0x20
 [<ffffffff81063d3c>] async_synchronize_full+0x1c/0x40
 [<ffffffffa015aaf6>] sd_remove+0x36/0xc0 [sd_mod]
 [<ffffffff812bce1c>] __device_release_driver+0x7c/0xe0
 [<ffffffff812bd00f>] device_release_driver+0x2f/0x50
 [<ffffffff812bc6cb>] bus_remove_device+0xfb/0x170
 [<ffffffff812b97cd>] device_del+0x12d/0x1c0
 [<ffffffffa003e714>] __scsi_remove_device+0xd4/0xe0 [scsi_mod]
 [<ffffffffa003d10f>] scsi_forget_host+0x6f/0x80 [scsi_mod]
 [<ffffffffa003266a>] scsi_remove_host+0x7a/0x130 [scsi_mod]
 [<ffffffffa0564096>] srp_remove_target+0xa6/0x100 [ib_srp]
 [<ffffffffa05642d4>] srp_remove_work+0x64/0x90 [ib_srp]
 [<ffffffff81054f98>] process_one_work+0x1a8/0x530
 [<ffffffff81054f29>] ? process_one_work+0x139/0x530
 [<ffffffffa0564270>] ? srp_remove_one+0x180/0x180 [ib_srp]
 [<ffffffff81056cea>] worker_thread+0x16a/0x350
 [<ffffffff81056b80>] ? manage_workers+0x250/0x250
 [<ffffffff8105c12e>] kthread+0xae/0xc0
 [<ffffffff8140f514>] kernel_thread_helper+0x4/0x10
With the patch below these delays do not occur:
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 386f0c5..0d6ab69 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -791,14 +791,15 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, unsigned char *cmnd,
 
 	scsi_log_send(scmd);
 	scmd->scsi_done = scsi_eh_done;
-	shost->hostt->queuecommand(shost, scmd);
-
-	timeleft = wait_for_completion_timeout(&done, timeout);
-
+	if (sdev->sdev_state != SDEV_DEL &&
+	    shost->hostt->queuecommand(shost, scmd) == 0) {
+		timeleft = wait_for_completion_timeout(&done, timeout);
+		scsi_log_completion(scmd, SUCCESS);
+	} else {
+		timeleft = 0;
+	}
 	shost->eh_action = NULL;
 
-	scsi_log_completion(scmd, SUCCESS);
-
 	SCSI_LOG_ERROR_RECOVERY(3,
 		printk("%s: scmd: %p, timeleft: %ld\n",
 			__func__, scmd, timeleft));
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 42c35ff..f32757c 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -955,24 +955,30 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev)
 void __scsi_remove_device(struct scsi_device *sdev)
 {
 	struct device *dev = &sdev->sdev_gendev;
+	struct request_queue *q = sdev->request_queue;
 
 	if (sdev->is_visible) {
 		if (scsi_device_set_state(sdev, SDEV_CANCEL) != 0)
 			return;
 
-		bsg_unregister_queue(sdev->request_queue);
+		bsg_unregister_queue(q);
 		device_unregister(&sdev->sdev_dev);
 		transport_remove_device(dev);
 		device_del(dev);
 	} else
 		put_device(&sdev->sdev_dev);
+
+	/*
+	 * Stop accepting new requests and wait until all queuecommand()
+	 * invocations have finished before tearing down the device.
+	 */
 	scsi_device_set_state(sdev, SDEV_DEL);
+	blk_cleanup_queue(q);
+
 	if (sdev->host->hostt->slave_destroy)
 		sdev->host->hostt->slave_destroy(sdev);
 	transport_destroy_device(dev);
 
-	/* Freeing the queue signals to block that we're done */
-	blk_cleanup_queue(sdev->request_queue);
 	put_device(dev);
 }
 
^ permalink raw reply related	[flat|nested] 21+ messages in thread
* Re: [PATCH 2/3] Stop accepting SCSI requests before removing a device
  2012-05-30 20:00                 ` Bart Van Assche
@ 2012-06-01  3:13                   ` Mike Christie
  0 siblings, 0 replies; 21+ messages in thread
From: Mike Christie @ 2012-06-01  3:13 UTC (permalink / raw)
  To: Bart Van Assche
  Cc: linux-scsi, James Bottomley, Jun'ichi Nomura, Stefan Richter,
	Tomas Henzl, Mike Snitzer
On 05/30/2012 03:00 PM, Bart Van Assche wrote:
> On 05/30/12 17:27, Mike Christie wrote:
> 
>> It should be waiting now if the scsi_cmnd has a request backing
>> shouldn't it? We will allocate a request struct with blk_get_request or
>> one of the other blk helpers for each scsi_cmnd, and that will increment
>> the q->rq.count. If we then go down the error path because a cmd timed
>> out or because scsi_decide_disposition returned FAILED, then we will
>> still have that request backing the scsi cmnd and the count should still
>> be incremented for it. When we call scsi_send_eh_cmnd for eh operations
>> the request is then still there and not freed yet. The request will get
>> freed later when scsi_eh_flush_done_q is called. In there we will either
>> retry or call scsi_finish_command which will go through the normal
>> completion process and eventually call __blk_put_request and freed_request.
> 
> 
> OK, that means that the counter manipulation code can be left out.
> Skipping the queuecommand() call once device removal started is still
> useful though since when not doing that scsi_remove_host() sometimes
> takes much longer than expected. A call stack I obtained via echo w
>> /proc/sysrq-trigger while scsi_remove_host() took longer than expected
> is as follows:
> 
>  [<ffffffff81404799>] schedule+0x29/0x70
>  [<ffffffff81063c55>] async_synchronize_cookie_domain+0x75/0x120
>  [<ffffffff8105c940>] ? wake_up_bit+0x40/0x40
>  [<ffffffff812c88dc>] ? __pm_runtime_resume+0x6c/0xa0
>  [<ffffffff81063d15>] async_synchronize_cookie+0x15/0x20
>  [<ffffffff81063d3c>] async_synchronize_full+0x1c/0x40
>  [<ffffffffa015aaf6>] sd_remove+0x36/0xc0 [sd_mod]
>  [<ffffffff812bce1c>] __device_release_driver+0x7c/0xe0
>  [<ffffffff812bd00f>] device_release_driver+0x2f/0x50
>  [<ffffffff812bc6cb>] bus_remove_device+0xfb/0x170
>  [<ffffffff812b97cd>] device_del+0x12d/0x1c0
>  [<ffffffffa003e714>] __scsi_remove_device+0xd4/0xe0 [scsi_mod]
>  [<ffffffffa003d10f>] scsi_forget_host+0x6f/0x80 [scsi_mod]
>  [<ffffffffa003266a>] scsi_remove_host+0x7a/0x130 [scsi_mod]
>  [<ffffffffa0564096>] srp_remove_target+0xa6/0x100 [ib_srp]
>  [<ffffffffa05642d4>] srp_remove_work+0x64/0x90 [ib_srp]
>  [<ffffffff81054f98>] process_one_work+0x1a8/0x530
>  [<ffffffff81054f29>] ? process_one_work+0x139/0x530
>  [<ffffffffa0564270>] ? srp_remove_one+0x180/0x180 [ib_srp]
>  [<ffffffff81056cea>] worker_thread+0x16a/0x350
>  [<ffffffff81056b80>] ? manage_workers+0x250/0x250
>  [<ffffffff8105c12e>] kthread+0xae/0xc0
>  [<ffffffff8140f514>] kernel_thread_helper+0x4/0x10
> 
> With the patch below these delays do not occur:
> 
> diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
> index 386f0c5..0d6ab69 100644
> --- a/drivers/scsi/scsi_error.c
> +++ b/drivers/scsi/scsi_error.c
> @@ -791,14 +791,15 @@ static int scsi_send_eh_cmnd(struct scsi_cmnd *scmd, unsigned char *cmnd,
>  
>  	scsi_log_send(scmd);
>  	scmd->scsi_done = scsi_eh_done;
> -	shost->hostt->queuecommand(shost, scmd);
> -
> -	timeleft = wait_for_completion_timeout(&done, timeout);
> -
> +	if (sdev->sdev_state != SDEV_DEL &&
> +	    shost->hostt->queuecommand(shost, scmd) == 0) {
> +		timeleft = wait_for_completion_timeout(&done, timeout);
> +		scsi_log_completion(scmd, SUCCESS);
> +	} else {
> +		timeleft = 0;
> +	}
>  	shost->eh_action = NULL;
>  
> -	scsi_log_completion(scmd, SUCCESS);
> -
>  	SCSI_LOG_ERROR_RECOVERY(3,
>  		printk("%s: scmd: %p, timeleft: %ld\n",
>  			__func__, scmd, timeleft));
> diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
> index 42c35ff..f32757c 100644
> --- a/drivers/scsi/scsi_sysfs.c
> +++ b/drivers/scsi/scsi_sysfs.c
> @@ -955,24 +955,30 @@ int scsi_sysfs_add_sdev(struct scsi_device *sdev)
>  void __scsi_remove_device(struct scsi_device *sdev)
>  {
>  	struct device *dev = &sdev->sdev_gendev;
> +	struct request_queue *q = sdev->request_queue;
>  
>  	if (sdev->is_visible) {
>  		if (scsi_device_set_state(sdev, SDEV_CANCEL) != 0)
>  			return;
>  
> -		bsg_unregister_queue(sdev->request_queue);
> +		bsg_unregister_queue(q);
>  		device_unregister(&sdev->sdev_dev);
>  		transport_remove_device(dev);
>  		device_del(dev);
>  	} else
>  		put_device(&sdev->sdev_dev);
> +
> +	/*
> +	 * Stop accepting new requests and wait until all queuecommand()
> +	 * invocations have finished before tearing down the device.
> +	 */
>  	scsi_device_set_state(sdev, SDEV_DEL);
> +	blk_cleanup_queue(q);
> +
>  	if (sdev->host->hostt->slave_destroy)
>  		sdev->host->hostt->slave_destroy(sdev);
>  	transport_destroy_device(dev);
>  
> -	/* Freeing the queue signals to block that we're done */
> -	blk_cleanup_queue(sdev->request_queue);
>  	put_device(dev);
>  }
>  
Looks ok to me.
^ permalink raw reply	[flat|nested] 21+ messages in thread
end of thread, other threads:[~2012-06-01  3:14 UTC | newest]
Thread overview: 21+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2012-05-04 15:00 [PATCH 0/3 v6] Fixes for SCSI device removal Bart Van Assche
2012-05-04 15:03 ` [PATCH 1/3] sd: Fix device removal NULL pointer dereference Bart Van Assche
2012-05-04 15:06 ` [PATCH 2/3] Stop accepting SCSI requests before removing a device Bart Van Assche
2012-05-04 20:16   ` Mike Christie
2012-05-04 20:30     ` Mike Christie
2012-05-05 13:04       ` Bart Van Assche
2012-05-29 15:00         ` Bart Van Assche
2012-05-29 17:35           ` Mike Christie
2012-05-30  6:56             ` Bart Van Assche
2012-05-30 17:27               ` Mike Christie
2012-05-30 20:00                 ` Bart Van Assche
2012-06-01  3:13                   ` Mike Christie
2012-05-04 15:07 ` [PATCH 3/3] Make scsi_free_queue() abort pending requests Bart Van Assche
2012-05-04 20:25   ` Mike Christie
2012-05-04 20:32     ` Mike Christie
2012-05-05  6:07       ` Bart Van Assche
2012-05-07  0:44         ` Mike Christie
2012-05-07  1:15           ` Mike Christie
2012-05-14 18:43           ` Bart Van Assche
2012-05-29 14:56             ` Bart Van Assche
2012-05-05 13:41     ` Bart Van Assche
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).