From: James Bottomley <James.Bottomley@HansenPartnership.com>
To: Bart Van Assche <Bart.VanAssche@sandisk.com>,
"bblock@linux.vnet.ibm.com" <bblock@linux.vnet.ibm.com>
Cc: "linux-scsi@vger.kernel.org" <linux-scsi@vger.kernel.org>,
"maxg@mellanox.com" <maxg@mellanox.com>,
"israelr@mellanox.com" <israelr@mellanox.com>,
"hare@suse.de" <hare@suse.de>,
"martin.petersen@oracle.com" <martin.petersen@oracle.com>
Subject: Re: [PATCH v3 3/4] sd: Make synchronize cache upon shutdown asynchronous
Date: Sun, 23 Apr 2017 12:28:29 -0500 [thread overview]
Message-ID: <1492968509.2414.6.camel@HansenPartnership.com> (raw)
In-Reply-To: <1492728740.2642.14.camel@sandisk.com>
On Thu, 2017-04-20 at 22:52 +0000, Bart Van Assche wrote:
> On Thu, 2017-04-20 at 15:13 -0700, James Bottomley wrote:
> > How is that possible? Once the device goes into the CANCEL state,
> > it
> > no longer can be found by starget_for_each_device() because
> > scsi_device_get() returns NULL ...
>
> scsi_target_block() is not serialized against __scsi_remove_device().
> I think
> the following sequence of events can cause a queue to be stopped for
> a device
> in the CANCEL state:
> (a) scsi_target_block() triggers a call to scsi_get_device().
> (b) __scsi_remove_device() is called from the context of another
> thread.
> (c) __scsi_remove_device() changes the device state into SDEV_CANCEL.
> (d) scsi_internal_device_block() calls blk_mq_stop_hw_queue().
OK, so the bug is that I didn't prevent the DEL->BLOCK transition which
allows us to go blocked after the new DEL if you were blocked before
when we reached cancel (corrected). but looking at all of this, I
don't like the loose binding of block and queue stop, so this one makes
it more simplistic by deferring the queue stop until we try to execute
a blocked request.
In doing all of this, I folded in your start queue patch (I bet if we
fix this someone's going to want a backport to stable, so we shouldn't
make it difficult).
I also noticed the scsi_wait_for_queuecommand() should be event driven
(we're not supposed to do sleep driven condition checking) and there's
a spurious lock in scsi_request_fn_active() which means we could
eliminate the whole function.
However, the above is for another day. This should work.
James
---
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index e5a2d590a104..a9ec35d9c91b 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1250,6 +1250,12 @@ scsi_prep_state_check(struct scsi_device *sdev, struct request *req)
break;
case SDEV_BLOCK:
case SDEV_CREATED_BLOCK:
+ /* q lock is held only in the non-mq case */
+ if (req->q->mq_ops)
+ blk_mq_stop_hw_queues(req->q);
+ else
+ blk_stop_queue(req->q);
+
ret = BLKPREP_DEFER;
break;
case SDEV_QUIESCE:
@@ -2611,7 +2617,6 @@ scsi_device_set_state(struct scsi_device *sdev, enum scsi_device_state state)
case SDEV_QUIESCE:
case SDEV_OFFLINE:
case SDEV_TRANSPORT_OFFLINE:
- case SDEV_BLOCK:
break;
default:
goto illegal;
@@ -2844,10 +2849,12 @@ static int scsi_request_fn_active(struct scsi_device *sdev)
*/
static void scsi_wait_for_queuecommand(struct scsi_device *sdev)
{
- WARN_ON_ONCE(sdev->host->use_blk_mq);
-
- while (scsi_request_fn_active(sdev))
- msleep(20);
+ if (sdev->request_queue->mq_ops) {
+ synchronize_rcu();
+ } else {
+ while (scsi_request_fn_active(sdev))
+ msleep(20);
+ }
}
/**
@@ -2953,8 +2960,6 @@ EXPORT_SYMBOL(scsi_target_resume);
int
scsi_internal_device_block(struct scsi_device *sdev, bool wait)
{
- struct request_queue *q = sdev->request_queue;
- unsigned long flags;
int err = 0;
err = scsi_device_set_state(sdev, SDEV_BLOCK);
@@ -2970,23 +2975,27 @@ scsi_internal_device_block(struct scsi_device *sdev, bool wait)
* block layer from calling the midlayer with this device's
* request queue.
*/
+ if (wait)
+ scsi_wait_for_queuecommand(sdev);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(scsi_internal_device_block);
+
+void scsi_start_queue(struct scsi_device *sdev)
+{
+ struct request_queue *q = sdev->request_queue;
+ unsigned long flags;
+
if (q->mq_ops) {
- if (wait)
- blk_mq_quiesce_queue(q);
- else
- blk_mq_stop_hw_queues(q);
+ blk_mq_start_stopped_hw_queues(q, false);
} else {
spin_lock_irqsave(q->queue_lock, flags);
- blk_stop_queue(q);
+ blk_start_queue(q);
spin_unlock_irqrestore(q->queue_lock, flags);
- if (wait)
- scsi_wait_for_queuecommand(sdev);
}
-
- return 0;
}
-EXPORT_SYMBOL_GPL(scsi_internal_device_block);
-
+
/**
* scsi_internal_device_unblock - resume a device after a block request
* @sdev: device to resume
@@ -3007,9 +3016,6 @@ int
scsi_internal_device_unblock(struct scsi_device *sdev,
enum scsi_device_state new_state)
{
- struct request_queue *q = sdev->request_queue;
- unsigned long flags;
-
/*
* Try to transition the scsi device to SDEV_RUNNING or one of the
* offlined states and goose the device queue if successful.
@@ -3027,13 +3033,7 @@ scsi_internal_device_unblock(struct scsi_device *sdev,
sdev->sdev_state != SDEV_OFFLINE)
return -EINVAL;
- if (q->mq_ops) {
- blk_mq_start_stopped_hw_queues(q, false);
- } else {
- spin_lock_irqsave(q->queue_lock, flags);
- blk_start_queue(q);
- spin_unlock_irqrestore(q->queue_lock, flags);
- }
+ scsi_start_queue(sdev);
return 0;
}
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index f11bd102d6d5..c7629e31a75b 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -89,6 +89,7 @@ extern void scsi_run_host_queues(struct Scsi_Host *shost);
extern void scsi_requeue_run_queue(struct work_struct *work);
extern struct request_queue *scsi_alloc_queue(struct scsi_device *sdev);
extern struct request_queue *scsi_mq_alloc_queue(struct scsi_device *sdev);
+extern void scsi_start_queue(struct scsi_device *sdev);
extern int scsi_mq_setup_tags(struct Scsi_Host *shost);
extern void scsi_mq_destroy_tags(struct Scsi_Host *shost);
extern int scsi_init_queue(void);
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 82dfe07b1d47..5e826801c84c 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -1282,8 +1282,17 @@ void __scsi_remove_device(struct scsi_device *sdev)
return;
if (sdev->is_visible) {
- if (scsi_device_set_state(sdev, SDEV_CANCEL) != 0)
- return;
+ /*
+ * If blocked, we go straight to DEL so any commands
+ * issued during the driver shutdown (like sync cache)
+ * are errored
+ */
+ if (scsi_device_set_state(sdev, SDEV_CANCEL) != 0) {
+ if (scsi_device_set_state(sdev, SDEV_DEL) != 0)
+ return;
+
+ scsi_start_queue(sdev);
+ }
bsg_unregister_queue(sdev->request_queue);
device_unregister(&sdev->sdev_dev);
next prev parent reply other threads:[~2017-04-23 17:28 UTC|newest]
Thread overview: 25+ messages / expand[flat|nested] mbox.gz Atom feed top
2017-04-17 17:34 [PATCH v3 0/4] Avoid that __scsi_remove_device() hangs Bart Van Assche
2017-04-17 17:34 ` [PATCH v3 1/4] Introduce scsi_start_queue() Bart Van Assche
2017-04-17 17:34 ` [PATCH v3 2/4] Introduce scsi_execute_async() Bart Van Assche
2017-04-17 17:34 ` [PATCH v3 3/4] sd: Make synchronize cache upon shutdown asynchronous Bart Van Assche
2017-04-18 14:44 ` Benjamin Block
2017-04-18 15:34 ` Bart Van Assche
2017-04-18 15:56 ` James Bottomley
2017-04-18 16:06 ` Bart Van Assche
2017-04-18 23:47 ` Bart Van Assche
2017-04-18 23:56 ` James Bottomley
2017-04-19 0:02 ` Bart Van Assche
2017-04-19 0:05 ` James Bottomley
2017-04-19 18:42 ` Bart Van Assche
2017-04-20 21:59 ` Bart Van Assche
2017-04-20 22:13 ` James Bottomley
2017-04-20 22:27 ` Bart Van Assche
2017-04-20 22:52 ` Bart Van Assche
2017-04-23 17:28 ` James Bottomley [this message]
2017-04-24 21:46 ` Bart Van Assche
2017-04-26 18:53 ` Bart Van Assche
2017-04-28 18:45 ` James Bottomley
2017-04-24 7:14 ` [lkp-robot] [sd] ab1218235c: INFO:possible_recursive_locking_detected kernel test robot
2017-04-17 17:34 ` [PATCH v3 4/4] Avoid that __scsi_remove_device() hangs Bart Van Assche
2017-04-18 11:58 ` [PATCH v3 0/4] " Israel Rukshin
2017-04-18 15:40 ` Bart Van Assche
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1492968509.2414.6.camel@HansenPartnership.com \
--to=james.bottomley@hansenpartnership.com \
--cc=Bart.VanAssche@sandisk.com \
--cc=bblock@linux.vnet.ibm.com \
--cc=hare@suse.de \
--cc=israelr@mellanox.com \
--cc=linux-scsi@vger.kernel.org \
--cc=martin.petersen@oracle.com \
--cc=maxg@mellanox.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox