All of lore.kernel.org
 help / color / mirror / Atom feed
From: Bart Van Assche <bvanassche@acm.org>
To: linux-scsi <linux-scsi@vger.kernel.org>,
	James Bottomley <jbottomley@parallels.com>,
	Mike Christie <michaelc@cs.wisc.edu>,
	Jens Axboe <axboe@kernel.dk>, Tejun Heo <tj@kernel.org>,
	Chanho Min <chanho.min@lge.com>
Subject: Re: [PATCH 0/7 v5] More device removal fixes
Date: Mon, 26 Nov 2012 18:19:51 +0100	[thread overview]
Message-ID: <50B3A4B7.5050801@acm.org> (raw)
In-Reply-To: <50AF5206.9060608@acm.org>

On 11/23/12 11:37, Bart Van Assche wrote:
> On 10/26/12 14:00, Bart Van Assche wrote:
>> Fix a few race conditions that can be triggered by removing a device:
>> [ ... ]
> 
> I'd like to add the patch below to this series. [ ... ]

Below is another patch (hopefully the last) that I'd like to add to
this series. Note: the reason that I only ran into this issue now is
because I only started testing very recently with an ib_srp version
with the host state check removed from srp_queuecommand().

[PATCH] Make scsi_remove_host() wait for device removal

SCSI LLDs may start cleaning up host resources needed by their
queuecommand() callback as soon as scsi_remove_host() finished.
Hence scsi_remove_host() must wait until blk_cleanup_queue() for
all devices associated with the host has finished.

This patch fixes the following kernel oops:

BUG: spinlock bad magic on CPU#0, multipathd/20128
 lock: 0xffff8801b32e8bc8, .magic: ffff8801, .owner: <none>/-1, .owner_cpu: -1556759232
Pid: 20128, comm: multipathd Not tainted 3.7.0-rc7-debug+ #1
Call Trace:
 [<ffffffff8141206f>] spin_dump+0x8c/0x91
 [<ffffffff81412095>] spin_bug+0x21/0x26
 [<ffffffff81218a6f>] do_raw_spin_lock+0x13f/0x150
 [<ffffffff81417b38>] _raw_spin_lock_irqsave+0x78/0xa0
 [<ffffffffa0293c6c>] srp_queuecommand+0x3c/0xc80 [ib_srp]
 [<ffffffffa0002f18>] scsi_dispatch_cmd+0x148/0x310 [scsi_mod]
 [<ffffffffa000a080>] scsi_request_fn+0x320/0x520 [scsi_mod]
 [<ffffffff811ec397>] __blk_run_queue+0x37/0x50
 [<ffffffff811ec3ee>] queue_unplugged+0x3e/0xd0
 [<ffffffff811eef33>] blk_flush_plug_list+0x1c3/0x240
 [<ffffffff811eefc8>] blk_finish_plug+0x18/0x50
 [<ffffffff8119661e>] do_io_submit+0x29e/0xa00
 [<ffffffff81196d90>] sys_io_submit+0x10/0x20
 [<ffffffff81420d82>] system_call_fastpath+0x16/0x1b
---
 drivers/scsi/hosts.c      |   16 ++++++++++++++++
 drivers/scsi/scsi_priv.h  |    2 +-
 drivers/scsi/scsi_scan.c  |    5 ++++-
 drivers/scsi/scsi_sysfs.c |   15 ++++++++++++---
 include/scsi/scsi_host.h  |    1 +
 5 files changed, 34 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/hosts.c b/drivers/scsi/hosts.c
index 593085a..7b6b0b2 100644
--- a/drivers/scsi/hosts.c
+++ b/drivers/scsi/hosts.c
@@ -150,6 +150,19 @@ int scsi_host_set_state(struct Scsi_Host *shost, enum scsi_host_state state)
 }
 EXPORT_SYMBOL(scsi_host_set_state);
 
+static bool scsi_device_list_empty(struct Scsi_Host *shost)
+{
+	bool res;
+
+	WARN_ON_ONCE(irqs_disabled());
+
+	spin_lock_irq(shost->host_lock);
+	res = list_empty(&shost->__devices);
+	spin_unlock_irq(shost->host_lock);
+
+	return res;
+}
+
 /**
  * scsi_remove_host - remove a scsi host
  * @shost:	a pointer to a scsi host to remove
@@ -178,6 +191,8 @@ void scsi_remove_host(struct Scsi_Host *shost)
 		BUG_ON(scsi_host_set_state(shost, SHOST_DEL_RECOVERY));
 	spin_unlock_irqrestore(shost->host_lock, flags);
 
+	wait_event(shost->device_list_empty, scsi_device_list_empty(shost));
+
 	transport_unregister_device(&shost->shost_gendev);
 	device_unregister(&shost->shost_dev);
 	device_del(&shost->shost_gendev);
@@ -351,6 +366,7 @@ struct Scsi_Host *scsi_host_alloc(struct scsi_host_template *sht, int privsize)
 	shost->shost_state = SHOST_CREATED;
 	INIT_LIST_HEAD(&shost->__devices);
 	INIT_LIST_HEAD(&shost->__targets);
+	init_waitqueue_head(&shost->device_list_empty);
 	INIT_LIST_HEAD(&shost->eh_cmd_q);
 	INIT_LIST_HEAD(&shost->starved_list);
 	init_waitqueue_head(&shost->host_wait);
diff --git a/drivers/scsi/scsi_priv.h b/drivers/scsi/scsi_priv.h
index 8f9a0ca..86250fb 100644
--- a/drivers/scsi/scsi_priv.h
+++ b/drivers/scsi/scsi_priv.h
@@ -130,7 +130,7 @@ extern int scsi_sysfs_add_sdev(struct scsi_device *);
 extern int scsi_sysfs_add_host(struct Scsi_Host *);
 extern int scsi_sysfs_register(void);
 extern void scsi_sysfs_unregister(void);
-extern void scsi_sysfs_device_initialize(struct scsi_device *);
+extern int scsi_sysfs_device_initialize(struct scsi_device *);
 extern int scsi_sysfs_target_initialize(struct scsi_device *);
 extern struct scsi_transport_template blank_transport_template;
 extern void __scsi_remove_device(struct scsi_device *);
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 3e58b22..ddea318 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -289,7 +289,10 @@ static struct scsi_device *scsi_alloc_sdev(struct scsi_target *starget,
 	sdev->request_queue->queuedata = sdev;
 	scsi_adjust_queue_depth(sdev, 0, sdev->host->cmd_per_lun);
 
-	scsi_sysfs_device_initialize(sdev);
+	if (scsi_sysfs_device_initialize(sdev)) {
+		display_failure_msg = 0;
+		goto out_device_destroy;
+	}
 
 	if (shost->hostt->slave_alloc) {
 		ret = shost->hostt->slave_alloc(sdev);
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 2661a957..760fc85 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -348,6 +348,8 @@ static void scsi_device_dev_release_usercontext(struct work_struct *work)
 	starget->reap_ref++;
 	list_del(&sdev->siblings);
 	list_del(&sdev->same_target_siblings);
+	if (list_empty(&sdev->host->__devices))
+		wake_up(&sdev->host->device_list_empty);
 	spin_unlock_irqrestore(sdev->host->host_lock, flags);
 
 	cancel_work_sync(&sdev->event_work);
@@ -1109,11 +1111,12 @@ static struct device_type scsi_dev_type = {
 	.groups =	scsi_sdev_attr_groups,
 };
 
-void scsi_sysfs_device_initialize(struct scsi_device *sdev)
+int scsi_sysfs_device_initialize(struct scsi_device *sdev)
 {
 	unsigned long flags;
 	struct Scsi_Host *shost = sdev->host;
 	struct scsi_target  *starget = sdev->sdev_target;
+	int ret = -ENODEV;
 
 	device_initialize(&sdev->sdev_gendev);
 	sdev->sdev_gendev.bus = &scsi_bus_type;
@@ -1128,10 +1131,16 @@ void scsi_sysfs_device_initialize(struct scsi_device *sdev)
 		     sdev->host->host_no, sdev->channel, sdev->id, sdev->lun);
 	sdev->scsi_level = starget->scsi_level;
 	transport_setup_device(&sdev->sdev_gendev);
+
 	spin_lock_irqsave(shost->host_lock, flags);
-	list_add_tail(&sdev->same_target_siblings, &starget->devices);
-	list_add_tail(&sdev->siblings, &shost->__devices);
+	if (scsi_host_scan_allowed(shost)) {
+		list_add_tail(&sdev->same_target_siblings, &starget->devices);
+		list_add_tail(&sdev->siblings, &shost->__devices);
+		ret = 0;
+	}
 	spin_unlock_irqrestore(shost->host_lock, flags);
+
+	return ret;
 }
 
 int scsi_is_sdev_device(const struct device *dev)
diff --git a/include/scsi/scsi_host.h b/include/scsi/scsi_host.h
index 4908480..4ad2d9f 100644
--- a/include/scsi/scsi_host.h
+++ b/include/scsi/scsi_host.h
@@ -561,6 +561,7 @@ struct Scsi_Host {
 	 */
 	struct list_head	__devices;
 	struct list_head	__targets;
+	wait_queue_head_t	device_list_empty;
 	
 	struct scsi_host_cmd_pool *cmd_pool;
 	spinlock_t		free_list_lock;
-- 
1.7.10.4




      reply	other threads:[~2012-11-26 17:19 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-10-26 12:00 [PATCH 0/7 v5] More device removal fixes Bart Van Assche
2012-10-26 12:01 ` [PATCH 1/7] block: Avoid that blk_drain_queue() finishes early Bart Van Assche
2012-10-29  1:47   ` Tejun Heo
2012-10-29  1:52     ` Tejun Heo
2012-10-29 14:35       ` Bart Van Assche
2012-10-26 12:02 ` [PATCH 2/7] block: Let blk_drain_queue() caller obtain the queue lock Bart Van Assche
2012-10-29  1:55   ` Tejun Heo
2012-10-26 12:02 ` [PATCH 3/7] block: Rename queue dead flag Bart Van Assche
2012-10-26 12:03 ` [PATCH 4/7] block: Avoid that request_fn is invoked on a dead queue Bart Van Assche
2012-10-29  1:59   ` Tejun Heo
2012-10-26 12:04 ` [PATCH 5/7] block: Make blk_cleanup_queue() wait until request_fn finished Bart Van Assche
2012-10-29  2:00   ` Tejun Heo
2012-10-26 12:05 ` [PATCH 6/7] Fix race between starved list processing and device removal Bart Van Assche
2012-10-28 18:01   ` Zhuang, Jin Can
2012-10-29 14:32     ` Bart Van Assche
2012-10-30  5:40       ` Zhuang, Jin Can
2012-11-02 10:48         ` Bart Van Assche
2012-11-21 11:06           ` Bart Van Assche
     [not found]         ` <026701cdb8c3$d2e3cb50$78ab61f0$@min@lge.com>
2012-11-21 12:10           ` Bart Van Assche
2012-10-29  2:07   ` Tejun Heo
2012-10-26 12:05 ` [PATCH 7/7] Remove get_device() / put_device() pair from scsi_request_fn() Bart Van Assche
2012-10-29  2:08   ` Tejun Heo
2012-11-23 10:37 ` [PATCH 0/7 v5] More device removal fixes Bart Van Assche
2012-11-26 17:19   ` Bart Van Assche [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=50B3A4B7.5050801@acm.org \
    --to=bvanassche@acm.org \
    --cc=axboe@kernel.dk \
    --cc=chanho.min@lge.com \
    --cc=jbottomley@parallels.com \
    --cc=linux-scsi@vger.kernel.org \
    --cc=michaelc@cs.wisc.edu \
    --cc=tj@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.