* [RFC] Asynchronous scsi scanning
@ 2006-05-11 14:33 Matthew Wilcox
2006-05-11 18:15 ` Mike Christie
2006-05-18 17:22 ` [PATCH] " Matthew Wilcox
0 siblings, 2 replies; 41+ messages in thread
From: Matthew Wilcox @ 2006-05-11 14:33 UTC (permalink / raw)
To: linux-scsi
A customer has a machine with 162 scsi hosts, and just scanning the scsi
busses takes over an hour. Here's what I've come up with to reduce that.
For drivers which call scsi_scan_host(), no changes are necessary.
The fibrechannel and SAS drivers are going to take a bit more work,
but I thought I'd send out the core first. I'm not entirely happy about
how the threads rendezvous; it'd be nice to not have to use a completion.
One user-visible change in behaviour is that after loading a driver, the
insmod will return before discovery is finished. Apparently Ubuntu's
userspace already copes with this, but it's something to be aware of.
The late_initcall takes care of this for built-in modules, but it'd be
nice if there were an API to say "run this function before insmod exits".
Index: ./drivers/scsi/scsi_scan.c
===================================================================
RCS file: /var/cvs/linux-2.6/drivers/scsi/scsi_scan.c,v
retrieving revision 1.38
diff -u -p -r1.38 scsi_scan.c
--- ./drivers/scsi/scsi_scan.c 19 Apr 2006 04:55:59 -0000 1.38
+++ ./drivers/scsi/scsi_scan.c 11 May 2006 13:18:42 -0000
@@ -30,7 +30,9 @@
#include <linux/moduleparam.h>
#include <linux/init.h>
#include <linux/blkdev.h>
-#include <asm/semaphore.h>
+#include <linux/delay.h>
+#include <linux/kthread.h>
+#include <linux/spinlock.h>
#include <scsi/scsi.h>
#include <scsi/scsi_device.h>
@@ -109,6 +111,22 @@ MODULE_PARM_DESC(inq_timeout,
"Timeout (in seconds) waiting for devices to answer INQUIRY."
" Default is 5. Some non-compliant devices need more.");
+static spinlock_t async_scan_lock = SPIN_LOCK_UNLOCKED;
+static LIST_HEAD(scanning_hosts);
+
+static int scsi_complete_async_scans(void)
+{
+ if (list_empty(&scanning_hosts))
+ return 0;
+
+ printk(KERN_INFO "scsi: waiting for bus probes to complete ...\n");
+ while (!list_empty(&scanning_hosts))
+ ssleep(1);
+ return 0;
+}
+late_initcall(scsi_complete_async_scans);
+
+
/**
* scsi_unlock_floptical - unlock device via a special MODE SENSE command
* @sdev: scsi device to send command to
@@ -629,7 +647,8 @@ static int scsi_probe_lun(struct scsi_de
* SCSI_SCAN_NO_RESPONSE: could not allocate or setup a scsi_device
* SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized
**/
-static int scsi_add_lun(struct scsi_device *sdev, char *inq_result, int *bflags)
+static int scsi_add_lun(struct scsi_device *sdev, char *inq_result,
+ int *bflags, int async)
{
/*
* XXX do not save the inquiry, since it can change underneath us,
@@ -802,7 +821,7 @@ static int scsi_add_lun(struct scsi_devi
* register it and tell the rest of the kernel
* about it.
*/
- if (scsi_sysfs_add_sdev(sdev) != 0)
+ if (!async && scsi_sysfs_add_sdev(sdev) != 0)
return SCSI_SCAN_NO_RESPONSE;
return SCSI_SCAN_LUN_PRESENT;
@@ -914,7 +933,7 @@ static int scsi_probe_and_add_lun(struct
goto out_free_result;
}
- res = scsi_add_lun(sdev, result, &bflags);
+ res = scsi_add_lun(sdev, result, &bflags, shost->async_scan);
if (res == SCSI_SCAN_LUN_PRESENT) {
if (bflags & BLIST_KEY) {
sdev->lockable = 0;
@@ -1427,6 +1446,9 @@ void scsi_scan_target(struct device *par
{
struct Scsi_Host *shost = dev_to_shost(parent);
+ if (!shost->async_scan)
+ scsi_complete_async_scans();
+
mutex_lock(&shost->scan_mutex);
if (scsi_host_scan_allowed(shost))
__scsi_scan_target(parent, channel, id, lun, rescan);
@@ -1492,14 +1514,121 @@ int scsi_scan_host_selected(struct Scsi_
return 0;
}
+/* The error handling here is pretty yucky. Do we want an
+ * shost_for_each_device_safe() iterator?
+ */
+static void scsi_sysfs_add_devices(struct Scsi_Host *shost)
+{
+ struct scsi_device *sdev;
+ shost_for_each_device(sdev, shost) {
+ int err;
+ next:
+ err = scsi_sysfs_add_sdev(sdev);
+ if (err) {
+ struct scsi_device *tmp = sdev;
+ sdev = __scsi_iterate_devices(shost, sdev);
+ scsi_destroy_sdev(tmp);
+ goto next;
+ }
+ }
+}
+
+struct async_scan_data {
+ struct list_head list;
+ struct Scsi_Host *shost;
+ struct completion prev_finished;
+};
+
+/**
+ * scsi_prep_async_scan - prepare for an async scan
+ * @shost: the host which will be scanned
+ * Returns: a cookie to be passed to scsi_finish_async_scan()
+ *
+ * If your driver does not use scsi_scan_host(), you can call this function
+ * to tell the midlayer you're about to commence an asynchronous scan.
+ * This reserves your device's position in the scanning list and ensures
+ * that other asynchronous scans started after yours won't affect the
+ * disc ordering.
+ */
+struct async_scan_data * scsi_prep_async_scan(struct Scsi_Host *shost)
+{
+ struct async_scan_data *data;
+
+ if (shost->async_scan) {
+ printk("%s called twice for host %d", __FUNCTION__,
+ shost->host_no);
+ dump_stack();
+ return NULL;
+ }
+
+ data = kmalloc(sizeof(*data), GFP_KERNEL);
+ data->shost = shost;
+ init_completion(&data->prev_finished);
+
+ spin_lock(&async_scan_lock);
+ shost->async_scan = 1;
+ if (list_empty(&scanning_hosts))
+ complete(&data->prev_finished);
+ list_add_tail(&data->list, &scanning_hosts);
+ spin_unlock(&async_scan_lock);
+
+ return data;
+}
+EXPORT_SYMBOL_GPL(scsi_prep_async_scan);
+
+/**
+ * scsi_finish_async_scan - asynchronous scan has finished
+ * @data: cookie returned from earlier call to scsi_prep_async_scan()
+ *
+ * Once your driver has found all the devices currently present, call
+ * this function. It will announce all the devices it has found to
+ * the rest of the system.
+ */
+void scsi_finish_async_scan(struct async_scan_data *data)
+{
+ if (!data->shost->async_scan) {
+ printk("%s called twice for host %d", __FUNCTION__,
+ data->shost->host_no);
+ dump_stack();
+ return;
+ }
+
+ wait_for_completion(&data->prev_finished);
+
+ scsi_sysfs_add_devices(data->shost);
+
+ spin_lock(&async_scan_lock);
+ data->shost->async_scan = 0;
+ list_del(&data->list);
+ spin_unlock(&async_scan_lock);
+ if (!list_empty(&scanning_hosts)) {
+ struct async_scan_data *next = list_entry(scanning_hosts.next,
+ struct async_scan_data, list);
+ complete(&next->prev_finished);
+ }
+
+ kfree(data);
+}
+EXPORT_SYMBOL_GPL(scsi_finish_async_scan);
+
+static int do_scan_async(void *_data)
+{
+ struct async_scan_data *data = _data;
+ scsi_scan_host_selected(data->shost, SCAN_WILD_CARD, SCAN_WILD_CARD,
+ SCAN_WILD_CARD, 0);
+
+ scsi_finish_async_scan(data);
+ return 0;
+}
+
/**
* scsi_scan_host - scan the given adapter
* @shost: adapter to scan
**/
void scsi_scan_host(struct Scsi_Host *shost)
{
- scsi_scan_host_selected(shost, SCAN_WILD_CARD, SCAN_WILD_CARD,
- SCAN_WILD_CARD, 0);
+ struct async_scan_data *data = scsi_prep_async_scan(shost);
+ kthread_run(do_scan_async, data, "scsi_scan_%d", shost->host_no);
}
EXPORT_SYMBOL(scsi_scan_host);
Index: ./include/scsi/scsi_device.h
===================================================================
RCS file: /var/cvs/linux-2.6/include/scsi/scsi_device.h,v
retrieving revision 1.27
diff -u -p -r1.27 scsi_device.h
--- ./include/scsi/scsi_device.h 3 Apr 2006 13:46:08 -0000 1.27
+++ ./include/scsi/scsi_device.h 11 May 2006 13:18:42 -0000
@@ -298,6 +298,10 @@ extern int scsi_execute_async(struct scs
void (*done)(void *, char *, int, int),
gfp_t gfp);
+struct async_scan_data;
+struct async_scan_data * scsi_prep_async_scan(struct Scsi_Host *shost);
+void scsi_finish_async_scan(struct async_scan_data *data);
+
static inline void scsi_device_reprobe(struct scsi_device *sdev)
{
device_reprobe(&sdev->sdev_gendev);
Index: ./include/scsi/scsi_host.h
===================================================================
RCS file: /var/cvs/linux-2.6/include/scsi/scsi_host.h,v
retrieving revision 1.26
diff -u -p -r1.26 scsi_host.h
--- ./include/scsi/scsi_host.h 19 Apr 2006 04:56:20 -0000 1.26
+++ ./include/scsi/scsi_host.h 11 May 2006 13:18:42 -0000
@@ -541,6 +541,9 @@ struct Scsi_Host {
*/
unsigned ordered_tag:1;
+ /* Are we currently performing an async scan? */
+ unsigned async_scan:1;
+
/*
* Optional work queue to be utilized by the transport
*/
^ permalink raw reply [flat|nested] 41+ messages in thread* Re: [RFC] Asynchronous scsi scanning 2006-05-11 14:33 [RFC] Asynchronous scsi scanning Matthew Wilcox @ 2006-05-11 18:15 ` Mike Christie 2006-05-11 18:21 ` Matthew Wilcox 2006-05-18 17:22 ` [PATCH] " Matthew Wilcox 1 sibling, 1 reply; 41+ messages in thread From: Mike Christie @ 2006-05-11 18:15 UTC (permalink / raw) To: Matthew Wilcox; +Cc: linux-scsi Matthew Wilcox wrote: > A customer has a machine with 162 scsi hosts, and just scanning the scsi > busses takes over an hour. Here's what I've come up with to reduce that. > For drivers which call scsi_scan_host(), no changes are necessary. > The fibrechannel and SAS drivers are going to take a bit more work, > but I thought I'd send out the core first. I'm not entirely happy about > how the threads rendezvous; it'd be nice to not have to use a completion. > > One user-visible change in behaviour is that after loading a driver, the > insmod will return before discovery is finished. Apparently Ubuntu's > userspace already copes with this, but it's something to be aware of. > The late_initcall takes care of this for built-in modules, but it'd be > nice if there were an API to say "run this function before insmod exits". > Instead of all the scanning from work queue and kthreads, what about just running something from the host addition hotplug event. When a host or target or rport or whatever we want is added, userspace gets a hotplug event today, I thought. From that event could you just do echo - - - > path-to-object/scan This is basically what we do for iscsi. We can scan all hosts or sessions in parallel which makes startup pretty fast. The problem is of course getting the distros to support it. I guess if they have to support async scanning for all hosts (I think fedora and red hat does not support this - maybe just for usb), then they could also add userspace scanning support at the same time. ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [RFC] Asynchronous scsi scanning 2006-05-11 18:15 ` Mike Christie @ 2006-05-11 18:21 ` Matthew Wilcox 2006-05-11 18:49 ` Mike Christie 0 siblings, 1 reply; 41+ messages in thread From: Matthew Wilcox @ 2006-05-11 18:21 UTC (permalink / raw) To: Mike Christie; +Cc: linux-scsi On Thu, May 11, 2006 at 01:15:40PM -0500, Mike Christie wrote: > Instead of all the scanning from work queue and kthreads, what about > just running something from the host addition hotplug event. When a host > or target or rport or whatever we want is added, userspace gets a > hotplug event today, I thought. From that event could you just do > > echo - - - > path-to-object/scan 1) Assumes userspace exists. People still use monolithic kernels and there's no requirement for initramfs yet. 2) Unless it's serialised (in which case it still takes two hours to boot), you lose drive numbering. Now, arguably, we don't need to preserve drive numbering any more, since we have alternatives like uuids. But I can guarantee you we'll be buried in bug reports and unhappy users if drive numbers start changing arbitrarily. ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [RFC] Asynchronous scsi scanning 2006-05-11 18:21 ` Matthew Wilcox @ 2006-05-11 18:49 ` Mike Christie 2006-05-11 18:56 ` Matthew Wilcox 0 siblings, 1 reply; 41+ messages in thread From: Mike Christie @ 2006-05-11 18:49 UTC (permalink / raw) To: Matthew Wilcox; +Cc: linux-scsi Matthew Wilcox wrote: > On Thu, May 11, 2006 at 01:15:40PM -0500, Mike Christie wrote: >> Instead of all the scanning from work queue and kthreads, what about >> just running something from the host addition hotplug event. When a host >> or target or rport or whatever we want is added, userspace gets a >> hotplug event today, I thought. From that event could you just do >> >> echo - - - > path-to-object/scan > > 1) Assumes userspace exists. People still use monolithic kernels and > there's no requirement for initramfs yet. Ok you are right, I thought you were going to handle the insmod returning early problem in userspace with some sort of wait like is done for usb in I think fedora. > 2) Unless it's serialised (in which case it still takes two hours to > boot), you lose drive numbering. > Drive numbering refers to the bus, target numbering right? Currently when you scan a host you are serialized at the host level because of the host->scan_mutex aren't we? So for each host addition hotplug event you can scan that host with the command above, and then you can scan all your hosts in parallel and the drive numbering is not affected is it? I thought, your patch is basically creating a kernel thread and running scan host selected, which grabs the scan_mutex, with all wild cards. What I suggested ends up calling scan host selected with all wild cards but from userspace. ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [RFC] Asynchronous scsi scanning 2006-05-11 18:49 ` Mike Christie @ 2006-05-11 18:56 ` Matthew Wilcox 2006-05-11 19:09 ` Mike Christie 0 siblings, 1 reply; 41+ messages in thread From: Matthew Wilcox @ 2006-05-11 18:56 UTC (permalink / raw) To: Mike Christie; +Cc: linux-scsi On Thu, May 11, 2006 at 01:49:26PM -0500, Mike Christie wrote: > > 2) Unless it's serialised (in which case it still takes two hours to > > boot), you lose drive numbering. > > Drive numbering refers to the bus, target numbering right? No, I mean sda, sdb, sdc, etc. > Currently > when you scan a host you are serialized at the host level because of the > host->scan_mutex aren't we? So for each host addition hotplug event you > can scan that host with the command above, and then you can scan all > your hosts in parallel and the drive numbering is not affected is it? I > thought, your patch is basically creating a kernel thread and running > scan host selected, which grabs the scan_mutex, with all wild cards. > What I suggested ends up calling scan host selected with all wild cards > but from userspace. The trick is to avoid adding the sdevs to sysfs until all prior sdevs have been added. I don't see a good way to do that from userspace. ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [RFC] Asynchronous scsi scanning 2006-05-11 18:56 ` Matthew Wilcox @ 2006-05-11 19:09 ` Mike Christie 0 siblings, 0 replies; 41+ messages in thread From: Mike Christie @ 2006-05-11 19:09 UTC (permalink / raw) To: Matthew Wilcox; +Cc: linux-scsi Matthew Wilcox wrote: > On Thu, May 11, 2006 at 01:49:26PM -0500, Mike Christie wrote: >>> 2) Unless it's serialised (in which case it still takes two hours to >>> boot), you lose drive numbering. >> Drive numbering refers to the bus, target numbering right? > > No, I mean sda, sdb, sdc, etc. > Ah ok, I see what you mean. ^ permalink raw reply [flat|nested] 41+ messages in thread
* [PATCH] Asynchronous scsi scanning 2006-05-11 14:33 [RFC] Asynchronous scsi scanning Matthew Wilcox 2006-05-11 18:15 ` Mike Christie @ 2006-05-18 17:22 ` Matthew Wilcox 2006-05-29 3:19 ` Asynchronous scsi scanning, version 9 Matthew Wilcox 1 sibling, 1 reply; 41+ messages in thread From: Matthew Wilcox @ 2006-05-18 17:22 UTC (permalink / raw) To: linux-scsi On Thu, May 11, 2006 at 08:33:52AM -0600, Matthew Wilcox wrote: > A customer has a machine with 162 scsi hosts, and just scanning the scsi > busses takes over an hour. Here's what I've come up with to reduce that. > For drivers which call scsi_scan_host(), no changes are necessary. > The fibrechannel and SAS drivers are going to take a bit more work, > but I thought I'd send out the core first. I'm not entirely happy about > how the threads rendezvous; it'd be nice to not have to use a completion. I guess nobody looked at this patch to find my bugs ;-) Here's an updated patch which fixes a few problems: - scsi_complete_async_scans() now hooks onto the end of the list and waits for completion, rather than the rather hacky 'sleep for one second until the list is empty'. - scsi_prep_async_scan() now takes a reference to the host, which is released by scsi_finish_async_scan(). That prevents the host from going away while we're scanning. I don't think this is a danger with normal scsi hosts, but I couldn't prove USB couldn't do it ... and better safe than sorry. - Handle some error conditions like kmalloc() and scsi_host_get() failing. - Keep the list lock around the wake-up-the-next-one logic to prevent a double-completion. I'm going to put this patch into the parisc-linux tree to get it some more testers. It'd be nice if we could get this into 2.6.18 ... --- cut --- Scanning SCSI busses takes an inordinately long time at boot. Attempt to ameliorate the situation by scanning all scsi busses in parallel. 
This effects a dramatic improvement in boot time, even on machines with a single scsi bus, as the bus scan can complete while other initialisation is occurring. On one large configuration I have access to, it cuts boot time from 276.45 seconds down to 149.59 seconds; over two minutes saved. Great care is taken not to perturb scsi device naming. Signed-off-by: Matthew Wilcox <mattheww@wil.cx> Index: ./drivers/scsi/scsi_scan.c =================================================================== RCS file: /var/cvs/linux-2.6/drivers/scsi/scsi_scan.c,v retrieving revision 1.38 diff -u -p -r1.38 scsi_scan.c --- ./drivers/scsi/scsi_scan.c 19 Apr 2006 04:55:59 -0000 1.38 +++ ./drivers/scsi/scsi_scan.c 18 May 2006 16:34:50 -0000 @@ -30,7 +30,9 @@ #include <linux/moduleparam.h> #include <linux/init.h> #include <linux/blkdev.h> -#include <asm/semaphore.h> +#include <linux/delay.h> +#include <linux/kthread.h> +#include <linux/spinlock.h> #include <scsi/scsi.h> #include <scsi/scsi_device.h> @@ -109,6 +111,45 @@ MODULE_PARM_DESC(inq_timeout, "Timeout (in seconds) waiting for devices to answer INQUIRY." " Default is 5. 
Some non-compliant devices need more."); +static spinlock_t async_scan_lock = SPIN_LOCK_UNLOCKED; +static LIST_HEAD(scanning_hosts); + +struct async_scan_data { + struct list_head list; + struct Scsi_Host *shost; + struct completion prev_finished; +}; + +static int scsi_complete_async_scans(void) +{ + struct async_scan_data *data; + if (list_empty(&scanning_hosts)) + return 0; + + data = kmalloc(sizeof(*data), GFP_KERNEL); + data->shost = NULL; + init_completion(&data->prev_finished); + + spin_lock(&async_scan_lock); + if (list_empty(&scanning_hosts)) + goto done; + list_add_tail(&data->list, &scanning_hosts); + spin_unlock(&async_scan_lock); + + printk(KERN_INFO "scsi: waiting for bus probes to complete ...\n"); + wait_for_completion(&data->prev_finished); + + spin_lock(&async_scan_lock); + list_del(&data->list); + spin_unlock(&async_scan_lock); + + done: + kfree(data); + return 0; +} +late_initcall(scsi_complete_async_scans); + + /** * scsi_unlock_floptical - unlock device via a special MODE SENSE command * @sdev: scsi device to send command to @@ -629,7 +670,8 @@ static int scsi_probe_lun(struct scsi_de * SCSI_SCAN_NO_RESPONSE: could not allocate or setup a scsi_device * SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized **/ -static int scsi_add_lun(struct scsi_device *sdev, char *inq_result, int *bflags) +static int scsi_add_lun(struct scsi_device *sdev, char *inq_result, + int *bflags, int async) { /* * XXX do not save the inquiry, since it can change underneath us, @@ -802,7 +844,7 @@ static int scsi_add_lun(struct scsi_devi * register it and tell the rest of the kernel * about it. 
*/ - if (scsi_sysfs_add_sdev(sdev) != 0) + if (!async && scsi_sysfs_add_sdev(sdev) != 0) return SCSI_SCAN_NO_RESPONSE; return SCSI_SCAN_LUN_PRESENT; @@ -914,7 +956,7 @@ static int scsi_probe_and_add_lun(struct goto out_free_result; } - res = scsi_add_lun(sdev, result, &bflags); + res = scsi_add_lun(sdev, result, &bflags, shost->async_scan); if (res == SCSI_SCAN_LUN_PRESENT) { if (bflags & BLIST_KEY) { sdev->lockable = 0; @@ -1427,6 +1469,9 @@ void scsi_scan_target(struct device *par { struct Scsi_Host *shost = dev_to_shost(parent); + if (!shost->async_scan) + scsi_complete_async_scans(); + mutex_lock(&shost->scan_mutex); if (scsi_host_scan_allowed(shost)) __scsi_scan_target(parent, channel, id, lun, rescan); @@ -1492,14 +1537,127 @@ int scsi_scan_host_selected(struct Scsi_ return 0; } +/* The error handling here is pretty yucky. Do we want an + * shost_for_each_device_safe() iterator? + */ +static void scsi_sysfs_add_devices(struct Scsi_Host *shost) +{ + struct scsi_device *sdev; + shost_for_each_device(sdev, shost) { + int err; + next: + err = scsi_sysfs_add_sdev(sdev); + if (err) { + struct scsi_device *tmp = sdev; + sdev = __scsi_iterate_devices(shost, sdev); + scsi_destroy_sdev(tmp); + goto next; + } + } +} + +/** + * scsi_prep_async_scan - prepare for an async scan + * @shost: the host which will be scanned + * Returns: a cookie to be passed to scsi_finish_async_scan() + * + * If your driver does not use scsi_scan_host(), you can call this function + * to tell the midlayer you're about to commence an asynchronous scan. + * This reserves your device's position in the scanning list and ensures + * that other asynchronous scans started after yours won't affect the + * disc ordering. 
+ */ +struct async_scan_data * scsi_prep_async_scan(struct Scsi_Host *shost) +{ + struct async_scan_data *data; + + if (shost->async_scan) { + printk("%s called twice for host %d", __FUNCTION__, + shost->host_no); + dump_stack(); + return NULL; + } + + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) + goto err; + data->shost = scsi_host_get(shost); + if (!data->shost) + goto err; + init_completion(&data->prev_finished); + + spin_lock(&async_scan_lock); + shost->async_scan = 1; + if (list_empty(&scanning_hosts)) + complete(&data->prev_finished); + list_add_tail(&data->list, &scanning_hosts); + spin_unlock(&async_scan_lock); + + return data; + + err: + kfree(data); + return NULL; +} +EXPORT_SYMBOL_GPL(scsi_prep_async_scan); + +/** + * scsi_finish_async_scan - asynchronous scan has finished + * @data: cookie returned from earlier call to scsi_prep_async_scan() + * + * Once your driver has found all the devices currently present, call + * this function. It will announce all the devices it has found to + * the rest of the system. 
+ */ +void scsi_finish_async_scan(struct async_scan_data *data) +{ + struct Scsi_Host *shost = data->shost; + if (!shost->async_scan) { + printk("%s called twice for host %d", __FUNCTION__, + shost->host_no); + dump_stack(); + return; + } + + wait_for_completion(&data->prev_finished); + + scsi_sysfs_add_devices(shost); + + spin_lock(&async_scan_lock); + shost->async_scan = 0; + list_del(&data->list); + if (!list_empty(&scanning_hosts)) { + struct async_scan_data *next = list_entry(scanning_hosts.next, + struct async_scan_data, list); + complete(&next->prev_finished); + } + spin_unlock(&async_scan_lock); + + scsi_host_put(shost); + kfree(data); +} +EXPORT_SYMBOL_GPL(scsi_finish_async_scan); + +static int do_scan_async(void *_data) +{ + struct async_scan_data *data = _data; + scsi_scan_host_selected(data->shost, SCAN_WILD_CARD, SCAN_WILD_CARD, + SCAN_WILD_CARD, 0); + + scsi_finish_async_scan(data); + return 0; +} + /** * scsi_scan_host - scan the given adapter * @shost: adapter to scan **/ void scsi_scan_host(struct Scsi_Host *shost) { - scsi_scan_host_selected(shost, SCAN_WILD_CARD, SCAN_WILD_CARD, - SCAN_WILD_CARD, 0); + struct async_scan_data *data = scsi_prep_async_scan(shost); + if (!data) + return; + kthread_run(do_scan_async, data, "scsi_scan_%d", shost->host_no); } EXPORT_SYMBOL(scsi_scan_host); Index: ./include/scsi/scsi_device.h =================================================================== RCS file: /var/cvs/linux-2.6/include/scsi/scsi_device.h,v retrieving revision 1.27 diff -u -p -r1.27 scsi_device.h --- ./include/scsi/scsi_device.h 3 Apr 2006 13:46:08 -0000 1.27 +++ ./include/scsi/scsi_device.h 18 May 2006 16:34:50 -0000 @@ -298,6 +298,10 @@ extern int scsi_execute_async(struct scs void (*done)(void *, char *, int, int), gfp_t gfp); +struct async_scan_data; +struct async_scan_data * scsi_prep_async_scan(struct Scsi_Host *shost); +void scsi_finish_async_scan(struct async_scan_data *data); + static inline void scsi_device_reprobe(struct scsi_device 
*sdev) { device_reprobe(&sdev->sdev_gendev); Index: ./include/scsi/scsi_host.h =================================================================== RCS file: /var/cvs/linux-2.6/include/scsi/scsi_host.h,v retrieving revision 1.26 diff -u -p -r1.26 scsi_host.h --- ./include/scsi/scsi_host.h 19 Apr 2006 04:56:20 -0000 1.26 +++ ./include/scsi/scsi_host.h 18 May 2006 16:34:50 -0000 @@ -541,6 +541,9 @@ struct Scsi_Host { */ unsigned ordered_tag:1; + /* Are we currently performing an async scan? */ + unsigned async_scan:1; + /* * Optional work queue to be utilized by the transport */ ^ permalink raw reply [flat|nested] 41+ messages in thread
* Asynchronous scsi scanning, version 9 2006-05-18 17:22 ` [PATCH] " Matthew Wilcox @ 2006-05-29 3:19 ` Matthew Wilcox 2006-05-29 8:38 ` Stefan Richter 0 siblings, 1 reply; 41+ messages in thread From: Matthew Wilcox @ 2006-05-29 3:19 UTC (permalink / raw) To: linux-scsi This version of the patch incorporates the feedback from Storage Summit and fixes a few more little bugs. James, could you grep for 'yucky' and advise me whether you prefer the way I've done it, or the way I suggest we might want to do it? --- cut --- Add the scsi_mod.scan kernel parameter to determine how scsi busses are scanned. "sync" is the current behaviour. "none" punts scanning scsi busses to userspace. "async" is the new default. With async, we spawn a kernel thread to scan each scsi host. As each thread completes its scan, it waits for all its predecessors to complete, then adds the devices it found, preserving current device naming. The late_initcall ensures that all built-in drivers finish their scans before init is started. Boot-time benefits from this include cutting 2 minutes from the prior 4 minute boot time of an hp rx8620 with 16 CPUs and 18 scsi hosts, and several seconds from smaller machines. If scsi were moved earlier in the initialisation sequence, more gains could be realised as more devices could be initialised in parallel with the scsi devices. 
Signed-off-by: Matthew Wilcox <matthew@wil.cx> Index: ./drivers/scsi/scsi_scan.c =================================================================== RCS file: /var/cvs/linux-2.6/drivers/scsi/scsi_scan.c,v retrieving revision 1.40.2.1 retrieving revision 1.41 diff -u -p -r1.40.2.1 -r1.41 --- ./drivers/scsi/scsi_scan.c 27 May 2006 15:58:03 -0000 1.40.2.1 +++ ./drivers/scsi/scsi_scan.c 29 May 2006 02:51:18 -0000 1.41 @@ -30,7 +30,9 @@ #include <linux/moduleparam.h> #include <linux/init.h> #include <linux/blkdev.h> -#include <asm/semaphore.h> +#include <linux/delay.h> +#include <linux/kthread.h> +#include <linux/spinlock.h> #include <scsi/scsi.h> #include <scsi/scsi_device.h> @@ -88,6 +90,11 @@ module_param_named(max_luns, max_scsi_lu MODULE_PARM_DESC(max_luns, "last scsi LUN (should be between 1 and 2^32-1)"); +static char scsi_scan_type[] = "async"; + +module_param_string(scan, scsi_scan_type, sizeof(scsi_scan_type), S_IRUGO); +MODULE_PARM_DESC(scan, "sync, async or none"); + /* * max_scsi_report_luns: the maximum number of LUNS that will be * returned from the REPORT LUNS command. 8 times this value must @@ -109,6 +116,50 @@ MODULE_PARM_DESC(inq_timeout, "Timeout (in seconds) waiting for devices to answer INQUIRY." " Default is 5. 
Some non-compliant devices need more."); +static spinlock_t async_scan_lock = SPIN_LOCK_UNLOCKED; +static LIST_HEAD(scanning_hosts); + +struct async_scan_data { + struct list_head list; + struct Scsi_Host *shost; + struct completion prev_finished; +}; + +static int scsi_complete_async_scans(void) +{ + struct async_scan_data *data; + + do { + if (list_empty(&scanning_hosts)) + return 0; + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) + msleep(1); + } while (!data); + + data->shost = NULL; + init_completion(&data->prev_finished); + + spin_lock(&async_scan_lock); + if (list_empty(&scanning_hosts)) + goto done; + list_add_tail(&data->list, &scanning_hosts); + spin_unlock(&async_scan_lock); + + printk(KERN_INFO "scsi: waiting for bus probes to complete ...\n"); + wait_for_completion(&data->prev_finished); + + spin_lock(&async_scan_lock); + list_del(&data->list); + done: + spin_unlock(&async_scan_lock); + + kfree(data); + return 0; +} +late_initcall(scsi_complete_async_scans); + + /** * scsi_unlock_floptical - unlock device via a special MODE SENSE command * @sdev: scsi device to send command to @@ -629,7 +680,8 @@ static int scsi_probe_lun(struct scsi_de * SCSI_SCAN_NO_RESPONSE: could not allocate or setup a scsi_device * SCSI_SCAN_LUN_PRESENT: a new scsi_device was allocated and initialized **/ -static int scsi_add_lun(struct scsi_device *sdev, char *inq_result, int *bflags) +static int scsi_add_lun(struct scsi_device *sdev, char *inq_result, + int *bflags, int async) { /* * XXX do not save the inquiry, since it can change underneath us, @@ -802,7 +854,7 @@ static int scsi_add_lun(struct scsi_devi * register it and tell the rest of the kernel * about it. 
*/ - if (scsi_sysfs_add_sdev(sdev) != 0) + if (!async && scsi_sysfs_add_sdev(sdev) != 0) return SCSI_SCAN_NO_RESPONSE; return SCSI_SCAN_LUN_PRESENT; @@ -914,7 +966,7 @@ static int scsi_probe_and_add_lun(struct goto out_free_result; } - res = scsi_add_lun(sdev, result, &bflags); + res = scsi_add_lun(sdev, result, &bflags, shost->async_scan); if (res == SCSI_SCAN_LUN_PRESENT) { if (bflags & BLIST_KEY) { sdev->lockable = 0; @@ -1427,6 +1479,12 @@ void scsi_scan_target(struct device *par { struct Scsi_Host *shost = dev_to_shost(parent); + if (strncmp(scsi_scan_type, "none", 4) == 0) + return; + + if (!shost->async_scan) + scsi_complete_async_scans(); + mutex_lock(&shost->scan_mutex); if (scsi_host_scan_allowed(shost)) __scsi_scan_target(parent, channel, id, lun, rescan); @@ -1472,6 +1530,9 @@ int scsi_scan_host_selected(struct Scsi_ "%s: <%u:%u:%u>\n", __FUNCTION__, channel, id, lun)); + if (!shost->async_scan) + scsi_complete_async_scans(); + if (((channel != SCAN_WILD_CARD) && (channel > shost->max_channel)) || ((id != SCAN_WILD_CARD) && (id > shost->max_id)) || ((lun != SCAN_WILD_CARD) && (lun > shost->max_lun))) @@ -1492,14 +1553,143 @@ int scsi_scan_host_selected(struct Scsi_ return 0; } +/* The error handling here is pretty yucky. Do we want an + * shost_for_each_device_safe() iterator? + */ +static void scsi_sysfs_add_devices(struct Scsi_Host *shost) +{ + struct scsi_device *sdev; + shost_for_each_device(sdev, shost) { + int err; + next: + err = scsi_sysfs_add_sdev(sdev); + if (err) { + struct scsi_device *tmp = sdev; + sdev = __scsi_iterate_devices(shost, sdev); + scsi_destroy_sdev(tmp); + goto next; + } + } +} + +/** + * scsi_prep_async_scan - prepare for an async scan + * @shost: the host which will be scanned + * Returns: a cookie to be passed to scsi_finish_async_scan() + * + * If your driver does not use scsi_scan_host(), you can call this function + * to tell the midlayer you're about to commence an asynchronous scan. 
+ * This reserves your device's position in the scanning list and ensures + * that other asynchronous scans started after yours won't affect the + * disc ordering. + */ +struct async_scan_data * scsi_prep_async_scan(struct Scsi_Host *shost) +{ + struct async_scan_data *data; + + if (strncmp(scsi_scan_type, "sync", 4) == 0) + return NULL; + + if (shost->async_scan) { + printk("%s called twice for host %d", __FUNCTION__, + shost->host_no); + dump_stack(); + return NULL; + } + + data = kmalloc(sizeof(*data), GFP_KERNEL); + if (!data) + goto err; + data->shost = scsi_host_get(shost); + if (!data->shost) + goto err; + init_completion(&data->prev_finished); + + spin_lock(&async_scan_lock); + shost->async_scan = 1; + if (list_empty(&scanning_hosts)) + complete(&data->prev_finished); + list_add_tail(&data->list, &scanning_hosts); + spin_unlock(&async_scan_lock); + + return data; + + err: + kfree(data); + return NULL; +} +EXPORT_SYMBOL_GPL(scsi_prep_async_scan); + +/** + * scsi_finish_async_scan - asynchronous scan has finished + * @data: cookie returned from earlier call to scsi_prep_async_scan() + * + * Once your driver has found all the devices currently present, call + * this function. It will announce all the devices it has found to + * the rest of the system. 
+ */ +void scsi_finish_async_scan(struct async_scan_data *data) +{ + struct Scsi_Host *shost; + + if (!data) + return; + + shost = data->shost; + if (!shost->async_scan) { + printk("%s called twice for host %d", __FUNCTION__, + shost->host_no); + dump_stack(); + return; + } + + wait_for_completion(&data->prev_finished); + + scsi_sysfs_add_devices(shost); + + spin_lock(&async_scan_lock); + shost->async_scan = 0; + list_del(&data->list); + if (!list_empty(&scanning_hosts)) { + struct async_scan_data *next = list_entry(scanning_hosts.next, + struct async_scan_data, list); + complete(&next->prev_finished); + } + spin_unlock(&async_scan_lock); + + scsi_host_put(shost); + kfree(data); +} +EXPORT_SYMBOL_GPL(scsi_finish_async_scan); + +static int do_scan_async(void *_data) +{ + struct async_scan_data *data = _data; + scsi_scan_host_selected(data->shost, SCAN_WILD_CARD, SCAN_WILD_CARD, + SCAN_WILD_CARD, 0); + + scsi_finish_async_scan(data); + return 0; +} + /** * scsi_scan_host - scan the given adapter * @shost: adapter to scan **/ void scsi_scan_host(struct Scsi_Host *shost) { - scsi_scan_host_selected(shost, SCAN_WILD_CARD, SCAN_WILD_CARD, - SCAN_WILD_CARD, 0); + struct async_scan_data *data; + + if (strncmp(scsi_scan_type, "none", 4) == 0) + return; + + data = scsi_prep_async_scan(shost); + if (!data) { + scsi_scan_host_selected(shost, SCAN_WILD_CARD, SCAN_WILD_CARD, + SCAN_WILD_CARD, 0); + return; + } + kthread_run(do_scan_async, data, "scsi_scan_%d", shost->host_no); } EXPORT_SYMBOL(scsi_scan_host); Index: ./include/scsi/scsi_device.h =================================================================== RCS file: /var/cvs/linux-2.6/include/scsi/scsi_device.h,v retrieving revision 1.28.2.1 retrieving revision 1.28 diff -u -p -r1.28.2.1 -r1.28 --- ./include/scsi/scsi_device.h 27 May 2006 15:58:17 -0000 1.28.2.1 +++ ./include/scsi/scsi_device.h 19 May 2006 02:43:19 -0000 1.28 @@ -298,6 +298,10 @@ extern int scsi_execute_async(struct scs void (*done)(void *, char *, int, 
int), gfp_t gfp); +struct async_scan_data; +struct async_scan_data * scsi_prep_async_scan(struct Scsi_Host *shost); +void scsi_finish_async_scan(struct async_scan_data *data); + static inline void scsi_device_reprobe(struct scsi_device *sdev) { device_reprobe(&sdev->sdev_gendev); Index: ./include/scsi/scsi_host.h =================================================================== RCS file: /var/cvs/linux-2.6/include/scsi/scsi_host.h,v retrieving revision 1.27.2.1 retrieving revision 1.27 diff -u -p -r1.27.2.1 -r1.27 --- ./include/scsi/scsi_host.h 27 May 2006 15:58:17 -0000 1.27.2.1 +++ ./include/scsi/scsi_host.h 19 May 2006 02:43:19 -0000 1.27 @@ -541,6 +541,9 @@ struct Scsi_Host { */ unsigned ordered_tag:1; + /* Are we currently performing an async scan? */ + unsigned async_scan:1; + /* * Optional work queue to be utilized by the transport */ ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: Asynchronous scsi scanning, version 9 2006-05-29 3:19 ` Asynchronous scsi scanning, version 9 Matthew Wilcox @ 2006-05-29 8:38 ` Stefan Richter 2006-05-29 13:05 ` Matthew Wilcox 0 siblings, 1 reply; 41+ messages in thread From: Stefan Richter @ 2006-05-29 8:38 UTC (permalink / raw) To: Matthew Wilcox; +Cc: linux-scsi Matthew Wilcox wrote: > Add the scsi_mod.scan kernel parameter to determine how scsi busses > are scanned. "sync" is the current behaviour. "none" punts scanning > scsi busses to userspace. "async" is the new default. This parameter is only relevant with LLDDs which use scsi_scan_host, right? Furthermore, "sync|async" basically means "serialized|parallelized across host adapters". Does it also mean "finishing before|after driver initialization"? (With LLDDs which use scsi_scan_host.) ... > --- ./include/scsi/scsi_host.h 27 May 2006 15:58:17 -0000 1.27.2.1 > +++ ./include/scsi/scsi_host.h 19 May 2006 02:43:19 -0000 1.27 > @@ -541,6 +541,9 @@ struct Scsi_Host { > */ > unsigned ordered_tag:1; > > + /* Are we currently performing an async scan? */ Perhaps add "private to scsi core" to the comment. > + unsigned async_scan:1; This flag is written under protection of async_scan_lock but read without lock protection and without being an atomic variable. Is this safe? I suppose it is as long as scan methods (by do_scan_async kthread, by another thread associated to the LLDD or transport, by userspace) are not mixed. -- Stefan Richter -=====-=-==- -=-= ===-- http://arcgraph.de/sr/ ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: Asynchronous scsi scanning, version 9 2006-05-29 8:38 ` Stefan Richter @ 2006-05-29 13:05 ` Matthew Wilcox 2006-05-29 13:11 ` Arjan van de Ven 2006-05-31 23:21 ` Patrick Mansfield 0 siblings, 2 replies; 41+ messages in thread From: Matthew Wilcox @ 2006-05-29 13:05 UTC (permalink / raw) To: Stefan Richter; +Cc: linux-scsi On Mon, May 29, 2006 at 10:38:13AM +0200, Stefan Richter wrote: > Matthew Wilcox wrote: > > Add the scsi_mod.scan kernel parameter to determine how scsi busses > > are scanned. "sync" is the current behaviour. "none" punts scanning > > scsi busses to userspace. "async" is the new default. > > This parameter is only relevant with LLDDs which use scsi_scan_host, right? Not entirely. If you set it to "none", scsi_scan_target() also returns without doing anything. If you use the scsi_prep_async_scan() and scsi_finish_async_scan() API, you can also use this infrastructure to make scanning sbp2 synchronised with other scsi hosts. Then the setting of sync vs async also triggers old vs new behaviour. > Furthermore, "sync|async" basically means "serialized|parallelized > across host adapters". Does it also mean "finishing before|after driver > initialization"? (With LLDDs which use scsi_scan_host.) That's what scsi_complete_async_scans() is for. If you have a built-in module, it will wait for the async scans to finish before we get as far as trying to mount root. It does change observable behaviour in that sys_module_init() will return before scans are complete. However, I believe most distros userspace copes with this these days. For example, Debian has: # wait for the udevd childs to finish log_action_begin_msg "Waiting for /dev to be fully populated" while [ -d /dev/.udev/queue/ ]; do sleep 1 udevd_timeout=$(($udevd_timeout - 1)) [...] Since the scsi scan is going to be finding new devices the entire time, the queue directory is going to not empty. > ... 
> > --- ./include/scsi/scsi_host.h 27 May 2006 15:58:17 -0000 1.27.2.1 > > +++ ./include/scsi/scsi_host.h 19 May 2006 02:43:19 -0000 1.27 > > @@ -541,6 +541,9 @@ struct Scsi_Host { > > */ > > unsigned ordered_tag:1; > > > > + /* Are we currently performing an async scan? */ > > Perhaps add "private to scsi core" to the comment. Sure, good idea. > > + unsigned async_scan:1; > > This flag is written under protection of async_scan_lock but read > without lock protection and without being an atomic variable. Is this > safe? I suppose it is as long as scan methods (by do_scan_async kthread, > by another thread associated to the LLDD or transport, by userspace) are > not mixed. Hmmm. It looks to me like there's some really narrow windows where it's unsafe. For example, drivers call scsi_add_host() which makes it visible to userspace. Then userspace could ask to scan something before the driver calls scsi_scan_host(), get past the check for async_scan, then the other thread sets async_scan, so when the first thread calls scsi_add_lun(), it then doesn't add the lun to sysfs. Actually, this one's safe because it'll get added by the second thread when it completes. I've looked some more and there are other races, but I can't see one which results in a double-add or a failed add. Can anyone see one? ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: Asynchronous scsi scanning, version 9 2006-05-29 13:05 ` Matthew Wilcox @ 2006-05-29 13:11 ` Arjan van de Ven 2006-05-29 13:19 ` Matthew Wilcox 2006-05-31 23:21 ` Patrick Mansfield 1 sibling, 1 reply; 41+ messages in thread From: Arjan van de Ven @ 2006-05-29 13:11 UTC (permalink / raw) To: Matthew Wilcox; +Cc: linux-scsi, Stefan Richter On Mon, 2006-05-29 at 07:05 -0600, Matthew Wilcox wrote: > On Mon, May 29, 2006 at 10:38:13AM +0200, Stefan Richter wrote: > > Matthew Wilcox wrote: > > > Add the scsi_mod.scan kernel parameter to determine how scsi busses > > > are scanned. "sync" is the current behaviour. "none" punts scanning > > > scsi busses to userspace. "async" is the new default. > > > > This parameter is only relevant with LLDDs which use scsi_scan_host, right? > > Not entirely. If you set it to "none", scsi_scan_target() also returns > without doing anything. If you use the scsi_prep_async_scan() and > scsi_finish_async_scan() API, you can also use this infrastructure to > make scanning sbp2 synchronised with other scsi hosts. Then the setting > of sync vs async also triggers old vs new behaviour. > > > Furthermore, "sync|async" basically means "serialized|parallelized > > across host adapters". Does it also mean "finishing before|after driver > > initialization"? (With LLDDs which use scsi_scan_host.) > > That's what scsi_complete_async_scans() is for. If you have a built-in > module, it will wait for the async scans to finish before we get as far > as trying to mount root. that sounds the wrong place, at least, I would expect the wait is needed before starting the initramfs, since that is where some distros mount their root (mount-by-label and I suspect many other cases will do it from there, such as iscsi rootfs etc) ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: Asynchronous scsi scanning, version 9 2006-05-29 13:11 ` Arjan van de Ven @ 2006-05-29 13:19 ` Matthew Wilcox 0 siblings, 0 replies; 41+ messages in thread From: Matthew Wilcox @ 2006-05-29 13:19 UTC (permalink / raw) To: Arjan van de Ven; +Cc: linux-scsi, Stefan Richter On Mon, May 29, 2006 at 03:11:07PM +0200, Arjan van de Ven wrote: > > That's what scsi_complete_async_scans() is for. If you have a built-in > > module, it will wait for the async scans to finish before we get as far > > as trying to mount root. > > that sounds the wrong place, at least, I would expect the wait is needed > before starting the initramfs, since that is where some distros mount > their root (mount-by-label and I suspect many other cases will do it > from there, such as iscsi rootfs etc) It's a late_initcall(), so it happens before initramfs too. ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: Asynchronous scsi scanning, version 9 2006-05-29 13:05 ` Matthew Wilcox 2006-05-29 13:11 ` Arjan van de Ven @ 2006-05-31 23:21 ` Patrick Mansfield 2006-06-01 12:22 ` Kay Sievers 2006-06-01 13:14 ` Alexander E. Patrakov 1 sibling, 2 replies; 41+ messages in thread From: Patrick Mansfield @ 2006-05-31 23:21 UTC (permalink / raw) To: Matthew Wilcox, linux-hotplug-devel; +Cc: Stefan Richter, linux-scsi [adding hotplug-devel ... maybe Marco or Kay can comment] On Mon, May 29, 2006 at 07:05:15AM -0600, Matthew Wilcox wrote: > On Mon, May 29, 2006 at 10:38:13AM +0200, Stefan Richter wrote: > > Matthew Wilcox wrote: > > > Add the scsi_mod.scan kernel parameter to determine how scsi busses > > > are scanned. "sync" is the current behaviour. "none" punts scanning > > > scsi busses to userspace. "async" is the new default. > > > > This parameter is only relevant with LLDDs which use scsi_scan_host, right? > > Not entirely. If you set it to "none", scsi_scan_target() also returns > without doing anything. If you use the scsi_prep_async_scan() and > scsi_finish_async_scan() API, you can also use this infrastructure to > make scanning sbp2 synchronised with other scsi hosts. Then the setting > of sync vs async also triggers old vs new behaviour. > > > Furthermore, "sync|async" basically means "serialized|parallelized > > across host adapters". Does it also mean "finishing before|after driver > > initialization"? (With LLDDs which use scsi_scan_host.) > > That's what scsi_complete_async_scans() is for. If you have a built-in > module, it will wait for the async scans to finish before we get as far > as trying to mount root. It does change observable behaviour in that > sys_module_init() will return before scans are complete. However, I > believe most distros userspace copes with this these days. 
For example, > Debian has: > > # wait for the udevd childs to finish > log_action_begin_msg "Waiting for /dev to be fully populated" > while [ -d /dev/.udev/queue/ ]; do > sleep 1 > udevd_timeout=$(($udevd_timeout - 1)) > [...] Not sure where that is, but AFAIR that is to process the cold plug case, where udev starts up, the hotplug/netlink events are replayed, and we don't want to continue until all those events have been processed. SLES 10 has similar code, but a sleep of 0.1 (see their /etc/init.d/boot.udev, and I think /sbin/mkinitrd). > Since the scsi scan is going to be finding new devices the entire time, > the queue directory is going to not empty. It won't always be finding new devices, there could be glitches like a timeout, or some read (partition check) that happens to take more than a second, and the udev queue becomes empty even though the scsi /sd scan is still in progress. You really want some udev rule that mounts root or such and then the boot continues from there ... rather than waiting for an unrelated set of events, and then trying to mount root unconditionally (and possibly failing). I thought Hannes or someone had posted an example udev rule or such for this. Maybe it is even in SLES 10? Same for applications - you want them to start after a dev (or set of devs) shows up, though if we wait for the root dev it is even less likely that an app's dev will be unavailable. Of course if you aren't using udev in your init{rd|ramfs}, udev rules and such can't fix the problem :-( -- Patrick Mansfield ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: Asynchronous scsi scanning, version 9 2006-05-31 23:21 ` Patrick Mansfield @ 2006-06-01 12:22 ` Kay Sievers 2006-10-26 19:53 ` maximilian attems 2006-06-01 13:14 ` Alexander E. Patrakov 1 sibling, 1 reply; 41+ messages in thread From: Kay Sievers @ 2006-06-01 12:22 UTC (permalink / raw) To: Patrick Mansfield Cc: Matthew Wilcox, linux-hotplug-devel, Stefan Richter, linux-scsi On Wed, 2006-05-31 at 16:21 -0700, Patrick Mansfield wrote: > [adding hotplug-devel ... maybe Marco or Kay can comment] > > On Mon, May 29, 2006 at 07:05:15AM -0600, Matthew Wilcox wrote: > > On Mon, May 29, 2006 at 10:38:13AM +0200, Stefan Richter wrote: > > > Matthew Wilcox wrote: > > > > Add the scsi_mod.scan kernel parameter to determine how scsi busses > > > > are scanned. "sync" is the current behaviour. "none" punts scanning > > > > scsi busses to userspace. "async" is the new default. > > > > > > This parameter is only relevant with LLDDs which use scsi_scan_host, right? > > > > Not entirely. If you set it to "none", scsi_scan_target() also returns > > without doing anything. If you use the scsi_prep_async_scan() and > > scsi_finish_async_scan() API, you can also use this infrastructure to > > make scanning sbp2 synchronised with other scsi hosts. Then the setting > > of sync vs async also triggers old vs new behaviour. > > > > > Furthermore, "sync|async" basically means "serialized|parallelized > > > across host adapters". Does it also mean "finishing before|after driver > > > initialization"? (With LLDDs which use scsi_scan_host.) > > > > That's what scsi_complete_async_scans() is for. If you have a built-in > > module, it will wait for the async scans to finish before we get as far > > as trying to mount root. It does change observable behaviour in that > > sys_module_init() will return before scans are complete. However, I > > believe most distros userspace copes with this these days. 
For example, > > Debian has: > > > > # wait for the udevd childs to finish > > log_action_begin_msg "Waiting for /dev to be fully populated" > > while [ -d /dev/.udev/queue/ ]; do > > sleep 1 > > udevd_timeout=$(($udevd_timeout - 1)) > > [...] That has been replaced by a binary called "udevsettle" which waits for events to finish, by comparing the current kernel event sequence number exported in sysfs with the latest event handled by udev. > Not sure where that is, but AFAIR that is to process the cold plug case, > where udev starts up, the hotplug/netlink events are replayed, and we > don't want to continue until all those events have been processed. > > SLES 10 has similar code, but a sleep of 0.1 (see their /etc/init.d/boot.udev, > and I think /sbin/mkinitrd). It uses only udevsettle now; the partitioner and similar tools also need this to wait for the partition table rescan to finish before continuing to use the new devices. > > Since the scsi scan is going to be finding new devices the entire time, > > the queue directory is going to not empty. Watching only the queue is not enough, because only received events are exported, not events still in the kernel netlink queue. Therefore you need to compare the current kernel seqnum, as udevsettle does. > It won't always be finding new devices, there could be glitches like a > timeout, or some read (partition check) that happens to take more than a > second, and the udev queue becomes empty even though the scsi /sd scan is > still in progress. Right. For the settle time of usb-storage we watch for the kernel thread to go away. :) > You really want some udev rule that mounts root or such and then the boot > continues from there ... rather than waiting for an unrelated set of > events, and then trying to mount root unconditionally (and possibly > failing). I thought Hannes or someone had posted an example udev rule or > such for this. Maybe it is even in SLES 10?
It does not only wait for the queue to become empty. Initramfs creates dynamic udev rules based on the kernel commandline and waits until the device appears. The real root (localfs) waits for all needed devices mentioned in /etc/fstab before continuing. > Same for applications - you want them to start after a dev (or set of > devs) shows up, though if we wait for the root dev it is even less likely > that an app's dev will be unavailable. > > Of course if you aren't using udev in your init{rd|ramfs}, udev rules > and such can't fix the problem :-( Kay ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: Asynchronous scsi scanning, version 9 2006-06-01 12:22 ` Kay Sievers @ 2006-10-26 19:53 ` maximilian attems 0 siblings, 0 replies; 41+ messages in thread From: maximilian attems @ 2006-10-26 19:53 UTC (permalink / raw) To: Kay Sievers Cc: Patrick Mansfield, Matthew Wilcox, linux-hotplug-devel, Stefan Richter, linux-scsi, Greg Kroah-Hartman On Thu, 01 Jun 2006, Kay Sievers wrote: > On Wed, 2006-05-31 at 16:21 -0700, Patrick Mansfield wrote: > > [adding hotplug-devel ... maybe Marco or Kay can comment] > > > > On Mon, May 29, 2006 at 07:05:15AM -0600, Matthew Wilcox wrote: > > > On Mon, May 29, 2006 at 10:38:13AM +0200, Stefan Richter wrote: <snip unrelated> > > > > > > That's what scsi_complete_async_scans() is for. If you have a built-in > > > module, it will wait for the async scans to finish before we get as far > > > as trying to mount root. It does change observable behaviour in that > > > sys_module_init() will return before scans are complete. However, I > > > believe most distros userspace copes with this these days. For example, > > > Debian has: > > > > > > # wait for the udevd childs to finish > > > log_action_begin_msg "Waiting for /dev to be fully populated" > > > while [ -d /dev/.udev/queue/ ]; do > > > sleep 1 > > > udevd_timeout=$(($udevd_timeout - 1)) > > > [...] > > That has been replaced by a binary called "udevsettle" which waits for events > to finish, by comparing the current kernel event sequence number > exported in sysfs with the latest handled event by udev. usb-storage is still giving troubles in that area. in the case of usb-storage udevsettle exits much too early. the /sys uevent_seqnum is the same as the one udev worked on, while dmesg is saying: usb-storage: waiting for device to settle before scanning it would be really helpful if udevsettle would have a uevent to wait on. [adding gregkh to cc] > > second, and the udev queue becomes empty even though the scsi /sd scan is > > still in progress. > > Right.
For the settle time of usb-storage we watch for the kernel thread > to go away. :) bug reports don't confirm that statement. nor do i see any code for it in udevsettle.c. -- maks ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: Asynchronous scsi scanning, version 9 2006-05-31 23:21 ` Patrick Mansfield 2006-06-01 12:22 ` Kay Sievers @ 2006-06-01 13:14 ` Alexander E. Patrakov 2006-06-01 13:21 ` maximilian attems 2006-06-01 13:23 ` Matthew Wilcox 1 sibling, 2 replies; 41+ messages in thread From: Alexander E. Patrakov @ 2006-06-01 13:14 UTC (permalink / raw) To: Patrick Mansfield Cc: Matthew Wilcox, linux-hotplug-devel, Stefan Richter, linux-scsi Patrick Mansfield wrote: >> That's what scsi_complete_async_scans() is for. If you have a built-in >> module, it will wait for the async scans to finish before we get as far >> as trying to mount root. It does change observable behaviour in that >> sys_module_init() will return before scans are complete. What are the expected interactions of this code with early userspace aka "kinit" that comes with (soon to be merged) klibc? Does this scsi_complete_async_scans() function get called at all if such "default" initramfs with kinit is used? The problem is that if it doesn't, kinit will not wait. -- Alexander E. Patrakov ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: Asynchronous scsi scanning, version 9 2006-06-01 13:14 ` Alexander E. Patrakov @ 2006-06-01 13:21 ` maximilian attems 2006-06-01 13:23 ` Matthew Wilcox 1 sibling, 0 replies; 41+ messages in thread From: maximilian attems @ 2006-06-01 13:21 UTC (permalink / raw) To: Alexander E. Patrakov Cc: Patrick Mansfield, Matthew Wilcox, linux-hotplug-devel, Stefan Richter, linux-scsi On Thu, Jun 01, 2006 at 07:14:05PM +0600, Alexander E. Patrakov wrote: > Patrick Mansfield wrote: > >>That's what scsi_complete_async_scans() is for. If you have a built-in > >>module, it will wait for the async scans to finish before we get as far > >>as trying to mount root. It does change observable behaviour in that > >>sys_module_init() will return before scans are complete. > > What are the expected interactions of this code with early userspace aka > "kinit" that comes with (soon to be merged) klibc? Does this > scsi_complete_async_scans() function get called at all if such "default" > initramfs with kinit is used? > > The problem is that if it doesn't, kinit will not wait. unless you build a hardcoded initramfs for a specific box, you want a coldplugging utility on board anyway. udev can be compiled with klibc; put both inside your initramfs. -- maks ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: Asynchronous scsi scanning, version 9 2006-06-01 13:14 ` Alexander E. Patrakov 2006-06-01 13:21 ` maximilian attems @ 2006-06-01 13:23 ` Matthew Wilcox 2006-06-01 13:26 ` Alexander E. Patrakov ` (2 more replies) 1 sibling, 3 replies; 41+ messages in thread From: Matthew Wilcox @ 2006-06-01 13:23 UTC (permalink / raw) To: Alexander E. Patrakov Cc: Patrick Mansfield, linux-hotplug-devel, Stefan Richter, linux-scsi On Thu, Jun 01, 2006 at 07:14:05PM +0600, Alexander E. Patrakov wrote: > Patrick Mansfield wrote: > >>That's what scsi_complete_async_scans() is for. If you have a built-in > >>module, it will wait for the async scans to finish before we get as far > >>as trying to mount root. It does change observable behaviour in that > >>sys_module_init() will return before scans are complete. > > What are the expected interactions of this code with early userspace aka > "kinit" that comes with (soon to be merged) klibc? Does this > scsi_complete_async_scans() function get called at all if such "default" > initramfs with kinit is used? It gets called as a late_initcall -- before initramfs gets started. The problem comes when you load your scsi driver as a module. There's a potential solution to this. If we add a hook to sys_module_init such that it'll call a specified function before returning, we can make it wait until all scans are done. This way, userspace never sees the asynchronous scanning behaviour. But if you have devices of different types, you won't get the overlapping scans. ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: Asynchronous scsi scanning, version 9 2006-06-01 13:23 ` Matthew Wilcox @ 2006-06-01 13:26 ` Alexander E. Patrakov 2006-06-01 14:00 ` Arjan van de Ven 2006-06-25 21:15 ` James Bottomley 2 siblings, 0 replies; 41+ messages in thread From: Alexander E. Patrakov @ 2006-06-01 13:26 UTC (permalink / raw) To: Matthew Wilcox Cc: Patrick Mansfield, linux-hotplug-devel, Stefan Richter, linux-scsi Matthew Wilcox wrote: > On Thu, Jun 01, 2006 at 07:14:05PM +0600, Alexander E. Patrakov wrote: >> Patrick Mansfield wrote: >>>> That's what scsi_complete_async_scans() is for. If you have a built-in >>>> module, it will wait for the async scans to finish before we get as far >>>> as trying to mount root. It does change observable behaviour in that >>>> sys_module_init() will return before scans are complete. >> What are the expected interactions of this code with early userspace aka >> "kinit" that comes with (soon to be merged) klibc? Does this >> scsi_complete_async_scans() function get called at all if such "default" >> initramfs with kinit is used? > > It gets called as a late_initcall -- before initramfs gets started. Many thanks for clarifying the situation. -- Alexander E. Patrakov ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: Asynchronous scsi scanning, version 9 2006-06-01 13:23 ` Matthew Wilcox 2006-06-01 13:26 ` Alexander E. Patrakov @ 2006-06-01 14:00 ` Arjan van de Ven 2006-06-25 21:15 ` James Bottomley 2 siblings, 0 replies; 41+ messages in thread From: Arjan van de Ven @ 2006-06-01 14:00 UTC (permalink / raw) To: Matthew Wilcox Cc: linux-scsi, Stefan Richter, linux-hotplug-devel, Patrick Mansfield, Alexander E. Patrakov On Thu, 2006-06-01 at 07:23 -0600, Matthew Wilcox wrote: > On Thu, Jun 01, 2006 at 07:14:05PM +0600, Alexander E. Patrakov wrote: > > Patrick Mansfield wrote: > > >>That's what scsi_complete_async_scans() is for. If you have a built-in > > >>module, it will wait for the async scans to finish before we get as far > > >>as trying to mount root. It does change observable behaviour in that > > >>sys_module_init() will return before scans are complete. > > > > What are the expected interactions of this code with early userspace aka > > "kinit" that comes with (soon to be merged) klibc? Does this > > scsi_complete_async_scans() function get called at all if such "default" > > initramfs with kinit is used? > > It gets called as a late_initcall -- before initramfs gets started. The > problem comes when you load your scsi driver as a module. > > There's a potential solution to this. If we add a hook to sys_module_init > such that it'll call a specified function before returning, we can make > it wait until all scans are done. This way, userspace never sees the > asynchronous scanning behaviour. But if you have devices of different > types, you won't get the overlapping scans. alternative is a syscall/ioctl/sysfs thing that userland can trigger to cause this wait to happen... would be nice I suppose as a general option..... ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: Asynchronous scsi scanning, version 9 2006-06-01 13:23 ` Matthew Wilcox 2006-06-01 13:26 ` Alexander E. Patrakov 2006-06-01 14:00 ` Arjan van de Ven @ 2006-06-25 21:15 ` James Bottomley 2006-06-25 22:46 ` Matthew Wilcox 2006-06-26 20:58 ` Linas Vepstas 2 siblings, 2 replies; 41+ messages in thread From: James Bottomley @ 2006-06-25 21:15 UTC (permalink / raw) To: Matthew Wilcox Cc: linux-hotplug-devel, Stefan Richter, Alexander E. Patrakov, linux-scsi, Patrick Mansfield On Thu, 2006-06-01 at 07:23 -0600, Matthew Wilcox wrote: > > What are the expected interactions of this code with early userspace aka > > "kinit" that comes with (soon to be merged) klibc? Does this > > scsi_complete_async_scans() function get called at all if such "default" > > initramfs with kinit is used? > > It gets called as a late_initcall -- before initramfs gets started. The > problem comes when you load your scsi driver as a module. Actually, the first problem is that SCSI doesn't compile as a module at all: late_initcalls are defined to be module_init calls for modules. Your adding this to scsi_scan.c now gives us two module_init calls, which won't build. I fixed this by putting an #ifndef MODULE around the late_initcall. > There's a potential solution to this. If we add a hook to sys_module_init > such that it'll call a specified function before returning, we can make > it wait until all scans are done. This way, userspace never sees the > asynchronous scanning behaviour. But if you have devices of different > types, you won't get the overlapping scans. How about this solution which works for me with debian on a huge 6 bus scsi machine: it introduces a new module scsi_wait_scan.ko whose sole job is to wait for the scans to complete in its init function. The
The initrd/initramfs sequence now becomes: insert all other modules modprobe scsi_wait_scan James --- Index: linux-2.6/drivers/scsi/Makefile =================================================================== --- linux-2.6.orig/drivers/scsi/Makefile 2006-06-24 13:37:50.000000000 -0500 +++ linux-2.6/drivers/scsi/Makefile 2006-06-24 13:38:08.000000000 -0500 @@ -22,6 +22,10 @@ obj-$(CONFIG_SCSI) += scsi_mod.o +ifeq ("$(CONFIG_SCSI)", "m") + obj-m += scsi_wait_scan.o +endif + obj-$(CONFIG_RAID_ATTRS) += raid_class.o # --- NOTE ORDERING HERE --- Index: linux-2.6/drivers/scsi/scsi_priv.h =================================================================== --- linux-2.6.orig/drivers/scsi/scsi_priv.h 2006-06-24 13:37:50.000000000 -0500 +++ linux-2.6/drivers/scsi/scsi_priv.h 2006-06-24 13:38:08.000000000 -0500 @@ -49,6 +49,9 @@ { }; #endif +/* scsi_scan.c */ +int scsi_complete_async_scans(void); + /* scsi_devinfo.c */ extern int scsi_get_device_flags(struct scsi_device *sdev, const unsigned char *vendor, Index: linux-2.6/drivers/scsi/scsi_scan.c =================================================================== --- linux-2.6.orig/drivers/scsi/scsi_scan.c 2006-06-24 13:37:50.000000000 -0500 +++ linux-2.6/drivers/scsi/scsi_scan.c 2006-06-24 13:38:08.000000000 -0500 @@ -125,7 +125,7 @@ struct completion prev_finished; }; -static int scsi_complete_async_scans(void) +int scsi_complete_async_scans(void) { struct async_scan_data *data; @@ -157,8 +157,10 @@ kfree(data); return 0; } +EXPORT_SYMBOL_GPL(scsi_complete_async_scans); +#ifndef MODULE late_initcall(scsi_complete_async_scans); - +#endif /** * scsi_unlock_floptical - unlock device via a special MODE SENSE command Index: linux-2.6/drivers/scsi/scsi_wait_scan.c =================================================================== --- /dev/null 1970-01-01 00:00:00.000000000 +0000 +++ linux-2.6/drivers/scsi/scsi_wait_scan.c 2006-06-24 13:38:45.000000000 -0500 @@ -0,0 +1,31 @@ +/* + * scsi_wait_scan.c + * + * Copyright (C) 2006 
James Bottomley <James.Bottomley@SteelEye.com> + * + * This is a simple module to wait until all the async scans are + * complete. The idea is to use it in initrd/initramfs scripts. You + * modprobe it after all the modprobes of the root SCSI drivers and it + * will wait until they have all finished scanning their busses before + * allowing the boot to proceed + */ + +#include <linux/module.h> +#include "scsi_priv.h" + +static int __init wait_scan_init(void) +{ + scsi_complete_async_scans(); + return 0; +} + +static void __exit wait_scan_exit(void) +{ +} + +MODULE_DESCRIPTION("SCSI wait for scans"); +MODULE_AUTHOR("James Bottomley"); +MODULE_LICENSE("GPL"); + +module_init(wait_scan_init); +module_exit(wait_scan_exit); Using Tomcat but need to do more? Need to support web services, security? Get stuff done quickly with pre-integrated technology to make your job easier Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642 ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: Asynchronous scsi scanning, version 9 2006-06-25 21:15 ` James Bottomley @ 2006-06-25 22:46 ` Matthew Wilcox 2006-06-26 8:24 ` Arjan van de Ven 2006-06-26 20:58 ` Linas Vepstas 1 sibling, 1 reply; 41+ messages in thread From: Matthew Wilcox @ 2006-06-25 22:46 UTC (permalink / raw) To: James Bottomley Cc: linux-hotplug-devel, Stefan Richter, Alexander E. Patrakov, linux-scsi, Patrick Mansfield On Sun, Jun 25, 2006 at 04:15:26PM -0500, James Bottomley wrote: > Actually, the first problem is that SCSI doesn't compile as a module at > all: late_initcalls are defined to be module_init calls for modules. > Your adding this to scsi_scan.c now gives us two module_init calls, > which won't build. I fixed this by putting an #ifdef MODULE around the > late_initcall. I think that's a good idea (well, I would, I suggested it to you ;-) > > There's a potential solution to this. If we add a hook to sys_module_init > > such that it'll call a specified function before returning, we can make > > it wait until all scans are done. This way, userspace never sees the > > asynchronous scanning behaviour. But if you have devices of different > > types, you won't get the overlapping scans. > > How about this solution which works for me with debian on a huge 6 bus > scsi machine: it introduces a new module scsi_wait_scan.ko whose sole > job is to wait for the scans to complete in its init function. The > initrd/initramfs sequence now becomes: > > insert all other modules > modprobe scsi_wait_scan I think that's a great idea. I wonder about making it always fail initialisation, since then it can be loaded multiple times without being unloaded. I think Arjan was suggesting some proc or sysfs file that would cause scsi_complete_async_scans() to be called, but I couldn't figure out where a good place to put such a file would be. 
> @@ -22,6 +22,10 @@ > > obj-$(CONFIG_SCSI) += scsi_mod.o > > +ifeq ("$(CONFIG_SCSI)", "m") > + obj-m += scsi_wait_scan.o > +endif We seem to have a bit of an allergy to conditionals in Makefiles these days; how about: tmp-$(CONFIG_SCSI) := scsi_wait_scan.o obj-m += $(tmp-m) > > -static int scsi_complete_async_scans(void) > +int scsi_complete_async_scans(void) > { > struct async_scan_data *data; > > @@ -157,8 +157,10 @@ > kfree(data); > return 0; > } Do you think it's worth putting in something like: /* Only exported for the benefit of scsi_wait_scan */ And maybe ... #ifdef MODULE /* Only exported for the benefit of scsi_wait_scan */ EXPORT_SYMBOL_GPL(scsi_complete_async_scans); #else late_initcall(scsi_complete_async_scans); #endif I really don't want to see driver authors calling it -- if they are, something's gone pretty horribly wrong. Using Tomcat but need to do more? Need to support web services, security? Get stuff done quickly with pre-integrated technology to make your job easier Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642 ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: Asynchronous scsi scanning, version 9 2006-06-25 22:46 ` Matthew Wilcox @ 2006-06-26 8:24 ` Arjan van de Ven 2006-06-26 12:40 ` Matthew Wilcox 0 siblings, 1 reply; 41+ messages in thread From: Arjan van de Ven @ 2006-06-26 8:24 UTC (permalink / raw) To: Matthew Wilcox Cc: James Bottomley, linux-hotplug-devel, linux-scsi, Patrick Mansfield, Stefan Richter, Alexander E. Patrakov > > How about this solution which works for me with debian on a huge 6 bus > > scsi machine: it introduces a new module scsi_wait_scan.ko whose sole > > job is to wait for the scans to complete in its init function. The > > initrd/initramfs sequence now becomes: > > > > insert all other modules > > modprobe scsi_wait_scan > > I think that's a great idea. I wonder about making it always fail > initialisation, since then it can be loaded multiple times without being > unloaded. I think Arjan was suggesting some proc or sysfs file that > would cause scsi_complete_async_scans() to be called, but I couldn't > figure out where a good place to put such a file would be. Hi, just to expand on this: The reason I am suggesting this is to allow the initrd to have a way to wait for device scans before (re)trying to mount the root filesystem. One of the interesting challenges today for the initrd with USB is that you don't know when the devices are visible; now I know this won't fix USB, but it at least introduces a proper method for ensuring that for scsi; I suppose we should make it a generic thing with a notifier chain so that all subsystems that want to can get a callback and finalize their initialization... I can imagine fiber channel cards wanting to use this to wait for LIP etc... One question is if this should get a timeout parameter or if that should be left up to the devices... 
(and I think the initrd needs to try to find the rootfs at least once without waiting, or there should be 2 levels of expensiveness to the wait, so that it'll first try the asynchronous way, but that it can do the more expensive wait rather than causing an outright panic() as happens today when the rootfs cannot be found..) Greetings, Arjan van de Ven Using Tomcat but need to do more? Need to support web services, security? Get stuff done quickly with pre-integrated technology to make your job easier Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642 ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: Asynchronous scsi scanning, version 9 2006-06-26 8:24 ` Arjan van de Ven @ 2006-06-26 12:40 ` Matthew Wilcox 2006-06-26 12:59 ` Arjan van de Ven 2006-06-26 14:44 ` Matthew Dharm 0 siblings, 2 replies; 41+ messages in thread From: Matthew Wilcox @ 2006-06-26 12:40 UTC (permalink / raw) To: Arjan van de Ven Cc: linux-scsi, Stefan Richter, linux-hotplug-devel, Patrick Mansfield, Alexander E. Patrakov, James Bottomley On Mon, Jun 26, 2006 at 10:24:45AM +0200, Arjan van de Ven wrote: > just to expand on this: The reason I am suggesting this is to allow the > initrd to have a way to wait for device scans before (re)trying to mount > the root filesystem. One of the interesting challenges today for the > initrd with USB is that you don't know when the devices are visible; now > I know this won't fix USB, but it at least introduces a proper method > for ensuring that for scsi; I suppose we should make it a generic thing > with a notifier chain so that all subsystems that want to can get a > callback and finalize their initialization... I can imagine fiber > channel cards wanting to use this to wait for LIP etc... Fortunately, USB is SCSI, so we can actually use this code to solve that problem too. I wasn't thinking of USB when I wrote the code (because I didn't know there was a problem). But really, it's very generic stuff; there's a list_head, a pointer to a Scsi_Host and a completion. I'm not 100% sure what the problem is with USB. If it's that we may not have discovered all the USB devices currently plugged in, then I think we need to change the way USB works to use one Scsi_Host for all USB storage devices, and then make each device either its own target or its own channel (probably the former; the latter is less well-tested code). 
If it's simply the chunk of code beginning with: /* Wait for the timeout to expire or for a disconnect */ that's causing the problem, then this is easily fixable in the current scheme: Index: drivers/usb/storage/usb.c =================================================================== RCS file: /var/cvs/linux-2.6/drivers/usb/storage/usb.c,v retrieving revision 1.22 diff -u -p -r1.22 usb.c --- drivers/usb/storage/usb.c 3 Apr 2006 13:45:11 -0000 1.22 +++ drivers/usb/storage/usb.c 26 Jun 2006 12:34:45 -0000 @@ -849,10 +849,13 @@ static void release_everything(struct us static int usb_stor_scan_thread(void * __us) { struct us_data *us = (struct us_data *)__us; + struct async_scan_data *data; printk(KERN_DEBUG "usb-storage: device found at %d\n", us->pusb_dev->devnum); + data = scsi_prep_async_scan(us_to_host(us)); + /* Wait for the timeout to expire or for a disconnect */ if (delay_use > 0) { printk(KERN_DEBUG "usb-storage: waiting for device " @@ -875,12 +878,14 @@ retry: us->max_lun = usb_stor_Bulk_max_lun(us); mutex_unlock(&us->dev_mutex); } - scsi_scan_host(us_to_host(us)); + scsi_scan_target(&us_to_host(us)->shost_gendev, 0, 0, + SCAN_WILD_CARD, 0); printk(KERN_DEBUG "usb-storage: device scan complete\n"); /* Should we unbind if no devices were detected? */ } + scsi_finish_async_scan(data); scsi_host_put(us_to_host(us)); complete_and_exit(&threads_gone, 0); } By the way, if USB intends to stick to its one-host-per-device scheme, it would do well to set host->max_id to 1, rather than failing targets in queuecommand. > One question is if this should get a timeout parameter or if that should > be left up to the devices... No timeout. It's up to the scanners to say they're done. 
> (and I think the initrd needs to try to find the rootfs at least once > without waiting, or there should be 2 levels of expensiveness to the > wait, so that it'll first try the asynchronous way, but that it can do > the more expensive wait rather than causing an outright panic() as > happens today when the rootfs cannot be found..) It's not just root, it's swap and all the other bits of fstab too. No point in being able to reliably mount root if the sysadmin has to manually intervene to mount /home on every boot. Or worse, occasionally intervene ... All the waiting is done in parallel anyway, so you're really trying to squeeze the last 0.0001% out of it; better to wait and be safe. ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: Asynchronous scsi scanning, version 9 2006-06-26 12:40 ` Matthew Wilcox @ 2006-06-26 12:59 ` Arjan van de Ven 2006-06-26 16:03 ` Greg KH 2006-06-26 14:44 ` Matthew Dharm 1 sibling, 1 reply; 41+ messages in thread From: Arjan van de Ven @ 2006-06-26 12:59 UTC (permalink / raw) To: Matthew Wilcox Cc: James Bottomley, linux-hotplug-devel, linux-scsi, Patrick Mansfield, Stefan Richter, Alexander E. Patrakov > Fortunately, USB is SCSI, so we can actually use this code to solve that > problem too. I wasn't thinking of USB when I wrote the code (because I > didn't know there was a problem). But really, it's very generic stuff; > there's a list_head, a pointer to a Scsi_Host and a completion. > > I'm not 100% sure what the problem is with USB. I seem to remember that the problem with USB was that you can't even know how many things are still outstanding; devices are free to appear on the bus at any time after bus power on, without specified timeout... If the kernel can't know what's out there, it can't wait until it has a complete picture either, just by virtue of not being able to know when that is achieved ;-) But I could be wrong entirely; USB storage is not something I delve into daily... Greetings, Arjan van de Ven Using Tomcat but need to do more? Need to support web services, security? Get stuff done quickly with pre-integrated technology to make your job easier Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642 ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: Asynchronous scsi scanning, version 9 2006-06-26 12:59 ` Arjan van de Ven @ 2006-06-26 16:03 ` Greg KH 0 siblings, 0 replies; 41+ messages in thread From: Greg KH @ 2006-06-26 16:03 UTC (permalink / raw) To: Arjan van de Ven Cc: Matthew Wilcox, James Bottomley, linux-hotplug-devel, linux-scsi, Patrick Mansfield, Stefan Richter, Alexander E. Patrakov On Mon, Jun 26, 2006 at 02:59:11PM +0200, Arjan van de Ven wrote: > > > Fortunately, USB is SCSI, so we can actually use this code to solve that > > problem too. I wasn't thinking of USB when I wrote the code (because I > > didn't know there was a problem). But really, it's very generic stuff; > > there's a list_head, a pointer to a Scsi_Host and a completion. > > > > I'm not 100% sure what the problem is with USB. > > I seem to remember that the problem with USB was that you can't even > know how many things are still outstanding; devices are free to appear > on the bus at any time after bus power on, without specified timeout... > If the kernel can't know what's out there, it can't wait until it has a > complete picture either, just by virtue of not being able to know when > that is achieved ;-) This is true. thanks, greg k-h ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: Asynchronous scsi scanning, version 9 2006-06-26 12:40 ` Matthew Wilcox 2006-06-26 12:59 ` Arjan van de Ven @ 2006-06-26 14:44 ` Matthew Dharm 2006-06-26 15:18 ` Matthew Wilcox 2006-06-26 18:55 ` [SPAM] " Doug Ledford 1 sibling, 2 replies; 41+ messages in thread From: Matthew Dharm @ 2006-06-26 14:44 UTC (permalink / raw) To: Matthew Wilcox Cc: James Bottomley, linux-hotplug-devel, linux-scsi, Patrick Mansfield, Stefan Richter, Alexander E. Patrakov, Arjan van de Ven [-- Attachment #1.1: Type: text/plain, Size: 1611 bytes --] On Mon, Jun 26, 2006 at 06:40:01AM -0600, Matthew Wilcox wrote: > I'm not 100% sure what the problem is with USB. If it's that we may not > have discovered all the USB devices currently plugged in, then I think > we need to change the way USB works to use one Scsi_Host for all USB > storage devices, and then make each device either its own target or its > own channel (probably the former; the latter is less well-tested code). There are a couple of things which make this difficult for USB. First, some (many?) USB devices need to be left alone for several seconds after attachment in order to allow them to initialize to the point where they are usable. Second, depending on how many hubs are between the host and target, the time-to-discover the device is highly variable. Third, once discovered the device may still take a long time to be "ready". Think of this as a slow spin-up time. This is different from my first point in that the device can actually accept commands, but all commands will fail with some sort of not-ready type error. As for using one Scsi_Host... there are several usb-storage devices which attach to an entire SCSI bus (not just a single target), so we can't make each device its own target. Also, at the time this was all written it wasn't possible to dynamically remove (with any stability) individual targets or channels. Perhaps that has changed? 
Matt -- Matthew Dharm Home: mdharm-usb@one-eyed-alien.net Maintainer, Linux USB Mass Storage Driver You suck Stef. -- Greg User Friendly, 11/29/97 [-- Attachment #1.2: Type: application/pgp-signature, Size: 191 bytes --] [-- Attachment #2: Type: text/plain, Size: 300 bytes --] Using Tomcat but need to do more? Need to support web services, security? Get stuff done quickly with pre-integrated technology to make your job easier Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642 [-- Attachment #3: Type: text/plain, Size: 226 bytes --] _______________________________________________ Linux-hotplug-devel mailing list http://linux-hotplug.sourceforge.net Linux-hotplug-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/linux-hotplug-devel ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: Asynchronous scsi scanning, version 9 2006-06-26 14:44 ` Matthew Dharm @ 2006-06-26 15:18 ` Matthew Wilcox 2006-06-26 15:44 ` James Bottomley ` (2 more replies) 2006-06-26 18:55 ` [SPAM] " Doug Ledford 1 sibling, 3 replies; 41+ messages in thread From: Matthew Wilcox @ 2006-06-26 15:18 UTC (permalink / raw) To: Arjan van de Ven, linux-scsi, Stefan Richter, linux-hotplug-devel, Patrick Mansfield, Alexander E. Patrakov, James Bottomley On Mon, Jun 26, 2006 at 07:44:42AM -0700, Matthew Dharm wrote: > On Mon, Jun 26, 2006 at 06:40:01AM -0600, Matthew Wilcox wrote: > > I'm not 100% sure what the problem is with USB. If it's that we may not > > have discovered all the USB devices currently plugged in, then I think > > we need to change the way USB works to use one Scsi_Host for all USB > > storage devices, and then make each device either its own target or its > > own channel (probably the former; the latter is less well-tested code). > > There are a couple of things which make this difficult for USB. > > First, some (many?) USB devices need to be left alone for several seconds > after attachment in order to allow them to initialize to the point where > they are usable. That's OK. Once we know they're there, we can reserve their place and delay until they're ready to go. > Second, depending on how many hubs are between the host and target, the > time-to-discover the device is highly variable. Can, or does, USB keep track of hub discovery, and hence know whether or not it's completed USB discovery? > Third, once discovered the device may still take a long time to be "ready". > Think of this as a slow spin-up time. This is different from my first > point in that the device can actually accept commands, but all commands > will fail with some sort of not-ready type error. That's OK too. > As for using one Scsi_Host... there are several usb-storage devices which > attach to an entire SCSI bus (not just a single target), so can't make each > device it's own target. Oh. 
My fault for reading the comment rather than the code. /* reject if target != 0 or if LUN is higher than * the maximum known LUN */ else if (us->srb->device->id && !(us->flags & US_FL_SCM_MULT_TARG)) { Do these devices stick to occupying only target IDs from 0-7? If not, you may wish to increase ->max_id for those devices. I think it'd be worth exporting scsi_scan_channel() from the midlayer (and rearranging it to have __scsi_scan_channel() as was done with __scsi_scan_target) for USB's benefit. > Also, at the time this was all written it wasn't > possible to dynamically remove (with any stability) individual targets or > channels. Perhaps that has changed? It's definitely possible to remove individual targets dynamically now; Fibre Channel has sorted that out (and will complain loudly if it breaks). The scsi core doesn't really have a channel object; channel is just an integer that describes a path to a target. So I think there should be no problem in converting USB to have one host and many channels. Using Tomcat but need to do more? Need to support web services, security? Get stuff done quickly with pre-integrated technology to make your job easier Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642 ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: Asynchronous scsi scanning, version 9 2006-06-26 15:18 ` Matthew Wilcox @ 2006-06-26 15:44 ` James Bottomley 2006-06-26 16:02 ` Greg KH 2006-06-26 21:08 ` Matthew Dharm 2 siblings, 0 replies; 41+ messages in thread From: James Bottomley @ 2006-06-26 15:44 UTC (permalink / raw) To: Matthew Wilcox Cc: Arjan van de Ven, linux-scsi, Stefan Richter, linux-hotplug-devel, Patrick Mansfield, Alexander E. Patrakov On Mon, 2006-06-26 at 09:18 -0600, Matthew Wilcox wrote: > It's definitely possible to remove individual targets dynamically now; > Fibre Channel has sorted that out (and will complain loudly if it > breaks). > The scsi core doesn't really have a channel object; channel is just an > integer > that describes a path to a target. So I think there should be no > problem in > converting USB to have one host and many channels. Actually, I think usb should be exactly like FC ... in that should be an unscanned attachment, so if and when it gets done properly, it won't have any need for the async scanning updates anyway. James ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: Asynchronous scsi scanning, version 9 2006-06-26 15:18 ` Matthew Wilcox 2006-06-26 15:44 ` James Bottomley @ 2006-06-26 16:02 ` Greg KH 2006-06-26 21:08 ` Matthew Dharm 2 siblings, 0 replies; 41+ messages in thread From: Greg KH @ 2006-06-26 16:02 UTC (permalink / raw) To: Matthew Wilcox Cc: Arjan van de Ven, linux-scsi, Stefan Richter, linux-hotplug-devel, Patrick Mansfield, Alexander E. Patrakov, James Bottomley On Mon, Jun 26, 2006 at 09:18:28AM -0600, Matthew Wilcox wrote: > On Mon, Jun 26, 2006 at 07:44:42AM -0700, Matthew Dharm wrote: > > Second, depending on how many hubs are between the host and target, the > > time-to-discover the device is highly variable. > > Can, or does, USB keep track of hub discovery, and hence know whether > or not it's completed USB discovery? Nope, it never knows this. So that is why you can't have any type of "just wait until USB is done scanning all devices before continuing" behavior, because there is no such thing to trigger off of. sorry, greg k-h ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: Asynchronous scsi scanning, version 9 2006-06-26 15:18 ` Matthew Wilcox 2006-06-26 15:44 ` James Bottomley 2006-06-26 16:02 ` Greg KH @ 2006-06-26 21:08 ` Matthew Dharm 2006-06-26 22:15 ` Matthew Wilcox 2 siblings, 1 reply; 41+ messages in thread From: Matthew Dharm @ 2006-06-26 21:08 UTC (permalink / raw) To: Matthew Wilcox Cc: James Bottomley, linux-hotplug-devel, linux-scsi, Patrick Mansfield, Stefan Richter, Alexander E. Patrakov, Arjan van de Ven [-- Attachment #1.1: Type: text/plain, Size: 1854 bytes --] On Mon, Jun 26, 2006 at 09:18:28AM -0600, Matthew Wilcox wrote: > On Mon, Jun 26, 2006 at 07:44:42AM -0700, Matthew Dharm wrote: > > As for using one Scsi_Host... there are several usb-storage devices which > > attach to an entire SCSI bus (not just a single target), so can't make each > > device it's own target. > > Oh. My fault for reading the comment rather than the code. > > /* reject if target != 0 or if LUN is higher than > * the maximum known LUN > */ > else if (us->srb->device->id && > !(us->flags & US_FL_SCM_MULT_TARG)) { > > Do these devices stick to occupying only target IDs from 0-7? If not, > you may wish to increase ->max_id for those devices. I think it'd be worth > exporting scsi_scan_channel() from the midlayer (and rearranging it to have > __scsi_scan_channel() as was done with __scsi_scan_target) for USB's benefit. These support target IDs up to 15. But I see the point of limiting ->max_id for devices which do not have the SCM_MULT_TARG flag set. > It's definitely possible to remove individual targets dynamically now; > Fibre Channel has sorted that out (and will complain loudly if it breaks). > The scsi core doesn't really have a channel object; channel is just an integer > that describes a path to a target. So I think there should be no problem in > converting USB to have one host and many channels. Interesting. What's the limit on the number of channels you can have? How do I set up multiple channels in code? 
Matt -- Matthew Dharm Home: mdharm-usb@one-eyed-alien.net Maintainer, Linux USB Mass Storage Driver What, are you one of those Microsoft-bashing Linux freaks? -- Customer to Greg User Friendly, 2/10/1999 [-- Attachment #1.2: Type: application/pgp-signature, Size: 191 bytes --] [-- Attachment #2: Type: text/plain, Size: 300 bytes --] Using Tomcat but need to do more? Need to support web services, security? Get stuff done quickly with pre-integrated technology to make your job easier Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642 [-- Attachment #3: Type: text/plain, Size: 226 bytes --] _______________________________________________ Linux-hotplug-devel mailing list http://linux-hotplug.sourceforge.net Linux-hotplug-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/linux-hotplug-devel ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: Asynchronous scsi scanning, version 9 2006-06-26 21:08 ` Matthew Dharm @ 2006-06-26 22:15 ` Matthew Wilcox 0 siblings, 0 replies; 41+ messages in thread From: Matthew Wilcox @ 2006-06-26 22:15 UTC (permalink / raw) To: Arjan van de Ven, linux-scsi, Stefan Richter, linux-hotplug-devel, Patrick Mansfield, Alexander E. Patrakov, James Bottomley On Mon, Jun 26, 2006 at 02:08:25PM -0700, Matthew Dharm wrote: > On Mon, Jun 26, 2006 at 09:18:28AM -0600, Matthew Wilcox wrote: > > Do these devices stick to occupying only target IDs from 0-7? If not, > > you may wish to increase ->max_id for those devices. I think it'd be worth > > exporting scsi_scan_channel() from the midlayer (and rearranging it to have > > __scsi_scan_channel() as was done with __scsi_scan_target) for USB's benefit. > > These support target IDs to up 15. But I see the point of limiting > ->max_id for devices which do not have the SCM_MULT_TARG flag set. Ever tried that? ;-) drivers/scsi/hosts.c sets max_id to 8 in scsi_host_alloc(). I don't see anything in USB setting max_id to 16. > > It's definitely possible to remove individual targets dynamically now; > > Fibre Channel has sorted that out (and will complain loudly if it breaks). > > The scsi core doesn't really have a channel object; channel is just an integer > > that describes a path to a target. So I think there should be no problem in > > converting USB to have one host and many channels. > > Interesting. > > What's the limit on the number of channels you can have? Just an unsigned int, I think. > How do I set up multiple channels in code? Increase shost->max_channel each time you want to create a new one. Then pass it to scsi_scan_target(). ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [SPAM] Re: Asynchronous scsi scanning, version 9 2006-06-26 14:44 ` Matthew Dharm 2006-06-26 15:18 ` Matthew Wilcox @ 2006-06-26 18:55 ` Doug Ledford 2006-06-26 21:04 ` Matthew Dharm 1 sibling, 1 reply; 41+ messages in thread From: Doug Ledford @ 2006-06-26 18:55 UTC (permalink / raw) To: Matthew Dharm Cc: James Bottomley, linux-hotplug-devel, linux-scsi, Matthew Wilcox, Patrick Mansfield, Stefan Richter, Alexander E. Patrakov, Arjan van de Ven On Mon, 2006-06-26 at 07:44 -0700, Matthew Dharm wrote: > As for using one Scsi_Host... there are several usb-storage devices which > attach to an entire SCSI bus (not just a single target), so can't make each > device it's own target. This makes no sense to me at all. The USB devices attach to a USB bus; the whole notion of a SCSI bus for USB devices is nothing more than an abstraction, and should be able to be arbitrarily changed in the USB stack any time you wish. In short, you aren't doing SCSI selections over a USB bus, so why should SCSI target IDs vs. host IDs matter? Aren't they just all mapped in the driver to the USB device anyway? -- Doug Ledford <dledford@xsintricity.com> Using Tomcat but need to do more? Need to support web services, security? Get stuff done quickly with pre-integrated technology to make your job easier Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642 ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [SPAM] Re: Asynchronous scsi scanning, version 9 2006-06-26 18:55 ` [SPAM] " Doug Ledford @ 2006-06-26 21:04 ` Matthew Dharm 2006-06-26 21:20 ` Doug Ledford 0 siblings, 1 reply; 41+ messages in thread From: Matthew Dharm @ 2006-06-26 21:04 UTC (permalink / raw) To: Doug Ledford Cc: James Bottomley, linux-hotplug-devel, linux-scsi, Matthew Wilcox, Patrick Mansfield, Stefan Richter, Alexander E. Patrakov, Arjan van de Ven [-- Attachment #1.1: Type: text/plain, Size: 1353 bytes --] On Mon, Jun 26, 2006 at 02:55:50PM -0400, Doug Ledford wrote: > On Mon, 2006-06-26 at 07:44 -0700, Matthew Dharm wrote: > > > As for using one Scsi_Host... there are several usb-storage devices which > > attach to an entire SCSI bus (not just a single target), so can't make each > > device it's own target. > > This makes no sense to me at all. The USB devices attach to a USB bus, > the whole notion of a SCSI bus for USB devices is as nothing more than > an abstraction, and should be able to be arbitrarily changed in the USB > stack any time you wish. In short, you aren't doing SCSI selections > over a USB bus, so why should SCSI target IDs vs. host IDs matter? > Aren't they just all mapped in the driver to the USB device anyway? I guess my point is that there are some devices for which the concept of a 'bus' is -not- an abstraction. These devices have a USB connector at one end and a SCSI HD-68 connector on the other, and can be attached to 15 devices in a SCSI chain. Perhaps I misunderstand your objection? Matt -- Matthew Dharm Home: mdharm-usb@one-eyed-alien.net Maintainer, Linux USB Mass Storage Driver E: You run this ship with Windows?! YOU IDIOT! L: Give me a break, it came bundled with the computer! -- ESR and Lan Solaris User Friendly, 12/8/1998 [-- Attachment #1.2: Type: application/pgp-signature, Size: 191 bytes --] [-- Attachment #2: Type: text/plain, Size: 300 bytes --] Using Tomcat but need to do more? Need to support web services, security? 
Get stuff done quickly with pre-integrated technology to make your job easier Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642 [-- Attachment #3: Type: text/plain, Size: 226 bytes --] _______________________________________________ Linux-hotplug-devel mailing list http://linux-hotplug.sourceforge.net Linux-hotplug-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/linux-hotplug-devel ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: [SPAM] Re: Asynchronous scsi scanning, version 9 2006-06-26 21:04 ` Matthew Dharm @ 2006-06-26 21:20 ` Doug Ledford 0 siblings, 0 replies; 41+ messages in thread From: Doug Ledford @ 2006-06-26 21:20 UTC (permalink / raw) To: Matthew Dharm Cc: James Bottomley, linux-hotplug-devel, linux-scsi, Matthew Wilcox, Patrick Mansfield, Stefan Richter, Alexander E. Patrakov, Arjan van de Ven On Mon, 2006-06-26 at 14:04 -0700, Matthew Dharm wrote: > On Mon, Jun 26, 2006 at 02:55:50PM -0400, Doug Ledford wrote: > > On Mon, 2006-06-26 at 07:44 -0700, Matthew Dharm wrote: > > > > > As for using one Scsi_Host... there are several usb-storage devices which > > > attach to an entire SCSI bus (not just a single target), so can't make each > > > device it's own target. > > > > This makes no sense to me at all. The USB devices attach to a USB bus, > > the whole notion of a SCSI bus for USB devices is as nothing more than > > an abstraction, and should be able to be arbitrarily changed in the USB > > stack any time you wish. In short, you aren't doing SCSI selections > > over a USB bus, so why should SCSI target IDs vs. host IDs matter? > > Aren't they just all mapped in the driver to the USB device anyway? > > I guess my point is that there are some devices for which the concept of a > 'bus' is -not- an abstraction. These devices have a USB connector at one > end and a SCSI HD-68 connector on the other, and can be attached to 15 > devices in a SCSI chain. Yuk! OK, so such a nasty beast exists, but that just means realistically you should treat such a device as a scsi host instead of a scsi device. For all other normal target devices, the issue of scsi bus vs. scsi target shouldn't matter. > Perhaps I misunderstand your objection? > > Matt > -- Doug Ledford <dledford@xsintricity.com> Using Tomcat but need to do more? Need to support web services, security? 
Get stuff done quickly with pre-integrated technology to make your job easier Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642 ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: Asynchronous scsi scanning, version 9 2006-06-25 21:15 ` James Bottomley 2006-06-25 22:46 ` Matthew Wilcox @ 2006-06-26 20:58 ` Linas Vepstas 2006-06-26 21:14 ` James Bottomley 1 sibling, 1 reply; 41+ messages in thread From: Linas Vepstas @ 2006-06-26 20:58 UTC (permalink / raw) To: James Bottomley Cc: Matthew Wilcox, linux-hotplug-devel, Stefan Richter, Alexander E. Patrakov, linux-scsi, Patrick Mansfield On Sun, Jun 25, 2006 at 04:15:26PM -0500, James Bottomley wrote: > On Thu, 2006-06-01 at 07:23 -0600, Matthew Wilcox wrote: > > > There's a potential solution to this. If we add a hook to sys_module_init > > such that it'll call a specified function before returning, we can make > > it wait until all scans are done. This way, userspace never sees the > > asynchronous scanning behaviour. But if you have devices of different > > types, you won't get the overlapping scans. > > How about this solution which works for me with debian on a huge 6 bus > scsi machine: it introduces a new module scsi_wait_scan.ko whose sole > job is to wait for the scans to complete in its init function. The > initrd/initramfs sequence now becomes: > > insert all other modules > modprobe scsi_wait_scan I guess there's no way of figuring out where the root disk is, and waiting only for that? On large servers, there may be hundreds of attached disks. Anything which can parallelize and perform an async scan will help boot times; however, waiting for all disks to come on-line when only one is actually needed can cause trouble. In particular, some of the attached disks/scsi chains may be bad, in which case one has to wait for a tediously long device/bus/host reset sequence to complete, and off-line the broken disk. --linas ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: Asynchronous scsi scanning, version 9 2006-06-26 20:58 ` Linas Vepstas @ 2006-06-26 21:14 ` James Bottomley 2006-06-26 21:21 ` Linas Vepstas 2006-06-28 7:52 ` Hannes Reinecke 0 siblings, 2 replies; 41+ messages in thread From: James Bottomley @ 2006-06-26 21:14 UTC (permalink / raw) To: Linas Vepstas Cc: linux-hotplug-devel, linux-scsi, Matthew Wilcox, Patrick Mansfield, Stefan Richter, Alexander E. Patrakov On Mon, 2006-06-26 at 15:58 -0500, Linas Vepstas wrote: > I guess there's no way of figuring out where the root disk is, and > waiting only for that? Well, sure, but you'd do that at user level: you know what the root disk is, so you just wait on the udev event announcing its arrival. James Using Tomcat but need to do more? Need to support web services, security? Get stuff done quickly with pre-integrated technology to make your job easier Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642 ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: Asynchronous scsi scanning, version 9 2006-06-26 21:14 ` James Bottomley @ 2006-06-26 21:21 ` Linas Vepstas 2006-06-26 21:41 ` James Bottomley 2006-06-28 7:52 ` Hannes Reinecke 1 sibling, 1 reply; 41+ messages in thread From: Linas Vepstas @ 2006-06-26 21:21 UTC (permalink / raw) To: James Bottomley Cc: linux-hotplug-devel, linux-scsi, Matthew Wilcox, Patrick Mansfield, Stefan Richter, Alexander E. Patrakov On Mon, Jun 26, 2006 at 04:14:37PM -0500, James Bottomley wrote: > On Mon, 2006-06-26 at 15:58 -0500, Linas Vepstas wrote: > > I guess there's no way of figuring out where the root disk is, and > > waiting only for that? > > Well, sure, but you'd do that at user level: you know what the root > disk is, so you just wait on the udev event announcing its arrival. It's possible that I'm making a confused comment based on my mis-understanding a conversation I'm not following closely, but ... Didn't the last patch result in the boot process hanging until all scsi devices were discovered? I was concerned that this might take too long. But if there's no hang, then no problem. --linas Using Tomcat but need to do more? Need to support web services, security? Get stuff done quickly with pre-integrated technology to make your job easier Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642 ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: Asynchronous scsi scanning, version 9 2006-06-26 21:21 ` Linas Vepstas @ 2006-06-26 21:41 ` James Bottomley 0 siblings, 0 replies; 41+ messages in thread From: James Bottomley @ 2006-06-26 21:41 UTC (permalink / raw) To: Linas Vepstas Cc: linux-hotplug-devel, linux-scsi, Matthew Wilcox, Patrick Mansfield, Stefan Richter, Alexander E. Patrakov On Mon, 2006-06-26 at 16:21 -0500, Linas Vepstas wrote: > It's possible that I'm making a confused comment based on my > mis-understanding a conversation I'm not following closely, but ... > > Didn't the last patch result in the boot process hanging until > all scsi devices were discovered? I was concerned that this might > take too long. But if there's no hang, then no problem. The last patch was a module distros could use to wait for all bus scans to complete. If you simply want the behaviour where you launch async scans and wait for root to show up before proceeding, then you code that in udev. James Using Tomcat but need to do more? Need to support web services, security? Get stuff done quickly with pre-integrated technology to make your job easier Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642 ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: Asynchronous scsi scanning, version 9 2006-06-26 21:14 ` James Bottomley 2006-06-26 21:21 ` Linas Vepstas @ 2006-06-28 7:52 ` Hannes Reinecke 2006-06-28 16:03 ` James Bottomley 1 sibling, 1 reply; 41+ messages in thread From: Hannes Reinecke @ 2006-06-28 7:52 UTC (permalink / raw) To: James Bottomley Cc: Linas Vepstas, Matthew Wilcox, linux-hotplug-devel, Stefan Richter, Alexander E. Patrakov, linux-scsi, Patrick Mansfield James Bottomley wrote: > On Mon, 2006-06-26 at 15:58 -0500, Linas Vepstas wrote: >> I guess there's no way of figuring out where the root disk is, and >> waiting only for that? > > Well, sure, but you'd do that at user level: you know what the root > disk is, so you just wait on the udev event announcing its arrival. > Exactly. And it works well. The only point left to address is that you never know how long you should be waiting. It would be nice if the transport class / SCSI ML could give some hint somewhere to the effect 'scanning in progress'. Otherwise you'll end up using arbitrary timeouts and you'll always find machines where this timeout doesn't work :-( Cheers, Hannes -- Dr. Hannes Reinecke hare@suse.de SuSE Linux Products GmbH S390 & zSeries Maxfeldstraße 5 +49 911 74053 688 90409 Nürnberg http://www.suse.de - To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html ^ permalink raw reply [flat|nested] 41+ messages in thread
* Re: Asynchronous scsi scanning, version 9 2006-06-28 7:52 ` Hannes Reinecke @ 2006-06-28 16:03 ` James Bottomley 0 siblings, 0 replies; 41+ messages in thread From: James Bottomley @ 2006-06-28 16:03 UTC (permalink / raw) To: Hannes Reinecke Cc: linux-hotplug-devel, linux-scsi, Matthew Wilcox, Linas Vepstas, Patrick Mansfield, Stefan Richter, Alexander E. Patrakov On Wed, 2006-06-28 at 09:52 +0200, Hannes Reinecke wrote: > The only point left addressing is that you never know how long you > should be waiting. It would be nice if the transport class / SCSI ML > could give some hint somewhere to the effect 'scanning in progress'. > Otherwise you'll end up using arbitrary timeouts and you'll always > find > machines where this timeout doesn't work :-( Well ... about 20s for a parallel bus is usual. All the other modern busses (FC, SAS etc.) are unscanned, so there's nothing to wait for at the mid-layer. Each does, however, have an internal discovery type process which it might be possible to expose via a new API. James Using Tomcat but need to do more? Need to support web services, security? Get stuff done quickly with pre-integrated technology to make your job easier Download IBM WebSphere Application Server v.1.0.1 based on Apache Geronimo http://sel.as-us.falkag.net/sel?cmd=lnk&kid=120709&bid=263057&dat=121642 ^ permalink raw reply [flat|nested] 41+ messages in thread
end of thread, other threads:[~2006-10-26 19:53 UTC | newest] Thread overview: 41+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2006-05-11 14:33 [RFC] Asynchronous scsi scanning Matthew Wilcox 2006-05-11 18:15 ` Mike Christie 2006-05-11 18:21 ` Matthew Wilcox 2006-05-11 18:49 ` Mike Christie 2006-05-11 18:56 ` Matthew Wilcox 2006-05-11 19:09 ` Mike Christie 2006-05-18 17:22 ` [PATCH] " Matthew Wilcox 2006-05-29 3:19 ` Asynchronous scsi scanning, version 9 Matthew Wilcox 2006-05-29 8:38 ` Stefan Richter 2006-05-29 13:05 ` Matthew Wilcox 2006-05-29 13:11 ` Arjan van de Ven 2006-05-29 13:19 ` Matthew Wilcox 2006-05-31 23:21 ` Patrick Mansfield 2006-06-01 12:22 ` Kay Sievers 2006-10-26 19:53 ` maximilian attems 2006-06-01 13:14 ` Alexander E. Patrakov 2006-06-01 13:21 ` maximilian attems 2006-06-01 13:23 ` Matthew Wilcox 2006-06-01 13:26 ` Alexander E. Patrakov 2006-06-01 14:00 ` Arjan van de Ven 2006-06-25 21:15 ` James Bottomley 2006-06-25 22:46 ` Matthew Wilcox 2006-06-26 8:24 ` Arjan van de Ven 2006-06-26 12:40 ` Matthew Wilcox 2006-06-26 12:59 ` Arjan van de Ven 2006-06-26 16:03 ` Greg KH 2006-06-26 14:44 ` Matthew Dharm 2006-06-26 15:18 ` Matthew Wilcox 2006-06-26 15:44 ` James Bottomley 2006-06-26 16:02 ` Greg KH 2006-06-26 21:08 ` Matthew Dharm 2006-06-26 22:15 ` Matthew Wilcox 2006-06-26 18:55 ` [SPAM] " Doug Ledford 2006-06-26 21:04 ` Matthew Dharm 2006-06-26 21:20 ` Doug Ledford 2006-06-26 20:58 ` Linas Vepstas 2006-06-26 21:14 ` James Bottomley 2006-06-26 21:21 ` Linas Vepstas 2006-06-26 21:41 ` James Bottomley 2006-06-28 7:52 ` Hannes Reinecke 2006-06-28 16:03 ` James Bottomley
This is a public inbox, see mirroring instructions for how to clone and mirror all data and code used for this inbox; as well as URLs for NNTP newsgroup(s).