* [patch] drivers: wait for threaded probes between initcall levels
2006-10-27 18:41 ` vmlinux.lds: consolidate initcall sections Andrew Morton
@ 2006-10-27 18:42 ` Andrew Morton
2006-10-27 18:47 ` Stephen Hemminger
2006-10-27 22:59 ` Alan Cox
0 siblings, 2 replies; 37+ messages in thread
From: Andrew Morton @ 2006-10-27 18:42 UTC (permalink / raw)
To: Pavel Machek, Greg KH, Stephen Hemminger, Matthew Wilcox,
Adrian Bunk, Linus Torvalds, Linux Kernel Mailing List, linux-pci
From: Andrew Morton <akpm@osdl.org>
The multithreaded-probing code has a problem: after one initcall level (eg,
core_initcall) has been processed, we will then start processing the next
level (postcore_initcall) while the kernel threads which are handling
core_initcall are still executing. This breaks the guarantees which the
layered initcalls previously gave us.
IOW, we want to be multithreaded _within_ an initcall level, but not between
different levels.
Fix that up by causing the probing code to wait for all outstanding probes at
one level to complete before we start processing the next level.
Cc: Greg KH <greg@kroah.com>
Signed-off-by: Andrew Morton <akpm@osdl.org>
---
drivers/base/dd.c | 30 ++++++++++++++++++++++++++++
include/asm-generic/vmlinux.lds.h | 9 +++++++-
include/linux/init.h | 28 +++++++++++++++++---------
3 files changed, 57 insertions(+), 10 deletions(-)
diff -puN drivers/base/dd.c~drivers-wait-for-threaded-probes-between-initcall-levels drivers/base/dd.c
--- a/drivers/base/dd.c~drivers-wait-for-threaded-probes-between-initcall-levels
+++ a/drivers/base/dd.c
@@ -18,6 +18,7 @@
#include <linux/device.h>
#include <linux/module.h>
#include <linux/kthread.h>
+#include <linux/wait.h>
#include "base.h"
#include "power/power.h"
@@ -70,6 +71,8 @@ struct stupid_thread_structure {
};
static atomic_t probe_count = ATOMIC_INIT(0);
+static DECLARE_WAIT_QUEUE_HEAD(probe_waitqueue);
+
static int really_probe(void *void_data)
{
struct stupid_thread_structure *data = void_data;
@@ -121,6 +124,7 @@ probe_failed:
done:
kfree(data);
atomic_dec(&probe_count);
+ wake_up(&probe_waitqueue);
return ret;
}
@@ -337,6 +341,32 @@ void driver_detach(struct device_driver
}
}
+#ifdef CONFIG_PCI_MULTITHREAD_PROBE
+static int __init wait_for_probes(void)
+{
+ DEFINE_WAIT(wait);
+
+ printk(KERN_INFO "%s: waiting for %d threads\n", __FUNCTION__,
+ atomic_read(&probe_count));
+ if (!atomic_read(&probe_count))
+ return 0;
+ while (atomic_read(&probe_count)) {
+ prepare_to_wait(&probe_waitqueue, &wait, TASK_UNINTERRUPTIBLE);
+ if (atomic_read(&probe_count))
+ schedule();
+ }
+ finish_wait(&probe_waitqueue, &wait);
+ return 0;
+}
+
+core_initcall_sync(wait_for_probes);
+postcore_initcall_sync(wait_for_probes);
+arch_initcall_sync(wait_for_probes);
+subsys_initcall_sync(wait_for_probes);
+fs_initcall_sync(wait_for_probes);
+device_initcall_sync(wait_for_probes);
+late_initcall_sync(wait_for_probes);
+#endif
EXPORT_SYMBOL_GPL(device_bind_driver);
EXPORT_SYMBOL_GPL(device_release_driver);
diff -puN include/asm-generic/vmlinux.lds.h~drivers-wait-for-threaded-probes-between-initcall-levels include/asm-generic/vmlinux.lds.h
--- a/include/asm-generic/vmlinux.lds.h~drivers-wait-for-threaded-probes-between-initcall-levels
+++ a/include/asm-generic/vmlinux.lds.h
@@ -216,10 +216,17 @@
#define INITCALLS \
*(.initcall1.init) \
+ *(.initcall1s.init) \
*(.initcall2.init) \
+ *(.initcall2s.init) \
*(.initcall3.init) \
+ *(.initcall3s.init) \
*(.initcall4.init) \
+ *(.initcall4s.init) \
*(.initcall5.init) \
+ *(.initcall5s.init) \
*(.initcall6.init) \
- *(.initcall7.init)
+ *(.initcall6s.init) \
+ *(.initcall7.init) \
+ *(.initcall7s.init)
diff -puN include/linux/init.h~drivers-wait-for-threaded-probes-between-initcall-levels include/linux/init.h
--- a/include/linux/init.h~drivers-wait-for-threaded-probes-between-initcall-levels
+++ a/include/linux/init.h
@@ -84,19 +84,29 @@ extern void setup_arch(char **);
* by link order.
* For backwards compatibility, initcall() puts the call in
* the device init subsection.
+ *
+ * The `id' arg to __define_initcall() is needed so that multiple initcalls
+ * can point at the same handler without causing duplicate-symbol build errors.
*/
-#define __define_initcall(level,fn) \
- static initcall_t __initcall_##fn __attribute_used__ \
+#define __define_initcall(level,fn,id) \
+ static initcall_t __initcall_##fn##id __attribute_used__ \
__attribute__((__section__(".initcall" level ".init"))) = fn
-#define core_initcall(fn) __define_initcall("1",fn)
-#define postcore_initcall(fn) __define_initcall("2",fn)
-#define arch_initcall(fn) __define_initcall("3",fn)
-#define subsys_initcall(fn) __define_initcall("4",fn)
-#define fs_initcall(fn) __define_initcall("5",fn)
-#define device_initcall(fn) __define_initcall("6",fn)
-#define late_initcall(fn) __define_initcall("7",fn)
+#define core_initcall(fn) __define_initcall("1",fn,1)
+#define core_initcall_sync(fn) __define_initcall("1s",fn,1s)
+#define postcore_initcall(fn) __define_initcall("2",fn,2)
+#define postcore_initcall_sync(fn) __define_initcall("2s",fn,2s)
+#define arch_initcall(fn) __define_initcall("3",fn,3)
+#define arch_initcall_sync(fn) __define_initcall("3s",fn,3s)
+#define subsys_initcall(fn) __define_initcall("4",fn,4)
+#define subsys_initcall_sync(fn) __define_initcall("4s",fn,4s)
+#define fs_initcall(fn) __define_initcall("5",fn,5)
+#define fs_initcall_sync(fn) __define_initcall("5s",fn,5s)
+#define device_initcall(fn) __define_initcall("6",fn,6)
+#define device_initcall_sync(fn) __define_initcall("6s",fn,6s)
+#define late_initcall(fn) __define_initcall("7",fn,7)
+#define late_initcall_sync(fn) __define_initcall("7s",fn,7s)
#define __initcall(fn) device_initcall(fn)
_
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
2006-10-27 18:42 ` [patch] drivers: wait for threaded probes between initcall levels Andrew Morton
@ 2006-10-27 18:47 ` Stephen Hemminger
2006-10-27 20:15 ` Andrew Morton
2006-10-27 22:59 ` Alan Cox
1 sibling, 1 reply; 37+ messages in thread
From: Stephen Hemminger @ 2006-10-27 18:47 UTC (permalink / raw)
To: Andrew Morton
Cc: Pavel Machek, Greg KH, Matthew Wilcox, Adrian Bunk,
Linus Torvalds, Linux Kernel Mailing List, linux-pci
On Fri, 27 Oct 2006 11:42:37 -0700
Andrew Morton <akpm@osdl.org> wrote:
> From: Andrew Morton <akpm@osdl.org>
>
> The multithreaded-probing code has a problem: after one initcall level (eg,
> core_initcall) has been processed, we will then start processing the next
> level (postcore_initcall) while the kernel threads which are handling
> core_initcall are still executing. This breaks the guarantees which the
> layered initcalls previously gave us.
>
> IOW, we want to be multithreaded _within_ an initcall level, but not between
> different levels.
>
> Fix that up by causing the probing code to wait for all outstanding probes at
> one level to complete before we start processing the next level.
>
> Cc: Greg KH <greg@kroah.com>
> Signed-off-by: Andrew Morton <akpm@osdl.org>
> ---
>
This looks like a good place to use a counting semaphore.
--
Stephen Hemminger <shemminger@osdl.org>
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
2006-10-27 18:47 ` Stephen Hemminger
@ 2006-10-27 20:15 ` Andrew Morton
2006-10-27 20:42 ` Linus Torvalds
0 siblings, 1 reply; 37+ messages in thread
From: Andrew Morton @ 2006-10-27 20:15 UTC (permalink / raw)
To: Stephen Hemminger
Cc: Pavel Machek, Greg KH, Matthew Wilcox, Adrian Bunk,
Linus Torvalds, Linux Kernel Mailing List, linux-pci
On Fri, 27 Oct 2006 11:47:29 -0700
Stephen Hemminger <shemminger@osdl.org> wrote:
> On Fri, 27 Oct 2006 11:42:37 -0700
> Andrew Morton <akpm@osdl.org> wrote:
>
> > From: Andrew Morton <akpm@osdl.org>
> >
> > The multithreaded-probing code has a problem: after one initcall level (eg,
> > core_initcall) has been processed, we will then start processing the next
> > level (postcore_initcall) while the kernel threads which are handling
> > core_initcall are still executing. This breaks the guarantees which the
> > layered initcalls previously gave us.
> >
> > IOW, we want to be multithreaded _within_ an initcall level, but not between
> > different levels.
> >
> > Fix that up by causing the probing code to wait for all outstanding probes at
> > one level to complete before we start processing the next level.
> >
> > Cc: Greg KH <greg@kroah.com>
> > Signed-off-by: Andrew Morton <akpm@osdl.org>
> > ---
> >
>
> This looks like a good place to use a counting semaphore.
>
I couldn't work out a way of doing that. I guess one could a) count the
number of threads which are going to be started, b) start them all, c) do
an up() when each thread ends and d) handle errors somehow.
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
2006-10-27 20:15 ` Andrew Morton
@ 2006-10-27 20:42 ` Linus Torvalds
2006-10-27 20:48 ` Linus Torvalds
0 siblings, 1 reply; 37+ messages in thread
From: Linus Torvalds @ 2006-10-27 20:42 UTC (permalink / raw)
To: Andrew Morton
Cc: Stephen Hemminger, Pavel Machek, Greg KH, Matthew Wilcox,
Adrian Bunk, Linux Kernel Mailing List, linux-pci
On Fri, 27 Oct 2006, Andrew Morton wrote:
>
> I couldn't work out a way of doing that. I guess one could a) count the
> number of threads which are going to be started, b) start them all, c) do
> an up() when each thread ends and d) handle errors somehow.
No. First off, you want to _limit_ the maximum number of parallelism
anyway (memory pressure and sanity), so you want to use the counting
semaphore for that too.
The easiest way to do it would probably be something like this:
#define PARALLELISM (10)
static struct semaphore outstanding;
struct thread_exec {
int (*fn)(void *);
void *arg;
struct completion completion;
};
static void allow_parallel(int n)
{
while (--n >= 0)
up(&outstanding);
}
static void wait_for_parallel(int n)
{
while (--n >= 0)
down(&outstanding);
}
static int do_in_parallel(void *arg)
{
struct thread_exec *p = arg;
int (*fn)(void *) = p->fn;
void *arg = p->arg;
int retval;
/* Tell the caller we are done with the arguments */
complete(&p->completion);
/* Do the actual work in parallel */
retval = p->fn(p->arg);
/*
* And then tell the rest of the world that we've
* got one less parallel thing outstanding..
*/
up(&outstanding);
return retval;
}
static void execute_in_parallel(int (*fn)(void *), void *arg)
{
struct thread_exec arg = { .fn = fn, .arg = arg };
/* Make sure we can have more outstanding parallel work */
down(&outstanding);
arg.fn = fn;
arg.arg = arg;
init_completion(&arg.completion);
kernel_thread(do_in_parallel, &arg);
/* We need to wait until our "arg" is safe */
wait_for_completion(&arg.completion);
}
The above is ENTIRELY UNTESTED, but the point of it is that it should now
allow you to do something like this:
/* Set up how many parallel threads we can run */
allow_parallel(PARALLELISM);
...
/*
* Run an arbitrary number of threads with that
* parallelism.
*/
for (i = 0; i < ... ; i++)
execute_in_parallel(fnarray[i].function,
fnarray[i].argument);
...
/* And wait for all of them to complete */
wait_for_parallel(PARALLELISM);
and this is totally generic (ie this is useful for initcalls or anything
else). Note also how you can set up the parallelism (and wait for it)
totally independently (ie that can be done at some earlier stage, and the
"execute_in_parallel()" can just be executed in any random situation in
between - as many times as you like. It will always honor the parallelism.
By setting PARALLELISM to 1, you basically only ever allow one outstanding
call at any time (ie it becomes serial), so you don't even have to make
this a config option, you could do it as a runtime setup thing.
Hmm?
(And I repeat: the above code is untested, and was written in the email
client. It has never seen a compiler, and not gotten a _whole_ lot of
thinking).
Linus
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
2006-10-27 20:42 ` Linus Torvalds
@ 2006-10-27 20:48 ` Linus Torvalds
2006-10-28 1:11 ` Greg KH
0 siblings, 1 reply; 37+ messages in thread
From: Linus Torvalds @ 2006-10-27 20:48 UTC (permalink / raw)
To: Andrew Morton
Cc: Stephen Hemminger, Pavel Machek, Greg KH, Matthew Wilcox,
Adrian Bunk, Linux Kernel Mailing List, linux-pci
On Fri, 27 Oct 2006, Linus Torvalds wrote:
>
> static int do_in_parallel(void *arg)
> {
> struct thread_exec *p = arg;
> int (*fn)(void *) = p->fn;
> void *arg = p->arg;
> int retval;
>
> /* Tell the caller we are done with the arguments */
> complete(&p->completion);
>
> /* Do the actual work in parallel */
> retval = p->fn(p->arg);
Duh. The whole reason I copied them was to _not_ do that. That last line
should obviously be
retval = fn(arg);
because "p" may be gone after we've done the "complete()".
> (And I repeat: the above code is untested, and was written in the email
> client. It has never seen a compiler, and not gotten a _whole_ lot of
> thinking).
.. This hasn't changed, I just looked through the code once and found that
obvious bug.
Linus
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
2006-10-27 18:42 ` [patch] drivers: wait for threaded probes between initcall levels Andrew Morton
2006-10-27 18:47 ` Stephen Hemminger
@ 2006-10-27 22:59 ` Alan Cox
2006-10-27 23:06 ` Andrew Morton
2006-10-27 23:12 ` Olaf Hering
1 sibling, 2 replies; 37+ messages in thread
From: Alan Cox @ 2006-10-27 22:59 UTC (permalink / raw)
To: Andrew Morton
Cc: Pavel Machek, Greg KH, Stephen Hemminger, Matthew Wilcox,
Adrian Bunk, Linus Torvalds, Linux Kernel Mailing List, linux-pci
Ar Gwe, 2006-10-27 am 11:42 -0700, ysgrifennodd Andrew Morton:
> IOW, we want to be multithreaded _within_ an initcall level, but not between
> different levels.
That's actually insufficient. We have link ordered init sequences in
large numbers of driver subtrees (ATA, watchdog, etc). We'll need
several more initcall layers to fix that.
Alan
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
2006-10-27 22:59 ` Alan Cox
@ 2006-10-27 23:06 ` Andrew Morton
2006-10-28 5:09 ` Grant Grundler
2006-10-30 9:44 ` Cornelia Huck
2006-10-27 23:12 ` Olaf Hering
1 sibling, 2 replies; 37+ messages in thread
From: Andrew Morton @ 2006-10-27 23:06 UTC (permalink / raw)
To: Alan Cox
Cc: Pavel Machek, Greg KH, Stephen Hemminger, Matthew Wilcox,
Adrian Bunk, Linus Torvalds, Linux Kernel Mailing List, linux-pci
On Fri, 27 Oct 2006 23:59:30 +0100
Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> Ar Gwe, 2006-10-27 am 11:42 -0700, ysgrifennodd Andrew Morton:
> > IOW, we want to be multithreaded _within_ an initcall level, but not between
> > different levels.
>
> Thats actually insufficient. We have link ordered init sequences in
> large numbers of driver subtrees (ATA, watchdog, etc). We'll need
> several more initcall layers to fix that.
>
It would be nice to express those dependencies in some clearer and less
fragile manner than link order. I guess finer-grained initcall levels
would do that, but it doesn't scale very well.
But whatever. I think multithreaded probing just doesn't pass the
benefit-versus-hassle test, sorry. Make it dependent on CONFIG_GREGKH ;)
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
2006-10-27 22:59 ` Alan Cox
2006-10-27 23:06 ` Andrew Morton
@ 2006-10-27 23:12 ` Olaf Hering
1 sibling, 0 replies; 37+ messages in thread
From: Olaf Hering @ 2006-10-27 23:12 UTC (permalink / raw)
To: Alan Cox
Cc: Andrew Morton, Pavel Machek, Greg KH, Stephen Hemminger,
Matthew Wilcox, Adrian Bunk, Linus Torvalds,
Linux Kernel Mailing List, linux-pci
On Fri, Oct 27, Alan Cox wrote:
> Ar Gwe, 2006-10-27 am 11:42 -0700, ysgrifennodd Andrew Morton:
> > IOW, we want to be multithreaded _within_ an initcall level, but not between
> > different levels.
>
> Thats actually insufficient. We have link ordered init sequences in
> large numbers of driver subtrees (ATA, watchdog, etc). We'll need
> several more initcall layers to fix that.
Is it time for something better?
True dependencies, an addition to or as replacement for module_init()?
random example: hfs/super.c:
depends_on_initialized(init_hfs_fs: init_hfsplus_fs,kmem_cache_thingie,core_filesystem_thingie,foo,bar,worldpeace);
If init_hfsplus_fs() does not exist it should be no error.
Whatever the syntax will be and however it's parsed during build, that link order
requirement bites every other month.
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
2006-10-27 20:48 ` Linus Torvalds
@ 2006-10-28 1:11 ` Greg KH
2006-10-28 1:50 ` Linus Torvalds
0 siblings, 1 reply; 37+ messages in thread
From: Greg KH @ 2006-10-28 1:11 UTC (permalink / raw)
To: Linus Torvalds
Cc: Andrew Morton, Stephen Hemminger, Pavel Machek, Matthew Wilcox,
Adrian Bunk, Linux Kernel Mailing List, linux-pci
On Fri, Oct 27, 2006 at 01:48:54PM -0700, Linus Torvalds wrote:
>
>
> On Fri, 27 Oct 2006, Linus Torvalds wrote:
>
> >
> > static int do_in_parallel(void *arg)
> > {
> > struct thread_exec *p = arg;
> > int (*fn)(void *) = p->fn;
> > void *arg = p->arg;
> > int retval;
> >
> > /* Tell the caller we are done with the arguments */
> > complete(&p->completion);
> >
> > /* Do the actual work in parallel */
> > retval = p->fn(p->arg);
>
> Duh. The whole reason I copied them was to _not_ do that. That last line
> should obviously be
>
> retval = fn(arg);
>
> because "p" may gone after we've done the "complete()".
>
> > (And I repeat: the above code is untested, and was written in the email
> > client. It has never seen a compiler, and not gotten a _whole_ lot of
> > thinking).
>
> .. This hasn't changed, I just looked through the code once and found that
> obvious bug.
Heh, ok, I'll take this idea, and Andrew's patch, and rework things for
the next round of 2.6.20-rc kernels, and mark the current stuff as
BROKEN for now.
thanks,
greg k-h
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
2006-10-28 1:11 ` Greg KH
@ 2006-10-28 1:50 ` Linus Torvalds
0 siblings, 0 replies; 37+ messages in thread
From: Linus Torvalds @ 2006-10-28 1:50 UTC (permalink / raw)
To: Greg KH
Cc: Andrew Morton, Stephen Hemminger, Pavel Machek, Matthew Wilcox,
Adrian Bunk, Linux Kernel Mailing List, linux-pci
On Fri, 27 Oct 2006, Greg KH wrote:
>
> Heh, ok, I'll take this idea, and Andrew's patch, and rework things for
> the next round of 2.6.20-rc kernels, and mark the current stuff as
> BROKEN for now.
My interface stuff is kind of designed for:
- keep the current "init" sequence as-is for now
- keep the _actual_ PCI probing non-parallel, so that we actually do all
the bus _discovery_ in a repeatable and sane order.
- use the new "execute_in_parallel()" interface for things that actually
_sleep_. For example, all the PCI IDE _driver_ attachment should be
done synchronously, but perhaps the code that tries to see if there are
actual disks (ie the stuff that has timeouts etc) can use the parallel
execution.
- module loading would do a "allow_parallel(1)" and
"wait_for_parallel(1)" thing when calling the module init function (so
that a module could use "execute_in_parallel()" whether compiled in or
not, and each "init level" at boot would also do this (with some bigger
number, like 10), so that for drivers etc that want to do async stuff,
they can do so in parallel (but they'd still have done the actual hard
device find/init serially - keeping the link order relevant for things
like IDE controller discovery)
How does this sound?
There's really no reason to parallelise the actual PCI config cycles and
probing and stuff. It's only when some driver knows that it might have to
do some longer-running thing that it might want to execute a thread in
parallel with other things - but it still needs to be done in a controlled
situation, so that when "driver_init()" stops and "filesystem_init()"
starts, we must wait for all the driver-init parallel tasks to finish
(since "filesystem_init()" is allowed to depend on them).
Hmm?
Linus
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
2006-10-27 23:06 ` Andrew Morton
@ 2006-10-28 5:09 ` Grant Grundler
2006-10-28 5:19 ` Andrew Morton
2006-10-30 9:44 ` Cornelia Huck
1 sibling, 1 reply; 37+ messages in thread
From: Grant Grundler @ 2006-10-28 5:09 UTC (permalink / raw)
To: Andrew Morton
Cc: Alan Cox, Pavel Machek, Greg KH, Stephen Hemminger,
Matthew Wilcox, Adrian Bunk, Linus Torvalds,
Linux Kernel Mailing List, linux-pci
On Fri, Oct 27, 2006 at 04:06:26PM -0700, Andrew Morton wrote:
> On Fri, 27 Oct 2006 23:59:30 +0100
> Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
>
> > Ar Gwe, 2006-10-27 am 11:42 -0700, ysgrifennodd Andrew Morton:
> > > IOW, we want to be multithreaded _within_ an initcall level, but not between
> > > different levels.
> >
> > Thats actually insufficient. We have link ordered init sequences in
> > large numbers of driver subtrees (ATA, watchdog, etc). We'll need
> > several more initcall layers to fix that.
> >
>
> It would be nice to express those dependencies in some clearer and less
> fragile manner than link order. I guess finer-grained initcall levels
> would do that, but it doesn't scale very well.
Would making use of depmod data be a step in the right direction?
ie nic driver calls extern function (e.g. pci_enable_device())
and therefore must depend on module which provides that function.
My guess is this probably isn't 100% sufficient to replace all initcall
levels. But likely sufficient within a given initcall level.
My main concern are circular dependencies (which are rare).
> But whatever. I think multithreaded probing just doesn't pass the
> benefit-versus-hassle test, sorry. Make it dependent on CONFIG_GREGKH ;)
Isn't already? :)
I thought parallel PCI and SCSI probing on system with multiple NICs and
"SCSI" storage requires udev to create devices with consistent naming.
thanks,
grant
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
2006-10-28 5:09 ` Grant Grundler
@ 2006-10-28 5:19 ` Andrew Morton
2006-10-28 5:32 ` Andrew Morton
2006-10-28 6:08 ` Grant Grundler
0 siblings, 2 replies; 37+ messages in thread
From: Andrew Morton @ 2006-10-28 5:19 UTC (permalink / raw)
To: Grant Grundler
Cc: Alan Cox, Pavel Machek, Greg KH, Stephen Hemminger,
Matthew Wilcox, Adrian Bunk, Linus Torvalds,
Linux Kernel Mailing List, linux-pci
On Fri, 27 Oct 2006 23:09:05 -0600
Grant Grundler <grundler@parisc-linux.org> wrote:
> On Fri, Oct 27, 2006 at 04:06:26PM -0700, Andrew Morton wrote:
> > On Fri, 27 Oct 2006 23:59:30 +0100
> > Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
> >
> > > Ar Gwe, 2006-10-27 am 11:42 -0700, ysgrifennodd Andrew Morton:
> > > > IOW, we want to be multithreaded _within_ an initcall level, but not between
> > > > different levels.
> > >
> > > Thats actually insufficient. We have link ordered init sequences in
> > > large numbers of driver subtrees (ATA, watchdog, etc). We'll need
> > > several more initcall layers to fix that.
> > >
> >
> > It would be nice to express those dependencies in some clearer and less
> > fragile manner than link order. I guess finer-grained initcall levels
> > would do that, but it doesn't scale very well.
>
> Would making use of depmod data be a step in the right direction?
Nope. The linkage-order problem is by definition applicable to
linked-into-vmlinux code, not to modules.
> ie nic driver calls extern function (e.g. pci_enable_device())
> and therefore must depend on module which provides that function.
>
> My guess is this probably isn't 100% sufficient to replace all initcall
> levels. But likely sufficient within a given initcall level.
> My main concern are circular dependencies (which are rare).
The simplest implementation of "A needs B to have run" is for A to simply
call B, and B arranges to not allow itself to be run more than once.
But that doesn't work in the case "A needs B to be run, but only if B is
present". Resolving this one would require something like a fancy
"synchronisation object" against which dependers and dependees can register
interest, and a core engine which takes care of the case where a depender
registers against something which no dependees have registered.
The mind boggles.
> > But whatever. I think multithreaded probing just doesn't pass the
> > benefit-versus-hassle test, sorry. Make it dependent on CONFIG_GREGKH ;)
>
> Isn't already? :)
>
> I thought parallel PCI and SCSI probing on system with multiple NICs and
> "SCSI" storage requires udev to create devices with consistent naming.
For some reason people get upset when we rename all their devices. They're
a humourless lot.
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
2006-10-28 5:19 ` Andrew Morton
@ 2006-10-28 5:32 ` Andrew Morton
2006-10-28 6:08 ` Grant Grundler
1 sibling, 0 replies; 37+ messages in thread
From: Andrew Morton @ 2006-10-28 5:32 UTC (permalink / raw)
To: Grant Grundler, Alan Cox, Pavel Machek, Greg KH,
Stephen Hemminger, Matthew Wilcox, Adrian Bunk, Linus Torvalds,
Linux Kernel Mailing List, linux-pci
On Fri, 27 Oct 2006 22:19:25 -0700
Andrew Morton <akpm@osdl.org> wrote:
> The simplest implementation of "A needs B to have run" is for A to simply
> call B, and B arranges to not allow itself to be run more than once.
>
> But that doesn't work in the case "A needs B to be run, but only if B is
> present". Resolving this one would require something like a fancy
> "synchronisation object" against which dependers and dependees can register
> interest, and a core engine which takes care of the case where a depender
> registers against something which no dependees have registered.
otoh, we could stick with the simple "A calls B" solution, and A also
provides an attribute-weak implementation of B to cover the "A needs B but
only if B is present" problems.
Hard to say, really - one would need to study some specific problem cases.
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
2006-10-28 5:19 ` Andrew Morton
2006-10-28 5:32 ` Andrew Morton
@ 2006-10-28 6:08 ` Grant Grundler
2006-10-28 20:48 ` Stefan Richter
1 sibling, 1 reply; 37+ messages in thread
From: Grant Grundler @ 2006-10-28 6:08 UTC (permalink / raw)
To: Andrew Morton
Cc: Grant Grundler, Alan Cox, Pavel Machek, Greg KH,
Stephen Hemminger, Matthew Wilcox, Adrian Bunk, Linus Torvalds,
Linux Kernel Mailing List, linux-pci
On Fri, Oct 27, 2006 at 10:19:25PM -0700, Andrew Morton wrote:
> > > It would be nice to express those dependencies in some clearer and less
> > > fragile manner than link order. I guess finer-grained initcall levels
> > > would do that, but it doesn't scale very well.
> >
> > Would making use of depmod data be a step in the right direction?
>
> Nope. The linkage-order problem is by definition applicable to
> linked-into-vmlinux code, not to modules.
But wouldn't the same concept apply to non-module symbols that
are tagged with EXPORT_SYMBOL()?
Maybe I'm just showing my ignorance about kernel linking here...
> > ie nic driver calls extern function (e.g. pci_enable_device())
> > and therefore must depend on module which provides that function.
> >
> > My guess is this probably isn't 100% sufficient to replace all initcall
> > levels. But likely sufficient within a given initcall level.
> > My main concern are circular dependencies (which are rare).
>
> The simplest implementation of "A needs B to have run" is for A to simply
> call B, and B arranges to not allow itself to be run more than once.
Yes. we already have code like this in the kernel.
e.g. superio support in drivers/parisc.
> But that doesn't work in the case "A needs B to be run, but only if B is
> present".
I was thinking of "A is present and calls into module B, therefore B needs
to init first". In this case, A won't care if B is really present or not.
A depends on B to figure that out at runtime. If B is not configured into
the kernel, A won't ever call B since the "function" will be a NOP.
(e.g. #ifndef CONFIG_B/#define b_service() /* NOP */ /#endif)
> Resolving this one would require something like a fancy
> "synchronisation object" against which dependers and dependees can register
> interest, and a core engine which takes care of the case where a depender
> registers against something which no dependees have registered.
I guess I was wondering if the kernel link step could use symbol information
in a similar way the kernel module autoloader uses depmod info. But other
parts of the kernel might not be as modular as most of the IO subsystems
are.
I'm not looking for ways to make the process more complicated for
the people maintaining code. Keeping the registrations of dependencies
up-to-date manually would just be another PITA.
...
> > I thought parallel PCI and SCSI probing on system with multiple NICs and
> > "SCSI" storage requires udev to create devices with consistent naming.
>
> For some reason people get upset when we rename all their devices. They're
> a humourless lot.
Hey! I resemble that remark! ;)
(yeah, I've been a victim of that problem way too many times.)
thanks,
grant
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
@ 2006-10-28 8:23 Adam J. Richter
2006-10-28 9:22 ` Russell King
2006-10-28 19:41 ` Linus Torvalds
0 siblings, 2 replies; 37+ messages in thread
From: Adam J. Richter @ 2006-10-28 8:23 UTC (permalink / raw)
To: torvalds
Cc: akpm, bunk, greg, linux-kernel, linux-pci, matthew, pavel,
shemminger
On Fri, 27 Oct 2006 13:42:44 -0700 (PDT), Linus Torvalds wrote:
> static struct semaphore outstanding;
[...]
> static void allow_parallel(int n)
[...]
> static void wait_for_parallel(int n)
[...]
> static void execute_in_parallel(int (*fn)(void *), void *arg)
This interface would have problems with nesting.
As a realistic example of nesting, imagine if this facility were
used for initcalls, and one of those initcalls also used this facility to
attempt parallel initialization of several hardware devices attached
to some bus. In this scenario, do_initcalls would call allow_parallelism(10),
and then one of the initcalls that wanted to spawn its own set of
parallel processes would also call allow_parallel(10) (unintentionally
making the number of allowed processes 20), kick off its parallel
processes, and then call wait_for_parallel(), which could return
prematurely, which could easily lead to one of the child threads that
was incorrectly assumed to have finished to then corrupt memory or do any
number of other things that leads to a crash that is not reliably
reproducible.
Here are some possible ways to address this problem, and
their drawbacks.
1. Just document that nesting is prohibited, adding some BUG()
test to prevent such use. This would limit the usefulness of this
facility, and create some unnecessary coordination issues among
kernel developers in arguing policy over who gets to use this facility.
2. Turn the "outstanding" counting semaphore into a parameter.
Each group of related threads would share this parameter. For example,
do_initcalls might look something like this:
struct semaphore initcalls_sem = SEMAPHORE_INITIALIZER(...);
allow_parallel(PARALLELISM, &initcalls_sem);
for (call = __initcall_start; call < __initcall_end; call++)
execute_in_parallel(call, NULL, &initcalls_sem);
wait_for_parallel(PARALLELISM, &initcalls_sem);
The drawback of this solution is that the limitation on the total
number of parallel processes is not global. So, the number of
parallel processes in a nesting situation could get quite high.
For example, if 10 top level threads each initiated another 10
secondary threads, that's up to 111 threads with just a nesting
depth of 2.
3. Add an rw_semaphore passed as a parameter for
wait_for_parallelism, but keep original static semaphore for limiting
the parallelism. wait_for_parallel would be the only function ever
to block on the rw_semaphore, so that should avoid any problem with
ordering of taking the two semaphores--I think.
This solution deadlocks if the nesting depth exceeds the maximum
number of threads allowed, which could realistically occur when the maximum
parallelism allowed is 1.
4. Same as #3, but also increase the global thread count by 1 on
every call to allow_parallel, and decrease it by one in the matching
wait_for_parallel. The drawback here is that setting the global
number of threads to one at the outset would no longer guarantee
strict serial execution, which is minor compared to the other
problems listed above. If we want to guarantee serial execution
when PARALLELISM=1, I think that can be arranged with a little more
complexity, but I'd like to know if that is actually important.
I have attached an example of approach #4 below, also completely
untested, with no attempt made to try to compile it.
Adam Richter
#define PARALLELISM (10)
static struct semaphore outstanding;
struct thread_exec {
int (*fn)(void *);
void *arg;
struct completion args_copied;
struct rw_semaphore *fn_done;
};
/* In some .h file: */
static inline void start_parallel(void)
{
up(&outstanding);
}
/* In some .h file: */
static inline void wait_for_parallel(struct rw_semaphore *fn_done)
{
down_write(fn_done);
down(&outstanding);
}
void allow_parallel(int n) /* called once, at boot time */
{
while (--n >= 0)
up(&outstanding);
}
static int do_in_parallel(void *arg)
{
struct thread_exec *p = arg;
int (*fn)(void *) = p->fn;
void *arg = p->arg;
struct rw_semaphore *fn_done = p->fn_done;
int retval;
/* Tell the caller we are done with the arguments */
complete(&p->args_copied);
/* Do the actual work in parallel */
retval = p->fn(p->arg);
/*
* And then tell the rest of the world that we've
* got one less parallel thing outstanding..
*/
up_read(fn_done);
up(&outstanding);
return retval;
}
void execute_in_parallel(int (*fn)(void *),
void *arg,
struct rw_semaphore *fn_done)
{
struct thread_exec arg = { .fn = fn, .arg = arg };
/* Make sure we can have more outstanding parallel work */
down(&outstanding);
arg.fn = fn;
arg.arg = arg;
arg.fn_done = fn_done;
down_read(fn_done);
init_completion(&arg.args_copied);
kernel_thread(do_in_parallel, &arg);
wait_for_completion(&arg.args_copied);
}
Example of usage:
/* Earlier in the boot process... */
allow_parallel(PARALLELISM);
static void __init do_initcalls(void)
{
DECLARE_RWSEM(initcalls_done);
start_parallel();
for (call = __initcall_start; call < __initcall_end; call++)
execute_in_parallel(call, NULL, &initcalls_done);
wait_for_parallel(&initcalls_done);
}
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
2006-10-28 8:23 Adam J. Richter
@ 2006-10-28 9:22 ` Russell King
2006-10-28 12:10 ` Russell King
2006-10-28 19:41 ` Linus Torvalds
1 sibling, 1 reply; 37+ messages in thread
From: Russell King @ 2006-10-28 9:22 UTC (permalink / raw)
To: Adam J. Richter
Cc: torvalds, akpm, bunk, greg, linux-kernel, linux-pci, matthew,
pavel, shemminger
On Sat, Oct 28, 2006 at 04:23:35PM +0800, Adam J. Richter wrote:
> This interface would have problems with nesting.
Adam (and the rest of the parallel crowd),
Just a passing thought (and nothing more)...
How does this behave with PCMCIA initialisation with a Cardbus card
inserted?
This is one scenario which needs checking before any of this parallel
probe code goes anywhere near mainline, since it's possible for the
Cardbus (PCI) device to be added and therefore probed while the Yenta
probe (PCI) is still running.
--
Russell King
Linux kernel 2.6 ARM Linux - http://www.arm.linux.org.uk/
maintainer of: 2.6 Serial core
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
2006-10-28 9:22 ` Russell King
@ 2006-10-28 12:10 ` Russell King
0 siblings, 0 replies; 37+ messages in thread
From: Russell King @ 2006-10-28 12:10 UTC (permalink / raw)
To: Adam J. Richter
Cc: torvalds, akpm, bunk, greg, linux-kernel, linux-pci, matthew,
pavel, shemminger
On Sat, Oct 28, 2006 at 10:22:54AM +0100, Russell King wrote:
> On Sat, Oct 28, 2006 at 04:23:35PM +0800, Adam J. Richter wrote:
> > This interface would have problems with nesting.
>
> Adam (and the rest of the parallel crowd),
>
> Just a passing thought (and nothing more)...
>
> How does this behave with PCMCIA initialisation with a Cardbus card
> inserted?
>
> This is one scenario which needs checking before any of this parallel
> probe code goes anywhere near mainline, since it's possible for the
> Cardbus (PCI) device to be added and therefore probed while the Yenta
> probe (PCI) is still running.
Can someone make sure Adam gets my email please? His server rejects
messages from my domain:
adam@yggdrasil.com
SMTP error from remote mail server after MAIL FROM:<rmk+adam=yggdrasil.com@arm.linux.org.uk> SIZE=3022:
host yggdrasil.com [61.49.148.168]: 553 5.1.8 <rmk+adam=yggdrasil.com@arm.linux.org.uk>... Domain of sender address rmk+adam=yggdrasil.com@arm.linux.org.uk does not exist
(the error is obviously utter rubbish.)
Thanks.
--
Russell King
Linux kernel 2.6 ARM Linux - http://www.arm.linux.org.uk/
maintainer of: 2.6 Serial core
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
2006-10-28 8:23 Adam J. Richter
2006-10-28 9:22 ` Russell King
@ 2006-10-28 19:41 ` Linus Torvalds
1 sibling, 0 replies; 37+ messages in thread
From: Linus Torvalds @ 2006-10-28 19:41 UTC (permalink / raw)
To: Adam J. Richter
Cc: akpm, bunk, greg, linux-kernel, linux-pci, matthew, pavel,
shemminger
On Sat, 28 Oct 2006, Adam J. Richter wrote:
> On Fri, 27 Oct 2006 13:42:44 -0700 (PDT), Linus Torvalds wrote:
> > static struct semaphore outstanding;
> [...]
> > static void allow_parallel(int n)
> [...]
> > static void wait_for_parallel(int n)
> [...]
> > static void execute_in_parallel(int (*fn)(void *), void *arg)
>
> This interface would have problems with nesting.
You miss the point.
They _wouldn't_ be nested.
The "allow_parallel()" and "wait_for_parallel()" calls would be at some
top-level situation (ie initcalls looping).
Nobody else than the top level would _ever_ use them. Anything under that
level would just say "I want to do this in parallel" - which is just a
statement, and has no nesting issues in itself.
The whole notion of "I want to do this in parallel" is basically
non-nesting. If something is parallel, it's by definition not ordered, and
thus nesting cannot make sense. All the "ordered" stuff would be either
done without using "execute_in_parallel()" at all, or it would be ordered
_within_ one thread that is executed in parallel.
Linus
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
2006-10-28 6:08 ` Grant Grundler
@ 2006-10-28 20:48 ` Stefan Richter
2006-10-28 23:34 ` Alan Cox
0 siblings, 1 reply; 37+ messages in thread
From: Stefan Richter @ 2006-10-28 20:48 UTC (permalink / raw)
To: Grant Grundler
Cc: Andrew Morton, Alan Cox, Pavel Machek, Greg KH, Stephen Hemminger,
Matthew Wilcox, Adrian Bunk, Linus Torvalds,
Linux Kernel Mailing List, linux-pci
Grant Grundler wrote:
> On Fri, Oct 27, 2006 at 10:19:25PM -0700, Andrew Morton wrote:
>>> I thought parallel PCI and SCSI probing on system with multiple NICs and
>>> "SCSI" storage requires udev to create devices with consistent naming.
>> For some reason people get upset when we rename all their devices. They're
>> a humourless lot.
>
> Hey! I resemble that remark! ;)
>
> (yeah, I've been a victim of that problem way too many times.)
I hear network interfaces can be selected by their MACs, which are
globally unique and persistent. Most SCSI hardware has globally unique
and persistent unit properties too, and udev indeed uses them these days.
--
Stefan Richter
-=====-=-==- =-=- ===--
http://arcgraph.de/sr/
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
2006-10-28 20:48 ` Stefan Richter
@ 2006-10-28 23:34 ` Alan Cox
2006-10-29 2:01 ` Randy Dunlap
0 siblings, 1 reply; 37+ messages in thread
From: Alan Cox @ 2006-10-28 23:34 UTC (permalink / raw)
To: Stefan Richter
Cc: Grant Grundler, Andrew Morton, Pavel Machek, Greg KH,
Stephen Hemminger, Matthew Wilcox, Adrian Bunk, Linus Torvalds,
Linux Kernel Mailing List, linux-pci
Ar Sad, 2006-10-28 am 22:48 +0200, ysgrifennodd Stefan Richter:
> I hear network interfaces can be selected by their MACs, which are
> globally unique and persistent. Most SCSI hardware has globally unique
> and persistent unit properties too, and udev indeed uses them these days.
You hear incorrectly. The MAC address is only required to be *machine
unique*, please see the 802.1/2 specification documents for more detail.
Distinguishing by card MAC is a hack that works on some systems only.
SCSI is also unreliable for serial numbers because of USB, brain-dead
raid controllers and other devices that fake the same ident for many
devices.
There is another ugly too - many driver/library layers "know" that
during boot the code is not re-entered so has no locking. Before you can
go multi-probe you also have to fix all the locking in the drivers that
have boot time specific functionality (eg IDE).
Alan
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
@ 2006-10-28 23:50 Adam J. Richter
2006-10-28 23:55 ` Linus Torvalds
0 siblings, 1 reply; 37+ messages in thread
From: Adam J. Richter @ 2006-10-28 23:50 UTC (permalink / raw)
To: torvalds
Cc: akpm, bunk, greg, linux-kernel, linux-pci, matthew, pavel,
shemminger
On 2006-10-28 19:41:50, Linus Torvalds wrote:
>On Sat, 28 Oct 2006, Adam J. Richter wrote:
>
>> On Fri, 27 Oct 2006 13:42:44 -0700 (PDT), Linus Torvalds wrote:
>> > static struct semaphore outstanding;
>> [...]
>> > static void allow_parallel(int n)
>> [...]
>> > static void wait_for_parallel(int n)
>> [...]
>> > static void execute_in_parallel(int (*fn)(void *), void *arg)
>>
>> This interface would have problems with nesting.
>
>You miss the point.
>
>They _wouldn't_ be nested.
>
>The "allow_parallel()" and "wait_for_parallel()" calls would be at some
>top-level situation (ie initcalls looping).
>
>Nobody else than the top level would _ever_ use them. Anything under that
>level would just say "I want to do this in parallel" - which is just a
>statement, and has no nesting issues in itself.
If only calls to execute_in_parallel nest, your original
implementation would always deadlock when the nesting depth exceeds
the allowed number of threads, and also potentially in some shallower
nesting depths given a very unlucky order of execution. In your
original message, you mentioned allowing the parallelism limit to be
set as low as 1.
One solution to this problem would be to have
execute_in_parallel execute the function directly when no threads are
available to do it, rather than blocking. The disadvantage is that,
if no thread is immediately available, the call to
execute_in_parallel would not return until the function that was
passed in finishes, even if more threads become available much sooner.
Here is what I am referring to, again completely untested:
static void execute_in_parallel(int (*fn)(void *), void *arg)
{
struct thread_exec arg = { .fn = fn, .arg = arg };
/* If no threads are available, call the function directly. */
if (down_trylock(&outstanding) != 0) {
fn(arg);
return;
}
arg.fn = fn;
arg.arg = arg;
init_completion(&arg.args_copied);
kernel_thread(do_in_parallel, &arg);
/* We need to wait until our "arg" is safe */
wait_for_completion(&arg.args_copied);
}
Adam Richter
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
2006-10-28 23:50 [patch] drivers: wait for threaded probes between initcall levels Adam J. Richter
@ 2006-10-28 23:55 ` Linus Torvalds
2006-10-30 14:23 ` Kyle Moffett
0 siblings, 1 reply; 37+ messages in thread
From: Linus Torvalds @ 2006-10-28 23:55 UTC (permalink / raw)
To: Adam J. Richter
Cc: akpm, bunk, greg, linux-kernel, linux-pci, matthew, pavel,
shemminger
On Sun, 29 Oct 2006, Adam J. Richter wrote:
>
> If only calls to execute_in_parallel nest, your original
> implementation would always deadlock when the nesting depth exceeds
> the allowed number of threads, and also potentially in some shallower
> nesting depths given a very unlucky order of execution. In your
> original message, you mentioned allowing the parallelism limit to be
> set as low as 1.
No, I'm saying that nesting simply shouldn't be _done_. There's no real
reason. Any user would be already either parallel or doesn't need to be
parallel at all. Why would something that already _is_ parallel start
another parallel task?
IOW, what I was trying to say (perhaps badly) is that "nesting" really
isn't a sensible operation - you'd never do it. You'd do the "startup" and
"shutdown" things at the very highest level, and then in between those
calls you can start a parallel activity at any depth of the call stack,
but at no point does it really make sense to start it from within
something that is already parallel.
Hmm?
(Btw, you do seem to have some strange email setup that doesn't allow me
to email you directly, I just get a bounce. I'll try again, but you'll
probably pick this up on linux-kernel rather than in your private
mailbox).
Linus
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
2006-10-28 23:34 ` Alan Cox
@ 2006-10-29 2:01 ` Randy Dunlap
0 siblings, 0 replies; 37+ messages in thread
From: Randy Dunlap @ 2006-10-29 2:01 UTC (permalink / raw)
To: Alan Cox
Cc: Stefan Richter, Grant Grundler, Andrew Morton, Pavel Machek,
Greg KH, Stephen Hemminger, Matthew Wilcox, Adrian Bunk,
Linus Torvalds, Linux Kernel Mailing List, linux-pci
On Sun, 29 Oct 2006 00:34:57 +0100 Alan Cox wrote:
> Ar Sad, 2006-10-28 am 22:48 +0200, ysgrifennodd Stefan Richter:
> > I hear network interfaces can be selected by their MACs, which are
> > globally unique and persistent. Most SCSI hardware has globally unique
> > and persistent unit properties too, and udev indeed uses them these days.
>
> You hear incorrectly. The MAC address is only required to be *machine
> unique*, please see the 802.1/2 specification documents for more detail.
> Distinguishing by card MAC is a hack that works on some systems only.
I would have expected "most" instead of "some", but you have
different experiences than I do. What spec requires a MAC address
to be machine-unique?
IEEE "makes it possible for organizations to employ unique individual
LAN MAC addresses, group addresses, and protocol identifiers."
IEEE 802 goes on to say:
"The concept of universal addressing is based on the
idea that all potential members of a network need to
have a unique identifier (if they are going to coexist
in the network). The advantage of a universal address is
that a station with such an address can be attached to any
LAN in the world with an assurance that the address is unique."
and then "recommended" (but not required):
"The recommended approach is for each device associated
with a distinct point of attachment to a LAN to
have its own unique MAC address. Typically, therefore,
a LAN adapter card (or, e.g., an equivalent chip or
set of chips on a motherboard) should have one unique
MAC address for each LAN attachment that it can
support at a given time."
and then this part seems contrary to the machine-unique quality
that you mentioned (I guess--don't know--that this is what
Sun used to do ?):
"NOTE—It is recognized that an alternative approach has
gained currency in some LAN implementations, in which the
device is interpreted as a complete computer system,
which can have multiple attachments to different LANs. Under this
interpretation, a single LAN MAC address is used to
identify all of the system’s points of attachment to the LANs in
question. This approach, unlike the recommended one, does
not automatically meet the requirements of
IEEE Std 802.1D-1998 MAC bridging."
> SCSI is also unreliable for serial numbers because of USB, brain-dead
> raid controllers and other devices that fake the same ident for many
> devices.
>
> There is another ugly too - many driver/library layers "know" that
> during boot the code is not re-entered so has no locking. Before you can
> go multi-probe you also have to fix all the locking in the drivers that
> have boot time specific functionality (eg IDE).
---
~Randy
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
@ 2006-10-29 20:38 Adam J. Richter
0 siblings, 0 replies; 37+ messages in thread
From: Adam J. Richter @ 2006-10-29 20:38 UTC (permalink / raw)
To: torvalds
Cc: akpm, bunk, greg, linux-kernel, linux-pci, matthew, pavel,
shemminger
On 2006-10-28 23:55:42, Linus Torvalds wrote:
>On Sun, 29 Oct 2006, Adam J. Richter wrote:
>>
>> If only calls to execute_in_parallel nest, your original
>> implementation would always deadlock when the nesting depth exceeds
>> the allowed number of threads, and [...]
>
>No, I'm saying that nesting simply shouldn't be _done_. There's no real
>reason. Any user would be already either parallel or doesn't need to be
>parallel at all. Why would something that already _is_ parallel start
>another parallel task?
Suppose the system is initializing PCI cards in parallel. The
thread that is initializing one particular PCI card discovers that it
is initializing a firewire controller. After the already "parallel"
PCI firewire probe function initializes the card, it is going to
enumerate the devices attached to the firewire cable and spawn
separate threads to initialize drivers for each of these several
firewire devices.
One way to avoid this depth-first descent would be to change
device_attach() in drivers/base/dd.c to queue its work to helper daemon.
Either way, we're talking about a few lines of code in
execute_in_parallel that can easily be added later if needed. If you
really think all calls to execute_parallel will be done by the main
kernel thread, I suggest someone add a BUG_ON() statement to that
effect to execute_parallel to see.
I would also like to suggest a very different approach, which
would not be quite as fast, but which I think would be more flexible
and would work partly by making the kernel do _less_.
Perhaps we could offer a boot option to limit device_attach to
consider only drivers named by that option, such as
"limit_drivers=vc,ramdisk". (If a user screwed his boot process with
the wrong limit_drivers= options, fixing the problem would be just a
matter of just eliminating the option.) All other driver-device
bindings would be done explicitly by a user level mechanism, which
would implicitly provide the process context for blocking. That is,
the parallelization would occur by a sysfs watcher like udev spawning
separate threads to call the user level sysfs interface for attaching
devices to drivers. User level would also handle matching driver and
device ID information, determining parallelization limits, probe
order, choosing between multiple drivers available for devices or
deliberately not binding a driver to a device, and perhaps executing
other custom user level code along the way.
Because the threads involved would come from clone() executed
by a user level daemon sysfs watcher like udev, execute_in_parallel()
would be less used, perhaps not used at all, depending on whether
parts of the boot process besides walking the device tree would benefit
much from parallelization.
Adam Richter
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
2006-10-27 23:06 ` Andrew Morton
2006-10-28 5:09 ` Grant Grundler
@ 2006-10-30 9:44 ` Cornelia Huck
2006-10-30 10:48 ` Alan Cox
1 sibling, 1 reply; 37+ messages in thread
From: Cornelia Huck @ 2006-10-30 9:44 UTC (permalink / raw)
To: Andrew Morton
Cc: Alan Cox, Pavel Machek, Greg KH, Stephen Hemminger,
Matthew Wilcox, Adrian Bunk, Linus Torvalds,
Linux Kernel Mailing List, linux-pci
On Fri, 27 Oct 2006 16:06:26 -0700,
Andrew Morton <akpm@osdl.org> wrote:
> On Fri, 27 Oct 2006 23:59:30 +0100
> Alan Cox <alan@lxorguk.ukuu.org.uk> wrote:
>
> > Ar Gwe, 2006-10-27 am 11:42 -0700, ysgrifennodd Andrew Morton:
> > > IOW, we want to be multithreaded _within_ an initcall level, but not between
> > > different levels.
> >
> > Thats actually insufficient. We have link ordered init sequences in
> > large numbers of driver subtrees (ATA, watchdog, etc). We'll need
> > several more initcall layers to fix that.
> >
>
> It would be nice to express those dependencies in some clearer and less
> fragile manner than link order. I guess finer-grained initcall levels
> would do that, but it doesn't scale very well.
Would it be sufficient just to make the busses wait until all their
devices are through with their setup? This is what the ccw bus on s390
does:
- Increase counter for device and start device recognition for it
- Continue for next device
- When async device recognition (and probable enablement) is finished,
register device and decrease counter
- ccw bus init function waits till counter has dropped to 0
This has worked fine for us since 2.5. OTOH, s390 doesn't have such a
diverse a set of hardware as a PC :)
--
Cornelia Huck
Linux for zSeries Developer
Tel.: +49-7031-16-4837, Mail: cornelia.huck@de.ibm.com
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
2006-10-30 9:44 ` Cornelia Huck
@ 2006-10-30 10:48 ` Alan Cox
2006-10-30 12:29 ` Matthew Wilcox
0 siblings, 1 reply; 37+ messages in thread
From: Alan Cox @ 2006-10-30 10:48 UTC (permalink / raw)
To: Cornelia Huck
Cc: Andrew Morton, Pavel Machek, Greg KH, Stephen Hemminger,
Matthew Wilcox, Adrian Bunk, Linus Torvalds,
Linux Kernel Mailing List, linux-pci
Ar Llu, 2006-10-30 am 10:44 +0100, ysgrifennodd Cornelia Huck:
> Would it be sufficient just to make the busses wait until all their
> devices are through with their setup? This is what the ccw bus on s390
> does:
For ATA and IDE no, it might work with SCSI but your devices would
randomly re-order which is also obnoxious. IDE relies on both link probe
order and also has code that knows boot time processing is single
threaded.
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
2006-10-30 10:48 ` Alan Cox
@ 2006-10-30 12:29 ` Matthew Wilcox
0 siblings, 0 replies; 37+ messages in thread
From: Matthew Wilcox @ 2006-10-30 12:29 UTC (permalink / raw)
To: Alan Cox
Cc: Cornelia Huck, Andrew Morton, Pavel Machek, Greg KH,
Stephen Hemminger, Adrian Bunk, Linus Torvalds,
Linux Kernel Mailing List, linux-pci, James Bottomley
On Mon, Oct 30, 2006 at 10:48:51AM +0000, Alan Cox wrote:
> Ar Llu, 2006-10-30 am 10:44 +0100, ysgrifennodd Cornelia Huck:
> > Would it be sufficient just to make the busses wait until all their
> > devices are through with their setup? This is what the ccw bus on s390
> > does:
>
> For ATA and IDE no, it might work with SCSI but your devices would
> randomly re-order which is also obnoxious. IDE relies on both link probe
> order and also has code that knows boot time processing is single
> threaded.
There's no need to parallelise the scanning of SCSI host adapters.
Indeed, it only causes pain. With
http://git.kernel.org/git/?p=linux/kernel/git/jejb/scsi-misc-2.6.git;a=commitdiff;h=3e082a910d217b2e7b186077ebf5a1126a68c62f
and
http://git.parisc-linux.org/?p=linux-2.6.git;a=shortlog;h=scsi-async-scan
some bugfixing, and moving the scsi initialisation earlier (so it has
longer to complete while other things initialise), we should never have
to wait for scsi scans again.
And your devices only reorder as much as they ever used to with scsi.
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
2006-10-28 23:55 ` Linus Torvalds
@ 2006-10-30 14:23 ` Kyle Moffett
2006-10-30 14:38 ` Arjan van de Ven
2006-10-30 14:42 ` Matthew Wilcox
0 siblings, 2 replies; 37+ messages in thread
From: Kyle Moffett @ 2006-10-30 14:23 UTC (permalink / raw)
To: Linus Torvalds
Cc: Adam J. Richter, akpm, bunk, greg, linux-kernel, linux-pci,
matthew, pavel, shemminger
On Oct 28, 2006, at 19:55:42, Linus Torvalds wrote:
> On Sun, 29 Oct 2006, Adam J. Richter wrote:
>> If only calls to execute_in_parallel nest, your original
>> implementation would always deadlock when the nesting depth
>> exceeds the allowed number of threads, and also potentially in
>> some shallower nesting depths given a very unlucky order of
>> execution. In your original message, you mentioned allowing the
>> parallelism limit to be set as low as 1.
>
> No, I'm saying that nesting simply shouldn't be _done_. There's no
> real reason. Any user would be already either parallel or doesn't
> need to be parallel at all. Why would something that already _is_
> parallel start another parallel task?
Well, I would argue that there actually _is_ a reason; the same
reason that GNU make communicates between recursive invocations to
control the maximum number of in-progress execution threads ("-J4"
will have 4 make targets running at once, _even_ in the presence of
recursive make invocations and nested directories). Likewise in the
context of recursively nested busses and devices; multiple PCI
domains, USB, Firewire, etc.
> IOW, what I was trying to say (perhaps badly) is that "nesting"
> really isn't a sensible operation - you'd never do it. You'd do the
> "startup" and "shutdown" things at the very highest level, and then
> in between those calls you can start a parallel activity at any
> depth of the call stack, but at no point does it really make sense
> to start it from within something that is already parallel.
Well, perhaps it does. If I have (hypothetically) a 64-way system
with several PCI domains, I should be able to not only start scanning
each PCI domain individually, but once each domain has been scanned
it should be able to launch multiple probing threads, one for each
device on the PCI bus. That is, assuming that I have properly set up
my udev to statically name devices.
Perhaps it would make more sense for the allow_parallel() call to
specify instead a number of *additional* threads to spawn, such that
allow_parallel(0) on the top level would force the normal serial boot
order, allow_parallel(1) would allow one probing thread and the init
thread to both probe hardware at once, etc.
With a little per-thread context on the stack, you could fairly
easily keep track of the number of allowed sub-threads on a per-
allow_parallel() basis. Before you spawn each new thread, create its
new per-thread state for it and pass its pointer to the child
thread. With each new do_in_parallel() call it would down the
semaphores for each "context" up the tree until it hit the top, and
then it would allocate a new context and fork off a new thread for
the _previous_ call to do_in_parallel(). The last call would remain
unforked, and so finalize_parallel() would first execute that call in
the current thread and then reap all of the children by waiting on
their completions then freeing their contexts.
I admit the complexity is a bit high, but since the maximum nesting
is bounded by the complexity of the hardware and the number of
busses, and the maximum memory-allocation is strictly limited in the
single-threaded case this could allow 64-way systems to probe all
their hardware an order of magnitude faster than today without
noticeably impacting an embedded system even in the absolute worst case.
I _believe_ that this should also be coupled with a bit of cleanup of
probe-order dependencies. If a subsystem depends on another being
initialized, the depended-on one could very easily export a
wait_for_foo_init() function:
DECLARE_COMPLETION(foo_init_completion);
static int foo_init_result;
int wait_for_foo_init()
{
wait_for_completion(&foo_init_completion);
return foo_init_result;
}
int foo_init(struct parallel_state *state)
{
struct foo_device *dev;
allow_parallel(state, 3);
#if 1
/* Assumes: int foo_probe_device(void *dev); */
for_each_foo_device(dev)
do_in_parallel(state, foo_probe_device, dev);
#else
/* Assumes: int foo_probe_device(struct parallel_state *state,
void *dev); */
for_each_foo_device(dev)
do_in_parallel_nested(state, foo_probe_device, dev);
#endif
foo_init_result = finalize_parallel(state);
complete(&foo_init_completion);
return foo_init_result;
}
And of course if you wanted to init both the foo and bar busses in
parallel you could implement a virtually identical function using the
do_in_parallel_nested() variant on top of the foo_init() function.
I'm working on a sample implementation of the allow_parallel()
do_in_parallel() and finalize_parallel() functions, but I'm going to
take the time to make sure its right. In the mean-time, I'm
interested in any comments.
Cheers,
Kyle Moffett
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
2006-10-30 14:23 ` Kyle Moffett
@ 2006-10-30 14:38 ` Arjan van de Ven
2006-10-30 15:00 ` Xavier Bestel
2006-10-30 18:56 ` Kyle Moffett
2006-10-30 14:42 ` Matthew Wilcox
1 sibling, 2 replies; 37+ messages in thread
From: Arjan van de Ven @ 2006-10-30 14:38 UTC (permalink / raw)
To: Kyle Moffett
Cc: Linus Torvalds, Adam J. Richter, akpm, bunk, greg, linux-kernel,
linux-pci, matthew, pavel, shemminger
> I admit the complexity is a bit high, but since the maximum nesting
> is bounded by the complexity of the hardware and the number of
> busses, and the maximum memory-allocation is strictly limited in the
> single-threaded case this could allow 64-way systems to probe all
> their hardware an order of magnitude faster than today without
> noticeably impacting an embedded system even in the absolute worst case.
how much of this complexity goes away if you consider the
scanning/probing as a series of "work elements", and you end up with a
queue of work elements that threads can pull work off one at a time (so
that if one element blocks the others just continue to flow). If you
then find, say, a new PCI bus you just put another work element to
process it at the end of the queue, or you process it synchronously. Etc
etc.
All you need to scale then is the number of worker threads on the
system, which should be relatively easy to size....
(check every X miliseconds if there are more than X outstanding work
elements, if there are, spawn one new worker thread if the total number
of worker threads is less than the system wide max. Worker threads die
if they have nothing to do for more than Y miliseconds)
Oh and... we have a concept for this already, or at least mostly, via
the work queue mechanism :)
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
2006-10-30 14:23 ` Kyle Moffett
2006-10-30 14:38 ` Arjan van de Ven
@ 2006-10-30 14:42 ` Matthew Wilcox
2006-10-30 18:47 ` Kyle Moffett
1 sibling, 1 reply; 37+ messages in thread
From: Matthew Wilcox @ 2006-10-30 14:42 UTC (permalink / raw)
To: Kyle Moffett
Cc: Linus Torvalds, Adam J. Richter, akpm, bunk, greg, linux-kernel,
linux-pci, pavel, shemminger
On Mon, Oct 30, 2006 at 09:23:10AM -0500, Kyle Moffett wrote:
> recursive make invocations and nested directories). Likewise in the
> context of recursively nested busses and devices; multiple PCI
> domains, USB, Firewire, etc.
I don't think you know what a PCI domain is ...
> Well, perhaps it does. If I have (hypothetically) a 64-way system
> with several PCI domains, I should be able to not only start scanning
> each PCI domain individually, but once each domain has been scanned
> it should be able to launch multiple probing threads, one for each
> device on the PCI bus. That is, assuming that I have properly set up
> my udev to statically name devices.
There's still one spinlock that protects *all* accesses to PCI config
space. Maybe we should make it one per PCI root bridge or something,
but even that wouldn't help some architectures.
> I admit the complexity is a bit high, but since the maximum nesting
> is bounded by the complexity of the hardware and the number of
> busses, and the maximum memory-allocation is strictly limited in the
> single-threaded case this could allow 64-way systems to probe all
> their hardware an order of magnitude faster than today without
> noticeably impacting an embedded system even in the absolute worst case.
To be honest, I think just scaling PARALLEL to NR_CPUS*4 or something
would be a reasonable way to go.
If people actually want to get serious about this, I know the PPC folks
have some openfirmware call that tells them about power domains and how
many scsi discs they can spin up at one time (for example). Maybe
that's not necessary; if we can figure out what the system's max power
draw is and how close we are to it, we can decide whether to spawn
another thread or not.
It's quite complicated. You can spin up a disc over *here*, but not
over *there* ... this really is a gigantic can of worms being opened.
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
2006-10-30 14:38 ` Arjan van de Ven
@ 2006-10-30 15:00 ` Xavier Bestel
2006-10-30 15:05 ` Arjan van de Ven
2006-10-30 18:56 ` Kyle Moffett
1 sibling, 1 reply; 37+ messages in thread
From: Xavier Bestel @ 2006-10-30 15:00 UTC (permalink / raw)
To: Arjan van de Ven
Cc: Kyle Moffett, Linus Torvalds, Adam J. Richter, akpm, bunk, greg,
linux-kernel, linux-pci, matthew, pavel, shemminger
On Mon, 2006-10-30 at 15:38 +0100, Arjan van de Ven wrote:
> how much of this complexity goes away if you consider the
> scanning/probing as a series of "work elements", and you end up with a
> queue of work elements that threads can pull work off one at a time (so
> that if one element blocks the others just continue to flow). If you
> then find, say, a new PCI bus you just put another work element to
> process it at the end of the queue, or you process it synchronously. Etc
> etc.
>
> All you need to scale then is the number of worker threads on the
> system, which should be relatively easy to size....
> (check every X milliseconds if there are more than X outstanding work
> elements, if there are, spawn one new worker thread if the total number
> of worker threads is less than the system wide max. Worker threads die
> if they have nothing to do for more than Y milliseconds)
Instead of checking every X ms, just check at each job insertion.
Xav
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
2006-10-30 15:00 ` Xavier Bestel
@ 2006-10-30 15:05 ` Arjan van de Ven
2006-10-30 15:28 ` Xavier Bestel
0 siblings, 1 reply; 37+ messages in thread
From: Arjan van de Ven @ 2006-10-30 15:05 UTC (permalink / raw)
To: Xavier Bestel
Cc: Kyle Moffett, Linus Torvalds, Adam J. Richter, akpm, bunk, greg,
linux-kernel, linux-pci, matthew, pavel, shemminger
On Mon, 2006-10-30 at 16:00 +0100, Xavier Bestel wrote:
> On Mon, 2006-10-30 at 15:38 +0100, Arjan van de Ven wrote:
>
> > how much of this complexity goes away if you consider the
> > scanning/probing as a series of "work elements", and you end up with a
> > queue of work elements that threads can pull work off one at a time (so
> > that if one element blocks the others just continue to flow). If you
> > then find, say, a new PCI bus you just put another work element to
> > process it at the end of the queue, or you process it synchronously. Etc
> > etc.
> >
> > All you need to scale then is the number of worker threads on the
> > system, which should be relatively easy to size....
> > (check every X milliseconds if there are more than X outstanding work
> > elements, if there are, spawn one new worker thread if the total number
> > of worker threads is less than the system wide max. Worker threads die
> > if they have nothing to do for more than Y milliseconds)
>
> Instead of checking every X ms, just check at each job insertion.
that would lead to a too eager amount of threads if processing the jobs
is really really quick ...
--
if you want to mail me at work (you don't), use arjan (at) linux.intel.com
Test the interaction between Linux and your BIOS via http://www.linuxfirmwarekit.org
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
2006-10-30 15:05 ` Arjan van de Ven
@ 2006-10-30 15:28 ` Xavier Bestel
0 siblings, 0 replies; 37+ messages in thread
From: Xavier Bestel @ 2006-10-30 15:28 UTC (permalink / raw)
To: Arjan van de Ven
Cc: Kyle Moffett, Linus Torvalds, Adam J. Richter, akpm, bunk, greg,
linux-kernel, linux-pci, matthew, pavel, shemminger
On Mon, 2006-10-30 at 16:05 +0100, Arjan van de Ven wrote:
> On Mon, 2006-10-30 at 16:00 +0100, Xavier Bestel wrote:
> > On Mon, 2006-10-30 at 15:38 +0100, Arjan van de Ven wrote:
> >
> > > how much of this complexity goes away if you consider the
> > > scanning/probing as a series of "work elements", and you end up with a
> > > queue of work elements that threads can pull work off one at a time (so
> > > that if one element blocks the others just continue to flow). If you
> > > then find, say, a new PCI bus you just put another work element to
> > > process it at the end of the queue, or you process it synchronously. Etc
> > > etc.
> > >
> > > All you need to scale then is the number of worker threads on the
> > > system, which should be relatively easy to size....
> > > (check every X milliseconds if there are more than X outstanding work
> > > elements, if there are, spawn one new worker thread if the total number
> > > of worker threads is less than the system wide max. Worker threads die
> > > if they have nothing to do for more than Y milliseconds)
> >
> > Instead of checking every X ms, just check at each job insertion.
>
> that would lead to a too eager amount of threads if processing the jobs
> is really really quick ...
Don't you have a "no more than X threads at once" limit ? You just
*check* at job insertion, not unconditionally fork.
Xav
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
2006-10-30 14:42 ` Matthew Wilcox
@ 2006-10-30 18:47 ` Kyle Moffett
2006-10-30 19:13 ` Matthew Wilcox
0 siblings, 1 reply; 37+ messages in thread
From: Kyle Moffett @ 2006-10-30 18:47 UTC (permalink / raw)
To: Matthew Wilcox
Cc: Linus Torvalds, Adam J. Richter, akpm, bunk, greg, linux-kernel,
linux-pci, pavel, shemminger
On Oct 30, 2006, at 09:42:59, Matthew Wilcox wrote:
> On Mon, Oct 30, 2006 at 09:23:10AM -0500, Kyle Moffett wrote:
>> recursive make invocations and nested directories). Likewise in
>> the context of recursively nested busses and devices; multiple PCI
>> domains, USB, Firewire, etc.
>
> I don't think you know what a PCI domain is ...
Fair enough, I guess I don't, really...
>> Well, perhaps it does. If I have (hypothetically) a 64-way system
>> with several PCI domains, I should be able to not only start
>> scanning each PCI domain individually, but once each domain has
>> been scanned it should be able to launch multiple probing threads,
>> one for each device on the PCI bus. That is, assuming that I have
>> properly set up my udev to statically name devices.
>
> There's still one spinlock that protects *all* accesses to PCI
> config space. Maybe we should make it one per PCI root bridge or
> something, but even that wouldn't help some architectures.
Well, yes, but it would help some architectures. It would seem
rather stupid to build a hardware limitation into a 64+ cpu system
such that it cannot initialize or reconfigure multiple pieces of
hardware at once. It also would help for more "mundane" systems such
as my "Quad" G5 desktop which takes an appreciable time to probe all
the various PCI, USB, SATA, and Firewire devices in the system.
Cheers,
Kyle Moffett
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
2006-10-30 14:38 ` Arjan van de Ven
2006-10-30 15:00 ` Xavier Bestel
@ 2006-10-30 18:56 ` Kyle Moffett
1 sibling, 0 replies; 37+ messages in thread
From: Kyle Moffett @ 2006-10-30 18:56 UTC (permalink / raw)
To: Arjan van de Ven
Cc: Linus Torvalds, Adam J. Richter, akpm, bunk, greg, linux-kernel,
linux-pci, matthew, pavel, shemminger
On Oct 30, 2006, at 09:38:00, Arjan van de Ven wrote:
>> I admit the complexity is a bit high, but since the maximum
>> nesting is bounded by the complexity of the hardware and the
>> number of busses, and the maximum memory-allocation is strictly
>> limited in the single-threaded case this could allow 64-way
>> systems to probe all their hardware an order of magnitude faster
>> than today without noticeably impacting an embedded system even in
>> the absolute worst case.
>
> how much of this complexity goes away if you consider the scanning/
> probing as a series of "work elements", and you end up with a queue
> of work elements that threads can pull work off one at a time (so
> that if one element blocks the others just continue to flow). If
> you then find, say, a new PCI bus you just put another work element
> to process it at the end of the queue, or you process it
> synchronously. Etc etc.
Well, I suppose the "trick" would be to ensure that the top-level
code can probe multiple independent busses in parallel, while
allowing certain of those to serialize their execution order for
whatever reason without changing the resulting linear order. This
would make it possible to have independent pci.multithread_probe=1
and scsi.multithread_probe=1 arguments so the sysadmin can force
serialization for one subsystem if they don't have their device-
numbering issues with that subsystem entirely sorted out.
Cheers,
Kyle Moffett
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
2006-10-30 18:47 ` Kyle Moffett
@ 2006-10-30 19:13 ` Matthew Wilcox
2006-10-31 5:39 ` Grant Grundler
0 siblings, 1 reply; 37+ messages in thread
From: Matthew Wilcox @ 2006-10-30 19:13 UTC (permalink / raw)
To: Kyle Moffett
Cc: Linus Torvalds, Adam J. Richter, akpm, bunk, greg, linux-kernel,
linux-pci, pavel, shemminger
On Mon, Oct 30, 2006 at 01:47:53PM -0500, Kyle Moffett wrote:
> Well, yes, but it would help some architectures. It would seem
> rather stupid to build a hardware limitation into a 64+ cpu system
> such that it cannot initialize or reconfigure multiple pieces of
> hardware at once. It also would help for more "mundane" systems such
> as my "Quad" G5 desktop which takes an appreciable time to probe all
> the various PCI, USB, SATA, and Firewire devices in the system.
Probing PCI devices really doesn't take that long. It's the extra stuff
the drivers do at ->probe that takes the time. And the stand-out
offender here is SCSI (and FC), which I'm working to fix. Firewire, USB
and SATA are somewhere intermediate.
^ permalink raw reply [flat|nested] 37+ messages in thread
* Re: [patch] drivers: wait for threaded probes between initcall levels
2006-10-30 19:13 ` Matthew Wilcox
@ 2006-10-31 5:39 ` Grant Grundler
0 siblings, 0 replies; 37+ messages in thread
From: Grant Grundler @ 2006-10-31 5:39 UTC (permalink / raw)
To: Matthew Wilcox
Cc: Kyle Moffett, Linus Torvalds, Adam J. Richter, akpm, bunk, greg,
linux-kernel, linux-pci, pavel, shemminger
On Mon, Oct 30, 2006 at 12:13:07PM -0700, Matthew Wilcox wrote:
> Probing PCI devices really doesn't take that long.
Yeah - usually measured in "milliseconds".
> It's the extra stuff
> the drivers do at ->probe that takes the time. And the stand-out
> offender here is SCSI (and FC), which I'm working to fix. Firewire, USB
> and SATA are somewhere intermediate.
ISTR that the SATA Port timeout is 5 seconds or something like that.
And some cards have lots of ports...so my impression is SATA would
benefit a lot from parallelism as well.
I'm certainly no SATA expert...maybe someone else could speak
more definitively on the topic of worst case SATA timeout.
thanks,
grant
^ permalink raw reply [flat|nested] 37+ messages in thread
end of thread, other threads:[~2006-10-31 5:39 UTC | newest]
Thread overview: 37+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-10-28 23:50 [patch] drivers: wait for threaded probes between initcall levels Adam J. Richter
2006-10-28 23:55 ` Linus Torvalds
2006-10-30 14:23 ` Kyle Moffett
2006-10-30 14:38 ` Arjan van de Ven
2006-10-30 15:00 ` Xavier Bestel
2006-10-30 15:05 ` Arjan van de Ven
2006-10-30 15:28 ` Xavier Bestel
2006-10-30 18:56 ` Kyle Moffett
2006-10-30 14:42 ` Matthew Wilcox
2006-10-30 18:47 ` Kyle Moffett
2006-10-30 19:13 ` Matthew Wilcox
2006-10-31 5:39 ` Grant Grundler
-- strict thread matches above, loose matches on Subject: below --
2006-10-29 20:38 Adam J. Richter
2006-10-28 8:23 Adam J. Richter
2006-10-28 9:22 ` Russell King
2006-10-28 12:10 ` Russell King
2006-10-28 19:41 ` Linus Torvalds
2006-10-23 23:22 Linux 2.6.19-rc3 Linus Torvalds
2006-10-26 22:45 ` 2.6.19-rc3: known unfixed regressions (v2) Adrian Bunk
2006-10-27 1:02 ` [RFC: 2.6.19 patch] let PCI_MULTITHREAD_PROBE depend on BROKEN Adrian Bunk
2006-10-27 1:20 ` Matthew Wilcox
2006-10-27 1:28 ` Andrew Morton
2006-10-27 2:11 ` Stephen Hemminger
2006-10-27 17:07 ` Greg KH
2006-10-27 17:22 ` Pavel Machek
2006-10-27 18:39 ` Andrew Morton
2006-10-27 18:41 ` vmlinux.lds: consolidate initcall sections Andrew Morton
2006-10-27 18:42 ` [patch] drivers: wait for threaded probes between initcall levels Andrew Morton
2006-10-27 18:47 ` Stephen Hemminger
2006-10-27 20:15 ` Andrew Morton
2006-10-27 20:42 ` Linus Torvalds
2006-10-27 20:48 ` Linus Torvalds
2006-10-28 1:11 ` Greg KH
2006-10-28 1:50 ` Linus Torvalds
2006-10-27 22:59 ` Alan Cox
2006-10-27 23:06 ` Andrew Morton
2006-10-28 5:09 ` Grant Grundler
2006-10-28 5:19 ` Andrew Morton
2006-10-28 5:32 ` Andrew Morton
2006-10-28 6:08 ` Grant Grundler
2006-10-28 20:48 ` Stefan Richter
2006-10-28 23:34 ` Alan Cox
2006-10-29 2:01 ` Randy Dunlap
2006-10-30 9:44 ` Cornelia Huck
2006-10-30 10:48 ` Alan Cox
2006-10-30 12:29 ` Matthew Wilcox
2006-10-27 23:12 ` Olaf Hering
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox