* [RFC PATCH] sched_domains: Make SD_NODE_INIT per-arch
@ 2004-09-29 1:12 Matthew Dobson
2004-09-30 8:15 ` Nick Piggin
0 siblings, 1 reply; 12+ messages in thread
From: Matthew Dobson @ 2004-09-29 1:12 UTC (permalink / raw)
To: LKML, Andrew Morton, nickpiggin, Martin J. Bligh
IA64 already has their own version of SD_NODE_INIT, tuned for their
extremely large machines. I think that all arches would benefit from
having their own, arch-specific SD_NODE_INIT initializer, rather than
the one-size-fits-all variant we've got now.
This patch just creates one instance of SD_NODE_INIT per architecture in
the arch's include/asm/topology.h file. IA64's wasn't defined there, so
for consistency I moved it. Also, in each topology.h file I touched, I
removed the NODE_BALANCE_RATE definition since a grep of the -mm tree
revealed that it is defined all over the place, but no longer used.
This patch does NOT attempt any actual tuning of the values. Every
architecture has the same values as the current one-size-fits-all
version. Anyone who is interested in the 4 main NUMA arches (i386,
ia64, x86_64 & ppc64) please test this and feel free to send me any
"tweaked" values that might help performance for your arch.
Compiled and booted on i386 and x86_64.
[mcd@arrakis source]$ diffstat ~/linux/patches/sched_domains/per_arch-SD_INIT.patch
arch/ia64/kernel/domain.c | 1
include/asm-i386/topology.h | 49 +++++++++++++++++++++++++++++++++++-------
include/asm-ia64/processor.h | 21 ------------------
include/asm-ia64/topology.h | 23 +++++++++++++++++--
include/asm-ppc64/topology.h | 21 ++++++++++++++++--
include/asm-x86_64/topology.h | 22 ++++++++++++++++++
include/linux/sched.h | 21 ++----------------
7 files changed, 104 insertions(+), 54 deletions(-)
-Matt
diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.9-rc2-mm4/arch/ia64/kernel/domain.c linux-2.6.9-rc2-mm4+per_arch-SD_INITs/arch/ia64/kernel/domain.c
--- linux-2.6.9-rc2-mm4/arch/ia64/kernel/domain.c 2004-09-27 15:57:19.000000000 -0700
+++ linux-2.6.9-rc2-mm4+per_arch-SD_INITs/arch/ia64/kernel/domain.c 2004-09-27 17:42:59.000000000 -0700
@@ -11,7 +11,6 @@
#include <linux/cpumask.h>
#include <linux/init.h>
#include <linux/topology.h>
-#include <asm/processor.h>
#define SD_NODES_PER_DOMAIN 6
diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.9-rc2-mm4/include/asm-i386/topology.h linux-2.6.9-rc2-mm4+per_arch-SD_INITs/include/asm-i386/topology.h
--- linux-2.6.9-rc2-mm4/include/asm-i386/topology.h 2004-09-16 15:02:45.000000000 -0700
+++ linux-2.6.9-rc2-mm4+per_arch-SD_INITs/include/asm-i386/topology.h 2004-09-27 17:38:45.000000000 -0700
@@ -69,17 +69,50 @@ static inline cpumask_t pcibus_to_cpumas
/* Node-to-Node distance */
#define node_distance(from, to) ((from) != (to))
-/* Cross-node load balancing interval. */
-#define NODE_BALANCE_RATE 100
+#ifdef CONFIG_X86_NUMAQ
+/* sched_domains SD_NODE_INIT for IBM/Sequent NUMAQ machines */
+#define SD_NODE_INIT (struct sched_domain) { \
+ .span = CPU_MASK_NONE, \
+ .parent = NULL, \
+ .groups = NULL, \
+ .min_interval = 8, \
+ .max_interval = 32, \
+ .busy_factor = 32, \
+ .imbalance_pct = 125, \
+ .cache_hot_time = (10*1000), \
+ .cache_nice_tries = 1, \
+ .per_cpu_gain = 100, \
+ .flags = SD_LOAD_BALANCE \
+ | SD_BALANCE_EXEC \
+ | SD_WAKE_BALANCE, \
+ .last_balance = jiffies, \
+ .balance_interval = 1, \
+ .nr_balance_failed = 0, \
+}
+#else
+/* sched_domains SD_NODE_INIT for other i386 NUMA machines */
+#define SD_NODE_INIT (struct sched_domain) { \
+ .span = CPU_MASK_NONE, \
+ .parent = NULL, \
+ .groups = NULL, \
+ .min_interval = 8, \
+ .max_interval = 32, \
+ .busy_factor = 32, \
+ .imbalance_pct = 125, \
+ .cache_hot_time = (10*1000), \
+ .cache_nice_tries = 1, \
+ .per_cpu_gain = 100, \
+ .flags = SD_LOAD_BALANCE \
+ | SD_BALANCE_EXEC \
+ | SD_WAKE_BALANCE, \
+ .last_balance = jiffies, \
+ .balance_interval = 1, \
+ .nr_balance_failed = 0, \
+}
+#endif /* CONFIG_X86_NUMAQ */
#else /* !CONFIG_NUMA */
-/*
- * Other i386 platforms should define their own version of the
- * above macros here.
- */
-
#include <asm-generic/topology.h>
-
#endif /* CONFIG_NUMA */
#endif /* _ASM_I386_TOPOLOGY_H */
diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.9-rc2-mm4/include/asm-ia64/processor.h linux-2.6.9-rc2-mm4+per_arch-SD_INITs/include/asm-ia64/processor.h
--- linux-2.6.9-rc2-mm4/include/asm-ia64/processor.h 2004-09-27 15:57:51.000000000 -0700
+++ linux-2.6.9-rc2-mm4+per_arch-SD_INITs/include/asm-ia64/processor.h 2004-09-27 17:40:05.000000000 -0700
@@ -337,27 +337,6 @@ struct task_struct;
/* Prepare to copy thread state - unlazy all lazy status */
#define prepare_to_copy(tsk) do { } while (0)
-#ifdef CONFIG_NUMA
-#define SD_NODE_INIT (struct sched_domain) { \
- .span = CPU_MASK_NONE, \
- .parent = NULL, \
- .groups = NULL, \
- .min_interval = 80, \
- .max_interval = 320, \
- .busy_factor = 320, \
- .imbalance_pct = 125, \
- .cache_hot_time = (10*1000000), \
- .cache_nice_tries = 1, \
- .per_cpu_gain = 100, \
- .flags = SD_LOAD_BALANCE \
- | SD_BALANCE_EXEC \
- | SD_WAKE_BALANCE, \
- .last_balance = jiffies, \
- .balance_interval = 10, \
- .nr_balance_failed = 0, \
-}
-#endif
-
/*
* This is the mechanism for creating a new kernel thread.
*
diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.9-rc2-mm4/include/asm-ia64/topology.h linux-2.6.9-rc2-mm4+per_arch-SD_INITs/include/asm-ia64/topology.h
--- linux-2.6.9-rc2-mm4/include/asm-ia64/topology.h 2004-08-13 22:36:11.000000000 -0700
+++ linux-2.6.9-rc2-mm4+per_arch-SD_INITs/include/asm-ia64/topology.h 2004-09-27 17:40:55.000000000 -0700
@@ -40,11 +40,28 @@
*/
#define node_to_first_cpu(node) (__ffs(node_to_cpumask(node)))
-/* Cross-node load balancing interval. */
-#define NODE_BALANCE_RATE 10
-
void build_cpu_to_node_map(void);
+/* sched_domains SD_NODE_INIT for IA64 NUMA machines */
+#define SD_NODE_INIT (struct sched_domain) { \
+ .span = CPU_MASK_NONE, \
+ .parent = NULL, \
+ .groups = NULL, \
+ .min_interval = 80, \
+ .max_interval = 320, \
+ .busy_factor = 320, \
+ .imbalance_pct = 125, \
+ .cache_hot_time = (10*1000000), \
+ .cache_nice_tries = 1, \
+ .per_cpu_gain = 100, \
+ .flags = SD_LOAD_BALANCE \
+ | SD_BALANCE_EXEC \
+ | SD_WAKE_BALANCE, \
+ .last_balance = jiffies, \
+ .balance_interval = 10, \
+ .nr_balance_failed = 0, \
+}
+
#endif /* CONFIG_NUMA */
#include <asm-generic/topology.h>
diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.9-rc2-mm4/include/asm-ppc64/topology.h linux-2.6.9-rc2-mm4+per_arch-SD_INITs/include/asm-ppc64/topology.h
--- linux-2.6.9-rc2-mm4/include/asm-ppc64/topology.h 2004-08-13 22:38:08.000000000 -0700
+++ linux-2.6.9-rc2-mm4+per_arch-SD_INITs/include/asm-ppc64/topology.h 2004-09-27 17:56:06.000000000 -0700
@@ -37,8 +37,25 @@ static inline int node_to_first_cpu(int
#define nr_cpus_node(node) (nr_cpus_in_node[node])
-/* Cross-node load balancing interval. */
-#define NODE_BALANCE_RATE 10
+/* sched_domains SD_NODE_INIT for PPC64 machines */
+#define SD_NODE_INIT (struct sched_domain) { \
+ .span = CPU_MASK_NONE, \
+ .parent = NULL, \
+ .groups = NULL, \
+ .min_interval = 8, \
+ .max_interval = 32, \
+ .busy_factor = 32, \
+ .imbalance_pct = 125, \
+ .cache_hot_time = (10*1000), \
+ .cache_nice_tries = 1, \
+ .per_cpu_gain = 100, \
+ .flags = SD_LOAD_BALANCE \
+ | SD_BALANCE_EXEC \
+ | SD_WAKE_BALANCE, \
+ .last_balance = jiffies, \
+ .balance_interval = 1, \
+ .nr_balance_failed = 0, \
+}
#else /* !CONFIG_NUMA */
diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.9-rc2-mm4/include/asm-x86_64/topology.h linux-2.6.9-rc2-mm4+per_arch-SD_INITs/include/asm-x86_64/topology.h
--- linux-2.6.9-rc2-mm4/include/asm-x86_64/topology.h 2004-09-16 15:02:46.000000000 -0700
+++ linux-2.6.9-rc2-mm4+per_arch-SD_INITs/include/asm-x86_64/topology.h 2004-09-28 15:45:38.000000000 -0700
@@ -32,7 +32,27 @@ static inline cpumask_t __pcibus_to_cpum
/* broken generic file uses #ifndef later on this */
#define pcibus_to_cpumask(bus) __pcibus_to_cpumask(bus)
-#define NODE_BALANCE_RATE 30 /* CHECKME */
+#ifdef CONFIG_NUMA
+/* sched_domains SD_NODE_INIT for X86_64 machines */
+#define SD_NODE_INIT (struct sched_domain) { \
+ .span = CPU_MASK_NONE, \
+ .parent = NULL, \
+ .groups = NULL, \
+ .min_interval = 8, \
+ .max_interval = 32, \
+ .busy_factor = 32, \
+ .imbalance_pct = 125, \
+ .cache_hot_time = (10*1000), \
+ .cache_nice_tries = 1, \
+ .per_cpu_gain = 100, \
+ .flags = SD_LOAD_BALANCE \
+ | SD_BALANCE_EXEC \
+ | SD_WAKE_BALANCE, \
+ .last_balance = jiffies, \
+ .balance_interval = 1, \
+ .nr_balance_failed = 0, \
+}
+#endif /* CONFIG_NUMA */
#endif
diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.9-rc2-mm4/include/linux/sched.h linux-2.6.9-rc2-mm4+per_arch-SD_INITs/include/linux/sched.h
--- linux-2.6.9-rc2-mm4/include/linux/sched.h 2004-09-27 15:57:56.000000000 -0700
+++ linux-2.6.9-rc2-mm4+per_arch-SD_INITs/include/linux/sched.h 2004-09-28 15:48:32.000000000 -0700
@@ -30,6 +30,7 @@
#include <linux/completion.h>
#include <linux/pid.h>
#include <linux/percpu.h>
+#include <linux/topology.h>
struct exec_domain;
@@ -538,25 +539,9 @@ extern void cpu_attach_domain(struct sch
}
#if defined(CONFIG_NUMA) && !defined(SD_NODE_INIT)
-#define SD_NODE_INIT (struct sched_domain) { \
- .span = CPU_MASK_NONE, \
- .parent = NULL, \
- .groups = NULL, \
- .min_interval = 8, \
- .max_interval = 32, \
- .busy_factor = 32, \
- .imbalance_pct = 125, \
- .cache_hot_time = (10*1000), \
- .cache_nice_tries = 1, \
- .per_cpu_gain = 100, \
- .flags = SD_LOAD_BALANCE \
- | SD_BALANCE_EXEC \
- | SD_WAKE_BALANCE, \
- .last_balance = jiffies, \
- .balance_interval = 1, \
- .nr_balance_failed = 0, \
-}
+#error Please define an appropriate SD_NODE_INIT in include/asm/topology.h
#endif
+
#endif /* ARCH_HAS_SCHED_TUNE */
#endif /* CONFIG_SMP */
^ permalink raw reply [flat|nested] 12+ messages in thread* Re: [RFC PATCH] sched_domains: Make SD_NODE_INIT per-arch 2004-09-29 1:12 [RFC PATCH] sched_domains: Make SD_NODE_INIT per-arch Matthew Dobson @ 2004-09-30 8:15 ` Nick Piggin 2004-09-30 18:36 ` Matthew Dobson 0 siblings, 1 reply; 12+ messages in thread From: Nick Piggin @ 2004-09-30 8:15 UTC (permalink / raw) To: colpatch; +Cc: LKML, Andrew Morton, Martin J. Bligh Matthew Dobson wrote: > IA64 already has their own version of SD_NODE_INIT, tuned for their > extremely large machines. I think that all arches would benefit from > having their own, arch-specific SD_NODE_INIT initializer, rather than > the one-size-fits-all variant we've got now. > I suppose the patch is pretty good (IIRC Martin liked the idea). I guess it will at least increase the incidence of copy+paste, if not getting people to think harder ;) Can I be lame and ask that you keep this around until closer to 2.6.10? I have a few possible scheduler performance improvements that I'd like to get tested in -mm after 2.6.9 and this would make things a bit harder :P I don't think anyone is looking at getting any tweaks in before then... ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC PATCH] sched_domains: Make SD_NODE_INIT per-arch 2004-09-30 8:15 ` Nick Piggin @ 2004-09-30 18:36 ` Matthew Dobson 2004-09-30 19:23 ` Andrew Morton 2004-09-30 20:45 ` Andi Kleen 0 siblings, 2 replies; 12+ messages in thread From: Matthew Dobson @ 2004-09-30 18:36 UTC (permalink / raw) To: Nick Piggin; +Cc: LKML, Andrew Morton, Martin J. Bligh, Andi Kleen On Thu, 2004-09-30 at 01:15, Nick Piggin wrote: > Matthew Dobson wrote: > > IA64 already has their own version of SD_NODE_INIT, tuned for their > > extremely large machines. I think that all arches would benefit from > > having their own, arch-specific SD_NODE_INIT initializer, rather than > > the one-size-fits-all variant we've got now. > > > > I suppose the patch is pretty good (IIRC Martin liked the idea). > I guess it will at least increase the incidence of copy+paste, > if not getting people to think harder ;) Thanks! Martin does like the idea, and I think Andi Kleen likes the idea of being able to tune sched_domains for x86_64, too. Any comments, Andi? The patch is pretty simple. I don't think it will increase any copy+pasting because I don't believe anyone has modified SD_NODE_INIT at all since it's been implemented, and certainly not for many kernel releases. I think part of the reason for that is that it is currently impossible to tweak the values for your architecture of choice because modifying the values now will change EVERYONE's sched_domains timings. Which is bad. :( If anyone wants to tweak SD_NODE_INIT, they shouldn't be copying+pasting those values to all architectures. Besides, IA64 already gets their own SD_NODE_INIT to play with, why shouldn't everyone else! ;) > Can I be lame and ask that you keep this around until closer > to 2.6.10? I have a few possible scheduler performance > improvments that I'd like to get tested in -mm after 2.6.9 > and this would make things a bit harder :P > > I don't think anyone is looking at getting any tweaks in before > then... 
I would like to try to get this in before then, unless this will really make things difficult for you. 2.6.9 is looking to be a pickup point for distros, so getting this patch in now (pre 2.6.9) means that distros can add tiny patches to their builds to simply tweak individual architecture values without having to diverge from mainline by implementing this patch on their own. It also means that any tuning work that the distros do can easily be pushed back to mainline by simply sending a patch per architecture with new values. It makes those patches safe and minimizes conflicts. Will this patch really make your scheduler improvements that much harder to test/implement? I don't think it should, unless your improvements are tweaking the values in SD_NODE_INIT, since that is all this touches... Even after this patch, SD_NODE_INIT is still picked up in include/linux/sched.h, so the changes required to cope with this patch should be minimal... -Matt ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC PATCH] sched_domains: Make SD_NODE_INIT per-arch 2004-09-30 18:36 ` Matthew Dobson @ 2004-09-30 19:23 ` Andrew Morton 2004-09-30 20:20 ` Matthew Dobson 2004-10-01 6:15 ` Martin J. Bligh 2004-09-30 20:45 ` Andi Kleen 1 sibling, 2 replies; 12+ messages in thread From: Andrew Morton @ 2004-09-30 19:23 UTC (permalink / raw) To: colpatch; +Cc: nickpiggin, linux-kernel, mbligh, ak Matthew Dobson <colpatch@us.ibm.com> wrote: > > I would like to try to get this in before then, unless this will really > make things difficult for you. It's about three weeks late for 2.6.9. I already have a string of CPU scheduler patches awaiting the 2.6.10 stream and once we're at -rc2 we really should only be looking at bugfixes. Grumble, mutter.. it looks like one of those "if it compiled, it works" things. Problem is, any time anyone touches that particular piece of the kernel, half the architectures stop compiling. ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC PATCH] sched_domains: Make SD_NODE_INIT per-arch 2004-09-30 19:23 ` Andrew Morton @ 2004-09-30 20:20 ` Matthew Dobson 2004-10-01 6:15 ` Martin J. Bligh 1 sibling, 0 replies; 12+ messages in thread From: Matthew Dobson @ 2004-09-30 20:20 UTC (permalink / raw) To: Andrew Morton; +Cc: nickpiggin, LKML, Martin J. Bligh, Andi Kleen On Thu, 2004-09-30 at 12:23, Andrew Morton wrote: > Matthew Dobson <colpatch@us.ibm.com> wrote: > > > > I would like to try to get this in before then, unless this will really > > make things difficult for you. > > It's about three weeks late for 2.6.9. I already have a string of CPU > scheduler patches awaiting the 2.6.10 stream and once we're at -rc2 we > really should only be looking at bugfixes. Yeah, that's entirely my fault for slacking on sending this out... I should have sent this a while ago. It is a small portion of some larger sched_domains changes that I am working on, but at some point I realized my larger changeset will be far more controversial and have a much larger impact than some of the smaller bits, as well as not being ready for prime time yet. Plus, like I said earlier, this allows arch-specific tweaking with minimal intrusiveness from the application of this patch forward. > Grumble, mutter.. it looks like one of those "if it compiled, it works" > things. Problem is, any time anyone touches that particular piece of the > kernel, half the architectures stop compiing. It *should* be. I'd be quite happy if you just picked it up in -mm to assure it far wider testing. I've compiled and booted it on x86, x86_64 & ppc64. I've got no access to ia64 right now, or I'd test it there. But the patch *will* spit out #errors for any arch that doesn't have SD_NODE_INIT defined if they also have NUMA defined. I don't know of anyone else (ie: *not* x86, x86_64, ppc64 & ia64) that is building NUMA kernels, but if they are, it's a trivial patch to their include/asm/topology.h to make the arch build. 
Of course, the ultimate decision is yours, Andrew... -Matt ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC PATCH] sched_domains: Make SD_NODE_INIT per-arch 2004-09-30 19:23 ` Andrew Morton 2004-09-30 20:20 ` Matthew Dobson @ 2004-10-01 6:15 ` Martin J. Bligh 2004-10-01 22:20 ` Matthew Dobson 1 sibling, 1 reply; 12+ messages in thread From: Martin J. Bligh @ 2004-10-01 6:15 UTC (permalink / raw) To: Andrew Morton, colpatch; +Cc: nickpiggin, linux-kernel, ak --Andrew Morton <akpm@osdl.org> wrote (on Thursday, September 30, 2004 12:23:12 -0700): > Matthew Dobson <colpatch@us.ibm.com> wrote: >> >> I would like to try to get this in before then, unless this will really >> make things difficult for you. > > It's about three weeks late for 2.6.9. I already have a string of CPU > scheduler patches awaiting the 2.6.10 stream and once we're at -rc2 we > really should only be looking at bugfixes. Yup, seems a bit late for that, but early 2.6.10 would be nice if possible? > Grumble, mutter.. it looks like one of those "if it compiled, it works" > things. Problem is, any time anyone touches that particular piece of the > kernel, half the architectures stop compiing. I tested it - worked for me ;-) This is the first step to getting the arches to actually use the flexibility we had, and stop Andi complaining the scheduler is tuned for one arch rather than another ;-) These params definitely need to be per arch/subarch, and probably some other ones too, but this seems like a good start. M. ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC PATCH] sched_domains: Make SD_NODE_INIT per-arch 2004-10-01 6:15 ` Martin J. Bligh @ 2004-10-01 22:20 ` Matthew Dobson 2004-10-02 16:02 ` Martin J. Bligh 0 siblings, 1 reply; 12+ messages in thread From: Matthew Dobson @ 2004-10-01 22:20 UTC (permalink / raw) To: Martin J. Bligh; +Cc: Andrew Morton, Nick Piggin, LKML, Andi Kleen On Thu, 2004-09-30 at 23:15, Martin J. Bligh wrote: > --Andrew Morton <akpm@osdl.org> wrote (on Thursday, September 30, 2004 12:23:12 -0700): > > > Matthew Dobson <colpatch@us.ibm.com> wrote: > >> > >> I would like to try to get this in before then, unless this will really > >> make things difficult for you. > > > > It's about three weeks late for 2.6.9. I already have a string of CPU > > scheduler patches awaiting the 2.6.10 stream and once we're at -rc2 we > > really should only be looking at bugfixes. > > Yup, seems a bit late for that, but early 2.6.10 would be nice if possible? > > > Grumble, mutter.. it looks like one of those "if it compiled, it works" > > things. Problem is, any time anyone touches that particular piece of the > > kernel, half the architectures stop compiing. > > I tested it - worked for me ;-) > > This is the first step to getting the arches to actually use the flexibility > we had, and stop Andi complaining the scheduler is tuned for one arch rather > than another ;-) These params definitely need to be per arch/subarch, and > probably some other ones too, but this seems like a good start. > > M. Martin, Andi, Andrew & anyone else still reading this thread, Here's yet another version of a patch to implement per-arch SD_*_INITs. This follows the same basic idea of my last patch, but 1) defines an arch-specific SD_NODE_INIT for the 4 NUMA arches (i386, x86_64, IA64 & PPC64), 2) defines *default* SD_CPU_INIT & SD_SIBLING_INIT for *all* arches, with the possibility of them being overridden by simply defining an arch-specific version in include/asm/topology.h. 
The motivation behind the third version of this patch is that Martin feels that there should be no "default" NUMA initializer because NUMA characteristics are *very* arch/platform specific, and hence a "default" NUMA initializer can only lead to confusion. I agree with most of that, but don't quite see as much harm in having a default as he does. Nevertheless, to keep him quiet, I've run up this version of the patch. Martin, please run this through your magic test suite and make sure I didn't break anything trivial. [mcd@arrakis source]$ diffstat ~/linux/patches/sched_domains/per_arch-SD_INIT.patch arch/ia64/kernel/domain.c | 1 include/asm-i386/topology.h | 20 +++++++++++ include/asm-ia64/processor.h | 21 ----------- include/asm-ia64/topology.h | 20 +++++++++++ include/asm-ppc64/topology.h | 20 +++++++++++ include/asm-x86_64/topology.h | 22 ++++++++++++ include/linux/sched.h | 74 +----------------------------------------- include/linux/topology.h | 72 ++++++++++++++++++++++++++++++++++++++++ 8 files changed, 156 insertions(+), 94 deletions(-) -Matt diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.9-rc2-mm4/arch/ia64/kernel/domain.c linux-2.6.9-rc2-mm4+per_arch-SD_INITs/arch/ia64/kernel/domain.c --- linux-2.6.9-rc2-mm4/arch/ia64/kernel/domain.c 2004-09-27 15:57:19.000000000 -0700 +++ linux-2.6.9-rc2-mm4+per_arch-SD_INITs/arch/ia64/kernel/domain.c 2004-09-27 17:42:59.000000000 -0700 @@ -11,7 +11,6 @@ #include <linux/cpumask.h> #include <linux/init.h> #include <linux/topology.h> -#include <asm/processor.h> #define SD_NODES_PER_DOMAIN 6 diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.9-rc2-mm4/include/asm-i386/topology.h linux-2.6.9-rc2-mm4+per_arch-SD_INITs/include/asm-i386/topology.h --- linux-2.6.9-rc2-mm4/include/asm-i386/topology.h 2004-09-16 15:02:45.000000000 -0700 +++ linux-2.6.9-rc2-mm4+per_arch-SD_INITs/include/asm-i386/topology.h 2004-10-01 15:06:30.000000000 -0700 @@ -72,6 +72,26 @@ static inline cpumask_t pcibus_to_cpumas /* Cross-node load 
balancing interval. */ #define NODE_BALANCE_RATE 100 +/* sched_domains SD_NODE_INIT for NUMAQ machines */ +#define SD_NODE_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ + .parent = NULL, \ + .groups = NULL, \ + .min_interval = 8, \ + .max_interval = 32, \ + .busy_factor = 32, \ + .imbalance_pct = 125, \ + .cache_hot_time = (10*1000), \ + .cache_nice_tries = 1, \ + .per_cpu_gain = 100, \ + .flags = SD_LOAD_BALANCE \ + | SD_BALANCE_EXEC \ + | SD_WAKE_BALANCE, \ + .last_balance = jiffies, \ + .balance_interval = 1, \ + .nr_balance_failed = 0, \ +} + #else /* !CONFIG_NUMA */ /* * Other i386 platforms should define their own version of the diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.9-rc2-mm4/include/asm-ia64/processor.h linux-2.6.9-rc2-mm4+per_arch-SD_INITs/include/asm-ia64/processor.h --- linux-2.6.9-rc2-mm4/include/asm-ia64/processor.h 2004-09-27 15:57:51.000000000 -0700 +++ linux-2.6.9-rc2-mm4+per_arch-SD_INITs/include/asm-ia64/processor.h 2004-09-27 17:40:05.000000000 -0700 @@ -337,27 +337,6 @@ struct task_struct; /* Prepare to copy thread state - unlazy all lazy status */ #define prepare_to_copy(tsk) do { } while (0) -#ifdef CONFIG_NUMA -#define SD_NODE_INIT (struct sched_domain) { \ - .span = CPU_MASK_NONE, \ - .parent = NULL, \ - .groups = NULL, \ - .min_interval = 80, \ - .max_interval = 320, \ - .busy_factor = 320, \ - .imbalance_pct = 125, \ - .cache_hot_time = (10*1000000), \ - .cache_nice_tries = 1, \ - .per_cpu_gain = 100, \ - .flags = SD_LOAD_BALANCE \ - | SD_BALANCE_EXEC \ - | SD_WAKE_BALANCE, \ - .last_balance = jiffies, \ - .balance_interval = 10, \ - .nr_balance_failed = 0, \ -} -#endif - /* * This is the mechanism for creating a new kernel thread. 
* diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.9-rc2-mm4/include/asm-ia64/topology.h linux-2.6.9-rc2-mm4+per_arch-SD_INITs/include/asm-ia64/topology.h --- linux-2.6.9-rc2-mm4/include/asm-ia64/topology.h 2004-08-13 22:36:11.000000000 -0700 +++ linux-2.6.9-rc2-mm4+per_arch-SD_INITs/include/asm-ia64/topology.h 2004-09-30 16:06:47.000000000 -0700 @@ -45,6 +45,26 @@ void build_cpu_to_node_map(void); +/* sched_domains SD_NODE_INIT for IA64 NUMA machines */ +#define SD_NODE_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ + .parent = NULL, \ + .groups = NULL, \ + .min_interval = 80, \ + .max_interval = 320, \ + .busy_factor = 320, \ + .imbalance_pct = 125, \ + .cache_hot_time = (10*1000000), \ + .cache_nice_tries = 1, \ + .per_cpu_gain = 100, \ + .flags = SD_LOAD_BALANCE \ + | SD_BALANCE_EXEC \ + | SD_WAKE_BALANCE, \ + .last_balance = jiffies, \ + .balance_interval = 10, \ + .nr_balance_failed = 0, \ +} + #endif /* CONFIG_NUMA */ #include <asm-generic/topology.h> diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.9-rc2-mm4/include/asm-ppc64/topology.h linux-2.6.9-rc2-mm4+per_arch-SD_INITs/include/asm-ppc64/topology.h --- linux-2.6.9-rc2-mm4/include/asm-ppc64/topology.h 2004-08-13 22:38:08.000000000 -0700 +++ linux-2.6.9-rc2-mm4+per_arch-SD_INITs/include/asm-ppc64/topology.h 2004-10-01 15:07:24.000000000 -0700 @@ -40,6 +40,26 @@ static inline int node_to_first_cpu(int /* Cross-node load balancing interval. 
*/ #define NODE_BALANCE_RATE 10 +/* sched_domains SD_NODE_INIT for PPC64 machines */ +#define SD_NODE_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ + .parent = NULL, \ + .groups = NULL, \ + .min_interval = 8, \ + .max_interval = 32, \ + .busy_factor = 32, \ + .imbalance_pct = 125, \ + .cache_hot_time = (10*1000), \ + .cache_nice_tries = 1, \ + .per_cpu_gain = 100, \ + .flags = SD_LOAD_BALANCE \ + | SD_BALANCE_EXEC \ + | SD_WAKE_BALANCE, \ + .last_balance = jiffies, \ + .balance_interval = 1, \ + .nr_balance_failed = 0, \ +} + #else /* !CONFIG_NUMA */ #include <asm-generic/topology.h> diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.9-rc2-mm4/include/asm-x86_64/topology.h linux-2.6.9-rc2-mm4+per_arch-SD_INITs/include/asm-x86_64/topology.h --- linux-2.6.9-rc2-mm4/include/asm-x86_64/topology.h 2004-09-16 15:02:46.000000000 -0700 +++ linux-2.6.9-rc2-mm4+per_arch-SD_INITs/include/asm-x86_64/topology.h 2004-10-01 15:07:35.000000000 -0700 @@ -34,6 +34,28 @@ static inline cpumask_t __pcibus_to_cpum #define NODE_BALANCE_RATE 30 /* CHECKME */ +#ifdef CONFIG_NUMA +/* sched_domains SD_NODE_INIT for x86_64 machines */ +#define SD_NODE_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ + .parent = NULL, \ + .groups = NULL, \ + .min_interval = 8, \ + .max_interval = 32, \ + .busy_factor = 32, \ + .imbalance_pct = 125, \ + .cache_hot_time = (10*1000), \ + .cache_nice_tries = 1, \ + .per_cpu_gain = 100, \ + .flags = SD_LOAD_BALANCE \ + | SD_BALANCE_EXEC \ + | SD_WAKE_BALANCE, \ + .last_balance = jiffies, \ + .balance_interval = 1, \ + .nr_balance_failed = 0, \ +} +#endif + #endif #include <asm-generic/topology.h> diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.9-rc2-mm4/include/linux/sched.h linux-2.6.9-rc2-mm4+per_arch-SD_INITs/include/linux/sched.h --- linux-2.6.9-rc2-mm4/include/linux/sched.h 2004-09-27 15:57:56.000000000 -0700 +++ linux-2.6.9-rc2-mm4+per_arch-SD_INITs/include/linux/sched.h 2004-09-30 16:03:06.000000000 -0700 @@ -30,6 +30,7 @@ 
#include <linux/completion.h> #include <linux/pid.h> #include <linux/percpu.h> +#include <linux/topology.h> struct exec_domain; @@ -486,78 +487,7 @@ extern cpumask_t cpu_isolated_map; extern void init_sched_build_groups(struct sched_group groups[], cpumask_t span, int (*group_fn)(int cpu)); extern void cpu_attach_domain(struct sched_domain *sd, int cpu); -#endif - -#ifndef ARCH_HAS_SCHED_TUNE -#ifdef CONFIG_SCHED_SMT -#define ARCH_HAS_SCHED_WAKE_IDLE -/* Common values for SMT siblings */ -#define SD_SIBLING_INIT (struct sched_domain) { \ - .span = CPU_MASK_NONE, \ - .parent = NULL, \ - .groups = NULL, \ - .min_interval = 1, \ - .max_interval = 2, \ - .busy_factor = 8, \ - .imbalance_pct = 110, \ - .cache_hot_time = 0, \ - .cache_nice_tries = 0, \ - .per_cpu_gain = 25, \ - .flags = SD_LOAD_BALANCE \ - | SD_BALANCE_NEWIDLE \ - | SD_BALANCE_EXEC \ - | SD_WAKE_AFFINE \ - | SD_WAKE_IDLE \ - | SD_SHARE_CPUPOWER, \ - .last_balance = jiffies, \ - .balance_interval = 1, \ - .nr_balance_failed = 0, \ -} -#endif - -/* Common values for CPUs */ -#define SD_CPU_INIT (struct sched_domain) { \ - .span = CPU_MASK_NONE, \ - .parent = NULL, \ - .groups = NULL, \ - .min_interval = 1, \ - .max_interval = 4, \ - .busy_factor = 64, \ - .imbalance_pct = 125, \ - .cache_hot_time = (5*1000/2), \ - .cache_nice_tries = 1, \ - .per_cpu_gain = 100, \ - .flags = SD_LOAD_BALANCE \ - | SD_BALANCE_NEWIDLE \ - | SD_BALANCE_EXEC \ - | SD_WAKE_AFFINE \ - | SD_WAKE_BALANCE, \ - .last_balance = jiffies, \ - .balance_interval = 1, \ - .nr_balance_failed = 0, \ -} - -#if defined(CONFIG_NUMA) && !defined(SD_NODE_INIT) -#define SD_NODE_INIT (struct sched_domain) { \ - .span = CPU_MASK_NONE, \ - .parent = NULL, \ - .groups = NULL, \ - .min_interval = 8, \ - .max_interval = 32, \ - .busy_factor = 32, \ - .imbalance_pct = 125, \ - .cache_hot_time = (10*1000), \ - .cache_nice_tries = 1, \ - .per_cpu_gain = 100, \ - .flags = SD_LOAD_BALANCE \ - | SD_BALANCE_EXEC \ - | SD_WAKE_BALANCE, \ - .last_balance = 
jiffies, \ - .balance_interval = 1, \ - .nr_balance_failed = 0, \ -} -#endif -#endif /* ARCH_HAS_SCHED_TUNE */ +#endif /* ARCH_HAS_SCHED_DOMAIN */ #endif /* CONFIG_SMP */ diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.9-rc2-mm4/include/linux/topology.h linux-2.6.9-rc2-mm4+per_arch-SD_INITs/include/linux/topology.h --- linux-2.6.9-rc2-mm4/include/linux/topology.h 2004-09-16 15:02:47.000000000 -0700 +++ linux-2.6.9-rc2-mm4+per_arch-SD_INITs/include/linux/topology.h 2004-10-01 15:15:56.000000000 -0700 @@ -61,4 +61,76 @@ static inline int __next_node_with_cpus( #define PENALTY_FOR_NODE_WITH_CPUS (1) #endif +/* + * Below are the 3 major initializers used in building sched_domains: + * SD_SIBLING_INIT, for SMT domains + * SD_CPU_INIT, for SMP domains + * SD_NODE_INIT, for NUMA domains + * + * Any architecture that cares to do any tuning to these values should do so + * by defining their own arch-specific initializer in include/asm/topology.h. + * A definition there will automagically override these default initializers + * and allow arch-specific performance tuning of sched_domains. + */ +#ifdef CONFIG_SCHED_SMT +/* MCD - Do we really need this? It is always on if CONFIG_SCHED_SMT is, + * so can't we drop this in favor of CONFIG_SCHED_SMT? 
+ */ +#define ARCH_HAS_SCHED_WAKE_IDLE +/* Common values for SMT siblings */ +#ifndef SD_SIBLING_INIT +#define SD_SIBLING_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ + .parent = NULL, \ + .groups = NULL, \ + .min_interval = 1, \ + .max_interval = 2, \ + .busy_factor = 8, \ + .imbalance_pct = 110, \ + .cache_hot_time = 0, \ + .cache_nice_tries = 0, \ + .per_cpu_gain = 25, \ + .flags = SD_LOAD_BALANCE \ + | SD_BALANCE_NEWIDLE \ + | SD_BALANCE_EXEC \ + | SD_WAKE_AFFINE \ + | SD_WAKE_IDLE \ + | SD_SHARE_CPUPOWER, \ + .last_balance = jiffies, \ + .balance_interval = 1, \ + .nr_balance_failed = 0, \ +} +#endif +#endif /* CONFIG_SCHED_SMT */ + +/* Common values for CPUs */ +#ifndef SD_CPU_INIT +#define SD_CPU_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ + .parent = NULL, \ + .groups = NULL, \ + .min_interval = 1, \ + .max_interval = 4, \ + .busy_factor = 64, \ + .imbalance_pct = 125, \ + .cache_hot_time = (5*1000/2), \ + .cache_nice_tries = 1, \ + .per_cpu_gain = 100, \ + .flags = SD_LOAD_BALANCE \ + | SD_BALANCE_NEWIDLE \ + | SD_BALANCE_EXEC \ + | SD_WAKE_AFFINE \ + | SD_WAKE_BALANCE, \ + .last_balance = jiffies, \ + .balance_interval = 1, \ + .nr_balance_failed = 0, \ +} +#endif + +#ifdef CONFIG_NUMA +#ifndef SD_NODE_INIT +#error Please define an appropriate SD_NODE_INIT in include/asm/topology.h!!! +#endif +#endif /* CONFIG_NUMA */ + #endif /* _LINUX_TOPOLOGY_H */ ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC PATCH] sched_domains: Make SD_NODE_INIT per-arch 2004-10-01 22:20 ` Matthew Dobson @ 2004-10-02 16:02 ` Martin J. Bligh 0 siblings, 0 replies; 12+ messages in thread From: Martin J. Bligh @ 2004-10-02 16:02 UTC (permalink / raw) To: colpatch; +Cc: Andrew Morton, Nick Piggin, LKML, Andi Kleen > Martin, Andi, Andrew & anyone else still reading this thread, > Here's yet another version of a patch to implement per-arch SD_*_INITs. > This follows the same basic idea of my last patch, but > 1) defines an arch-specific SD_NODE_INIT for the 4 NUMA arches (i386, > x86_64, IA64 & PPC64), > 2) defines *default* SD_CPU_INIT & SD_SIBLING_INIT for *all* arches, > with the possibility of them being overridden by simply defining an > arch-specific version in include/asm/topology.h. Looks good. tested. works ;-) M. ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC PATCH] sched_domains: Make SD_NODE_INIT per-arch 2004-09-30 18:36 ` Matthew Dobson 2004-09-30 19:23 ` Andrew Morton @ 2004-09-30 20:45 ` Andi Kleen 2004-09-30 21:06 ` Matthew Dobson 1 sibling, 1 reply; 12+ messages in thread From: Andi Kleen @ 2004-09-30 20:45 UTC (permalink / raw) To: Matthew Dobson Cc: Nick Piggin, LKML, Andrew Morton, Martin J. Bligh, Andi Kleen On Thu, Sep 30, 2004 at 11:36:52AM -0700, Matthew Dobson wrote: > On Thu, 2004-09-30 at 01:15, Nick Piggin wrote: > > Matthew Dobson wrote: > > > IA64 already has their own version of SD_NODE_INIT, tuned for their > > > extremely large machines. I think that all arches would benefit from > > > having their own, arch-specific SD_NODE_INIT initializer, rather than > > > the one-size-fits-all variant we've got now. > > > > > > > I suppose the patch is pretty good (IIRC Martin liked the idea). > > I guess it will at least increase the incidence of copy+paste, > > if not getting people to think harder ;) > > Thanks! Martin does like the idea, and I think Andi Kleen likes the > idea of being able to tune sched_domains for x86_64, too. Any comments, > Andi? It doesn't help me directly - what I need is the same thing for SD_SIBLING_INIT for the CMP changes. But it seems I need to do some other work to properly support the K8 CMP first, so I'm deferring attacking this a bit. > The patch is pretty simple. I don't think it will increase any > copy+pasting because I don't believe anyone has modified SD_NODE_INIT at > all since it's been implemented, and certainly not for many kernel > releases. I think part of the reason for that is that it is currently > impossible to tweak the values for your architecture of choice because > modifying the values now will change EVERYONE's sched_domains timings. > Which is bad. :( If anyone wants to tweak SD_NODE_INIT, they shouldn't > be copying+pasting those values to all architectures. 
Besides, IA64 > already gets their own SD_NODE_INIT to play with, why shouldn't everyone > else! ;) It would be nice if there was a SD_DEFAULT_NODE_INIT and a SD_DEFAULT_SIBLING_INIT in some generic file that architecture code can use as a base for tweaking. For the CMP change I currently only want to remove SD_SHARE_CPUPOWER from SD_SIBLING_INIT to get rid of SMT nice. Later we'll probably want a SD_DEFAULT_CMP_INIT too that gives generic values for a dual core. Dual cores should soon be pretty common and tuning for them will be needed on several architectures (ppc64, ia64, x86, x86-64, sparc, parisc? ...). But figuring out good values for this will require a lot of benchmarking first. -Andi ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC PATCH] sched_domains: Make SD_NODE_INIT per-arch 2004-09-30 20:45 ` Andi Kleen @ 2004-09-30 21:06 ` Matthew Dobson 2004-09-30 21:12 ` Andi Kleen 0 siblings, 1 reply; 12+ messages in thread From: Matthew Dobson @ 2004-09-30 21:06 UTC (permalink / raw) To: Andi Kleen; +Cc: Nick Piggin, LKML, Andrew Morton, Martin J. Bligh On Thu, 2004-09-30 at 13:45, Andi Kleen wrote: > On Thu, Sep 30, 2004 at 11:36:52AM -0700, Matthew Dobson wrote: > > On Thu, 2004-09-30 at 01:15, Nick Piggin wrote: > > > Matthew Dobson wrote: > > > > IA64 already has their own version of SD_NODE_INIT, tuned for their > > > > extremely large machines. I think that all arches would benefit from > > > > having their own, arch-specific SD_NODE_INIT initializer, rather than > > > > the one-size-fits-all variant we've got now. > > > > > > > > > > I suppose the patch is pretty good (IIRC Martin liked the idea). > > > I guess it will at least increase the incidence of copy+paste, > > > if not getting people to think harder ;) > > > > Thanks! Martin does like the idea, and I think Andi Kleen likes the > > idea of being able to tune sched_domains for x86_64, too. Any comments, > > Andi? > > It doesn't help me directly - what i need is the same thing > for SD_SIBLING_INIT for the CMP changes. > > But it seems I need to do some other work to properly support the K8 > CMP first, so I'm defering attacking this a bit. I see... Martin was under the impression you were looking to tweak the SD_NODE_INIT values. I'd really like to see all 3 initializers become per-arch. Siblings and CPUs are going to behave differently on different platforms. The idea that a P3 in a NUMAQ box will perform optimally with the same SD_CPU_INIT values as a Power5 CPU or an Opteron is just silly. But, I figured this would be a baby step in the right direction, and doing it only for NUMA architectures minimizes the number of affected machines. If this works well, I would do the same with SD_SIBLING_INIT and SD_CPU_INIT. 
> > The patch is pretty simple. I don't think it will increase any > > copy+pasting because I don't believe anyone has modified SD_NODE_INIT at > > all since it's been implemented, and certainly not for many kernel > > releases. I think part of the reason for that is that it is currently > > impossible to tweak the values for your architecture of choice because > > modifying the values now will change EVERYONE's sched_domains timings. > > Which is bad. :( If anyone wants to tweak SD_NODE_INIT, they shouldn't > > be copying+pasting those values to all architectures. Besides, IA64 > > already gets their own SD_NODE_INIT to play with, why shouldn't everyone > > else! ;) > > It would be nice if there was a SD_DEFAULT_NODE_INIT and a > SD_DEFAULT_SIBLING_INIT in some generic > file that architecture code can use as a base for tweaking. > For the CMP change I currently only want to remove SD_SHAREPOWER > from SIBLING_INIT to get rid of SMT nice. Well, you can certainly base the x86_64 CMP values on the current SD_SIBLING_INIT values. Those are well publicized, see include/linux/sched.h! ;) > Later we'll probably want a SD_DEFAULT_CMP_INIT too that gives > generic values for a dual core. Dual cores should be soon pretty > common and tuning for them will be needed on several architectures > (ppc64, ia64, x86, x86-64, sparc, parisc? ...). But figuring out good > values for this will require a lot of benchmarking first. > > -Andi I suppose it would be pretty trivial to define defaults in include/asm-generic/topology.h, and allow arches that care to define their own SD_*_INITs without disrupting anyone else. Actually, that's far better than what I've got now. I'll run that patch up after the meeting I'm currently late for and post it in a couple hours. And I agree that LOTS of benchmarking will be required to find the optimal values for these fields. -Matt ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC PATCH] sched_domains: Make SD_NODE_INIT per-arch 2004-09-30 21:06 ` Matthew Dobson @ 2004-09-30 21:12 ` Andi Kleen 2004-09-30 23:47 ` Matthew Dobson 0 siblings, 1 reply; 12+ messages in thread From: Andi Kleen @ 2004-09-30 21:12 UTC (permalink / raw) To: Matthew Dobson Cc: Andi Kleen, Nick Piggin, LKML, Andrew Morton, Martin J. Bligh > Well, you can certainly base the x86_64 CMP values on the current > SD_SIBLING_INIT values. Those are well publicized, see > include/linux/sched.h! ;) Current BK has it in kernel/sched.c. And it also broke NUMA kernels on UP, but that's a different issue. > I suppose it would be pretty trivial to define defaults in > include/asm-generic/topology.h, and allow arches that care to define > their own SD_*_INITs without disrupting anyone else. Actually, that's > far better than what I've got now. I'll run that patch up after the > meeting I'm currently late for and post it in a couple hours. Full override isn't good imho because it could lead to bit rot, better is to have defaults that can be used as a base, but tweaked. -Andi ^ permalink raw reply [flat|nested] 12+ messages in thread
* Re: [RFC PATCH] sched_domains: Make SD_NODE_INIT per-arch 2004-09-30 21:12 ` Andi Kleen @ 2004-09-30 23:47 ` Matthew Dobson 0 siblings, 0 replies; 12+ messages in thread From: Matthew Dobson @ 2004-09-30 23:47 UTC (permalink / raw) To: Andi Kleen; +Cc: Nick Piggin, LKML, Andrew Morton, Martin J. Bligh On Thu, 2004-09-30 at 14:12, Andi Kleen wrote: > > Well, you can certainly base the x86_64 CMP values on the current > > SD_SIBLING_INIT values. Those are well publicized, see > > include/linux/sched.h! ;) > > Current BK has it in kernel/sched.c. Fair enough. I was thinking about the -mm tree. :) > And it also broke NUMA kernels on UP, but that's a different issue. What broke NUMA kernels on UP? Are you talking about the cpu_online_map vs. cpu_possible_map thing from a little bit ago? > > I suppose it would be pretty trivial to define defaults in > > include/asm-generic/topology.h, and allow arches that care to define > > their own SD_*_INITs without disrupting anyone else. Actually, that's > > far better than what I've got now. I'll run that patch up after the > > meeting I'm currently late for and post it in a couple hours. > > Full override isn't good imho because it could lead to bit rot, > better is to have defaults that can be used as a base, but tweaked. I'm not sure why it would lead to bit rot? Because every arch would define their own initializers and not use the generic ones? If so, we could always rip them out... I doubt that will happen, since I don't foresee most arches caring enough to set up custom initializers. Especially since no one has done it yet, and some of the bigger arches really need to. I'm also not quite sure what you mean about using one common set of definitions as a base? How would you tweak a generic SD_NODE_INIT initializer without overriding it? Here's a smaller patch (only compile tested) to implement this in a much better way. 
What it does is: 1) Rip SD_*_INIT definitions out of linux/sched.h and move them into linux/topology.h and have linux/sched.h include linux/topology.h 2) Move IA64's arch-specific SD_NODE_INIT definition from asm/processor.h to asm/topology.h. This way, all an architecture has to do to set up their own arch-specific initializers is define them in asm/topology.h. It makes it totally trivial for an arch to set this up without changing or breaking anyone else's values. [mcd@arrakis source]$ diffstat ~/linux/patches/sched_domains/per_arch-SD_INIT.patch arch/ia64/kernel/domain.c | 1 include/asm-ia64/processor.h | 21 ----------- include/asm-ia64/topology.h | 20 +++++++++++ include/linux/sched.h | 74 +--------------------------------------- include/linux/topology.h | 78 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 100 insertions(+), 94 deletions(-) -Matt diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.9-rc2-mm4/arch/ia64/kernel/domain.c linux-2.6.9-rc2-mm4+per_arch-SD_INITs/arch/ia64/kernel/domain.c --- linux-2.6.9-rc2-mm4/arch/ia64/kernel/domain.c 2004-09-27 15:57:19.000000000 -0700 +++ linux-2.6.9-rc2-mm4+per_arch-SD_INITs/arch/ia64/kernel/domain.c 2004-09-27 17:42:59.000000000 -0700 @@ -11,7 +11,6 @@ #include <linux/cpumask.h> #include <linux/init.h> #include <linux/topology.h> -#include <asm/processor.h> #define SD_NODES_PER_DOMAIN 6 diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.9-rc2-mm4/include/asm-ia64/processor.h linux-2.6.9-rc2-mm4+per_arch-SD_INITs/include/asm-ia64/processor.h --- linux-2.6.9-rc2-mm4/include/asm-ia64/processor.h 2004-09-27 15:57:51.000000000 -0700 +++ linux-2.6.9-rc2-mm4+per_arch-SD_INITs/include/asm-ia64/processor.h 2004-09-27 17:40:05.000000000 -0700 @@ -337,27 +337,6 @@ struct task_struct; /* Prepare to copy thread state - unlazy all lazy status */ #define prepare_to_copy(tsk) do { } while (0) -#ifdef CONFIG_NUMA -#define SD_NODE_INIT (struct sched_domain) { \ - .span = CPU_MASK_NONE, \ - .parent = NULL, \ - 
.groups = NULL, \ - .min_interval = 80, \ - .max_interval = 320, \ - .busy_factor = 320, \ - .imbalance_pct = 125, \ - .cache_hot_time = (10*1000000), \ - .cache_nice_tries = 1, \ - .per_cpu_gain = 100, \ - .flags = SD_LOAD_BALANCE \ - | SD_BALANCE_EXEC \ - | SD_WAKE_BALANCE, \ - .last_balance = jiffies, \ - .balance_interval = 10, \ - .nr_balance_failed = 0, \ -} -#endif - /* * This is the mechanism for creating a new kernel thread. * diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.9-rc2-mm4/include/asm-ia64/topology.h linux-2.6.9-rc2-mm4+per_arch-SD_INITs/include/asm-ia64/topology.h --- linux-2.6.9-rc2-mm4/include/asm-ia64/topology.h 2004-08-13 22:36:11.000000000 -0700 +++ linux-2.6.9-rc2-mm4+per_arch-SD_INITs/include/asm-ia64/topology.h 2004-09-30 16:06:47.000000000 -0700 @@ -45,6 +45,26 @@ void build_cpu_to_node_map(void); +/* sched_domains SD_NODE_INIT for IA64 NUMA machines */ +#define SD_NODE_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ + .parent = NULL, \ + .groups = NULL, \ + .min_interval = 80, \ + .max_interval = 320, \ + .busy_factor = 320, \ + .imbalance_pct = 125, \ + .cache_hot_time = (10*1000000), \ + .cache_nice_tries = 1, \ + .per_cpu_gain = 100, \ + .flags = SD_LOAD_BALANCE \ + | SD_BALANCE_EXEC \ + | SD_WAKE_BALANCE, \ + .last_balance = jiffies, \ + .balance_interval = 10, \ + .nr_balance_failed = 0, \ +} + #endif /* CONFIG_NUMA */ #include <asm-generic/topology.h> diff -Nurp --exclude-from=/home/mcd/.dontdiff linux-2.6.9-rc2-mm4/include/linux/sched.h linux-2.6.9-rc2-mm4+per_arch-SD_INITs/include/linux/sched.h --- linux-2.6.9-rc2-mm4/include/linux/sched.h 2004-09-27 15:57:56.000000000 -0700 +++ linux-2.6.9-rc2-mm4+per_arch-SD_INITs/include/linux/sched.h 2004-09-30 16:03:06.000000000 -0700 @@ -30,6 +30,7 @@ #include <linux/completion.h> #include <linux/pid.h> #include <linux/percpu.h> +#include <linux/topology.h> struct exec_domain; @@ -486,78 +487,7 @@ extern cpumask_t cpu_isolated_map; extern void 
init_sched_build_groups(struct sched_group groups[], cpumask_t span, int (*group_fn)(int cpu)); extern void cpu_attach_domain(struct sched_domain *sd, int cpu); -#endif - -#ifndef ARCH_HAS_SCHED_TUNE -#ifdef CONFIG_SCHED_SMT -#define ARCH_HAS_SCHED_WAKE_IDLE -/* Common values for SMT siblings */ -#define SD_SIBLING_INIT (struct sched_domain) { \ - .span = CPU_MASK_NONE, \ - .parent = NULL, \ - .groups = NULL, \ - .min_interval = 1, \ - .max_interval = 2, \ - .busy_factor = 8, \ - .imbalance_pct = 110, \ - .cache_hot_time = 0, \ - .cache_nice_tries = 0, \ - .per_cpu_gain = 25, \ - .flags = SD_LOAD_BALANCE \ - | SD_BALANCE_NEWIDLE \ - | SD_BALANCE_EXEC \ - | SD_WAKE_AFFINE \ - | SD_WAKE_IDLE \ - | SD_SHARE_CPUPOWER, \ - .last_balance = jiffies, \ - .balance_interval = 1, \ - .nr_balance_failed = 0, \ -} -#endif - -/* Common values for CPUs */ -#define SD_CPU_INIT (struct sched_domain) { \ - .span = CPU_MASK_NONE, \ - .parent = NULL, \ - .groups = NULL, \ - .min_interval = 1, \ - .max_interval = 4, \ - .busy_factor = 64, \ - .imbalance_pct = 125, \ - .cache_hot_time = (5*1000/2), \ - .cache_nice_tries = 1, \ - .per_cpu_gain = 100, \ - .flags = SD_LOAD_BALANCE \ - | SD_BALANCE_NEWIDLE \ - | SD_BALANCE_EXEC \ - | SD_WAKE_AFFINE \ - | SD_WAKE_BALANCE, \ - .last_balance = jiffies, \ - .balance_interval = 1, \ - .nr_balance_failed = 0, \ -} - -#if defined(CONFIG_NUMA) && !defined(SD_NODE_INIT) -#define SD_NODE_INIT (struct sched_domain) { \ - .span = CPU_MASK_NONE, \ - .parent = NULL, \ - .groups = NULL, \ - .min_interval = 8, \ - .max_interval = 32, \ - .busy_factor = 32, \ - .imbalance_pct = 125, \ - .cache_hot_time = (10*1000), \ - .cache_nice_tries = 1, \ - .per_cpu_gain = 100, \ - .flags = SD_LOAD_BALANCE \ - | SD_BALANCE_EXEC \ - | SD_WAKE_BALANCE, \ - .last_balance = jiffies, \ - .balance_interval = 1, \ - .nr_balance_failed = 0, \ -} -#endif -#endif /* ARCH_HAS_SCHED_TUNE */ +#endif /* ARCH_HAS_SCHED_DOMAIN */ #endif /* CONFIG_SMP */ diff -Nurp 
--exclude-from=/home/mcd/.dontdiff linux-2.6.9-rc2-mm4/include/linux/topology.h linux-2.6.9-rc2-mm4+per_arch-SD_INITs/include/linux/topology.h --- linux-2.6.9-rc2-mm4/include/linux/topology.h 2004-09-16 15:02:47.000000000 -0700 +++ linux-2.6.9-rc2-mm4+per_arch-SD_INITs/include/linux/topology.h 2004-09-30 16:27:43.000000000 -0700 @@ -61,4 +61,82 @@ static inline int __next_node_with_cpus( #define PENALTY_FOR_NODE_WITH_CPUS (1) #endif +#ifdef CONFIG_SCHED_SMT +/* MCD - Do we really need this? It is always on if CONFIG_SCHED_SMT is, + * so can't we drop this in favor of CONFIG_SCHED_SMT? + */ +#define ARCH_HAS_SCHED_WAKE_IDLE +/* Common values for SMT siblings */ +#ifndef SD_SIBLING_INIT +#define SD_SIBLING_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ + .parent = NULL, \ + .groups = NULL, \ + .min_interval = 1, \ + .max_interval = 2, \ + .busy_factor = 8, \ + .imbalance_pct = 110, \ + .cache_hot_time = 0, \ + .cache_nice_tries = 0, \ + .per_cpu_gain = 25, \ + .flags = SD_LOAD_BALANCE \ + | SD_BALANCE_NEWIDLE \ + | SD_BALANCE_EXEC \ + | SD_WAKE_AFFINE \ + | SD_WAKE_IDLE \ + | SD_SHARE_CPUPOWER, \ + .last_balance = jiffies, \ + .balance_interval = 1, \ + .nr_balance_failed = 0, \ +} +#endif +#endif /* CONFIG_SCHED_SMT */ + +/* Common values for CPUs */ +#ifndef SD_CPU_INIT +#define SD_CPU_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ + .parent = NULL, \ + .groups = NULL, \ + .min_interval = 1, \ + .max_interval = 4, \ + .busy_factor = 64, \ + .imbalance_pct = 125, \ + .cache_hot_time = (5*1000/2), \ + .cache_nice_tries = 1, \ + .per_cpu_gain = 100, \ + .flags = SD_LOAD_BALANCE \ + | SD_BALANCE_NEWIDLE \ + | SD_BALANCE_EXEC \ + | SD_WAKE_AFFINE \ + | SD_WAKE_BALANCE, \ + .last_balance = jiffies, \ + .balance_interval = 1, \ + .nr_balance_failed = 0, \ +} +#endif + +#ifdef CONFIG_NUMA +#ifndef SD_NODE_INIT +#define SD_NODE_INIT (struct sched_domain) { \ + .span = CPU_MASK_NONE, \ + .parent = NULL, \ + .groups = NULL, \ + .min_interval = 8, \ + 
.max_interval = 32, \ + .busy_factor = 32, \ + .imbalance_pct = 125, \ + .cache_hot_time = (10*1000), \ + .cache_nice_tries = 1, \ + .per_cpu_gain = 100, \ + .flags = SD_LOAD_BALANCE \ + | SD_BALANCE_EXEC \ + | SD_WAKE_BALANCE, \ + .last_balance = jiffies, \ + .balance_interval = 1, \ + .nr_balance_failed = 0, \ +} +#endif +#endif /* CONFIG_NUMA */ + #endif /* _LINUX_TOPOLOGY_H */ ^ permalink raw reply [flat|nested] 12+ messages in thread
end of thread, other threads:[~2004-10-02 16:03 UTC | newest] Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed) -- links below jump to the message on this page -- 2004-09-29 1:12 [RFC PATCH] sched_domains: Make SD_NODE_INIT per-arch Matthew Dobson 2004-09-30 8:15 ` Nick Piggin 2004-09-30 18:36 ` Matthew Dobson 2004-09-30 19:23 ` Andrew Morton 2004-09-30 20:20 ` Matthew Dobson 2004-10-01 6:15 ` Martin J. Bligh 2004-10-01 22:20 ` Matthew Dobson 2004-10-02 16:02 ` Martin J. Bligh 2004-09-30 20:45 ` Andi Kleen 2004-09-30 21:06 ` Matthew Dobson 2004-09-30 21:12 ` Andi Kleen 2004-09-30 23:47 ` Matthew Dobson
This is a public inbox; see the mirroring instructions for how to clone and mirror all data and code used for this inbox.