From: Jesse Barnes <jbarnes@engr.sgi.com>
To: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: linux-kernel@vger.kernel.org, linux-ia64@vger.kernel.org,
John Hawkes <hawkes@sgi.com>, Ingo Molnar <mingo@elte.hu>
Subject: Re: [PATCH] add scheduler domains for ia64
Date: Tue, 17 Aug 2004 20:57:32 +0000 [thread overview]
Message-ID: <200408171657.32357.jbarnes@engr.sgi.com> (raw)
In-Reply-To: <411EB463.5090809@yahoo.com.au>
[-- Attachment #1: Type: text/plain, Size: 903 bytes --]
On Saturday, August 14, 2004 8:54 pm, Nick Piggin wrote:
> Yeah, all the SD_*_INIT values are overridable. We could even say, put
> in an SD_NODE2_INIT for a 2nd level NUMA domain in the generic code,
> for example.
Yeah, we'll need different values for each level in the hierarchy.
> I'd say your closest-node setup would probably get close to what you want.
> The main thing you want is to not do huge amounts of balancing work in
> interrupt context, and also not to move a task from one side of the
> system to the other when one node is a little bit out of balance.
>
> I guess if you want to do anything fancier then we can take a look at
> re-exporting the domain setup.
Ok, sounds good. How does this look? It sits on top of 2.6.8.1-mm1, ripping
out the ia64 specific bits and moving things to sched.c. I've also added an
ia64 specific SD_NODE_INIT and an #if !defined to sched.c
Jesse
[-- Attachment #2: node-span.patch --]
[-- Type: text/x-diff, Size: 6020 bytes --]
diff -Napur -X /home/jbarnes/dontdiff linux-2.6.8.1-mm1/arch/ia64/kernel/smpboot.c linux-2.6.8.1-mm1.nodespan/arch/ia64/kernel/smpboot.c
--- linux-2.6.8.1-mm1/arch/ia64/kernel/smpboot.c 2004-08-17 13:41:43.000000000 -0700
+++ linux-2.6.8.1-mm1.nodespan/arch/ia64/kernel/smpboot.c 2004-08-17 13:34:28.000000000 -0700
@@ -707,69 +707,3 @@ init_smp_config(void)
ia64_sal_strerror(sal_ret));
}
-#ifdef CONFIG_NUMA
-
-/**
- * find_next_best_node - find the next node to include in a sched_domain
- * @node: node whose sched_domain we're building
- * @used_nodes: nodes already in the sched_domain
- *
- * Find the next node to include in a given scheduling domain. Simply
- * finds the closest node not already in the @used_nodes map.
- *
- * Should use nodemask_t.
- */
-static int __init find_next_best_node(int node, unsigned long *used_nodes)
-{
- int i, n, val, min_val, best_node = 0;
-
- min_val = INT_MAX;
-
- for (i = 0; i < numnodes; i++) {
- /* Start at @node */
- n = (node + i) % numnodes;
-
- /* Skip already used nodes */
- if (test_bit(n, used_nodes))
- continue;
-
- /* Simple min distance search */
- val = node_distance(node, i);
-
- if (val < min_val) {
- min_val = val;
- best_node = n;
- }
- }
-
- set_bit(best_node, used_nodes);
- return best_node;
-}
-
-/**
- * sched_domain_node_span - get a cpumask for a node's sched_domain
- * @node: node whose cpumask we're constructing
- * @size: number of nodes to include in this span
- *
- * Given a node, construct a good cpumask for its sched_domain to span. It
- * should be one that prevents unnecessary balancing, but also spreads tasks
- * out optimally.
- */
-cpumask_t __init sched_domain_node_span(int node, int size)
-{
- int i;
- cpumask_t span;
- DECLARE_BITMAP(used_nodes, MAX_NUMNODES);
-
- cpus_clear(span);
- bitmap_zero(used_nodes, MAX_NUMNODES);
-
- for (i = 0; i < size; i++) {
- int next_node = find_next_best_node(node, used_nodes);
- cpus_or(span, span, node_to_cpumask(next_node));
- }
-
- return span;
-}
-#endif /* CONFIG_NUMA */
-
diff -Napur -X /home/jbarnes/dontdiff linux-2.6.8.1-mm1/include/asm-ia64/processor.h linux-2.6.8.1-mm1.nodespan/include/asm-ia64/processor.h
--- linux-2.6.8.1-mm1/include/asm-ia64/processor.h 2004-08-17 13:41:22.000000000 -0700
+++ linux-2.6.8.1-mm1.nodespan/include/asm-ia64/processor.h 2004-08-17 13:37:13.000000000 -0700
@@ -335,8 +335,23 @@ struct task_struct;
#define prepare_to_copy(tsk) do { } while (0)
#ifdef CONFIG_NUMA
-/* smpboot.c defines a numa specific scheduler domain routine */
-#define ARCH_HAS_SCHED_DOMAIN
+#define SD_NODE_INIT (struct sched_domain) { \
+ .span = CPU_MASK_NONE, \
+ .parent = NULL, \
+ .groups = NULL, \
+ .min_interval = 80, \
+ .max_interval = 320, \
+ .busy_factor = 320, \
+ .imbalance_pct = 125, \
+ .cache_hot_time = (10*1000000), \
+ .cache_nice_tries = 1, \
+ .per_cpu_gain = 100, \
+ .flags = SD_BALANCE_EXEC \
+ | SD_WAKE_BALANCE, \
+ .last_balance = jiffies, \
+ .balance_interval = 10, \
+ .nr_balance_failed = 0, \
+}
#endif
/*
diff -Napur -X /home/jbarnes/dontdiff linux-2.6.8.1-mm1/kernel/sched.c linux-2.6.8.1-mm1.nodespan/kernel/sched.c
--- linux-2.6.8.1-mm1/kernel/sched.c 2004-08-17 13:41:37.000000000 -0700
+++ linux-2.6.8.1-mm1.nodespan/kernel/sched.c 2004-08-17 13:43:36.000000000 -0700
@@ -401,7 +401,8 @@ struct sched_domain {
.nr_balance_failed = 0, \
}
-#ifdef CONFIG_NUMA
+/* Arch can override this macro in processor.h */
+#if defined(CONFIG_NUMA) && !defined(SD_NODE_INIT)
/* Common values for NUMA nodes */
#define SD_NODE_INIT (struct sched_domain) { \
.span = CPU_MASK_NONE, \
@@ -2218,10 +2219,8 @@ static void active_load_balance(runqueue
for_each_domain(busiest_cpu, sd)
if (cpu_isset(busiest->push_cpu, sd->span))
break;
- if (!sd) {
- WARN_ON(1);
+ if (!sd)
return;
- }
group = sd->groups;
while (!cpu_isset(busiest_cpu, group->cpumask))
@@ -4121,15 +4120,74 @@ static void cpu_attach_domain(struct sch
}
#ifdef CONFIG_NUMA
-#ifdef ARCH_HAS_SCHED_DOMAIN
-extern cpumask_t __init sched_domain_node_span(int node, int size);
-#else
+/**
+ * find_next_best_node - find the next node to include in a sched_domain
+ * @node: node whose sched_domain we're building
+ * @used_nodes: nodes already in the sched_domain
+ *
+ * Find the next node to include in a given scheduling domain. Simply
+ * finds the closest node not already in the @used_nodes map.
+ *
+ * Should use nodemask_t.
+ */
+static int __init find_next_best_node(int node, unsigned long *used_nodes)
+{
+ int i, n, val, min_val, best_node = 0;
+
+ min_val = INT_MAX;
+
+ for (i = 0; i < numnodes; i++) {
+ /* Start at @node */
+ n = (node + i) % numnodes;
+
+ /* Skip already used nodes */
+ if (test_bit(n, used_nodes))
+ continue;
+
+ /* Simple min distance search */
+ val = node_distance(node, i);
+
+ if (val < min_val) {
+ min_val = val;
+ best_node = n;
+ }
+ }
+
+ set_bit(best_node, used_nodes);
+ return best_node;
+}
+
+/**
+ * sched_domain_node_span - get a cpumask for a node's sched_domain
+ * @node: node whose cpumask we're constructing
+ * @size: number of nodes to include in this span
+ *
+ * Given a node, construct a good cpumask for its sched_domain to span. It
+ * should be one that prevents unnecessary balancing, but also spreads tasks
+ * out optimally.
+ */
+cpumask_t __init sched_domain_node_span(int node, int size)
+{
+ int i;
+ cpumask_t span;
+ DECLARE_BITMAP(used_nodes, MAX_NUMNODES);
+
+ cpus_clear(span);
+ bitmap_zero(used_nodes, MAX_NUMNODES);
+
+ for (i = 0; i < size; i++) {
+ int next_node = find_next_best_node(node, used_nodes);
+ cpus_or(span, span, node_to_cpumask(next_node));
+ }
+
+ return span;
+}
+#else /* !CONFIG_NUMA */
static cpumask_t __init sched_domain_node_span(int node, int size)
{
return cpu_possible_map;
}
-#endif /* ARCH_HAS_SCHED_DOMAIN */
-#endif
+#endif /* CONFIG_NUMA */
#ifdef CONFIG_SCHED_SMT
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
WARNING: multiple messages have this Message-ID (diff)
From: Jesse Barnes <jbarnes@engr.sgi.com>
To: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: linux-kernel@vger.kernel.org, linux-ia64@vger.kernel.org,
John Hawkes <hawkes@sgi.com>, Ingo Molnar <mingo@elte.hu>
Subject: Re: [PATCH] add scheduler domains for ia64
Date: Tue, 17 Aug 2004 16:57:32 -0400 [thread overview]
Message-ID: <200408171657.32357.jbarnes@engr.sgi.com> (raw)
In-Reply-To: <411EB463.5090809@yahoo.com.au>
[-- Attachment #1: Type: text/plain, Size: 903 bytes --]
On Saturday, August 14, 2004 8:54 pm, Nick Piggin wrote:
> Yeah, all the SD_*_INIT values are overridable. We could even say, put
> in an SD_NODE2_INIT for a 2nd level NUMA domain in the generic code,
> for example.
Yeah, we'll need different values for each level in the hierarchy.
> I'd say your closest-node setup would probably get close to what you want.
> The main thing you want is to not do huge amounts of balancing work in
> interrupt context, and also not to move a task from one side of the
> system to the other when one node is a little bit out of balance.
>
> I guess if you want to do anything fancier then we can take a look at
> re-exporting the domain setup.
Ok, sounds good. How does this look? It sits on top of 2.6.8.1-mm1, ripping
out the ia64 specific bits and moving things to sched.c. I've also added an
ia64 specific SD_NODE_INIT and an #if !defined to sched.c
Jesse
[-- Attachment #2: node-span.patch --]
[-- Type: text/x-diff, Size: 6020 bytes --]
diff -Napur -X /home/jbarnes/dontdiff linux-2.6.8.1-mm1/arch/ia64/kernel/smpboot.c linux-2.6.8.1-mm1.nodespan/arch/ia64/kernel/smpboot.c
--- linux-2.6.8.1-mm1/arch/ia64/kernel/smpboot.c 2004-08-17 13:41:43.000000000 -0700
+++ linux-2.6.8.1-mm1.nodespan/arch/ia64/kernel/smpboot.c 2004-08-17 13:34:28.000000000 -0700
@@ -707,69 +707,3 @@ init_smp_config(void)
ia64_sal_strerror(sal_ret));
}
-#ifdef CONFIG_NUMA
-
-/**
- * find_next_best_node - find the next node to include in a sched_domain
- * @node: node whose sched_domain we're building
- * @used_nodes: nodes already in the sched_domain
- *
- * Find the next node to include in a given scheduling domain. Simply
- * finds the closest node not already in the @used_nodes map.
- *
- * Should use nodemask_t.
- */
-static int __init find_next_best_node(int node, unsigned long *used_nodes)
-{
- int i, n, val, min_val, best_node = 0;
-
- min_val = INT_MAX;
-
- for (i = 0; i < numnodes; i++) {
- /* Start at @node */
- n = (node + i) % numnodes;
-
- /* Skip already used nodes */
- if (test_bit(n, used_nodes))
- continue;
-
- /* Simple min distance search */
- val = node_distance(node, i);
-
- if (val < min_val) {
- min_val = val;
- best_node = n;
- }
- }
-
- set_bit(best_node, used_nodes);
- return best_node;
-}
-
-/**
- * sched_domain_node_span - get a cpumask for a node's sched_domain
- * @node: node whose cpumask we're constructing
- * @size: number of nodes to include in this span
- *
- * Given a node, construct a good cpumask for its sched_domain to span. It
- * should be one that prevents unnecessary balancing, but also spreads tasks
- * out optimally.
- */
-cpumask_t __init sched_domain_node_span(int node, int size)
-{
- int i;
- cpumask_t span;
- DECLARE_BITMAP(used_nodes, MAX_NUMNODES);
-
- cpus_clear(span);
- bitmap_zero(used_nodes, MAX_NUMNODES);
-
- for (i = 0; i < size; i++) {
- int next_node = find_next_best_node(node, used_nodes);
- cpus_or(span, span, node_to_cpumask(next_node));
- }
-
- return span;
-}
-#endif /* CONFIG_NUMA */
-
diff -Napur -X /home/jbarnes/dontdiff linux-2.6.8.1-mm1/include/asm-ia64/processor.h linux-2.6.8.1-mm1.nodespan/include/asm-ia64/processor.h
--- linux-2.6.8.1-mm1/include/asm-ia64/processor.h 2004-08-17 13:41:22.000000000 -0700
+++ linux-2.6.8.1-mm1.nodespan/include/asm-ia64/processor.h 2004-08-17 13:37:13.000000000 -0700
@@ -335,8 +335,23 @@ struct task_struct;
#define prepare_to_copy(tsk) do { } while (0)
#ifdef CONFIG_NUMA
-/* smpboot.c defines a numa specific scheduler domain routine */
-#define ARCH_HAS_SCHED_DOMAIN
+#define SD_NODE_INIT (struct sched_domain) { \
+ .span = CPU_MASK_NONE, \
+ .parent = NULL, \
+ .groups = NULL, \
+ .min_interval = 80, \
+ .max_interval = 320, \
+ .busy_factor = 320, \
+ .imbalance_pct = 125, \
+ .cache_hot_time = (10*1000000), \
+ .cache_nice_tries = 1, \
+ .per_cpu_gain = 100, \
+ .flags = SD_BALANCE_EXEC \
+ | SD_WAKE_BALANCE, \
+ .last_balance = jiffies, \
+ .balance_interval = 10, \
+ .nr_balance_failed = 0, \
+}
#endif
/*
diff -Napur -X /home/jbarnes/dontdiff linux-2.6.8.1-mm1/kernel/sched.c linux-2.6.8.1-mm1.nodespan/kernel/sched.c
--- linux-2.6.8.1-mm1/kernel/sched.c 2004-08-17 13:41:37.000000000 -0700
+++ linux-2.6.8.1-mm1.nodespan/kernel/sched.c 2004-08-17 13:43:36.000000000 -0700
@@ -401,7 +401,8 @@ struct sched_domain {
.nr_balance_failed = 0, \
}
-#ifdef CONFIG_NUMA
+/* Arch can override this macro in processor.h */
+#if defined(CONFIG_NUMA) && !defined(SD_NODE_INIT)
/* Common values for NUMA nodes */
#define SD_NODE_INIT (struct sched_domain) { \
.span = CPU_MASK_NONE, \
@@ -2218,10 +2219,8 @@ static void active_load_balance(runqueue
for_each_domain(busiest_cpu, sd)
if (cpu_isset(busiest->push_cpu, sd->span))
break;
- if (!sd) {
- WARN_ON(1);
+ if (!sd)
return;
- }
group = sd->groups;
while (!cpu_isset(busiest_cpu, group->cpumask))
@@ -4121,15 +4120,74 @@ static void cpu_attach_domain(struct sch
}
#ifdef CONFIG_NUMA
-#ifdef ARCH_HAS_SCHED_DOMAIN
-extern cpumask_t __init sched_domain_node_span(int node, int size);
-#else
+/**
+ * find_next_best_node - find the next node to include in a sched_domain
+ * @node: node whose sched_domain we're building
+ * @used_nodes: nodes already in the sched_domain
+ *
+ * Find the next node to include in a given scheduling domain. Simply
+ * finds the closest node not already in the @used_nodes map.
+ *
+ * Should use nodemask_t.
+ */
+static int __init find_next_best_node(int node, unsigned long *used_nodes)
+{
+ int i, n, val, min_val, best_node = 0;
+
+ min_val = INT_MAX;
+
+ for (i = 0; i < numnodes; i++) {
+ /* Start at @node */
+ n = (node + i) % numnodes;
+
+ /* Skip already used nodes */
+ if (test_bit(n, used_nodes))
+ continue;
+
+ /* Simple min distance search */
+ val = node_distance(node, i);
+
+ if (val < min_val) {
+ min_val = val;
+ best_node = n;
+ }
+ }
+
+ set_bit(best_node, used_nodes);
+ return best_node;
+}
+
+/**
+ * sched_domain_node_span - get a cpumask for a node's sched_domain
+ * @node: node whose cpumask we're constructing
+ * @size: number of nodes to include in this span
+ *
+ * Given a node, construct a good cpumask for its sched_domain to span. It
+ * should be one that prevents unnecessary balancing, but also spreads tasks
+ * out optimally.
+ */
+cpumask_t __init sched_domain_node_span(int node, int size)
+{
+ int i;
+ cpumask_t span;
+ DECLARE_BITMAP(used_nodes, MAX_NUMNODES);
+
+ cpus_clear(span);
+ bitmap_zero(used_nodes, MAX_NUMNODES);
+
+ for (i = 0; i < size; i++) {
+ int next_node = find_next_best_node(node, used_nodes);
+ cpus_or(span, span, node_to_cpumask(next_node));
+ }
+
+ return span;
+}
+#else /* !CONFIG_NUMA */
static cpumask_t __init sched_domain_node_span(int node, int size)
{
return cpu_possible_map;
}
-#endif /* ARCH_HAS_SCHED_DOMAIN */
-#endif
+#endif /* CONFIG_NUMA */
#ifdef CONFIG_SCHED_SMT
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
next prev parent reply other threads:[~2004-08-17 20:57 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2004-08-13 18:08 [PATCH] add scheduler domains for ia64 Jesse Barnes
2004-08-13 18:08 ` Jesse Barnes
2004-08-14 3:23 ` Nick Piggin
2004-08-14 3:23 ` Nick Piggin
2004-08-14 20:52 ` Jesse Barnes
2004-08-14 20:52 ` Jesse Barnes
2004-08-15 0:54 ` Nick Piggin
2004-08-15 0:54 ` Nick Piggin
2004-08-17 20:57 ` Jesse Barnes [this message]
2004-08-17 20:57 ` Jesse Barnes
2004-08-20 2:11 ` Nick Piggin
2004-08-20 2:11 ` Nick Piggin
2004-08-20 2:22 ` Jesse Barnes
2004-08-20 2:22 ` Jesse Barnes
2004-08-20 6:28 ` Andrew Morton
2004-08-20 6:28 ` Andrew Morton
2004-08-20 14:57 ` Jesse Barnes
2004-08-20 14:57 ` Jesse Barnes
2004-08-20 8:06 ` Ingo Molnar
2004-08-20 8:06 ` Ingo Molnar
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=200408171657.32357.jbarnes@engr.sgi.com \
--to=jbarnes@engr.sgi.com \
--cc=hawkes@sgi.com \
--cc=linux-ia64@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=nickpiggin@yahoo.com.au \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.