From: Jesse Barnes <jbarnes@engr.sgi.com>
To: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: linux-kernel@vger.kernel.org, linux-ia64@vger.kernel.org,
John Hawkes <hawkes@sgi.com>, Ingo Molnar <mingo@elte.hu>
Subject: Re: [PATCH] add scheduler domains for ia64
Date: Tue, 17 Aug 2004 20:57:32 +0000 [thread overview]
Message-ID: <200408171657.32357.jbarnes@engr.sgi.com> (raw)
In-Reply-To: <411EB463.5090809@yahoo.com.au>
[-- Attachment #1: Type: text/plain, Size: 903 bytes --]
On Saturday, August 14, 2004 8:54 pm, Nick Piggin wrote:
> Yeah, all the SD_*_INIT values are overridable. We could even say, put
> in an SD_NODE2_INIT for a 2nd level NUMA domain in the generic code,
> for example.
Yeah, we'll need different values for each level in the hierarchy.
> I'd say your closest-node setup would probably get close to what you want.
> The main thing you want is to not do huge amounts of balancing work in
> interrupt context, and also not to move a task from one side of the
> system to the other when one node is a little bit out of balance.
>
> I guess if you want to do anything fancier then we can take a look at
> re-exporting the domain setup.
Ok, sounds good. How does this look? It sits on top of 2.6.8.1-mm1, ripping
out the ia64-specific bits and moving things to sched.c. I've also added an
ia64-specific SD_NODE_INIT (in processor.h) and an #if !defined(SD_NODE_INIT) guard to sched.c.
Jesse
[-- Attachment #2: node-span.patch --]
[-- Type: text/x-diff, Size: 6020 bytes --]
diff -Napur -X /home/jbarnes/dontdiff linux-2.6.8.1-mm1/arch/ia64/kernel/smpboot.c linux-2.6.8.1-mm1.nodespan/arch/ia64/kernel/smpboot.c
--- linux-2.6.8.1-mm1/arch/ia64/kernel/smpboot.c 2004-08-17 13:41:43.000000000 -0700
+++ linux-2.6.8.1-mm1.nodespan/arch/ia64/kernel/smpboot.c 2004-08-17 13:34:28.000000000 -0700
@@ -707,69 +707,3 @@ init_smp_config(void)
ia64_sal_strerror(sal_ret));
}
-#ifdef CONFIG_NUMA
-
-/**
- * find_next_best_node - find the next node to include in a sched_domain
- * @node: node whose sched_domain we're building
- * @used_nodes: nodes already in the sched_domain
- *
- * Find the next node to include in a given scheduling domain. Simply
- * finds the closest node not already in the @used_nodes map.
- *
- * Should use nodemask_t.
- */
-static int __init find_next_best_node(int node, unsigned long *used_nodes)
-{
- int i, n, val, min_val, best_node = 0;
-
- min_val = INT_MAX;
-
- for (i = 0; i < numnodes; i++) {
- /* Start at @node */
- n = (node + i) % numnodes;
-
- /* Skip already used nodes */
- if (test_bit(n, used_nodes))
- continue;
-
- /* Simple min distance search */
- val = node_distance(node, i);
-
- if (val < min_val) {
- min_val = val;
- best_node = n;
- }
- }
-
- set_bit(best_node, used_nodes);
- return best_node;
-}
-
-/**
- * sched_domain_node_span - get a cpumask for a node's sched_domain
- * @node: node whose cpumask we're constructing
- * @size: number of nodes to include in this span
- *
- * Given a node, construct a good cpumask for its sched_domain to span. It
- * should be one that prevents unnecessary balancing, but also spreads tasks
- * out optimally.
- */
-cpumask_t __init sched_domain_node_span(int node, int size)
-{
- int i;
- cpumask_t span;
- DECLARE_BITMAP(used_nodes, MAX_NUMNODES);
-
- cpus_clear(span);
- bitmap_zero(used_nodes, MAX_NUMNODES);
-
- for (i = 0; i < size; i++) {
- int next_node = find_next_best_node(node, used_nodes);
- cpus_or(span, span, node_to_cpumask(next_node));
- }
-
- return span;
-}
-#endif /* CONFIG_NUMA */
-
diff -Napur -X /home/jbarnes/dontdiff linux-2.6.8.1-mm1/include/asm-ia64/processor.h linux-2.6.8.1-mm1.nodespan/include/asm-ia64/processor.h
--- linux-2.6.8.1-mm1/include/asm-ia64/processor.h 2004-08-17 13:41:22.000000000 -0700
+++ linux-2.6.8.1-mm1.nodespan/include/asm-ia64/processor.h 2004-08-17 13:37:13.000000000 -0700
@@ -335,8 +335,23 @@ struct task_struct;
#define prepare_to_copy(tsk) do { } while (0)
#ifdef CONFIG_NUMA
-/* smpboot.c defines a numa specific scheduler domain routine */
-#define ARCH_HAS_SCHED_DOMAIN
+#define SD_NODE_INIT (struct sched_domain) { \
+ .span = CPU_MASK_NONE, \
+ .parent = NULL, \
+ .groups = NULL, \
+ .min_interval = 80, \
+ .max_interval = 320, \
+ .busy_factor = 320, \
+ .imbalance_pct = 125, \
+ .cache_hot_time = (10*1000000), \
+ .cache_nice_tries = 1, \
+ .per_cpu_gain = 100, \
+ .flags = SD_BALANCE_EXEC \
+ | SD_WAKE_BALANCE, \
+ .last_balance = jiffies, \
+ .balance_interval = 10, \
+ .nr_balance_failed = 0, \
+}
#endif
/*
diff -Napur -X /home/jbarnes/dontdiff linux-2.6.8.1-mm1/kernel/sched.c linux-2.6.8.1-mm1.nodespan/kernel/sched.c
--- linux-2.6.8.1-mm1/kernel/sched.c 2004-08-17 13:41:37.000000000 -0700
+++ linux-2.6.8.1-mm1.nodespan/kernel/sched.c 2004-08-17 13:43:36.000000000 -0700
@@ -401,7 +401,8 @@ struct sched_domain {
.nr_balance_failed = 0, \
}
-#ifdef CONFIG_NUMA
+/* Arch can override this macro in processor.h */
+#if defined(CONFIG_NUMA) && !defined(SD_NODE_INIT)
/* Common values for NUMA nodes */
#define SD_NODE_INIT (struct sched_domain) { \
.span = CPU_MASK_NONE, \
@@ -2218,10 +2219,8 @@ static void active_load_balance(runqueue
for_each_domain(busiest_cpu, sd)
if (cpu_isset(busiest->push_cpu, sd->span))
break;
- if (!sd) {
- WARN_ON(1);
+ if (!sd)
return;
- }
group = sd->groups;
while (!cpu_isset(busiest_cpu, group->cpumask))
@@ -4121,15 +4120,74 @@ static void cpu_attach_domain(struct sch
}
#ifdef CONFIG_NUMA
-#ifdef ARCH_HAS_SCHED_DOMAIN
-extern cpumask_t __init sched_domain_node_span(int node, int size);
-#else
+/**
+ * find_next_best_node - find the next node to include in a sched_domain
+ * @node: node whose sched_domain we're building
+ * @used_nodes: nodes already in the sched_domain
+ *
+ * Find the next node to include in a given scheduling domain. Simply
+ * finds the closest node not already in the @used_nodes map, marks it in
+ * the map and returns it (node 0 if no unused node is left).
+ * Should use nodemask_t.
+ */
+static int __init find_next_best_node(int node, unsigned long *used_nodes)
+{
+ int i, n, val, min_val, best_node = 0;
+
+ min_val = INT_MAX;
+
+ for (i = 0; i < numnodes; i++) {
+ /* Start at @node */
+ n = (node + i) % numnodes;
+
+ /* Skip already used nodes */
+ if (test_bit(n, used_nodes))
+ continue;
+
+ /* Distance to candidate @n (not loop index @i) */
+ val = node_distance(node, n);
+
+ if (val < min_val) {
+ min_val = val;
+ best_node = n;
+ }
+ }
+
+ set_bit(best_node, used_nodes);
+ return best_node;
+}
+
+/**
+ * sched_domain_node_span - get a cpumask for a node's sched_domain
+ * @node: node whose cpumask we're constructing
+ * @size: number of nodes to include in this span
+ *
+ * Given a node, construct a good cpumask for its sched_domain to span. It
+ * should be one that prevents unnecessary balancing, but also spreads tasks
+ * out optimally.
+ */
+cpumask_t __init sched_domain_node_span(int node, int size)
+{
+ int i;
+ cpumask_t span;
+ DECLARE_BITMAP(used_nodes, MAX_NUMNODES);
+
+ cpus_clear(span);
+ bitmap_zero(used_nodes, MAX_NUMNODES);
+
+ for (i = 0; i < size; i++) {
+ int next_node = find_next_best_node(node, used_nodes);
+ cpus_or(span, span, node_to_cpumask(next_node));
+ }
+
+ return span;
+}
+#else /* !CONFIG_NUMA */
static cpumask_t __init sched_domain_node_span(int node, int size)
{
return cpu_possible_map;
}
-#endif /* ARCH_HAS_SCHED_DOMAIN */
-#endif
+#endif /* CONFIG_NUMA */
#ifdef CONFIG_SCHED_SMT
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
next prev parent reply other threads:[~2004-08-17 20:57 UTC|newest]
Thread overview: 10+ messages / expand[flat|nested] mbox.gz Atom feed top
2004-08-13 18:08 [PATCH] add scheduler domains for ia64 Jesse Barnes
2004-08-14 3:23 ` Nick Piggin
2004-08-14 20:52 ` Jesse Barnes
2004-08-15 0:54 ` Nick Piggin
2004-08-17 20:57 ` Jesse Barnes [this message]
2004-08-20 2:11 ` Nick Piggin
2004-08-20 2:22 ` Jesse Barnes
2004-08-20 6:28 ` Andrew Morton
2004-08-20 14:57 ` Jesse Barnes
2004-08-20 8:06 ` Ingo Molnar
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=200408171657.32357.jbarnes@engr.sgi.com \
--to=jbarnes@engr.sgi.com \
--cc=hawkes@sgi.com \
--cc=linux-ia64@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=nickpiggin@yahoo.com.au \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox