From: Jesse Barnes <jbarnes@engr.sgi.com>
To: Nick Piggin <nickpiggin@yahoo.com.au>
Cc: Dimitri Sivanich <sivanich@sgi.com>,
"Siddha, Suresh B" <suresh.b.siddha@intel.com>,
Andrew Morton <akpm@osdl.org>, Anton Blanchard <anton@samba.org>,
Andi Kleen <ak@suse.de>, Ingo Molnar <mingo@elte.hu>,
linux-kernel <linux-kernel@vger.kernel.org>,
John Hawkes <hawkes@sgi.com>
Subject: Re: [PATCH] consolidate sched domains
Date: Tue, 27 Jul 2004 09:15:40 -0700 [thread overview]
Message-ID: <200407270915.40600.jbarnes@engr.sgi.com> (raw)
In-Reply-To: <4105CBD9.7080209@yahoo.com.au>
[-- Attachment #1: Type: text/plain, Size: 717 bytes --]
On Monday, July 26, 2004 8:28 pm, Nick Piggin wrote:
> You'll also want Jack Steiner's one liner. (I've sent all these to Andrew.)
Including the consolidation patch?
> Looks pretty neat. It may even be usable in the generic setup code if more
> architectures start needing it.
>
> For now, put it in your arch code when it is ready to be merged up of
> course.
> I would be very interested to see what sort of performance improvements you
> get out of the scheduler...
Ok, this new patch has no effect on platforms that don't define
ARCH_HAS_SCHED_DOMAIN, but changes the arch specific callback. I didn't want
to duplicate all the code you just ripped out, but if you think that's best I
can...
Thanks,
Jesse
[-- Attachment #2: sched-domain-node-span-4.patch --]
[-- Type: text/plain, Size: 4213 bytes --]
===== arch/ia64/kernel/smpboot.c 1.55 vs edited =====
--- 1.55/arch/ia64/kernel/smpboot.c 2004-06-04 02:21:54 -07:00
+++ edited/arch/ia64/kernel/smpboot.c 2004-07-27 09:09:42 -07:00
@@ -719,3 +719,70 @@
printk(KERN_ERR "SMP: Can't set SAL AP Boot Rendezvous: %s\n",
ia64_sal_strerror(sal_ret));
}
+
+#ifdef CONFIG_NUMA
+
+/**
+ * find_next_best_node - find the next node to include in a sched_domain
+ * @node: node whose sched_domain we're building
+ * @used_nodes: nodes already in the sched_domain
+ *
+ * Find the closest node (by node_distance() from @node) that is not yet in
+ * @used_nodes, mark it used and return it.  Returns 0 if every node is used.
+ *
+ * Should use nodemask_t.
+ */
+static int __init find_next_best_node(int node, unsigned long *used_nodes)
+{
+ int i, n, val, min_val, best_node = 0;
+
+ min_val = INT_MAX;
+
+ for (i = 0; i < numnodes; i++) {
+ /* Start at @node */
+ n = (node + i) % numnodes;
+
+ /* Skip already used nodes */
+ if (test_bit(n, used_nodes))
+ continue;
+
+ /* Distance to the candidate @n, not to the loop offset @i */
+ val = node_distance(node, n);
+
+ if (val < min_val) {
+ min_val = val;
+ best_node = n;
+ }
+ }
+
+ set_bit(best_node, used_nodes);
+ return best_node;
+}
+
+/**
+ * sched_domain_node_span - get a cpumask for a node's sched_domain
+ * @node: node whose cpumask we're constructing
+ * @size: number of nodes to include in this span
+ *
+ * Build the span by calling find_next_best_node() @size times for @node and
+ * OR-ing together the cpumasks of the nodes it returns. The aim is a span
+ * that prevents unnecessary balancing but still spreads tasks out well.
+ */
+cpumask_t __init sched_domain_node_span(int node, int size)
+{
+ int i;
+ cpumask_t span;
+ DECLARE_BITMAP(used_nodes, MAX_NUMNODES);
+
+ cpus_clear(span);
+ bitmap_zero(used_nodes, MAX_NUMNODES);
+
+ for (i = 0; i < size; i++) {
+ int next_node = find_next_best_node(node, used_nodes);
+ cpus_or(span, span, node_to_cpumask(next_node));
+ }
+
+ return span;
+}
+#endif /* CONFIG_NUMA */
+
===== include/asm-ia64/processor.h 1.60 vs edited =====
--- 1.60/include/asm-ia64/processor.h 2004-06-04 18:14:13 -07:00
+++ edited/include/asm-ia64/processor.h 2004-07-27 09:07:15 -07:00
@@ -335,6 +335,11 @@
/* Prepare to copy thread state - unlazy all lazy status */
#define prepare_to_copy(tsk) do { } while (0)
+#ifdef CONFIG_NUMA
+/* smpboot.c defines a numa specific scheduler domain routine */
+#define ARCH_HAS_SCHED_DOMAIN
+#endif
+
/*
* This is the mechanism for creating a new kernel thread.
*
===== kernel/sched.c 1.318 vs edited =====
--- 1.318/kernel/sched.c 2004-07-27 08:55:58 -07:00
+++ edited/kernel/sched.c 2004-07-27 09:10:22 -07:00
@@ -3692,8 +3692,13 @@
}
#ifdef ARCH_HAS_SCHED_DOMAIN
-extern void __init arch_init_sched_domains(void);
+extern cpumask_t __init sched_domain_node_span(int node, int size);
#else
+static cpumask_t __init sched_domain_node_span(int node, int size)
+{
+ return cpu_possible_map;
+}
+#endif /* ARCH_HAS_SCHED_DOMAIN */
#ifdef CONFIG_SCHED_SMT
static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
@@ -3708,10 +3713,18 @@
static struct sched_group sched_group_phys[NR_CPUS];
__init static int cpu_to_phys_group(int cpu)
{
+#ifdef CONFIG_SCHED_SMT
return first_cpu(cpu_sibling_map[cpu]);
+#else
+ return cpu;
+#endif
}
#ifdef CONFIG_NUMA
+
+/* Number of nearby nodes in a node's scheduling domain */
+#define SD_NODES_PER_DOMAIN 4
+
static DEFINE_PER_CPU(struct sched_domain, node_domains);
static struct sched_group sched_group_nodes[MAX_NUMNODES];
__init static int cpu_to_node_group(int cpu)
@@ -3779,7 +3792,8 @@
sd = &per_cpu(node_domains, i);
group = cpu_to_node_group(i);
*sd = SD_NODE_INIT;
- sd->span = cpu_possible_map;
+ /* FIXME: should be multilevel, in arch code */
+ sd->span = sched_domain_node_span(i, SD_NODES_PER_DOMAIN);
sd->groups = &sched_group_nodes[group];
#endif
@@ -3847,6 +3861,8 @@
sd->groups->cpu_power = power;
#ifdef CONFIG_NUMA
+ if (i != first_cpu(sd->groups->cpumask))
+ continue;
sd = &per_cpu(node_domains, i);
sd->groups->cpu_power += power;
#endif
@@ -3863,7 +3879,6 @@
cpu_attach_domain(sd, i);
}
}
-#endif /* ARCH_HAS_SCHED_DOMAIN */
#define SCHED_DOMAIN_DEBUG
#ifdef SCHED_DOMAIN_DEBUG
next prev parent reply other threads:[~2004-07-27 16:22 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2004-07-23 3:18 [PATCH] consolidate sched domains Nick Piggin
2004-07-23 5:31 ` Ingo Molnar
2004-07-23 15:30 ` Dimitri Sivanich
2004-07-23 21:50 ` Siddha, Suresh B
2004-07-24 3:09 ` Nick Piggin
2004-07-26 2:22 ` Dimitri Sivanich
2004-07-26 4:05 ` Nick Piggin
2004-07-26 18:06 ` Jesse Barnes
2004-07-27 3:28 ` Nick Piggin
2004-07-27 16:15 ` Jesse Barnes [this message]
2004-07-28 1:08 ` Nick Piggin
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=200407270915.40600.jbarnes@engr.sgi.com \
--to=jbarnes@engr.sgi.com \
--cc=ak@suse.de \
--cc=akpm@osdl.org \
--cc=anton@samba.org \
--cc=hawkes@sgi.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=nickpiggin@yahoo.com.au \
--cc=sivanich@sgi.com \
--cc=suresh.b.siddha@intel.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox