public inbox for linux-ia64@vger.kernel.org
 help / color / mirror / Atom feed
From: Jesse Barnes <jbarnes@engr.sgi.com>
To: linux-ia64@vger.kernel.org
Subject: [PATCH] top level scheduler domain for ia64
Date: Tue, 19 Oct 2004 21:27:27 +0000	[thread overview]
Message-ID: <200410191427.27336.jbarnes@engr.sgi.com> (raw)

[-- Attachment #1: Type: text/plain, Size: 810 bytes --]

Some have noticed that the overlapping sched domains code doesn't quite work 
as intended (it results in disjoint domains on some machines), and that a top 
level, machine spanning domain is needed.  This patch from John Hawkes adds 
it to the ia64 code.  This allows processes to run on all CPUs in large 
systems, though balancing is limited.  It should go to Linus soon; 
otherwise, large systems will only have ~16p (depending on topology) usable by 
the scheduler.  I sanity checked it on a small system after rediffing John's 
original, and he's done some testing on very large systems.

Nick, can you buy off on the sched.c change?  Alternatively, do you want to 
send that fix separately, John?

Signed-off-by: Jesse Barnes <jbarnes@sgi.com>
Signed-off-by: John Hawkes <hawkes@sgi.com>

Thanks,
Jesse

[-- Attachment #2: sched-domains-top-level-3.patch --]
[-- Type: text/plain, Size: 3203 bytes --]

===== arch/ia64/kernel/domain.c 1.3 vs edited =====
--- 1.3/arch/ia64/kernel/domain.c	2004-10-18 22:26:51 -07:00
+++ edited/arch/ia64/kernel/domain.c	2004-10-19 14:18:07 -07:00
@@ -119,6 +119,14 @@
  */
 static DEFINE_PER_CPU(struct sched_domain, node_domains);
 static struct sched_group *sched_group_nodes[MAX_NUMNODES];
+
+static DEFINE_PER_CPU(struct sched_domain, allnodes_domains);
+static struct sched_group sched_group_allnodes[MAX_NUMNODES];
+
+static int __devinit cpu_to_allnodes_group(int cpu)
+{
+	return cpu_to_node(cpu);
+}
 #endif
 
 /*
@@ -149,9 +157,21 @@
 		cpus_and(nodemask, nodemask, cpu_default_map);
 
 #ifdef CONFIG_NUMA
+		if (num_online_cpus()
+				> SD_NODES_PER_DOMAIN*cpus_weight(nodemask)) {
+			sd = &per_cpu(allnodes_domains, i);
+			*sd = SD_ALLNODES_INIT;
+			sd->span = cpu_default_map;
+			group = cpu_to_allnodes_group(i);
+			sd->groups = &sched_group_allnodes[group];
+			p = sd;
+		} else
+			p = NULL;
+
 		sd = &per_cpu(node_domains, i);
 		*sd = SD_NODE_INIT;
 		sd->span = sched_domain_node_span(node);
+		sd->parent = p;
 		cpus_and(sd->span, sd->span, cpu_default_map);
 #endif
 
@@ -201,6 +221,9 @@
 	}
 
 #ifdef CONFIG_NUMA
+	init_sched_build_groups(sched_group_allnodes, cpu_default_map,
+				&cpu_to_allnodes_group);
+
 	for (i = 0; i < MAX_NUMNODES; i++) {
 		/* Set up node groups */
 		struct sched_group *sg, *prev;
@@ -282,6 +305,15 @@
 		power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
 				(cpus_weight(sd->groups->cpumask)-1) / 10;
 		sd->groups->cpu_power = power;
+
+#ifdef CONFIG_NUMA
+		sd = &per_cpu(allnodes_domains, i);
+		if (sd->groups) {
+			power = SCHED_LOAD_SCALE + SCHED_LOAD_SCALE *
+				(cpus_weight(sd->groups->cpumask)-1) / 10;
+			sd->groups->cpu_power = power;
+		}
+#endif
 	}
 
 #ifdef CONFIG_NUMA
===== include/asm-ia64/topology.h 1.12 vs edited =====
--- 1.12/include/asm-ia64/topology.h	2004-10-18 22:26:52 -07:00
+++ edited/include/asm-ia64/topology.h	2004-10-19 14:18:06 -07:00
@@ -58,7 +58,26 @@
 				| SD_BALANCE_EXEC	\
 				| SD_WAKE_BALANCE,	\
 	.last_balance		= jiffies,		\
-	.balance_interval	= 10,			\
+	.balance_interval	= 1,			\
+	.nr_balance_failed	= 0,			\
+}
+
+/* sched_domains SD_ALLNODES_INIT for IA64 NUMA machines */
+#define SD_ALLNODES_INIT (struct sched_domain) {	\
+	.span			= CPU_MASK_NONE,	\
+	.parent			= NULL,			\
+	.groups			= NULL,			\
+	.min_interval		= 80,			\
+	.max_interval		= 320,			\
+	.busy_factor		= 320,			\
+	.imbalance_pct		= 125,			\
+	.cache_hot_time		= (10*1000000),		\
+	.cache_nice_tries	= 1,			\
+	.per_cpu_gain		= 100,			\
+	.flags			= SD_LOAD_BALANCE	\
+				| SD_BALANCE_EXEC,	\
+	.last_balance		= jiffies,		\
+	.balance_interval	= 100*(63+num_online_cpus())/64,   \
 	.nr_balance_failed	= 0,			\
 }
 
===== kernel/sched.c 1.367 vs edited =====
--- 1.367/kernel/sched.c	2004-10-18 22:26:52 -07:00
+++ edited/kernel/sched.c	2004-10-19 14:18:06 -07:00
@@ -4378,11 +4378,10 @@
 			printk("domain %d: ", level);
 
 			if (!(sd->flags & SD_LOAD_BALANCE)) {
-				printk("does not balance");
+				printk("does not load-balance");
 				if (sd->parent)
 					printk(" ERROR !SD_LOAD_BALANCE domain has parent");
 				printk("\n");
-				break;
 			}
 
 			printk("span %s\n", str);

             reply	other threads:[~2004-10-19 21:27 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2004-10-19 21:27 Jesse Barnes [this message]
2004-10-20  0:02 ` [PATCH] top level scheduler domain for ia64 Nick Piggin
2004-10-20 17:48 ` Luck, Tony
2004-10-20 18:02 ` Nick Piggin
2004-10-20 18:03 ` Jesse Barnes
2004-10-21 14:11 ` Xavier Bru
2004-10-21 14:34 ` Nick Piggin
2004-10-28  9:29 ` Takayoshi Kochi
2004-10-28 15:26 ` Jesse Barnes
2004-11-01  6:35 ` Takayoshi Kochi
2004-11-01 17:07 ` Jesse Barnes
2004-11-01 17:16 ` Matthew Wilcox
2004-11-01 18:36 ` Jesse Barnes
2004-11-01 18:53 ` Luck, Tony
2004-11-01 19:02 ` Jesse Barnes
2004-11-01 19:45 ` Luck, Tony
2004-11-01 22:39 ` Jesse Barnes
2004-11-02  0:12 ` Zou, Nanhai
2004-11-02  7:36 ` Takayoshi Kochi
2004-11-02  8:48 ` Gerald Pfeifer
2004-11-02  9:31 ` Takayoshi Kochi
2004-11-02 21:31 ` Luck, Tony
2004-11-03  6:15 ` Takayoshi Kochi
2004-11-03 16:22 ` Jesse Barnes
2004-11-03 16:57 ` Luck, Tony
2004-11-03 17:04 ` Jesse Barnes
2004-11-08 17:31 ` John Hawkes

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=200410191427.27336.jbarnes@engr.sgi.com \
    --to=jbarnes@engr.sgi.com \
    --cc=linux-ia64@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox