public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [patch 2/2] cpu hotplug notifier for updating sched domains
@ 2004-09-07 18:50 nathanl
  2004-09-08  0:44 ` Nick Piggin
  0 siblings, 1 reply; 7+ messages in thread
From: nathanl @ 2004-09-07 18:50 UTC (permalink / raw)
  To: linux-kernel; +Cc: akpm, rusty, piggin, nathanl


Register a cpu hotplug notifier which reinitializes the scheduler
domains hierarchy.  The notifier temporarily attaches all running cpus
to a "dummy" domain (like we currently do during boot) to avoid
balancing.  It then calls arch_init_sched_domains which rebuilds the
"real" domains and reattaches the cpus to them.

Also change __init attributes to __devinit where necessary.

Signed-off-by: Nathan Lynch <nathanl@austin.ibm.com>


---


diff -puN kernel/sched.c~sched-domains-cpu-hotplug-notifier kernel/sched.c
--- 2.6.9-rc1-bk14/kernel/sched.c~sched-domains-cpu-hotplug-notifier	2004-09-07 13:32:50.000000000 -0500
+++ 2.6.9-rc1-bk14-nathanl/kernel/sched.c	2004-09-07 13:34:38.000000000 -0500
@@ -4191,7 +4191,10 @@ spinlock_t kernel_flag __cacheline_align
 EXPORT_SYMBOL(kernel_flag);
 
 #ifdef CONFIG_SMP
-/* Attach the domain 'sd' to 'cpu' as its base domain */
+/*
+ * Attach the domain 'sd' to 'cpu' as its base domain.  Callers must
+ * hold the cpu_control_semaphore.
+ */
 static void cpu_attach_domain(struct sched_domain *sd, int cpu)
 {
 	migration_req_t req;
@@ -4199,8 +4202,6 @@ static void cpu_attach_domain(struct sch
 	runqueue_t *rq = cpu_rq(cpu);
 	int local = 1;
 
-	lock_cpu_hotplug();
-
 	spin_lock_irqsave(&rq->lock, flags);
 
 	if (cpu == smp_processor_id() || !cpu_online(cpu)) {
@@ -4219,8 +4220,6 @@ static void cpu_attach_domain(struct sch
 		wake_up_process(rq->migration_thread);
 		wait_for_completion(&req.done);
 	}
-
-	unlock_cpu_hotplug();
 }
 
 #ifdef CONFIG_NUMA
@@ -4234,7 +4233,7 @@ static void cpu_attach_domain(struct sch
  *
  * Should use nodemask_t.
  */
-static int __init find_next_best_node(int node, unsigned long *used_nodes)
+static int __devinit find_next_best_node(int node, unsigned long *used_nodes)
 {
 	int i, n, val, min_val, best_node = 0;
 
@@ -4270,7 +4269,7 @@ static int __init find_next_best_node(in
  * should be one that prevents unnecessary balancing, but also spreads tasks
  * out optimally.
  */
-cpumask_t __init sched_domain_node_span(int node, int size)
+cpumask_t __devinit sched_domain_node_span(int node, int size)
 {
 	int i;
 	cpumask_t span;
@@ -4294,7 +4293,7 @@ cpumask_t __init sched_domain_node_span(
 #ifdef CONFIG_SCHED_SMT
 static DEFINE_PER_CPU(struct sched_domain, cpu_domains);
 static struct sched_group sched_group_cpus[NR_CPUS];
-__init static int cpu_to_cpu_group(int cpu)
+static int __devinit cpu_to_cpu_group(int cpu)
 {
 	return cpu;
 }
@@ -4302,7 +4301,7 @@ __init static int cpu_to_cpu_group(int c
 
 static DEFINE_PER_CPU(struct sched_domain, phys_domains);
 static struct sched_group sched_group_phys[NR_CPUS];
-__init static int cpu_to_phys_group(int cpu)
+static int __devinit cpu_to_phys_group(int cpu)
 {
 #ifdef CONFIG_SCHED_SMT
 	return first_cpu(cpu_sibling_map[cpu]);
@@ -4318,7 +4317,7 @@ __init static int cpu_to_phys_group(int 
 
 static DEFINE_PER_CPU(struct sched_domain, node_domains);
 static struct sched_group sched_group_nodes[MAX_NUMNODES];
-__init static int cpu_to_node_group(int cpu)
+static int __devinit cpu_to_node_group(int cpu)
 {
 	return cpu_to_node(cpu);
 }
@@ -4328,9 +4327,9 @@ __init static int cpu_to_node_group(int 
 static struct sched_group sched_group_isolated[NR_CPUS];
 
 /* cpus with isolated domains */
-cpumask_t __initdata cpu_isolated_map = CPU_MASK_NONE;
+cpumask_t __devinitdata cpu_isolated_map = CPU_MASK_NONE;
 
-__init static int cpu_to_isolated_group(int cpu)
+static int __devinit cpu_to_isolated_group(int cpu)
 {
 	return cpu;
 }
@@ -4360,7 +4359,7 @@ __setup ("isolcpus=", isolated_cpu_setup
  * covered by the given span, and will set each group's ->cpumask correctly,
  * and ->cpu_power to 0.
  */
-__init static void init_sched_build_groups(struct sched_group groups[],
+static void __devinit init_sched_build_groups(struct sched_group groups[],
 			cpumask_t span, int (*group_fn)(int cpu))
 {
 	struct sched_group *first = NULL, *last = NULL;
@@ -4394,7 +4393,11 @@ __init static void init_sched_build_grou
 	last->next = first;
 }
 
-__init static void arch_init_sched_domains(cpumask_t cpu_default_map)
+/*
+ * Set up scheduler domains and groups.  Callers must hold the cpucontrol
+ * semaphore.
+ */
+static void __devinit arch_init_sched_domains(cpumask_t cpu_default_map)
 {
 	int i;
 	/*
@@ -4630,10 +4633,61 @@ void sched_domain_debug(void)
 #define sched_domain_debug() {}
 #endif
 
+#ifdef CONFIG_SMP
+/* Initial dummy domain for early boot and for hotplug cpu */
+static struct sched_domain sched_domain_dummy;
+static struct sched_group sched_group_dummy;
+#endif
+
+#ifdef CONFIG_HOTPLUG_CPU
+/*
+ * Force a reinitialization of the sched domains hierarchy.  The domains
+ * and groups cannot be updated in place without racing with the balancing
+ * code, so we temporarily attach all running cpus to a "dummy" domain
+ * which will prevent rebalancing while the sched domains are recalculated.
+ */
+static int update_sched_domains(struct notifier_block *nfb,
+				unsigned long action, void *hcpu)
+{
+	cpumask_t new_cpu_map = cpu_online_map;
+	int i, cpu = (long)hcpu;
+
+	switch (action) {
+	case CPU_UP_PREPARE:
+		cpu_set(cpu, new_cpu_map);
+		break;
+	case CPU_UP_CANCELED: /* fall through */
+	case CPU_DEAD:
+		/*
+		 * No need to clear the dead cpu's bit, it has
+		 * already been marked offline.
+		 */
+		cpu_attach_domain(&sched_domain_dummy, cpu);
+		break;
+	default:
+		return NOTIFY_DONE;
+	}
+
+	for_each_online_cpu(i)
+		cpu_attach_domain(&sched_domain_dummy, i);
+
+	/* The cpucontrol semaphore is already held by cpu_up/cpu_down */
+	arch_init_sched_domains(new_cpu_map);
+
+	sched_domain_debug();
+
+	return NOTIFY_OK;
+}
+#endif
+
 void __init sched_init_smp(void)
 {
+	lock_cpu_hotplug();
 	arch_init_sched_domains(cpu_online_map);
 	sched_domain_debug();
+	unlock_cpu_hotplug();
+
+	hotcpu_notifier(update_sched_domains, 0);
 }
 #else
 void __init sched_init_smp(void)
@@ -4656,20 +4710,18 @@ void __init sched_init(void)
 
 #ifdef CONFIG_SMP
 	/* Set up an initial dummy domain for early boot */
-	static struct sched_domain sched_domain_init;
-	static struct sched_group sched_group_init;
 
-	memset(&sched_domain_init, 0, sizeof(struct sched_domain));
-	sched_domain_init.span = CPU_MASK_ALL;
-	sched_domain_init.groups = &sched_group_init;
-	sched_domain_init.last_balance = jiffies;
-	sched_domain_init.balance_interval = INT_MAX; /* Don't balance */
-	sched_domain_init.busy_factor = 1;
-
-	memset(&sched_group_init, 0, sizeof(struct sched_group));
-	sched_group_init.cpumask = CPU_MASK_ALL;
-	sched_group_init.next = &sched_group_init;
-	sched_group_init.cpu_power = SCHED_LOAD_SCALE;
+	memset(&sched_domain_dummy, 0, sizeof(struct sched_domain));
+	sched_domain_dummy.span = CPU_MASK_ALL;
+	sched_domain_dummy.groups = &sched_group_dummy;
+	sched_domain_dummy.last_balance = jiffies;
+	sched_domain_dummy.balance_interval = INT_MAX; /* Don't balance */
+	sched_domain_dummy.busy_factor = 1;
+
+	memset(&sched_group_dummy, 0, sizeof(struct sched_group));
+	sched_group_dummy.cpumask = CPU_MASK_ALL;
+	sched_group_dummy.next = &sched_group_dummy;
+	sched_group_dummy.cpu_power = SCHED_LOAD_SCALE;
 #endif
 
 	for (i = 0; i < NR_CPUS; i++) {
@@ -4682,7 +4734,7 @@ void __init sched_init(void)
 		rq->best_expired_prio = MAX_PRIO;
 
 #ifdef CONFIG_SMP
-		rq->sd = &sched_domain_init;
+		rq->sd = &sched_domain_dummy;
 		rq->cpu_load = 0;
 		rq->active_balance = 0;
 		rq->push_cpu = 0;

_

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch 2/2] cpu hotplug notifier for updating sched domains
  2004-09-07 18:50 [patch 2/2] cpu hotplug notifier for updating sched domains nathanl
@ 2004-09-08  0:44 ` Nick Piggin
  2004-09-08  2:05   ` Nathan Lynch
  0 siblings, 1 reply; 7+ messages in thread
From: Nick Piggin @ 2004-09-08  0:44 UTC (permalink / raw)
  To: nathanl; +Cc: linux-kernel, akpm, rusty, Ingo Molnar



nathanl@austin.ibm.com wrote:

>Register a cpu hotplug notifier which reinitializes the scheduler
>domains hierarchy.  The notifier temporarily attaches all running cpus
>to a "dummy" domain (like we currently do during boot) to avoid
>balancing.  It then calls arch_init_sched_domains which rebuilds the
>"real" domains and reattaches the cpus to them.
>
>Also change __init attributes to __devinit where necessary.
>
>Signed-off-by: Nathan Lynch <nathanl@austin.ibm.com>
>

Thanks Nathan, this looks great.

I think the next step is to now make the setup code only use cpu_online_map
and get rid of everywhere I had been doing cpus_and(tmp, ..., 
cpu_online_map).
This may also make your patch 1/2 unnecessary? What do you think?

Nick


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch 2/2] cpu hotplug notifier for updating sched domains
  2004-09-08  0:44 ` Nick Piggin
@ 2004-09-08  2:05   ` Nathan Lynch
  2004-09-08  2:19     ` Nick Piggin
  0 siblings, 1 reply; 7+ messages in thread
From: Nathan Lynch @ 2004-09-08  2:05 UTC (permalink / raw)
  To: Nick Piggin; +Cc: lkml, Andrew Morton, Rusty Russell, Ingo Molnar

On Tue, 2004-09-07 at 19:44, Nick Piggin wrote:
> I think the next step is to now make the setup code only use cpu_online_map
> and get rid of everywhere I had been doing cpus_and(tmp, ..., 
> cpu_online_map).
> This may also make your patch 1/2 unnecessary? What do you think?

Well, we have to "lie" to arch_init_sched_domains a little bit when
bringing a cpu online, by setting the soon-to-be-online cpu's bit in the
argument mask.  So I think the first patch is still necessary.

Nathan



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch 2/2] cpu hotplug notifier for updating sched domains
  2004-09-08  2:05   ` Nathan Lynch
@ 2004-09-08  2:19     ` Nick Piggin
  2004-09-08  2:55       ` Nathan Lynch
  2004-09-09 10:18       ` Rusty Russell
  0 siblings, 2 replies; 7+ messages in thread
From: Nick Piggin @ 2004-09-08  2:19 UTC (permalink / raw)
  To: Nathan Lynch; +Cc: lkml, Andrew Morton, Rusty Russell, Ingo Molnar



Nathan Lynch wrote:

>On Tue, 2004-09-07 at 19:44, Nick Piggin wrote:
>
>>I think the next step is to now make the setup code only use cpu_online_map
>>

^^^ OK I see you've already done that. Sorry I should have looked at the
patches a bit closer. Your implementation looks very nice.

>>and get rid of everywhere I had been doing cpus_and(tmp, ..., 
>>cpu_online_map).
>>

^^^ This should still be done, of course. That can come later.

>>This may also make your patch 1/2 unnecessary? What do you think?
>>
>
>Well, we have to "lie" to arch_init_sched_domains a little bit when
>bringing a cpu online, by setting the soon-to-be-online cpu's bit in the
>argument mask.  So I think the first patch is still necessary.
>
>

Can't we do everything in the CPU_UP_ONLINE case though?


One other thing:

void __init sched_init_smp(void)
 {
+	lock_cpu_hotplug();
 	arch_init_sched_domains(cpu_online_map);
 	sched_domain_debug();
+	unlock_cpu_hotplug();
+
+	hotcpu_notifier(update_sched_domains, 0);
 }

Do you have a theoretical race here? Can we hotplug a CPU before the notifier
is registered? (I know we *can't* because it is still earlyish boot).

Can you move hotcpu_notifier under the cpu_hotplug lock? Seems not because
register_cpu_notifier takes the lock itself. Seems like a flaw in the API to
me. Probably the notifier chain should be protected by a lock that nests
inside the cpucontrol lock. Rusty?



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch 2/2] cpu hotplug notifier for updating sched domains
  2004-09-08  2:19     ` Nick Piggin
@ 2004-09-08  2:55       ` Nathan Lynch
  2004-09-09 10:18       ` Rusty Russell
  1 sibling, 0 replies; 7+ messages in thread
From: Nathan Lynch @ 2004-09-08  2:55 UTC (permalink / raw)
  To: Nick Piggin; +Cc: lkml, Andrew Morton, Rusty Russell, Ingo Molnar

On Tue, 2004-09-07 at 21:19, Nick Piggin wrote:
> Nathan Lynch wrote:
> 
> >On Tue, 2004-09-07 at 19:44, Nick Piggin wrote:
> >>and get rid of everywhere I had been doing cpus_and(tmp, ..., 
> >>cpu_online_map).
> >>
> 
> ^^^ This should still be done, of course. That can come later.
> 
> >>This may also make your patch 1/2 unnecessary? What do you think?
> >>
> >
> >Well, we have to "lie" to arch_init_sched_domains a little bit when
> >bringing a cpu online, by setting the soon-to-be-online cpu's bit in the
> >argument mask.  So I think the first patch is still necessary.
> >
> >
> 
> Can't we do everything in the CPU_UP_ONLINE case though?

I guess I overlooked that possibility.  I'll give it a try.  If it works
ok, the first patch can go.

Nathan


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch 2/2] cpu hotplug notifier for updating sched domains
  2004-09-08  2:19     ` Nick Piggin
  2004-09-08  2:55       ` Nathan Lynch
@ 2004-09-09 10:18       ` Rusty Russell
  2004-09-09 10:31         ` Nick Piggin
  1 sibling, 1 reply; 7+ messages in thread
From: Rusty Russell @ 2004-09-09 10:18 UTC (permalink / raw)
  To: Nick Piggin
  Cc: Nathan Lynch, lkml - Kernel Mailing List, Andrew Morton,
	Ingo Molnar

On Wed, 2004-09-08 at 12:19, Nick Piggin wrote:
> Nathan Lynch wrote:
> >Well, we have to "lie" to arch_init_sched_domains a little bit when
> >bringing a cpu online, by setting the soon-to-be-online cpu's bit in the
> >argument mask.  So I think the first patch is still necessary.

I'm still a little surprised that you don't change the domains while the
machine is frozen (ie in take_cpu_down()).  This should avoid any races,
since noone can be looking at the domains at this time.

> Do you have a theoretical race here? Can we hotplug a CPU before the notifier
> is registered? (I know we *can't* because it is still earlyish boot).

No, init has so many serial assumptions that this is the least of our
worries.

Rusty.
-- 
Anyone who quotes me in their signature is an idiot -- Rusty Russell


^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [patch 2/2] cpu hotplug notifier for updating sched domains
  2004-09-09 10:18       ` Rusty Russell
@ 2004-09-09 10:31         ` Nick Piggin
  0 siblings, 0 replies; 7+ messages in thread
From: Nick Piggin @ 2004-09-09 10:31 UTC (permalink / raw)
  To: Rusty Russell
  Cc: Nick Piggin, Nathan Lynch, lkml - Kernel Mailing List,
	Andrew Morton, Ingo Molnar

Rusty Russell wrote:
> On Wed, 2004-09-08 at 12:19, Nick Piggin wrote:

>>Do you have a theoretical race here? Can we hotplug a CPU before the notifier
>>is registered? (I know we *can't* because it is still earlyish boot).
> 
> 
> No, init has so many serial assumptions that this is the least of our
> worries.
> 

No. But the point is, you cannot (easily) set something up like this:

"do some setup with this specific, valid cpu_online_map";
"register notifier so we can keep everything valid";

without a race between them. Protecting the notifier chain under a
different lock is the trivial fix. I'll send the patch if you'd
like?

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2004-09-09 10:48 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2004-09-07 18:50 [patch 2/2] cpu hotplug notifier for updating sched domains nathanl
2004-09-08  0:44 ` Nick Piggin
2004-09-08  2:05   ` Nathan Lynch
2004-09-08  2:19     ` Nick Piggin
2004-09-08  2:55       ` Nathan Lynch
2004-09-09 10:18       ` Rusty Russell
2004-09-09 10:31         ` Nick Piggin

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox