public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 2.6.16-mm1 2/2] sched_domains: Allocate sched_groups dynamically
@ 2006-03-25  8:28 Srivatsa Vaddagiri
  2006-03-27 21:18 ` Siddha, Suresh B
  2006-07-07  0:01 ` Paul Jackson
  0 siblings, 2 replies; 7+ messages in thread
From: Srivatsa Vaddagiri @ 2006-03-25  8:28 UTC (permalink / raw)
  To: Nick Piggin, Ingo Molnar, pj
  Cc: hawkes, Dinakar Guniguntala, Andrew Morton, linux-kernel,
	suresh.b.siddha

I hit a kernel lockup problem while making a CPUset exclusive. This was
also reported before:

	http://lkml.org/lkml/2006/3/16/128  (Ref 1)

Dinakar (CCed here) said that it may be similar to the problem reported
here:
	
	http://lkml.org/lkml/2005/8/20/40   (Ref 2)

Upon code inspection, I found that it is indeed a similar problem for
SCHED_SMT case. The problem is recreated if SCHED_SMT is enabled in the
kernel, and one tries to make an exclusive CPUset with just one thread in
it. This causes the two threads of the same CPU to be in different
dynamic sched domains, although they share the same sched_group
structure at 'phys_domains' level. This will lead to the same kind of
corruption described in Ref 2 above.

Patch below, against 2.6.16-mm1, fixes the problem for both SCHED_SMT and 
SCHED_MC cases.

I have tested the patch on an 8-way (with HT) Intel Xeon machine and
found that the lockups I was facing earlier went away with the patch.

I couldn't test this on a multi-core machine, since I don't think we
have one in our lab.

Suresh, would you mind testing the patch on a multi-core machine, in case you 
have access to one?

Basically you would need to create an exclusive CPUset with one CPU in it 
(ensure that its sibling in the same core is not part of the same
CPUset). As soon as you make the CPUset exclusive, you would hit some
kind of hang. With this patch, the hang should go away.



Signed-off-by: Srivatsa Vaddagiri <vatsa@in.ibm.com>



 kernel/sched.c |   67 +++++++++++++++++++++++++++++++++++++++++++++++++--------
 1 files changed, 58 insertions(+), 9 deletions(-)

diff -puN kernel/sched.c~sd_dynschedgroups kernel/sched.c
--- linux-2.6.16-mm1/kernel/sched.c~sd_dynschedgroups	2006-03-25 11:45:05.000000000 +0530
+++ linux-2.6.16-mm1-root/kernel/sched.c	2006-03-25 13:30:01.000000000 +0530
@@ -5884,7 +5884,7 @@ static int cpu_to_cpu_group(int cpu)
 
 #ifdef CONFIG_SCHED_MC
 static DEFINE_PER_CPU(struct sched_domain, core_domains);
-static struct sched_group sched_group_core[NR_CPUS];
+static struct sched_group *sched_group_core_bycpu[NR_CPUS];
 #endif
 
 #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT)
@@ -5900,7 +5900,7 @@ static int cpu_to_core_group(int cpu)
 #endif
 
 static DEFINE_PER_CPU(struct sched_domain, phys_domains);
-static struct sched_group sched_group_phys[NR_CPUS];
+static struct sched_group *sched_group_phys_bycpu[NR_CPUS];
 static int cpu_to_phys_group(int cpu)
 {
 #if defined(CONFIG_SCHED_MC)
@@ -5963,7 +5963,12 @@ next_sg:
  */
 void build_sched_domains(const cpumask_t *cpu_map)
 {
-	int i;
+	int i, alloc_phys_failed = 0;
+	struct sched_group *sched_group_phys = NULL;
+#ifdef CONFIG_SCHED_MC
+	int alloc_core_failed = 0;
+	struct sched_group *sched_group_core = NULL;
+#endif
 #ifdef CONFIG_NUMA
 	int alloc_failed = 0;
 	struct sched_group **sched_group_nodes = NULL;
@@ -6026,15 +6031,41 @@ void build_sched_domains(const cpumask_t
 		cpus_and(sd->span, sd->span, *cpu_map);
 #endif
 
+		if (!sched_group_phys && !alloc_phys_failed) {
+			sched_group_phys
+				= kmalloc(sizeof(struct sched_group) * NR_CPUS,
+					  GFP_KERNEL);
+			if (!sched_group_phys) {
+				printk (KERN_WARNING
+				"Can not alloc phys sched group\n");
+				alloc_phys_failed = 1;
+			}
+			sched_group_phys_bycpu[i] = sched_group_phys;
+		}
+
 		p = sd;
 		sd = &per_cpu(phys_domains, i);
 		group = cpu_to_phys_group(i);
 		*sd = SD_CPU_INIT;
 		sd->span = nodemask;
 		sd->parent = p;
-		sd->groups = &sched_group_phys[group];
+		sd->groups = sched_group_phys ? &sched_group_phys[group] : NULL;
+		if (!sd->groups)
+			sd->flags = 0;		/* No load balancing */
 
 #ifdef CONFIG_SCHED_MC
+		if (!sched_group_core && !alloc_core_failed) {
+			sched_group_core
+				 = kmalloc(sizeof(struct sched_group) * NR_CPUS,
+					   GFP_KERNEL);
+			if (!sched_group_core) {
+				printk (KERN_WARNING
+				"Can not alloc core sched group\n");
+				alloc_core_failed = 1;
+			}
+			sched_group_core_bycpu[i] = sched_group_core;
+		}
+
 		p = sd;
 		sd = &per_cpu(core_domains, i);
 		group = cpu_to_core_group(i);
@@ -6042,7 +6073,9 @@ void build_sched_domains(const cpumask_t
 		sd->span = cpu_coregroup_map(i);
 		cpus_and(sd->span, sd->span, *cpu_map);
 		sd->parent = p;
-		sd->groups = &sched_group_core[group];
+		sd->groups = sched_group_core ? &sched_group_core[group] : NULL;
+		if (!sd->groups)
+			sd->flags = 0; 		/* No load balancing */
 #endif
 
 #ifdef CONFIG_SCHED_SMT
@@ -6077,8 +6110,9 @@ void build_sched_domains(const cpumask_t
 		cpus_and(this_core_map, this_core_map, *cpu_map);
 		if (i != first_cpu(this_core_map))
 			continue;
-		init_sched_build_groups(sched_group_core, this_core_map,
-					&cpu_to_core_group);
+		if (sched_group_core)
+			init_sched_build_groups(sched_group_core, this_core_map,
+						&cpu_to_core_group);
 	}
 #endif
 
@@ -6091,7 +6125,8 @@ void build_sched_domains(const cpumask_t
 		if (cpus_empty(nodemask))
 			continue;
 
-		init_sched_build_groups(sched_group_phys, nodemask,
+		if (sched_group_phys)
+			init_sched_build_groups(sched_group_phys, nodemask,
 						&cpu_to_phys_group);
 	}
 
@@ -6249,9 +6284,9 @@ static void arch_init_sched_domains(cons
 
 static void arch_destroy_sched_domains(const cpumask_t *cpu_map)
 {
+	int cpu;
 #ifdef CONFIG_NUMA
 	int i;
-	int cpu;
 
 	for_each_cpu_mask(cpu, *cpu_map) {
 		struct sched_group *sched_group_allnodes
@@ -6289,6 +6324,20 @@ next_sg:
 		sched_group_nodes_bycpu[cpu] = NULL;
 	}
 #endif
+	for_each_cpu_mask(cpu, *cpu_map) {
+		if (sched_group_phys_bycpu[cpu]) {
+			kfree(sched_group_phys_bycpu[cpu]);
+			sched_group_phys_bycpu[cpu] = NULL;
+		}
+	}
+#ifdef CONFIG_SCHED_MC
+	for_each_cpu_mask(cpu, *cpu_map) {
+		if (sched_group_core_bycpu[cpu]) {
+			kfree(sched_group_core_bycpu[cpu]);
+			sched_group_core_bycpu[cpu] = NULL;
+		}
+	}
+#endif
 }
 
 /*

_
_




	


-- 
Regards,
vatsa

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2006-07-07  5:26 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-03-25  8:28 [PATCH 2.6.16-mm1 2/2] sched_domains: Allocate sched_groups dynamically Srivatsa Vaddagiri
2006-03-27 21:18 ` Siddha, Suresh B
2006-07-07  0:01 ` Paul Jackson
2006-07-07  0:08   ` Siddha, Suresh B
2006-07-07  0:34     ` Paul Jackson
2006-07-07  0:36       ` Siddha, Suresh B
2006-07-07  5:26         ` Paul Jackson

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox