public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 2.6.16-mm2 4/4] sched_domain: Allocate sched_group structures dynamically
@ 2006-04-01 18:56 Srivatsa Vaddagiri
  2006-04-02  1:39 ` Nick Piggin
  0 siblings, 1 reply; 7+ messages in thread
From: Srivatsa Vaddagiri @ 2006-04-01 18:56 UTC (permalink / raw)
  To: Andrew Morton, Ingo Molnar, Nick Piggin
  Cc: suresh.b.siddha, Dinakar Guniguntala, pj, hawkes, linux-kernel

As explained here:
	http://marc.theaimsgroup.com/?l=linux-kernel&m=114327539012323&w=2

there is a problem with sharing sched_group structures between two
different sched_domains.  This patch overcomes the problem by allocating 
separate sched_group structures for different sched_domains.

The patch has been tested and found to avoid the kernel lockup problem described
in the above URL.


Signed-off-by: Srivatsa Vaddagiri <vatsa@in.ibm.com>


diff -puN kernel/sched.c~sd_dynschedgroup kernel/sched.c
--- linux-2.6.16-mm2/kernel/sched.c~sd_dynschedgroup	2006-04-01 23:40:56.000000000 +0530
+++ linux-2.6.16-mm2-root/kernel/sched.c	2006-04-01 23:40:56.000000000 +0530
@@ -5988,7 +5988,7 @@ static int cpu_to_cpu_group(int cpu)
 
 #ifdef CONFIG_SCHED_MC
 static DEFINE_PER_CPU(struct sched_domain, core_domains);
-static struct sched_group sched_group_core[NR_CPUS];
+static struct sched_group *sched_group_core_bycpu[NR_CPUS];
 #endif
 
 #if defined(CONFIG_SCHED_MC) && defined(CONFIG_SCHED_SMT)
@@ -6004,7 +6004,7 @@ static int cpu_to_core_group(int cpu)
 #endif
 
 static DEFINE_PER_CPU(struct sched_domain, phys_domains);
-static struct sched_group sched_group_phys[NR_CPUS];
+static struct sched_group *sched_group_phys_bycpu[NR_CPUS];
 static int cpu_to_phys_group(int cpu)
 {
 #if defined(CONFIG_SCHED_MC)
@@ -6064,9 +6064,9 @@ next_sg:
 /* Free memory allocated for various sched_group structures */
 static void free_sched_groups(const cpumask_t *cpu_map)
 {
+	int cpu;
 #ifdef CONFIG_NUMA
 	int i;
-	int cpu;
 
 	for_each_cpu_mask(cpu, *cpu_map) {
 		struct sched_group *sched_group_allnodes
@@ -6104,6 +6104,18 @@ next_sg:
 		sched_group_nodes_bycpu[cpu] = NULL;
 	}
 #endif
+	for_each_cpu_mask(cpu, *cpu_map) {
+		if (sched_group_phys_bycpu[cpu]) {
+			kfree(sched_group_phys_bycpu[cpu]);
+			sched_group_phys_bycpu[cpu] = NULL;
+		}
+#ifdef CONFIG_SCHED_MC
+		if (sched_group_core_bycpu[cpu]) {
+			kfree(sched_group_core_bycpu[cpu]);
+			sched_group_core_bycpu[cpu] = NULL;
+		}
+#endif
+	}
 }
 
 /*
@@ -6113,6 +6125,10 @@ next_sg:
 static int build_sched_domains(const cpumask_t *cpu_map)
 {
 	int i;
+	struct sched_group *sched_group_phys = NULL;
+#ifdef CONFIG_SCHED_MC
+	struct sched_group *sched_group_core = NULL;
+#endif
 #ifdef CONFIG_NUMA
 	struct sched_group **sched_group_nodes = NULL;
 	struct sched_group *sched_group_allnodes = NULL;
@@ -6171,6 +6187,18 @@ static int build_sched_domains(const cpu
 		cpus_and(sd->span, sd->span, *cpu_map);
 #endif
 
+		if (!sched_group_phys) {
+			sched_group_phys
+				= kmalloc(sizeof(struct sched_group) * NR_CPUS,
+					  GFP_KERNEL);
+			if (!sched_group_phys) {
+				printk (KERN_WARNING "Can not alloc phys sched"
+						     "group\n");
+				goto error;
+			}
+			sched_group_phys_bycpu[i] = sched_group_phys;
+		}
+
 		p = sd;
 		sd = &per_cpu(phys_domains, i);
 		group = cpu_to_phys_group(i);
@@ -6180,6 +6208,18 @@ static int build_sched_domains(const cpu
 		sd->groups = &sched_group_phys[group];
 
 #ifdef CONFIG_SCHED_MC
+		if (!sched_group_core) {
+			sched_group_core
+				= kmalloc(sizeof(struct sched_group) * NR_CPUS,
+					  GFP_KERNEL);
+			if (!sched_group_core) {
+				printk (KERN_WARNING "Can not alloc core sched"
+						     "group\n");
+				goto error;
+			}
+			sched_group_core_bycpu[i] = sched_group_core;
+		}
+		
 		p = sd;
 		sd = &per_cpu(core_domains, i);
 		group = cpu_to_core_group(i);
@@ -6375,11 +6415,9 @@ static int build_sched_domains(const cpu
 
 	return 0;
 
-#ifdef CONFIG_NUMA
 error:
 	free_sched_groups(cpu_map);
 	return -ENOMEM;
-#endif
 }
 /*
  * Set up scheduler domains and groups.  Callers must hold the hotplug lock.

_
-- 
Regards,
vatsa

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2.6.16-mm2 4/4] sched_domain: Allocate sched_group structures dynamically
  2006-04-01 18:56 [PATCH 2.6.16-mm2 4/4] sched_domain: Allocate sched_group structures dynamically Srivatsa Vaddagiri
@ 2006-04-02  1:39 ` Nick Piggin
  2006-04-02  5:04   ` Srivatsa Vaddagiri
  2006-04-02  7:35   ` Siddha, Suresh B
  0 siblings, 2 replies; 7+ messages in thread
From: Nick Piggin @ 2006-04-02  1:39 UTC (permalink / raw)
  To: vatsa
  Cc: Andrew Morton, Ingo Molnar, suresh.b.siddha, Dinakar Guniguntala,
	pj, hawkes, linux-kernel


Srivatsa Vaddagiri wrote:
>  /*
> @@ -6113,6 +6125,10 @@ next_sg:
>  static int build_sched_domains(const cpumask_t *cpu_map)
>  {
>  	int i;
> +	struct sched_group *sched_group_phys = NULL;
> +#ifdef CONFIG_SCHED_MC
> +	struct sched_group *sched_group_core = NULL;
> +#endif
>  #ifdef CONFIG_NUMA
>  	struct sched_group **sched_group_nodes = NULL;
>  	struct sched_group *sched_group_allnodes = NULL;
> @@ -6171,6 +6187,18 @@ static int build_sched_domains(const cpu
>  		cpus_and(sd->span, sd->span, *cpu_map);
>  #endif
>  
> +		if (!sched_group_phys) {
> +			sched_group_phys
> +				= kmalloc(sizeof(struct sched_group) * NR_CPUS,
> +					  GFP_KERNEL);
> +			if (!sched_group_phys) {
> +				printk (KERN_WARNING "Can not alloc phys sched"
> +						     "group\n");
> +				goto error;
> +			}
> +			sched_group_phys_bycpu[i] = sched_group_phys;
> +		}

Doesn't the last assignment have to be outside the if statement?

Hmm.. this design seems like the best way to go for now. Suresh?

-- 
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2.6.16-mm2 4/4] sched_domain: Allocate sched_group structures dynamically
  2006-04-02  1:39 ` Nick Piggin
@ 2006-04-02  5:04   ` Srivatsa Vaddagiri
  2006-04-02  5:21     ` Nick Piggin
  2006-04-02  7:35   ` Siddha, Suresh B
  1 sibling, 1 reply; 7+ messages in thread
From: Srivatsa Vaddagiri @ 2006-04-02  5:04 UTC (permalink / raw)
  To: Nick Piggin
  Cc: Andrew Morton, Ingo Molnar, suresh.b.siddha, Dinakar Guniguntala,
	pj, hawkes, linux-kernel

On Sun, Apr 02, 2006 at 11:39:30AM +1000, Nick Piggin wrote:
> Srivatsa Vaddagiri wrote:
> > 
> >+		if (!sched_group_phys) {
> >+			sched_group_phys
> >+				= kmalloc(sizeof(struct sched_group) * 
> >NR_CPUS,
> >+					  GFP_KERNEL);
> >+			if (!sched_group_phys) {
> >+				printk (KERN_WARNING "Can not alloc phys 
> >sched"
> >+						     "group\n");
> >+				goto error;
> >+			}
> >+			sched_group_phys_bycpu[i] = sched_group_phys;
> >+		}
> 
> Doesn't the last assignment have to be outside the if statement?

I don't think so. The assignment can happen once (when we allocate
successfully) and not every time in the for loop?


-- 
Regards,
vatsa

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2.6.16-mm2 4/4] sched_domain: Allocate sched_group structures dynamically
  2006-04-02  5:04   ` Srivatsa Vaddagiri
@ 2006-04-02  5:21     ` Nick Piggin
  2006-04-02  9:14       ` Srivatsa Vaddagiri
  0 siblings, 1 reply; 7+ messages in thread
From: Nick Piggin @ 2006-04-02  5:21 UTC (permalink / raw)
  To: vatsa
  Cc: Andrew Morton, Ingo Molnar, suresh.b.siddha, Dinakar Guniguntala,
	pj, hawkes, linux-kernel

Srivatsa Vaddagiri wrote:
> On Sun, Apr 02, 2006 at 11:39:30AM +1000, Nick Piggin wrote:
> 
>>Srivatsa Vaddagiri wrote:
>>
>>>+		if (!sched_group_phys) {
>>>+			sched_group_phys
>>>+				= kmalloc(sizeof(struct sched_group) * 
>>>NR_CPUS,
>>>+					  GFP_KERNEL);
>>>+			if (!sched_group_phys) {
>>>+				printk (KERN_WARNING "Can not alloc phys 
>>>sched"
>>>+						     "group\n");
>>>+				goto error;
>>>+			}
>>>+			sched_group_phys_bycpu[i] = sched_group_phys;
>>>+		}
>>
>>Doesn't the last assignment have to be outside the if statement?
> 
> 
> I dont think so. The assignment can happen once (when we allocate
> successfully) and not every time in the for loop?
> 

Then after you have allocated sched_group_phys, subsequent cpus
in cpu_map will have their sched_group_phys_bycpu[] entry
uninitialised, by the looks?

-- 
SUSE Labs, Novell Inc.
Send instant messages to your online friends http://au.messenger.yahoo.com 

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2.6.16-mm2 4/4] sched_domain: Allocate sched_group structures dynamically
  2006-04-02  1:39 ` Nick Piggin
  2006-04-02  5:04   ` Srivatsa Vaddagiri
@ 2006-04-02  7:35   ` Siddha, Suresh B
  2006-04-02  9:17     ` Srivatsa Vaddagiri
  1 sibling, 1 reply; 7+ messages in thread
From: Siddha, Suresh B @ 2006-04-02  7:35 UTC (permalink / raw)
  To: Nick Piggin
  Cc: vatsa, Andrew Morton, Ingo Molnar, suresh.b.siddha,
	Dinakar Guniguntala, pj, hawkes, linux-kernel

On Sun, Apr 02, 2006 at 11:39:30AM +1000, Nick Piggin wrote:
> 
> Srivatsa Vaddagiri wrote:
> >  /*
> > @@ -6113,6 +6125,10 @@ next_sg:
> >  static int build_sched_domains(const cpumask_t *cpu_map)
> >  {
> >  	int i;
> > +	struct sched_group *sched_group_phys = NULL;
> > +#ifdef CONFIG_SCHED_MC
> > +	struct sched_group *sched_group_core = NULL;
> > +#endif
> >  #ifdef CONFIG_NUMA
> >  	struct sched_group **sched_group_nodes = NULL;
> >  	struct sched_group *sched_group_allnodes = NULL;
> > @@ -6171,6 +6187,18 @@ static int build_sched_domains(const cpu
> >  		cpus_and(sd->span, sd->span, *cpu_map);
> >  #endif
> >  
> > +		if (!sched_group_phys) {
> > +			sched_group_phys
> > +				= kmalloc(sizeof(struct sched_group) * NR_CPUS,
> > +					  GFP_KERNEL);
> > +			if (!sched_group_phys) {
> > +				printk (KERN_WARNING "Can not alloc phys sched"
> > +						     "group\n");
> > +				goto error;
> > +			}
> > +			sched_group_phys_bycpu[i] = sched_group_phys;
> > +		}
> 
> Doesn't the last assignment have to be outside the if statement?
> 
> Hmm.. this design seems like the best way to go for now. Suresh?

Only thing I see in this is, even if there are very few cpus in the
exclusive cpuset, we end up allocating NR_CPUS groups and waste memory.

thanks,
suresh

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2.6.16-mm2 4/4] sched_domain: Allocate sched_group structures dynamically
  2006-04-02  5:21     ` Nick Piggin
@ 2006-04-02  9:14       ` Srivatsa Vaddagiri
  0 siblings, 0 replies; 7+ messages in thread
From: Srivatsa Vaddagiri @ 2006-04-02  9:14 UTC (permalink / raw)
  To: Nick Piggin
  Cc: Andrew Morton, Ingo Molnar, suresh.b.siddha, Dinakar Guniguntala,
	pj, hawkes, linux-kernel

On Sun, Apr 02, 2006 at 03:21:21PM +1000, Nick Piggin wrote:
> Then after you have allocated sched_group_phys, subsequent cpus
> in cpu_map will have their sched_group_phys_bycpu[] entry
> uninitialised, by the looks?

Not all the CPUs in cpu_map need to have an entry in 
sched_group_phys_bycpu[]. sched_group_phys_bycpu[] is purely used to
"remember" the chunk of memory we allocated for sched_group array
for later freeing. Individual CPUs in cpu_map point to a suitable entry 
in the sched_group array (through their phys_domains structure), as in the 
following lines of code after memory allocation:

	sd = &per_cpu(phys_domains, i);

	...

	sd->groups = &sched_group_phys[group];


Hope this clarifies!

-- 
Regards,
vatsa

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [PATCH 2.6.16-mm2 4/4] sched_domain: Allocate sched_group structures dynamically
  2006-04-02  7:35   ` Siddha, Suresh B
@ 2006-04-02  9:17     ` Srivatsa Vaddagiri
  0 siblings, 0 replies; 7+ messages in thread
From: Srivatsa Vaddagiri @ 2006-04-02  9:17 UTC (permalink / raw)
  To: Siddha, Suresh B
  Cc: Nick Piggin, Andrew Morton, Ingo Molnar, Dinakar Guniguntala, pj,
	hawkes, linux-kernel

On Sat, Apr 01, 2006 at 11:35:13PM -0800, Siddha, Suresh B wrote:
> Only thing I see in this is, even if there are very few cpus in the
> exclusive cpuset, we end up allocating NR_CPUS groups and waste memory.

I had realized that, but used NR_CPUS just to keep it simple (as is
being done in the case of NUMA - where they simply allocate for
MAX_NODES). I can take a shot at optimizing the memory allocation size
(for NUMA as well) and send another patch later, if people think so.

-- 
Regards,
vatsa

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2006-04-02  9:16 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2006-04-01 18:56 [PATCH 2.6.16-mm2 4/4] sched_domain: Allocate sched_group structures dynamically Srivatsa Vaddagiri
2006-04-02  1:39 ` Nick Piggin
2006-04-02  5:04   ` Srivatsa Vaddagiri
2006-04-02  5:21     ` Nick Piggin
2006-04-02  9:14       ` Srivatsa Vaddagiri
2006-04-02  7:35   ` Siddha, Suresh B
2006-04-02  9:17     ` Srivatsa Vaddagiri

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox