public inbox for linux-kernel@vger.kernel.org
* [PATCH -tip] trace_workqueue: use percpu data for workqueue stat
@ 2009-01-15 12:01 Lai Jiangshan
  2009-01-15 13:09 ` Frédéric Weisbecker
  0 siblings, 1 reply; 7+ messages in thread
From: Lai Jiangshan @ 2009-01-15 12:01 UTC (permalink / raw)
  To: Frederic Weisbecker, Steven Rostedt
  Cc: Ingo Molnar, Linux Kernel Mailing List


Impact: make trace_workqueue work well on NUMA

It's not correct when (num_possible_cpus() < nr_cpumask_bits):
	all_workqueue_stat = kmalloc(sizeof(struct workqueue_global_stats)
				     * num_possible_cpus(), GFP_KERNEL);

And alloc_percpu() allocates each cpu's data on that cpu's memory node.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index f8118d3..79617e2 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -8,6 +8,7 @@
 
 #include <trace/workqueue.h>
 #include <linux/list.h>
+#include <linux/percpu.h>
 #include "trace_stat.h"
 #include "trace.h"
 
@@ -38,6 +39,7 @@ struct workqueue_global_stats {
  * never freed.
  */
 static struct workqueue_global_stats *all_workqueue_stat;
+#define workqueue_cpu_stat(cpu) per_cpu_ptr(all_workqueue_stat, cpu)
 
 /* Insertion of a work */
 static void
@@ -48,8 +50,8 @@ probe_workqueue_insertion(struct task_struct *wq_thread,
 	struct cpu_workqueue_stats *node, *next;
 	unsigned long flags;
 
-	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
-	list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list,
+	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
+	list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
 							list) {
 		if (node->pid == wq_thread->pid) {
 			atomic_inc(&node->inserted);
@@ -58,7 +60,7 @@ probe_workqueue_insertion(struct task_struct *wq_thread,
 	}
 	pr_debug("trace_workqueue: entry not found\n");
 found:
-	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
+	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
 }
 
 /* Execution of a work */
@@ -70,8 +72,8 @@ probe_workqueue_execution(struct task_struct *wq_thread,
 	struct cpu_workqueue_stats *node, *next;
 	unsigned long flags;
 
-	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
-	list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list,
+	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
+	list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
 							list) {
 		if (node->pid == wq_thread->pid) {
 			node->executed++;
@@ -80,7 +82,7 @@ probe_workqueue_execution(struct task_struct *wq_thread,
 	}
 	pr_debug("trace_workqueue: entry not found\n");
 found:
-	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
+	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
 }
 
 /* Creation of a cpu workqueue thread */
@@ -104,11 +106,11 @@ static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
 
 	cws->pid = wq_thread->pid;
 
-	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
-	if (list_empty(&all_workqueue_stat[cpu].list))
+	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
+	if (list_empty(&workqueue_cpu_stat(cpu)->list))
 		cws->first_entry = true;
-	list_add_tail(&cws->list, &all_workqueue_stat[cpu].list);
-	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
+	list_add_tail(&cws->list, &workqueue_cpu_stat(cpu)->list);
+	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
 }
 
 /* Destruction of a cpu workqueue thread */
@@ -119,8 +121,8 @@ static void probe_workqueue_destruction(struct task_struct *wq_thread)
 	struct cpu_workqueue_stats *node, *next;
 	unsigned long flags;
 
-	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
-	list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list,
+	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
+	list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
 							list) {
 		if (node->pid == wq_thread->pid) {
 			list_del(&node->list);
@@ -131,7 +133,7 @@ static void probe_workqueue_destruction(struct task_struct *wq_thread)
 
 	pr_debug("trace_workqueue: don't find workqueue to destroy\n");
 found:
-	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
+	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
 
 }
 
@@ -141,13 +143,13 @@ static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu)
 	struct cpu_workqueue_stats *ret = NULL;
 
 
-	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
+	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
 
-	if (!list_empty(&all_workqueue_stat[cpu].list))
-		ret = list_entry(all_workqueue_stat[cpu].list.next,
+	if (!list_empty(&workqueue_cpu_stat(cpu)->list))
+		ret = list_entry(workqueue_cpu_stat(cpu)->list.next,
 				 struct cpu_workqueue_stats, list);
 
-	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
+	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
 
 	return ret;
 }
@@ -172,9 +174,9 @@ static void *workqueue_stat_next(void *prev, int idx)
 	unsigned long flags;
 	void *ret = NULL;
 
-	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
-	if (list_is_last(&prev_cws->list, &all_workqueue_stat[cpu].list)) {
-		spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
+	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
+	if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) {
+		spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
 		for (++cpu ; cpu < num_possible_cpus(); cpu++) {
 			ret = workqueue_stat_start_cpu(cpu);
 			if (ret)
@@ -182,7 +184,7 @@ static void *workqueue_stat_next(void *prev, int idx)
 		}
 		return NULL;
 	}
-	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
+	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
 
 	return list_entry(prev_cws->list.next, struct cpu_workqueue_stats,
 			  list);
@@ -199,10 +201,10 @@ static int workqueue_stat_show(struct seq_file *s, void *p)
 		   cws->executed,
 		   trace_find_cmdline(cws->pid));
 
-	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
-	if (&cws->list == all_workqueue_stat[cpu].list.next)
+	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
+	if (&cws->list == workqueue_cpu_stat(cpu)->list.next)
 		seq_printf(s, "\n");
-	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
+	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
 
 	return 0;
 }
@@ -258,8 +260,7 @@ int __init trace_workqueue_early_init(void)
 	if (ret)
 		goto no_creation;
 
-	all_workqueue_stat = kmalloc(sizeof(struct workqueue_global_stats)
-				     * num_possible_cpus(), GFP_KERNEL);
+	all_workqueue_stat = alloc_percpu(struct workqueue_global_stats);
 
 	if (!all_workqueue_stat) {
 		pr_warning("trace_workqueue: not enough memory\n");
@@ -267,8 +268,8 @@ int __init trace_workqueue_early_init(void)
 	}
 
 	for_each_possible_cpu(cpu) {
-		spin_lock_init(&all_workqueue_stat[cpu].lock);
-		INIT_LIST_HEAD(&all_workqueue_stat[cpu].list);
+		spin_lock_init(&workqueue_cpu_stat(cpu)->lock);
+		INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list);
 	}
 
 	return 0;




* Re: [PATCH -tip] trace_workqueue: use percpu data for workqueue stat
  2009-01-15 12:01 [PATCH -tip] trace_workqueue: use percpu data for workqueue stat Lai Jiangshan
@ 2009-01-15 13:09 ` Frédéric Weisbecker
  2009-01-16  8:11   ` Lai Jiangshan
  2009-01-16  8:32   ` Lai Jiangshan
  0 siblings, 2 replies; 7+ messages in thread
From: Frédéric Weisbecker @ 2009-01-15 13:09 UTC (permalink / raw)
  To: Lai Jiangshan; +Cc: Steven Rostedt, Ingo Molnar, Linux Kernel Mailing List

Hi Lai,

2009/1/15 Lai Jiangshan <laijs@cn.fujitsu.com>:
>
> Impact: make trace_workqueue work well on NUMA
>
> It's not correct when (num_possible_cpus() < nr_cpumask_bits):
>        all_workqueue_stat = kmalloc(sizeof(struct workqueue_global_stats)
>                                     * num_possible_cpus(), GFP_KERNEL);


What is the difference between num_possible_cpus() and nr_cpumask_bits, actually?
It looks like nr_cpumask_bits is bound to NR_CPUS early on, and later it is
set to num_possible_cpus(), right?
In this case num_possible_cpus() seems more relevant... no?

(I'm pretty sure I'm wrong.... :-)

> And alloc_percpu() allocates each cpu's data on that cpu's memory node.

Ok.

Thanks.
Frederic.


> Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
> ---
> diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
> index f8118d3..79617e2 100644
> --- a/kernel/trace/trace_workqueue.c
> +++ b/kernel/trace/trace_workqueue.c
> @@ -8,6 +8,7 @@
>
>  #include <trace/workqueue.h>
>  #include <linux/list.h>
> +#include <linux/percpu.h>
>  #include "trace_stat.h"
>  #include "trace.h"
>
> @@ -38,6 +39,7 @@ struct workqueue_global_stats {
>  * never freed.
>  */
>  static struct workqueue_global_stats *all_workqueue_stat;
> +#define workqueue_cpu_stat(cpu) per_cpu_ptr(all_workqueue_stat, cpu)
>
>  /* Insertion of a work */
>  static void
> @@ -48,8 +50,8 @@ probe_workqueue_insertion(struct task_struct *wq_thread,
>        struct cpu_workqueue_stats *node, *next;
>        unsigned long flags;
>
> -       spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
> -       list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list,
> +       spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
> +       list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
>                                                        list) {
>                if (node->pid == wq_thread->pid) {
>                        atomic_inc(&node->inserted);
> @@ -58,7 +60,7 @@ probe_workqueue_insertion(struct task_struct *wq_thread,
>        }
>        pr_debug("trace_workqueue: entry not found\n");
>  found:
> -       spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
> +       spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
>  }
>
>  /* Execution of a work */
> @@ -70,8 +72,8 @@ probe_workqueue_execution(struct task_struct *wq_thread,
>        struct cpu_workqueue_stats *node, *next;
>        unsigned long flags;
>
> -       spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
> -       list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list,
> +       spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
> +       list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
>                                                        list) {
>                if (node->pid == wq_thread->pid) {
>                        node->executed++;
> @@ -80,7 +82,7 @@ probe_workqueue_execution(struct task_struct *wq_thread,
>        }
>        pr_debug("trace_workqueue: entry not found\n");
>  found:
> -       spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
> +       spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
>  }
>
>  /* Creation of a cpu workqueue thread */
> @@ -104,11 +106,11 @@ static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
>
>        cws->pid = wq_thread->pid;
>
> -       spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
> -       if (list_empty(&all_workqueue_stat[cpu].list))
> +       spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
> +       if (list_empty(&workqueue_cpu_stat(cpu)->list))
>                cws->first_entry = true;
> -       list_add_tail(&cws->list, &all_workqueue_stat[cpu].list);
> -       spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
> +       list_add_tail(&cws->list, &workqueue_cpu_stat(cpu)->list);
> +       spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
>  }
>
>  /* Destruction of a cpu workqueue thread */
> @@ -119,8 +121,8 @@ static void probe_workqueue_destruction(struct task_struct *wq_thread)
>        struct cpu_workqueue_stats *node, *next;
>        unsigned long flags;
>
> -       spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
> -       list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list,
> +       spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
> +       list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
>                                                        list) {
>                if (node->pid == wq_thread->pid) {
>                        list_del(&node->list);
> @@ -131,7 +133,7 @@ static void probe_workqueue_destruction(struct task_struct *wq_thread)
>
>        pr_debug("trace_workqueue: don't find workqueue to destroy\n");
>  found:
> -       spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
> +       spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
>
>  }
>
> @@ -141,13 +143,13 @@ static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu)
>        struct cpu_workqueue_stats *ret = NULL;
>
>
> -       spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
> +       spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
>
> -       if (!list_empty(&all_workqueue_stat[cpu].list))
> -               ret = list_entry(all_workqueue_stat[cpu].list.next,
> +       if (!list_empty(&workqueue_cpu_stat(cpu)->list))
> +               ret = list_entry(workqueue_cpu_stat(cpu)->list.next,
>                                 struct cpu_workqueue_stats, list);
>
> -       spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
> +       spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
>
>        return ret;
>  }
> @@ -172,9 +174,9 @@ static void *workqueue_stat_next(void *prev, int idx)
>        unsigned long flags;
>        void *ret = NULL;
>
> -       spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
> -       if (list_is_last(&prev_cws->list, &all_workqueue_stat[cpu].list)) {
> -               spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
> +       spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
> +       if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) {
> +               spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
>                for (++cpu ; cpu < num_possible_cpus(); cpu++) {
>                        ret = workqueue_stat_start_cpu(cpu);
>                        if (ret)
> @@ -182,7 +184,7 @@ static void *workqueue_stat_next(void *prev, int idx)
>                }
>                return NULL;
>        }
> -       spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
> +       spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
>
>        return list_entry(prev_cws->list.next, struct cpu_workqueue_stats,
>                          list);
> @@ -199,10 +201,10 @@ static int workqueue_stat_show(struct seq_file *s, void *p)
>                   cws->executed,
>                   trace_find_cmdline(cws->pid));
>
> -       spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
> -       if (&cws->list == all_workqueue_stat[cpu].list.next)
> +       spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
> +       if (&cws->list == workqueue_cpu_stat(cpu)->list.next)
>                seq_printf(s, "\n");
> -       spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
> +       spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
>
>        return 0;
>  }
> @@ -258,8 +260,7 @@ int __init trace_workqueue_early_init(void)
>        if (ret)
>                goto no_creation;
>
> -       all_workqueue_stat = kmalloc(sizeof(struct workqueue_global_stats)
> -                                    * num_possible_cpus(), GFP_KERNEL);
> +       all_workqueue_stat = alloc_percpu(struct workqueue_global_stats);
>
>        if (!all_workqueue_stat) {
>                pr_warning("trace_workqueue: not enough memory\n");
> @@ -267,8 +268,8 @@ int __init trace_workqueue_early_init(void)
>        }
>
>        for_each_possible_cpu(cpu) {
> -               spin_lock_init(&all_workqueue_stat[cpu].lock);
> -               INIT_LIST_HEAD(&all_workqueue_stat[cpu].list);
> +               spin_lock_init(&workqueue_cpu_stat(cpu)->lock);
> +               INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list);
>        }
>
>        return 0;
>
>
>


* Re: [PATCH -tip] trace_workqueue: use percpu data for workqueue stat
  2009-01-15 13:09 ` Frédéric Weisbecker
@ 2009-01-16  8:11   ` Lai Jiangshan
  2009-01-16  8:49     ` Frederic Weisbecker
  2009-01-16  8:58     ` Frederic Weisbecker
  2009-01-16  8:32   ` Lai Jiangshan
  1 sibling, 2 replies; 7+ messages in thread
From: Lai Jiangshan @ 2009-01-16  8:11 UTC (permalink / raw)
  To: Frédéric Weisbecker
  Cc: Steven Rostedt, Ingo Molnar, Linux Kernel Mailing List

Frédéric Weisbecker wrote:
> Hi Lai,
> 
> 2009/1/15 Lai Jiangshan <laijs@cn.fujitsu.com>:
>> Impact: make trace_workqueue work well on NUMA
>>
>> It's not correct when (num_possible_cpus() < nr_cpumask_bits):
>>        all_workqueue_stat = kmalloc(sizeof(struct workqueue_global_stats)
>>                                     * num_possible_cpus(), GFP_KERNEL);
> 
> 
> What is the difference between num_possible_cpus() and nr_cpumask_bits, actually?
> It looks like nr_cpumask_bits is bound to NR_CPUS early on, and later it is
> set to num_possible_cpus(), right?
> In this case num_possible_cpus() seems more relevant... no?
> 
> (I'm pretty sure I'm wrong.... :-)
> 

I meant to reference nr_cpu_ids, not nr_cpumask_bits (I made a mistake yesterday).

init/main.c
static void __init setup_nr_cpu_ids(void)
{
	nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1;
}
setup_nr_cpu_ids() is called directly from main.c; it runs earlier than any early_initcall.

So nr_cpu_ids is better than num_possible_cpus(): if, say, cpu_possible_mask=101B,
then nr_cpu_ids=3 while num_possible_cpus()=2, and we will access invalid memory
if the array is sized by num_possible_cpus().
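
For illustration only, a minimal sketch of the hazard (hypothetical machine
where cpu 1 is not possible, i.e. cpu_possible_mask=101B):

	struct workqueue_global_stats *stat;
	int cpu;

	/* On this machine: nr_cpu_ids == 3, num_possible_cpus() == 2 */
	stat = kmalloc(sizeof(*stat) * num_possible_cpus(), GFP_KERNEL);

	for_each_possible_cpu(cpu) {
		/* cpu iterates over 0 and 2; stat[2] lies past the end
		 * of the 2-element allocation -> invalid memory access */
		spin_lock_init(&stat[cpu].lock);
	}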

But percpu data, as my patch shows, is better still than sizing by nr_cpu_ids.
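
And a minimal sketch of the percpu alternative on the same hypothetical machine:

	struct workqueue_global_stats *stat;
	int cpu;

	stat = alloc_percpu(struct workqueue_global_stats);

	for_each_possible_cpu(cpu) {
		/* per_cpu_ptr() resolves to a per-cpu area that exists for
		 * every possible cpu, and each area lives on that cpu's
		 * memory node, so sparse cpu ids are safe here */
		spin_lock_init(&per_cpu_ptr(stat, cpu)->lock);
	}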

Thanks, Lai.





* Re: [PATCH -tip] trace_workqueue: use percpu data for workqueue stat
  2009-01-15 13:09 ` Frédéric Weisbecker
  2009-01-16  8:11   ` Lai Jiangshan
@ 2009-01-16  8:32   ` Lai Jiangshan
  2009-01-16  9:05     ` Frederic Weisbecker
  1 sibling, 1 reply; 7+ messages in thread
From: Lai Jiangshan @ 2009-01-16  8:32 UTC (permalink / raw)
  To: Frédéric Weisbecker
  Cc: Steven Rostedt, Ingo Molnar, Linux Kernel Mailing List

Impact: use percpu data instead of a hand-rolled per-cpu array

Use
static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat);
instead of allocating it dynamically.

And percpu data works well on NUMA.

Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
---
diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
index f8118d3..4664990 100644
--- a/kernel/trace/trace_workqueue.c
+++ b/kernel/trace/trace_workqueue.c
@@ -8,6 +8,7 @@
 
 #include <trace/workqueue.h>
 #include <linux/list.h>
+#include <linux/percpu.h>
 #include "trace_stat.h"
 #include "trace.h"
 
@@ -37,7 +38,8 @@ struct workqueue_global_stats {
 /* Don't need a global lock because allocated before the workqueues, and
  * never freed.
  */
-static struct workqueue_global_stats *all_workqueue_stat;
+static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat);
+#define workqueue_cpu_stat(cpu) (&per_cpu(all_workqueue_stat, cpu))
 
 /* Insertion of a work */
 static void
@@ -48,8 +50,8 @@ probe_workqueue_insertion(struct task_struct *wq_thread,
 	struct cpu_workqueue_stats *node, *next;
 	unsigned long flags;
 
-	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
-	list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list,
+	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
+	list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
 							list) {
 		if (node->pid == wq_thread->pid) {
 			atomic_inc(&node->inserted);
@@ -58,7 +60,7 @@ probe_workqueue_insertion(struct task_struct *wq_thread,
 	}
 	pr_debug("trace_workqueue: entry not found\n");
 found:
-	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
+	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
 }
 
 /* Execution of a work */
@@ -70,8 +72,8 @@ probe_workqueue_execution(struct task_struct *wq_thread,
 	struct cpu_workqueue_stats *node, *next;
 	unsigned long flags;
 
-	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
-	list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list,
+	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
+	list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
 							list) {
 		if (node->pid == wq_thread->pid) {
 			node->executed++;
@@ -80,7 +82,7 @@ probe_workqueue_execution(struct task_struct *wq_thread,
 	}
 	pr_debug("trace_workqueue: entry not found\n");
 found:
-	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
+	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
 }
 
 /* Creation of a cpu workqueue thread */
@@ -104,11 +106,11 @@ static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
 
 	cws->pid = wq_thread->pid;
 
-	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
-	if (list_empty(&all_workqueue_stat[cpu].list))
+	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
+	if (list_empty(&workqueue_cpu_stat(cpu)->list))
 		cws->first_entry = true;
-	list_add_tail(&cws->list, &all_workqueue_stat[cpu].list);
-	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
+	list_add_tail(&cws->list, &workqueue_cpu_stat(cpu)->list);
+	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
 }
 
 /* Destruction of a cpu workqueue thread */
@@ -119,8 +121,8 @@ static void probe_workqueue_destruction(struct task_struct *wq_thread)
 	struct cpu_workqueue_stats *node, *next;
 	unsigned long flags;
 
-	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
-	list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list,
+	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
+	list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
 							list) {
 		if (node->pid == wq_thread->pid) {
 			list_del(&node->list);
@@ -131,7 +133,7 @@ static void probe_workqueue_destruction(struct task_struct *wq_thread)
 
 	pr_debug("trace_workqueue: don't find workqueue to destroy\n");
 found:
-	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
+	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
 
 }
 
@@ -141,13 +143,13 @@ static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu)
 	struct cpu_workqueue_stats *ret = NULL;
 
 
-	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
+	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
 
-	if (!list_empty(&all_workqueue_stat[cpu].list))
-		ret = list_entry(all_workqueue_stat[cpu].list.next,
+	if (!list_empty(&workqueue_cpu_stat(cpu)->list))
+		ret = list_entry(workqueue_cpu_stat(cpu)->list.next,
 				 struct cpu_workqueue_stats, list);
 
-	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
+	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
 
 	return ret;
 }
@@ -172,9 +174,9 @@ static void *workqueue_stat_next(void *prev, int idx)
 	unsigned long flags;
 	void *ret = NULL;
 
-	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
-	if (list_is_last(&prev_cws->list, &all_workqueue_stat[cpu].list)) {
-		spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
+	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
+	if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) {
+		spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
 		for (++cpu ; cpu < num_possible_cpus(); cpu++) {
 			ret = workqueue_stat_start_cpu(cpu);
 			if (ret)
@@ -182,7 +184,7 @@ static void *workqueue_stat_next(void *prev, int idx)
 		}
 		return NULL;
 	}
-	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
+	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
 
 	return list_entry(prev_cws->list.next, struct cpu_workqueue_stats,
 			  list);
@@ -199,10 +201,10 @@ static int workqueue_stat_show(struct seq_file *s, void *p)
 		   cws->executed,
 		   trace_find_cmdline(cws->pid));
 
-	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
-	if (&cws->list == all_workqueue_stat[cpu].list.next)
+	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
+	if (&cws->list == workqueue_cpu_stat(cpu)->list.next)
 		seq_printf(s, "\n");
-	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
+	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
 
 	return 0;
 }
@@ -258,17 +260,9 @@ int __init trace_workqueue_early_init(void)
 	if (ret)
 		goto no_creation;
 
-	all_workqueue_stat = kmalloc(sizeof(struct workqueue_global_stats)
-				     * num_possible_cpus(), GFP_KERNEL);
-
-	if (!all_workqueue_stat) {
-		pr_warning("trace_workqueue: not enough memory\n");
-		goto no_creation;
-	}
-
 	for_each_possible_cpu(cpu) {
-		spin_lock_init(&all_workqueue_stat[cpu].lock);
-		INIT_LIST_HEAD(&all_workqueue_stat[cpu].list);
+		spin_lock_init(&workqueue_cpu_stat(cpu)->lock);
+		INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list);
 	}
 
 	return 0;



* Re: [PATCH -tip] trace_workqueue: use percpu data for workqueue stat
  2009-01-16  8:11   ` Lai Jiangshan
@ 2009-01-16  8:49     ` Frederic Weisbecker
  2009-01-16  8:58     ` Frederic Weisbecker
  1 sibling, 0 replies; 7+ messages in thread
From: Frederic Weisbecker @ 2009-01-16  8:49 UTC (permalink / raw)
  To: Lai Jiangshan; +Cc: Steven Rostedt, Ingo Molnar, Linux Kernel Mailing List

On Fri, Jan 16, 2009 at 04:11:51PM +0800, Lai Jiangshan wrote:
> Frédéric Weisbecker wrote:
> > Hi Lai,
> > 
> > 2009/1/15 Lai Jiangshan <laijs@cn.fujitsu.com>:
> >> Impact: make trace_workqueue work well on NUMA
> >>
> >> It's not correct when (num_possible_cpus() < nr_cpumask_bits):
> >>        all_workqueue_stat = kmalloc(sizeof(struct workqueue_global_stats)
> >>                                     * num_possible_cpus(), GFP_KERNEL);
> > 
> > 
> > What is the difference between num_possible_cpus() and nr_cpumask_bits, actually?
> > It looks like nr_cpumask_bits is bound to NR_CPUS early on, and later it is
> > set to num_possible_cpus(), right?
> > In this case num_possible_cpus() seems more relevant... no?
> > 
> > (I'm pretty sure I'm wrong.... :-)
> > 
> 
> I meant to reference nr_cpu_ids, not nr_cpumask_bits (I made a mistake yesterday).
> 
> init/main.c
> static void __init setup_nr_cpu_ids(void)
> {
> 	nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1;
> }
> setup_nr_cpu_ids() is called directly from main.c; it runs earlier than any early_initcall.
> 
> So nr_cpu_ids is better than num_possible_cpus(): if, say, cpu_possible_mask=101B,
> then nr_cpu_ids=3 while num_possible_cpus()=2, and we will access invalid memory
> if the array is sized by num_possible_cpus().


Ok.
But is it possible to enable a new possible cpu at runtime?

 
> But percpu data, as my patch shows, is better still than sizing by nr_cpu_ids.


Yeah that's right. And more proper.


> Thanks, Lai.
> 
> 
> 



* Re: [PATCH -tip] trace_workqueue: use percpu data for workqueue stat
  2009-01-16  8:11   ` Lai Jiangshan
  2009-01-16  8:49     ` Frederic Weisbecker
@ 2009-01-16  8:58     ` Frederic Weisbecker
  1 sibling, 0 replies; 7+ messages in thread
From: Frederic Weisbecker @ 2009-01-16  8:58 UTC (permalink / raw)
  To: Lai Jiangshan; +Cc: Steven Rostedt, Ingo Molnar, Linux Kernel Mailing List

On Fri, Jan 16, 2009 at 04:11:51PM +0800, Lai Jiangshan wrote:
> Frédéric Weisbecker wrote:
> > Hi Lai,
> > 
> > 2009/1/15 Lai Jiangshan <laijs@cn.fujitsu.com>:
> >> Impact: make trace_workqueue work well on NUMA
> >>
> >> It's not correct when (num_possible_cpus() < nr_cpumask_bits):
> >>        all_workqueue_stat = kmalloc(sizeof(struct workqueue_global_stats)
> >>                                     * num_possible_cpus(), GFP_KERNEL);
> > 
> > 
> > What is the difference between num_possible_cpus() and nr_cpumask_bits, actually?
> > It looks like nr_cpumask_bits is bound to NR_CPUS early on, and later it is
> > set to num_possible_cpus(), right?
> > In this case num_possible_cpus() seems more relevant... no?
> > 
> > (I'm pretty sure I'm wrong.... :-)
> > 
> 
> I meant to reference nr_cpu_ids, not nr_cpumask_bits (I made a mistake yesterday).
> 
> init/main.c
> static void __init setup_nr_cpu_ids(void)
> {
> 	nr_cpu_ids = find_last_bit(cpumask_bits(cpu_possible_mask),NR_CPUS) + 1;
> }
> setup_nr_cpu_ids() is called directly from main.c; it runs earlier than any early_initcall.
> 
> So nr_cpu_ids is better than num_possible_cpus(): if, say, cpu_possible_mask=101B,
> then nr_cpu_ids=3 while num_possible_cpus()=2, and we will access invalid memory
> if the array is sized by num_possible_cpus().


Oh, ok, I see now. If cpu_possible_mask is 101, there is a risk that cpu number 2
is later used directly as an index into the table. But I allocated the array
contiguously, with only num_possible_cpus() = 2 entries.

Thanks, I understand it better now.
 
> But percpu data, as my patch shows, is better still than sizing by nr_cpu_ids.
> 
> Thanks, Lai.
> 
> 
> 



* Re: [PATCH -tip] trace_workqueue: use percpu data for workqueue stat
  2009-01-16  8:32   ` Lai Jiangshan
@ 2009-01-16  9:05     ` Frederic Weisbecker
  0 siblings, 0 replies; 7+ messages in thread
From: Frederic Weisbecker @ 2009-01-16  9:05 UTC (permalink / raw)
  To: Lai Jiangshan; +Cc: Steven Rostedt, Ingo Molnar, Linux Kernel Mailing List

On Fri, Jan 16, 2009 at 04:32:25PM +0800, Lai Jiangshan wrote:
> Impact: use percpu data instead of a hand-rolled per-cpu array
> 
> Use
> static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat);
> instead of allocating it dynamically.
> 
> And percpu data works well on NUMA.
> 
> Signed-off-by: Lai Jiangshan <laijs@cn.fujitsu.com>
> ---


Thanks Lai!

Acked-by: Frederic Weisbecker <fweisbec@gmail.com>

> diff --git a/kernel/trace/trace_workqueue.c b/kernel/trace/trace_workqueue.c
> index f8118d3..4664990 100644
> --- a/kernel/trace/trace_workqueue.c
> +++ b/kernel/trace/trace_workqueue.c
> @@ -8,6 +8,7 @@
>  
>  #include <trace/workqueue.h>
>  #include <linux/list.h>
> +#include <linux/percpu.h>
>  #include "trace_stat.h"
>  #include "trace.h"
>  
> @@ -37,7 +38,8 @@ struct workqueue_global_stats {
>  /* Don't need a global lock because allocated before the workqueues, and
>   * never freed.
>   */
> -static struct workqueue_global_stats *all_workqueue_stat;
> +static DEFINE_PER_CPU(struct workqueue_global_stats, all_workqueue_stat);
> +#define workqueue_cpu_stat(cpu) (&per_cpu(all_workqueue_stat, cpu))
>  
>  /* Insertion of a work */
>  static void
> @@ -48,8 +50,8 @@ probe_workqueue_insertion(struct task_struct *wq_thread,
>  	struct cpu_workqueue_stats *node, *next;
>  	unsigned long flags;
>  
> -	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
> -	list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list,
> +	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
> +	list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
>  							list) {
>  		if (node->pid == wq_thread->pid) {
>  			atomic_inc(&node->inserted);
> @@ -58,7 +60,7 @@ probe_workqueue_insertion(struct task_struct *wq_thread,
>  	}
>  	pr_debug("trace_workqueue: entry not found\n");
>  found:
> -	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
> +	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
>  }
>  
>  /* Execution of a work */
> @@ -70,8 +72,8 @@ probe_workqueue_execution(struct task_struct *wq_thread,
>  	struct cpu_workqueue_stats *node, *next;
>  	unsigned long flags;
>  
> -	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
> -	list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list,
> +	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
> +	list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
>  							list) {
>  		if (node->pid == wq_thread->pid) {
>  			node->executed++;
> @@ -80,7 +82,7 @@ probe_workqueue_execution(struct task_struct *wq_thread,
>  	}
>  	pr_debug("trace_workqueue: entry not found\n");
>  found:
> -	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
> +	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
>  }
>  
>  /* Creation of a cpu workqueue thread */
> @@ -104,11 +106,11 @@ static void probe_workqueue_creation(struct task_struct *wq_thread, int cpu)
>  
>  	cws->pid = wq_thread->pid;
>  
> -	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
> -	if (list_empty(&all_workqueue_stat[cpu].list))
> +	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
> +	if (list_empty(&workqueue_cpu_stat(cpu)->list))
>  		cws->first_entry = true;
> -	list_add_tail(&cws->list, &all_workqueue_stat[cpu].list);
> -	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
> +	list_add_tail(&cws->list, &workqueue_cpu_stat(cpu)->list);
> +	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
>  }
>  
>  /* Destruction of a cpu workqueue thread */
> @@ -119,8 +121,8 @@ static void probe_workqueue_destruction(struct task_struct *wq_thread)
>  	struct cpu_workqueue_stats *node, *next;
>  	unsigned long flags;
>  
> -	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
> -	list_for_each_entry_safe(node, next, &all_workqueue_stat[cpu].list,
> +	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
> +	list_for_each_entry_safe(node, next, &workqueue_cpu_stat(cpu)->list,
>  							list) {
>  		if (node->pid == wq_thread->pid) {
>  			list_del(&node->list);
> @@ -131,7 +133,7 @@ static void probe_workqueue_destruction(struct task_struct *wq_thread)
>  
>  	pr_debug("trace_workqueue: don't find workqueue to destroy\n");
>  found:
> -	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
> +	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
>  
>  }
>  
> @@ -141,13 +143,13 @@ static struct cpu_workqueue_stats *workqueue_stat_start_cpu(int cpu)
>  	struct cpu_workqueue_stats *ret = NULL;
>  
>  
> -	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
> +	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
>  
> -	if (!list_empty(&all_workqueue_stat[cpu].list))
> -		ret = list_entry(all_workqueue_stat[cpu].list.next,
> +	if (!list_empty(&workqueue_cpu_stat(cpu)->list))
> +		ret = list_entry(workqueue_cpu_stat(cpu)->list.next,
>  				 struct cpu_workqueue_stats, list);
>  
> -	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
> +	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
>  
>  	return ret;
>  }
> @@ -172,9 +174,9 @@ static void *workqueue_stat_next(void *prev, int idx)
>  	unsigned long flags;
>  	void *ret = NULL;
>  
> -	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
> -	if (list_is_last(&prev_cws->list, &all_workqueue_stat[cpu].list)) {
> -		spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
> +	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
> +	if (list_is_last(&prev_cws->list, &workqueue_cpu_stat(cpu)->list)) {
> +		spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
>  		for (++cpu ; cpu < num_possible_cpus(); cpu++) {
>  			ret = workqueue_stat_start_cpu(cpu);
>  			if (ret)
> @@ -182,7 +184,7 @@ static void *workqueue_stat_next(void *prev, int idx)
>  		}
>  		return NULL;
>  	}
> -	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
> +	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
>  
>  	return list_entry(prev_cws->list.next, struct cpu_workqueue_stats,
>  			  list);
> @@ -199,10 +201,10 @@ static int workqueue_stat_show(struct seq_file *s, void *p)
>  		   cws->executed,
>  		   trace_find_cmdline(cws->pid));
>  
> -	spin_lock_irqsave(&all_workqueue_stat[cpu].lock, flags);
> -	if (&cws->list == all_workqueue_stat[cpu].list.next)
> +	spin_lock_irqsave(&workqueue_cpu_stat(cpu)->lock, flags);
> +	if (&cws->list == workqueue_cpu_stat(cpu)->list.next)
>  		seq_printf(s, "\n");
> -	spin_unlock_irqrestore(&all_workqueue_stat[cpu].lock, flags);
> +	spin_unlock_irqrestore(&workqueue_cpu_stat(cpu)->lock, flags);
>  
>  	return 0;
>  }
> @@ -258,17 +260,9 @@ int __init trace_workqueue_early_init(void)
>  	if (ret)
>  		goto no_creation;
>  
> -	all_workqueue_stat = kmalloc(sizeof(struct workqueue_global_stats)
> -				     * num_possible_cpus(), GFP_KERNEL);
> -
> -	if (!all_workqueue_stat) {
> -		pr_warning("trace_workqueue: not enough memory\n");
> -		goto no_creation;
> -	}
> -
>  	for_each_possible_cpu(cpu) {
> -		spin_lock_init(&all_workqueue_stat[cpu].lock);
> -		INIT_LIST_HEAD(&all_workqueue_stat[cpu].list);
> +		spin_lock_init(&workqueue_cpu_stat(cpu)->lock);
> +		INIT_LIST_HEAD(&workqueue_cpu_stat(cpu)->list);
>  	}
>  
>  	return 0;
> 


