public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH][RESEND] tracing/ftrace: Introduce the big kernel lock tracer
@ 2008-10-22 17:09 Frederic Weisbecker
  2008-10-22 17:27 ` Randy Dunlap
  2008-10-22 18:39 ` Steven Rostedt
  0 siblings, 2 replies; 5+ messages in thread
From: Frederic Weisbecker @ 2008-10-22 17:09 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Steven Rostedt, Linux Kernel

Introduce the Big Kernel Lock tracer.
This new tracer lets one observe the latencies caused
by the lock_kernel() function.
It captures the time when the task requests the spinlock,
the time when the spinlock is held and the time it is released.
This way we can measure the latency for a task that waits for this
spinlock and the latency caused by the time this lock is held.

I will surely add some improvements like:

_ Adding a stack trace to know which function locked or unlocked the kernel.
_ Adding a script to get some statistics about this: which functions hold it
often, which hold it for a long time and some other things....

Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
---
 include/trace/bkl.h      |   29 ++++++++
 kernel/trace/Kconfig     |   11 +++
 kernel/trace/Makefile    |    1 +
 kernel/trace/trace.h     |   18 +++++
 kernel/trace/trace_bkl.c |  170 ++++++++++++++++++++++++++++++++++++++++++++++
 lib/kernel_lock.c        |   22 ++++++-
 6 files changed, 249 insertions(+), 2 deletions(-)

diff --git a/include/trace/bkl.h b/include/trace/bkl.h
new file mode 100644
index 0000000..bed2f1b
--- /dev/null
+++ b/include/trace/bkl.h
@@ -0,0 +1,29 @@
+#ifndef _TRACE_BKL_H
+#define _TRACE_BKL_H
+
+
+/* Entry which log the time when the task tried first to acquire the bkl
+ * and the time when it acquired it.
+ * That will let us know the latency to acquire the kernel lock.
+ */
+struct bkl_trace_acquire {
+	unsigned long long acquire_req_time;
+	unsigned long long acquire_time;
+};
+
+/* This will log the time when the task released the bkl.
+ * So we will know the whole duration of its kernel lock.
+ */
+struct bkl_trace_release {
+	unsigned long long release_time;
+};
+
+#ifdef CONFIG_BKL_TRACER
+extern void trace_bkl_acquire(struct bkl_trace_acquire *trace);
+extern void trace_bkl_release(struct bkl_trace_release *trace);
+#else
+static void trace_bkl_acquire(struct bkl_trace_acquire *trace) { }
+static void trace_bkl_release(struct bkl_trace_release *trace) { }
+#endif
+
+#endif
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 396aea1..fde7700 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -144,6 +144,17 @@ config STACK_TRACER
 	 This tracer records the max stack of the kernel, and displays
 	 it in debugfs/tracing/stack_trace

+config BKL_TRACER
+	bool "Trace Big Kernel Lock latencies"
+	depends on HAVE_FTRACE
+	depends on DEBUG_KERNEL
+	select FTRACE
+	select STACKTRACE
+	help
+	  This tracer records the latencies issued by the Big Kernel Lock.
+	  It traces the time when a task request the bkl, the time when it
+	  acquires it and the time when it releases it.
+
 config DYNAMIC_FTRACE
 	bool "enable/disable ftrace tracepoints dynamically"
 	depends on FTRACE
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index a85dfba..2ba8904 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -24,5 +24,6 @@ obj-$(CONFIG_NOP_TRACER) += trace_nop.o
 obj-$(CONFIG_STACK_TRACER) += trace_stack.o
 obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
 obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
+obj-$(CONFIG_BKL_TRACER) += trace_bkl.o

 libftrace-y := ftrace.o
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 707620c..b1df671 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -8,6 +8,7 @@
 #include <linux/ring_buffer.h>
 #include <linux/mmiotrace.h>
 #include <linux/ftrace.h>
+#include <trace/bkl.h>

 enum trace_type {
 	__TRACE_FIRST_TYPE = 0,
@@ -22,6 +23,8 @@ enum trace_type {
 	TRACE_MMIO_RW,
 	TRACE_MMIO_MAP,
 	TRACE_BOOT,
+	TRACE_BKL_ACQ,
+	TRACE_BKL_REL,

 	__TRACE_LAST_TYPE
 };
@@ -119,6 +122,17 @@ struct trace_boot {
 	struct boot_trace	initcall;
 };

+/* Trace bkl entries */
+struct trace_bkl_acq {
+	struct trace_entry		ent;
+	struct bkl_trace_acquire	bkl_acq;
+};
+
+struct trace_bkl_rel {
+	struct trace_entry		ent;
+	struct bkl_trace_release	bkl_rel;
+};
+
 /*
 * trace_flag_type is an enumeration that holds different
 * states when a trace occurs. These are:
@@ -219,6 +233,10 @@ extern void __ftrace_bad_type(void);
 		IF_ASSIGN(var, ent, struct trace_mmiotrace_map,		\
 			  TRACE_MMIO_MAP);				\
 		IF_ASSIGN(var, ent, struct trace_boot, TRACE_BOOT);     \
+		IF_ASSIGN(var, ent, struct trace_bkl_acq,               \
+			  TRACE_BKL_ACQ);				\
+		IF_ASSIGN(var, ent, struct trace_bkl_rel,		\
+			  TRACE_BKL_REL);				\
 		__ftrace_bad_type();					\
 	} while (0)

diff --git a/kernel/trace/trace_bkl.c b/kernel/trace/trace_bkl.c
new file mode 100644
index 0000000..169ee01
--- /dev/null
+++ b/kernel/trace/trace_bkl.c
@@ -0,0 +1,170 @@
+/*
+ * Ring buffer based Big Kernel Lock tracer
+ * for purpose of measuring the latencies issued
+ * by the BKL.
+ *
+ * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
+ */
+
+#include <trace/bkl.h>
+#include "trace.h"
+#include <linux/ftrace.h>
+
+
+static struct trace_array *bkl_trace;
+static bool bkl_tracer_enabled;
+
+
+static void bkl_trace_init(struct trace_array *tr)
+{
+	int cpu;
+	bkl_trace = tr;
+
+	for_each_cpu_mask(cpu, cpu_possible_map)
+		tracing_reset(tr, cpu);
+
+	if (tr->ctrl)
+		bkl_tracer_enabled = 1;
+}
+
+static void reset_bkl_trace(struct trace_array *tr)
+{
+	bkl_tracer_enabled = 0;
+}
+
+static void bkl_trace_ctrl_update(struct trace_array *tr)
+{
+	if (tr->ctrl)
+		bkl_tracer_enabled = 1;
+	else
+		bkl_tracer_enabled = 0;
+}
+
+static enum print_line_t bkl_acq_print_line(struct trace_iterator *iter)
+{
+	int ret;
+	struct trace_entry *entry = iter->ent;
+	struct trace_bkl_acq *field = (struct trace_bkl_acq *) entry;
+	struct bkl_trace_acquire *bta = &field->bkl_acq;
+	struct trace_seq *s = &iter->seq;
+	unsigned long sec, nsec_rem;
+	unsigned long long wait_for_spin;
+
+	wait_for_spin = bta->acquire_time - bta->acquire_req_time;
+	nsec_rem = do_div(bta->acquire_req_time, 1000000000);
+	sec = (unsigned long) bta->acquire_req_time;
+	ret = trace_seq_printf(s, "Kernel lock requested at %5lu.%09lu ",
+				  sec, nsec_rem);
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+
+	nsec_rem = do_div(bta->acquire_time, 1000000000);
+	sec = (unsigned long) bta->acquire_time;
+	ret = trace_seq_printf(s, "acquired at %5lu.%09lu "
+				  "(waited %llu nsecs)\n",
+				  sec, nsec_rem,
+				  wait_for_spin);
+	if (!ret)
+		return TRACE_TYPE_PARTIAL_LINE;
+	return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t bkl_rel_print_line(struct trace_iterator *iter)
+{
+	int ret;
+	struct trace_entry *entry = iter->ent;
+	struct trace_bkl_rel *field = (struct trace_bkl_rel *) entry;
+	struct bkl_trace_release *btr = &field->bkl_rel;
+	struct trace_seq *s = &iter->seq;
+	unsigned long sec, nsec_rem;
+
+	nsec_rem = do_div(btr->release_time, 1000000000);
+	sec = (unsigned long) btr->release_time;
+	ret = trace_seq_printf(s, "Kernel lock released at  %5lu.%09lu\n",
+				  sec, nsec_rem);
+	return TRACE_TYPE_HANDLED;
+}
+
+static enum print_line_t bkl_trace_print_line(struct trace_iterator *iter)
+{
+	struct trace_entry *entry = iter->ent;
+
+	if (entry->type == TRACE_BKL_ACQ)
+		return bkl_acq_print_line(iter);
+	if (entry->type == TRACE_BKL_REL)
+		return bkl_rel_print_line(iter);
+
+	return TRACE_TYPE_UNHANDLED;
+}
+
+static struct tracer bkl_tracer __read_mostly =
+{
+	.name		= "bkl",
+	.init		= bkl_trace_init,
+	.reset		= reset_bkl_trace,
+	.ctrl_update	= bkl_trace_ctrl_update,
+	.print_line	= bkl_trace_print_line,
+};
+
+__init static int init_bkl_tracer(void)
+{
+	return register_tracer(&bkl_tracer);
+}
+device_initcall(init_bkl_tracer);
+
+/*
+ * These functions don't need us to disable
+ * the preemption since it has been done by lock_kernel()
+ */
+
+void trace_bkl_acquire(struct bkl_trace_acquire *trace)
+{
+	struct ring_buffer_event *event;
+	struct trace_bkl_acq *entry;
+	struct trace_array_cpu *data;
+	unsigned long irq_flags;
+	struct trace_array *tr = bkl_trace;
+
+	if (!bkl_tracer_enabled)
+		return;
+
+	data = tr->data[smp_processor_id()];
+
+	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					 &irq_flags);
+	if (!event)
+		return;
+	entry = ring_buffer_event_data(event);
+	tracing_generic_entry_update(&entry->ent, 0, 0);
+	entry->ent.type = TRACE_BKL_ACQ;
+	entry->bkl_acq = *trace;
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+	trace_wake_up();
+}
+
+void trace_bkl_release(struct bkl_trace_release *trace)
+{
+	struct ring_buffer_event *event;
+	struct trace_bkl_rel *entry;
+	struct trace_array_cpu *data;
+	unsigned long irq_flags;
+	struct trace_array *tr = bkl_trace;
+
+	if (!bkl_tracer_enabled)
+		return;
+
+	data = tr->data[smp_processor_id()];
+
+	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
+					 &irq_flags);
+	if (!event)
+		return;
+	entry   = ring_buffer_event_data(event);
+	tracing_generic_entry_update(&entry->ent, 0, 0);
+	entry->ent.type = TRACE_BKL_REL;
+	entry->bkl_rel = *trace;
+	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
+
+	trace_wake_up();
+}
diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c
index 01a3c22..9987834 100644
--- a/lib/kernel_lock.c
+++ b/lib/kernel_lock.c
@@ -9,6 +9,7 @@
 #include <linux/module.h>
 #include <linux/kallsyms.h>
 #include <linux/semaphore.h>
+#include <trace/bkl.h>

 /*
 * The 'big kernel lock'
@@ -116,16 +117,33 @@ static inline void __unlock_kernel(void)
 void __lockfunc lock_kernel(void)
 {
 	int depth = current->lock_depth+1;
-	if (likely(!depth))
+	if (likely(!depth)) {
+#ifdef CONFIG_BKL_TRACER
+		struct bkl_trace_acquire trace;
+		preempt_disable();
+		trace.acquire_req_time = cpu_clock(raw_smp_processor_id());
+		preempt_enable();
 		__lock_kernel();
+		trace.acquire_time = cpu_clock(raw_smp_processor_id());
+		trace_bkl_acquire(&trace);
+#else
+		__lock_kernel();
+#endif
+	}
 	current->lock_depth = depth;
 }

 void __lockfunc unlock_kernel(void)
 {
 	BUG_ON(current->lock_depth < 0);
-	if (likely(--current->lock_depth < 0))
+	if (likely(--current->lock_depth < 0)) {
+#ifdef CONFIG_BKL_TRACER
+		struct bkl_trace_release trace;
+		trace.release_time = cpu_clock(raw_smp_processor_id());
+		trace_bkl_release(&trace);
+#endif
 		__unlock_kernel();
+	}
 }

 EXPORT_SYMBOL(lock_kernel);

^ permalink raw reply related	[flat|nested] 5+ messages in thread

* Re: [PATCH][RESEND] tracing/ftrace: Introduce the big kernel lock tracer
  2008-10-22 17:09 [PATCH][RESEND] tracing/ftrace: Introduce the big kernel lock tracer Frederic Weisbecker
@ 2008-10-22 17:27 ` Randy Dunlap
  2008-10-22 17:44   ` Frédéric Weisbecker
  2008-10-22 18:39 ` Steven Rostedt
  1 sibling, 1 reply; 5+ messages in thread
From: Randy Dunlap @ 2008-10-22 17:27 UTC (permalink / raw)
  To: Frederic Weisbecker; +Cc: Ingo Molnar, Steven Rostedt, Linux Kernel

Frederic Weisbecker wrote:
> Signed-off-by: Frederic Weisbecker <fweisbec@gmail.com>
> Cc: Steven Rostedt <rostedt@goodmis.org>
> ---
>  include/trace/bkl.h      |   29 ++++++++
>  kernel/trace/Kconfig     |   11 +++
>  kernel/trace/Makefile    |    1 +
>  kernel/trace/trace.h     |   18 +++++
>  kernel/trace/trace_bkl.c |  170 ++++++++++++++++++++++++++++++++++++++++++++++
>  lib/kernel_lock.c        |   22 ++++++-
>  6 files changed, 249 insertions(+), 2 deletions(-)
> 
> diff --git a/include/trace/bkl.h b/include/trace/bkl.h
> new file mode 100644
> index 0000000..bed2f1b
> --- /dev/null
> +++ b/include/trace/bkl.h
> @@ -0,0 +1,29 @@
> +#ifndef _TRACE_BKL_H
> +#define _TRACE_BKL_H
> +
> +
> +/* Entry which log the time when the task tried first to acquire the bkl
> + * and the time when it acquired it.
> + * That will let us know the latency to acquire the kernel lock.
> + */

Please use the more common/accepted long-comment style:

/*
 * comment line 1
 * comment line 2
 */

like you did in some other places.

> +struct bkl_trace_acquire {
> +	unsigned long long acquire_req_time;
> +	unsigned long long acquire_time;
> +};
> +
> +/* This will log the time when the task released the bkl.
> + * So we will know the whole duration of its kernel lock.
> + */

ditto et al

> +struct bkl_trace_release {
> +	unsigned long long release_time;
> +};
> +
> +#ifdef CONFIG_BKL_TRACER
> +extern void trace_bkl_acquire(struct bkl_trace_acquire *trace);
> +extern void trace_bkl_release(struct bkl_trace_release *trace);
> +#else
> +static void trace_bkl_acquire(struct bkl_trace_acquire *trace) { }
> +static void trace_bkl_release(struct bkl_trace_release *trace) { }
> +#endif
> +
> +#endif
> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
> index 396aea1..fde7700 100644
> --- a/kernel/trace/Kconfig
> +++ b/kernel/trace/Kconfig
> @@ -144,6 +144,17 @@ config STACK_TRACER
>  	 This tracer records the max stack of the kernel, and displays
>  	 it in debugfs/tracing/stack_trace
> 
> +config BKL_TRACER
> +	bool "Trace Big Kernel Lock latencies"
> +	depends on HAVE_FTRACE
> +	depends on DEBUG_KERNEL
> +	select FTRACE
> +	select STACKTRACE
> +	help
> +	  This tracer records the latencies issued by the Big Kernel Lock.
> +	  It traces the time when a task request the bkl, the time when it

	                                 requests

> +	  acquires it and the time when it releases it.
> +
>  config DYNAMIC_FTRACE
>  	bool "enable/disable ftrace tracepoints dynamically"
>  	depends on FTRACE

> diff --git a/kernel/trace/trace_bkl.c b/kernel/trace/trace_bkl.c
> new file mode 100644
> index 0000000..169ee01
> --- /dev/null
> +++ b/kernel/trace/trace_bkl.c
> @@ -0,0 +1,170 @@
> +/*
> + * Ring buffer based Big Kernel Lock tracer
> + * for purpose of measuring the latencies issued
> + * by the BKL.
> + *
> + * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
> + */
> +
> +#include <trace/bkl.h>
> +#include "trace.h"
> +#include <linux/ftrace.h>
> +
> +
> +static struct trace_array *bkl_trace;
> +static bool bkl_tracer_enabled;
> +
> +
> +static void bkl_trace_init(struct trace_array *tr)
> +{
> +	int cpu;
> +	bkl_trace = tr;

what is <bkl_trace> used for?  or unused?

> +
> +	for_each_cpu_mask(cpu, cpu_possible_map)
> +		tracing_reset(tr, cpu);
> +
> +	if (tr->ctrl)
> +		bkl_tracer_enabled = 1;
> +}


~Randy


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH][RESEND] tracing/ftrace: Introduce the big kernel lock tracer
  2008-10-22 17:27 ` Randy Dunlap
@ 2008-10-22 17:44   ` Frédéric Weisbecker
  0 siblings, 0 replies; 5+ messages in thread
From: Frédéric Weisbecker @ 2008-10-22 17:44 UTC (permalink / raw)
  To: Randy Dunlap; +Cc: Ingo Molnar, Steven Rostedt, Linux Kernel

2008/10/22 Randy Dunlap <randy.dunlap@oracle.com>
>
> Please use the more common/accepted long-comment style:
>
> /*
>  * comment line 1
>  * comment line 2
>  */
>

Ok, I will correct it.


>                                         requests

Oops....


> what is <bkl_trace> used for?  or unused?

It is used in the trace_bkl_acquire and trace_bkl_release functions.
It is really necessary
to keep a reference to the global trace array.

I can send a new patch with the corrections, or send a new one which
applies on top of this one
if it is accepted.
As you want Ingo...

^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH][RESEND] tracing/ftrace: Introduce the big kernel lock tracer
  2008-10-22 17:09 [PATCH][RESEND] tracing/ftrace: Introduce the big kernel lock tracer Frederic Weisbecker
  2008-10-22 17:27 ` Randy Dunlap
@ 2008-10-22 18:39 ` Steven Rostedt
  2008-10-22 18:54   ` Frédéric Weisbecker
  1 sibling, 1 reply; 5+ messages in thread
From: Steven Rostedt @ 2008-10-22 18:39 UTC (permalink / raw)
  To: Frederic Weisbecker; +Cc: Ingo Molnar, Linux Kernel


On Wed, 22 Oct 2008, Frederic Weisbecker wrote:
> 
> diff --git a/include/trace/bkl.h b/include/trace/bkl.h
> new file mode 100644
> index 0000000..bed2f1b
> --- /dev/null
> +++ b/include/trace/bkl.h
> @@ -0,0 +1,29 @@
> +#ifndef _TRACE_BKL_H
> +#define _TRACE_BKL_H
> +
> +
> +/* Entry which log the time when the task tried first to acquire the bkl
> + * and the time when it acquired it.
> + * That will let us know the latency to acquire the kernel lock.
> + */

We recommend the 

  /*
   * Multi line
   * comment style.
   */


> +struct bkl_trace_acquire {
> +	unsigned long long acquire_req_time;
> +	unsigned long long acquire_time;
> +};
> +
> +/* This will log the time when the task released the bkl.
> + * So we will know the whole duration of its kernel lock.
> + */
> +struct bkl_trace_release {
> +	unsigned long long release_time;
> +};
> +
> +#ifdef CONFIG_BKL_TRACER
> +extern void trace_bkl_acquire(struct bkl_trace_acquire *trace);
> +extern void trace_bkl_release(struct bkl_trace_release *trace);
> +#else
> +static void trace_bkl_acquire(struct bkl_trace_acquire *trace) { }
> +static void trace_bkl_release(struct bkl_trace_release *trace) { }

I think you want these as "static inline"

> +#endif
> +
> +#endif
> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
> index 396aea1..fde7700 100644
> --- a/kernel/trace/Kconfig
> +++ b/kernel/trace/Kconfig
> @@ -144,6 +144,17 @@ config STACK_TRACER
>  	 This tracer records the max stack of the kernel, and displays
>  	 it in debugfs/tracing/stack_trace
> 
> +config BKL_TRACER
> +	bool "Trace Big Kernel Lock latencies"
> +	depends on HAVE_FTRACE

You don't need FTRACE for this.

> +	depends on DEBUG_KERNEL
> +	select FTRACE

Again, you don't need ftrace for this. You should just be able to trace 
the latencies and report the places that are big. If FTRACE is configured 
in, then you can also get the function trace.  We have patches to replace
FTRACE with FUNCTION_TRACER to eliminate this confusion.


> +	select STACKTRACE
> +	help
> +	  This tracer records the latencies issued by the Big Kernel Lock.
> +	  It traces the time when a task request the bkl, the time when it
> +	  acquires it and the time when it releases it.
> +
>  config DYNAMIC_FTRACE
>  	bool "enable/disable ftrace tracepoints dynamically"
>  	depends on FTRACE
> diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
> index a85dfba..2ba8904 100644
> --- a/kernel/trace/Makefile
> +++ b/kernel/trace/Makefile
> @@ -24,5 +24,6 @@ obj-$(CONFIG_NOP_TRACER) += trace_nop.o
>  obj-$(CONFIG_STACK_TRACER) += trace_stack.o
>  obj-$(CONFIG_MMIOTRACE) += trace_mmiotrace.o
>  obj-$(CONFIG_BOOT_TRACER) += trace_boot.o
> +obj-$(CONFIG_BKL_TRACER) += trace_bkl.o
> 
>  libftrace-y := ftrace.o
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index 707620c..b1df671 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -8,6 +8,7 @@
>  #include <linux/ring_buffer.h>
>  #include <linux/mmiotrace.h>
>  #include <linux/ftrace.h>
> +#include <trace/bkl.h>
> 
>  enum trace_type {
>  	__TRACE_FIRST_TYPE = 0,
> @@ -22,6 +23,8 @@ enum trace_type {
>  	TRACE_MMIO_RW,
>  	TRACE_MMIO_MAP,
>  	TRACE_BOOT,
> +	TRACE_BKL_ACQ,
> +	TRACE_BKL_REL,
> 
>  	__TRACE_LAST_TYPE
>  };
> @@ -119,6 +122,17 @@ struct trace_boot {
>  	struct boot_trace	initcall;
>  };
> 
> +/* Trace bkl entries */
> +struct trace_bkl_acq {
> +	struct trace_entry		ent;
> +	struct bkl_trace_acquire	bkl_acq;
> +};
> +
> +struct trace_bkl_rel {
> +	struct trace_entry		ent;
> +	struct bkl_trace_release	bkl_rel;
> +};
> +
>  /*
>  * trace_flag_type is an enumeration that holds different
>  * states when a trace occurs. These are:
> @@ -219,6 +233,10 @@ extern void __ftrace_bad_type(void);
>  		IF_ASSIGN(var, ent, struct trace_mmiotrace_map,		\
>  			  TRACE_MMIO_MAP);				\
>  		IF_ASSIGN(var, ent, struct trace_boot, TRACE_BOOT);     \
> +		IF_ASSIGN(var, ent, struct trace_bkl_acq,               \
> +			  TRACE_BKL_ACQ);				\
> +		IF_ASSIGN(var, ent, struct trace_bkl_rel,		\
> +			  TRACE_BKL_REL);				\
>  		__ftrace_bad_type();					\
>  	} while (0)
> 
> diff --git a/kernel/trace/trace_bkl.c b/kernel/trace/trace_bkl.c
> new file mode 100644
> index 0000000..169ee01
> --- /dev/null
> +++ b/kernel/trace/trace_bkl.c
> @@ -0,0 +1,170 @@
> +/*
> + * Ring buffer based Big Kernel Lock tracer
> + * for purpose of measuring the latencies issued
> + * by the BKL.
> + *
> + * Copyright (C) 2008 Frederic Weisbecker <fweisbec@gmail.com>
> + */
> +
> +#include <trace/bkl.h>
> +#include "trace.h"
> +#include <linux/ftrace.h>
> +
> +
> +static struct trace_array *bkl_trace;
> +static bool bkl_tracer_enabled;
> +
> +
> +static void bkl_trace_init(struct trace_array *tr)
> +{
> +	int cpu;
> +	bkl_trace = tr;
> +
> +	for_each_cpu_mask(cpu, cpu_possible_map)
> +		tracing_reset(tr, cpu);
> +
> +	if (tr->ctrl)
> +		bkl_tracer_enabled = 1;
> +}
> +
> +static void reset_bkl_trace(struct trace_array *tr)
> +{
> +	bkl_tracer_enabled = 0;
> +}
> +
> +static void bkl_trace_ctrl_update(struct trace_array *tr)
> +{
> +	if (tr->ctrl)
> +		bkl_tracer_enabled = 1;
> +	else
> +		bkl_tracer_enabled = 0;
> +}
> +
> +static enum print_line_t bkl_acq_print_line(struct trace_iterator *iter)
> +{
> +	int ret;
> +	struct trace_entry *entry = iter->ent;
> +	struct trace_bkl_acq *field = (struct trace_bkl_acq *) entry;

Could you use the trace_assign_type(field, entry) instead.
See trace.c for examples.


> +	struct bkl_trace_acquire *bta = &field->bkl_acq;
> +	struct trace_seq *s = &iter->seq;
> +	unsigned long sec, nsec_rem;
> +	unsigned long long wait_for_spin;
> +
> +	wait_for_spin = bta->acquire_time - bta->acquire_req_time;
> +	nsec_rem = do_div(bta->acquire_req_time, 1000000000);
> +	sec = (unsigned long) bta->acquire_req_time;
> +	ret = trace_seq_printf(s, "Kernel lock requested at %5lu.%09lu ",
> +				  sec, nsec_rem);
> +	if (!ret)
> +		return TRACE_TYPE_PARTIAL_LINE;
> +
> +	nsec_rem = do_div(bta->acquire_time, 1000000000);
> +	sec = (unsigned long) bta->acquire_time;
> +	ret = trace_seq_printf(s, "acquired at %5lu.%09lu "
> +				  "(waited %llu nsecs)\n",
> +				  sec, nsec_rem,
> +				  wait_for_spin);
> +	if (!ret)
> +		return TRACE_TYPE_PARTIAL_LINE;
> +	return TRACE_TYPE_HANDLED;
> +}
> +
> +static enum print_line_t bkl_rel_print_line(struct trace_iterator *iter)
> +{
> +	int ret;
> +	struct trace_entry *entry = iter->ent;
> +	struct trace_bkl_rel *field = (struct trace_bkl_rel *) entry;

Same here.

> +	struct bkl_trace_release *btr = &field->bkl_rel;
> +	struct trace_seq *s = &iter->seq;
> +	unsigned long sec, nsec_rem;
> +
> +	nsec_rem = do_div(btr->release_time, 1000000000);
> +	sec = (unsigned long) btr->release_time;
> +	ret = trace_seq_printf(s, "Kernel lock released at  %5lu.%09lu\n",
> +				  sec, nsec_rem);
> +	return TRACE_TYPE_HANDLED;
> +}
> +
> +static enum print_line_t bkl_trace_print_line(struct trace_iterator *iter)
> +{
> +	struct trace_entry *entry = iter->ent;
> +

You might want to make this into a switch instead.

> +	if (entry->type == TRACE_BKL_ACQ)
> +		return bkl_acq_print_line(iter);
> +	if (entry->type == TRACE_BKL_REL)
> +		return bkl_rel_print_line(iter);
> +
> +	return TRACE_TYPE_UNHANDLED;
> +}
> +
> +static struct tracer bkl_tracer __read_mostly =
> +{
> +	.name		= "bkl",
> +	.init		= bkl_trace_init,
> +	.reset		= reset_bkl_trace,
> +	.ctrl_update	= bkl_trace_ctrl_update,
> +	.print_line	= bkl_trace_print_line,
> +};
> +
> +__init static int init_bkl_tracer(void)
> +{
> +	return register_tracer(&bkl_tracer);
> +}
> +device_initcall(init_bkl_tracer);
> +
> +/*
> + * These functions don't need us to disable
> + * the preemption since it has been done by lock_kernel()
> + */
> +
> +void trace_bkl_acquire(struct bkl_trace_acquire *trace)
> +{
> +	struct ring_buffer_event *event;
> +	struct trace_bkl_acq *entry;
> +	struct trace_array_cpu *data;
> +	unsigned long irq_flags;
> +	struct trace_array *tr = bkl_trace;
> +
> +	if (!bkl_tracer_enabled)
> +		return;
> +
> +	data = tr->data[smp_processor_id()];

data is unused.

> +
> +	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
> +					 &irq_flags);
> +	if (!event)
> +		return;
> +	entry = ring_buffer_event_data(event);
> +	tracing_generic_entry_update(&entry->ent, 0, 0);
> +	entry->ent.type = TRACE_BKL_ACQ;
> +	entry->bkl_acq = *trace;
> +	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
> +
> +	trace_wake_up();
> +}
> +
> +void trace_bkl_release(struct bkl_trace_release *trace)
> +{
> +	struct ring_buffer_event *event;
> +	struct trace_bkl_rel *entry;
> +	struct trace_array_cpu *data;
> +	unsigned long irq_flags;
> +	struct trace_array *tr = bkl_trace;
> +
> +	if (!bkl_tracer_enabled)
> +		return;
> +
> +	data = tr->data[smp_processor_id()];

data is unused here too.

> +
> +	event = ring_buffer_lock_reserve(tr->buffer, sizeof(*entry),
> +					 &irq_flags);
> +	if (!event)
> +		return;
> +	entry   = ring_buffer_event_data(event);
> +	tracing_generic_entry_update(&entry->ent, 0, 0);
> +	entry->ent.type = TRACE_BKL_REL;
> +	entry->bkl_rel = *trace;
> +	ring_buffer_unlock_commit(tr->buffer, event, irq_flags);
> +
> +	trace_wake_up();
> +}
> diff --git a/lib/kernel_lock.c b/lib/kernel_lock.c
> index 01a3c22..9987834 100644
> --- a/lib/kernel_lock.c
> +++ b/lib/kernel_lock.c
> @@ -9,6 +9,7 @@
>  #include <linux/module.h>
>  #include <linux/kallsyms.h>
>  #include <linux/semaphore.h>
> +#include <trace/bkl.h>
> 
>  /*
>  * The 'big kernel lock'
> @@ -116,16 +117,33 @@ static inline void __unlock_kernel(void)
>  void __lockfunc lock_kernel(void)
>  {
>  	int depth = current->lock_depth+1;
> -	if (likely(!depth))
> +	if (likely(!depth)) {
> +#ifdef CONFIG_BKL_TRACER
> +		struct bkl_trace_acquire trace;
> +		preempt_disable();
> +		trace.acquire_req_time = cpu_clock(raw_smp_processor_id());
> +		preempt_enable();
>  		__lock_kernel();
> +		trace.acquire_time = cpu_clock(raw_smp_processor_id());

Note, the cpu returned here may not be the same as the above processor id.

> +		trace_bkl_acquire(&trace);
> +#else
> +		__lock_kernel();
> +#endif

The above looks a bit ugly. Could you do something perhaps at the 
beginning of the file like:

#ifdef CONFIG_BKL_TRACER
static void lock_kernel_trace(void) {
	struct bkl_trace_acquire trace;

	preempt_disable();
	trace.acquire_req_time = cpu_clock(raw_smp_processor_id());
	preempt_enable();
	 __lock_kernel();
	trace.acquire_time = cpu_clock(raw_smp_processor_id());
	trace_bkl_acquire(&trace);
}
#else
# define lock_kernel_trace() __lock_kernel();
#endif

And then just add the lock_kernel_trace within the code.

> +	}
>  	current->lock_depth = depth;
>  }
> 
>  void __lockfunc unlock_kernel(void)
>  {
>  	BUG_ON(current->lock_depth < 0);
> -	if (likely(--current->lock_depth < 0))
> +	if (likely(--current->lock_depth < 0)) {
> +#ifdef CONFIG_BKL_TRACER
> +		struct bkl_trace_release trace;
> +		trace.release_time = cpu_clock(raw_smp_processor_id());
> +		trace_bkl_release(&trace);
> +#endif

Same here. Have an unlock_kernel_trace();

>  		__unlock_kernel();
> +	}
>  }
> 
>  EXPORT_SYMBOL(lock_kernel);
> 
> 

-- Steve


^ permalink raw reply	[flat|nested] 5+ messages in thread

* Re: [PATCH][RESEND] tracing/ftrace: Introduce the big kernel lock tracer
  2008-10-22 18:39 ` Steven Rostedt
@ 2008-10-22 18:54   ` Frédéric Weisbecker
  0 siblings, 0 replies; 5+ messages in thread
From: Frédéric Weisbecker @ 2008-10-22 18:54 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Ingo Molnar, Linux Kernel

Ok, I will send a V2 to apply your comments.

Thanks!

^ permalink raw reply	[flat|nested] 5+ messages in thread

end of thread, other threads:[~2008-10-22 18:57 UTC | newest]

Thread overview: 5+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2008-10-22 17:09 [PATCH][RESEND] tracing/ftrace: Introduce the big kernel lock tracer Frederic Weisbecker
2008-10-22 17:27 ` Randy Dunlap
2008-10-22 17:44   ` Frédéric Weisbecker
2008-10-22 18:39 ` Steven Rostedt
2008-10-22 18:54   ` Frédéric Weisbecker

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox