linux-kernel.vger.kernel.org archive mirror
* [PATCH 7/9] ftrace, perf: Add support to use function tracepoint in perf
  2011-11-27 18:04 [RFC] ftrace, perf: Adding support to use function trace Jiri Olsa
@ 2011-11-27 18:04 ` Jiri Olsa
  2011-11-28 19:58   ` Steven Rostedt
  0 siblings, 1 reply; 19+ messages in thread
From: Jiri Olsa @ 2011-11-27 18:04 UTC (permalink / raw)
  To: rostedt, fweisbec, mingo, paulus, acme; +Cc: linux-kernel, Jiri Olsa

Adding perf registration support for the ftrace function event,
so it is now possible to register it via perf interface.

The perf_event struct statically contains ftrace_ops as a handle
for function tracer. The function tracer is registered/unregistered
in open/close actions, and enabled/disabled in add/del actions.

It is now possible to use function trace within perf commands
like:

  perf record -e ftrace:function ls
  perf stat -e ftrace:function ls

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
---
 include/linux/perf_event.h      |    3 +
 kernel/trace/trace_event_perf.c |   88 +++++++++++++++++++++++++++++++++++++++
 kernel/trace/trace_export.c     |   23 ++++++++++
 3 files changed, 114 insertions(+), 0 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 1e9ebe5..6071995 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -847,6 +847,9 @@ struct perf_event {
 #ifdef CONFIG_EVENT_TRACING
 	struct ftrace_event_call	*tp_event;
 	struct event_filter		*filter;
+#ifdef CONFIG_FUNCTION_TRACER
+	struct ftrace_ops               ftrace_ops;
+#endif
 #endif
 
 #ifdef CONFIG_CGROUP_PERF
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index d72af0b..4be0f73 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -250,3 +250,91 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
 	return raw_data;
 }
 EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
+
+
+static void
+perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip)
+{
+	struct ftrace_entry *entry;
+	struct hlist_head *head;
+	struct pt_regs regs;
+	int rctx;
+
+#define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \
+		    sizeof(u64)) - sizeof(u32))
+
+	BUILD_BUG_ON(ENTRY_SIZE > PERF_MAX_TRACE_SIZE);
+
+	perf_fetch_caller_regs(&regs);
+
+	entry = perf_trace_buf_prepare(ENTRY_SIZE, TRACE_FN, NULL, &rctx);
+	if (!entry)
+		return;
+
+	entry->ip = ip;
+	entry->parent_ip = parent_ip;
+
+	head = this_cpu_ptr(event_function.perf_events);
+	perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0,
+			      1, &regs, head);
+
+#undef ENTRY_SIZE
+}
+
+static int perf_ftrace_function_register(struct perf_event *event)
+{
+	struct ftrace_ops *ops = &event->ftrace_ops;
+
+	ops->flags |= FTRACE_OPS_FL_CONTROL;
+	atomic_set(&ops->disabled, 1);
+	ops->func = perf_ftrace_function_call;
+	return register_ftrace_function(ops);
+}
+
+static int perf_ftrace_function_unregister(struct perf_event *event)
+{
+	struct ftrace_ops *ops = &event->ftrace_ops;
+	return unregister_ftrace_function(ops);
+}
+
+static void perf_ftrace_function_enable(struct perf_event *event)
+{
+	struct ftrace_ops *ops = &event->ftrace_ops;
+	enable_ftrace_function(ops);
+}
+
+static void perf_ftrace_function_disable(struct perf_event *event)
+{
+	struct ftrace_ops *ops = &event->ftrace_ops;
+	disable_ftrace_function(ops);
+}
+
+int perf_ftrace_event_register(struct ftrace_event_call *call,
+			       enum trace_reg type, void *data)
+{
+	int etype = call->event.type;
+
+	if (etype != TRACE_FN)
+		return -EINVAL;
+
+	switch (type) {
+	case TRACE_REG_REGISTER:
+	case TRACE_REG_UNREGISTER:
+		break;
+	case TRACE_REG_PERF_REGISTER:
+	case TRACE_REG_PERF_UNREGISTER:
+		return 0;
+	case TRACE_REG_PERF_OPEN:
+		return perf_ftrace_function_register(data);
+	case TRACE_REG_PERF_CLOSE:
+		return perf_ftrace_function_unregister(data);
+	case TRACE_REG_PERF_ADD:
+		perf_ftrace_function_enable(data);
+		return 0;
+	case TRACE_REG_PERF_DEL:
+		perf_ftrace_function_disable(data);
+		return 0;
+	}
+
+	return -EINVAL;
+}
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index bbeec31..62e86a5 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -131,6 +131,28 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call)	\
 
 #include "trace_entries.h"
 
+static int ftrace_event_class_register(struct ftrace_event_call *call,
+				       enum trace_reg type, void *data)
+{
+	switch (type) {
+	case TRACE_REG_PERF_REGISTER:
+	case TRACE_REG_PERF_UNREGISTER:
+		return 0;
+	case TRACE_REG_PERF_OPEN:
+	case TRACE_REG_PERF_CLOSE:
+	case TRACE_REG_PERF_ADD:
+	case TRACE_REG_PERF_DEL:
+#ifdef CONFIG_PERF_EVENTS
+		return perf_ftrace_event_register(call, type, data);
+#endif
+	case TRACE_REG_REGISTER:
+	case TRACE_REG_UNREGISTER:
+		break;
+	}
+
+	return -EINVAL;
+}
+
 #undef __entry
 #define __entry REC
 
@@ -159,6 +181,7 @@ struct ftrace_event_class event_class_ftrace_##call = {			\
 	.system			= __stringify(TRACE_SYSTEM),		\
 	.define_fields		= ftrace_define_fields_##call,		\
 	.fields			= LIST_HEAD_INIT(event_class_ftrace_##call.fields),\
+	.reg			= ftrace_event_class_register,		\
 };									\
 									\
 struct ftrace_event_call __used event_##call = {			\
-- 
1.7.1



* Re: [PATCH 7/9] ftrace, perf: Add support to use function tracepoint in perf
  2011-11-27 18:04 ` [PATCH 7/9] ftrace, perf: Add support to use function tracepoint in perf Jiri Olsa
@ 2011-11-28 19:58   ` Steven Rostedt
  2011-11-28 20:03     ` Peter Zijlstra
  2011-11-28 20:08     ` Peter Zijlstra
  0 siblings, 2 replies; 19+ messages in thread
From: Steven Rostedt @ 2011-11-28 19:58 UTC (permalink / raw)
  To: Jiri Olsa; +Cc: fweisbec, mingo, paulus, acme, linux-kernel, Peter Zijlstra

On Sun, 2011-11-27 at 19:04 +0100, Jiri Olsa wrote:
> Adding perf registration support for the ftrace function event,
> so it is now possible to register it via perf interface.
> 
> The perf_event struct statically contains ftrace_ops as a handle
> for function tracer. The function tracer is registered/unregistered
> in open/close actions, and enabled/disabled in add/del actions.
> 
> It is now possible to use function trace within perf commands
> like:
> 
>   perf record -e ftrace:function ls
>   perf stat -e ftrace:function ls

Question. This is a root only command, correct? Otherwise, we are
allowing any user to create a large performance impact to the system.

> 
> Signed-off-by: Jiri Olsa <jolsa@redhat.com>
> ---
>  include/linux/perf_event.h      |    3 +
>  kernel/trace/trace_event_perf.c |   88 +++++++++++++++++++++++++++++++++++++++
>  kernel/trace/trace_export.c     |   23 ++++++++++
>  3 files changed, 114 insertions(+), 0 deletions(-)
> 
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 1e9ebe5..6071995 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -847,6 +847,9 @@ struct perf_event {
>  #ifdef CONFIG_EVENT_TRACING
>  	struct ftrace_event_call	*tp_event;
>  	struct event_filter		*filter;
> +#ifdef CONFIG_FUNCTION_TRACER
> +	struct ftrace_ops               ftrace_ops;
> +#endif
>  #endif
>  
>  #ifdef CONFIG_CGROUP_PERF
> diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
> index d72af0b..4be0f73 100644
> --- a/kernel/trace/trace_event_perf.c
> +++ b/kernel/trace/trace_event_perf.c
> @@ -250,3 +250,91 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
>  	return raw_data;
>  }
>  EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
> +
> +
> +static void
> +perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip)
> +{
> +	struct ftrace_entry *entry;
> +	struct hlist_head *head;
> +	struct pt_regs regs;
> +	int rctx;
> +
> +#define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \
> +		    sizeof(u64)) - sizeof(u32))
> +
> +	BUILD_BUG_ON(ENTRY_SIZE > PERF_MAX_TRACE_SIZE);
> +
> +	perf_fetch_caller_regs(&regs);
> +
> +	entry = perf_trace_buf_prepare(ENTRY_SIZE, TRACE_FN, NULL, &rctx);
> +	if (!entry)
> +		return;
> +
> +	entry->ip = ip;
> +	entry->parent_ip = parent_ip;
> +
> +	head = this_cpu_ptr(event_function.perf_events);
> +	perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0,
> +			      1, &regs, head);
> +
> +#undef ENTRY_SIZE
> +}
> +
> +static int perf_ftrace_function_register(struct perf_event *event)
> +{
> +	struct ftrace_ops *ops = &event->ftrace_ops;
> +
> +	ops->flags |= FTRACE_OPS_FL_CONTROL;
> +	atomic_set(&ops->disabled, 1);
> +	ops->func = perf_ftrace_function_call;
> +	return register_ftrace_function(ops);

When is ADD called? Because as soon as you register this function, even
though you have it "disabled" the system takes about a 13% impact on
performance just by calling this.

> +}
> +
> +static int perf_ftrace_function_unregister(struct perf_event *event)
> +{
> +	struct ftrace_ops *ops = &event->ftrace_ops;
> +	return unregister_ftrace_function(ops);
> +}
> +
> +static void perf_ftrace_function_enable(struct perf_event *event)
> +{
> +	struct ftrace_ops *ops = &event->ftrace_ops;
> +	enable_ftrace_function(ops);

Is it really an issue that we shouldn't call the full blown register
instead? I'm not really understanding why this is a problem. Note, one
of the improvements to ftrace in the near future is to enable ftrace
without stop_machine.

-- Steve

> +}
> +
> +static void perf_ftrace_function_disable(struct perf_event *event)
> +{
> +	struct ftrace_ops *ops = &event->ftrace_ops;
> +	disable_ftrace_function(ops);
> +}
> +
> +int perf_ftrace_event_register(struct ftrace_event_call *call,
> +			       enum trace_reg type, void *data)
> +{
> +	int etype = call->event.type;
> +
> +	if (etype != TRACE_FN)
> +		return -EINVAL;
> +
> +	switch (type) {
> +	case TRACE_REG_REGISTER:
> +	case TRACE_REG_UNREGISTER:
> +		break;
> +	case TRACE_REG_PERF_REGISTER:
> +	case TRACE_REG_PERF_UNREGISTER:
> +		return 0;
> +	case TRACE_REG_PERF_OPEN:
> +		return perf_ftrace_function_register(data);
> +	case TRACE_REG_PERF_CLOSE:
> +		return perf_ftrace_function_unregister(data);
> +	case TRACE_REG_PERF_ADD:
> +		perf_ftrace_function_enable(data);
> +		return 0;
> +	case TRACE_REG_PERF_DEL:
> +		perf_ftrace_function_disable(data);
> +		return 0;
> +	}
> +
> +	return -EINVAL;
> +}
> diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
> index bbeec31..62e86a5 100644
> --- a/kernel/trace/trace_export.c
> +++ b/kernel/trace/trace_export.c
> @@ -131,6 +131,28 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call)	\
>  
>  #include "trace_entries.h"
>  
> +static int ftrace_event_class_register(struct ftrace_event_call *call,
> +				       enum trace_reg type, void *data)
> +{
> +	switch (type) {
> +	case TRACE_REG_PERF_REGISTER:
> +	case TRACE_REG_PERF_UNREGISTER:
> +		return 0;
> +	case TRACE_REG_PERF_OPEN:
> +	case TRACE_REG_PERF_CLOSE:
> +	case TRACE_REG_PERF_ADD:
> +	case TRACE_REG_PERF_DEL:
> +#ifdef CONFIG_PERF_EVENTS
> +		return perf_ftrace_event_register(call, type, data);
> +#endif
> +	case TRACE_REG_REGISTER:
> +	case TRACE_REG_UNREGISTER:
> +		break;
> +	}
> +
> +	return -EINVAL;
> +}
> +
>  #undef __entry
>  #define __entry REC
>  
> @@ -159,6 +181,7 @@ struct ftrace_event_class event_class_ftrace_##call = {			\
>  	.system			= __stringify(TRACE_SYSTEM),		\
>  	.define_fields		= ftrace_define_fields_##call,		\
>  	.fields			= LIST_HEAD_INIT(event_class_ftrace_##call.fields),\
> +	.reg			= ftrace_event_class_register,		\
>  };									\
>  									\
>  struct ftrace_event_call __used event_##call = {			\




* Re: [PATCH 7/9] ftrace, perf: Add support to use function tracepoint in perf
  2011-11-28 19:58   ` Steven Rostedt
@ 2011-11-28 20:03     ` Peter Zijlstra
  2011-11-28 20:13       ` Steven Rostedt
  2011-11-28 20:08     ` Peter Zijlstra
  1 sibling, 1 reply; 19+ messages in thread
From: Peter Zijlstra @ 2011-11-28 20:03 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Jiri Olsa, fweisbec, mingo, paulus, acme, linux-kernel

On Mon, 2011-11-28 at 14:58 -0500, Steven Rostedt wrote:
> >   perf stat -e ftrace:function ls
> 
> Question. This is a root only command, correct? Otherwise, we are
> allowing any user to create a large performance impact to the system. 

Typically not, although I haven't looked at Jiri's implementation of the
function tracepoint.




* Re: [PATCH 7/9] ftrace, perf: Add support to use function tracepoint in perf
  2011-11-28 19:58   ` Steven Rostedt
  2011-11-28 20:03     ` Peter Zijlstra
@ 2011-11-28 20:08     ` Peter Zijlstra
  2011-11-28 20:10       ` Peter Zijlstra
  1 sibling, 1 reply; 19+ messages in thread
From: Peter Zijlstra @ 2011-11-28 20:08 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Jiri Olsa, fweisbec, mingo, paulus, acme, linux-kernel

On Mon, 2011-11-28 at 14:58 -0500, Steven Rostedt wrote:
> > +static int perf_ftrace_function_register(struct perf_event *event)
> > +{
> > +     struct ftrace_ops *ops = &event->ftrace_ops;
> > +
> > +     ops->flags |= FTRACE_OPS_FL_CONTROL;
> > +     atomic_set(&ops->disabled, 1);
> > +     ops->func = perf_ftrace_function_call;
> > +     return register_ftrace_function(ops);
> 
> When is ADD called? Because as soon as you register this function, even
> though you have it "disabled" the system takes about a 13% impact on
> performance just by calling this.

Typically at context switch time.

> > +static void perf_ftrace_function_enable(struct perf_event *event)
> > +{
> > +     struct ftrace_ops *ops = &event->ftrace_ops;
> > +     enable_ftrace_function(ops);
> 
> Is it really an issue that we shouldn't call the full blown register
> instead? I'm not really understanding why this is a problem. Note, one
> of the improvements to ftrace in the near future is to enable ftrace
> without stop_machine. 

Yeah, but still not something you want to do during context switches,
right?


* Re: [PATCH 7/9] ftrace, perf: Add support to use function tracepoint in perf
  2011-11-28 20:08     ` Peter Zijlstra
@ 2011-11-28 20:10       ` Peter Zijlstra
  2011-11-28 20:16         ` Steven Rostedt
  0 siblings, 1 reply; 19+ messages in thread
From: Peter Zijlstra @ 2011-11-28 20:10 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Jiri Olsa, fweisbec, mingo, paulus, acme, linux-kernel

On Mon, 2011-11-28 at 21:08 +0100, Peter Zijlstra wrote:
> > Is it really an issue that we shouldn't call the full blown register
> > instead? I'm not really understanding why this is a problem. Note, one
> > of the improvements to ftrace in the near future is to enable ftrace
> > without stop_machine. 
> 
> Yeah, but still not something you want to do during context switches,
> right? 

Also, that's x86, there's still archs that need stop_machine(), right?


* Re: [PATCH 7/9] ftrace, perf: Add support to use function tracepoint in perf
  2011-11-28 20:03     ` Peter Zijlstra
@ 2011-11-28 20:13       ` Steven Rostedt
  2011-11-29 10:10         ` Jiri Olsa
  0 siblings, 1 reply; 19+ messages in thread
From: Steven Rostedt @ 2011-11-28 20:13 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Jiri Olsa, fweisbec, mingo, paulus, acme, linux-kernel

On Mon, 2011-11-28 at 21:03 +0100, Peter Zijlstra wrote:
> On Mon, 2011-11-28 at 14:58 -0500, Steven Rostedt wrote:
> > >   perf stat -e ftrace:function ls
> > 
> > Question. This is a root only command, correct? Otherwise, we are
> > allowing any user to create a large performance impact to the system. 
> 
> Typically not, although I haven't looked at Jiri's implementation of the
> function tracepoint.
> 

I would not allow function tracing for non root users. It's just way too
invasive to let such a thing happen.

-- Steve





* Re: [PATCH 7/9] ftrace, perf: Add support to use function tracepoint in perf
  2011-11-28 20:10       ` Peter Zijlstra
@ 2011-11-28 20:16         ` Steven Rostedt
  2011-11-28 20:18           ` Peter Zijlstra
  0 siblings, 1 reply; 19+ messages in thread
From: Steven Rostedt @ 2011-11-28 20:16 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Jiri Olsa, fweisbec, mingo, paulus, acme, linux-kernel

On Mon, 2011-11-28 at 21:10 +0100, Peter Zijlstra wrote:

> > Yeah, but still not something you want to do during context switches,
> > right? 
> 
> Also, that's x86, there's still archs that need stop_machine(), right?

s390 may need it, but only until we add a workaround there too.

But yeah, if this happens at context switch, then we want the fast
enable/disable that Jiri made.

-- Steve





* Re: [PATCH 7/9] ftrace, perf: Add support to use function tracepoint in perf
  2011-11-28 20:16         ` Steven Rostedt
@ 2011-11-28 20:18           ` Peter Zijlstra
  0 siblings, 0 replies; 19+ messages in thread
From: Peter Zijlstra @ 2011-11-28 20:18 UTC (permalink / raw)
  To: Steven Rostedt; +Cc: Jiri Olsa, fweisbec, mingo, paulus, acme, linux-kernel

On Mon, 2011-11-28 at 15:16 -0500, Steven Rostedt wrote:
> On Mon, 2011-11-28 at 21:10 +0100, Peter Zijlstra wrote:
> 
> > > Yeah, but still not something you want to do during context switches,
> > > right? 
> > 
> > Also, that's x86, there's still archs that need stop_machine(), right?
> 
> s390 may need it, but only until we add a workaround there too.
> 
> But yeah, if this happens at context switch, then we want the fast
> enable/disable that Jiri made.

Right, so the way tracepoint->perf works currently is we enable the
tracepoint when we create the event. The tracepoint handler runs a
per-cpu hlist. perf then registers with the per-cpu hlist on context
switch.
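
For reference, a condensed sketch of those hooks as they end up looking with
this series applied (simplified from the perf_trace_add/del changes in patch
5/9; not the exact kernel source):

	/* sched in: link the event into the tracepoint's per-cpu hlist */
	int perf_trace_add(struct perf_event *p_event, int flags)
	{
		struct ftrace_event_call *tp_event = p_event->tp_event;
		struct hlist_head *list;

		list = this_cpu_ptr(tp_event->perf_events);
		hlist_add_head_rcu(&p_event->hlist_entry, list);

		/* new: per sched-in callback into the event class */
		return tp_event->class->reg(tp_event, TRACE_REG_PERF_ADD, p_event);
	}

	/* sched out: unlink and hand the class its TRACE_REG_PERF_DEL hook */
	void perf_trace_del(struct perf_event *p_event, int flags)
	{
		struct ftrace_event_call *tp_event = p_event->tp_event;

		hlist_del_rcu(&p_event->hlist_entry);
		tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event);
	}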




* Re: [PATCH 7/9] ftrace, perf: Add support to use function tracepoint in perf
  2011-11-28 20:13       ` Steven Rostedt
@ 2011-11-29 10:10         ` Jiri Olsa
  0 siblings, 0 replies; 19+ messages in thread
From: Jiri Olsa @ 2011-11-29 10:10 UTC (permalink / raw)
  To: Steven Rostedt
  Cc: Peter Zijlstra, fweisbec, mingo, paulus, acme, linux-kernel

On Mon, Nov 28, 2011 at 03:13:09PM -0500, Steven Rostedt wrote:
> On Mon, 2011-11-28 at 21:03 +0100, Peter Zijlstra wrote:
> > On Mon, 2011-11-28 at 14:58 -0500, Steven Rostedt wrote:
> > > >   perf stat -e ftrace:function ls
> > > 
> > > Question. This is a root only command, correct? Otherwise, we are
> > > allowing any user to create a large performance impact to the system. 
> > 
> > Typically not, although I haven't looked at Jiri's implementation of the
> > function tracepoint.
> > 
> 
> I would not allow function tracing for non root users. It's just way too
> invasive to let such a thing happen.

I believe it's for root only, I'll double check

jirka

> 
> -- Steve
> 
> 
> 


* [PATCH 0/9] [GIT PULL] perf/tracing: fixes and add function trace to perf
@ 2012-02-22 14:40 Steven Rostedt
  2012-02-22 14:40 ` [PATCH 1/9] tracing: Dont print an extra separator of flags Steven Rostedt
                   ` (8 more replies)
  0 siblings, 9 replies; 19+ messages in thread
From: Steven Rostedt @ 2012-02-22 14:40 UTC (permalink / raw)
  To: linux-kernel
  Cc: Ingo Molnar, Andrew Morton, Frederic Weisbecker,
	Arnaldo Carvalho de Melo



Ingo,

Please pull the latest tip/perf/core tree, which can be found at:

  git://git.kernel.org/pub/scm/linux/kernel/git/rostedt/linux-trace.git
tip/perf/core

Head SHA1: 5500fa51199aee770ce53718853732600543619e


Andrey Vagin (1):
      tracing: Don't print an extra separator of flags

Jiri Olsa (7):
      ftrace: Add enable/disable ftrace_ops control interface
      ftrace, perf: Add open/close tracepoint perf registration actions
      ftrace, perf: Add add/del tracepoint perf registration actions
      ftrace: Add FTRACE_ENTRY_REG macro to allow event registration
      ftrace, perf: Add support to use function tracepoint in perf
      ftrace: Allow to specify filter field type for ftrace events
      ftrace, perf: Add filter support for function trace event

Steven Rostedt (1):
      tracing: Don't use p->len field to determine output in __print_*() functions

----
 include/linux/ftrace.h             |   73 ++++++++++++-
 include/linux/ftrace_event.h       |    9 ++-
 include/linux/perf_event.h         |    3 +
 kernel/trace/ftrace.c              |  117 +++++++++++++++++++--
 kernel/trace/trace.h               |   38 +++++--
 kernel/trace/trace_entries.h       |   54 +++++++---
 kernel/trace/trace_event_perf.c    |  208 ++++++++++++++++++++++++++++-------
 kernel/trace/trace_events.c        |   12 ++-
 kernel/trace/trace_events_filter.c |  168 ++++++++++++++++++++++++++++-
 kernel/trace/trace_export.c        |   64 ++++++++----
 kernel/trace/trace_kprobe.c        |    8 ++-
 kernel/trace/trace_output.c        |   12 ++-
 kernel/trace/trace_syscalls.c      |   18 +++-
 13 files changed, 667 insertions(+), 117 deletions(-)


* [PATCH 1/9] tracing: Dont print an extra separator of flags
  2012-02-22 14:40 [PATCH 0/9] [GIT PULL] perf/tracing: fixes and add function trace to perf Steven Rostedt
@ 2012-02-22 14:40 ` Steven Rostedt
  2012-02-22 14:40 ` [PATCH 2/9] tracing: Dont use p->len field to determine output in __print_*() functions Steven Rostedt
                   ` (7 subsequent siblings)
  8 siblings, 0 replies; 19+ messages in thread
From: Steven Rostedt @ 2012-02-22 14:40 UTC (permalink / raw)
  To: linux-kernel
  Cc: Ingo Molnar, Andrew Morton, Frederic Weisbecker,
	Arnaldo Carvalho de Melo, Andrew Vagin, Ingo Molnar


From: Andrey Vagin <avagin@openvz.org>

If __print_flags() is used after another __print_*() function, the
temp seq_file buffer will not be empty on entry, and the delimiter will
be printed even though there's just one field. We get something like:

	|S

instead of just:

	S

This is because the length of the temp seq buffer is used to determine
if the delimiter is printed or not. But this algorithm fails when
the seq buffer is not empty on entry, and the delimiter will be printed
because it thinks that a previous field was already printed.

Link: http://lkml.kernel.org/r/1329650167-480655-1-git-send-email-avagin@openvz.org

Signed-off-by: Andrew Vagin <avagin@openvz.org>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace_output.c |    6 ++++--
 1 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 0d6ff355..3efd718 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -300,7 +300,7 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
 	unsigned long mask;
 	const char *str;
 	const char *ret = p->buffer + p->len;
-	int i;
+	int i, first = 1;
 
 	for (i = 0;  flag_array[i].name && flags; i++) {
 
@@ -310,8 +310,10 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
 
 		str = flag_array[i].name;
 		flags &= ~mask;
-		if (p->len && delim)
+		if (!first && delim)
 			trace_seq_puts(p, delim);
+		else
+			first = 0;
 		trace_seq_puts(p, str);
 	}
 
-- 
1.7.8.3




* [PATCH 2/9] tracing: Dont use p->len field to determine output in __print_*() functions
  2012-02-22 14:40 [PATCH 0/9] [GIT PULL] perf/tracing: fixes and add function trace to perf Steven Rostedt
  2012-02-22 14:40 ` [PATCH 1/9] tracing: Dont print an extra separator of flags Steven Rostedt
@ 2012-02-22 14:40 ` Steven Rostedt
  2012-02-22 14:40 ` [PATCH 3/9] ftrace: Add enable/disable ftrace_ops control interface Steven Rostedt
                   ` (6 subsequent siblings)
  8 siblings, 0 replies; 19+ messages in thread
From: Steven Rostedt @ 2012-02-22 14:40 UTC (permalink / raw)
  To: linux-kernel
  Cc: Ingo Molnar, Andrew Morton, Frederic Weisbecker,
	Arnaldo Carvalho de Melo, Andrew Vagin


From: Steven Rostedt <srostedt@redhat.com>

If more than one __print_*() function is used in a tracepoint
(__print_flags(), __print_symbolic(), etc), then the temp seq buffer will
not be empty on entry. Using the temp seq buffer's length to know whether
data has been printed in the current function is incorrect and
may produce incorrect results.

Currently, no in-tree tracepoint causes this bug, but new ones may
be created.
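
As a purely hypothetical illustration (no such tracepoint exists in-tree), a
TP_printk() mixing two of these helpers would hit that path:

  TP_printk("flags=%s state=%s",
	    __print_flags(__entry->flags, "|",
			  { 1UL << 0, "READ" }, { 1UL << 1, "SYNC" }),
	    __print_symbolic(__entry->state,
			     { 0, "IDLE" }, { 1, "BUSY" }))

The second helper starts with a non-empty temp seq buffer, so a check on
p->len can no longer tell whether this particular helper printed anything;
comparing against the saved return pointer (as below) does.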

Cc: Andrew Vagin <avagin@openvz.org>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace_output.c |    6 +++---
 1 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 3efd718..c5a0187 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -319,7 +319,7 @@ ftrace_print_flags_seq(struct trace_seq *p, const char *delim,
 
 	/* check for left over flags */
 	if (flags) {
-		if (p->len && delim)
+		if (!first && delim)
 			trace_seq_puts(p, delim);
 		trace_seq_printf(p, "0x%lx", flags);
 	}
@@ -346,7 +346,7 @@ ftrace_print_symbols_seq(struct trace_seq *p, unsigned long val,
 		break;
 	}
 
-	if (!p->len)
+	if (ret == (const char *)(p->buffer + p->len))
 		trace_seq_printf(p, "0x%lx", val);
 		
 	trace_seq_putc(p, 0);
@@ -372,7 +372,7 @@ ftrace_print_symbols_seq_u64(struct trace_seq *p, unsigned long long val,
 		break;
 	}
 
-	if (!p->len)
+	if (ret == (const char *)(p->buffer + p->len))
 		trace_seq_printf(p, "0x%llx", val);
 
 	trace_seq_putc(p, 0);
-- 
1.7.8.3




* [PATCH 3/9] ftrace: Add enable/disable ftrace_ops control interface
  2012-02-22 14:40 [PATCH 0/9] [GIT PULL] perf/tracing: fixes and add function trace to perf Steven Rostedt
  2012-02-22 14:40 ` [PATCH 1/9] tracing: Dont print an extra separator of flags Steven Rostedt
  2012-02-22 14:40 ` [PATCH 2/9] tracing: Dont use p->len field to determine output in __print_*() functions Steven Rostedt
@ 2012-02-22 14:40 ` Steven Rostedt
  2012-02-22 14:40 ` [PATCH 4/9] ftrace, perf: Add open/close tracepoint perf registration actions Steven Rostedt
                   ` (5 subsequent siblings)
  8 siblings, 0 replies; 19+ messages in thread
From: Steven Rostedt @ 2012-02-22 14:40 UTC (permalink / raw)
  To: linux-kernel
  Cc: Ingo Molnar, Andrew Morton, Frederic Weisbecker,
	Arnaldo Carvalho de Melo, Jiri Olsa


From: Jiri Olsa <jolsa@redhat.com>

Adding a way to temporarily enable/disable ftrace_ops. The change
follows the same approach used for the 'global' ftrace_ops.

Introducing two globals - control_ops and ftrace_control_list -
which take over all ftrace_ops registered with the FTRACE_OPS_FL_CONTROL
flag. In addition, a new per-cpu flag called 'disabled' is added to
ftrace_ops to provide the control information for each cpu.

When ftrace_ops with FTRACE_OPS_FL_CONTROL is registered, it is
set as disabled for all cpus.

The ftrace_control_list contains all the registered 'control' ftrace_ops.
The control_ops provides a function which iterates over ftrace_control_list
and checks the 'disabled' flag on the current cpu.

Adding 3 inline functions:
  ftrace_function_local_disable/ftrace_function_local_enable
  - enable/disable the ftrace_ops on current cpu
  ftrace_function_local_disabled
  - get disabled ftrace_ops::disabled value for current cpu
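
A minimal usage sketch, assuming a hypothetical user (the my_* names below
are illustrative only; the real user is the perf code added later in this
series):

	#include <linux/ftrace.h>

	static void my_trace_func(unsigned long ip, unsigned long parent_ip)
	{
		/* runs for every traced function while locally enabled */
	}

	static struct ftrace_ops my_ops = {
		.func	= my_trace_func,
		.flags	= FTRACE_OPS_FL_CONTROL,
	};

	/* slow path: after this the ops is registered but disabled on all cpus */
	static int my_setup(void)
	{
		return register_ftrace_function(&my_ops);
	}

	/* fast path, e.g. at sched in/out, called with preemption disabled */
	static void my_sched_in(void)
	{
		ftrace_function_local_enable(&my_ops);	/* trace on this cpu */
	}

	static void my_sched_out(void)
	{
		ftrace_function_local_disable(&my_ops);	/* stop on this cpu */
	}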

Link: http://lkml.kernel.org/r/1329317514-8131-2-git-send-email-jolsa@redhat.com

Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h |   66 ++++++++++++++++++++++++++++
 kernel/trace/ftrace.c  |  111 +++++++++++++++++++++++++++++++++++++++++++++---
 kernel/trace/trace.h   |    2 +
 3 files changed, 172 insertions(+), 7 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index f33fb3b..64a309d 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -31,16 +31,33 @@ ftrace_enable_sysctl(struct ctl_table *table, int write,
 
 typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip);
 
+/*
+ * FTRACE_OPS_FL_* bits denote the state of ftrace_ops struct and are
+ * set in the flags member.
+ *
+ * ENABLED - set/unset when ftrace_ops is registered/unregistered
+ * GLOBAL  - set manualy by ftrace_ops user to denote the ftrace_ops
+ *           is part of the global tracers sharing the same filter
+ *           via set_ftrace_* debugfs files.
+ * DYNAMIC - set when ftrace_ops is registered to denote dynamically
+ *           allocated ftrace_ops which need special care
+ * CONTROL - set manualy by ftrace_ops user to denote the ftrace_ops
+ *           could be controled by following calls:
+ *             ftrace_function_local_enable
+ *             ftrace_function_local_disable
+ */
 enum {
 	FTRACE_OPS_FL_ENABLED		= 1 << 0,
 	FTRACE_OPS_FL_GLOBAL		= 1 << 1,
 	FTRACE_OPS_FL_DYNAMIC		= 1 << 2,
+	FTRACE_OPS_FL_CONTROL		= 1 << 3,
 };
 
 struct ftrace_ops {
 	ftrace_func_t			func;
 	struct ftrace_ops		*next;
 	unsigned long			flags;
+	int __percpu			*disabled;
 #ifdef CONFIG_DYNAMIC_FTRACE
 	struct ftrace_hash		*notrace_hash;
 	struct ftrace_hash		*filter_hash;
@@ -97,6 +114,55 @@ int register_ftrace_function(struct ftrace_ops *ops);
 int unregister_ftrace_function(struct ftrace_ops *ops);
 void clear_ftrace_function(void);
 
+/**
+ * ftrace_function_local_enable - enable controlled ftrace_ops on current cpu
+ *
+ * This function enables tracing on current cpu by decreasing
+ * the per cpu control variable.
+ * It must be called with preemption disabled and only on ftrace_ops
+ * registered with FTRACE_OPS_FL_CONTROL. If called without preemption
+ * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled.
+ */
+static inline void ftrace_function_local_enable(struct ftrace_ops *ops)
+{
+	if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_CONTROL)))
+		return;
+
+	(*this_cpu_ptr(ops->disabled))--;
+}
+
+/**
+ * ftrace_function_local_disable - disable controlled ftrace_ops on current cpu
+ *
+ * This function disables tracing on current cpu by increasing
+ * the per cpu control variable.
+ * It must be called with preemption disabled and only on ftrace_ops
+ * registered with FTRACE_OPS_FL_CONTROL. If called without preemption
+ * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled.
+ */
+static inline void ftrace_function_local_disable(struct ftrace_ops *ops)
+{
+	if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_CONTROL)))
+		return;
+
+	(*this_cpu_ptr(ops->disabled))++;
+}
+
+/**
+ * ftrace_function_local_disabled - returns ftrace_ops disabled value
+ *                                  on current cpu
+ *
+ * This function returns value of ftrace_ops::disabled on current cpu.
+ * It must be called with preemption disabled and only on ftrace_ops
+ * registered with FTRACE_OPS_FL_CONTROL. If called without preemption
+ * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled.
+ */
+static inline int ftrace_function_local_disabled(struct ftrace_ops *ops)
+{
+	WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_CONTROL));
+	return *this_cpu_ptr(ops->disabled);
+}
+
 extern void ftrace_stub(unsigned long a0, unsigned long a1);
 
 #else /* !CONFIG_FUNCTION_TRACER */
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index d1499e9..f615f97 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -62,6 +62,8 @@
 #define FTRACE_HASH_DEFAULT_BITS 10
 #define FTRACE_HASH_MAX_BITS 12
 
+#define FL_GLOBAL_CONTROL_MASK (FTRACE_OPS_FL_GLOBAL | FTRACE_OPS_FL_CONTROL)
+
 /* ftrace_enabled is a method to turn ftrace on or off */
 int ftrace_enabled __read_mostly;
 static int last_ftrace_enabled;
@@ -89,12 +91,14 @@ static struct ftrace_ops ftrace_list_end __read_mostly = {
 };
 
 static struct ftrace_ops *ftrace_global_list __read_mostly = &ftrace_list_end;
+static struct ftrace_ops *ftrace_control_list __read_mostly = &ftrace_list_end;
 static struct ftrace_ops *ftrace_ops_list __read_mostly = &ftrace_list_end;
 ftrace_func_t ftrace_trace_function __read_mostly = ftrace_stub;
 static ftrace_func_t __ftrace_trace_function_delay __read_mostly = ftrace_stub;
 ftrace_func_t __ftrace_trace_function __read_mostly = ftrace_stub;
 ftrace_func_t ftrace_pid_function __read_mostly = ftrace_stub;
 static struct ftrace_ops global_ops;
+static struct ftrace_ops control_ops;
 
 static void
 ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip);
@@ -168,6 +172,32 @@ static void ftrace_test_stop_func(unsigned long ip, unsigned long parent_ip)
 }
 #endif
 
+static void control_ops_disable_all(struct ftrace_ops *ops)
+{
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		*per_cpu_ptr(ops->disabled, cpu) = 1;
+}
+
+static int control_ops_alloc(struct ftrace_ops *ops)
+{
+	int __percpu *disabled;
+
+	disabled = alloc_percpu(int);
+	if (!disabled)
+		return -ENOMEM;
+
+	ops->disabled = disabled;
+	control_ops_disable_all(ops);
+	return 0;
+}
+
+static void control_ops_free(struct ftrace_ops *ops)
+{
+	free_percpu(ops->disabled);
+}
+
 static void update_global_ops(void)
 {
 	ftrace_func_t func;
@@ -259,6 +289,26 @@ static int remove_ftrace_ops(struct ftrace_ops **list, struct ftrace_ops *ops)
 	return 0;
 }
 
+static void add_ftrace_list_ops(struct ftrace_ops **list,
+				struct ftrace_ops *main_ops,
+				struct ftrace_ops *ops)
+{
+	int first = *list == &ftrace_list_end;
+	add_ftrace_ops(list, ops);
+	if (first)
+		add_ftrace_ops(&ftrace_ops_list, main_ops);
+}
+
+static int remove_ftrace_list_ops(struct ftrace_ops **list,
+				  struct ftrace_ops *main_ops,
+				  struct ftrace_ops *ops)
+{
+	int ret = remove_ftrace_ops(list, ops);
+	if (!ret && *list == &ftrace_list_end)
+		ret = remove_ftrace_ops(&ftrace_ops_list, main_ops);
+	return ret;
+}
+
 static int __register_ftrace_function(struct ftrace_ops *ops)
 {
 	if (ftrace_disabled)
@@ -270,15 +320,20 @@ static int __register_ftrace_function(struct ftrace_ops *ops)
 	if (WARN_ON(ops->flags & FTRACE_OPS_FL_ENABLED))
 		return -EBUSY;
 
+	/* We don't support both control and global flags set. */
+	if ((ops->flags & FL_GLOBAL_CONTROL_MASK) == FL_GLOBAL_CONTROL_MASK)
+		return -EINVAL;
+
 	if (!core_kernel_data((unsigned long)ops))
 		ops->flags |= FTRACE_OPS_FL_DYNAMIC;
 
 	if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
-		int first = ftrace_global_list == &ftrace_list_end;
-		add_ftrace_ops(&ftrace_global_list, ops);
+		add_ftrace_list_ops(&ftrace_global_list, &global_ops, ops);
 		ops->flags |= FTRACE_OPS_FL_ENABLED;
-		if (first)
-			add_ftrace_ops(&ftrace_ops_list, &global_ops);
+	} else if (ops->flags & FTRACE_OPS_FL_CONTROL) {
+		if (control_ops_alloc(ops))
+			return -ENOMEM;
+		add_ftrace_list_ops(&ftrace_control_list, &control_ops, ops);
 	} else
 		add_ftrace_ops(&ftrace_ops_list, ops);
 
@@ -302,11 +357,23 @@ static int __unregister_ftrace_function(struct ftrace_ops *ops)
 		return -EINVAL;
 
 	if (ops->flags & FTRACE_OPS_FL_GLOBAL) {
-		ret = remove_ftrace_ops(&ftrace_global_list, ops);
-		if (!ret && ftrace_global_list == &ftrace_list_end)
-			ret = remove_ftrace_ops(&ftrace_ops_list, &global_ops);
+		ret = remove_ftrace_list_ops(&ftrace_global_list,
+					     &global_ops, ops);
 		if (!ret)
 			ops->flags &= ~FTRACE_OPS_FL_ENABLED;
+	} else if (ops->flags & FTRACE_OPS_FL_CONTROL) {
+		ret = remove_ftrace_list_ops(&ftrace_control_list,
+					     &control_ops, ops);
+		if (!ret) {
+			/*
+			 * The ftrace_ops is now removed from the list,
+			 * so there'll be no new users. We must ensure
+			 * all current users are done before we free
+			 * the control data.
+			 */
+			synchronize_sched();
+			control_ops_free(ops);
+		}
 	} else
 		ret = remove_ftrace_ops(&ftrace_ops_list, ops);
 
@@ -3874,6 +3941,36 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip)
 #endif /* CONFIG_DYNAMIC_FTRACE */
 
 static void
+ftrace_ops_control_func(unsigned long ip, unsigned long parent_ip)
+{
+	struct ftrace_ops *op;
+
+	if (unlikely(trace_recursion_test(TRACE_CONTROL_BIT)))
+		return;
+
+	/*
+	 * Some of the ops may be dynamically allocated,
+	 * they must be freed after a synchronize_sched().
+	 */
+	preempt_disable_notrace();
+	trace_recursion_set(TRACE_CONTROL_BIT);
+	op = rcu_dereference_raw(ftrace_control_list);
+	while (op != &ftrace_list_end) {
+		if (!ftrace_function_local_disabled(op) &&
+		    ftrace_ops_test(op, ip))
+			op->func(ip, parent_ip);
+
+		op = rcu_dereference_raw(op->next);
+	};
+	trace_recursion_clear(TRACE_CONTROL_BIT);
+	preempt_enable_notrace();
+}
+
+static struct ftrace_ops control_ops = {
+	.func = ftrace_ops_control_func,
+};
+
+static void
 ftrace_ops_list_func(unsigned long ip, unsigned long parent_ip)
 {
 	struct ftrace_ops *op;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index b93ecba..55c6ea0 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -288,6 +288,8 @@ struct tracer {
 /* for function tracing recursion */
 #define TRACE_INTERNAL_BIT		(1<<11)
 #define TRACE_GLOBAL_BIT		(1<<12)
+#define TRACE_CONTROL_BIT		(1<<13)
+
 /*
  * Abuse of the trace_recursion.
  * As we need a way to maintain state if we are tracing the function
-- 
1.7.8.3




* [PATCH 4/9] ftrace, perf: Add open/close tracepoint perf registration actions
  2012-02-22 14:40 [PATCH 0/9] [GIT PULL] perf/tracing: fixes and add function trace to perf Steven Rostedt
                   ` (2 preceding siblings ...)
  2012-02-22 14:40 ` [PATCH 3/9] ftrace: Add enable/disable ftrace_ops control interface Steven Rostedt
@ 2012-02-22 14:40 ` Steven Rostedt
  2012-02-22 14:40 ` [PATCH 5/9] ftrace, perf: Add add/del " Steven Rostedt
                   ` (4 subsequent siblings)
  8 siblings, 0 replies; 19+ messages in thread
From: Steven Rostedt @ 2012-02-22 14:40 UTC (permalink / raw)
  To: linux-kernel
  Cc: Ingo Molnar, Andrew Morton, Frederic Weisbecker,
	Arnaldo Carvalho de Melo, Jiri Olsa


From: Jiri Olsa <jolsa@redhat.com>

Adding TRACE_REG_PERF_OPEN and TRACE_REG_PERF_CLOSE to differentiate
register/unregister from open/close actions.

The register/unregister actions are invoked for the first/last
tracepoint user when opening/closing the event.

The open/close actions are invoked for each tracepoint user when
opening/closing the event.
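
Condensed, the resulting call order is (see perf_trace_event_init/destroy in
the diff below):

  perf_trace_init(p_event)
    -> perf_trace_event_perm()
    -> perf_trace_event_reg()    reg(TRACE_REG_PERF_REGISTER)        [first user]
    -> perf_trace_event_open()   reg(TRACE_REG_PERF_OPEN, p_event)   [every user]

  perf_trace_destroy(p_event)
    -> perf_trace_event_close()  reg(TRACE_REG_PERF_CLOSE, p_event)  [every user]
    -> perf_trace_event_unreg()  reg(TRACE_REG_PERF_UNREGISTER)      [last user]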

Link: http://lkml.kernel.org/r/1329317514-8131-3-git-send-email-jolsa@redhat.com

Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h    |    6 +-
 kernel/trace/trace_event_perf.c |  116 +++++++++++++++++++++++++--------------
 kernel/trace/trace_events.c     |   10 ++-
 kernel/trace/trace_kprobe.c     |    6 ++-
 kernel/trace/trace_syscalls.c   |   14 +++-
 5 files changed, 101 insertions(+), 51 deletions(-)

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index c3da42d..195e360 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -146,6 +146,8 @@ enum trace_reg {
 	TRACE_REG_UNREGISTER,
 	TRACE_REG_PERF_REGISTER,
 	TRACE_REG_PERF_UNREGISTER,
+	TRACE_REG_PERF_OPEN,
+	TRACE_REG_PERF_CLOSE,
 };
 
 struct ftrace_event_call;
@@ -157,7 +159,7 @@ struct ftrace_event_class {
 	void			*perf_probe;
 #endif
 	int			(*reg)(struct ftrace_event_call *event,
-				       enum trace_reg type);
+				       enum trace_reg type, void *data);
 	int			(*define_fields)(struct ftrace_event_call *);
 	struct list_head	*(*get_fields)(struct ftrace_event_call *);
 	struct list_head	fields;
@@ -165,7 +167,7 @@ struct ftrace_event_class {
 };
 
 extern int ftrace_event_reg(struct ftrace_event_call *event,
-			    enum trace_reg type);
+			    enum trace_reg type, void *data);
 
 enum {
 	TRACE_EVENT_FL_ENABLED_BIT,
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 19a359d..0cfcc37 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -44,23 +44,17 @@ static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
 	return 0;
 }
 
-static int perf_trace_event_init(struct ftrace_event_call *tp_event,
-				 struct perf_event *p_event)
+static int perf_trace_event_reg(struct ftrace_event_call *tp_event,
+				struct perf_event *p_event)
 {
 	struct hlist_head __percpu *list;
-	int ret;
+	int ret = -ENOMEM;
 	int cpu;
 
-	ret = perf_trace_event_perm(tp_event, p_event);
-	if (ret)
-		return ret;
-
 	p_event->tp_event = tp_event;
 	if (tp_event->perf_refcount++ > 0)
 		return 0;
 
-	ret = -ENOMEM;
-
 	list = alloc_percpu(struct hlist_head);
 	if (!list)
 		goto fail;
@@ -83,7 +77,7 @@ static int perf_trace_event_init(struct ftrace_event_call *tp_event,
 		}
 	}
 
-	ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER);
+	ret = tp_event->class->reg(tp_event, TRACE_REG_PERF_REGISTER, NULL);
 	if (ret)
 		goto fail;
 
@@ -108,6 +102,69 @@ fail:
 	return ret;
 }
 
+static void perf_trace_event_unreg(struct perf_event *p_event)
+{
+	struct ftrace_event_call *tp_event = p_event->tp_event;
+	int i;
+
+	if (--tp_event->perf_refcount > 0)
+		goto out;
+
+	tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER, NULL);
+
+	/*
+	 * Ensure our callback won't be called anymore. The buffers
+	 * will be freed after that.
+	 */
+	tracepoint_synchronize_unregister();
+
+	free_percpu(tp_event->perf_events);
+	tp_event->perf_events = NULL;
+
+	if (!--total_ref_count) {
+		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
+			free_percpu(perf_trace_buf[i]);
+			perf_trace_buf[i] = NULL;
+		}
+	}
+out:
+	module_put(tp_event->mod);
+}
+
+static int perf_trace_event_open(struct perf_event *p_event)
+{
+	struct ftrace_event_call *tp_event = p_event->tp_event;
+	return tp_event->class->reg(tp_event, TRACE_REG_PERF_OPEN, p_event);
+}
+
+static void perf_trace_event_close(struct perf_event *p_event)
+{
+	struct ftrace_event_call *tp_event = p_event->tp_event;
+	tp_event->class->reg(tp_event, TRACE_REG_PERF_CLOSE, p_event);
+}
+
+static int perf_trace_event_init(struct ftrace_event_call *tp_event,
+				 struct perf_event *p_event)
+{
+	int ret;
+
+	ret = perf_trace_event_perm(tp_event, p_event);
+	if (ret)
+		return ret;
+
+	ret = perf_trace_event_reg(tp_event, p_event);
+	if (ret)
+		return ret;
+
+	ret = perf_trace_event_open(p_event);
+	if (ret) {
+		perf_trace_event_unreg(p_event);
+		return ret;
+	}
+
+	return 0;
+}
+
 int perf_trace_init(struct perf_event *p_event)
 {
 	struct ftrace_event_call *tp_event;
@@ -130,6 +187,14 @@ int perf_trace_init(struct perf_event *p_event)
 	return ret;
 }
 
+void perf_trace_destroy(struct perf_event *p_event)
+{
+	mutex_lock(&event_mutex);
+	perf_trace_event_close(p_event);
+	perf_trace_event_unreg(p_event);
+	mutex_unlock(&event_mutex);
+}
+
 int perf_trace_add(struct perf_event *p_event, int flags)
 {
 	struct ftrace_event_call *tp_event = p_event->tp_event;
@@ -154,37 +219,6 @@ void perf_trace_del(struct perf_event *p_event, int flags)
 	hlist_del_rcu(&p_event->hlist_entry);
 }
 
-void perf_trace_destroy(struct perf_event *p_event)
-{
-	struct ftrace_event_call *tp_event = p_event->tp_event;
-	int i;
-
-	mutex_lock(&event_mutex);
-	if (--tp_event->perf_refcount > 0)
-		goto out;
-
-	tp_event->class->reg(tp_event, TRACE_REG_PERF_UNREGISTER);
-
-	/*
-	 * Ensure our callback won't be called anymore. The buffers
-	 * will be freed after that.
-	 */
-	tracepoint_synchronize_unregister();
-
-	free_percpu(tp_event->perf_events);
-	tp_event->perf_events = NULL;
-
-	if (!--total_ref_count) {
-		for (i = 0; i < PERF_NR_CONTEXTS; i++) {
-			free_percpu(perf_trace_buf[i]);
-			perf_trace_buf[i] = NULL;
-		}
-	}
-out:
-	module_put(tp_event->mod);
-	mutex_unlock(&event_mutex);
-}
-
 __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
 				       struct pt_regs *regs, int *rctxp)
 {
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index c212a7f..5138fea 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -147,7 +147,8 @@ int trace_event_raw_init(struct ftrace_event_call *call)
 }
 EXPORT_SYMBOL_GPL(trace_event_raw_init);
 
-int ftrace_event_reg(struct ftrace_event_call *call, enum trace_reg type)
+int ftrace_event_reg(struct ftrace_event_call *call,
+		     enum trace_reg type, void *data)
 {
 	switch (type) {
 	case TRACE_REG_REGISTER:
@@ -170,6 +171,9 @@ int ftrace_event_reg(struct ftrace_event_call *call, enum trace_reg type)
 					    call->class->perf_probe,
 					    call);
 		return 0;
+	case TRACE_REG_PERF_OPEN:
+	case TRACE_REG_PERF_CLOSE:
+		return 0;
 #endif
 	}
 	return 0;
@@ -209,7 +213,7 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call,
 				tracing_stop_cmdline_record();
 				call->flags &= ~TRACE_EVENT_FL_RECORDED_CMD;
 			}
-			call->class->reg(call, TRACE_REG_UNREGISTER);
+			call->class->reg(call, TRACE_REG_UNREGISTER, NULL);
 		}
 		break;
 	case 1:
@@ -218,7 +222,7 @@ static int ftrace_event_enable_disable(struct ftrace_event_call *call,
 				tracing_start_cmdline_record();
 				call->flags |= TRACE_EVENT_FL_RECORDED_CMD;
 			}
-			ret = call->class->reg(call, TRACE_REG_REGISTER);
+			ret = call->class->reg(call, TRACE_REG_REGISTER, NULL);
 			if (ret) {
 				tracing_stop_cmdline_record();
 				pr_info("event trace: Could not enable event "
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 00d527c..5667f89 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1892,7 +1892,8 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
 #endif	/* CONFIG_PERF_EVENTS */
 
 static __kprobes
-int kprobe_register(struct ftrace_event_call *event, enum trace_reg type)
+int kprobe_register(struct ftrace_event_call *event,
+		    enum trace_reg type, void *data)
 {
 	struct trace_probe *tp = (struct trace_probe *)event->data;
 
@@ -1909,6 +1910,9 @@ int kprobe_register(struct ftrace_event_call *event, enum trace_reg type)
 	case TRACE_REG_PERF_UNREGISTER:
 		disable_trace_probe(tp, TP_FLAG_PROFILE);
 		return 0;
+	case TRACE_REG_PERF_OPEN:
+	case TRACE_REG_PERF_CLOSE:
+		return 0;
 #endif
 	}
 	return 0;
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 4350015..e23515f 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -17,9 +17,9 @@ static DECLARE_BITMAP(enabled_enter_syscalls, NR_syscalls);
 static DECLARE_BITMAP(enabled_exit_syscalls, NR_syscalls);
 
 static int syscall_enter_register(struct ftrace_event_call *event,
-				 enum trace_reg type);
+				 enum trace_reg type, void *data);
 static int syscall_exit_register(struct ftrace_event_call *event,
-				 enum trace_reg type);
+				 enum trace_reg type, void *data);
 
 static int syscall_enter_define_fields(struct ftrace_event_call *call);
 static int syscall_exit_define_fields(struct ftrace_event_call *call);
@@ -649,7 +649,7 @@ void perf_sysexit_disable(struct ftrace_event_call *call)
 #endif /* CONFIG_PERF_EVENTS */
 
 static int syscall_enter_register(struct ftrace_event_call *event,
-				 enum trace_reg type)
+				 enum trace_reg type, void *data)
 {
 	switch (type) {
 	case TRACE_REG_REGISTER:
@@ -664,13 +664,16 @@ static int syscall_enter_register(struct ftrace_event_call *event,
 	case TRACE_REG_PERF_UNREGISTER:
 		perf_sysenter_disable(event);
 		return 0;
+	case TRACE_REG_PERF_OPEN:
+	case TRACE_REG_PERF_CLOSE:
+		return 0;
 #endif
 	}
 	return 0;
 }
 
 static int syscall_exit_register(struct ftrace_event_call *event,
-				 enum trace_reg type)
+				 enum trace_reg type, void *data)
 {
 	switch (type) {
 	case TRACE_REG_REGISTER:
@@ -685,6 +688,9 @@ static int syscall_exit_register(struct ftrace_event_call *event,
 	case TRACE_REG_PERF_UNREGISTER:
 		perf_sysexit_disable(event);
 		return 0;
+	case TRACE_REG_PERF_OPEN:
+	case TRACE_REG_PERF_CLOSE:
+		return 0;
 #endif
 	}
 	return 0;
-- 
1.7.8.3




* [PATCH 5/9] ftrace, perf: Add add/del tracepoint perf registration actions
  2012-02-22 14:40 [PATCH 0/9] [GIT PULL] perf/tracing: fixes and add function trace to perf Steven Rostedt
                   ` (3 preceding siblings ...)
  2012-02-22 14:40 ` [PATCH 4/9] ftrace, perf: Add open/close tracepoint perf registration actions Steven Rostedt
@ 2012-02-22 14:40 ` Steven Rostedt
  2012-02-22 14:40 ` [PATCH 6/9] ftrace: Add FTRACE_ENTRY_REG macro to allow event registration Steven Rostedt
                   ` (3 subsequent siblings)
  8 siblings, 0 replies; 19+ messages in thread
From: Steven Rostedt @ 2012-02-22 14:40 UTC (permalink / raw)
  To: linux-kernel
  Cc: Ingo Molnar, Andrew Morton, Frederic Weisbecker,
	Arnaldo Carvalho de Melo, Jiri Olsa


From: Jiri Olsa <jolsa@redhat.com>

Adding TRACE_REG_PERF_ADD and TRACE_REG_PERF_DEL to handle
perf event schedule in/out actions.

The add action is invoked when the perf event is scheduled in,
while the del action is invoked when the event is scheduled out.

Link: http://lkml.kernel.org/r/1329317514-8131-4-git-send-email-jolsa@redhat.com

Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h    |    2 ++
 kernel/trace/trace_event_perf.c |    4 +++-
 kernel/trace/trace_events.c     |    2 ++
 kernel/trace/trace_kprobe.c     |    2 ++
 kernel/trace/trace_syscalls.c   |    4 ++++
 5 files changed, 13 insertions(+), 1 deletions(-)

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 195e360..2bf677c 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -148,6 +148,8 @@ enum trace_reg {
 	TRACE_REG_PERF_UNREGISTER,
 	TRACE_REG_PERF_OPEN,
 	TRACE_REG_PERF_CLOSE,
+	TRACE_REG_PERF_ADD,
+	TRACE_REG_PERF_DEL,
 };
 
 struct ftrace_event_call;
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 0cfcc37..d72af0b 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -211,12 +211,14 @@ int perf_trace_add(struct perf_event *p_event, int flags)
 	list = this_cpu_ptr(pcpu_list);
 	hlist_add_head_rcu(&p_event->hlist_entry, list);
 
-	return 0;
+	return tp_event->class->reg(tp_event, TRACE_REG_PERF_ADD, p_event);
 }
 
 void perf_trace_del(struct perf_event *p_event, int flags)
 {
+	struct ftrace_event_call *tp_event = p_event->tp_event;
 	hlist_del_rcu(&p_event->hlist_entry);
+	tp_event->class->reg(tp_event, TRACE_REG_PERF_DEL, p_event);
 }
 
 __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c
index 5138fea..079a93a 100644
--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -173,6 +173,8 @@ int ftrace_event_reg(struct ftrace_event_call *call,
 		return 0;
 	case TRACE_REG_PERF_OPEN:
 	case TRACE_REG_PERF_CLOSE:
+	case TRACE_REG_PERF_ADD:
+	case TRACE_REG_PERF_DEL:
 		return 0;
 #endif
 	}
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 5667f89..580a05e 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1912,6 +1912,8 @@ int kprobe_register(struct ftrace_event_call *event,
 		return 0;
 	case TRACE_REG_PERF_OPEN:
 	case TRACE_REG_PERF_CLOSE:
+	case TRACE_REG_PERF_ADD:
+	case TRACE_REG_PERF_DEL:
 		return 0;
 #endif
 	}
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index e23515f..96fc733 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -666,6 +666,8 @@ static int syscall_enter_register(struct ftrace_event_call *event,
 		return 0;
 	case TRACE_REG_PERF_OPEN:
 	case TRACE_REG_PERF_CLOSE:
+	case TRACE_REG_PERF_ADD:
+	case TRACE_REG_PERF_DEL:
 		return 0;
 #endif
 	}
@@ -690,6 +692,8 @@ static int syscall_exit_register(struct ftrace_event_call *event,
 		return 0;
 	case TRACE_REG_PERF_OPEN:
 	case TRACE_REG_PERF_CLOSE:
+	case TRACE_REG_PERF_ADD:
+	case TRACE_REG_PERF_DEL:
 		return 0;
 #endif
 	}
-- 
1.7.8.3




* [PATCH 6/9] ftrace: Add FTRACE_ENTRY_REG macro to allow event registration
  2012-02-22 14:40 [PATCH 0/9] [GIT PULL] perf/tracing: fixes and add function trace to perf Steven Rostedt
                   ` (4 preceding siblings ...)
  2012-02-22 14:40 ` [PATCH 5/9] ftrace, perf: Add add/del " Steven Rostedt
@ 2012-02-22 14:40 ` Steven Rostedt
  2012-02-22 14:40 ` [PATCH 7/9] ftrace, perf: Add support to use function tracepoint in perf Steven Rostedt
                   ` (2 subsequent siblings)
  8 siblings, 0 replies; 19+ messages in thread
From: Steven Rostedt @ 2012-02-22 14:40 UTC (permalink / raw)
  To: linux-kernel
  Cc: Ingo Molnar, Andrew Morton, Frederic Weisbecker,
	Arnaldo Carvalho de Melo, Jiri Olsa


From: Jiri Olsa <jolsa@redhat.com>

Adding the FTRACE_ENTRY_REG macro so that particular ftrace entries
can specify a registration function and thus become accessible
via perf.

This will be used in an upcoming patch for the function trace event.

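For illustration only (the actual hookup for the function event comes in the
next patch of this series), an entry defined with the new macro passes its
registration callback as the extra argument:

  FTRACE_ENTRY_REG(function, ftrace_entry,

	TRACE_FN,

	F_STRUCT(
		__field(	unsigned long,	ip		)
		__field(	unsigned long,	parent_ip	)
	),

	F_printk(" %lx <-- %lx", __entry->ip, __entry->parent_ip),

	perf_ftrace_event_register
  );

Plain FTRACE_ENTRY() users are unaffected, since that macro now simply
expands to FTRACE_ENTRY_REG(..., NULL).
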
Link: http://lkml.kernel.org/r/1329317514-8131-5-git-send-email-jolsa@redhat.com

Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 kernel/trace/trace.h        |    4 ++++
 kernel/trace/trace_export.c |   18 ++++++++++++++++--
 2 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 55c6ea0..638476a 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -68,6 +68,10 @@ enum trace_type {
 #undef FTRACE_ENTRY_DUP
 #define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk)
 
+#undef FTRACE_ENTRY_REG
+#define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, regfn) \
+	FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print))
+
 #include "trace_entries.h"
 
 /*
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index bbeec31..f74de86 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -18,6 +18,14 @@
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM	ftrace
 
+/*
+ * The FTRACE_ENTRY_REG macro allows ftrace entry to define register
+ * function and thus become accesible via perf.
+ */
+#undef FTRACE_ENTRY_REG
+#define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, regfn) \
+	FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print))
+
 /* not needed for this file */
 #undef __field_struct
 #define __field_struct(type, item)
@@ -152,13 +160,14 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call)	\
 #undef F_printk
 #define F_printk(fmt, args...) #fmt ", "  __stringify(args)
 
-#undef FTRACE_ENTRY
-#define FTRACE_ENTRY(call, struct_name, etype, tstruct, print)		\
+#undef FTRACE_ENTRY_REG
+#define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, regfn)\
 									\
 struct ftrace_event_class event_class_ftrace_##call = {			\
 	.system			= __stringify(TRACE_SYSTEM),		\
 	.define_fields		= ftrace_define_fields_##call,		\
 	.fields			= LIST_HEAD_INIT(event_class_ftrace_##call.fields),\
+	.reg			= regfn,				\
 };									\
 									\
 struct ftrace_event_call __used event_##call = {			\
@@ -170,4 +179,9 @@ struct ftrace_event_call __used event_##call = {			\
 struct ftrace_event_call __used						\
 __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call;
 
+#undef FTRACE_ENTRY
+#define FTRACE_ENTRY(call, struct_name, etype, tstruct, print)		\
+	FTRACE_ENTRY_REG(call, struct_name, etype,			\
+			 PARAMS(tstruct), PARAMS(print), NULL)
+
 #include "trace_entries.h"
-- 
1.7.8.3



[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 7/9] ftrace, perf: Add support to use function tracepoint in perf
  2012-02-22 14:40 [PATCH 0/9] [GIT PULL] perf/tracing: fixes and add function trace to perf Steven Rostedt
                   ` (5 preceding siblings ...)
  2012-02-22 14:40 ` [PATCH 6/9] ftrace: Add FTRACE_ENTRY_REG macro to allow event registration Steven Rostedt
@ 2012-02-22 14:40 ` Steven Rostedt
  2012-02-22 14:40 ` [PATCH 8/9] ftrace: Allow to specify filter field type for ftrace events Steven Rostedt
  2012-02-22 14:40 ` [PATCH 9/9] ftrace, perf: Add filter support for function trace event Steven Rostedt
  8 siblings, 0 replies; 19+ messages in thread
From: Steven Rostedt @ 2012-02-22 14:40 UTC (permalink / raw)
  To: linux-kernel
  Cc: Ingo Molnar, Andrew Morton, Frederic Weisbecker,
	Arnaldo Carvalho de Melo, Jiri Olsa

[-- Attachment #1: Type: text/plain, Size: 6978 bytes --]

From: Jiri Olsa <jolsa@redhat.com>

Adding perf registration support for the ftrace function event,
so it is now possible to register it via the perf interface.

The perf_event struct statically contains an ftrace_ops structure as
a handle for the function tracer. The function tracer is
registered/unregistered in the open/close actions.

To be efficient, the ftrace_ops is enabled/disabled each time the
traced process is scheduled in/out (via the TRACE_REG_PERF_(ADD|DEL)
handlers). This way tracing is enabled only while the process is
running. This approach is used intentionally instead of the event's
hw state PERF_HES_STOPPED, which would not disable the ftrace_ops
itself.
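
Condensed from perf_ftrace_event_register() in the patch below, the
mapping between the perf callbacks and the ftrace_ops handling is
roughly:

  /* sketch of the dispatch done in perf_ftrace_event_register() */
  case TRACE_REG_PERF_OPEN:    /* event open:  register ftrace_ops    */
          return perf_ftrace_function_register(data);
  case TRACE_REG_PERF_CLOSE:   /* event close: unregister ftrace_ops  */
          return perf_ftrace_function_unregister(data);
  case TRACE_REG_PERF_ADD:     /* sched in:    enable the ftrace_ops  */
          perf_ftrace_function_enable(data);
          return 0;
  case TRACE_REG_PERF_DEL:     /* sched out:   disable the ftrace_ops */
          perf_ftrace_function_disable(data);
          return 0;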

It is now possible to use function trace within perf commands
like:

  perf record -e ftrace:function ls
  perf stat -e ftrace:function ls

The function trace event is allowed only for root.
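
The restriction is enforced in perf_trace_event_perm(); the check added
in the patch below is:

  /* excerpt from the perf_trace_event_perm() hunk below */
  if (ftrace_event_is_function(tp_event) &&
      perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
          return -EPERM;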

Link: http://lkml.kernel.org/r/1329317514-8131-6-git-send-email-jolsa@redhat.com

Acked-by: Frederic Weisbecker <fweisbec@gmail.com>
Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/perf_event.h      |    3 +
 kernel/trace/trace.h            |   11 +++++
 kernel/trace/trace_entries.h    |    6 ++-
 kernel/trace/trace_event_perf.c |   86 +++++++++++++++++++++++++++++++++++++++
 kernel/trace/trace_export.c     |    5 ++
 5 files changed, 109 insertions(+), 2 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 412b790..92a056f 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -859,6 +859,9 @@ struct perf_event {
 #ifdef CONFIG_EVENT_TRACING
 	struct ftrace_event_call	*tp_event;
 	struct event_filter		*filter;
+#ifdef CONFIG_FUNCTION_TRACER
+	struct ftrace_ops               ftrace_ops;
+#endif
 #endif
 
 #ifdef CONFIG_CGROUP_PERF
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 638476a..76a1c50 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -595,6 +595,8 @@ static inline int ftrace_trace_task(struct task_struct *task)
 static inline int ftrace_is_dead(void) { return 0; }
 #endif
 
+int ftrace_event_is_function(struct ftrace_event_call *call);
+
 /*
  * struct trace_parser - servers for reading the user input separated by spaces
  * @cont: set if the input is not complete - no final space char was found
@@ -832,4 +834,13 @@ extern const char *__stop___trace_bprintk_fmt[];
 	FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
 #include "trace_entries.h"
 
+#ifdef CONFIG_PERF_EVENTS
+#ifdef CONFIG_FUNCTION_TRACER
+int perf_ftrace_event_register(struct ftrace_event_call *call,
+			       enum trace_reg type, void *data);
+#else
+#define perf_ftrace_event_register NULL
+#endif /* CONFIG_FUNCTION_TRACER */
+#endif /* CONFIG_PERF_EVENTS */
+
 #endif /* _LINUX_KERNEL_TRACE_H */
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index 9336590..47db7ed 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -55,7 +55,7 @@
 /*
  * Function trace entry - function address and parent function address:
  */
-FTRACE_ENTRY(function, ftrace_entry,
+FTRACE_ENTRY_REG(function, ftrace_entry,
 
 	TRACE_FN,
 
@@ -64,7 +64,9 @@ FTRACE_ENTRY(function, ftrace_entry,
 		__field(	unsigned long,	parent_ip	)
 	),
 
-	F_printk(" %lx <-- %lx", __entry->ip, __entry->parent_ip)
+	F_printk(" %lx <-- %lx", __entry->ip, __entry->parent_ip),
+
+	perf_ftrace_event_register
 );
 
 /* Function call entry */
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index d72af0b..fdeeb5c 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -24,6 +24,11 @@ static int	total_ref_count;
 static int perf_trace_event_perm(struct ftrace_event_call *tp_event,
 				 struct perf_event *p_event)
 {
+	/* The ftrace function trace is allowed only for root. */
+	if (ftrace_event_is_function(tp_event) &&
+	    perf_paranoid_kernel() && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
 	/* No tracing, just counting, so no obvious leak */
 	if (!(p_event->attr.sample_type & PERF_SAMPLE_RAW))
 		return 0;
@@ -250,3 +255,84 @@ __kprobes void *perf_trace_buf_prepare(int size, unsigned short type,
 	return raw_data;
 }
 EXPORT_SYMBOL_GPL(perf_trace_buf_prepare);
+
+#ifdef CONFIG_FUNCTION_TRACER
+static void
+perf_ftrace_function_call(unsigned long ip, unsigned long parent_ip)
+{
+	struct ftrace_entry *entry;
+	struct hlist_head *head;
+	struct pt_regs regs;
+	int rctx;
+
+#define ENTRY_SIZE (ALIGN(sizeof(struct ftrace_entry) + sizeof(u32), \
+		    sizeof(u64)) - sizeof(u32))
+
+	BUILD_BUG_ON(ENTRY_SIZE > PERF_MAX_TRACE_SIZE);
+
+	perf_fetch_caller_regs(&regs);
+
+	entry = perf_trace_buf_prepare(ENTRY_SIZE, TRACE_FN, NULL, &rctx);
+	if (!entry)
+		return;
+
+	entry->ip = ip;
+	entry->parent_ip = parent_ip;
+
+	head = this_cpu_ptr(event_function.perf_events);
+	perf_trace_buf_submit(entry, ENTRY_SIZE, rctx, 0,
+			      1, &regs, head);
+
+#undef ENTRY_SIZE
+}
+
+static int perf_ftrace_function_register(struct perf_event *event)
+{
+	struct ftrace_ops *ops = &event->ftrace_ops;
+
+	ops->flags |= FTRACE_OPS_FL_CONTROL;
+	ops->func = perf_ftrace_function_call;
+	return register_ftrace_function(ops);
+}
+
+static int perf_ftrace_function_unregister(struct perf_event *event)
+{
+	struct ftrace_ops *ops = &event->ftrace_ops;
+	return unregister_ftrace_function(ops);
+}
+
+static void perf_ftrace_function_enable(struct perf_event *event)
+{
+	ftrace_function_local_enable(&event->ftrace_ops);
+}
+
+static void perf_ftrace_function_disable(struct perf_event *event)
+{
+	ftrace_function_local_disable(&event->ftrace_ops);
+}
+
+int perf_ftrace_event_register(struct ftrace_event_call *call,
+			       enum trace_reg type, void *data)
+{
+	switch (type) {
+	case TRACE_REG_REGISTER:
+	case TRACE_REG_UNREGISTER:
+		break;
+	case TRACE_REG_PERF_REGISTER:
+	case TRACE_REG_PERF_UNREGISTER:
+		return 0;
+	case TRACE_REG_PERF_OPEN:
+		return perf_ftrace_function_register(data);
+	case TRACE_REG_PERF_CLOSE:
+		return perf_ftrace_function_unregister(data);
+	case TRACE_REG_PERF_ADD:
+		perf_ftrace_function_enable(data);
+		return 0;
+	case TRACE_REG_PERF_DEL:
+		perf_ftrace_function_disable(data);
+		return 0;
+	}
+
+	return -EINVAL;
+}
+#endif /* CONFIG_FUNCTION_TRACER */
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index f74de86..a3dbee6 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -184,4 +184,9 @@ __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call;
 	FTRACE_ENTRY_REG(call, struct_name, etype,			\
 			 PARAMS(tstruct), PARAMS(print), NULL)
 
+int ftrace_event_is_function(struct ftrace_event_call *call)
+{
+	return call == &event_function;
+}
+
 #include "trace_entries.h"
-- 
1.7.8.3



[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 8/9] ftrace: Allow to specify filter field type for ftrace events
  2012-02-22 14:40 [PATCH 0/9] [GIT PULL] perf/tracing: fixes and add function trace to perf Steven Rostedt
                   ` (6 preceding siblings ...)
  2012-02-22 14:40 ` [PATCH 7/9] ftrace, perf: Add support to use function tracepoint in perf Steven Rostedt
@ 2012-02-22 14:40 ` Steven Rostedt
  2012-02-22 14:40 ` [PATCH 9/9] ftrace, perf: Add filter support for function trace event Steven Rostedt
  8 siblings, 0 replies; 19+ messages in thread
From: Steven Rostedt @ 2012-02-22 14:40 UTC (permalink / raw)
  To: linux-kernel
  Cc: Ingo Molnar, Andrew Morton, Frederic Weisbecker,
	Arnaldo Carvalho de Melo, Jiri Olsa

[-- Attachment #1: Type: text/plain, Size: 12870 bytes --]

From: Jiri Olsa <jolsa@redhat.com>

Adding the FILTER_TRACE_FN event field type for the function tracepoint
event, so it can be properly recognized within the filtering code.

Currently all fields of ftrace subsystem events share the common
field type FILTER_OTHER. Since the function trace fields need
special care within the filtering code, they need to be recognized
properly, hence the new FILTER_TRACE_FN field type.

Adding a filter parameter to the FTRACE_ENTRY macro to specify the
filter field type for the event.
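
With the extra parameter only the function entry passes the new type,
while all other ftrace entries keep FILTER_OTHER; roughly:

  /* sketch: the function entry now carries FILTER_TRACE_FN */
  FTRACE_ENTRY_REG(function, ftrace_entry,

          TRACE_FN,

          F_STRUCT(
                  __field(unsigned long, ip)
                  __field(unsigned long, parent_ip)
          ),

          F_printk(" %lx <-- %lx", __entry->ip, __entry->parent_ip),

          FILTER_TRACE_FN,

          perf_ftrace_event_register
  );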

Link: http://lkml.kernel.org/r/1329317514-8131-7-git-send-email-jolsa@redhat.com

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace_event.h       |    1 +
 kernel/trace/trace.h               |   23 +++++++++-------
 kernel/trace/trace_entries.h       |   48 ++++++++++++++++++++++++---------
 kernel/trace/trace_events_filter.c |    7 ++++-
 kernel/trace/trace_export.c        |   51 +++++++++++++++++++----------------
 5 files changed, 83 insertions(+), 47 deletions(-)

diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h
index 2bf677c..dd478fc 100644
--- a/include/linux/ftrace_event.h
+++ b/include/linux/ftrace_event.h
@@ -245,6 +245,7 @@ enum {
 	FILTER_STATIC_STRING,
 	FILTER_DYN_STRING,
 	FILTER_PTR_STRING,
+	FILTER_TRACE_FN,
 };
 
 #define EVENT_STORAGE_SIZE 128
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 76a1c50..29f93cd 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -56,21 +56,23 @@ enum trace_type {
 #define F_STRUCT(args...)		args
 
 #undef FTRACE_ENTRY
-#define FTRACE_ENTRY(name, struct_name, id, tstruct, print)	\
-	struct struct_name {					\
-		struct trace_entry	ent;			\
-		tstruct						\
+#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter)	\
+	struct struct_name {						\
+		struct trace_entry	ent;				\
+		tstruct							\
 	}
 
 #undef TP_ARGS
 #define TP_ARGS(args...)	args
 
 #undef FTRACE_ENTRY_DUP
-#define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk)
+#define FTRACE_ENTRY_DUP(name, name_struct, id, tstruct, printk, filter)
 
 #undef FTRACE_ENTRY_REG
-#define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, regfn) \
-	FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print))
+#define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print,	\
+			 filter, regfn) \
+	FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \
+		     filter)
 
 #include "trace_entries.h"
 
@@ -826,12 +828,13 @@ extern const char *__start___trace_bprintk_fmt[];
 extern const char *__stop___trace_bprintk_fmt[];
 
 #undef FTRACE_ENTRY
-#define FTRACE_ENTRY(call, struct_name, id, tstruct, print)		\
+#define FTRACE_ENTRY(call, struct_name, id, tstruct, print, filter)	\
 	extern struct ftrace_event_call					\
 	__attribute__((__aligned__(4))) event_##call;
 #undef FTRACE_ENTRY_DUP
-#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print)		\
-	FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print))
+#define FTRACE_ENTRY_DUP(call, struct_name, id, tstruct, print, filter)	\
+	FTRACE_ENTRY(call, struct_name, id, PARAMS(tstruct), PARAMS(print), \
+		     filter)
 #include "trace_entries.h"
 
 #ifdef CONFIG_PERF_EVENTS
diff --git a/kernel/trace/trace_entries.h b/kernel/trace/trace_entries.h
index 47db7ed..d91eb05 100644
--- a/kernel/trace/trace_entries.h
+++ b/kernel/trace/trace_entries.h
@@ -66,6 +66,8 @@ FTRACE_ENTRY_REG(function, ftrace_entry,
 
 	F_printk(" %lx <-- %lx", __entry->ip, __entry->parent_ip),
 
+	FILTER_TRACE_FN,
+
 	perf_ftrace_event_register
 );
 
@@ -80,7 +82,9 @@ FTRACE_ENTRY(funcgraph_entry, ftrace_graph_ent_entry,
 		__field_desc(	int,		graph_ent,	depth		)
 	),
 
-	F_printk("--> %lx (%d)", __entry->func, __entry->depth)
+	F_printk("--> %lx (%d)", __entry->func, __entry->depth),
+
+	FILTER_OTHER
 );
 
 /* Function return entry */
@@ -100,7 +104,9 @@ FTRACE_ENTRY(funcgraph_exit, ftrace_graph_ret_entry,
 	F_printk("<-- %lx (%d) (start: %llx  end: %llx) over: %d",
 		 __entry->func, __entry->depth,
 		 __entry->calltime, __entry->rettime,
-		 __entry->depth)
+		 __entry->depth),
+
+	FILTER_OTHER
 );
 
 /*
@@ -129,8 +135,9 @@ FTRACE_ENTRY(context_switch, ctx_switch_entry,
 	F_printk("%u:%u:%u  ==> %u:%u:%u [%03u]",
 		 __entry->prev_pid, __entry->prev_prio, __entry->prev_state,
 		 __entry->next_pid, __entry->next_prio, __entry->next_state,
-		 __entry->next_cpu
-		)
+		 __entry->next_cpu),
+
+	FILTER_OTHER
 );
 
 /*
@@ -148,8 +155,9 @@ FTRACE_ENTRY_DUP(wakeup, ctx_switch_entry,
 	F_printk("%u:%u:%u  ==+ %u:%u:%u [%03u]",
 		 __entry->prev_pid, __entry->prev_prio, __entry->prev_state,
 		 __entry->next_pid, __entry->next_prio, __entry->next_state,
-		 __entry->next_cpu
-		)
+		 __entry->next_cpu),
+
+	FILTER_OTHER
 );
 
 /*
@@ -171,7 +179,9 @@ FTRACE_ENTRY(kernel_stack, stack_entry,
 		 "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n",
 		 __entry->caller[0], __entry->caller[1], __entry->caller[2],
 		 __entry->caller[3], __entry->caller[4], __entry->caller[5],
-		 __entry->caller[6], __entry->caller[7])
+		 __entry->caller[6], __entry->caller[7]),
+
+	FILTER_OTHER
 );
 
 FTRACE_ENTRY(user_stack, userstack_entry,
@@ -187,7 +197,9 @@ FTRACE_ENTRY(user_stack, userstack_entry,
 		 "\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n\t=> (%08lx)\n",
 		 __entry->caller[0], __entry->caller[1], __entry->caller[2],
 		 __entry->caller[3], __entry->caller[4], __entry->caller[5],
-		 __entry->caller[6], __entry->caller[7])
+		 __entry->caller[6], __entry->caller[7]),
+
+	FILTER_OTHER
 );
 
 /*
@@ -204,7 +216,9 @@ FTRACE_ENTRY(bprint, bprint_entry,
 	),
 
 	F_printk("%08lx fmt:%p",
-		 __entry->ip, __entry->fmt)
+		 __entry->ip, __entry->fmt),
+
+	FILTER_OTHER
 );
 
 FTRACE_ENTRY(print, print_entry,
@@ -217,7 +231,9 @@ FTRACE_ENTRY(print, print_entry,
 	),
 
 	F_printk("%08lx %s",
-		 __entry->ip, __entry->buf)
+		 __entry->ip, __entry->buf),
+
+	FILTER_OTHER
 );
 
 FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw,
@@ -236,7 +252,9 @@ FTRACE_ENTRY(mmiotrace_rw, trace_mmiotrace_rw,
 
 	F_printk("%lx %lx %lx %d %x %x",
 		 (unsigned long)__entry->phys, __entry->value, __entry->pc,
-		 __entry->map_id, __entry->opcode, __entry->width)
+		 __entry->map_id, __entry->opcode, __entry->width),
+
+	FILTER_OTHER
 );
 
 FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map,
@@ -254,7 +272,9 @@ FTRACE_ENTRY(mmiotrace_map, trace_mmiotrace_map,
 
 	F_printk("%lx %lx %lx %d %x",
 		 (unsigned long)__entry->phys, __entry->virt, __entry->len,
-		 __entry->map_id, __entry->opcode)
+		 __entry->map_id, __entry->opcode),
+
+	FILTER_OTHER
 );
 
 
@@ -274,6 +294,8 @@ FTRACE_ENTRY(branch, trace_branch,
 
 	F_printk("%u:%s:%s (%u)",
 		 __entry->line,
-		 __entry->func, __entry->file, __entry->correct)
+		 __entry->func, __entry->file, __entry->correct),
+
+	FILTER_OTHER
 );
 
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 76afaee..3da3d0e 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -899,6 +899,11 @@ int filter_assign_type(const char *type)
 	return FILTER_OTHER;
 }
 
+static bool is_function_field(struct ftrace_event_field *field)
+{
+	return field->filter_type == FILTER_TRACE_FN;
+}
+
 static bool is_string_field(struct ftrace_event_field *field)
 {
 	return field->filter_type == FILTER_DYN_STRING ||
@@ -986,7 +991,7 @@ static int init_pred(struct filter_parse_state *ps,
 			fn = filter_pred_strloc;
 		else
 			fn = filter_pred_pchar;
-	} else {
+	} else if (!is_function_field(field)) {
 		if (field->is_signed)
 			ret = strict_strtoll(pred->regex.pattern, 0, &val);
 		else
diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c
index a3dbee6..7b46c9b 100644
--- a/kernel/trace/trace_export.c
+++ b/kernel/trace/trace_export.c
@@ -23,8 +23,10 @@
  * function and thus become accessible via perf.
  */
 #undef FTRACE_ENTRY_REG
-#define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, regfn) \
-	FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print))
+#define FTRACE_ENTRY_REG(name, struct_name, id, tstruct, print, \
+			 filter, regfn) \
+	FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \
+		     filter)
 
 /* not needed for this file */
 #undef __field_struct
@@ -52,21 +54,22 @@
 #define F_printk(fmt, args...) fmt, args
 
 #undef FTRACE_ENTRY
-#define FTRACE_ENTRY(name, struct_name, id, tstruct, print)	\
-struct ____ftrace_##name {					\
-	tstruct							\
-};								\
-static void __always_unused ____ftrace_check_##name(void)	\
-{								\
-	struct ____ftrace_##name *__entry = NULL;		\
-								\
-	/* force compile-time check on F_printk() */		\
-	printk(print);						\
+#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter)	\
+struct ____ftrace_##name {						\
+	tstruct								\
+};									\
+static void __always_unused ____ftrace_check_##name(void)		\
+{									\
+	struct ____ftrace_##name *__entry = NULL;			\
+									\
+	/* force compile-time check on F_printk() */			\
+	printk(print);							\
 }
 
 #undef FTRACE_ENTRY_DUP
-#define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print)	\
-	FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print))
+#define FTRACE_ENTRY_DUP(name, struct_name, id, tstruct, print, filter)	\
+	FTRACE_ENTRY(name, struct_name, id, PARAMS(tstruct), PARAMS(print), \
+		     filter)
 
 #include "trace_entries.h"
 
@@ -75,7 +78,7 @@ static void __always_unused ____ftrace_check_##name(void)	\
 	ret = trace_define_field(event_call, #type, #item,		\
 				 offsetof(typeof(field), item),		\
 				 sizeof(field.item),			\
-				 is_signed_type(type), FILTER_OTHER);	\
+				 is_signed_type(type), filter_type);	\
 	if (ret)							\
 		return ret;
 
@@ -85,7 +88,7 @@ static void __always_unused ____ftrace_check_##name(void)	\
 				 offsetof(typeof(field),		\
 					  container.item),		\
 				 sizeof(field.container.item),		\
-				 is_signed_type(type), FILTER_OTHER);	\
+				 is_signed_type(type), filter_type);	\
 	if (ret)							\
 		return ret;
 
@@ -99,7 +102,7 @@ static void __always_unused ____ftrace_check_##name(void)	\
 		ret = trace_define_field(event_call, event_storage, #item, \
 				 offsetof(typeof(field), item),		\
 				 sizeof(field.item),			\
-				 is_signed_type(type), FILTER_OTHER);	\
+				 is_signed_type(type), filter_type);	\
 		mutex_unlock(&event_storage_mutex);			\
 		if (ret)						\
 			return ret;					\
@@ -112,7 +115,7 @@ static void __always_unused ____ftrace_check_##name(void)	\
 				 offsetof(typeof(field),		\
 					  container.item),		\
 				 sizeof(field.container.item),		\
-				 is_signed_type(type), FILTER_OTHER);	\
+				 is_signed_type(type), filter_type);	\
 	if (ret)							\
 		return ret;
 
@@ -120,17 +123,18 @@ static void __always_unused ____ftrace_check_##name(void)	\
 #define __dynamic_array(type, item)					\
 	ret = trace_define_field(event_call, #type, #item,		\
 				 offsetof(typeof(field), item),		\
-				 0, is_signed_type(type), FILTER_OTHER);\
+				 0, is_signed_type(type), filter_type);\
 	if (ret)							\
 		return ret;
 
 #undef FTRACE_ENTRY
-#define FTRACE_ENTRY(name, struct_name, id, tstruct, print)		\
+#define FTRACE_ENTRY(name, struct_name, id, tstruct, print, filter)	\
 int									\
 ftrace_define_fields_##name(struct ftrace_event_call *event_call)	\
 {									\
 	struct struct_name field;					\
 	int ret;							\
+	int filter_type = filter;					\
 									\
 	tstruct;							\
 									\
@@ -161,7 +165,8 @@ ftrace_define_fields_##name(struct ftrace_event_call *event_call)	\
 #define F_printk(fmt, args...) #fmt ", "  __stringify(args)
 
 #undef FTRACE_ENTRY_REG
-#define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, regfn)\
+#define FTRACE_ENTRY_REG(call, struct_name, etype, tstruct, print, filter,\
+			 regfn)						\
 									\
 struct ftrace_event_class event_class_ftrace_##call = {			\
 	.system			= __stringify(TRACE_SYSTEM),		\
@@ -180,9 +185,9 @@ struct ftrace_event_call __used						\
 __attribute__((section("_ftrace_events"))) *__event_##call = &event_##call;
 
 #undef FTRACE_ENTRY
-#define FTRACE_ENTRY(call, struct_name, etype, tstruct, print)		\
+#define FTRACE_ENTRY(call, struct_name, etype, tstruct, print, filter)	\
 	FTRACE_ENTRY_REG(call, struct_name, etype,			\
-			 PARAMS(tstruct), PARAMS(print), NULL)
+			 PARAMS(tstruct), PARAMS(print), filter, NULL)
 
 int ftrace_event_is_function(struct ftrace_event_call *call)
 {
-- 
1.7.8.3



[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 9/9] ftrace, perf: Add filter support for function trace event
  2012-02-22 14:40 [PATCH 0/9] [GIT PULL] perf/tracing: fixes and add function trace to perf Steven Rostedt
                   ` (7 preceding siblings ...)
  2012-02-22 14:40 ` [PATCH 8/9] ftrace: Allow to specify filter field type for ftrace events Steven Rostedt
@ 2012-02-22 14:40 ` Steven Rostedt
  8 siblings, 0 replies; 19+ messages in thread
From: Steven Rostedt @ 2012-02-22 14:40 UTC (permalink / raw)
  To: linux-kernel
  Cc: Ingo Molnar, Andrew Morton, Frederic Weisbecker,
	Arnaldo Carvalho de Melo, Jiri Olsa

[-- Attachment #1: Type: text/plain, Size: 10490 bytes --]

From: Jiri Olsa <jolsa@redhat.com>

Adding support to filter the function trace event via the perf
interface. It is now possible to use the filter interface
in the perf tool like:

  perf record -e ftrace:function --filter="(ip == mm_*)" ls

The filter syntax is restricted to the 'ip' field only,
and the following operators are accepted: '==', '!=' and '||',
ending up with filter strings like:

  ip == f1[, ]f2 ... || ip != f3[, ]f4 ...

with comma ',' or space ' ' as a function separator. If the
space ' ' is used as a separator, the right side of the
assignment needs to be enclosed in double quotes '"', e.g.:

  perf record -e ftrace:function --filter '(ip == do_execve,sys_*,ext*)' ls
  perf record -e ftrace:function --filter '(ip == "do_execve,sys_*,ext*")' ls
  perf record -e ftrace:function --filter '(ip == "do_execve sys_* ext*")' ls

The '==' operator adds a trace filter with the same effect as would
be added via the set_ftrace_filter file.

The '!=' operator adds a trace filter with the same effect as would
be added via the set_ftrace_notrace file.

The right side of the '==' and '!=' operators is a list of functions
or regexps to be added to the filter, separated by spaces.

The '||' operator is used for connecting multiple filter definitions
together. It is possible to have more than one '==' or '!='
operator within one filter string.
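
As an illustration of combining the operators (the function names here
are only examples), a single filter string can mix filter and notrace
parts:

  perf record -e ftrace:function --filter 'ip == sys_read,sys_write || ip != sys_open' ls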

Link: http://lkml.kernel.org/r/1329317514-8131-8-git-send-email-jolsa@redhat.com

Signed-off-by: Jiri Olsa <jolsa@redhat.com>
Signed-off-by: Steven Rostedt <rostedt@goodmis.org>
---
 include/linux/ftrace.h             |    7 +-
 kernel/trace/ftrace.c              |    6 ++
 kernel/trace/trace.h               |    2 -
 kernel/trace/trace_event_perf.c    |    4 +-
 kernel/trace/trace_events_filter.c |  165 ++++++++++++++++++++++++++++++++++--
 5 files changed, 172 insertions(+), 12 deletions(-)

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 64a309d..72a6cab 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -250,6 +250,7 @@ int ftrace_set_notrace(struct ftrace_ops *ops, unsigned char *buf,
 			int len, int reset);
 void ftrace_set_global_filter(unsigned char *buf, int len, int reset);
 void ftrace_set_global_notrace(unsigned char *buf, int len, int reset);
+void ftrace_free_filter(struct ftrace_ops *ops);
 
 int register_ftrace_command(struct ftrace_func_command *cmd);
 int unregister_ftrace_command(struct ftrace_func_command *cmd);
@@ -380,9 +381,6 @@ extern void ftrace_enable_daemon(void);
 #else
 static inline int skip_trace(unsigned long ip) { return 0; }
 static inline int ftrace_force_update(void) { return 0; }
-static inline void ftrace_set_filter(unsigned char *buf, int len, int reset)
-{
-}
 static inline void ftrace_disable_daemon(void) { }
 static inline void ftrace_enable_daemon(void) { }
 static inline void ftrace_release_mod(struct module *mod) {}
@@ -406,6 +404,9 @@ static inline int ftrace_text_reserved(void *start, void *end)
  */
 #define ftrace_regex_open(ops, flag, inod, file) ({ -ENODEV; })
 #define ftrace_set_early_filter(ops, buf, enable) do { } while (0)
+#define ftrace_set_filter(ops, buf, len, reset) ({ -ENODEV; })
+#define ftrace_set_notrace(ops, buf, len, reset) ({ -ENODEV; })
+#define ftrace_free_filter(ops) do { } while (0)
 
 static inline ssize_t ftrace_filter_write(struct file *file, const char __user *ubuf,
 			    size_t cnt, loff_t *ppos) { return -ENODEV; }
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index f615f97..867bd1d 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1186,6 +1186,12 @@ static void free_ftrace_hash_rcu(struct ftrace_hash *hash)
 	call_rcu_sched(&hash->rcu, __free_ftrace_hash_rcu);
 }
 
+void ftrace_free_filter(struct ftrace_ops *ops)
+{
+	free_ftrace_hash(ops->filter_hash);
+	free_ftrace_hash(ops->notrace_hash);
+}
+
 static struct ftrace_hash *alloc_ftrace_hash(int size_bits)
 {
 	struct ftrace_hash *hash;
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 29f93cd..54faec7 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -776,9 +776,7 @@ struct filter_pred {
 	u64 			val;
 	struct regex		regex;
 	unsigned short		*ops;
-#ifdef CONFIG_FTRACE_STARTUP_TEST
 	struct ftrace_event_field *field;
-#endif
 	int 			offset;
 	int 			not;
 	int 			op;
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index fdeeb5c..fee3752 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -298,7 +298,9 @@ static int perf_ftrace_function_register(struct perf_event *event)
 static int perf_ftrace_function_unregister(struct perf_event *event)
 {
 	struct ftrace_ops *ops = &event->ftrace_ops;
-	return unregister_ftrace_function(ops);
+	int ret = unregister_ftrace_function(ops);
+	ftrace_free_filter(ops);
+	return ret;
 }
 
 static void perf_ftrace_function_enable(struct perf_event *event)
diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 3da3d0e..431dba8 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -81,6 +81,7 @@ enum {
 	FILT_ERR_TOO_MANY_PREDS,
 	FILT_ERR_MISSING_FIELD,
 	FILT_ERR_INVALID_FILTER,
+	FILT_ERR_IP_FIELD_ONLY,
 };
 
 static char *err_text[] = {
@@ -96,6 +97,7 @@ static char *err_text[] = {
 	"Too many terms in predicate expression",
 	"Missing field name and/or value",
 	"Meaningless filter expression",
+	"Only 'ip' field is supported for function trace",
 };
 
 struct opstack_op {
@@ -991,7 +993,12 @@ static int init_pred(struct filter_parse_state *ps,
 			fn = filter_pred_strloc;
 		else
 			fn = filter_pred_pchar;
-	} else if (!is_function_field(field)) {
+	} else if (is_function_field(field)) {
+		if (strcmp(field->name, "ip")) {
+			parse_error(ps, FILT_ERR_IP_FIELD_ONLY, 0);
+			return -EINVAL;
+		}
+	} else {
 		if (field->is_signed)
 			ret = strict_strtoll(pred->regex.pattern, 0, &val);
 		else
@@ -1338,10 +1345,7 @@ static struct filter_pred *create_pred(struct filter_parse_state *ps,
 
 	strcpy(pred.regex.pattern, operand2);
 	pred.regex.len = strlen(pred.regex.pattern);
-
-#ifdef CONFIG_FTRACE_STARTUP_TEST
 	pred.field = field;
-#endif
 	return init_pred(ps, field, &pred) ? NULL : &pred;
 }
 
@@ -1954,6 +1958,148 @@ void ftrace_profile_free_filter(struct perf_event *event)
 	__free_filter(filter);
 }
 
+struct function_filter_data {
+	struct ftrace_ops *ops;
+	int first_filter;
+	int first_notrace;
+};
+
+#ifdef CONFIG_FUNCTION_TRACER
+static char **
+ftrace_function_filter_re(char *buf, int len, int *count)
+{
+	char *str, *sep, **re;
+
+	str = kstrndup(buf, len, GFP_KERNEL);
+	if (!str)
+		return NULL;
+
+	/*
+	 * The argv_split function takes white space
+	 * as a separator, so convert ',' into spaces.
+	 */
+	while ((sep = strchr(str, ',')))
+		*sep = ' ';
+
+	re = argv_split(GFP_KERNEL, str, count);
+	kfree(str);
+	return re;
+}
+
+static int ftrace_function_set_regexp(struct ftrace_ops *ops, int filter,
+				      int reset, char *re, int len)
+{
+	int ret;
+
+	if (filter)
+		ret = ftrace_set_filter(ops, re, len, reset);
+	else
+		ret = ftrace_set_notrace(ops, re, len, reset);
+
+	return ret;
+}
+
+static int __ftrace_function_set_filter(int filter, char *buf, int len,
+					struct function_filter_data *data)
+{
+	int i, re_cnt, ret;
+	int *reset;
+	char **re;
+
+	reset = filter ? &data->first_filter : &data->first_notrace;
+
+	/*
+	 * The 'ip' field could have multiple filters set, separated
+	 * either by space or comma. We first cut the filter and apply
+	 * all pieces separately.
+	 */
+	re = ftrace_function_filter_re(buf, len, &re_cnt);
+	if (!re)
+		return -EINVAL;
+
+	for (i = 0; i < re_cnt; i++) {
+		ret = ftrace_function_set_regexp(data->ops, filter, *reset,
+						 re[i], strlen(re[i]));
+		if (ret)
+			break;
+
+		if (*reset)
+			*reset = 0;
+	}
+
+	argv_free(re);
+	return ret;
+}
+
+static int ftrace_function_check_pred(struct filter_pred *pred, int leaf)
+{
+	struct ftrace_event_field *field = pred->field;
+
+	if (leaf) {
+		/*
+		 * Check the leaf predicate for function trace, verify:
+		 *  - only '==' and '!=' is used
+		 *  - the 'ip' field is used
+		 */
+		if ((pred->op != OP_EQ) && (pred->op != OP_NE))
+			return -EINVAL;
+
+		if (strcmp(field->name, "ip"))
+			return -EINVAL;
+	} else {
+		/*
+		 * Check the non leaf predicate for function trace, verify:
+		 *  - only '||' is used
+		*/
+		if (pred->op != OP_OR)
+			return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int ftrace_function_set_filter_cb(enum move_type move,
+					 struct filter_pred *pred,
+					 int *err, void *data)
+{
+	/* Checking the node is valid for function trace. */
+	if ((move != MOVE_DOWN) ||
+	    (pred->left != FILTER_PRED_INVALID)) {
+		*err = ftrace_function_check_pred(pred, 0);
+	} else {
+		*err = ftrace_function_check_pred(pred, 1);
+		if (*err)
+			return WALK_PRED_ABORT;
+
+		*err = __ftrace_function_set_filter(pred->op == OP_EQ,
+						    pred->regex.pattern,
+						    pred->regex.len,
+						    data);
+	}
+
+	return (*err) ? WALK_PRED_ABORT : WALK_PRED_DEFAULT;
+}
+
+static int ftrace_function_set_filter(struct perf_event *event,
+				      struct event_filter *filter)
+{
+	struct function_filter_data data = {
+		.first_filter  = 1,
+		.first_notrace = 1,
+		.ops           = &event->ftrace_ops,
+	};
+
+	return walk_pred_tree(filter->preds, filter->root,
+			      ftrace_function_set_filter_cb, &data);
+}
+#else
+static int ftrace_function_set_filter(struct perf_event *event,
+				      struct event_filter *filter)
+{
+	return -ENODEV;
+}
+#endif /* CONFIG_FUNCTION_TRACER */
+
 int ftrace_profile_set_filter(struct perf_event *event, int event_id,
 			      char *filter_str)
 {
@@ -1974,9 +2120,16 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
 		goto out_unlock;
 
 	err = create_filter(call, filter_str, false, &filter);
-	if (!err)
-		event->filter = filter;
+	if (err)
+		goto free_filter;
+
+	if (ftrace_event_is_function(call))
+		err = ftrace_function_set_filter(event, filter);
 	else
+		event->filter = filter;
+
+free_filter:
+	if (err || ftrace_event_is_function(call))
 		__free_filter(filter);
 
 out_unlock:
-- 
1.7.8.3



[-- Attachment #2: This is a digitally signed message part --]
[-- Type: application/pgp-signature, Size: 836 bytes --]

^ permalink raw reply related	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2012-02-22 14:43 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2012-02-22 14:40 [PATCH 0/9] [GIT PULL] perf/tracing: fixes and add function trace to perf Steven Rostedt
2012-02-22 14:40 ` [PATCH 1/9] tracing: Dont print an extra separator of flags Steven Rostedt
2012-02-22 14:40 ` [PATCH 2/9] tracing: Dont use p->len field to determine output in __print_*() functions Steven Rostedt
2012-02-22 14:40 ` [PATCH 3/9] ftrace: Add enable/disable ftrace_ops control interface Steven Rostedt
2012-02-22 14:40 ` [PATCH 4/9] ftrace, perf: Add open/close tracepoint perf registration actions Steven Rostedt
2012-02-22 14:40 ` [PATCH 5/9] ftrace, perf: Add add/del " Steven Rostedt
2012-02-22 14:40 ` [PATCH 6/9] ftrace: Add FTRACE_ENTRY_REG macro to allow event registration Steven Rostedt
2012-02-22 14:40 ` [PATCH 7/9] ftrace, perf: Add support to use function tracepoint in perf Steven Rostedt
2012-02-22 14:40 ` [PATCH 8/9] ftrace: Allow to specify filter field type for ftrace events Steven Rostedt
2012-02-22 14:40 ` [PATCH 9/9] ftrace, perf: Add filter support for function trace event Steven Rostedt
  -- strict thread matches above, loose matches on Subject: below --
2011-11-27 18:04 [RFC] ftrace, perf: Adding support to use function trace Jiri Olsa
2011-11-27 18:04 ` [PATCH 7/9] ftrace, perf: Add support to use function tracepoint in perf Jiri Olsa
2011-11-28 19:58   ` Steven Rostedt
2011-11-28 20:03     ` Peter Zijlstra
2011-11-28 20:13       ` Steven Rostedt
2011-11-29 10:10         ` Jiri Olsa
2011-11-28 20:08     ` Peter Zijlstra
2011-11-28 20:10       ` Peter Zijlstra
2011-11-28 20:16         ` Steven Rostedt
2011-11-28 20:18           ` Peter Zijlstra

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).