public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Frederic Weisbecker <fweisbec@gmail.com>
To: Jason Baron <jbaron@redhat.com>
Cc: linux-kernel@vger.kernel.org, mingo@elte.hu,
	laijs@cn.fujitsu.com, rostedt@goodmis.org, peterz@infradead.org,
	mathieu.desnoyers@polymtl.ca, jiayingz@google.com,
	mbligh@google.com, lizf@cn.fujitsu.com
Subject: Re: [PATCH 08/12] add trace events for each syscall entry/exit
Date: Tue, 11 Aug 2009 12:50:14 +0200	[thread overview]
Message-ID: <20090811105010.GB4938@nowhere> (raw)
In-Reply-To: <aa785a0431b72327d288ef9b921e824d7f494de2.1249932670.git.jbaron@redhat.com>

On Mon, Aug 10, 2009 at 04:52:47PM -0400, Jason Baron wrote:
> Layer Frederic's syscall tracer on tracepoints. We create trace events via
> hooking into the SYCALL_DEFINE macros. This allows us to individually toggle
> syscall entry and exit points on/off.
> 
> Signed-off-by: Jason Baron <jbaron@redhat.com>
> 
> ---
>  include/linux/syscalls.h      |   61 +++++++++++++-
>  include/trace/syscall.h       |   18 ++--
>  kernel/trace/trace_syscalls.c |  183 ++++++++++++++++++++---------------------
>  3 files changed, 159 insertions(+), 103 deletions(-)
> 
> diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
> index 80de700..5e5b4d3 100644
> --- a/include/linux/syscalls.h
> +++ b/include/linux/syscalls.h
> @@ -64,6 +64,7 @@ struct perf_counter_attr;
>  #include <linux/sem.h>
>  #include <asm/siginfo.h>
>  #include <asm/signal.h>
> +#include <linux/unistd.h>
>  #include <linux/quota.h>
>  #include <linux/key.h>
>  #include <trace/syscall.h>
> @@ -112,6 +113,59 @@ struct perf_counter_attr;
>  #define __SC_STR_TDECL5(t, a, ...)	#t, __SC_STR_TDECL4(__VA_ARGS__)
>  #define __SC_STR_TDECL6(t, a, ...)	#t, __SC_STR_TDECL5(__VA_ARGS__)
>  
> +
> +#define SYSCALL_TRACE_ENTER_EVENT(sname)				\
> +	static struct ftrace_event_call event_enter_##sname;		\
> +	static int init_enter_##sname(void)				\
> +	{								\
> +		int num;						\
> +		num = syscall_name_to_nr("sys"#sname);			\
> +		if (num < 0)						\
> +			return -ENOSYS;					\
> +		register_ftrace_event(&event_syscall_enter);		\
> +		INIT_LIST_HEAD(&event_enter_##sname.fields);		\
> +		init_preds(&event_enter_##sname);			\
> +		return 0;						\
> +	}								\



You could rather add the struct ftrace_event_call *event as
a parameter to a generic function static int init_enter_syscall()
That would require adding this parameter in the raw_init() callback.
If I remember well, Masami does that in his kprobes events patchset.

May be we can let it as is and wait for his patchset to be integrated
to update that.


> +	static struct ftrace_event_call __used				\
> +	  __attribute__((__aligned__(4)))				\
> +	  __attribute__((section("_ftrace_events")))			\
> +	  event_enter_##sname = {					\
> +		.name                   = "sys_enter"#sname,		\
> +		.system                 = "syscalls",			\
> +		.event                  = &event_syscall_enter,		\
> +		.raw_init		= init_enter_##sname,		\
> +		.regfunc		= reg_event_syscall_enter,	\
> +		.unregfunc		= unreg_event_syscall_enter,	\
> +		.data			= "sys"#sname,			\
> +	}
> +
> +#define SYSCALL_TRACE_EXIT_EVENT(sname)					\
> +	static struct ftrace_event_call event_exit_##sname;		\
> +	static int init_exit_##sname(void)				\
> +	{								\
> +		int num;						\
> +		num = syscall_name_to_nr("sys"#sname);			\
> +		if (num < 0)						\
> +			return -ENOSYS;					\
> +		register_ftrace_event(&event_syscall_exit);		\
> +		INIT_LIST_HEAD(&event_exit_##sname.fields);		\
> +		init_preds(&event_exit_##sname);			\
> +		return 0;						\
> +	}								\
> +	static struct ftrace_event_call __used				\
> +	  __attribute__((__aligned__(4)))				\
> +	  __attribute__((section("_ftrace_events")))			\
> +	  event_exit_##sname = {					\
> +		.name                   = "sys_exit"#sname,		\
> +		.system                 = "syscalls",			\
> +		.event                  = &event_syscall_exit,		\
> +		.raw_init		= init_exit_##sname,		\
> +		.regfunc		= reg_event_syscall_exit,	\
> +		.unregfunc		= unreg_event_syscall_exit,	\
> +		.data			= "sys"#sname,			\
> +	}
> +
>  #define SYSCALL_METADATA(sname, nb)				\
>  	static const struct syscall_metadata __used		\
>  	  __attribute__((__aligned__(4)))			\
> @@ -121,7 +175,9 @@ struct perf_counter_attr;
>  		.nb_args 	= nb,				\
>  		.types		= types_##sname,		\
>  		.args		= args_##sname,			\
> -	}
> +	};							\
> +	SYSCALL_TRACE_ENTER_EVENT(sname);			\
> +	SYSCALL_TRACE_EXIT_EVENT(sname);
>  
>  #define SYSCALL_DEFINE0(sname)					\
>  	static const struct syscall_metadata __used		\
> @@ -131,8 +187,9 @@ struct perf_counter_attr;
>  		.name 		= "sys_"#sname,			\
>  		.nb_args 	= 0,				\
>  	};							\
> +	SYSCALL_TRACE_ENTER_EVENT(_##sname);			\
> +	SYSCALL_TRACE_EXIT_EVENT(_##sname);			\
>  	asmlinkage long sys_##sname(void)
> -
>  #else
>  #define SYSCALL_DEFINE0(name)	   asmlinkage long sys_##name(void)
>  #endif
> diff --git a/include/trace/syscall.h b/include/trace/syscall.h
> index 3951d77..73fb8b4 100644
> --- a/include/trace/syscall.h
> +++ b/include/trace/syscall.h
> @@ -2,6 +2,8 @@
>  #define _TRACE_SYSCALL_H
>  
>  #include <linux/tracepoint.h>
> +#include <linux/unistd.h>
> +#include <linux/ftrace_event.h>
>  
>  #include <asm/ptrace.h>
>  
> @@ -40,15 +42,13 @@ struct syscall_metadata {
>  
>  #ifdef CONFIG_FTRACE_SYSCALLS
>  extern struct syscall_metadata *syscall_nr_to_meta(int nr);
> -extern void start_ftrace_syscalls(void);
> -extern void stop_ftrace_syscalls(void);
> -extern void ftrace_syscall_enter(struct pt_regs *regs);
> -extern void ftrace_syscall_exit(struct pt_regs *regs);
> -#else
> -static inline void start_ftrace_syscalls(void)			{ }
> -static inline void stop_ftrace_syscalls(void)			{ }
> -static inline void ftrace_syscall_enter(struct pt_regs *regs)	{ }
> -static inline void ftrace_syscall_exit(struct pt_regs *regs)	{ }
> +extern int syscall_name_to_nr(char *name);
> +extern struct trace_event event_syscall_enter;
> +extern struct trace_event event_syscall_exit;
> +extern int reg_event_syscall_enter(void *ptr);
> +extern void unreg_event_syscall_enter(void *ptr);
> +extern int reg_event_syscall_exit(void *ptr);
> +extern void unreg_event_syscall_exit(void *ptr);
>  #endif
>  
>  #endif /* _TRACE_SYSCALL_H */
> diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
> index 08aed43..c7ae25e 100644
> --- a/kernel/trace/trace_syscalls.c
> +++ b/kernel/trace/trace_syscalls.c
> @@ -1,15 +1,16 @@
>  #include <trace/syscall.h>
>  #include <linux/kernel.h>
> +#include <linux/ftrace.h>
>  #include <asm/syscall.h>
>  
>  #include "trace_output.h"
>  #include "trace.h"
>  
> -/* Keep a counter of the syscall tracing users */
> -static int refcount;
> -
> -/* Prevent from races on thread flags toggling */
>  static DEFINE_MUTEX(syscall_trace_lock);
> +static int sys_refcount_enter;
> +static int sys_refcount_exit;
> +static DECLARE_BITMAP(enabled_enter_syscalls, FTRACE_SYSCALL_MAX);
> +static DECLARE_BITMAP(enabled_exit_syscalls, FTRACE_SYSCALL_MAX);
>  
>  /* Option to display the parameters types */
>  enum {
> @@ -95,53 +96,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags)
>  	return TRACE_TYPE_HANDLED;
>  }
>  
> -void start_ftrace_syscalls(void)
> -{
> -	unsigned long flags;
> -	struct task_struct *g, *t;
> -
> -	mutex_lock(&syscall_trace_lock);
> -
> -	/* Don't enable the flag on the tasks twice */
> -	if (++refcount != 1)
> -		goto unlock;
> -
> -	read_lock_irqsave(&tasklist_lock, flags);
> -
> -	do_each_thread(g, t) {
> -		set_tsk_thread_flag(t, TIF_SYSCALL_FTRACE);
> -	} while_each_thread(g, t);
> -
> -	read_unlock_irqrestore(&tasklist_lock, flags);
> -
> -unlock:
> -	mutex_unlock(&syscall_trace_lock);
> -}
> -
> -void stop_ftrace_syscalls(void)
> -{
> -	unsigned long flags;
> -	struct task_struct *g, *t;
> -
> -	mutex_lock(&syscall_trace_lock);
> -
> -	/* There are perhaps still some users */
> -	if (--refcount)
> -		goto unlock;
> -
> -	read_lock_irqsave(&tasklist_lock, flags);
> -
> -	do_each_thread(g, t) {
> -		clear_tsk_thread_flag(t, TIF_SYSCALL_FTRACE);
> -	} while_each_thread(g, t);
> -
> -	read_unlock_irqrestore(&tasklist_lock, flags);
> -
> -unlock:
> -	mutex_unlock(&syscall_trace_lock);
> -}
> -
> -void ftrace_syscall_enter(struct pt_regs *regs)
> +void ftrace_syscall_enter(struct pt_regs *regs, long id)
>  {
>  	struct syscall_trace_enter *entry;
>  	struct syscall_metadata *sys_data;
> @@ -150,6 +105,8 @@ void ftrace_syscall_enter(struct pt_regs *regs)
>  	int syscall_nr;
>  
>  	syscall_nr = syscall_get_nr(current, regs);
> +	if (!test_bit(syscall_nr, enabled_enter_syscalls))
> +		return;
>  
>  	sys_data = syscall_nr_to_meta(syscall_nr);
>  	if (!sys_data)
> @@ -170,7 +127,7 @@ void ftrace_syscall_enter(struct pt_regs *regs)
>  	trace_wake_up();
>  }
>  
> -void ftrace_syscall_exit(struct pt_regs *regs)
> +void ftrace_syscall_exit(struct pt_regs *regs, long ret)
>  {
>  	struct syscall_trace_exit *entry;
>  	struct syscall_metadata *sys_data;
> @@ -178,6 +135,8 @@ void ftrace_syscall_exit(struct pt_regs *regs)
>  	int syscall_nr;
>  
>  	syscall_nr = syscall_get_nr(current, regs);
> +	if (!test_bit(syscall_nr, enabled_exit_syscalls))
> +		return;
>  
>  	sys_data = syscall_nr_to_meta(syscall_nr);
>  	if (!sys_data)
> @@ -196,54 +155,94 @@ void ftrace_syscall_exit(struct pt_regs *regs)
>  	trace_wake_up();
>  }
>  
> -static int init_syscall_tracer(struct trace_array *tr)
> +int reg_event_syscall_enter(void *ptr)
>  {
> -	start_ftrace_syscalls();
> -
> -	return 0;
> +	int ret = 0;
> +	int num;
> +	char *name;
> +
> +	name = (char *)ptr;
> +	num = syscall_name_to_nr(name);
> +	if (num < 0 || num >= FTRACE_SYSCALL_MAX)



I wonder if we should WARN once in this case. At least we would
be aware of new yet unsupported syscalls.



> +		return -ENOSYS;
> +	mutex_lock(&syscall_trace_lock);
> +	if (!sys_refcount_enter)
> +		ret = register_trace_syscall_enter(ftrace_syscall_enter);
> +	if (ret) {
> +		pr_info("event trace: Could not activate"
> +				"syscall entry trace point");
> +	} else {
> +		set_bit(num, enabled_enter_syscalls);
> +		sys_refcount_enter++;
> +	}
> +	mutex_unlock(&syscall_trace_lock);
> +	return ret;
>  }
>  
> -static void reset_syscall_tracer(struct trace_array *tr)
> +void unreg_event_syscall_enter(void *ptr)
>  {
> -	stop_ftrace_syscalls();
> -	tracing_reset_online_cpus(tr);
> -}
> -
> -static struct trace_event syscall_enter_event = {
> -	.type	 	= TRACE_SYSCALL_ENTER,
> -	.trace		= print_syscall_enter,
> -};
> -
> -static struct trace_event syscall_exit_event = {
> -	.type	 	= TRACE_SYSCALL_EXIT,
> -	.trace		= print_syscall_exit,
> -};
> +	int num;
> +	char *name;
>  
> -static struct tracer syscall_tracer __read_mostly = {
> -	.name	     	= "syscall",
> -	.init		= init_syscall_tracer,
> -	.reset		= reset_syscall_tracer,
> -	.flags		= &syscalls_flags,
> -};
> +	name = (char *)ptr;
> +	num = syscall_name_to_nr(name);
> +	if (num < 0 || num >= FTRACE_SYSCALL_MAX)
> +		return;
> +	mutex_lock(&syscall_trace_lock);
> +	sys_refcount_enter--;
> +	clear_bit(num, enabled_enter_syscalls);
> +	if (!sys_refcount_enter)
> +		unregister_trace_syscall_enter(ftrace_syscall_enter);
> +	mutex_unlock(&syscall_trace_lock);
> +}
>  
> -__init int register_ftrace_syscalls(void)
> +int reg_event_syscall_exit(void *ptr)
>  {
> -	int ret;
> -
> -	ret = register_ftrace_event(&syscall_enter_event);
> -	if (!ret) {
> -		printk(KERN_WARNING "event %d failed to register\n",
> -		       syscall_enter_event.type);
> -		WARN_ON_ONCE(1);
> +	int ret = 0;
> +	int num;
> +	char *name;
> +
> +	name = (char *)ptr;
> +	num = syscall_name_to_nr(name);
> +	if (num < 0 || num >= FTRACE_SYSCALL_MAX)
> +		return -ENOSYS;
> +	mutex_lock(&syscall_trace_lock);
> +	if (!sys_refcount_exit)
> +		ret = register_trace_syscall_exit(ftrace_syscall_exit);
> +	if (ret) {
> +		pr_info("event trace: Could not activate"
> +				"syscall exit trace point");
> +	} else {
> +		set_bit(num, enabled_exit_syscalls);
> +		sys_refcount_exit++;
>  	}
> +	mutex_unlock(&syscall_trace_lock);
> +	return ret;
> +}
>  
> -	ret = register_ftrace_event(&syscall_exit_event);
> -	if (!ret) {
> -		printk(KERN_WARNING "event %d failed to register\n",
> -		       syscall_exit_event.type);
> -		WARN_ON_ONCE(1);
> -	}
> +void unreg_event_syscall_exit(void *ptr)
> +{
> +	int num;
> +	char *name;
>  
> -	return register_tracer(&syscall_tracer);
> +	name = (char *)ptr;
> +	num = syscall_name_to_nr(name);
> +	if (num < 0 || num >= FTRACE_SYSCALL_MAX)
> +		return;
> +	mutex_lock(&syscall_trace_lock);
> +	sys_refcount_exit--;
> +	clear_bit(num, enabled_exit_syscalls);
> +	if (!sys_refcount_exit)
> +		unregister_trace_syscall_exit(ftrace_syscall_exit);
> +	mutex_unlock(&syscall_trace_lock);
>  }
> -device_initcall(register_ftrace_syscalls);
> +
> +struct trace_event event_syscall_enter = {
> +	.trace			= print_syscall_enter,
> +	.type			= TRACE_SYSCALL_ENTER
> +};
> +
> +struct trace_event event_syscall_exit = {
> +	.trace			= print_syscall_exit,
> +	.type			= TRACE_SYSCALL_EXIT
> +};
> -- 
> 1.6.2.5
> 

Nice.

It's a bit too bad that enter and exit must be that separated
whereas their callbacks are pretty the same.

But I guess if we want to nicely decouple both, we don't have the choice.


  reply	other threads:[~2009-08-11 11:59 UTC|newest]

Thread overview: 88+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-08-10 20:52 [PATCH 00/12] add syscall tracepoints V3 Jason Baron
2009-08-10 20:52 ` [PATCH 01/12] map syscall name to number Jason Baron
2009-08-10 20:52 ` [PATCH 02/12] call arch_init_ftrace_syscalls at boot Jason Baron
2009-08-10 20:52 ` [PATCH 03/12] add DECLARE_TRACE_WITH_CALLBACK() macro Jason Baron
2009-08-10 20:52 ` [PATCH 04/12] add syscall tracepoints Jason Baron
2009-08-10 20:52 ` [PATCH 05/12] update FTRACE_SYSCALL_MAX Jason Baron
2009-08-11 11:00   ` Frederic Weisbecker
2009-08-11 19:39     ` Matt Fleming
2009-08-24 13:41     ` Paul Mundt
2009-08-24 14:06       ` Jason Baron
2009-08-24 14:15         ` Paul Mundt
2009-08-24 14:34           ` Frederic Weisbecker
2009-08-24 14:37             ` Paul Mundt
2009-08-24 14:42           ` Jason Baron
2009-08-24 14:50             ` Paul Mundt
2009-08-24 18:34               ` Ingo Molnar
2009-08-10 20:52 ` [PATCH 06/12] trace_event - raw_init bailout Jason Baron
2009-08-10 20:52 ` [PATCH 07/12] add ftrace_event_call void * 'data' field Jason Baron
2009-08-11 10:09   ` Frederic Weisbecker
2009-08-17 22:19     ` Steven Rostedt
2009-08-17 23:09       ` Frederic Weisbecker
2009-08-18  0:06         ` Steven Rostedt
2009-08-10 20:52 ` [PATCH 08/12] add trace events for each syscall entry/exit Jason Baron
2009-08-11 10:50   ` Frederic Weisbecker [this message]
2009-08-11 11:45     ` Ingo Molnar
2009-08-11 12:01       ` Frederic Weisbecker
2009-08-25 12:50   ` Hendrik Brueckner
2009-08-25 14:15     ` Frederic Weisbecker
2009-08-25 16:02       ` Hendrik Brueckner
2009-08-25 16:20         ` Mathieu Desnoyers
2009-08-25 16:59           ` Frederic Weisbecker
2009-08-25 17:31             ` Frederic Weisbecker
2009-08-25 18:31               ` Mathieu Desnoyers
2009-08-25 19:42                 ` Frederic Weisbecker
2009-08-25 19:51                   ` Mathieu Desnoyers
2009-08-26  0:19                     ` Frederic Weisbecker
2009-08-26  0:42                       ` Mathieu Desnoyers
2009-08-26  7:28                         ` Ingo Molnar
2009-08-26 17:11                           ` Mathieu Desnoyers
2009-08-26  6:48                   ` Peter Zijlstra
2009-08-25 22:04                 ` Martin Schwidefsky
2009-08-26  7:38                   ` Heiko Carstens
2009-08-26 12:32                     ` Frederic Weisbecker
2009-08-26  6:21                 ` Peter Zijlstra
2009-08-26 17:08                   ` Mathieu Desnoyers
2009-08-26 18:41                     ` Christoph Hellwig
2009-08-26 18:42                       ` Christoph Hellwig
2009-08-26 19:01                         ` Mathieu Desnoyers
2009-08-26  7:10                 ` Peter Zijlstra
2009-08-26 17:10                   ` Mathieu Desnoyers
2009-08-26 17:24                   ` H. Peter Anvin
2009-08-25 17:04           ` Jason Baron
2009-08-25 18:15             ` Mathieu Desnoyers
2009-08-26 12:35         ` Frederic Weisbecker
2009-08-26 12:59           ` Heiko Carstens
2009-08-26 13:30             ` Frederic Weisbecker
2009-08-26 13:48               ` Steven Rostedt
2009-08-26 13:53                 ` Frederic Weisbecker
2009-08-26 14:44                   ` Steven Rostedt
2009-08-26 13:56                 ` Peter Zijlstra
2009-08-26 14:41                   ` Steven Rostedt
2009-08-26 14:10               ` Heiko Carstens
2009-08-26 14:27                 ` Frederic Weisbecker
2009-08-26 14:43                   ` Steven Rostedt
2009-08-26 16:14                     ` Frederic Weisbecker
2009-08-26 14:43                 ` Steven Rostedt
2009-08-26 14:41           ` Hendrik Brueckner
2009-08-28 12:28         ` [tip:tracing/core] tracing: Don't trace kernel thread syscalls tip-bot for Hendrik Brueckner
2009-08-25 21:40     ` [PATCH 08/12] add trace events for each syscall entry/exit Frederic Weisbecker
2009-08-25 22:09       ` Frederic Weisbecker
2009-08-26  7:47         ` Heiko Carstens
2009-08-28 12:27     ` [tip:tracing/core] tracing: Check invalid syscall nr while tracing syscalls tip-bot for Hendrik Brueckner
2009-08-10 20:52 ` [PATCH 09/12] add support traceopint ids Jason Baron
2009-08-11 11:28   ` Frederic Weisbecker
2009-08-10 20:53 ` [PATCH 10/12] add perf counter support Jason Baron
2009-08-11 12:12   ` Frederic Weisbecker
2009-08-11 12:17     ` Ingo Molnar
2009-08-11 12:25       ` Frederic Weisbecker
2009-08-10 20:53 ` [PATCH 11/12] add more namespace area to 'perf list' output Jason Baron
2009-08-10 20:53 ` [PATCH 12/12] convert x86_64 mmap and uname to use DEFINE_SYSCALL Jason Baron
2009-08-25 12:31 ` [PATCH 00/12] add syscall tracepoints V3 - s390 arch update Hendrik Brueckner
2009-08-25 13:52   ` Frederic Weisbecker
2009-08-25 14:39     ` Heiko Carstens
2009-08-25 19:52       ` Frederic Weisbecker
2009-08-25 15:38     ` Hendrik Brueckner
2009-08-26 16:53   ` Frederic Weisbecker
2009-08-27  7:27     ` [PATCH]: tracing: s390 arch updates for tracing syscalls Hendrik Brueckner
2009-08-28 12:27   ` [tip:tracing/core] tracing: Add syscall tracepoints - s390 arch update tip-bot for Hendrik Brueckner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090811105010.GB4938@nowhere \
    --to=fweisbec@gmail.com \
    --cc=jbaron@redhat.com \
    --cc=jiayingz@google.com \
    --cc=laijs@cn.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lizf@cn.fujitsu.com \
    --cc=mathieu.desnoyers@polymtl.ca \
    --cc=mbligh@google.com \
    --cc=mingo@elte.hu \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox