All of lore.kernel.org
 help / color / mirror / Atom feed
From: Frederic Weisbecker <fweisbec@gmail.com>
To: Jason Baron <jbaron@redhat.com>
Cc: linux-kernel@vger.kernel.org, mingo@elte.hu,
	laijs@cn.fujitsu.com, rostedt@goodmis.org, peterz@infradead.org,
	mathieu.desnoyers@polymtl.ca, jiayingz@google.com,
	mbligh@google.com, lizf@cn.fujitsu.com
Subject: Re: [PATCH 08/12] add trace events for each syscall entry/exit
Date: Tue, 11 Aug 2009 12:50:14 +0200	[thread overview]
Message-ID: <20090811105010.GB4938@nowhere> (raw)
In-Reply-To: <aa785a0431b72327d288ef9b921e824d7f494de2.1249932670.git.jbaron@redhat.com>

On Mon, Aug 10, 2009 at 04:52:47PM -0400, Jason Baron wrote:
> Layer Frederic's syscall tracer on tracepoints. We create trace events via
> hooking into the SYCALL_DEFINE macros. This allows us to individually toggle
> syscall entry and exit points on/off.
> 
> Signed-off-by: Jason Baron <jbaron@redhat.com>
> 
> ---
>  include/linux/syscalls.h      |   61 +++++++++++++-
>  include/trace/syscall.h       |   18 ++--
>  kernel/trace/trace_syscalls.c |  183 ++++++++++++++++++++---------------------
>  3 files changed, 159 insertions(+), 103 deletions(-)
> 
> diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h
> index 80de700..5e5b4d3 100644
> --- a/include/linux/syscalls.h
> +++ b/include/linux/syscalls.h
> @@ -64,6 +64,7 @@ struct perf_counter_attr;
>  #include <linux/sem.h>
>  #include <asm/siginfo.h>
>  #include <asm/signal.h>
> +#include <linux/unistd.h>
>  #include <linux/quota.h>
>  #include <linux/key.h>
>  #include <trace/syscall.h>
> @@ -112,6 +113,59 @@ struct perf_counter_attr;
>  #define __SC_STR_TDECL5(t, a, ...)	#t, __SC_STR_TDECL4(__VA_ARGS__)
>  #define __SC_STR_TDECL6(t, a, ...)	#t, __SC_STR_TDECL5(__VA_ARGS__)
>  
> +
> +#define SYSCALL_TRACE_ENTER_EVENT(sname)				\
> +	static struct ftrace_event_call event_enter_##sname;		\
> +	static int init_enter_##sname(void)				\
> +	{								\
> +		int num;						\
> +		num = syscall_name_to_nr("sys"#sname);			\
> +		if (num < 0)						\
> +			return -ENOSYS;					\
> +		register_ftrace_event(&event_syscall_enter);		\
> +		INIT_LIST_HEAD(&event_enter_##sname.fields);		\
> +		init_preds(&event_enter_##sname);			\
> +		return 0;						\
> +	}								\



You could rather add the struct ftrace_event_call *event as
a parameter to a generic function static int init_enter_syscall()
That would require adding this parameter in the raw_init() callback.
If I remember well, Masami does that in his kprobes events patchset.

May be we can let it as is and wait for his patchset to be integrated
to update that.


> +	static struct ftrace_event_call __used				\
> +	  __attribute__((__aligned__(4)))				\
> +	  __attribute__((section("_ftrace_events")))			\
> +	  event_enter_##sname = {					\
> +		.name                   = "sys_enter"#sname,		\
> +		.system                 = "syscalls",			\
> +		.event                  = &event_syscall_enter,		\
> +		.raw_init		= init_enter_##sname,		\
> +		.regfunc		= reg_event_syscall_enter,	\
> +		.unregfunc		= unreg_event_syscall_enter,	\
> +		.data			= "sys"#sname,			\
> +	}
> +
> +#define SYSCALL_TRACE_EXIT_EVENT(sname)					\
> +	static struct ftrace_event_call event_exit_##sname;		\
> +	static int init_exit_##sname(void)				\
> +	{								\
> +		int num;						\
> +		num = syscall_name_to_nr("sys"#sname);			\
> +		if (num < 0)						\
> +			return -ENOSYS;					\
> +		register_ftrace_event(&event_syscall_exit);		\
> +		INIT_LIST_HEAD(&event_exit_##sname.fields);		\
> +		init_preds(&event_exit_##sname);			\
> +		return 0;						\
> +	}								\
> +	static struct ftrace_event_call __used				\
> +	  __attribute__((__aligned__(4)))				\
> +	  __attribute__((section("_ftrace_events")))			\
> +	  event_exit_##sname = {					\
> +		.name                   = "sys_exit"#sname,		\
> +		.system                 = "syscalls",			\
> +		.event                  = &event_syscall_exit,		\
> +		.raw_init		= init_exit_##sname,		\
> +		.regfunc		= reg_event_syscall_exit,	\
> +		.unregfunc		= unreg_event_syscall_exit,	\
> +		.data			= "sys"#sname,			\
> +	}
> +
>  #define SYSCALL_METADATA(sname, nb)				\
>  	static const struct syscall_metadata __used		\
>  	  __attribute__((__aligned__(4)))			\
> @@ -121,7 +175,9 @@ struct perf_counter_attr;
>  		.nb_args 	= nb,				\
>  		.types		= types_##sname,		\
>  		.args		= args_##sname,			\
> -	}
> +	};							\
> +	SYSCALL_TRACE_ENTER_EVENT(sname);			\
> +	SYSCALL_TRACE_EXIT_EVENT(sname);
>  
>  #define SYSCALL_DEFINE0(sname)					\
>  	static const struct syscall_metadata __used		\
> @@ -131,8 +187,9 @@ struct perf_counter_attr;
>  		.name 		= "sys_"#sname,			\
>  		.nb_args 	= 0,				\
>  	};							\
> +	SYSCALL_TRACE_ENTER_EVENT(_##sname);			\
> +	SYSCALL_TRACE_EXIT_EVENT(_##sname);			\
>  	asmlinkage long sys_##sname(void)
> -
>  #else
>  #define SYSCALL_DEFINE0(name)	   asmlinkage long sys_##name(void)
>  #endif
> diff --git a/include/trace/syscall.h b/include/trace/syscall.h
> index 3951d77..73fb8b4 100644
> --- a/include/trace/syscall.h
> +++ b/include/trace/syscall.h
> @@ -2,6 +2,8 @@
>  #define _TRACE_SYSCALL_H
>  
>  #include <linux/tracepoint.h>
> +#include <linux/unistd.h>
> +#include <linux/ftrace_event.h>
>  
>  #include <asm/ptrace.h>
>  
> @@ -40,15 +42,13 @@ struct syscall_metadata {
>  
>  #ifdef CONFIG_FTRACE_SYSCALLS
>  extern struct syscall_metadata *syscall_nr_to_meta(int nr);
> -extern void start_ftrace_syscalls(void);
> -extern void stop_ftrace_syscalls(void);
> -extern void ftrace_syscall_enter(struct pt_regs *regs);
> -extern void ftrace_syscall_exit(struct pt_regs *regs);
> -#else
> -static inline void start_ftrace_syscalls(void)			{ }
> -static inline void stop_ftrace_syscalls(void)			{ }
> -static inline void ftrace_syscall_enter(struct pt_regs *regs)	{ }
> -static inline void ftrace_syscall_exit(struct pt_regs *regs)	{ }
> +extern int syscall_name_to_nr(char *name);
> +extern struct trace_event event_syscall_enter;
> +extern struct trace_event event_syscall_exit;
> +extern int reg_event_syscall_enter(void *ptr);
> +extern void unreg_event_syscall_enter(void *ptr);
> +extern int reg_event_syscall_exit(void *ptr);
> +extern void unreg_event_syscall_exit(void *ptr);
>  #endif
>  
>  #endif /* _TRACE_SYSCALL_H */
> diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
> index 08aed43..c7ae25e 100644
> --- a/kernel/trace/trace_syscalls.c
> +++ b/kernel/trace/trace_syscalls.c
> @@ -1,15 +1,16 @@
>  #include <trace/syscall.h>
>  #include <linux/kernel.h>
> +#include <linux/ftrace.h>
>  #include <asm/syscall.h>
>  
>  #include "trace_output.h"
>  #include "trace.h"
>  
> -/* Keep a counter of the syscall tracing users */
> -static int refcount;
> -
> -/* Prevent from races on thread flags toggling */
>  static DEFINE_MUTEX(syscall_trace_lock);
> +static int sys_refcount_enter;
> +static int sys_refcount_exit;
> +static DECLARE_BITMAP(enabled_enter_syscalls, FTRACE_SYSCALL_MAX);
> +static DECLARE_BITMAP(enabled_exit_syscalls, FTRACE_SYSCALL_MAX);
>  
>  /* Option to display the parameters types */
>  enum {
> @@ -95,53 +96,7 @@ print_syscall_exit(struct trace_iterator *iter, int flags)
>  	return TRACE_TYPE_HANDLED;
>  }
>  
> -void start_ftrace_syscalls(void)
> -{
> -	unsigned long flags;
> -	struct task_struct *g, *t;
> -
> -	mutex_lock(&syscall_trace_lock);
> -
> -	/* Don't enable the flag on the tasks twice */
> -	if (++refcount != 1)
> -		goto unlock;
> -
> -	read_lock_irqsave(&tasklist_lock, flags);
> -
> -	do_each_thread(g, t) {
> -		set_tsk_thread_flag(t, TIF_SYSCALL_FTRACE);
> -	} while_each_thread(g, t);
> -
> -	read_unlock_irqrestore(&tasklist_lock, flags);
> -
> -unlock:
> -	mutex_unlock(&syscall_trace_lock);
> -}
> -
> -void stop_ftrace_syscalls(void)
> -{
> -	unsigned long flags;
> -	struct task_struct *g, *t;
> -
> -	mutex_lock(&syscall_trace_lock);
> -
> -	/* There are perhaps still some users */
> -	if (--refcount)
> -		goto unlock;
> -
> -	read_lock_irqsave(&tasklist_lock, flags);
> -
> -	do_each_thread(g, t) {
> -		clear_tsk_thread_flag(t, TIF_SYSCALL_FTRACE);
> -	} while_each_thread(g, t);
> -
> -	read_unlock_irqrestore(&tasklist_lock, flags);
> -
> -unlock:
> -	mutex_unlock(&syscall_trace_lock);
> -}
> -
> -void ftrace_syscall_enter(struct pt_regs *regs)
> +void ftrace_syscall_enter(struct pt_regs *regs, long id)
>  {
>  	struct syscall_trace_enter *entry;
>  	struct syscall_metadata *sys_data;
> @@ -150,6 +105,8 @@ void ftrace_syscall_enter(struct pt_regs *regs)
>  	int syscall_nr;
>  
>  	syscall_nr = syscall_get_nr(current, regs);
> +	if (!test_bit(syscall_nr, enabled_enter_syscalls))
> +		return;
>  
>  	sys_data = syscall_nr_to_meta(syscall_nr);
>  	if (!sys_data)
> @@ -170,7 +127,7 @@ void ftrace_syscall_enter(struct pt_regs *regs)
>  	trace_wake_up();
>  }
>  
> -void ftrace_syscall_exit(struct pt_regs *regs)
> +void ftrace_syscall_exit(struct pt_regs *regs, long ret)
>  {
>  	struct syscall_trace_exit *entry;
>  	struct syscall_metadata *sys_data;
> @@ -178,6 +135,8 @@ void ftrace_syscall_exit(struct pt_regs *regs)
>  	int syscall_nr;
>  
>  	syscall_nr = syscall_get_nr(current, regs);
> +	if (!test_bit(syscall_nr, enabled_exit_syscalls))
> +		return;
>  
>  	sys_data = syscall_nr_to_meta(syscall_nr);
>  	if (!sys_data)
> @@ -196,54 +155,94 @@ void ftrace_syscall_exit(struct pt_regs *regs)
>  	trace_wake_up();
>  }
>  
> -static int init_syscall_tracer(struct trace_array *tr)
> +int reg_event_syscall_enter(void *ptr)
>  {
> -	start_ftrace_syscalls();
> -
> -	return 0;
> +	int ret = 0;
> +	int num;
> +	char *name;
> +
> +	name = (char *)ptr;
> +	num = syscall_name_to_nr(name);
> +	if (num < 0 || num >= FTRACE_SYSCALL_MAX)



I wonder if we should WARN once in this case. At least we would
be aware of new yet unsupported syscalls.



> +		return -ENOSYS;
> +	mutex_lock(&syscall_trace_lock);
> +	if (!sys_refcount_enter)
> +		ret = register_trace_syscall_enter(ftrace_syscall_enter);
> +	if (ret) {
> +		pr_info("event trace: Could not activate"
> +				"syscall entry trace point");
> +	} else {
> +		set_bit(num, enabled_enter_syscalls);
> +		sys_refcount_enter++;
> +	}
> +	mutex_unlock(&syscall_trace_lock);
> +	return ret;
>  }
>  
> -static void reset_syscall_tracer(struct trace_array *tr)
> +void unreg_event_syscall_enter(void *ptr)
>  {
> -	stop_ftrace_syscalls();
> -	tracing_reset_online_cpus(tr);
> -}
> -
> -static struct trace_event syscall_enter_event = {
> -	.type	 	= TRACE_SYSCALL_ENTER,
> -	.trace		= print_syscall_enter,
> -};
> -
> -static struct trace_event syscall_exit_event = {
> -	.type	 	= TRACE_SYSCALL_EXIT,
> -	.trace		= print_syscall_exit,
> -};
> +	int num;
> +	char *name;
>  
> -static struct tracer syscall_tracer __read_mostly = {
> -	.name	     	= "syscall",
> -	.init		= init_syscall_tracer,
> -	.reset		= reset_syscall_tracer,
> -	.flags		= &syscalls_flags,
> -};
> +	name = (char *)ptr;
> +	num = syscall_name_to_nr(name);
> +	if (num < 0 || num >= FTRACE_SYSCALL_MAX)
> +		return;
> +	mutex_lock(&syscall_trace_lock);
> +	sys_refcount_enter--;
> +	clear_bit(num, enabled_enter_syscalls);
> +	if (!sys_refcount_enter)
> +		unregister_trace_syscall_enter(ftrace_syscall_enter);
> +	mutex_unlock(&syscall_trace_lock);
> +}
>  
> -__init int register_ftrace_syscalls(void)
> +int reg_event_syscall_exit(void *ptr)
>  {
> -	int ret;
> -
> -	ret = register_ftrace_event(&syscall_enter_event);
> -	if (!ret) {
> -		printk(KERN_WARNING "event %d failed to register\n",
> -		       syscall_enter_event.type);
> -		WARN_ON_ONCE(1);
> +	int ret = 0;
> +	int num;
> +	char *name;
> +
> +	name = (char *)ptr;
> +	num = syscall_name_to_nr(name);
> +	if (num < 0 || num >= FTRACE_SYSCALL_MAX)
> +		return -ENOSYS;
> +	mutex_lock(&syscall_trace_lock);
> +	if (!sys_refcount_exit)
> +		ret = register_trace_syscall_exit(ftrace_syscall_exit);
> +	if (ret) {
> +		pr_info("event trace: Could not activate"
> +				"syscall exit trace point");
> +	} else {
> +		set_bit(num, enabled_exit_syscalls);
> +		sys_refcount_exit++;
>  	}
> +	mutex_unlock(&syscall_trace_lock);
> +	return ret;
> +}
>  
> -	ret = register_ftrace_event(&syscall_exit_event);
> -	if (!ret) {
> -		printk(KERN_WARNING "event %d failed to register\n",
> -		       syscall_exit_event.type);
> -		WARN_ON_ONCE(1);
> -	}
> +void unreg_event_syscall_exit(void *ptr)
> +{
> +	int num;
> +	char *name;
>  
> -	return register_tracer(&syscall_tracer);
> +	name = (char *)ptr;
> +	num = syscall_name_to_nr(name);
> +	if (num < 0 || num >= FTRACE_SYSCALL_MAX)
> +		return;
> +	mutex_lock(&syscall_trace_lock);
> +	sys_refcount_exit--;
> +	clear_bit(num, enabled_exit_syscalls);
> +	if (!sys_refcount_exit)
> +		unregister_trace_syscall_exit(ftrace_syscall_exit);
> +	mutex_unlock(&syscall_trace_lock);
>  }
> -device_initcall(register_ftrace_syscalls);
> +
> +struct trace_event event_syscall_enter = {
> +	.trace			= print_syscall_enter,
> +	.type			= TRACE_SYSCALL_ENTER
> +};
> +
> +struct trace_event event_syscall_exit = {
> +	.trace			= print_syscall_exit,
> +	.type			= TRACE_SYSCALL_EXIT
> +};
> -- 
> 1.6.2.5
> 

Nice.

It's a bit too bad that enter and exit must be that separated
whereas their callbacks are pretty the same.

But I guess if we want to nicely decouple both, we don't have the choice.


  reply	other threads:[~2009-08-11 11:59 UTC|newest]

Thread overview: 88+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-08-10 20:52 [PATCH 00/12] add syscall tracepoints V3 Jason Baron
2009-08-10 20:52 ` [PATCH 01/12] map syscall name to number Jason Baron
2009-08-10 20:52 ` [PATCH 02/12] call arch_init_ftrace_syscalls at boot Jason Baron
2009-08-10 20:52 ` [PATCH 03/12] add DECLARE_TRACE_WITH_CALLBACK() macro Jason Baron
2009-08-10 20:52 ` [PATCH 04/12] add syscall tracepoints Jason Baron
2009-08-10 20:52 ` [PATCH 05/12] update FTRACE_SYSCALL_MAX Jason Baron
2009-08-11 11:00   ` Frederic Weisbecker
2009-08-11 19:39     ` Matt Fleming
2009-08-24 13:41     ` Paul Mundt
2009-08-24 14:06       ` Jason Baron
2009-08-24 14:15         ` Paul Mundt
2009-08-24 14:34           ` Frederic Weisbecker
2009-08-24 14:37             ` Paul Mundt
2009-08-24 14:42           ` Jason Baron
2009-08-24 14:50             ` Paul Mundt
2009-08-24 18:34               ` Ingo Molnar
2009-08-10 20:52 ` [PATCH 06/12] trace_event - raw_init bailout Jason Baron
2009-08-10 20:52 ` [PATCH 07/12] add ftrace_event_call void * 'data' field Jason Baron
2009-08-11 10:09   ` Frederic Weisbecker
2009-08-17 22:19     ` Steven Rostedt
2009-08-17 23:09       ` Frederic Weisbecker
2009-08-18  0:06         ` Steven Rostedt
2009-08-10 20:52 ` [PATCH 08/12] add trace events for each syscall entry/exit Jason Baron
2009-08-11 10:50   ` Frederic Weisbecker [this message]
2009-08-11 11:45     ` Ingo Molnar
2009-08-11 12:01       ` Frederic Weisbecker
2009-08-25 12:50   ` Hendrik Brueckner
2009-08-25 14:15     ` Frederic Weisbecker
2009-08-25 16:02       ` Hendrik Brueckner
2009-08-25 16:20         ` Mathieu Desnoyers
2009-08-25 16:59           ` Frederic Weisbecker
2009-08-25 17:31             ` Frederic Weisbecker
2009-08-25 18:31               ` Mathieu Desnoyers
2009-08-25 19:42                 ` Frederic Weisbecker
2009-08-25 19:51                   ` Mathieu Desnoyers
2009-08-26  0:19                     ` Frederic Weisbecker
2009-08-26  0:42                       ` Mathieu Desnoyers
2009-08-26  7:28                         ` Ingo Molnar
2009-08-26 17:11                           ` Mathieu Desnoyers
2009-08-26  6:48                   ` Peter Zijlstra
2009-08-25 22:04                 ` Martin Schwidefsky
2009-08-26  7:38                   ` Heiko Carstens
2009-08-26 12:32                     ` Frederic Weisbecker
2009-08-26  6:21                 ` Peter Zijlstra
2009-08-26 17:08                   ` Mathieu Desnoyers
2009-08-26 18:41                     ` Christoph Hellwig
2009-08-26 18:42                       ` Christoph Hellwig
2009-08-26 19:01                         ` Mathieu Desnoyers
2009-08-26  7:10                 ` Peter Zijlstra
2009-08-26 17:10                   ` Mathieu Desnoyers
2009-08-26 17:24                   ` H. Peter Anvin
2009-08-25 17:04           ` Jason Baron
2009-08-25 18:15             ` Mathieu Desnoyers
2009-08-26 12:35         ` Frederic Weisbecker
2009-08-26 12:59           ` Heiko Carstens
2009-08-26 13:30             ` Frederic Weisbecker
2009-08-26 13:48               ` Steven Rostedt
2009-08-26 13:53                 ` Frederic Weisbecker
2009-08-26 14:44                   ` Steven Rostedt
2009-08-26 13:56                 ` Peter Zijlstra
2009-08-26 14:41                   ` Steven Rostedt
2009-08-26 14:10               ` Heiko Carstens
2009-08-26 14:27                 ` Frederic Weisbecker
2009-08-26 14:43                   ` Steven Rostedt
2009-08-26 16:14                     ` Frederic Weisbecker
2009-08-26 14:43                 ` Steven Rostedt
2009-08-26 14:41           ` Hendrik Brueckner
2009-08-28 12:28         ` [tip:tracing/core] tracing: Don't trace kernel thread syscalls tip-bot for Hendrik Brueckner
2009-08-25 21:40     ` [PATCH 08/12] add trace events for each syscall entry/exit Frederic Weisbecker
2009-08-25 22:09       ` Frederic Weisbecker
2009-08-26  7:47         ` Heiko Carstens
2009-08-28 12:27     ` [tip:tracing/core] tracing: Check invalid syscall nr while tracing syscalls tip-bot for Hendrik Brueckner
2009-08-10 20:52 ` [PATCH 09/12] add support traceopint ids Jason Baron
2009-08-11 11:28   ` Frederic Weisbecker
2009-08-10 20:53 ` [PATCH 10/12] add perf counter support Jason Baron
2009-08-11 12:12   ` Frederic Weisbecker
2009-08-11 12:17     ` Ingo Molnar
2009-08-11 12:25       ` Frederic Weisbecker
2009-08-10 20:53 ` [PATCH 11/12] add more namespace area to 'perf list' output Jason Baron
2009-08-10 20:53 ` [PATCH 12/12] convert x86_64 mmap and uname to use DEFINE_SYSCALL Jason Baron
2009-08-25 12:31 ` [PATCH 00/12] add syscall tracepoints V3 - s390 arch update Hendrik Brueckner
2009-08-25 13:52   ` Frederic Weisbecker
2009-08-25 14:39     ` Heiko Carstens
2009-08-25 19:52       ` Frederic Weisbecker
2009-08-25 15:38     ` Hendrik Brueckner
2009-08-26 16:53   ` Frederic Weisbecker
2009-08-27  7:27     ` [PATCH]: tracing: s390 arch updates for tracing syscalls Hendrik Brueckner
2009-08-28 12:27   ` [tip:tracing/core] tracing: Add syscall tracepoints - s390 arch update tip-bot for Hendrik Brueckner

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20090811105010.GB4938@nowhere \
    --to=fweisbec@gmail.com \
    --cc=jbaron@redhat.com \
    --cc=jiayingz@google.com \
    --cc=laijs@cn.fujitsu.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=lizf@cn.fujitsu.com \
    --cc=mathieu.desnoyers@polymtl.ca \
    --cc=mbligh@google.com \
    --cc=mingo@elte.hu \
    --cc=peterz@infradead.org \
    --cc=rostedt@goodmis.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.