From: Frederic Weisbecker <fweisbec@gmail.com>
To: Jason Baron <jbaron@redhat.com>
Cc: linux-kernel@vger.kernel.org, mingo@elte.hu,
laijs@cn.fujitsu.com, rostedt@goodmis.org, peterz@infradead.org,
mathieu.desnoyers@polymtl.ca, jiayingz@google.com,
mbligh@google.com, roland@redhat.com, fche@redhat.com
Subject: Re: [PATCH 2/2] convert to syscall tracepoints
Date: Sun, 7 Jun 2009 21:19:05 +0200 [thread overview]
Message-ID: <20090607191903.GA6021@nowhere> (raw)
In-Reply-To: <af4031af81ce372eac2be125dabc2ea746b4308b.1244222378.git.jbaron@redhat.com>
On Fri, Jun 05, 2009 at 02:08:08PM -0400, Jason Baron wrote:
>
> Implements syscall tracer via tracepoints and TRACE_EVENT(). Introduces
> a new tracing flag 'trace_syscalls', which must be toggled to enable this
> feature.
>
>
> Signed-off-by: Jason Baron <jbaron@redhat.com>
>
> ---
> arch/x86/kernel/ptrace.c | 8 +-
> include/asm-generic/syscall.h | 3 +
> include/trace/events/syscalls.h | 4202 +++++++++++++++++++++++++++++++++++++++
> include/trace/syscall.h | 6 +
> kernel/trace/Makefile | 1 -
> kernel/trace/trace.c | 101 +
> kernel/trace/trace_syscalls.c | 250 ---
> 7 files changed, 4317 insertions(+), 254 deletions(-)
> create mode 100644 include/trace/events/syscalls.h
> delete mode 100644 kernel/trace/trace_syscalls.c
>
> diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
> index 09ecbde..1016619 100644
> --- a/arch/x86/kernel/ptrace.c
> +++ b/arch/x86/kernel/ptrace.c
> @@ -35,7 +35,9 @@
> #include <asm/proto.h>
> #include <asm/ds.h>
>
> -#include <trace/syscall.h>
> +#include <linux/ftrace.h>
> +#define CREATE_TRACE_POINTS
> +#include <trace/events/syscalls.h>
>
> #include "tls.h"
>
> @@ -1498,7 +1500,7 @@ asmregparm long syscall_trace_enter(struct pt_regs *regs)
> ret = -1L;
>
> if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE)))
> - ftrace_syscall_enter(regs);
> + syscall_tracepoints_enter(regs);
>
> if (unlikely(current->audit_context)) {
> if (IS_IA32)
> @@ -1524,7 +1526,7 @@ asmregparm void syscall_trace_leave(struct pt_regs *regs)
> audit_syscall_exit(AUDITSC_RESULT(regs->ax), regs->ax);
>
> if (unlikely(test_thread_flag(TIF_SYSCALL_FTRACE)))
> - ftrace_syscall_exit(regs);
> + syscall_tracepoints_exit(regs);
>
> if (test_thread_flag(TIF_SYSCALL_TRACE))
> tracehook_report_syscall_exit(regs, 0);
> diff --git a/include/asm-generic/syscall.h b/include/asm-generic/syscall.h
> index ea8087b..ef4c68e 100644
> --- a/include/asm-generic/syscall.h
> +++ b/include/asm-generic/syscall.h
> @@ -22,6 +22,9 @@
> struct task_struct;
> struct pt_regs;
>
> +
> +struct syscall_metadata *syscall_nr_to_meta(int nr);
> +
> /**
> * syscall_get_nr - find what system call a task is executing
> * @task: task of interest, must be blocked
> diff --git a/include/trace/events/syscalls.h b/include/trace/events/syscalls.h
> new file mode 100644
> index 0000000..de7143d
> --- /dev/null
> +++ b/include/trace/events/syscalls.h
> @@ -0,0 +1,4202 @@
> +#if !defined(_TRACE_SYSCALLS_H) || defined(TRACE_HEADER_MULTI_READ)
> +#define _TRACE_SYSCALLS_H
> +
> +#include <asm/syscall.h>
> +#include <asm-generic/syscall.h>
> +#include <linux/tracepoint.h>
> +#include <trace/syscall.h>
> +
> +#undef TRACE_SYSTEM
> +#define TRACE_SYSTEM syscalls
> +
> +struct epoll_event;
> +struct iattr;
> +struct inode;
> +struct iocb;
> +struct io_event;
> +struct iovec;
> +struct itimerspec;
> +struct itimerval;
> +struct kexec_segment;
> +struct linux_dirent;
> +struct linux_dirent64;
> +struct list_head;
> +struct msgbuf;
> +struct msghdr;
> +struct msqid_ds;
> +struct new_utsname;
> +struct nfsctl_arg;
> +struct __old_kernel_stat;
> +struct pollfd;
> +struct rlimit;
> +struct rusage;
> +struct sched_param;
> +struct semaphore;
> +struct sembuf;
> +struct shmid_ds;
> +struct sockaddr;
> +struct stat;
> +struct stat64;
> +struct statfs;
> +struct statfs64;
> +struct __sysctl_args;
> +struct sysinfo;
> +struct timespec;
> +struct timeval;
> +struct timex;
> +struct timezone;
> +struct tms;
> +struct utimbuf;
> +struct mq_attr;
> +struct compat_stat;
> +struct compat_timeval;
> +struct robust_list_head;
> +struct getcpu_cache;
> +struct old_linux_dirent;
> +struct perf_counter_hw_event;
> +
> +/* misc macros */
> +
> +#define clock_id_toname(id) \
> + (id == CLOCK_REALTIME ? "CLOCK_REALTIME" : \
> + id == CLOCK_MONOTONIC ? "CLOCK_MONOTONIC" : \
> + id == CLOCK_PROCESS_CPUTIME_ID ? "CLOCK_PROCESS_CPUTIME_ID" : \
> + id == CLOCK_MONOTONIC_RAW ? "CLOCK_MONOTONIC_RAW" : \
> + id == CLOCK_SGI_CYCLE ? "CLOCK_SGI_CYCLE" : \
> + "UNKNOWN CLOCK")
> +
> +/* enter helper macros */
> +
> +#define MAX_SYS_ARGS 6
> +
> +#define expand_enter_sys_args_0()
> +#define expand_enter_sys_args_1(t1) (t1) sys_args[0]
> +#define expand_enter_sys_args_2(t1, t2) expand_enter_sys_args_1(t1), (t2) sys_args[1]
> +#define expand_enter_sys_args_3(t1, t2, t3) expand_enter_sys_args_2(t1, t2), (t3) sys_args[2]
> +#define expand_enter_sys_args_4(t1, t2, t3, t4) expand_enter_sys_args_3(t1, t2, t3), (t4) sys_args[3]
> +#define expand_enter_sys_args_5(t1, t2, t3, t4, t5) expand_enter_sys_args_4(t1, t2, t3, t4), (t5) sys_args[4]
> +#define expand_enter_sys_args_6(t1, t2, t3, t4, t5, t6) expand_enter_sys_args_5(t1, t2, t3, t4, t5), (t6) sys_args[5]
> +
> +#define create_syscall_enter(n, sysname, ...) \
> + case __NR_##sysname: \
> + syscall_get_arguments(current, regs, 0, n, sys_args); \
> + trace_sysenter_##sysname(expand_enter_sys_args_##n(__VA_ARGS__)); \
> + break;
> +
> +#define expand_enter_proto_0() void
> +#define expand_enter_proto_1(t1, p1) t1 p1
> +#define expand_enter_proto_2(t2, p2, ...) t2 p2, expand_enter_proto_1(__VA_ARGS__)
> +#define expand_enter_proto_3(t3, p3, ...) t3 p3, expand_enter_proto_2(__VA_ARGS__)
> +#define expand_enter_proto_4(t4, p4, ...) t4 p4, expand_enter_proto_3(__VA_ARGS__)
> +#define expand_enter_proto_5(t5, p5, ...) t5 p5, expand_enter_proto_4(__VA_ARGS__)
> +#define expand_enter_proto_6(t6, p6, ...) t6 p6, expand_enter_proto_5(__VA_ARGS__)
> +
> +#define expand_enter_args_0()
> +#define expand_enter_args_1(t1, p1) p1
> +#define expand_enter_args_2(t2, p2, ...) p2, expand_enter_args_1(__VA_ARGS__)
> +#define expand_enter_args_3(t3, p3, ...) p3, expand_enter_args_2(__VA_ARGS__)
> +#define expand_enter_args_4(t4, p4, ...) p4, expand_enter_args_3(__VA_ARGS__)
> +#define expand_enter_args_5(t5, p5, ...) p5, expand_enter_args_4(__VA_ARGS__)
> +#define expand_enter_args_6(t6, p6, ...) p6, expand_enter_args_5(__VA_ARGS__)
> +
> +#define expand_enter_entry_0()
> +#define expand_enter_entry_1(t1, p1) __field(t1, p1)
> +#define expand_enter_entry_2(t2, p2, ...) __field(t2, p2) expand_enter_entry_1(__VA_ARGS__)
> +#define expand_enter_entry_3(t3, p3, ...) __field(t3, p3) expand_enter_entry_2(__VA_ARGS__)
> +#define expand_enter_entry_4(t4, p4, ...) __field(t4, p4) expand_enter_entry_3(__VA_ARGS__)
> +#define expand_enter_entry_5(t5, p5, ...) __field(t5, p5) expand_enter_entry_4(__VA_ARGS__)
> +#define expand_enter_entry_6(t6, p6, ...) __field(t6, p6) expand_enter_entry_5(__VA_ARGS__)
> +
> +#define expand_enter_assign_0()
> +#define expand_enter_assign_1(t1, p1) __entry->p1 = p1;
> +#define expand_enter_assign_2(t2, p2, ...) __entry->p2 = p2; expand_enter_assign_1(__VA_ARGS__)
> +#define expand_enter_assign_3(t3, p3, ...) __entry->p3 = p3; expand_enter_assign_2(__VA_ARGS__)
> +#define expand_enter_assign_4(t4, p4, ...) __entry->p4 = p4; expand_enter_assign_3(__VA_ARGS__)
> +#define expand_enter_assign_5(t5, p5, ...) __entry->p5 = p5; expand_enter_assign_4(__VA_ARGS__)
> +#define expand_enter_assign_6(t6, p6, ...) __entry->p6 = p6; expand_enter_assign_5(__VA_ARGS__)
> +
> +#define expand_enter_printk_1(t1, p1) (u64)__entry->p1
> +#define expand_enter_printk_2(t2, p2, ...) (u64)__entry->p2, expand_enter_printk_1(__VA_ARGS__)
> +#define expand_enter_printk_3(t3, p3, ...) (u64)__entry->p3, expand_enter_printk_2(__VA_ARGS__)
> +#define expand_enter_printk_4(t4, p4, ...) (u64)__entry->p4, expand_enter_printk_3(__VA_ARGS__)
> +#define expand_enter_printk_5(t5, p5, ...) (u64)__entry->p5, expand_enter_printk_4(__VA_ARGS__)
> +#define expand_enter_printk_6(t6, p6, ...) (u64)__entry->p6, expand_enter_printk_5(__VA_ARGS__)
> +
> +#define TP_printk_0() TP_printk()
> +#define TP_printk_1(...) TP_printk("%016Lx", expand_enter_printk_1(__VA_ARGS__))
> +#define TP_printk_2(...) TP_printk("%016Lx %016Lx", expand_enter_printk_2(__VA_ARGS__))
> +#define TP_printk_3(...) TP_printk("%016Lx %016Lx %016Lx", expand_enter_printk_3(__VA_ARGS__))
> +#define TP_printk_4(...) TP_printk("%016Lx %016Lx %016Lx %016Lx", expand_enter_printk_4(__VA_ARGS__))
> +#define TP_printk_5(...) TP_printk("%016Lx %016Lx %016Lx %016Lx %016Lx", \
> + expand_enter_printk_5(__VA_ARGS__))
> +#define TP_printk_6(...) TP_printk("%016Lx %016Lx %016Lx %016Lx %016Lx %016Lx", \
> + expand_enter_printk_6(__VA_ARGS__))
Hmm, maybe just use %p so that the output adapts to the arch's word size.
Anyway, we'll need to customize the syscall args printing once we
have these tracepoints.
> +
> +#define trace_event_syscall_enter(n, name, ...) \
> + TRACE_EVENT(sysenter_##name, \
> + TP_PROTO(expand_enter_proto_##n(__VA_ARGS__)), \
> + TP_ARGS(expand_enter_args_##n(__VA_ARGS__)), \
> + TP_STRUCT__entry(expand_enter_entry_##n(__VA_ARGS__)), \
> + TP_fast_assign(expand_enter_assign_##n(__VA_ARGS__)), \
> + TP_printk_##n(__VA_ARGS__) \
> + );
> +
> +/* exit helper macros */
> +
> +#define create_syscall_exit(sysname) \
> + case __NR_##sysname: \
> + trace_sysexit_##sysname(ret); \
> + break; \
> +
> +#define trace_event_syscall_exit(name) \
> + TRACE_EVENT(sysexit_##name, \
> + TP_PROTO(long ret), \
> + TP_ARGS(ret), \
> + TP_STRUCT__entry( \
> + __field(long, retval) \
> + ), \
> + TP_fast_assign( \
> + __entry->retval = ret; \
> + ), \
> + TP_printk("return value: %ld", __entry->retval) \
> + );
Up to this point it looks good; these helpers can be applied in SYSCALL_DEFINE(),
but I really think that manually writing a tracepoint definition for each syscall
is not a good idea.
What you did above could instead be integrated into SYSCALL_DEFINEx() so
that we can benefit from the magic of defining every syscall tracepoint
from a single piece of generic code.
It will probably require some tuning, such as setting the TIF_SYSCALL_FTRACE
flag from the reg() callback in TRACE_EVENT,
and probably some other things.
Thanks.
Frederic.
> +#ifdef __NR_time
> +trace_event_syscall_enter(1, time, time_t __user *, tloc);
> +trace_event_syscall_exit(time);
> +#define ENTERCASEtime create_syscall_enter(1, time, time_t __user *);
> +#define EXITCASEtime create_syscall_exit(time);
> +#else
> +#define ENTERCASEtime
> +#define EXITCASEtime
> +#endif
> +
> +#ifdef __NR_stime
> +trace_event_syscall_enter(1, stime, time_t __user *, tptr);
> +trace_event_syscall_exit(stime);
> +#define ENTERCASEstime create_syscall_enter(1, stime, time_t __user *);
> +#define EXITCASEstime create_syscall_exit(stime);
> +#else
> +#define ENTERCASEstime
> +#define EXITCASEstime
> +#endif
prev parent reply other threads:[~2009-06-07 19:19 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2009-06-05 18:07 [PATCH 0/2] convert ftrace syscalls to TRACE_EVENT Jason Baron
2009-06-05 18:08 ` [PATCH 1/2] allow TP_printk() to have no args Jason Baron
2009-06-05 18:08 ` [PATCH 2/2] convert to syscall tracepoints Jason Baron
2009-06-07 13:29 ` Ingo Molnar
2009-06-08 20:24 ` Jason Baron
2009-06-08 20:40 ` Ingo Molnar
2009-06-08 21:11 ` Jason Baron
2009-06-08 21:25 ` Ingo Molnar
2009-06-08 21:38 ` Jason Baron
2009-06-08 22:00 ` Ingo Molnar
2009-06-08 23:02 ` Frederic Weisbecker
2009-06-09 14:13 ` Jason Baron
2009-06-09 18:53 ` Frederic Weisbecker
2009-06-09 19:17 ` Jason Baron
2009-06-07 19:19 ` Frederic Weisbecker [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20090607191903.GA6021@nowhere \
--to=fweisbec@gmail.com \
--cc=fche@redhat.com \
--cc=jbaron@redhat.com \
--cc=jiayingz@google.com \
--cc=laijs@cn.fujitsu.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mathieu.desnoyers@polymtl.ca \
--cc=mbligh@google.com \
--cc=mingo@elte.hu \
--cc=peterz@infradead.org \
--cc=roland@redhat.com \
--cc=rostedt@goodmis.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.