[PATCH] tracer for sys_open()

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

* [PATCH] tracer for sys_open() - sreadahead
@ 2009-01-27 20:08 Kok, Auke
  2009-01-27 20:51 ` Arnaldo Carvalho de Melo
                   ` (2 more replies)
  0 siblings, 3 replies; 38+ messages in thread
From: Kok, Auke @ 2009-01-27 20:08 UTC (permalink / raw)
  To: Linux Kernel Mailing List
  Cc: powertop ml, Arjan van de Ven, Ingo Molnar, srostedt


This tracer monitors regular file open() syscalls. It is a fast,
low-overhead alternative to strace, and neither allows nor
requires attaching to every process.

The tracer only logs successful calls, as those are the only ones we
are currently interested in, and we can determine the absolute path
of these files as we log.

Signed-off-by: Auke Kok <auke-jan.h.kok@intel.com>


diff --git a/fs/open.c b/fs/open.c
index a3a78ce..8cf2a6b 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -30,6 +30,10 @@
 #include <linux/audit.h>
 #include <linux/falloc.h>

+#include <trace/fs.h>
+
+DEFINE_TRACE(do_sys_open);
+
 int vfs_statfs(struct dentry *dentry, struct kstatfs *buf)
 {
        int retval = -ENODEV;
@@ -1040,6 +1044,7 @@ long do_sys_open(int dfd, const char __user *filename, int
flags, int mode)
                                fsnotify_open(f->f_path.dentry);
                                fd_install(fd, f);
                        }
+                       trace_do_sys_open(f, flags, mode, fd);
                }
                putname(tmp);
        }
diff --git a/include/trace/fs.h b/include/trace/fs.h
new file mode 100644
index 0000000..870eec2
--- /dev/null
+++ b/include/trace/fs.h
@@ -0,0 +1,11 @@
+#ifndef _TRACE_FS_H
+#define _TRACE_FS_H
+
+#include <linux/fs.h>
+#include <linux/tracepoint.h>
+
+DECLARE_TRACE(do_sys_open,
+       TPPROTO(struct file *filp, int flags, int mode, long fd),
+               TPARGS(filp, flags, mode, fd));
+
+#endif
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index e2a4ff6..0400815 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -149,6 +149,15 @@ config CONTEXT_SWITCH_TRACER
          This tracer gets called from the context switch and records
          all switching of tasks.

+config OPEN_CLOSE_TRACER
+       bool "Trace open() calls"
+       depends on DEBUG_KERNEL
+       select TRACING
+       select MARKERS
+       help
+         This tracer records open() syscalls. These calls are made when
+         files are accessed on disk.
+
 config BOOT_TRACER
        bool "Trace boot initcalls"
        depends on DEBUG_KERNEL
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index 349d5a9..25cec6c 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -20,6 +20,7 @@ obj-$(CONFIG_RING_BUFFER) += ring_buffer.o

 obj-$(CONFIG_TRACING) += trace.o
 obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
+obj-$(CONFIG_OPEN_CLOSE_TRACER) += trace_open_close.o
 obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
 obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
 obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 4d3d381..24c17d2 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -30,6 +30,7 @@ enum trace_type {
        TRACE_USER_STACK,
        TRACE_HW_BRANCHES,
        TRACE_POWER,
+       TRACE_OPEN,

        __TRACE_LAST_TYPE
 };
diff --git a/kernel/trace/trace_open_close.c b/kernel/trace/trace_open_close.c
new file mode 100644
index 0000000..4250efc
--- /dev/null
+++ b/kernel/trace/trace_open_close.c
@@ -0,0 +1,148 @@
+/*
+ * trace open calls
+ * Copyright (C) 2009 Intel Corporation
+ *
+ * Based extensively on trace_sched_switch.c
+ * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
+ *
+ */
+
+#include <linux/module.h>
+#include <linux/fs.h>
+#include <linux/debugfs.h>
+#include <linux/kallsyms.h>
+#include <linux/uaccess.h>
+#include <linux/ftrace.h>
+#include <trace/fs.h>
+
+#include "trace.h"
+
+
+static struct trace_array      *ctx_trace;
+static int __read_mostly       open_trace_enabled;
+static atomic_t                        open_ref;
+
+static void probe_do_sys_open(struct file *filp, int flags, int mode, long fd)
+{
+       char *buf;
+       char *fname;
+
+       if (!atomic_read(&open_ref))
+               return;
+
+       if (!open_trace_enabled)
+               return;
+
+       buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
+       if (!buf)
+               return;
+       fname = d_path(&filp->f_path, buf, PAGE_SIZE);
+
+       if (IS_ERR(fname))
+               goto out;
+
+       ftrace_printk("%s: open(\"%s\", %d, %d) = %ld\n",
+               current->comm, fname, flags, mode, fd);
+out:
+       kfree(buf);
+}
+
+static void open_trace_reset(struct trace_array *tr)
+{
+       tr->time_start = ftrace_now(tr->cpu);
+       tracing_reset_online_cpus(tr);
+}
+
+static int open_trace_register(void)
+{
+       int ret;
+
+       ret = register_trace_do_sys_open(probe_do_sys_open);
+       if (ret) {
+               pr_info("open trace: Could not activate tracepoint"
+                       " probe to do_open\n");
+       }
+
+       return ret;
+}
+
+static void open_trace_unregister(void)
+{
+       unregister_trace_do_sys_open(probe_do_sys_open);
+}
+
+static void open_trace_start(void)
+{
+       long ref;
+
+       ref = atomic_inc_return(&open_ref);
+       if (ref == 1)
+               open_trace_register();
+}
+
+static void open_trace_stop(void)
+{
+       long ref;
+
+       ref = atomic_dec_and_test(&open_ref);
+       if (ref)
+               open_trace_unregister();
+}
+
+void open_trace_start_cmdline_record(void)
+{
+       open_trace_start();
+}
+
+void open_trace_stop_cmdline_record(void)
+{
+       open_trace_stop();
+}
+
+static void open_start_trace(struct trace_array *tr)
+{
+       open_trace_reset(tr);
+       open_trace_start_cmdline_record();
+       open_trace_enabled = 1;
+}
+
+static void open_stop_trace(struct trace_array *tr)
+{
+       open_trace_enabled = 0;
+       open_trace_stop_cmdline_record();
+}
+
+static int open_trace_init(struct trace_array *tr)
+{
+       ctx_trace = tr;
+
+       open_start_trace(tr);
+       return 0;
+}
+
+static void reset_open_trace(struct trace_array *tr)
+{
+       open_stop_trace(tr);
+}
+
+static struct tracer open_trace __read_mostly =
+{
+       .name           = "open",
+       .init           = open_trace_init,
+       .reset          = reset_open_trace,
+};
+
+__init static int init_open_trace(void)
+{
+       int ret = 0;
+
+       if (atomic_read(&open_ref))
+               ret = open_trace_register();
+       if (ret) {
+               pr_info("error registering open trace\n");
+               return ret;
+       }
+       return register_tracer(&open_trace);
+}
+device_initcall(init_open_trace);
+

^ permalink raw reply related	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-01-27 20:08 [PATCH] tracer for sys_open() - sreadahead Kok, Auke
@ 2009-01-27 20:51 ` Arnaldo Carvalho de Melo
  2009-01-27 21:14   ` Frederic Weisbecker
  2009-01-27 22:43 ` Frederic Weisbecker
  2009-01-30 20:22 ` Pavel Machek
  2 siblings, 1 reply; 38+ messages in thread
From: Arnaldo Carvalho de Melo @ 2009-01-27 20:51 UTC (permalink / raw)
  To: Kok, Auke
  Cc: Linux Kernel Mailing List, powertop ml, Arjan van de Ven,
	Ingo Molnar, srostedt, Frederic Weisbecker, Frank Ch. Eigler,
	Neil Horman

Em Tue, Jan 27, 2009 at 12:08:04PM -0800, Kok, Auke escreveu:
> This tracer monitors regular file open() syscalls. This is a fast
> and low-overhead alternative to strace, and does not allow or
> require to be attached to every process.
> 
> The tracer only logs succesfull calls, as those are the only ones we
> are currently interested in, and we can determine the absolute path
> of these files as we log.
> 
> Signed-off-by: Auke Kok <auke-jan.h.kok@intel.com>

<SNIP>
 
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index 4d3d381..24c17d2 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -30,6 +30,7 @@ enum trace_type {
>         TRACE_USER_STACK,
>         TRACE_HW_BRANCHES,
>         TRACE_POWER,
> +       TRACE_OPEN,

Why not a TRACE_VFS or TRACE_SYSCALL and then multiplex there open,
close, etc? trace_assign_type will get humongous in no time this way.

TRACE_BLK does multiplexing, as does TRACE_PROCESS that Frank just
posted too, and I'm working on a plugin for the patch that Neil
submitted for the socket layer that also will need an entry there for
its tracepoints.

Frederic, it seems that discussion about subtypes has to continue :-)

- Arnaldo

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-01-27 20:51 ` Arnaldo Carvalho de Melo
@ 2009-01-27 21:14   ` Frederic Weisbecker
  2009-01-28 22:05     ` Kok, Auke
  0 siblings, 1 reply; 38+ messages in thread
From: Frederic Weisbecker @ 2009-01-27 21:14 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Kok, Auke, Linux Kernel Mailing List,
	powertop ml, Arjan van de Ven, Ingo Molnar, srostedt,
	Frank Ch. Eigler, Neil Horman

On Tue, Jan 27, 2009 at 06:51:37PM -0200, Arnaldo Carvalho de Melo wrote:
> Em Tue, Jan 27, 2009 at 12:08:04PM -0800, Kok, Auke escreveu:
> > This tracer monitors regular file open() syscalls. This is a fast
> > and low-overhead alternative to strace, and does not allow or
> > require to be attached to every process.
> > 
> > The tracer only logs succesfull calls, as those are the only ones we
> > are currently interested in, and we can determine the absolute path
> > of these files as we log.
> > 
> > Signed-off-by: Auke Kok <auke-jan.h.kok@intel.com>
> 
> <SNIP>
>  
> > diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> > index 4d3d381..24c17d2 100644
> > --- a/kernel/trace/trace.h
> > +++ b/kernel/trace/trace.h
> > @@ -30,6 +30,7 @@ enum trace_type {
> >         TRACE_USER_STACK,
> >         TRACE_HW_BRANCHES,
> >         TRACE_POWER,
> > +       TRACE_OPEN,
> 
> Why not a TRACE_VFS or TRACE_SYSCALL and then multiplex there open,
> close, etc? trace_assign_type will get humongous in no time this way.


That's what I was about to answer too.
It would be sad to end up with one tracer for open, one for read, one for write...

 
> TRACE_BLK does multiplexing, as does TRACE_PROCESS that Frank just
> posted too, and I'm working on a plugin for the patch that Neil
> submitted for the socket layer that also will need an entry there for
> its tracepoints.
> 
> Frederic, it seems that discussion about subtypes has to continue :-)


Indeed, that becomes serious :-)


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-01-27 21:14   ` Frederic Weisbecker
@ 2009-01-28 22:05     ` Kok, Auke
  2009-01-29  0:45       ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 38+ messages in thread
From: Kok, Auke @ 2009-01-28 22:05 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Arnaldo Carvalho de Melo, Linux Kernel Mailing List, powertop ml,
	Arjan van de Ven, Ingo Molnar, srostedt@redhat.com,
	Frank Ch. Eigler, Neil Horman, Kok, Auke

Frederic Weisbecker wrote:
> On Tue, Jan 27, 2009 at 06:51:37PM -0200, Arnaldo Carvalho de Melo wrote:
>> Em Tue, Jan 27, 2009 at 12:08:04PM -0800, Kok, Auke escreveu:
>>> This tracer monitors regular file open() syscalls. This is a fast
>>> and low-overhead alternative to strace, and does not allow or
>>> require to be attached to every process.
>>>
>>> The tracer only logs succesfull calls, as those are the only ones we
>>> are currently interested in, and we can determine the absolute path
>>> of these files as we log.
>>>
>>> Signed-off-by: Auke Kok <auke-jan.h.kok@intel.com>
>> <SNIP>
>>  
>>> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
>>> index 4d3d381..24c17d2 100644
>>> --- a/kernel/trace/trace.h
>>> +++ b/kernel/trace/trace.h
>>> @@ -30,6 +30,7 @@ enum trace_type {
>>>         TRACE_USER_STACK,
>>>         TRACE_HW_BRANCHES,
>>>         TRACE_POWER,
>>> +       TRACE_OPEN,
>> Why not a TRACE_VFS or TRACE_SYSCALL and then multiplex there open,
>> close, etc? trace_assign_type will get humongous in no time this way.
> 
> 
> That's what I was about to answer too.
> That would be sad to find one tracer for open, one for read, one for write...
> 
>  
>> TRACE_BLK does multiplexing, as does TRACE_PROCESS that Frank just
>> posted too, and I'm working on a plugin for the patch that Neil
>> submitted for the socket layer that also will need an entry there for
>> its tracepoints.
>>
>> Frederic, it seems that discussion about subtypes has to continue :-)
> 
> 
> Indeed, that becomes serious :-)
> 

I totally agree - this tracer was purely made quick-and-dirty to get sreadahead to
the next level. The in-kernel syscall trace facility is extremely potent and I
don't doubt that my patch may have been found slightly absurd by some :)

All in all, it was not productive to write a giant trace facility just to get
sreadahead one tracer type. Now that the code works and the benefit is proven, I'm
happy to see if we can work on making a decent long-term, non-specialized solution.

Auke


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-01-28 22:05     ` Kok, Auke
@ 2009-01-29  0:45       ` Arnaldo Carvalho de Melo
  2009-01-29 13:39         ` Frédéric Weisbecker
  0 siblings, 1 reply; 38+ messages in thread
From: Arnaldo Carvalho de Melo @ 2009-01-29  0:45 UTC (permalink / raw)
  To: Kok, Auke
  Cc: Frederic Weisbecker, Linux Kernel Mailing List, powertop ml,
	Arjan van de Ven, Ingo Molnar, srostedt@redhat.com,
	Frank Ch. Eigler, Neil Horman, Kok, Auke

Em Wed, Jan 28, 2009 at 02:05:20PM -0800, Kok, Auke escreveu:
> Frederic Weisbecker wrote:
> > On Tue, Jan 27, 2009 at 06:51:37PM -0200, Arnaldo Carvalho de Melo wrote:
> >> Em Tue, Jan 27, 2009 at 12:08:04PM -0800, Kok, Auke escreveu:
> >>> This tracer monitors regular file open() syscalls. This is a fast
> >>> and low-overhead alternative to strace, and does not allow or
> >>> require to be attached to every process.
> >>>
> >>> The tracer only logs succesfull calls, as those are the only ones we
> >>> are currently interested in, and we can determine the absolute path
> >>> of these files as we log.
> >>>
> >>> Signed-off-by: Auke Kok <auke-jan.h.kok@intel.com>
> >> <SNIP>
> >>  
> >>> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> >>> index 4d3d381..24c17d2 100644
> >>> --- a/kernel/trace/trace.h
> >>> +++ b/kernel/trace/trace.h
> >>> @@ -30,6 +30,7 @@ enum trace_type {
> >>>         TRACE_USER_STACK,
> >>>         TRACE_HW_BRANCHES,
> >>>         TRACE_POWER,
> >>> +       TRACE_OPEN,
> >> Why not a TRACE_VFS or TRACE_SYSCALL and then multiplex there open,
> >> close, etc? trace_assign_type will get humongous in no time this way.
> > 
> > 
> > That's what I was about to answer too.
> > That would be sad to find one tracer for open, one for read, one for write...
> > 
> >  
> >> TRACE_BLK does multiplexing, as does TRACE_PROCESS that Frank just
> >> posted too, and I'm working on a plugin for the patch that Neil
> >> submitted for the socket layer that also will need an entry there for
> >> its tracepoints.
> >>
> >> Frederic, it seems that discussion about subtypes has to continue :-)
> > 
> > 
> > Indeed, that becomes serious :-)
> > 
> 
> I totally agree - this tracer was purely made quick-and-dirty to get sreadahead to
> the next level. The in-kernel syscall trace facility is extremely potent and I
> don't doubt that my patch may have been found slightly absurd by some :)

hey, hey, your patch just illustrated that we need a subtype facility.

I'm (as all the other people in some shape or form involved in pushing
some sort of common tracing infrastructure into the kernel, I guess)
excited about more developers using what is being put in place :-)
 
> In all it was not productive to write a giant trace facility just to get
> sreadahead one tracer type. Now that the code works and the benefit is proven, I'm
> happy to see if we can work on making a decent long-term non-specialistic solution.

That is the spirit!

- Arnaldo

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-01-29  0:45       ` Arnaldo Carvalho de Melo
@ 2009-01-29 13:39         ` Frédéric Weisbecker
  2009-01-29 13:40           ` Frédéric Weisbecker
  0 siblings, 1 reply; 38+ messages in thread
From: Frédéric Weisbecker @ 2009-01-29 13:39 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Kok, Auke, Frederic Weisbecker,
	Linux Kernel Mailing List, powertop ml, Arjan van de Ven,
	Ingo Molnar, srostedt@redhat.com, Frank Ch. Eigler, Neil Horman,
	Kok, Auke

2009/1/29 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>:
> hey, hey, your patch just illustrated that we need a subtype facility.
>
> I'm (as all the other people in some shape or form involved in pushing
> some sort of common tracing infrastructure into the kernel, I guess)
> excited about more developers using what is being put in place :-)

Indeed :-)

About these subtypes: I've thought about what would be intuitive for
the developers who will use them.

I think the struct trace_event can be reused by just adding a subtype
event inside:

+ int (*find_subtype_func) (struct trace_iterator *);

 struct trace_event {
 	struct hlist_node	node;
 	int			type;
 	trace_print_func	trace;
 	trace_print_func	latency_trace;
 	trace_print_func	raw;
 	trace_print_func	hex;
 	trace_print_func	binary;
+      struct trace_event *subevent_array;
+      find_subtype_func find_subtype;
 };

And then I shall implement an event internal hlist for each global event.
All a tracer developer has to do is:

_ implement a general trace_event for his global event type
_ implement each trace subevent as an array; one will have to define
  one's own set of values for the types. They will not conflict with
  those used by global event types, since they are isolated and depend
  on one global event type.
_ implement the find_subtype callback. When a new trace has to be printed,
  this callback is called to find the subtype value corresponding to the
  trace. If the tracer returns a positive value, the matching subevent is
  looked up in the subevent_array and the appropriate trace_print_func is
  called. Otherwise, if find_subtype is NULL or returns -EINVAL, the
  appropriate trace_print_func of the current global event is called, as
  usual.

We can even make it recursive, letting one crazy tracer be able to
have subtypes of subtypes :-)

Hm? If you are ok, I shall start it.

>> In all it was not productive to write a giant trace facility just to get
>> sreadahead one tracer type. Now that the code works and the benefit is proven, I'm
>> happy to see if we can work on making a decent long-term non-specialistic solution.
>
> That is the spirit!
>
> - Arnaldo
>

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-01-29 13:39         ` Frédéric Weisbecker
@ 2009-01-29 13:40           ` Frédéric Weisbecker
  0 siblings, 0 replies; 38+ messages in thread
From: Frédéric Weisbecker @ 2009-01-29 13:40 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo, Kok, Auke, Frederic Weisbecker,
	Linux Kernel Mailing List, powertop ml, Arjan van de Ven,
	Ingo Molnar, srostedt@redhat.com, Frank Ch. Eigler, Neil Horman,
	Kok, Auke

2009/1/29 Frédéric Weisbecker <fweisbec@gmail.com>:
> 2009/1/29 Arnaldo Carvalho de Melo <acme@ghostprotocols.net>:
>> hey, hey, your patch just illustrated that we need a subtype facility.
>>
>> I'm (as all the other people in some shape or form involved in pushing
>> some sort of common tracing infrastructure into the kernel, I guess)
>> excited about more developers using what is being put in place :-)
>
>
> Indeed :-)
>
> About these subtypes. I've thought about what can be intuitive for
> developers who
> will use it.
>
> I think the struct trace_event can be reused by just adding a subtype
> event inside:
>
> + int (*find_subtype_func) (struct trace_iterator *);


Sorry: forgot the typedef there...

>
>  struct trace_event {
>        struct hlist_node       node;
>        int                     type;
>        trace_print_func        trace;
>        trace_print_func        latency_trace;
>        trace_print_func        raw;
>        trace_print_func        hex;
>        trace_print_func        binary;
> +      struct trace_event *subevent_array;
> +      find_subtype_func find_subtype;
>  };
>
> And then I shall implement an event internal hlist for each global event.
> All a tracer developer just have to do is:
>
> _ implement a general trace_event for his global event type
> _ implement each trace subevent as an array, one will have to define
> its own set of values for the types, they will
>  not conflict with those used by global event types since they are
> isolated and depend on one global event type.
> _ implement find_subtype callback. When a new trace has to be printed,
> this callback is called to find the subtype
>  value corresponding to a trace. If the tracer returns a positive
> value, the matching subevent is searched on the subevent_array
>  and the appropriate trace_print_func is called.
>  Otherwise if find_subtype is NULL or returns -EINVAL, then the
> appropriate trace_print_func of the current global event is called, as
>  usual.
>
> We can even make it recursive, letting one crazy tracer beeing able to
> have subtypes of subtypes :-)
>
> Hm? If you are ok, I shall start it.
>
>
>>> In all it was not productive to write a giant trace facility just to get
>>> sreadahead one tracer type. Now that the code works and the benefit is proven, I'm
>>> happy to see if we can work on making a decent long-term non-specialistic solution.
>>
>> That is the spirit!
>>
>> - Arnaldo
>>
>

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-01-27 20:08 [PATCH] tracer for sys_open() - sreadahead Kok, Auke
  2009-01-27 20:51 ` Arnaldo Carvalho de Melo
@ 2009-01-27 22:43 ` Frederic Weisbecker
  2009-01-27 22:50   ` Frederic Weisbecker
                     ` (3 more replies)
  2009-01-30 20:22 ` Pavel Machek
  2 siblings, 4 replies; 38+ messages in thread
From: Frederic Weisbecker @ 2009-01-27 22:43 UTC (permalink / raw)
  To: Kok, Auke
  Cc: Linux Kernel Mailing List, powertop ml, Arjan van de Ven,
	Ingo Molnar, srostedt, Arnaldo Carvalho de Melo, Frank Ch. Eigler,
	Neil Horman

On Tue, Jan 27, 2009 at 12:08:04PM -0800, Kok, Auke wrote:
> 
> This tracer monitors regular file open() syscalls. This is a fast
> and low-overhead alternative to strace, and does not allow or
> require to be attached to every process.
> 
> The tracer only logs succesfull calls, as those are the only ones we
> are currently interested in, and we can determine the absolute path
> of these files as we log.
> 
> Signed-off-by: Auke Kok <auke-jan.h.kok@intel.com>


Hi Auke,

Speaking about a global syscall tracer, I made a patch to trace only the syscalls
with the function-graph-tracer.

http://lkml.org/lkml/2008/12/30/267

Its approach and purpose are different from those of a tracer dedicated only to syscalls.
The function graph tracer traces the execution graph of functions and is more about
execution time spent and code flow, whereas a syscall tracer can provide more specific
information about syscalls.

So the two do not overlap.

But the low level part of my patch creates a thread flag _TIF_SYSCALL_TRACE which triggers
a ptrace hook when set.
This low-level part can easily be used by all tracers that would like to inspect syscalls.

Just one change is needed: Steven requested that the part inside syscall_trace_enter become
a tracepoint, making it totally shareable between tracers and easy to turn on and off.

And perhaps the parts that set/clear the flag on all tasks can be shared too.

So we can start with this low-level syscall tracing facility. If you want, I can adapt
this low-level part and submit a patch this week or the next one to give you this base
infrastructure.


Once we have it, I think a syscall tracer can be fed with new syscall events through
several patch iterations, starting with the open and close ones :-)

Are you ok with that?

Steven, Ingo, do you agree?

 
> 
> diff --git a/fs/open.c b/fs/open.c
> index a3a78ce..8cf2a6b 100644
> --- a/fs/open.c
> +++ b/fs/open.c
> @@ -30,6 +30,10 @@
>  #include <linux/audit.h>
>  #include <linux/falloc.h>
> 
> +#include <trace/fs.h>
> +
> +DEFINE_TRACE(do_sys_open);
> +
>  int vfs_statfs(struct dentry *dentry, struct kstatfs *buf)
>  {
>         int retval = -ENODEV;
> @@ -1040,6 +1044,7 @@ long do_sys_open(int dfd, const char __user *filename, int
> flags, int mode)
>                                 fsnotify_open(f->f_path.dentry);
>                                 fd_install(fd, f);
>                         }
> +                       trace_do_sys_open(f, flags, mode, fd);
>                 }
>                 putname(tmp);
>         }
> diff --git a/include/trace/fs.h b/include/trace/fs.h
> new file mode 100644
> index 0000000..870eec2
> --- /dev/null
> +++ b/include/trace/fs.h
> @@ -0,0 +1,11 @@
> +#ifndef _TRACE_FS_H
> +#define _TRACE_FS_H
> +
> +#include <linux/fs.h>
> +#include <linux/tracepoint.h>
> +
> +DECLARE_TRACE(do_sys_open,
> +       TPPROTO(struct file *filp, int flags, int mode, long fd),
> +               TPARGS(filp, flags, mode, fd));
> +
> +#endif
> diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
> index e2a4ff6..0400815 100644
> --- a/kernel/trace/Kconfig
> +++ b/kernel/trace/Kconfig
> @@ -149,6 +149,15 @@ config CONTEXT_SWITCH_TRACER
>           This tracer gets called from the context switch and records
>           all switching of tasks.
> 
> +config OPEN_CLOSE_TRACER
> +       bool "Trace open() calls"
> +       depends on DEBUG_KERNEL
> +       select TRACING
> +       select MARKERS
> +       help
> +         This tracer records open() syscalls. These calls are made when
> +         files are accessed on disk.
> +
>  config BOOT_TRACER
>         bool "Trace boot initcalls"
>         depends on DEBUG_KERNEL
> diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
> index 349d5a9..25cec6c 100644
> --- a/kernel/trace/Makefile
> +++ b/kernel/trace/Makefile
> @@ -20,6 +20,7 @@ obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
> 
>  obj-$(CONFIG_TRACING) += trace.o
>  obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
> +obj-$(CONFIG_OPEN_CLOSE_TRACER) += trace_open_close.o
>  obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
>  obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
>  obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
> diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> index 4d3d381..24c17d2 100644
> --- a/kernel/trace/trace.h
> +++ b/kernel/trace/trace.h
> @@ -30,6 +30,7 @@ enum trace_type {
>         TRACE_USER_STACK,
>         TRACE_HW_BRANCHES,
>         TRACE_POWER,
> +       TRACE_OPEN,
> 
>         __TRACE_LAST_TYPE
>  };
> diff --git a/kernel/trace/trace_open_close.c b/kernel/trace/trace_open_close.c
> new file mode 100644
> index 0000000..4250efc
> --- /dev/null
> +++ b/kernel/trace/trace_open_close.c
> @@ -0,0 +1,148 @@
> +/*
> + * trace open calls
> + * Copyright (C) 2009 Intel Corporation
> + *
> + * Based extensively on trace_sched_switch.c
> + * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
> + *
> + */
> +
> +#include <linux/module.h>
> +#include <linux/fs.h>
> +#include <linux/debugfs.h>
> +#include <linux/kallsyms.h>
> +#include <linux/uaccess.h>
> +#include <linux/ftrace.h>
> +#include <trace/fs.h>
> +
> +#include "trace.h"
> +
> +
> +static struct trace_array      *ctx_trace;
> +static int __read_mostly       open_trace_enabled;
> +static atomic_t                        open_ref;
> +
> +static void probe_do_sys_open(struct file *filp, int flags, int mode, long fd)
> +{
> +       char *buf;
> +       char *fname;
> +
> +       if (!atomic_read(&open_ref))
> +               return;
> +
> +       if (!open_trace_enabled)
> +               return;
> +
> +       buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
> +       if (!buf)
> +               return;
> +       fname = d_path(&filp->f_path, buf, PAGE_SIZE);
> +
> +       if (IS_ERR(fname))
> +               goto out;
> +
> +       ftrace_printk("%s: open(\"%s\", %d, %d) = %ld\n",
> +               current->comm, fname, flags, mode, fd);
> +out:
> +       kfree(buf);
> +}
> +
> +static void open_trace_reset(struct trace_array *tr)
> +{
> +       tr->time_start = ftrace_now(tr->cpu);
> +       tracing_reset_online_cpus(tr);
> +}
> +
> +static int open_trace_register(void)
> +{
> +       int ret;
> +
> +       ret = register_trace_do_sys_open(probe_do_sys_open);
> +       if (ret) {
> +               pr_info("open trace: Could not activate tracepoint"
> +                       " probe to do_open\n");
> +       }
> +
> +       return ret;
> +}
> +
> +static void open_trace_unregister(void)
> +{
> +       unregister_trace_do_sys_open(probe_do_sys_open);
> +}
> +
> +static void open_trace_start(void)
> +{
> +       long ref;
> +
> +       ref = atomic_inc_return(&open_ref);
> +       if (ref == 1)
> +               open_trace_register();
> +}
> +
> +static void open_trace_stop(void)
> +{
> +       long ref;
> +
> +       ref = atomic_dec_and_test(&open_ref);
> +       if (ref)
> +               open_trace_unregister();
> +}
> +
> +void open_trace_start_cmdline_record(void)
> +{
> +       open_trace_start();
> +}
> +
> +void open_trace_stop_cmdline_record(void)
> +{
> +       open_trace_stop();
> +}
> +
> +static void open_start_trace(struct trace_array *tr)
> +{
> +       open_trace_reset(tr);
> +       open_trace_start_cmdline_record();
> +       open_trace_enabled = 1;
> +}
> +
> +static void open_stop_trace(struct trace_array *tr)
> +{
> +       open_trace_enabled = 0;
> +       open_trace_stop_cmdline_record();
> +}
> +
> +static int open_trace_init(struct trace_array *tr)
> +{
> +       ctx_trace = tr;
> +
> +       open_start_trace(tr);
> +       return 0;
> +}
> +
> +static void reset_open_trace(struct trace_array *tr)
> +{
> +       open_stop_trace(tr);
> +}
> +
> +static struct tracer open_trace __read_mostly =
> +{
> +       .name           = "open",
> +       .init           = open_trace_init,
> +       .reset          = reset_open_trace,
> +};
> +
> +__init static int init_open_trace(void)
> +{
> +       int ret = 0;
> +
> +       if (atomic_read(&open_ref))
> +               ret = open_trace_register();
> +       if (ret) {
> +               pr_info("error registering open trace\n");
> +               return ret;
> +       }
> +       return register_tracer(&open_trace);
> +}
> +device_initcall(init_open_trace);
> +
> --
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.kernel.org
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> Please read the FAQ at  http://www.tux.org/lkml/


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-01-27 22:43 ` Frederic Weisbecker
@ 2009-01-27 22:50   ` Frederic Weisbecker
  2009-01-29 14:04     ` Ingo Molnar
  2009-01-28  0:43   ` Frank Ch. Eigler
                     ` (2 subsequent siblings)
  3 siblings, 1 reply; 38+ messages in thread
From: Frederic Weisbecker @ 2009-01-27 22:50 UTC (permalink / raw)
  To: Kok, Auke
  Cc: Linux Kernel Mailing List, powertop ml, Arjan van de Ven,
	Ingo Molnar, srostedt, Arnaldo Carvalho de Melo, Frank Ch. Eigler,
	Neil Horman

On Tue, Jan 27, 2009 at 11:43:03PM +0100, Frederic Weisbecker wrote:
> On Tue, Jan 27, 2009 at 12:08:04PM -0800, Kok, Auke wrote:
> > 
> > This tracer monitors regular file open() syscalls. This is a fast
> > and low-overhead alternative to strace, and does not allow or
> > require to be attached to every process.
> > 
> > The tracer only logs succesfull calls, as those are the only ones we
> > are currently interested in, and we can determine the absolute path
> > of these files as we log.
> > 
> > Signed-off-by: Auke Kok <auke-jan.h.kok@intel.com>
> 
> 
> Hi Auke,
> 
> Speaking about a global syscall tracer, I made a patch to trace only the syscalls
> with the function-graph-tracer.
> 
> http://lkml.org/lkml/2008/12/30/267
> 
> Its approach and purpose is different than a tracer dedicated only to syscalls.
> The function graph tracer traces execution graph of the functions and is more about
> execution time spent and code flow whereas a syscall tracer can provide more specific
> informations about syscalls.
> 
> So both are not overlaping.
> 
> But the low level part of my patch creates a thread flag _TIF_SYSCALL_TRACE which triggers

s/_TIF_SYSCALL_TRACE/_TIF_SYSCALL_FTRACE

_TIF_SYSCALL_TRACE is the one used by ptrace.


> a ptrace hook when set.
> This low-level part can easily be used by all tracers that would like to inspect syscalls.
> 
> Just a change is needed: Steven requested that the part inside syscall_trace_enter become
> a tracepoint, making it totally shareable between tracers and easy to turn on and off.
> 
> And perhaps the parts that set/clear the flag on all tasks can be shared too.
> 
> So we can start with this low-level syscall tracing facility. If you want, I can adapt
> this low-level part and submit a patch this week or the next one to give you this base
> infrastructure.
> 
> 
> Once we have it, I think a syscall tracer can be fed with new syscalls events through
> several patch iterations, starting with the open and close one :-)
> 
> Are you ok with that?
> 
> Steven, Ingo, do you agree?
> 
>  
> > 
> > diff --git a/fs/open.c b/fs/open.c
> > index a3a78ce..8cf2a6b 100644
> > --- a/fs/open.c
> > +++ b/fs/open.c
> > @@ -30,6 +30,10 @@
> >  #include <linux/audit.h>
> >  #include <linux/falloc.h>
> > 
> > +#include <trace/fs.h>
> > +
> > +DEFINE_TRACE(do_sys_open);
> > +
> >  int vfs_statfs(struct dentry *dentry, struct kstatfs *buf)
> >  {
> >         int retval = -ENODEV;
> > @@ -1040,6 +1044,7 @@ long do_sys_open(int dfd, const char __user *filename, int
> > flags, int mode)
> >                                 fsnotify_open(f->f_path.dentry);
> >                                 fd_install(fd, f);
> >                         }
> > +                       trace_do_sys_open(f, flags, mode, fd);
> >                 }
> >                 putname(tmp);
> >         }
> > diff --git a/include/trace/fs.h b/include/trace/fs.h
> > new file mode 100644
> > index 0000000..870eec2
> > --- /dev/null
> > +++ b/include/trace/fs.h
> > @@ -0,0 +1,11 @@
> > +#ifndef _TRACE_FS_H
> > +#define _TRACE_FS_H
> > +
> > +#include <linux/fs.h>
> > +#include <linux/tracepoint.h>
> > +
> > +DECLARE_TRACE(do_sys_open,
> > +       TPPROTO(struct file *filp, int flags, int mode, long fd),
> > +               TPARGS(filp, flags, mode, fd));
> > +
> > +#endif
> > diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
> > index e2a4ff6..0400815 100644
> > --- a/kernel/trace/Kconfig
> > +++ b/kernel/trace/Kconfig
> > @@ -149,6 +149,15 @@ config CONTEXT_SWITCH_TRACER
> >           This tracer gets called from the context switch and records
> >           all switching of tasks.
> > 
> > +config OPEN_CLOSE_TRACER
> > +       bool "Trace open() calls"
> > +       depends on DEBUG_KERNEL
> > +       select TRACING
> > +       select MARKERS
> > +       help
> > +         This tracer records open() syscalls. These calls are made when
> > +         files are accessed on disk.
> > +
> >  config BOOT_TRACER
> >         bool "Trace boot initcalls"
> >         depends on DEBUG_KERNEL
> > diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
> > index 349d5a9..25cec6c 100644
> > --- a/kernel/trace/Makefile
> > +++ b/kernel/trace/Makefile
> > @@ -20,6 +20,7 @@ obj-$(CONFIG_RING_BUFFER) += ring_buffer.o
> > 
> >  obj-$(CONFIG_TRACING) += trace.o
> >  obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
> > +obj-$(CONFIG_OPEN_CLOSE_TRACER) += trace_open_close.o
> >  obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
> >  obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
> >  obj-$(CONFIG_IRQSOFF_TRACER) += trace_irqsoff.o
> > diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
> > index 4d3d381..24c17d2 100644
> > --- a/kernel/trace/trace.h
> > +++ b/kernel/trace/trace.h
> > @@ -30,6 +30,7 @@ enum trace_type {
> >         TRACE_USER_STACK,
> >         TRACE_HW_BRANCHES,
> >         TRACE_POWER,
> > +       TRACE_OPEN,
> > 
> >         __TRACE_LAST_TYPE
> >  };
> > diff --git a/kernel/trace/trace_open_close.c b/kernel/trace/trace_open_close.c
> > new file mode 100644
> > index 0000000..4250efc
> > --- /dev/null
> > +++ b/kernel/trace/trace_open_close.c
> > @@ -0,0 +1,148 @@
> > +/*
> > + * trace open calls
> > + * Copyright (C) 2009 Intel Corporation
> > + *
> > + * Based extensively on trace_sched_switch.c
> > + * Copyright (C) 2007 Steven Rostedt <srostedt@redhat.com>
> > + *
> > + */
> > +
> > +#include <linux/module.h>
> > +#include <linux/fs.h>
> > +#include <linux/debugfs.h>
> > +#include <linux/kallsyms.h>
> > +#include <linux/uaccess.h>
> > +#include <linux/ftrace.h>
> > +#include <trace/fs.h>
> > +
> > +#include "trace.h"
> > +
> > +
> > +static struct trace_array      *ctx_trace;
> > +static int __read_mostly       open_trace_enabled;
> > +static atomic_t                        open_ref;
> > +
> > +static void probe_do_sys_open(struct file *filp, int flags, int mode, long fd)
> > +{
> > +       char *buf;
> > +       char *fname;
> > +
> > +       if (!atomic_read(&open_ref))
> > +               return;
> > +
> > +       if (!open_trace_enabled)
> > +               return;
> > +
> > +       buf = kzalloc(PAGE_SIZE, GFP_KERNEL);
> > +       if (!buf)
> > +               return;
> > +       fname = d_path(&filp->f_path, buf, PAGE_SIZE);
> > +
> > +       if (IS_ERR(fname))
> > +               goto out;
> > +
> > +       ftrace_printk("%s: open(\"%s\", %d, %d) = %ld\n",
> > +               current->comm, fname, flags, mode, fd);
> > +out:
> > +       kfree(buf);
> > +}
> > +
> > +static void open_trace_reset(struct trace_array *tr)
> > +{
> > +       tr->time_start = ftrace_now(tr->cpu);
> > +       tracing_reset_online_cpus(tr);
> > +}
> > +
> > +static int open_trace_register(void)
> > +{
> > +       int ret;
> > +
> > +       ret = register_trace_do_sys_open(probe_do_sys_open);
> > +       if (ret) {
> > +               pr_info("open trace: Could not activate tracepoint"
> > +                       " probe to do_open\n");
> > +       }
> > +
> > +       return ret;
> > +}
> > +
> > +static void open_trace_unregister(void)
> > +{
> > +       unregister_trace_do_sys_open(probe_do_sys_open);
> > +}
> > +
> > +static void open_trace_start(void)
> > +{
> > +       long ref;
> > +
> > +       ref = atomic_inc_return(&open_ref);
> > +       if (ref == 1)
> > +               open_trace_register();
> > +}
> > +
> > +static void open_trace_stop(void)
> > +{
> > +       long ref;
> > +
> > +       ref = atomic_dec_and_test(&open_ref);
> > +       if (ref)
> > +               open_trace_unregister();
> > +}
> > +
> > +void open_trace_start_cmdline_record(void)
> > +{
> > +       open_trace_start();
> > +}
> > +
> > +void open_trace_stop_cmdline_record(void)
> > +{
> > +       open_trace_stop();
> > +}
> > +
> > +static void open_start_trace(struct trace_array *tr)
> > +{
> > +       open_trace_reset(tr);
> > +       open_trace_start_cmdline_record();
> > +       open_trace_enabled = 1;
> > +}
> > +
> > +static void open_stop_trace(struct trace_array *tr)
> > +{
> > +       open_trace_enabled = 0;
> > +       open_trace_stop_cmdline_record();
> > +}
> > +
> > +static int open_trace_init(struct trace_array *tr)
> > +{
> > +       ctx_trace = tr;
> > +
> > +       open_start_trace(tr);
> > +       return 0;
> > +}
> > +
> > +static void reset_open_trace(struct trace_array *tr)
> > +{
> > +       open_stop_trace(tr);
> > +}
> > +
> > +static struct tracer open_trace __read_mostly =
> > +{
> > +       .name           = "open",
> > +       .init           = open_trace_init,
> > +       .reset          = reset_open_trace,
> > +};
> > +
> > +__init static int init_open_trace(void)
> > +{
> > +       int ret = 0;
> > +
> > +       if (atomic_read(&open_ref))
> > +               ret = open_trace_register();
> > +       if (ret) {
> > +               pr_info("error registering open trace\n");
> > +               return ret;
> > +       }
> > +       return register_tracer(&open_trace);
> > +}
> > +device_initcall(init_open_trace);
> > +
> > --
> > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> > the body of a message to majordomo@vger.kernel.org
> > More majordomo info at  http://vger.kernel.org/majordomo-info.html
> > Please read the FAQ at  http://www.tux.org/lkml/


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-01-27 22:50   ` Frederic Weisbecker
@ 2009-01-29 14:04     ` Ingo Molnar
  2009-01-29 14:29       ` Frédéric Weisbecker
  0 siblings, 1 reply; 38+ messages in thread
From: Ingo Molnar @ 2009-01-29 14:04 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Kok, Auke, Linux Kernel Mailing List, powertop ml,
	Arjan van de Ven, srostedt, Arnaldo Carvalho de Melo,
	Frank Ch. Eigler, Neil Horman


* Frederic Weisbecker <fweisbec@gmail.com> wrote:

> On Tue, Jan 27, 2009 at 11:43:03PM +0100, Frederic Weisbecker wrote:
> > On Tue, Jan 27, 2009 at 12:08:04PM -0800, Kok, Auke wrote:
> > > 
> > > This tracer monitors regular file open() syscalls. This is a fast
> > > and low-overhead alternative to strace, and does not allow or
> > > require to be attached to every process.
> > > 
> > > The tracer only logs succesfull calls, as those are the only ones we
> > > are currently interested in, and we can determine the absolute path
> > > of these files as we log.
> > > 
> > > Signed-off-by: Auke Kok <auke-jan.h.kok@intel.com>
> > 
> > 
> > Hi Auke,
> > 
> > Speaking about a global syscall tracer, I made a patch to trace only the syscalls
> > with the function-graph-tracer.
> > 
> > http://lkml.org/lkml/2008/12/30/267
> > 
> > Its approach and purpose is different than a tracer dedicated only to syscalls.
> > The function graph tracer traces execution graph of the functions and is more about
> > execution time spent and code flow whereas a syscall tracer can provide more specific
> > informations about syscalls.
> > 
> > So both are not overlaping.
> > 
> > But the low level part of my patch creates a thread flag _TIF_SYSCALL_TRACE which triggers
> 
> s/_TIF_SYSCALL_TRACE/_TIF_SYSCALL_FTRACE

> > Once we have it, I think a syscall tracer can be fed with new syscalls 
> > events through several patch iterations, starting with the open and 
> > close one :-)
> > 
> > Are you ok with that?
> > 
> > Steven, Ingo, do you agree?

yes. We definitely need this on the asm syscall level, to not contaminate 
hundreds of syscalls with tracepoints.

Auke's sys_open() plugin would be a nice prototype for that concept - but 
in general it would be useful to be able to augment kernel tracer output 
with all syscall events that occur.

The output would be something like a slimmed-down strace, but for the 
whole kernel and not tied to ptrace semantics (which are crippling).

Would you be interested in extending your syscall tracing concept with 
those bits and would you be interested in integrating Auke's plugin into 
that?

	Ingo

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-01-29 14:04     ` Ingo Molnar
@ 2009-01-29 14:29       ` Frédéric Weisbecker
  2009-01-29 14:31         ` Ingo Molnar
  0 siblings, 1 reply; 38+ messages in thread
From: Frédéric Weisbecker @ 2009-01-29 14:29 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Kok, Auke, Linux Kernel Mailing List, powertop ml,
	Arjan van de Ven, srostedt, Arnaldo Carvalho de Melo,
	Frank Ch. Eigler, Neil Horman, Ananth N Mavinakayanahalli,
	utrace-devel, Roland McGrath

2009/1/29 Ingo Molnar <mingo@elte.hu>:
>
> * Frederic Weisbecker <fweisbec@gmail.com> wrote:
>
>> On Tue, Jan 27, 2009 at 11:43:03PM +0100, Frederic Weisbecker wrote:
>> > On Tue, Jan 27, 2009 at 12:08:04PM -0800, Kok, Auke wrote:
>> > >
>> > > This tracer monitors regular file open() syscalls. This is a fast
>> > > and low-overhead alternative to strace, and does not allow or
>> > > require to be attached to every process.
>> > >
>> > > The tracer only logs succesfull calls, as those are the only ones we
>> > > are currently interested in, and we can determine the absolute path
>> > > of these files as we log.
>> > >
>> > > Signed-off-by: Auke Kok <auke-jan.h.kok@intel.com>
>> >
>> >
>> > Hi Auke,
>> >
>> > Speaking about a global syscall tracer, I made a patch to trace only the syscalls
>> > with the function-graph-tracer.
>> >
>> > http://lkml.org/lkml/2008/12/30/267
>> >
>> > Its approach and purpose is different than a tracer dedicated only to syscalls.
>> > The function graph tracer traces execution graph of the functions and is more about
>> > execution time spent and code flow whereas a syscall tracer can provide more specific
>> > informations about syscalls.
>> >
>> > So both are not overlaping.
>> >
>> > But the low level part of my patch creates a thread flag _TIF_SYSCALL_TRACE which triggers
>>
>> s/_TIF_SYSCALL_TRACE/_TIF_SYSCALL_FTRACE
>
>> > Once we have it, I think a syscall tracer can be fed with new syscalls
>> > events through several patch iterations, starting with the open and
>> > close one :-)
>> >
>> > Are you ok with that?
>> >
>> > Steven, Ingo, do you agree?
>
> yes. We definitely need this on the asm syscall level, to not contaminate
> hundreds of syscalls with tracepoints.
>
> Auke's sys_open() plugin would be a nice prototype for that concept - but
> in generally it would be useful to be able to augment kernel tracer output
> with all syscall events that occur.
>
> The output would be something like a slimmed-down strace, but for the
> whole kernel and not tied to ptrace semantics (which are crippling).
>
> Would you be interested in extending your syscall tracing concept with
> those bits and would you be interested in integrating Auke's plugin into
> that
>
>        Ingo


Several people talked to me about utrace and gave some examples of it
in this discussion.
The API is very convenient for fetching syscall numbers, arguments and
return values.
And the hooks are done in the generic core code, so it is arch independent.

The only drawback I can see is that it is not yet merged upstream, as it
is still in need of in-kernel users.
If it only depends on this condition, we could be these users...

What do you think?

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-01-29 14:29       ` Frédéric Weisbecker
@ 2009-01-29 14:31         ` Ingo Molnar
  2009-01-29 14:40           ` Frédéric Weisbecker
  0 siblings, 1 reply; 38+ messages in thread
From: Ingo Molnar @ 2009-01-29 14:31 UTC (permalink / raw)
  To: Frédéric Weisbecker
  Cc: Kok, Auke, Linux Kernel Mailing List, powertop ml,
	Arjan van de Ven, srostedt, Arnaldo Carvalho de Melo,
	Frank Ch. Eigler, Neil Horman, Ananth N Mavinakayanahalli,
	utrace-devel, Roland McGrath


* Frédéric Weisbecker <fweisbec@gmail.com> wrote:

> 2009/1/29 Ingo Molnar <mingo@elte.hu>:
> >
> > * Frederic Weisbecker <fweisbec@gmail.com> wrote:
> >
> >> On Tue, Jan 27, 2009 at 11:43:03PM +0100, Frederic Weisbecker wrote:
> >> > On Tue, Jan 27, 2009 at 12:08:04PM -0800, Kok, Auke wrote:
> >> > >
> >> > > This tracer monitors regular file open() syscalls. This is a fast
> >> > > and low-overhead alternative to strace, and does not allow or
> >> > > require to be attached to every process.
> >> > >
> >> > > The tracer only logs succesfull calls, as those are the only ones we
> >> > > are currently interested in, and we can determine the absolute path
> >> > > of these files as we log.
> >> > >
> >> > > Signed-off-by: Auke Kok <auke-jan.h.kok@intel.com>
> >> >
> >> >
> >> > Hi Auke,
> >> >
> >> > Speaking about a global syscall tracer, I made a patch to trace only the syscalls
> >> > with the function-graph-tracer.
> >> >
> >> > http://lkml.org/lkml/2008/12/30/267
> >> >
> >> > Its approach and purpose is different than a tracer dedicated only to syscalls.
> >> > The function graph tracer traces execution graph of the functions and is more about
> >> > execution time spent and code flow whereas a syscall tracer can provide more specific
> >> > informations about syscalls.
> >> >
> >> > So both are not overlaping.
> >> >
> >> > But the low level part of my patch creates a thread flag _TIF_SYSCALL_TRACE which triggers
> >>
> >> s/_TIF_SYSCALL_TRACE/_TIF_SYSCALL_FTRACE
> >
> >> > Once we have it, I think a syscall tracer can be fed with new syscalls
> >> > events through several patch iterations, starting with the open and
> >> > close one :-)
> >> >
> >> > Are you ok with that?
> >> >
> >> > Steven, Ingo, do you agree?
> >
> > yes. We definitely need this on the asm syscall level, to not contaminate
> > hundreds of syscalls with tracepoints.
> >
> > Auke's sys_open() plugin would be a nice prototype for that concept - but
> > in generally it would be useful to be able to augment kernel tracer output
> > with all syscall events that occur.
> >
> > The output would be something like a slimmed-down strace, but for the
> > whole kernel and not tied to ptrace semantics (which are crippling).
> >
> > Would you be interested in extending your syscall tracing concept with
> > those bits and would you be interested in integrating Auke's plugin into
> > that
> >
> >        Ingo
> 
> 
> Several people talked me about utrace and gave some examples about it in 
> this discussion. The Api is very convenient to fetch syscall numbers, 
> arguments and return values. And the hooks are done in the generic core 
> code, so it is arch independent.
> 
> The only drawback I can see is that it is not yet merged upstream, in 
> need of in-kernel users. If it only depends on this condition, we could 
> be these users...
> 
> What do you think?

sure - what do the minimal bits/callbacks that enable syscall tracing 
look like?

	Ingo

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-01-29 14:31         ` Ingo Molnar
@ 2009-01-29 14:40           ` Frédéric Weisbecker
  2009-01-29 14:48             ` Frédéric Weisbecker
  2009-01-29 15:09             ` Ingo Molnar
  0 siblings, 2 replies; 38+ messages in thread
From: Frédéric Weisbecker @ 2009-01-29 14:40 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Kok, Auke, Linux Kernel Mailing List, powertop ml,
	Arjan van de Ven, srostedt, Arnaldo Carvalho de Melo,
	Frank Ch. Eigler, Neil Horman, Ananth N Mavinakayanahalli,
	utrace-devel, Roland McGrath

2009/1/29 Ingo Molnar <mingo@elte.hu>:
>>
>> Several people talked me about utrace and gave some examples about it in
>> this discussion. The Api is very convenient to fetch syscall numbers,
>> arguments and return values. And the hooks are done in the generic core
>> code, so it is arch independent.
>>
>> The only drawback I can see is that it is not yet merged upstream, in
>> need of in-kernel users. If it only depends on this condition, we could
>> be these users...
>>
>> What do you think?
>
> sure - how do the minimal bits/callbacks look like which enable syscall
> tracing?
>
>        Ingo


There is a very straightforward example provided by Ananth in there:
http://lkml.org/lkml/2009/1/28/59

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-01-29 14:40           ` Frédéric Weisbecker
@ 2009-01-29 14:48             ` Frédéric Weisbecker
  2009-01-29 15:09             ` Ingo Molnar
  1 sibling, 0 replies; 38+ messages in thread
From: Frédéric Weisbecker @ 2009-01-29 14:48 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Kok, Auke, Linux Kernel Mailing List, powertop ml,
	Arjan van de Ven, srostedt, Arnaldo Carvalho de Melo,
	Frank Ch. Eigler, Neil Horman, Ananth N Mavinakayanahalli,
	utrace-devel, Roland McGrath

2009/1/29 Frédéric Weisbecker <fweisbec@gmail.com>:
> 2009/1/29 Ingo Molnar <mingo@elte.hu>:
>>>
>>> Several people talked me about utrace and gave some examples about it in
>>> this discussion. The Api is very convenient to fetch syscall numbers,
>>> arguments and return values. And the hooks are done in the generic core
>>> code, so it is arch independent.
>>>
>>> The only drawback I can see is that it is not yet merged upstream, in
>>> need of in-kernel users. If it only depends on this condition, we could
>>> be these users...
>>>
>>> What do you think?
>>
>> sure - how do the minimal bits/callbacks look like which enable syscall
>> tracing?
>>
>>        Ingo
>
>
> There is a very straightforward example provided by Ananth in there:
> http://lkml.org/lkml/2009/1/28/59
>

One other drawback may be the fact that utrace will be traced by the
function tracers... adding some junk to their traces.
But I guess this is just a matter of some patches to make it not traced.

BTW, there is an interesting proof of concept there:
http://lkml.org/lkml/2009/1/27/294

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-01-29 14:40           ` Frédéric Weisbecker
  2009-01-29 14:48             ` Frédéric Weisbecker
@ 2009-01-29 15:09             ` Ingo Molnar
  2009-01-29 15:17               ` Frédéric Weisbecker
  2009-01-29 15:34               ` Frédéric Weisbecker
  1 sibling, 2 replies; 38+ messages in thread
From: Ingo Molnar @ 2009-01-29 15:09 UTC (permalink / raw)
  To: Frédéric Weisbecker
  Cc: Kok, Auke, Linux Kernel Mailing List, powertop ml,
	Arjan van de Ven, srostedt, Arnaldo Carvalho de Melo,
	Frank Ch. Eigler, Neil Horman, Ananth N Mavinakayanahalli,
	utrace-devel, Roland McGrath


* Frédéric Weisbecker <fweisbec@gmail.com> wrote:

> 2009/1/29 Ingo Molnar <mingo@elte.hu>:
> >>
> >> Several people talked me about utrace and gave some examples about it in
> >> this discussion. The Api is very convenient to fetch syscall numbers,
> >> arguments and return values. And the hooks are done in the generic core
> >> code, so it is arch independent.
> >>
> >> The only drawback I can see is that it is not yet merged upstream, in
> >> need of in-kernel users. If it only depends on this condition, we could
> >> be these users...
> >>
> >> What do you think?
> >
> > sure - how do the minimal bits/callbacks look like which enable syscall
> > tracing?
> >
> >        Ingo
> 
> 
> There is a very straightforward example provided by Ananth in there:
> http://lkml.org/lkml/2009/1/28/59

I mean, what does the infrastructure patch look like - what code does this 
add to the kernel - just to get the syscall tracing bits. Let's get some 
progress here - it's clear that tracing syscalls is good, we just need to 
do it and look at actual patches.

	Ingo

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-01-29 15:09             ` Ingo Molnar
@ 2009-01-29 15:17               ` Frédéric Weisbecker
  2009-01-29 15:34               ` Frédéric Weisbecker
  1 sibling, 0 replies; 38+ messages in thread
From: Frédéric Weisbecker @ 2009-01-29 15:17 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Kok, Auke, Linux Kernel Mailing List, powertop ml,
	Arjan van de Ven, srostedt, Arnaldo Carvalho de Melo,
	Frank Ch. Eigler, Neil Horman, Ananth N Mavinakayanahalli,
	utrace-devel, Roland McGrath

2009/1/29 Ingo Molnar <mingo@elte.hu>:
>
> * Frédéric Weisbecker <fweisbec@gmail.com> wrote:
>
>> 2009/1/29 Ingo Molnar <mingo@elte.hu>:
>> >>
>> >> Several people talked me about utrace and gave some examples about it in
>> >> this discussion. The Api is very convenient to fetch syscall numbers,
>> >> arguments and return values. And the hooks are done in the generic core
>> >> code, so it is arch independent.
>> >>
>> >> The only drawback I can see is that it is not yet merged upstream, in
>> >> need of in-kernel users. If it only depends on this condition, we could
>> >> be these users...
>> >>
>> >> What do you think?
>> >
>> > sure - how do the minimal bits/callbacks look like which enable syscall
>> > tracing?
>> >
>> >        Ingo
>>
>>
>> There is a very straightforward example provided by Ananth in there:
>> http://lkml.org/lkml/2009/1/28/59
>
> I mean, how does the infrastructure patch look like - what code does this
> add to the kernel - just to get the syscall tracing bits. Lets get some
> progress here - it's clear that tracing syscalls is good, we just need to
> do it and look at actual patches.
>
>        Ingo
>

The latest snapshot version I've found is here:
http://people.redhat.com/roland/utrace/2.6-current/utrace.patch
This is mostly independent core code and a good number of hooks inside ptrace.

But I don't know much about the overhead it potentially brings on ptrace.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-01-29 15:09             ` Ingo Molnar
  2009-01-29 15:17               ` Frédéric Weisbecker
@ 2009-01-29 15:34               ` Frédéric Weisbecker
  2009-01-29 15:53                 ` Frank Ch. Eigler
  1 sibling, 1 reply; 38+ messages in thread
From: Frédéric Weisbecker @ 2009-01-29 15:34 UTC (permalink / raw)
  To: Ingo Molnar, Roland McGrath, utrace-devel
  Cc: Kok, Auke, Linux Kernel Mailing List, powertop ml,
	Arjan van de Ven, srostedt, Arnaldo Carvalho de Melo,
	Frank Ch. Eigler, Neil Horman, Ananth N Mavinakayanahalli

2009/1/29 Ingo Molnar <mingo@elte.hu>:
>
> * Frédéric Weisbecker <fweisbec@gmail.com> wrote:
>
>> 2009/1/29 Ingo Molnar <mingo@elte.hu>:
>> >>
>> >> Several people talked me about utrace and gave some examples about it in
>> >> this discussion. The Api is very convenient to fetch syscall numbers,
>> >> arguments and return values. And the hooks are done in the generic core
>> >> code, so it is arch independent.
>> >>
>> >> The only drawback I can see is that it is not yet merged upstream, in
>> >> need of in-kernel users. If it only depends on this condition, we could
>> >> be these users...
>> >>
>> >> What do you think?
>> >
>> > sure - how do the minimal bits/callbacks look like which enable syscall
>> > tracing?


I know you are talking about only the bits of utrace that are needed
for syscall tracing.
But I can't answer that better than the utrace people would.

And actually I'm not sure the utrace bits for syscall tracing can be
isolated from the rest of its
core.

Anyway, I will let the utrace guy answer that :-)


>> There is a very straightforward example provided by Ananth in there:
>> http://lkml.org/lkml/2009/1/28/59
>
> I mean, how does the infrastructure patch look like - what code does this
> add to the kernel - just to get the syscall tracing bits. Lets get some
> progress here - it's clear that tracing syscalls is good, we just need to
> do it and look at actual patches.
>
>        Ingo
>

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-01-29 15:34               ` Frédéric Weisbecker
@ 2009-01-29 15:53                 ` Frank Ch. Eigler
  0 siblings, 0 replies; 38+ messages in thread
From: Frank Ch. Eigler @ 2009-01-29 15:53 UTC (permalink / raw)
  To: Frédéric Weisbecker
  Cc: Ingo Molnar, Roland McGrath, utrace-devel, Kok, Auke,
	Linux Kernel Mailing List, powertop ml, Arjan van de Ven,
	srostedt, Arnaldo Carvalho de Melo, Neil Horman,
	Ananth N Mavinakayanahalli

Hi -

On Thu, Jan 29, 2009 at 04:34:46PM +0100, Frédéric Weisbecker wrote:
> 2009/1/29 Ingo Molnar <mingo@elte.hu>:
> [...]
> >> > sure - how do the minimal bits/callbacks look like which enable syscall
> >> > tracing?

> I know you are talking about the only necessary bits from utrace to
> have the syscalls tracing.  But I can't answer you better than would
> the utrace people.  And actually I'm not sure the utrace bits for
> syscall tracing can be isolated from the rest of its core.

My understanding is that the parts of utrace that remain out-of-tree
are relatively integrated, and just present the programmatic callback
API to the already merged "tracehook" layer.

- FChE

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-01-27 22:43 ` Frederic Weisbecker
  2009-01-27 22:50   ` Frederic Weisbecker
@ 2009-01-28  0:43   ` Frank Ch. Eigler
  2009-01-28 13:58     ` Frédéric Weisbecker
  2009-01-28  9:38   ` Ananth N Mavinakayanahalli
  2009-01-28 22:19   ` Kok, Auke
  3 siblings, 1 reply; 38+ messages in thread
From: Frank Ch. Eigler @ 2009-01-28  0:43 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Kok, Auke, Linux Kernel Mailing List, powertop ml,
	Arjan van de Ven, Ingo Molnar, srostedt, Arnaldo Carvalho de Melo,
	Neil Horman, utrace-devel

Frederic Weisbecker <fweisbec@gmail.com> writes:

> [...]
> Speaking about a global syscall tracer, I made a patch to trace only the syscalls
> with the function-graph-tracer.
> http://lkml.org/lkml/2008/12/30/267 This low-level part can easily
> be used by all tracers that would like to inspect syscalls.
> [...]
> Just a change is needed: Steven requested that the part inside
> syscall_trace_enter become a tracepoint, making it totally shareable
> between tracers and easy to turn on and off.

Alternately, you could just rely on utrace's hooks.  They were thought
out more fully with respect to parameter access, manipulation, and
programmatic control befitting even a debugger.


- FChE

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-01-28  0:43   ` Frank Ch. Eigler
@ 2009-01-28 13:58     ` Frédéric Weisbecker
  2009-01-28 14:29       ` Arnaldo Carvalho de Melo
  0 siblings, 1 reply; 38+ messages in thread
From: Frédéric Weisbecker @ 2009-01-28 13:58 UTC (permalink / raw)
  To: Frank Ch. Eigler
  Cc: Kok, Auke, Linux Kernel Mailing List, powertop ml,
	Arjan van de Ven, Ingo Molnar, srostedt, Arnaldo Carvalho de Melo,
	Neil Horman, utrace-devel

2009/1/28 Frank Ch. Eigler <fche@redhat.com>:
> Frederic Weisbecker <fweisbec@gmail.com> writes:
>
>> [...]
>> Speaking about a global syscall tracer, I made a patch to trace only the syscalls
>> with the function-graph-tracer.
>> http://lkml.org/lkml/2008/12/30/267 This low-level part can easily
>> be used by all tracers that would like to inspect syscalls.
>> [...]
>> Just a change is needed: Steven requested that the part inside
>> syscall_trace_enter become a tracepoint, making it totally shareable
>> between tracers and easy to turn on and off.
>
> Alternately, you could just rely on utrace's hooks.  They were thought
> out more fully with respect to parameter access, manipulation, and
> programmatic control befitting even a debugger.
>
>
> - FChE
>

I don't know much about it. But I will soon have some time to look at your
patch which uses ftrace from utrace.
Anyway, are there any plans for utrace to be merged? Otherwise I
won't be able to use it...

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-01-28 13:58     ` Frédéric Weisbecker
@ 2009-01-28 14:29       ` Arnaldo Carvalho de Melo
  0 siblings, 0 replies; 38+ messages in thread
From: Arnaldo Carvalho de Melo @ 2009-01-28 14:29 UTC (permalink / raw)
  To: Frédéric Weisbecker
  Cc: Frank Ch. Eigler, Kok, Auke, Linux Kernel Mailing List,
	powertop ml, Arjan van de Ven, Ingo Molnar, srostedt, Neil Horman,
	utrace-devel

Em Wed, Jan 28, 2009 at 02:58:28PM +0100, Frédéric Weisbecker escreveu:
> 2009/1/28 Frank Ch. Eigler <fche@redhat.com>:
> > Frederic Weisbecker <fweisbec@gmail.com> writes:
> >
> >> [...]
> >> Speaking about a global syscall tracer, I made a patch to trace only the syscalls
> >> with the function-graph-tracer.
> >> http://lkml.org/lkml/2008/12/30/267 This low-level part can easily
> >> be used by all tracers that would like to inspect syscalls.
> >> [...]
> >> Just a change is needed: Steven requested that the part inside
> >> syscall_trace_enter become a tracepoint, making it totally shareable
> >> between tracers and easy to turn on and off.
> >
> > Alternately, you could just rely on utrace's hooks.  They were thought
> > out more fully with respect to parameter access, manipulation, and
> > programmatic control befitting even a debugger.
> >
> >
> > - FChE
> >
> 
> I don't know much it. But I will soon have some time to look at your
> patch which uses ftrace from utrace.
> Anyway, are there some plans about utrace to be merged? Unless I
> couldn't be able to use
> it...

Well, one of the reasons for utrace not to be merged, IIRC, was that
there would be no users in-kernel. With Frank's ftrace plugin that is
not true anymore.

- Arnaldo

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-01-27 22:43 ` Frederic Weisbecker
  2009-01-27 22:50   ` Frederic Weisbecker
  2009-01-28  0:43   ` Frank Ch. Eigler
@ 2009-01-28  9:38   ` Ananth N Mavinakayanahalli
  2009-01-28 14:21     ` Frédéric Weisbecker
  2009-01-28 22:19   ` Kok, Auke
  3 siblings, 1 reply; 38+ messages in thread
From: Ananth N Mavinakayanahalli @ 2009-01-28  9:38 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Kok, Auke, Linux Kernel Mailing List, powertop ml,
	Arjan van de Ven, Ingo Molnar, srostedt, Arnaldo Carvalho de Melo,
	Frank Ch. Eigler, Neil Horman

On Tue, Jan 27, 2009 at 11:43:05PM +0100, Frederic Weisbecker wrote:
> On Tue, Jan 27, 2009 at 12:08:04PM -0800, Kok, Auke wrote:
> > 
> > This tracer monitors regular file open() syscalls. This is a fast
> > and low-overhead alternative to strace, and does not allow or
> > require to be attached to every process.
> > 
> > The tracer only logs succesfull calls, as those are the only ones we
> > are currently interested in, and we can determine the absolute path
> > of these files as we log.
> > 
> > Signed-off-by: Auke Kok <auke-jan.h.kok@intel.com>
> 
> 
> Hi Auke,
> 
> Speaking about a global syscall tracer, I made a patch to trace only the syscalls
> with the function-graph-tracer.
> 
> http://lkml.org/lkml/2008/12/30/267
> 
> Its approach and purpose is different than a tracer dedicated only to syscalls.
> The function graph tracer traces execution graph of the functions and is more about
> execution time spent and code flow whereas a syscall tracer can provide more specific
> informations about syscalls.
> 
> So both are not overlaping.
> 
> But the low level part of my patch creates a thread flag _TIF_SYSCALL_TRACE which triggers
> a ptrace hook when set.
> This low-level part can easily be used by all tracers that would like to inspect syscalls.
> 
> Just a change is needed: Steven requested that the part inside syscall_trace_enter become
> a tracepoint, making it totally shareable between tracers and easy to turn on and off.
> 
> And perhaps the parts that set/clear the flag on all tasks can be shared too.
> 
> So we can start with this low-level syscall tracing facility. If you want, I can adapt
> this low-level part and submit a patch this week or the next one to give you this base
> infrastructure.
> 
> 
> Once we have it, I think a syscall tracer can be fed with new syscalls events through
> several patch iterations, starting with the open and close one :-)

Here is something I did some time ago that uses utrace. It is per-task,
doesn't use ftrace, and is just intended as a prototype. It traces both
syscall entries and returns.

---

Here is the beginnings of a simple utrace based strace. Right now, one
needs to invoke this program with a 'insmod <modname> tid=<tid>'.

The output looks something like this:
[267352.641112] Attached to 32604 => 0xd8f60090
[267353.981046] 2
[267353.981085] 197 0x1 0xbff8cd84 0x86cff4 0x86d4c0 0x86d4c0 0xbff8cd50a = 0
[267353.981097] 192 0x0 0x1000 0x3 0x22 0xffffffff 0x0 = b7f8d000
[267353.981124] 4 0x1 0xb7f8d000 0xd 0xd 0xb7f8d000 0xbff8cda8 = d
[267353.981174] 4 0x1 0xb7f8d000 0x3 0x3 0xb7f8d000 0xbff8c7dc = 3
[267353.981209] 252 0x0 0x0 0x86e0d0 0x0 0x86b274 0xbff8cee8 = 
[267353.981215] Task 32604 exited
[267355.460180] Cannot find PID 32604

I know strace does a pretty print, but this is a quick and dirty
prototype.
---

#include <linux/module.h>
#include <linux/utrace.h>
#include <linux/err.h>
#include <asm/syscall.h>

MODULE_DESCRIPTION("syscall trace");
MODULE_LICENSE("GPL");

/* Thread id of the task to trace; supplied at load time as "tid=<tid>". */
static int target_tid;

/* Expose target_tid as the module parameter named "tid" (permissions 0). */
module_param_named(tid, target_tid, int, 0);

/*
 * utrace syscall-entry callback.
 *
 * Logs the syscall number followed by its six argument registers in hex.
 * The line is deliberately left open (it ends in "= " with no newline);
 * task_syscall_exit() appends the return value and the newline.
 *
 * Always returns UTRACE_RESUME.
 */
static u32 task_syscall_entry(u32 action, struct utrace_attached_engine *engine,
		struct task_struct *task, struct pt_regs *regs)
{
	unsigned long a[6];
	long nr = syscall_get_nr(task, regs);

	/* Fetch all six arguments, starting at index 0. */
	syscall_get_arguments(task, regs, 0, 6, a);

	printk(KERN_INFO "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx = ",
		nr, a[0], a[1], a[2], a[3], a[4], a[5]);

	return UTRACE_RESUME;
}

/*
 * utrace syscall-exit callback.
 *
 * Prints the syscall's return value in hex followed by a newline. No log
 * level prefix is used, so the text is meant to complete the line started
 * by task_syscall_entry().
 *
 * Always returns UTRACE_RESUME.
 */
static u32 task_syscall_exit(u32 action, struct utrace_attached_engine *engine,
		struct task_struct *task, struct pt_regs *regs)
{
	long rc;

	rc = syscall_get_return_value(task, regs);
	printk("%lx\n", rc);

	return UTRACE_RESUME;
}

/*
 * utrace exit callback: report that the traced task exited and ask utrace
 * to detach this engine (UTRACE_DETACH).
 *
 * Note that the message prints the module-wide target_tid, not an id read
 * from @task.
 */
static u32 task_exit(enum utrace_resume_action action,
		struct utrace_attached_engine *engine, struct task_struct *task,
		long orig_code, long *code)
{
	/*
	 * Terminate any line left open by task_syscall_entry() (which ends
	 * its output with "= " and no newline) before logging the exit.
	 */
	printk("\n");
	printk(KERN_INFO "Task %d exited\n", target_tid);
	return UTRACE_DETACH;
}

/*
 * Callback table handed to utrace when attaching. strace_exit() also uses
 * its address (with UTRACE_ATTACH_MATCH_OPS) to look up this module's engine.
 */
static const struct utrace_engine_ops syscall_ops =
{
	.report_syscall_entry = task_syscall_entry,	/* syscall entry */
	.report_syscall_exit = task_syscall_exit,	/* syscall return */
	.report_exit = task_exit,			/* task exit */
};

/*
 * Module init: attach a utrace engine to the task named by the "tid"
 * module parameter and enable syscall entry/exit and task-exit reporting.
 *
 * Returns 0 on success or a negative errno:
 *   -ESRCH   no struct pid found for target_tid
 *   PTR_ERR  of the engine if utrace_attach_pid() returned an error pointer
 *   -EINVAL  if utrace_attach_pid() returned NULL
 *   the utrace_set_events() result if enabling the events failed
 *
 * The pid reference taken by find_get_pid() is dropped on every path.
 */
static int __init strace_init(void)
{
	struct pid *pid;
	int ret = 0;
	struct task_struct *target;
	struct utrace_attached_engine *engine;

	pid = find_get_pid(target_tid);
	if (pid == NULL) {
		printk(KERN_ERR "Cannot find PID %d\n", target_tid);
		ret = -ESRCH;
		goto out;
	}

	engine = utrace_attach_pid(pid, UTRACE_ATTACH_CREATE, &syscall_ops, 0);
	if (IS_ERR(engine)) {
		printk(KERN_ERR "utrace_attach_pid: %ld\n", PTR_ERR(engine));
		/* Hand the real failure reason back to insmod. */
		ret = PTR_ERR(engine);
		goto out_put_pid;
	}
	if (engine == NULL) {
		printk(KERN_ERR "utrace_attach_pid => NULL\n");
		ret = -EINVAL;
		goto out_put_pid;
	}
	printk(KERN_INFO "Attached to %d => 0x%p\n", target_tid, engine);

	/*
	 * If utrace_attach_pid() succeeded above, we are sure the target
	 * is valid here
	 * NOTE(review): nothing pins the task between the attach and this
	 * lookup - confirm whether a reference or RCU protection is needed.
	 */
	target = pid_task(pid, PIDTYPE_PID);

	ret = utrace_set_events(target, engine, UTRACE_EVENT_SYSCALL |
			UTRACE_EVENT(EXIT));
	if (ret) {
		int err;

		printk(KERN_ERR "utrace_set_events returned %d\n", ret);
		/*
		 * Init is about to fail, so do not leave an engine attached
		 * whose callbacks point into this module.
		 */
		err = utrace_control(target, engine, UTRACE_DETACH);
		if (err)
			printk(KERN_ERR "utrace_control returned %d\n", err);
	}

out_put_pid:
	/* Balance find_get_pid() on success and failure alike. */
	put_pid(pid);
out:
	return ret;
}

/*
 * Module exit: find the engine this module attached to the target task
 * (matched by &syscall_ops) and detach it.
 *
 * If the target can no longer be found - e.g. it already exited, in which
 * case task_exit() has returned UTRACE_DETACH - there is nothing to do
 * beyond logging.
 */
static void __exit strace_exit(void)
{
	int ret;
	struct pid *pid;
	struct utrace_attached_engine *engine;
	struct task_struct *target;

	pid = find_get_pid(target_tid);
	if (pid == NULL) {
		printk(KERN_ERR "Cannot find PID %d\n", target_tid);
		return;
	}

	target = pid_task(pid, PIDTYPE_PID);
	put_pid(pid);
	/*
	 * A struct pid can exist without a task attached under PIDTYPE_PID;
	 * never hand a NULL task to utrace.
	 */
	if (target == NULL) {
		printk(KERN_ERR "Cannot find PID %d\n", target_tid);
		return;
	}

	/* Look up (do not create) the engine that uses our ops table. */
	engine = utrace_attach_task(target, UTRACE_ATTACH_MATCH_OPS,
			&syscall_ops, 0);
	if (IS_ERR(engine)) {
		printk(KERN_ERR "Can't find self: %ld\n", PTR_ERR(engine));
		return;
	}
	if (engine == NULL) {
		printk(KERN_ERR "Can't find self: no match\n");
		return;
	}

	printk(KERN_INFO "Trying detach 0x%p from %d\n", engine, target_tid);
	ret = utrace_control(target, engine, UTRACE_DETACH);
	if (ret)
		printk(KERN_ERR "utrace_control returned %d\n", ret);
}

/* Register the module's load and unload entry points. */
module_init(strace_init);
module_exit(strace_exit);

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-01-28  9:38   ` Ananth N Mavinakayanahalli
@ 2009-01-28 14:21     ` Frédéric Weisbecker
  2009-01-28 17:00       ` Ananth N Mavinakayanahalli
  0 siblings, 1 reply; 38+ messages in thread
From: Frédéric Weisbecker @ 2009-01-28 14:21 UTC (permalink / raw)
  To: ananth
  Cc: Kok, Auke, Linux Kernel Mailing List, powertop ml,
	Arjan van de Ven, Ingo Molnar, srostedt, Arnaldo Carvalho de Melo,
	Frank Ch. Eigler, Neil Horman

2009/1/28 Ananth N Mavinakayanahalli <ananth@in.ibm.com>:
> On Tue, Jan 27, 2009 at 11:43:05PM +0100, Frederic Weisbecker wrote:
>> On Tue, Jan 27, 2009 at 12:08:04PM -0800, Kok, Auke wrote:
>> >
>> > This tracer monitors regular file open() syscalls. This is a fast
>> > and low-overhead alternative to strace, and does not allow or
>> > require to be attached to every process.
>> >
>> > The tracer only logs succesfull calls, as those are the only ones we
>> > are currently interested in, and we can determine the absolute path
>> > of these files as we log.
>> >
>> > Signed-off-by: Auke Kok <auke-jan.h.kok@intel.com>
>>
>>
>> Hi Auke,
>>
>> Speaking about a global syscall tracer, I made a patch to trace only the syscalls
>> with the function-graph-tracer.
>>
>> http://lkml.org/lkml/2008/12/30/267
>>
>> Its approach and purpose is different than a tracer dedicated only to syscalls.
>> The function graph tracer traces execution graph of the functions and is more about
>> execution time spent and code flow whereas a syscall tracer can provide more specific
>> informations about syscalls.
>>
>> So both are not overlaping.
>>
>> But the low level part of my patch creates a thread flag _TIF_SYSCALL_TRACE which triggers
>> a ptrace hook when set.
>> This low-level part can easily be used by all tracers that would like to inspect syscalls.
>>
>> Just a change is needed: Steven requested that the part inside syscall_trace_enter become
>> a tracepoint, making it totally shareable between tracers and easy to turn on and off.
>>
>> And perhaps the parts that set/clear the flag on all tasks can be shared too.
>>
>> So we can start with this low-level syscall tracing facility. If you want, I can adapt
>> this low-level part and submit a patch this week or the next one to give you this base
>> infrastructure.
>>
>>
>> Once we have it, I think a syscall tracer can be fed with new syscalls events through
>> several patch iterations, starting with the open and close one :-)
>
> Here is something I did sometime ago that uses utrace. It is per task
> doesn't use ftrace and is just intended as a prototype. It traces both
> syscalls and returns.
>
> ---
>
> Here is the beginnings of a simple utrace based strace. Right now, one
> needs to invoke this program with a 'insmod <modname> tid=<tid>'.
>
> The output looks something like this:
> [267352.641112] Attached to 32604 => 0xd8f60090
> [267353.981046] 2
> [267353.981085] 197 0x1 0xbff8cd84 0x86cff4 0x86d4c0 0x86d4c0 0xbff8cd50a = 0
> [267353.981097] 192 0x0 0x1000 0x3 0x22 0xffffffff 0x0 = b7f8d000
> [267353.981124] 4 0x1 0xb7f8d000 0xd 0xd 0xb7f8d000 0xbff8cda8 = d
> [267353.981174] 4 0x1 0xb7f8d000 0x3 0x3 0xb7f8d000 0xbff8c7dc = 3
> [267353.981209] 252 0x0 0x0 0x86e0d0 0x0 0x86b274 0xbff8cee8 =
> [267353.981215] Task 32604 exited
> [267355.460180] Cannot find PID 32604
>
> I know strace does a pretty print, but this is a quick and dirty
> prototype.
> ---
>
> #include <linux/module.h>
> #include <linux/utrace.h>
> #include <linux/err.h>
> #include <asm/syscall.h>
>
> MODULE_DESCRIPTION("syscall trace");
> MODULE_LICENSE("GPL");
>
> static int target_tid;
>
> module_param_named(tid, target_tid, int, 0);
>
> static u32 task_syscall_entry(u32 action, struct utrace_attached_engine *engine,
>                struct task_struct *task, struct pt_regs *regs)
> {
>        long callno;
>        unsigned long args[6];
>
>        callno = syscall_get_nr(task, regs);
>        syscall_get_arguments(task, regs, 0, 6, args);
>
>        printk(KERN_INFO "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx = ",
>                callno, args[0], args[1], args[2], args[3], args[4], args[5]);
>        return UTRACE_RESUME;
> }
>
> static u32 task_syscall_exit(u32 action, struct utrace_attached_engine *engine,
>                struct task_struct *task, struct pt_regs *regs)
> {
>        long retval = syscall_get_return_value(task, regs);
>        printk("%lx\n", retval);
>        return UTRACE_RESUME;
> }
>
> static u32 task_exit(enum utrace_resume_action action,
>                struct utrace_attached_engine *engine, struct task_struct *task,
>                long orig_code, long *code)
> {
>        printk("\n");
>        printk(KERN_INFO "Task %d exited\n", target_tid);
>        return UTRACE_DETACH;
> }
>
> static const struct utrace_engine_ops syscall_ops =
> {
>        .report_syscall_entry = task_syscall_entry,
>        .report_syscall_exit = task_syscall_exit,
>        .report_exit = task_exit,
> };
>
> static int __init strace_init(void)
> {
>        struct pid *pid;
>        int ret = 0;
>        struct task_struct *target;
>        struct utrace_attached_engine *engine;
>
>        pid = find_get_pid(target_tid);
>        if (pid == NULL) {
>                printk(KERN_ERR "Cannot find PID %d\n", target_tid);
>                ret = -ESRCH;
>                goto out;
>        }
>
>        engine = utrace_attach_pid(pid, UTRACE_ATTACH_CREATE, &syscall_ops, 0);
>        if (IS_ERR(engine)) {
>                printk(KERN_ERR "utrace_attach_pid: %ld\n", PTR_ERR(engine));
>                ret = -EINVAL;
>                goto out;
>        } else if (engine == NULL) {
>                printk(KERN_ERR "utrace_attach_pid => NULL\n");
>                ret = -EINVAL;
>                goto out;
>        } else
>                printk(KERN_INFO "Attached to %d => 0x%p\n",
>                                target_tid, engine);
>
>        /*
>         * If utrace_attach_pid() succeeded above, we are sure the target
>         * is valid here
>         */
>        target = pid_task(pid, PIDTYPE_PID);
>        put_pid(pid);
>
>        ret = utrace_set_events(target, engine, UTRACE_EVENT_SYSCALL |
>                        UTRACE_EVENT(EXIT));
>        if (ret)
>                printk(KERN_ERR "utrace_set_events returned %d\n", ret);
>
> out:
>        return ret;
> }
>
> static void __exit strace_exit(void)
> {
>        int ret = 0;
>        struct pid *pid;
>        struct utrace_attached_engine *engine;
>        struct task_struct *target;
>
>        pid = find_get_pid(target_tid);
>        if (pid == NULL) {
>                printk(KERN_ERR "Cannot find PID %d\n", target_tid);
>                return;
>        }
>
>        target = pid_task(pid, PIDTYPE_PID);
>        put_pid(pid);
>        engine = utrace_attach_task(target, UTRACE_ATTACH_MATCH_OPS,
>                        &syscall_ops, 0);
>        if (IS_ERR(engine))
>                printk(KERN_ERR "Can't find self: %ld\n", PTR_ERR(engine));
>        else if (engine == NULL)
>                printk(KERN_ERR "Can't find self: no match\n");
>        else {
>                printk(KERN_INFO "Trying detach 0x%p from %d\n",
>                                engine, target_tid);
>                ret = utrace_control(target, engine, UTRACE_DETACH);
>                if (ret)
>                        printk(KERN_ERR "utrace_control returned %d\n",
>                                        ret);
>        }
> }
>
> module_init(strace_init);
> module_exit(strace_exit);
>


This is a very convenient API. I would be glad to use it with ftrace.
But what is its status currently? Are there any plans to merge it?

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-01-28 14:21     ` Frédéric Weisbecker
@ 2009-01-28 17:00       ` Ananth N Mavinakayanahalli
  2009-01-28 17:15         ` Frédéric Weisbecker
  0 siblings, 1 reply; 38+ messages in thread
From: Ananth N Mavinakayanahalli @ 2009-01-28 17:00 UTC (permalink / raw)
  To: =?iso-8859-1?Q?Fr=E9d=E9ric_Weisbecker_=3Cfweisbec=40gmail=2Ecom=3E?=
  Cc: Kok, Auke, Linux Kernel Mailing List, powertop ml,
	Arjan van de Ven, Ingo Molnar, srostedt, Arnaldo Carvalho de Melo,
	Frank Ch. Eigler, Neil Horman, Roland McGrath

On Wed, Jan 28, 2009 at 03:21:24PM +0100, Frédéric Weisbecker wrote:
> 2009/1/28 Ananth N Mavinakayanahalli <ananth@in.ibm.com>:
> > On Tue, Jan 27, 2009 at 11:43:05PM +0100, Frederic Weisbecker wrote:
> >> On Tue, Jan 27, 2009 at 12:08:04PM -0800, Kok, Auke wrote:

...

> > Here is something I did sometime ago that uses utrace. It is per task
> > doesn't use ftrace and is just intended as a prototype. It traces both
> > syscalls and returns.
> >
> > ---
> >
> > Here is the beginnings of a simple utrace based strace. Right now, one
> > needs to invoke this program with a 'insmod <modname> tid=<tid>'.
> >
> > The output looks something like this:
> > [267352.641112] Attached to 32604 => 0xd8f60090
> > [267353.981046] 2
> > [267353.981085] 197 0x1 0xbff8cd84 0x86cff4 0x86d4c0 0x86d4c0 0xbff8cd50a = 0
> > [267353.981097] 192 0x0 0x1000 0x3 0x22 0xffffffff 0x0 = b7f8d000
> > [267353.981124] 4 0x1 0xb7f8d000 0xd 0xd 0xb7f8d000 0xbff8cda8 = d
> > [267353.981174] 4 0x1 0xb7f8d000 0x3 0x3 0xb7f8d000 0xbff8c7dc = 3
> > [267353.981209] 252 0x0 0x0 0x86e0d0 0x0 0x86b274 0xbff8cee8 =
> > [267353.981215] Task 32604 exited
> > [267355.460180] Cannot find PID 32604
> >
> > I know strace does a pretty print, but this is a quick and dirty
> > prototype.
> > ---
> >
> > #include <linux/module.h>
> > #include <linux/utrace.h>
> > #include <linux/err.h>
> > #include <asm/syscall.h>
> >
> > MODULE_DESCRIPTION("syscall trace");
> > MODULE_LICENSE("GPL");
> >
> > static int target_tid;
> >
> > module_param_named(tid, target_tid, int, 0);
> >
> > static u32 task_syscall_entry(u32 action, struct utrace_attached_engine *engine,
> >                struct task_struct *task, struct pt_regs *regs)
> > {
> >        long callno;
> >        unsigned long args[6];
> >
> >        callno = syscall_get_nr(task, regs);
> >        syscall_get_arguments(task, regs, 0, 6, args);
> >
> >        printk(KERN_INFO "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx = ",
> >                callno, args[0], args[1], args[2], args[3], args[4], args[5]);
> >        return UTRACE_RESUME;
> > }
> >
> > static u32 task_syscall_exit(u32 action, struct utrace_attached_engine *engine,
> >                struct task_struct *task, struct pt_regs *regs)
> > {
> >        long retval = syscall_get_return_value(task, regs);
> >        printk("%lx\n", retval);
> >        return UTRACE_RESUME;
> > }
> >
> > static u32 task_exit(enum utrace_resume_action action,
> >                struct utrace_attached_engine *engine, struct task_struct *task,
> >                long orig_code, long *code)
> > {
> >        printk("\n");
> >        printk(KERN_INFO "Task %d exited\n", target_tid);
> >        return UTRACE_DETACH;
> > }
> >
> > static const struct utrace_engine_ops syscall_ops =
> > {
> >        .report_syscall_entry = task_syscall_entry,
> >        .report_syscall_exit = task_syscall_exit,
> >        .report_exit = task_exit,
> > };
> >
> > static int __init strace_init(void)
> > {
> >        struct pid *pid;
> >        int ret = 0;
> >        struct task_struct *target;
> >        struct utrace_attached_engine *engine;
> >
> >        pid = find_get_pid(target_tid);
> >        if (pid == NULL) {
> >                printk(KERN_ERR "Cannot find PID %d\n", target_tid);
> >                ret = -ESRCH;
> >                goto out;
> >        }
> >
> >        engine = utrace_attach_pid(pid, UTRACE_ATTACH_CREATE, &syscall_ops, 0);
> >        if (IS_ERR(engine)) {
> >                printk(KERN_ERR "utrace_attach_pid: %ld\n", PTR_ERR(engine));
> >                ret = -EINVAL;
> >                goto out;
> >        } else if (engine == NULL) {
> >                printk(KERN_ERR "utrace_attach_pid => NULL\n");
> >                ret = -EINVAL;
> >                goto out;
> >        } else
> >                printk(KERN_INFO "Attached to %d => 0x%p\n",
> >                                target_tid, engine);
> >
> >        /*
> >         * If utrace_attach_pid() succeeded above, we are sure the target
> >         * is valid here
> >         */
> >        target = pid_task(pid, PIDTYPE_PID);
> >        put_pid(pid);
> >
> >        ret = utrace_set_events(target, engine, UTRACE_EVENT_SYSCALL |
> >                        UTRACE_EVENT(EXIT));
> >        if (ret)
> >                printk(KERN_ERR "utrace_set_events returned %d\n", ret);
> >
> > out:
> >        return ret;
> > }
> >
> > static void __exit strace_exit(void)
> > {
> >        int ret = 0;
> >        struct pid *pid;
> >        struct utrace_attached_engine *engine;
> >        struct task_struct *target;
> >
> >        pid = find_get_pid(target_tid);
> >        if (pid == NULL) {
> >                printk(KERN_ERR "Cannot find PID %d\n", target_tid);
> >                return;
> >        }
> >
> >        target = pid_task(pid, PIDTYPE_PID);
> >        put_pid(pid);
> >        engine = utrace_attach_task(target, UTRACE_ATTACH_MATCH_OPS,
> >                        &syscall_ops, 0);
> >        if (IS_ERR(engine))
> >                printk(KERN_ERR "Can't find self: %ld\n", PTR_ERR(engine));
> >        else if (engine == NULL)
> >                printk(KERN_ERR "Can't find self: no match\n");
> >        else {
> >                printk(KERN_INFO "Trying detach 0x%p from %d\n",
> >                                engine, target_tid);
> >                ret = utrace_control(target, engine, UTRACE_DETACH);
> >                if (ret)
> >                        printk(KERN_ERR "utrace_control returned %d\n",
> >                                        ret);
> >        }
> > }
> >
> > module_init(strace_init);
> > module_exit(strace_exit);
> >
> 
> 
> This is a very convenient Api. I would be glad to use it with ftrace.
> But, what is it's status currently? Some projects to merge it?

It's currently available as a git tree:
git://git.kernel.org/pub/scm/linux/kernel/git/frob/linux-2.6-utrace.git

There is work going on to merge it upstream. Roland McGrath will be able
to better answer when. However, one of the biggest requirements to merge
it is an in-kernel user. Hopefully Frank's prototype and/or an ftrace
integration of the above snippet should be incentive to push for some
in-tree users :-)

Ananth

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-01-28 17:00       ` Ananth N Mavinakayanahalli
@ 2009-01-28 17:15         ` Frédéric Weisbecker
  0 siblings, 0 replies; 38+ messages in thread
From: Frédéric Weisbecker @ 2009-01-28 17:15 UTC (permalink / raw)
  To: ananth
  Cc: Kok, Auke, Linux Kernel Mailing List, powertop ml,
	Arjan van de Ven, Ingo Molnar, srostedt, Arnaldo Carvalho de Melo,
	Frank Ch. Eigler, Neil Horman, Roland McGrath

2009/1/28 Ananth N Mavinakayanahalli <ananth@in.ibm.com>:
> On Wed, Jan 28, 2009 at 03:21:24PM +0100, Frédéric Weisbecker wrote:
>> 2009/1/28 Ananth N Mavinakayanahalli <ananth@in.ibm.com>:
>> > On Tue, Jan 27, 2009 at 11:43:05PM +0100, Frederic Weisbecker wrote:
>> >> On Tue, Jan 27, 2009 at 12:08:04PM -0800, Kok, Auke wrote:
>
> ...
>
>> > Here is something I did sometime ago that uses utrace. It is per task
>> > doesn't use ftrace and is just intended as a prototype. It traces both
>> > syscalls and returns.
>> >
>> > ---
>> >
>> > Here is the beginnings of a simple utrace based strace. Right now, one
>> > needs to invoke this program with a 'insmod <modname> tid=<tid>'.
>> >
>> > The output looks something like this:
>> > [267352.641112] Attached to 32604 => 0xd8f60090
>> > [267353.981046] 2
>> > [267353.981085] 197 0x1 0xbff8cd84 0x86cff4 0x86d4c0 0x86d4c0 0xbff8cd50a = 0
>> > [267353.981097] 192 0x0 0x1000 0x3 0x22 0xffffffff 0x0 = b7f8d000
>> > [267353.981124] 4 0x1 0xb7f8d000 0xd 0xd 0xb7f8d000 0xbff8cda8 = d
>> > [267353.981174] 4 0x1 0xb7f8d000 0x3 0x3 0xb7f8d000 0xbff8c7dc = 3
>> > [267353.981209] 252 0x0 0x0 0x86e0d0 0x0 0x86b274 0xbff8cee8 =
>> > [267353.981215] Task 32604 exited
>> > [267355.460180] Cannot find PID 32604
>> >
>> > I know strace does a pretty print, but this is a quick and dirty
>> > prototype.
>> > ---
>> >
>> > #include <linux/module.h>
>> > #include <linux/utrace.h>
>> > #include <linux/err.h>
>> > #include <asm/syscall.h>
>> >
>> > MODULE_DESCRIPTION("syscall trace");
>> > MODULE_LICENSE("GPL");
>> >
>> > static int target_tid;
>> >
>> > module_param_named(tid, target_tid, int, 0);
>> >
>> > static u32 task_syscall_entry(u32 action, struct utrace_attached_engine *engine,
>> >                struct task_struct *task, struct pt_regs *regs)
>> > {
>> >        long callno;
>> >        unsigned long args[6];
>> >
>> >        callno = syscall_get_nr(task, regs);
>> >        syscall_get_arguments(task, regs, 0, 6, args);
>> >
>> >        printk(KERN_INFO "%ld 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx 0x%lx = ",
>> >                callno, args[0], args[1], args[2], args[3], args[4], args[5]);
>> >        return UTRACE_RESUME;
>> > }
>> >
>> > static u32 task_syscall_exit(u32 action, struct utrace_attached_engine *engine,
>> >                struct task_struct *task, struct pt_regs *regs)
>> > {
>> >        long retval = syscall_get_return_value(task, regs);
>> >        printk("%lx\n", retval);
>> >        return UTRACE_RESUME;
>> > }
>> >
>> > static u32 task_exit(enum utrace_resume_action action,
>> >                struct utrace_attached_engine *engine, struct task_struct *task,
>> >                long orig_code, long *code)
>> > {
>> >        printk("\n");
>> >        printk(KERN_INFO "Task %d exited\n", target_tid);
>> >        return UTRACE_DETACH;
>> > }
>> >
>> > static const struct utrace_engine_ops syscall_ops =
>> > {
>> >        .report_syscall_entry = task_syscall_entry,
>> >        .report_syscall_exit = task_syscall_exit,
>> >        .report_exit = task_exit,
>> > };
>> >
>> > static int __init strace_init(void)
>> > {
>> >        struct pid *pid;
>> >        int ret = 0;
>> >        struct task_struct *target;
>> >        struct utrace_attached_engine *engine;
>> >
>> >        pid = find_get_pid(target_tid);
>> >        if (pid == NULL) {
>> >                printk(KERN_ERR "Cannot find PID %d\n", target_tid);
>> >                ret = -ESRCH;
>> >                goto out;
>> >        }
>> >
>> >        engine = utrace_attach_pid(pid, UTRACE_ATTACH_CREATE, &syscall_ops, 0);
>> >        if (IS_ERR(engine)) {
>> >                printk(KERN_ERR "utrace_attach_pid: %ld\n", PTR_ERR(engine));
>> >                ret = -EINVAL;
>> >                goto out;
>> >        } else if (engine == NULL) {
>> >                printk(KERN_ERR "utrace_attach_pid => NULL\n");
>> >                ret = -EINVAL;
>> >                goto out;
>> >        } else
>> >                printk(KERN_INFO "Attached to %d => 0x%p\n",
>> >                                target_tid, engine);
>> >
>> >        /*
>> >         * If utrace_attach_pid() succeeded above, we are sure the target
>> >         * is valid here
>> >         */
>> >        target = pid_task(pid, PIDTYPE_PID);
>> >        put_pid(pid);
>> >
>> >        ret = utrace_set_events(target, engine, UTRACE_EVENT_SYSCALL |
>> >                        UTRACE_EVENT(EXIT));
>> >        if (ret)
>> >                printk(KERN_ERR "utrace_set_events returned %d\n", ret);
>> >
>> > out:
>> >        return ret;
>> > }
>> >
>> > static void __exit strace_exit(void)
>> > {
>> >        int ret = 0;
>> >        struct pid *pid;
>> >        struct utrace_attached_engine *engine;
>> >        struct task_struct *target;
>> >
>> >        pid = find_get_pid(target_tid);
>> >        if (pid == NULL) {
>> >                printk(KERN_ERR "Cannot find PID %d\n", target_tid);
>> >                return;
>> >        }
>> >
>> >        target = pid_task(pid, PIDTYPE_PID);
>> >        put_pid(pid);
>> >        engine = utrace_attach_task(target, UTRACE_ATTACH_MATCH_OPS,
>> >                        &syscall_ops, 0);
>> >        if (IS_ERR(engine))
>> >                printk(KERN_ERR "Can't find self: %ld\n", PTR_ERR(engine));
>> >        else if (engine == NULL)
>> >                printk(KERN_ERR "Can't find self: no match\n");
>> >        else {
>> >                printk(KERN_INFO "Trying detach 0x%p from %d\n",
>> >                                engine, target_tid);
>> >                ret = utrace_control(target, engine, UTRACE_DETACH);
>> >                if (ret)
>> >                        printk(KERN_ERR "utrace_control returned %d\n",
>> >                                        ret);
>> >        }
>> > }
>> >
>> > module_init(strace_init);
>> > module_exit(strace_exit);
>> >
>>
>>
>> This is a very convenient Api. I would be glad to use it with ftrace.
>> But, what is it's status currently? Some projects to merge it?
>
> Its currently available as a git tree:
> git://git.kernel.org/pub/scm/linux/kernel/git/frob/linux-2.6-utrace.git
>
> There is work going on to merge it upstream. Roland McGrath will be able
> to better answer when. However, one of the biggest requirements to merge
> it is an in-kernel user. Hopefully Frank's prototype and/or an ftrace
> integration of the above snippet should be incentive to push for some
> in-tree users :-)
>
> Ananth
>

I hope so. That would be used by the function graph tracer and a
syscall tracer, and probably for other special tracers'
needs, if Ingo and Steven are ok with it.

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-01-27 22:43 ` Frederic Weisbecker
                     ` (2 preceding siblings ...)
  2009-01-28  9:38   ` Ananth N Mavinakayanahalli
@ 2009-01-28 22:19   ` Kok, Auke
  3 siblings, 0 replies; 38+ messages in thread
From: Kok, Auke @ 2009-01-28 22:19 UTC (permalink / raw)
  To: Frederic Weisbecker
  Cc: Linux Kernel Mailing List, powertop ml, Arjan van de Ven,
	Ingo Molnar, srostedt@redhat.com, Arnaldo Carvalho de Melo,
	Frank Ch. Eigler, Neil Horman, Kok, Auke

Frederic Weisbecker wrote:
> On Tue, Jan 27, 2009 at 12:08:04PM -0800, Kok, Auke wrote:
>> This tracer monitors regular file open() syscalls. This is a fast
>> and low-overhead alternative to strace, and does not allow or
>> require to be attached to every process.
>>
>> The tracer only logs succesfull calls, as those are the only ones we
>> are currently interested in, and we can determine the absolute path
>> of these files as we log.
>>
>> Signed-off-by: Auke Kok <auke-jan.h.kok@intel.com>
> 
> 
> Hi Auke,
> 
> Speaking about a global syscall tracer, I made a patch to trace only the syscalls
> with the function-graph-tracer.
> 
> http://lkml.org/lkml/2008/12/30/267
> 
> Its approach and purpose is different than a tracer dedicated only to syscalls.
> The function graph tracer traces execution graph of the functions and is more about
> execution time spent and code flow whereas a syscall tracer can provide more specific
> informations about syscalls.
> 
> So both are not overlaping.
> 
> But the low level part of my patch creates a thread flag _TIF_SYSCALL_TRACE which triggers
> a ptrace hook when set.
> This low-level part can easily be used by all tracers that would like to inspect syscalls.
> 
> Just a change is needed: Steven requested that the part inside syscall_trace_enter become
> a tracepoint, making it totally shareable between tracers and easy to turn on and off.
> 
> And perhaps the parts that set/clear the flag on all tasks can be shared too.
> 
> So we can start with this low-level syscall tracing facility. If you want, I can adapt
> this low-level part and submit a patch this week or the next one to give you this base
> infrastructure.
> 
> 
> Once we have it, I think a syscall tracer can be fed with new syscalls events through
> several patch iterations, starting with the open and close one :-)
> 
> Are you ok with that?

That sounds like a good start. One note of concern is that (for sreadahead) we're
really interested in the _absolute_ path, not the relative ones that are passed to
the syscall by userspace.

This may significantly complicate things when it comes to fs syscalls, and is one
of the reasons I wrote the quick-n-dirty alternative.


Auke

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-01-27 20:08 [PATCH] tracer for sys_open() - sreadahead Kok, Auke
  2009-01-27 20:51 ` Arnaldo Carvalho de Melo
  2009-01-27 22:43 ` Frederic Weisbecker
@ 2009-01-30 20:22 ` Pavel Machek
  2009-02-03 13:32   ` Ingo Molnar
  2 siblings, 1 reply; 38+ messages in thread
From: Pavel Machek @ 2009-01-30 20:22 UTC (permalink / raw)
  To: Kok, Auke
  Cc: Linux Kernel Mailing List, powertop ml, Arjan van de Ven,
	Ingo Molnar, srostedt

On Tue 2009-01-27 12:08:04, Kok, Auke wrote:
> 
> This tracer monitors regular file open() syscalls. This is a fast
> and low-overhead alternative to strace, and does not allow or
> require to be attached to every process.
> 
> The tracer only logs succesfull calls, as those are the only ones we
> are currently interested in, and we can determine the absolute path
> of these files as we log.

Maybe fanotify() should be used instead?

Or maybe just plain strace? One slow boot should not really hurt...
								Pavel
 
-- 
(english) http://www.livejournal.com/~pavelmachek
(cesky, pictures) http://atrey.karlin.mff.cuni.cz/~pavel/picture/horses/blog.html

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-01-30 20:22 ` Pavel Machek
@ 2009-02-03 13:32   ` Ingo Molnar
  2009-02-05 14:44     ` Harald Hoyer
  0 siblings, 1 reply; 38+ messages in thread
From: Ingo Molnar @ 2009-02-03 13:32 UTC (permalink / raw)
  To: Pavel Machek
  Cc: Kok, Auke, Linux Kernel Mailing List, powertop ml,
	Arjan van de Ven, srostedt


* Pavel Machek <pavel@suse.cz> wrote:

> On Tue 2009-01-27 12:08:04, Kok, Auke wrote:
> > 
> > This tracer monitors regular file open() syscalls. This is a fast
> > and low-overhead alternative to strace, and does not allow or
> > require to be attached to every process.
> > 
> > The tracer only logs succesfull calls, as those are the only ones we
> > are currently interested in, and we can determine the absolute path
> > of these files as we log.
> 
> Maybe fanotify() should be used instead?
> 
> Or maybe just plain strace? One slow boot should not really hurt...

ptrace is out of the question for good tracing because it's not a transparent
probe. (ptrace monopolizes the traced task - if we use that then we break 
regular strace usage.)

	Ingo

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-02-03 13:32   ` Ingo Molnar
@ 2009-02-05 14:44     ` Harald Hoyer
  2009-02-05 15:07       ` Bill Nottingham
  2009-02-09 13:13       ` Karel Zak
  0 siblings, 2 replies; 38+ messages in thread
From: Harald Hoyer @ 2009-02-05 14:44 UTC (permalink / raw)
  To: linux-kernel; +Cc: power

Ingo Molnar wrote:
> * Pavel Machek <pavel@suse.cz> wrote:
> 
>> On Tue 2009-01-27 12:08:04, Kok, Auke wrote:
>>> This tracer monitors regular file open() syscalls. This is a fast
>>> and low-overhead alternative to strace, and does not allow or
>>> require to be attached to every process.
>>>
>>> The tracer only logs succesfull calls, as those are the only ones we
>>> are currently interested in, and we can determine the absolute path
>>> of these files as we log.
>> Maybe fanotify() should be used instead?
>>
>> Or maybe just plain strace? One slow boot should not really hurt...
> 
> ptrace is out of question for good tracing because it's not a transparent 
> probe. (ptrace monopolizes the traced task - if we use that then we break 
> regular strace usage.)
> 
> 	Ingo

Can strace be used on init?

$ man strace
...
        On Linux, exciting as it would be, tracing the init process is forbidden.
...

Any hope getting _any_ mechanism in the kernel??


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-02-05 14:44     ` Harald Hoyer
@ 2009-02-05 15:07       ` Bill Nottingham
  2009-02-05 15:14         ` Arjan van de Ven
  2009-02-09 13:13       ` Karel Zak
  1 sibling, 1 reply; 38+ messages in thread
From: Bill Nottingham @ 2009-02-05 15:07 UTC (permalink / raw)
  To: Harald Hoyer; +Cc: linux-kernel, power

Harald Hoyer (harald@redhat.com) said: 
> Can strace can be used on init?
>
> $ man strace
> ...
>        On Linux, exciting as it would be, tracing the init process is forbidden.
> ...
>
> Any hope getting _any_ mechanism in the kernel??

The man page probably needs fixed - this works as of a few kernel releases
ago.

Bill

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-02-05 15:07       ` Bill Nottingham
@ 2009-02-05 15:14         ` Arjan van de Ven
  2009-02-05 15:24           ` Bill Nottingham
  0 siblings, 1 reply; 38+ messages in thread
From: Arjan van de Ven @ 2009-02-05 15:14 UTC (permalink / raw)
  To: Bill Nottingham; +Cc: Harald Hoyer, linux-kernel, power

On Thu, 5 Feb 2009 10:07:05 -0500
Bill Nottingham <notting@redhat.com> wrote:

> Harald Hoyer (harald@redhat.com) said: 
> > Can strace can be used on init?
> >
> > $ man strace
> > ...
> >        On Linux, exciting as it would be, tracing the init process
> > is forbidden. ...
> >
> > Any hope getting _any_ mechanism in the kernel??
> 
> The man page probably needs fixed - this works as of a few kernel
> releases ago.
> 

you can strace init but you can't strace X
(at least... last I tried it locked my box ;0)


-- 
Arjan van de Ven 	Intel Open Source Technology Centre
For development, discussion and tips for power savings, 
visit http://www.lesswatts.org

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-02-05 15:14         ` Arjan van de Ven
@ 2009-02-05 15:24           ` Bill Nottingham
  2009-02-05 15:47             ` Arjan van de Ven
  0 siblings, 1 reply; 38+ messages in thread
From: Bill Nottingham @ 2009-02-05 15:24 UTC (permalink / raw)
  To: Arjan van de Ven; +Cc: Harald Hoyer, linux-kernel, power

Arjan van de Ven (arjan@infradead.org) said: 
> you can strace init but you can't strace X
> (at least... last I tried it locked my box ;0)

You should be able to strace X, as long as you're not doing it *from* X.

Bill

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-02-05 15:24           ` Bill Nottingham
@ 2009-02-05 15:47             ` Arjan van de Ven
  2009-02-06 23:18               ` Corrado Zoccolo
  0 siblings, 1 reply; 38+ messages in thread
From: Arjan van de Ven @ 2009-02-05 15:47 UTC (permalink / raw)
  To: Bill Nottingham; +Cc: Harald Hoyer, linux-kernel, power

On Thu, 5 Feb 2009 10:24:42 -0500
Bill Nottingham <notting@redhat.com> wrote:

> Arjan van de Ven (arjan@infradead.org) said: 
> > you can strace init but you can't strace X
> > (at least... last I tried it locked my box ;0)
> 
> You should be able to strace X, as long as you're not doing it *from*
> X.
> 
last I tried it b0rked (yeah I'm not stupid enough to do that from X,
at least not a second time ;-)

another issue with the "use strace" approach is selinux... 
(this is in addition to the performance and behavioral changes)
-- 
Arjan van de Ven 	Intel Open Source Technology Centre
For development, discussion and tips for power savings, 
visit http://www.lesswatts.org


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-02-05 15:47             ` Arjan van de Ven
@ 2009-02-06 23:18               ` Corrado Zoccolo
  0 siblings, 0 replies; 38+ messages in thread
From: Corrado Zoccolo @ 2009-02-06 23:18 UTC (permalink / raw)
  To: Arjan van de Ven; +Cc: Bill Nottingham, Harald Hoyer, linux-kernel, power

On Thu, Feb 5, 2009 at 4:47 PM, Arjan van de Ven <arjan@infradead.org> wrote:
>
> another issue with the "use strace" approach is selinux...
> (this is in addition to the performance and behavioral changes)

Regarding behavioral changes, we should consider that just introducing
sreadahead in the boot process will change the system behaviour,
especially with distros that try and start multiple processes in
parallel during boot.

This means that, in order to approximate the optimal boot, sreadahead
should monitor every boot (even when it is performing readahead), and
modify the order of the opened files accordingly.
I'm using a modified sreadahead version that implements this (on top
of original ext3 patch), and the number of I/O waits shown by
bootchart reduced noticeably w.r.t. naive sreadahead.

So the assumption that just 1 slow boot is enough doesn't hold. We
should try to have a low overhead tracing system in the kernel, to be
able to implement this more efficiently.

Corrado

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-02-05 14:44     ` Harald Hoyer
  2009-02-05 15:07       ` Bill Nottingham
@ 2009-02-09 13:13       ` Karel Zak
  2009-02-09 13:23         ` Harald Hoyer
  1 sibling, 1 reply; 38+ messages in thread
From: Karel Zak @ 2009-02-09 13:13 UTC (permalink / raw)
  To: Harald Hoyer; +Cc: linux-kernel, power

On Thu, Feb 05, 2009 at 03:44:42PM +0100, Harald Hoyer wrote:
> Ingo Molnar wrote:
>> * Pavel Machek <pavel@suse.cz> wrote:
>>
>>> On Tue 2009-01-27 12:08:04, Kok, Auke wrote:
>>>> This tracer monitors regular file open() syscalls. This is a fast
>>>> and low-overhead alternative to strace, and does not allow or
>>>> require to be attached to every process.
>>>>
>>>> The tracer only logs succesfull calls, as those are the only ones we
>>>> are currently interested in, and we can determine the absolute path
>>>> of these files as we log.
>>> Maybe fanotify() should be used instead?
>>>
>>> Or maybe just plain strace? One slow boot should not really hurt...
>>
>> ptrace is out of question for good tracing because it's not a 
>> transparent probe. (ptrace monopolizes the traced task - if we use that 
>> then we break regular strace usage.)
>>
>> 	Ingo
>
> Can strace can be used on init?
>
> $ man strace
> ...
>        On Linux, exciting as it would be, tracing the init process is forbidden.
> ...
>
> Any hope getting _any_ mechanism in the kernel??

 Do you remember the Linux Auditing System? That's RH's baby with hooks to
 all relevant syscalls. It would be better to fix/improve the current
 kernel mechanisms than introduce a new one.

    Karel

-- 
 Karel Zak  <kzak@redhat.com>

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-02-09 13:13       ` Karel Zak
@ 2009-02-09 13:23         ` Harald Hoyer
  2009-02-09 13:54           ` Karel Zak
  0 siblings, 1 reply; 38+ messages in thread
From: Harald Hoyer @ 2009-02-09 13:23 UTC (permalink / raw)
  To: linux-kernel; +Cc: power

Karel Zak wrote:
> On Thu, Feb 05, 2009 at 03:44:42PM +0100, Harald Hoyer wrote:
>> Ingo Molnar wrote:
>>> * Pavel Machek <pavel@suse.cz> wrote:
>>>
>>>> On Tue 2009-01-27 12:08:04, Kok, Auke wrote:
>>>>> This tracer monitors regular file open() syscalls. This is a fast
>>>>> and low-overhead alternative to strace, and does not allow or
>>>>> require to be attached to every process.
>>>>>
>>>>> The tracer only logs succesfull calls, as those are the only ones we
>>>>> are currently interested in, and we can determine the absolute path
>>>>> of these files as we log.
>>>> Maybe fanotify() should be used instead?
>>>>
>>>> Or maybe just plain strace? One slow boot should not really hurt...
>>> ptrace is out of question for good tracing because it's not a 
>>> transparent probe. (ptrace monopolizes the traced task - if we use that 
>>> then we break regular strace usage.)
>>>
>>> 	Ingo
>> Can strace can be used on init?
>>
>> $ man strace
>> ...
>>        On Linux, exciting as it would be, tracing the init process is forbidden.
>> ...
>>
>> Any hope getting _any_ mechanism in the kernel??
> 
>  Do you remember Linux Auditing System? That's RH's baby with hooks to
>  all relevant syscalls. It would be better to fix/improve the current
>  kernel mechanisms that introduce a new one.
> 
>     Karel
> 


Yes, I do remember it, because this is how the current fedora readahead gathers 
its data. It delays the audit daemon, because there is no clean way to hook into 
the stream. I asked to add a second "channel" (auditd wants the kernel socket 
for its own)...


^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-02-09 13:23         ` Harald Hoyer
@ 2009-02-09 13:54           ` Karel Zak
  2009-02-11 10:44             ` Harald Hoyer
  0 siblings, 1 reply; 38+ messages in thread
From: Karel Zak @ 2009-02-09 13:54 UTC (permalink / raw)
  To: Harald Hoyer; +Cc: linux-kernel, power

On Mon, Feb 09, 2009 at 02:23:35PM +0100, Harald Hoyer wrote:
> Karel Zak wrote:
>> On Thu, Feb 05, 2009 at 03:44:42PM +0100, Harald Hoyer wrote:
>>> Ingo Molnar wrote:
>>>> * Pavel Machek <pavel@suse.cz> wrote:
>>>>
>>>>> On Tue 2009-01-27 12:08:04, Kok, Auke wrote:
>>>>>> This tracer monitors regular file open() syscalls. This is a fast
>>>>>> and low-overhead alternative to strace, and does not allow or
>>>>>> require to be attached to every process.
>>>>>>
>>>>>> The tracer only logs succesfull calls, as those are the only ones we
>>>>>> are currently interested in, and we can determine the absolute path
>>>>>> of these files as we log.
>>>>> Maybe fanotify() should be used instead?
>>>>>
>>>>> Or maybe just plain strace? One slow boot should not really hurt...
>>>> ptrace is out of question for good tracing because it's not a  
>>>> transparent probe. (ptrace monopolizes the traced task - if we use 
>>>> that then we break regular strace usage.)
>>>>
>>>> 	Ingo
>>> Can strace can be used on init?
>>>
>>> $ man strace
>>> ...
>>>        On Linux, exciting as it would be, tracing the init process is forbidden.
>>> ...
>>>
>>> Any hope getting _any_ mechanism in the kernel??
>>
>>  Do you remember Linux Auditing System? That's RH's baby with hooks to
>>  all relevant syscalls. It would be better to fix/improve the current
>>  kernel mechanisms that introduce a new one.
>
> Yes, I do remember it, because this is how the current fedora readahead
> gathers its data. It delays the audit daemon, because there is no clean 
> way to hook into the stream. I asked to add a second "channel" (auditd 
> wants the kernel socket for its own)...

 yes, it'd be nice to support an arbitrary number of connections and
 rules per connection. (.. or export audit stuff to userspace by a
 special pseudo filesystem (see cgroups, debugfs, ...)).

    Karel

-- 
 Karel Zak  <kzak@redhat.com>

^ permalink raw reply	[flat|nested] 38+ messages in thread

* Re: [PATCH] tracer for sys_open() - sreadahead
  2009-02-09 13:54           ` Karel Zak
@ 2009-02-11 10:44             ` Harald Hoyer
  0 siblings, 0 replies; 38+ messages in thread
From: Harald Hoyer @ 2009-02-11 10:44 UTC (permalink / raw)
  To: linux-kernel; +Cc: power

Karel Zak wrote:
> On Mon, Feb 09, 2009 at 02:23:35PM +0100, Harald Hoyer wrote:
>> Karel Zak wrote:
>>> On Thu, Feb 05, 2009 at 03:44:42PM +0100, Harald Hoyer wrote:
>>>> Ingo Molnar wrote:
>>>>> * Pavel Machek <pavel@suse.cz> wrote:
>>>>>
>>>>>> On Tue 2009-01-27 12:08:04, Kok, Auke wrote:
>>>>>>> This tracer monitors regular file open() syscalls. This is a fast
>>>>>>> and low-overhead alternative to strace, and does not allow or
>>>>>>> require to be attached to every process.
>>>>>>>
>>>>>>> The tracer only logs succesfull calls, as those are the only ones we
>>>>>>> are currently interested in, and we can determine the absolute path
>>>>>>> of these files as we log.
>>>>>> Maybe fanotify() should be used instead?
>>>>>>
>>>>>> Or maybe just plain strace? One slow boot should not really hurt...
>>>>> ptrace is out of question for good tracing because it's not a  
>>>>> transparent probe. (ptrace monopolizes the traced task - if we use 
>>>>> that then we break regular strace usage.)
>>>>>
>>>>> 	Ingo
>>>> Can strace can be used on init?
>>>>
>>>> $ man strace
>>>> ...
>>>>        On Linux, exciting as it would be, tracing the init process is forbidden.
>>>> ...
>>>>
>>>> Any hope getting _any_ mechanism in the kernel??
>>>  Do you remember Linux Auditing System? That's RH's baby with hooks to
>>>  all relevant syscalls. It would be better to fix/improve the current
>>>  kernel mechanisms that introduce a new one.
>> Yes, I do remember it, because this is how the current fedora readahead
>> gathers its data. It delays the audit daemon, because there is no clean 
>> way to hook into the stream. I asked to add a second "channel" (auditd 
>> wants the kernel socket for its own)...
> 
>  yes, it'd be nice to support arbitrary number of connections and
>  rules per connection. (.. or export audit stuff to userspace by a
>  special pseudo filesystem (see cgroups, debugfs, ...)).
> 
>     Karel
> 

right! if only someone would implement that *hint, hint* :-/


^ permalink raw reply	[flat|nested] 38+ messages in thread

end of thread, other threads:[~2009-02-11 10:50 UTC | newest]

Thread overview: 38+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-01-27 20:08 [PATCH] tracer for sys_open() - sreadahead Kok, Auke
2009-01-27 20:51 ` Arnaldo Carvalho de Melo
2009-01-27 21:14   ` Frederic Weisbecker
2009-01-28 22:05     ` Kok, Auke
2009-01-29  0:45       ` Arnaldo Carvalho de Melo
2009-01-29 13:39         ` Frédéric Weisbecker
2009-01-29 13:40           ` Frédéric Weisbecker
2009-01-27 22:43 ` Frederic Weisbecker
2009-01-27 22:50   ` Frederic Weisbecker
2009-01-29 14:04     ` Ingo Molnar
2009-01-29 14:29       ` Frédéric Weisbecker
2009-01-29 14:31         ` Ingo Molnar
2009-01-29 14:40           ` Frédéric Weisbecker
2009-01-29 14:48             ` Frédéric Weisbecker
2009-01-29 15:09             ` Ingo Molnar
2009-01-29 15:17               ` Frédéric Weisbecker
2009-01-29 15:34               ` Frédéric Weisbecker
2009-01-29 15:53                 ` Frank Ch. Eigler
2009-01-28  0:43   ` Frank Ch. Eigler
2009-01-28 13:58     ` Frédéric Weisbecker
2009-01-28 14:29       ` Arnaldo Carvalho de Melo
2009-01-28  9:38   ` Ananth N Mavinakayanahalli
2009-01-28 14:21     ` Frédéric Weisbecker
2009-01-28 17:00       ` Ananth N Mavinakayanahalli
2009-01-28 17:15         ` Frédéric Weisbecker
2009-01-28 22:19   ` Kok, Auke
2009-01-30 20:22 ` Pavel Machek
2009-02-03 13:32   ` Ingo Molnar
2009-02-05 14:44     ` Harald Hoyer
2009-02-05 15:07       ` Bill Nottingham
2009-02-05 15:14         ` Arjan van de Ven
2009-02-05 15:24           ` Bill Nottingham
2009-02-05 15:47             ` Arjan van de Ven
2009-02-06 23:18               ` Corrado Zoccolo
2009-02-09 13:13       ` Karel Zak
2009-02-09 13:23         ` Harald Hoyer
2009-02-09 13:54           ` Karel Zak
2009-02-11 10:44             ` Harald Hoyer

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox