linux-perf-users.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
* [PATCH] perf inject: Make it possible to merge raw_syscalls:sys_enter/exit
@ 2016-04-01 23:50 Milian Wolff
  2016-05-09 17:56 ` Milian Wolff
                   ` (2 more replies)
  0 siblings, 3 replies; 4+ messages in thread
From: Milian Wolff @ 2016-04-01 23:50 UTC (permalink / raw)
  To: linux-perf-users; +Cc: acme, Milian Wolff

This makes it much more useful to report data recorded with
`perf trace record`. E.g.:

    #include <unistd.h>

    int main()
    {
        sleep(1);
        usleep(1000);
        usleep(1);
        return 0;
    }

    Compile and trace it:

# gcc -g sleep.c -o ex_sleep
# perf trace record --call-graph dwarf ex_sleep

Previously, the report is not so useful as it aggregates by the number
of syscalls:

# perf report --stdio --show-nr-samples --show-total-period \
              --no-children --sort sym --max-stack 1

    #
    # Samples: 46  of event 'raw_syscalls:sys_enter'
    # Event count (approx.): 46
    #
    # Overhead       Samples        Period  Symbol
    # ........  ............  ............  ..............................
    #
    100.00%            46            46  [k] syscall_trace_enter_phase2
                |
                |--30.43%--open64
                |
                |--17.39%--_xstat
                |
                |--15.22%--mmap64
                |
                |--8.70%--mprotect
                |
                |--6.52%--__nanosleep
                |
                |--4.35%--close
                |
                |--4.35%--_fxstat
                |
                |--2.17%--_exit
                |
                |--2.17%--munmap
                |
                |--2.17%--access
                |
                |--2.17%--read
                |
                |--2.17%--brk
                |
                |--2.17%--init_tls

And then the same for sys_exit. Note how the period count equals the
sample count. Yes, open, stat and mmap are called often, but are
these syscalls really expensive? Hard to tell.

This patch adds a new inject command to aggregate the
sys_enter and sys_exit events and sets the period to the elapsed time:

# perf inject -t -i perf.data -o perf.data.merged

This makes the report much nicer:

# perf report --stdio --show-total-period --no-children --sort sym \
              --max-stack 1  -i perf.data.merged

    #
    # Total Lost Samples: 0
    #
    # Samples: 45  of event 'raw_syscalls:sys_enter'
    # Event count (approx.): 1001520568
    #
    # Overhead        Period  Symbol
    # ........  ............  ..............................
    #
    100.00%    1001520568  [k] syscall_trace_enter_phase2
                |
                |--99.99%--__nanosleep

Note that the other syscalls in this example are so cheap they are
filtered out of the report by default. You can bring them back via
e.g. `--call-graph graph,0`, or by omitting `--no-children` or
via --percent-limit 0.

The report of the merged data can also again use the raw number
of syscalls for merging, with a pretty unwieldy command:

# perf report -i perf.data.merged --sort sym --show-nr-samples \
              -g graph,0,caller,function,count --stdio --max-stack 1

    #
    # Samples: 45  of event 'raw_syscalls:sys_enter'
    # Event count (approx.): 1001520568
    #
    # Children      Self       Samples  Symbol
    # ........  ........  ............  ..............................
    #
    100.00%   100.00%            45  [k] syscall_trace_enter_phase2
                |
                |--3--__nanosleep
                |
                |--14--open64
                |
                |--7--mmap64
                |
                |--8--_xstat
                |
                |--4--mprotect
                |
                |--1--munmap
                |
                |--2--_fxstat
                |
                |--1--access
                |
                |--2--close
                |
                |--1--read
                |
                |--1--init_tls
                |
                |--1--brk

Signed-off-by: Milian Wolff <milian.wolff@kdab.com>
---
 tools/perf/builtin-inject.c | 141 +++++++++++++++++++++++++++++++++++---------
 1 file changed, 113 insertions(+), 28 deletions(-)

diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index e5afa8f..3bad330 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -31,6 +31,7 @@ struct perf_inject {
 	bool			have_auxtrace;
 	bool			strip;
 	bool			jit_mode;
+	bool			trace;
 	const char		*input_name;
 	struct perf_data_file	output;
 	u64			bytes_written;
@@ -451,17 +452,14 @@ repipe:
 	return 0;
 }
 
-static int perf_inject__sched_process_exit(struct perf_tool *tool,
-					   union perf_event *event __maybe_unused,
-					   struct perf_sample *sample,
-					   struct perf_evsel *evsel __maybe_unused,
-					   struct machine *machine __maybe_unused)
+static int perf_inject__free_cloned_event(struct perf_tool *tool,
+					  u32 tid)
 {
 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
 	struct event_entry *ent;
 
 	list_for_each_entry(ent, &inject->samples, node) {
-		if (sample->tid == ent->tid) {
+		if (tid == ent->tid) {
 			list_del_init(&ent->node);
 			free(ent);
 			break;
@@ -471,21 +469,18 @@ static int perf_inject__sched_process_exit(struct perf_tool *tool,
 	return 0;
 }
 
-static int perf_inject__sched_switch(struct perf_tool *tool,
-				     union perf_event *event,
-				     struct perf_sample *sample,
-				     struct perf_evsel *evsel,
-				     struct machine *machine)
+static int perf_inject__clone_event(struct perf_tool *tool,
+				    union perf_event *event,
+				    struct perf_sample *sample)
 {
 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
 	struct event_entry *ent;
 
-	perf_inject__sched_process_exit(tool, event, sample, evsel, machine);
-
+	perf_inject__free_cloned_event(tool, sample->tid);
 	ent = malloc(event->header.size + sizeof(struct event_entry));
 	if (ent == NULL) {
 		color_fprintf(stderr, PERF_COLOR_RED,
-			     "Not enough memory to process sched switch event!");
+			     "Not enough memory to clone event!");
 		return -1;
 	}
 
@@ -495,35 +490,104 @@ static int perf_inject__sched_switch(struct perf_tool *tool,
 	return 0;
 }
 
+static struct event_entry *perf_inject__find_cloned_event(struct perf_tool *tool,
+							  u32 tid)
+{
+	struct event_entry *ent;
+	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+
+	list_for_each_entry(ent, &inject->samples, node) {
+		if (tid == ent->tid)
+			return ent;
+	}
+	return NULL;
+}
+
+static int perf_inject__repipe_cloned_event_sample(struct perf_tool *tool,
+						   struct perf_evsel *evsel,
+						   struct machine *machine,
+						   union perf_event *event_sw,
+						   struct perf_sample sample_sw)
+{
+	perf_event__synthesize_sample(event_sw, evsel->attr.sample_type,
+				      evsel->attr.read_format, &sample_sw,
+				      false);
+	build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
+	return perf_event__repipe(tool, event_sw, &sample_sw, machine);
+}
+
+static int perf_inject__sched_process_exit(struct perf_tool *tool,
+					   union perf_event *event __maybe_unused,
+					   struct perf_sample *sample,
+					   struct perf_evsel *evsel __maybe_unused,
+					   struct machine *machine __maybe_unused)
+{
+	perf_inject__free_cloned_event(tool, sample->tid);
+	return 0;
+}
+
+static int perf_inject__sched_switch(struct perf_tool *tool,
+				     union perf_event *event,
+				     struct perf_sample *sample,
+				     struct perf_evsel *evsel __maybe_unused,
+				     struct machine *machine __maybe_unused)
+{
+	return perf_inject__clone_event(tool, event, sample);
+}
+
 static int perf_inject__sched_stat(struct perf_tool *tool,
 				   union perf_event *event __maybe_unused,
 				   struct perf_sample *sample,
 				   struct perf_evsel *evsel,
 				   struct machine *machine)
 {
-	struct event_entry *ent;
 	union perf_event *event_sw;
 	struct perf_sample sample_sw;
-	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
 	u32 pid = perf_evsel__intval(evsel, sample, "pid");
+	struct event_entry *ent = perf_inject__find_cloned_event(tool, pid);
 
-	list_for_each_entry(ent, &inject->samples, node) {
-		if (pid == ent->tid)
-			goto found;
-	}
+	if (ent == NULL)
+		return 0;
 
-	return 0;
-found:
 	event_sw = &ent->event[0];
 	perf_evsel__parse_sample(evsel, event_sw, &sample_sw);
 
 	sample_sw.period = sample->period;
 	sample_sw.time	 = sample->time;
-	perf_event__synthesize_sample(event_sw, evsel->attr.sample_type,
-				      evsel->attr.read_format, &sample_sw,
-				      false);
-	build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
-	return perf_event__repipe(tool, event_sw, &sample_sw, machine);
+
+	return perf_inject__repipe_cloned_event_sample(tool, evsel, machine,
+						       event_sw, sample_sw);
+}
+
+static int perf_inject__raw_syscalls_sys_enter(struct perf_tool *tool,
+				     union perf_event *event,
+				     struct perf_sample *sample,
+				     struct perf_evsel *evsel __maybe_unused,
+				     struct machine *machine __maybe_unused)
+{
+	return perf_inject__clone_event(tool, event, sample);
+}
+
+static int perf_inject__raw_syscalls_sys_exit(struct perf_tool *tool,
+				   union perf_event *event __maybe_unused,
+				   struct perf_sample *sample,
+				   struct perf_evsel *evsel,
+				   struct machine *machine)
+{
+	union perf_event *event_sw;
+	struct perf_sample sample_sw;
+	struct event_entry *ent = perf_inject__find_cloned_event(tool, sample->tid);
+
+	if (ent == NULL)
+		return 0;
+
+	event_sw = &ent->event[0];
+	perf_evsel__parse_sample(evsel, event_sw, &sample_sw);
+
+	sample_sw.period = sample->time - sample_sw.time;
+
+	return perf_inject__repipe_cloned_event_sample(tool, evsel, machine,
+						       event_sw, sample_sw);
 }
 
 static void sig_handler(int sig __maybe_unused)
@@ -656,6 +720,24 @@ static int __cmd_inject(struct perf_inject *inject)
 			else if (!strncmp(name, "sched:sched_stat_", 17))
 				evsel->handler = perf_inject__sched_stat;
 		}
+	} else if (inject->trace) {
+		struct perf_evsel *evsel;
+
+		evlist__for_each(session->evlist, evsel) {
+			const char *name = perf_evsel__name(evsel);
+
+			if (!strcmp(name, "raw_syscalls:sys_exit")) {
+				if (perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
+					return -EINVAL;
+
+				evsel->handler = perf_inject__raw_syscalls_sys_exit;
+			} else if (!strcmp(name, "raw_syscalls:sys_enter")) {
+				if (perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
+					return -EINVAL;
+
+				evsel->handler = perf_inject__raw_syscalls_sys_enter;
+			}
+		}
 	} else if (inject->itrace_synth_opts.set) {
 		session->itrace_synth_opts = &inject->itrace_synth_opts;
 		inject->itrace_synth_opts.inject = true;
@@ -775,6 +857,9 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
 		OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat,
 			    "Merge sched-stat and sched-switch for getting events "
 			    "where and how long tasks slept"),
+		OPT_BOOLEAN('t', "trace", &inject.trace,
+			    "Merge raw_syscalls:sys_enter and raw_syscalls:sys_exit trace "
+			    "events to measure how long the individual syscalls took"),
 #ifdef HAVE_JITDUMP
 		OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"),
 #endif
@@ -815,7 +900,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
 		return -1;
 	}
 
-	inject.tool.ordered_events = inject.sched_stat;
+	inject.tool.ordered_events = inject.sched_stat || inject.trace;
 
 	file.path = inject.input_name;
 	inject.session = perf_session__new(&file, true, &inject.tool);
-- 
2.7.4

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* Re: [PATCH] perf inject: Make it possible to merge raw_syscalls:sys_enter/exit
  2016-04-01 23:50 [PATCH] perf inject: Make it possible to merge raw_syscalls:sys_enter/exit Milian Wolff
@ 2016-05-09 17:56 ` Milian Wolff
  2016-08-12 21:38 ` [PATCH v2] " Milian Wolff
  2016-08-30 22:42 ` [PATCH v3] perf inject: Make it possible to merge sys_enter/exit events Milian Wolff
  2 siblings, 0 replies; 4+ messages in thread
From: Milian Wolff @ 2016-05-09 17:56 UTC (permalink / raw)
  To: linux-perf-users; +Cc: acme

[-- Attachment #1: Type: text/plain, Size: 12457 bytes --]

Hey Arnaldo,

I think the patch below got lost, as it is not yet applied upstream and I 
never got any feedback on it. Could you please have a look at it?

Thanks

On Saturday, April 2, 2016 1:50:37 AM CEST Milian Wolff wrote:
> This makes it much more useful to report data recorded with
> `perf trace record`. E.g.:
> 
>     #include <unistd.h>
> 
>     int main()
>     {
>         sleep(1);
>         usleep(1000);
>         usleep(1);
>         return 0;
>     }
> 
>     Compile and trace it:
> 
> # gcc -g sleep.c -o ex_sleep
> # perf trace record --call-graph dwarf ex_sleep
> 
> Previously, the report is not so useful as it aggregates by the number
> of syscalls:
> 
> # perf report --stdio --show-nr-samples --show-total-period \
>               --no-children --sort sym --max-stack 1
> 
>     #
>     # Samples: 46  of event 'raw_syscalls:sys_enter'
>     # Event count (approx.): 46
>     #
>     # Overhead       Samples        Period  Symbol
>     # ........  ............  ............  ..............................
>     #
>     100.00%            46            46  [k] syscall_trace_enter_phase2
> 
>                 |--30.43%--open64
>                 |
>                 |--17.39%--_xstat
>                 |
>                 |--15.22%--mmap64
>                 |
>                 |--8.70%--mprotect
>                 |
>                 |--6.52%--__nanosleep
>                 |
>                 |--4.35%--close
>                 |
>                 |--4.35%--_fxstat
>                 |
>                 |--2.17%--_exit
>                 |
>                 |--2.17%--munmap
>                 |
>                 |--2.17%--access
>                 |
>                 |--2.17%--read
>                 |
>                 |--2.17%--brk
>                 |
>                 |--2.17%--init_tls
> 
> And then the same for sys_exit. Note how the period count equals the
> sample count. Yes, open, stat and mmap are called often, but are
> these syscalls really expensive? Hard to tell.
> 
> This patch adds a new inject command to aggregate the
> sys_enter and sys_exit events and sets the period to the elapsed time:
> 
> # perf inject -t -i perf.data -o perf.data.merged
> 
> This makes the report much nicer:
> 
> # perf report --stdio --show-total-period --no-children --sort sym \
>               --max-stack 1  -i perf.data.merged
> 
>     #
>     # Total Lost Samples: 0
>     #
>     # Samples: 45  of event 'raw_syscalls:sys_enter'
>     # Event count (approx.): 1001520568
>     #
>     # Overhead        Period  Symbol
>     # ........  ............  ..............................
>     #
>     100.00%    1001520568  [k] syscall_trace_enter_phase2
> 
>                 |--99.99%--__nanosleep
> 
> Note that the other syscalls in this example are so cheap they are
> filtered out of the report by default. You can bring them back via
> e.g. `--call-graph graph,0`, or by omitting `--no-children` or
> via --percent-limit 0.
> 
> The report of the merged data can also again use the raw number
> of syscalls for merging, with a pretty unwieldy command:
> 
> # perf report -i perf.data.merged --sort sym --show-nr-samples \
>               -g graph,0,caller,function,count --stdio --max-stack 1
> 
>     #
>     # Samples: 45  of event 'raw_syscalls:sys_enter'
>     # Event count (approx.): 1001520568
>     #
>     # Children      Self       Samples  Symbol
>     # ........  ........  ............  ..............................
>     #
>     100.00%   100.00%            45  [k] syscall_trace_enter_phase2
> 
>                 |--3--__nanosleep
>                 |
>                 |--14--open64
>                 |
>                 |--7--mmap64
>                 |
>                 |--8--_xstat
>                 |
>                 |--4--mprotect
>                 |
>                 |--1--munmap
>                 |
>                 |--2--_fxstat
>                 |
>                 |--1--access
>                 |
>                 |--2--close
>                 |
>                 |--1--read
>                 |
>                 |--1--init_tls
>                 |
>                 |--1--brk
> 
> Signed-off-by: Milian Wolff <milian.wolff@kdab.com>
> ---
>  tools/perf/builtin-inject.c | 141
> +++++++++++++++++++++++++++++++++++--------- 1 file changed, 113
> insertions(+), 28 deletions(-)
> 
> diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
> index e5afa8f..3bad330 100644
> --- a/tools/perf/builtin-inject.c
> +++ b/tools/perf/builtin-inject.c
> @@ -31,6 +31,7 @@ struct perf_inject {
>  	bool			have_auxtrace;
>  	bool			strip;
>  	bool			jit_mode;
> +	bool			trace;
>  	const char		*input_name;
>  	struct perf_data_file	output;
>  	u64			bytes_written;
> @@ -451,17 +452,14 @@ repipe:
>  	return 0;
>  }
> 
> -static int perf_inject__sched_process_exit(struct perf_tool *tool,
> -					   union perf_event *event __maybe_unused,
> -					   struct perf_sample *sample,
> -					   struct perf_evsel *evsel __maybe_unused,
> -					   struct machine *machine __maybe_unused)
> +static int perf_inject__free_cloned_event(struct perf_tool *tool,
> +					  u32 tid)
>  {
>  	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
> struct event_entry *ent;
> 
>  	list_for_each_entry(ent, &inject->samples, node) {
> -		if (sample->tid == ent->tid) {
> +		if (tid == ent->tid) {
>  			list_del_init(&ent->node);
>  			free(ent);
>  			break;
> @@ -471,21 +469,18 @@ static int perf_inject__sched_process_exit(struct
> perf_tool *tool, return 0;
>  }
> 
> -static int perf_inject__sched_switch(struct perf_tool *tool,
> -				     union perf_event *event,
> -				     struct perf_sample *sample,
> -				     struct perf_evsel *evsel,
> -				     struct machine *machine)
> +static int perf_inject__clone_event(struct perf_tool *tool,
> +				    union perf_event *event,
> +				    struct perf_sample *sample)
>  {
>  	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
> struct event_entry *ent;
> 
> -	perf_inject__sched_process_exit(tool, event, sample, evsel, machine);
> -
> +	perf_inject__free_cloned_event(tool, sample->tid);
>  	ent = malloc(event->header.size + sizeof(struct event_entry));
>  	if (ent == NULL) {
>  		color_fprintf(stderr, PERF_COLOR_RED,
> -			     "Not enough memory to process sched switch event!");
> +			     "Not enough memory to clone event!");
>  		return -1;
>  	}
> 
> @@ -495,35 +490,104 @@ static int perf_inject__sched_switch(struct perf_tool
> *tool, return 0;
>  }
> 
> +static struct event_entry *perf_inject__find_cloned_event(struct perf_tool
> *tool, +							  u32 tid)
> +{
> +	struct event_entry *ent;
> +	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
> +
> +	list_for_each_entry(ent, &inject->samples, node) {
> +		if (tid == ent->tid)
> +			return ent;
> +	}
> +	return NULL;
> +}
> +
> +static int perf_inject__repipe_cloned_event_sample(struct perf_tool *tool,
> +						   struct perf_evsel *evsel,
> +						   struct machine *machine,
> +						   union perf_event *event_sw,
> +						   struct perf_sample sample_sw)
> +{
> +	perf_event__synthesize_sample(event_sw, evsel->attr.sample_type,
> +				      evsel->attr.read_format, &sample_sw,
> +				      false);
> +	build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
> +	return perf_event__repipe(tool, event_sw, &sample_sw, machine);
> +}
> +
> +static int perf_inject__sched_process_exit(struct perf_tool *tool,
> +					   union perf_event *event __maybe_unused,
> +					   struct perf_sample *sample,
> +					   struct perf_evsel *evsel __maybe_unused,
> +					   struct machine *machine __maybe_unused)
> +{
> +	perf_inject__free_cloned_event(tool, sample->tid);
> +	return 0;
> +}
> +
> +static int perf_inject__sched_switch(struct perf_tool *tool,
> +				     union perf_event *event,
> +				     struct perf_sample *sample,
> +				     struct perf_evsel *evsel __maybe_unused,
> +				     struct machine *machine __maybe_unused)
> +{
> +	return perf_inject__clone_event(tool, event, sample);
> +}
> +
>  static int perf_inject__sched_stat(struct perf_tool *tool,
>  				   union perf_event *event __maybe_unused,
>  				   struct perf_sample *sample,
>  				   struct perf_evsel *evsel,
>  				   struct machine *machine)
>  {
> -	struct event_entry *ent;
>  	union perf_event *event_sw;
>  	struct perf_sample sample_sw;
> -	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
> u32 pid = perf_evsel__intval(evsel, sample, "pid");
> +	struct event_entry *ent = perf_inject__find_cloned_event(tool, pid);
> 
> -	list_for_each_entry(ent, &inject->samples, node) {
> -		if (pid == ent->tid)
> -			goto found;
> -	}
> +	if (ent == NULL)
> +		return 0;
> 
> -	return 0;
> -found:
>  	event_sw = &ent->event[0];
>  	perf_evsel__parse_sample(evsel, event_sw, &sample_sw);
> 
>  	sample_sw.period = sample->period;
>  	sample_sw.time	 = sample->time;
> -	perf_event__synthesize_sample(event_sw, evsel->attr.sample_type,
> -				      evsel->attr.read_format, &sample_sw,
> -				      false);
> -	build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
> -	return perf_event__repipe(tool, event_sw, &sample_sw, machine);
> +
> +	return perf_inject__repipe_cloned_event_sample(tool, evsel, machine,
> +						       event_sw, sample_sw);
> +}
> +
> +static int perf_inject__raw_syscalls_sys_enter(struct perf_tool *tool,
> +				     union perf_event *event,
> +				     struct perf_sample *sample,
> +				     struct perf_evsel *evsel __maybe_unused,
> +				     struct machine *machine __maybe_unused)
> +{
> +	return perf_inject__clone_event(tool, event, sample);
> +}
> +
> +static int perf_inject__raw_syscalls_sys_exit(struct perf_tool *tool,
> +				   union perf_event *event __maybe_unused,
> +				   struct perf_sample *sample,
> +				   struct perf_evsel *evsel,
> +				   struct machine *machine)
> +{
> +	union perf_event *event_sw;
> +	struct perf_sample sample_sw;
> +	struct event_entry *ent = perf_inject__find_cloned_event(tool,
> sample->tid); +
> +	if (ent == NULL)
> +		return 0;
> +
> +	event_sw = &ent->event[0];
> +	perf_evsel__parse_sample(evsel, event_sw, &sample_sw);
> +
> +	sample_sw.period = sample->time - sample_sw.time;
> +
> +	return perf_inject__repipe_cloned_event_sample(tool, evsel, machine,
> +						       event_sw, sample_sw);
>  }
> 
>  static void sig_handler(int sig __maybe_unused)
> @@ -656,6 +720,24 @@ static int __cmd_inject(struct perf_inject *inject)
>  			else if (!strncmp(name, "sched:sched_stat_", 17))
>  				evsel->handler = perf_inject__sched_stat;
>  		}
> +	} else if (inject->trace) {
> +		struct perf_evsel *evsel;
> +
> +		evlist__for_each(session->evlist, evsel) {
> +			const char *name = perf_evsel__name(evsel);
> +
> +			if (!strcmp(name, "raw_syscalls:sys_exit")) {
> +				if (perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
> +					return -EINVAL;
> +
> +				evsel->handler = perf_inject__raw_syscalls_sys_exit;
> +			} else if (!strcmp(name, "raw_syscalls:sys_enter")) {
> +				if (perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
> +					return -EINVAL;
> +
> +				evsel->handler = perf_inject__raw_syscalls_sys_enter;
> +			}
> +		}
>  	} else if (inject->itrace_synth_opts.set) {
>  		session->itrace_synth_opts = &inject->itrace_synth_opts;
>  		inject->itrace_synth_opts.inject = true;
> @@ -775,6 +857,9 @@ int cmd_inject(int argc, const char **argv, const char
> *prefix __maybe_unused) OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat,
>  			    "Merge sched-stat and sched-switch for getting events "
>  			    "where and how long tasks slept"),
> +		OPT_BOOLEAN('t', "trace", &inject.trace,
> +			    "Merge raw_syscalls:sys_enter and raw_syscalls:sys_exit trace 
"
> +			    "events to measure how long the individual syscalls took"),
>  #ifdef HAVE_JITDUMP
>  		OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into
> perf.data file"), #endif
> @@ -815,7 +900,7 @@ int cmd_inject(int argc, const char **argv, const char
> *prefix __maybe_unused) return -1;
>  	}
> 
> -	inject.tool.ordered_events = inject.sched_stat;
> +	inject.tool.ordered_events = inject.sched_stat || inject.trace;
> 
>  	file.path = inject.input_name;
>  	inject.session = perf_session__new(&file, true, &inject.tool);


-- 
Milian Wolff | milian.wolff@kdab.com | Software Engineer
KDAB (Deutschland) GmbH&Co KG, a KDAB Group company
Tel: +49-30-521325470
KDAB - The Qt Experts

[-- Attachment #2: smime.p7s --]
[-- Type: application/pkcs7-signature, Size: 5903 bytes --]

^ permalink raw reply	[flat|nested] 4+ messages in thread

* [PATCH v2] perf inject: Make it possible to merge raw_syscalls:sys_enter/exit
  2016-04-01 23:50 [PATCH] perf inject: Make it possible to merge raw_syscalls:sys_enter/exit Milian Wolff
  2016-05-09 17:56 ` Milian Wolff
@ 2016-08-12 21:38 ` Milian Wolff
  2016-08-30 22:42 ` [PATCH v3] perf inject: Make it possible to merge sys_enter/exit events Milian Wolff
  2 siblings, 0 replies; 4+ messages in thread
From: Milian Wolff @ 2016-08-12 21:38 UTC (permalink / raw)
  To: linux-perf-users; +Cc: acme, Milian Wolff

This makes it much more useful to report data recorded with
`perf trace record`. E.g.:

    #include <unistd.h>

    int main()
    {
        sleep(1);
        usleep(1000);
        usleep(1);
        return 0;
    }

    Compile and trace it:

# gcc -g sleep.c -o ex_sleep
# perf trace record --call-graph dwarf ex_sleep

Previously, the report is not so useful as it aggregates by the number
of syscalls:

# perf report --stdio --show-nr-samples --show-total-period \
              --no-children --sort sym --max-stack 1

    #
    # Samples: 46  of event 'raw_syscalls:sys_enter'
    # Event count (approx.): 46
    #
    # Overhead       Samples        Period  Symbol
    # ........  ............  ............  ..............................
    #
    100.00%            46            46  [k] syscall_trace_enter_phase2
                |
                |--30.43%--open64
                |
                |--17.39%--_xstat
                |
                |--15.22%--mmap64
                |
                |--8.70%--mprotect
                |
                |--6.52%--__nanosleep
                |
                |--4.35%--close
                |
                |--4.35%--_fxstat
                |
                |--2.17%--_exit
                |
                |--2.17%--munmap
                |
                |--2.17%--access
                |
                |--2.17%--read
                |
                |--2.17%--brk
                |
                |--2.17%--init_tls

And then the same for sys_exit. Note how the period count equals the
sample count. Yes, open, stat and mmap are called often, but are
these syscalls really expensive? Hard to tell.

This patch adds a new inject command to aggregate the
sys_enter and sys_exit events and sets the period to the elapsed time:

# perf inject -t -i perf.data -o perf.data.merged

This makes the report much nicer:

# perf report --stdio --show-total-period --no-children --sort sym \
              --max-stack 1  -i perf.data.merged

    #
    # Total Lost Samples: 0
    #
    # Samples: 45  of event 'raw_syscalls:sys_enter'
    # Event count (approx.): 1001520568
    #
    # Overhead        Period  Symbol
    # ........  ............  ..............................
    #
    100.00%    1001520568  [k] syscall_trace_enter_phase2
                |
                |--99.99%--__nanosleep

Note that the other syscalls in this example are so cheap they are
filtered out of the report by default. You can bring them back via
e.g. `--call-graph graph,0`, or by omitting `--no-children` or
via --percent-limit 0.

The report of the merged data can also again use the raw number
of syscalls for merging, with a pretty unwieldy command:

# perf report -i perf.data.merged --sort sym --show-nr-samples \
              -g graph,0,caller,function,count --stdio --max-stack 1

    #
    # Samples: 45  of event 'raw_syscalls:sys_enter'
    # Event count (approx.): 1001520568
    #
    # Children      Self       Samples  Symbol
    # ........  ........  ............  ..............................
    #
    100.00%   100.00%            45  [k] syscall_trace_enter_phase2
                |
                |--3--__nanosleep
                |
                |--14--open64
                |
                |--7--mmap64
                |
                |--8--_xstat
                |
                |--4--mprotect
                |
                |--1--munmap
                |
                |--2--_fxstat
                |
                |--1--access
                |
                |--2--close
                |
                |--1--read
                |
                |--1--init_tls
                |
                |--1--brk

Signed-off-by: Milian Wolff <milian.wolff@kdab.com>
---
 tools/perf/builtin-inject.c | 154 ++++++++++++++++++++++++++++++++++++--------
 1 file changed, 126 insertions(+), 28 deletions(-)

diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 73c1c4c..f1d5a8d 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -31,6 +31,7 @@ struct perf_inject {
 	bool			have_auxtrace;
 	bool			strip;
 	bool			jit_mode;
+	bool			trace;
 	const char		*input_name;
 	struct perf_data_file	output;
 	u64			bytes_written;
@@ -451,17 +452,14 @@ repipe:
 	return 0;
 }
 
-static int perf_inject__sched_process_exit(struct perf_tool *tool,
-					   union perf_event *event __maybe_unused,
-					   struct perf_sample *sample,
-					   struct perf_evsel *evsel __maybe_unused,
-					   struct machine *machine __maybe_unused)
+static int perf_inject__free_cloned_event(struct perf_tool *tool,
+					  u32 tid)
 {
 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
 	struct event_entry *ent;
 
 	list_for_each_entry(ent, &inject->samples, node) {
-		if (sample->tid == ent->tid) {
+		if (tid == ent->tid) {
 			list_del_init(&ent->node);
 			free(ent);
 			break;
@@ -471,21 +469,18 @@ static int perf_inject__sched_process_exit(struct perf_tool *tool,
 	return 0;
 }
 
-static int perf_inject__sched_switch(struct perf_tool *tool,
-				     union perf_event *event,
-				     struct perf_sample *sample,
-				     struct perf_evsel *evsel,
-				     struct machine *machine)
+static int perf_inject__clone_event(struct perf_tool *tool,
+				    union perf_event *event,
+				    struct perf_sample *sample)
 {
 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
 	struct event_entry *ent;
 
-	perf_inject__sched_process_exit(tool, event, sample, evsel, machine);
-
+	perf_inject__free_cloned_event(tool, sample->tid);
 	ent = malloc(event->header.size + sizeof(struct event_entry));
 	if (ent == NULL) {
 		color_fprintf(stderr, PERF_COLOR_RED,
-			     "Not enough memory to process sched switch event!");
+			     "Not enough memory to clone event!");
 		return -1;
 	}
 
@@ -495,35 +490,104 @@ static int perf_inject__sched_switch(struct perf_tool *tool,
 	return 0;
 }
 
+static struct event_entry *perf_inject__find_cloned_event(struct perf_tool *tool,
+							  u32 tid)
+{
+	struct event_entry *ent;
+	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+
+	list_for_each_entry(ent, &inject->samples, node) {
+		if (tid == ent->tid)
+			return ent;
+	}
+	return NULL;
+}
+
+static int perf_inject__repipe_cloned_event_sample(struct perf_tool *tool,
+						   struct perf_evsel *evsel,
+						   struct machine *machine,
+						   union perf_event *event_sw,
+						   struct perf_sample sample_sw)
+{
+	perf_event__synthesize_sample(event_sw, evsel->attr.sample_type,
+				      evsel->attr.read_format, &sample_sw,
+				      false);
+	build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
+	return perf_event__repipe(tool, event_sw, &sample_sw, machine);
+}
+
+static int perf_inject__sched_process_exit(struct perf_tool *tool,
+					   union perf_event *event __maybe_unused,
+					   struct perf_sample *sample,
+					   struct perf_evsel *evsel __maybe_unused,
+					   struct machine *machine __maybe_unused)
+{
+	perf_inject__free_cloned_event(tool, sample->tid);
+	return 0;
+}
+
+static int perf_inject__sched_switch(struct perf_tool *tool,
+				     union perf_event *event,
+				     struct perf_sample *sample,
+				     struct perf_evsel *evsel __maybe_unused,
+				     struct machine *machine __maybe_unused)
+{
+	return perf_inject__clone_event(tool, event, sample);
+}
+
 static int perf_inject__sched_stat(struct perf_tool *tool,
 				   union perf_event *event __maybe_unused,
 				   struct perf_sample *sample,
 				   struct perf_evsel *evsel,
 				   struct machine *machine)
 {
-	struct event_entry *ent;
 	union perf_event *event_sw;
 	struct perf_sample sample_sw;
-	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
 	u32 pid = perf_evsel__intval(evsel, sample, "pid");
+	struct event_entry *ent = perf_inject__find_cloned_event(tool, pid);
 
-	list_for_each_entry(ent, &inject->samples, node) {
-		if (pid == ent->tid)
-			goto found;
-	}
+	if (ent == NULL)
+		return 0;
 
-	return 0;
-found:
 	event_sw = &ent->event[0];
 	perf_evsel__parse_sample(evsel, event_sw, &sample_sw);
 
 	sample_sw.period = sample->period;
 	sample_sw.time	 = sample->time;
-	perf_event__synthesize_sample(event_sw, evsel->attr.sample_type,
-				      evsel->attr.read_format, &sample_sw,
-				      false);
-	build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
-	return perf_event__repipe(tool, event_sw, &sample_sw, machine);
+
+	return perf_inject__repipe_cloned_event_sample(tool, evsel, machine,
+						       event_sw, sample_sw);
+}
+
+static int perf_inject__raw_syscalls_sys_enter(struct perf_tool *tool,
+				     union perf_event *event,
+				     struct perf_sample *sample,
+				     struct perf_evsel *evsel __maybe_unused,
+				     struct machine *machine __maybe_unused)
+{
+	return perf_inject__clone_event(tool, event, sample);
+}
+
+static int perf_inject__raw_syscalls_sys_exit(struct perf_tool *tool,
+				   union perf_event *event __maybe_unused,
+				   struct perf_sample *sample,
+				   struct perf_evsel *evsel,
+				   struct machine *machine)
+{
+	union perf_event *event_sw;
+	struct perf_sample sample_sw;
+	struct event_entry *ent = perf_inject__find_cloned_event(tool, sample->tid);
+
+	if (ent == NULL)
+		return 0;
+
+	event_sw = &ent->event[0];
+	perf_evsel__parse_sample(evsel, event_sw, &sample_sw);
+
+	sample_sw.period = sample->time - sample_sw.time;
+
+	return perf_inject__repipe_cloned_event_sample(tool, evsel, machine,
+						       event_sw, sample_sw);
 }
 
 static void sig_handler(int sig __maybe_unused)
@@ -656,6 +720,24 @@ static int __cmd_inject(struct perf_inject *inject)
 			else if (!strncmp(name, "sched:sched_stat_", 17))
 				evsel->handler = perf_inject__sched_stat;
 		}
+	} else if (inject->trace) {
+		struct perf_evsel *evsel;
+
+		evlist__for_each_entry(session->evlist, evsel) {
+			const char *name = perf_evsel__name(evsel);
+
+			if (!strcmp(name, "raw_syscalls:sys_exit")) {
+				if (perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
+					return -EINVAL;
+
+				evsel->handler = perf_inject__raw_syscalls_sys_exit;
+			} else if (!strcmp(name, "raw_syscalls:sys_enter")) {
+				if (perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
+					return -EINVAL;
+
+				evsel->handler = perf_inject__raw_syscalls_sys_enter;
+			}
+		}
 	} else if (inject->itrace_synth_opts.set) {
 		session->itrace_synth_opts = &inject->itrace_synth_opts;
 		inject->itrace_synth_opts.inject = true;
@@ -717,6 +799,19 @@ static int __cmd_inject(struct perf_inject *inject)
 			if (inject->strip)
 				strip_fini(inject);
 		}
+		if (inject->trace) {
+			// remove the now empty sys_exit event list
+			struct perf_evsel *evsel, *tmp;
+
+			evlist__for_each_entry_safe(session->evlist, tmp, evsel) {
+				const char *name = perf_evsel__name(evsel);
+
+				if (!strcmp(name, "raw_syscalls:sys_exit")) {
+				    perf_evlist__remove(session->evlist, evsel);
+				    perf_evsel__delete(evsel);
+				}
+			}
+		}
 		session->header.data_offset = output_data_offset;
 		session->header.data_size = inject->bytes_written;
 		perf_session__write_header(session, session->evlist, fd, true);
@@ -775,6 +870,9 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
 		OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat,
 			    "Merge sched-stat and sched-switch for getting events "
 			    "where and how long tasks slept"),
+		OPT_BOOLEAN('t', "trace", &inject.trace,
+			    "Merge raw_syscalls:sys_enter and raw_syscalls:sys_exit trace "
+			    "events to measure how long the individual syscalls took"),
 #ifdef HAVE_JITDUMP
 		OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"),
 #endif
@@ -815,7 +913,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
 		return -1;
 	}
 
-	inject.tool.ordered_events = inject.sched_stat;
+	inject.tool.ordered_events = inject.sched_stat || inject.trace;
 
 	file.path = inject.input_name;
 	inject.session = perf_session__new(&file, true, &inject.tool);
-- 
2.9.2

^ permalink raw reply related	[flat|nested] 4+ messages in thread

* [PATCH v3] perf inject: Make it possible to merge sys_enter/exit events
  2016-04-01 23:50 [PATCH] perf inject: Make it possible to merge raw_syscalls:sys_enter/exit Milian Wolff
  2016-05-09 17:56 ` Milian Wolff
  2016-08-12 21:38 ` [PATCH v2] " Milian Wolff
@ 2016-08-30 22:42 ` Milian Wolff
  2 siblings, 0 replies; 4+ messages in thread
From: Milian Wolff @ 2016-08-30 22:42 UTC (permalink / raw)
  To: linux-perf-users; +Cc: linux-kernel, acme, Milian Wolff

This makes it much more useful to report data recorded with
`perf trace record`. E.g.:

    #include <unistd.h>

    int main()
    {
        sleep(1);
        usleep(1000);
        usleep(1);
        return 0;
    }

    Compile and trace it:

# gcc -g sleep.c -o ex_sleep
# perf trace record --call-graph dwarf ex_sleep

Previously, the report is not so useful as it aggregates by the number
of syscalls:

# perf report --stdio --show-nr-samples --show-total-period \
              --no-children --sort sym --max-stack 1

    #
    # Samples: 46  of event 'raw_syscalls:sys_enter'
    # Event count (approx.): 46
    #
    # Overhead       Samples        Period  Symbol
    # ........  ............  ............  ..............................
    #
    100.00%            46            46  [k] syscall_trace_enter_phase2
                |
                |--30.43%--open64
                |
                |--17.39%--_xstat
                |
                |--15.22%--mmap64
                |
                |--8.70%--mprotect
                |
                |--6.52%--__nanosleep
                |
                |--4.35%--close
                |
                |--4.35%--_fxstat
                |
                |--2.17%--_exit
                |
                |--2.17%--munmap
                |
                |--2.17%--access
                |
                |--2.17%--read
                |
                |--2.17%--brk
                |
                |--2.17%--init_tls

And then the same for sys_exit. Note how the period count equals the
sample count. Yes, open, stat and mmap are called often, but are
these syscalls really expensive? Hard to tell.

This patch adds a new inject command to aggregate the
sys_enter and sys_exit events and sets the period to the elapsed time:

# perf inject -t -i perf.data -o perf.data.merged

This makes the report much nicer:

# perf report --stdio --show-total-period --no-children --sort sym \
              --max-stack 1  -i perf.data.merged

    #
    # Total Lost Samples: 0
    #
    # Samples: 45  of event 'raw_syscalls:sys_enter'
    # Event count (approx.): 1001520568
    #
    # Overhead        Period  Symbol
    # ........  ............  ..............................
    #
    100.00%    1001520568  [k] syscall_trace_enter_phase2
                |
                |--99.99%--__nanosleep

Note that the other syscalls in this example are so cheap they are
filtered out of the report by default. You can bring them back via
e.g. `--call-graph graph,0`, or by omitting `--no-children` or
via --percent-limit 0.

The report of the merged data can also again use the raw number
of syscalls for merging, with a pretty unwieldy command:

# perf report -i perf.data.merged --sort sym --show-nr-samples \
              -g graph,0,caller,function,count --stdio --max-stack 1

    #
    # Samples: 45  of event 'raw_syscalls:sys_enter'
    # Event count (approx.): 1001520568
    #
    # Children      Self       Samples  Symbol
    # ........  ........  ............  ..............................
    #
    100.00%   100.00%            45  [k] syscall_trace_enter_phase2
                |
                |--3--__nanosleep
                |
                |--14--open64
                |
                |--7--mmap64
                |
                |--8--_xstat
                |
                |--4--mprotect
                |
                |--1--munmap
                |
                |--2--_fxstat
                |
                |--1--access
                |
                |--2--close
                |
                |--1--read
                |
                |--1--init_tls
                |
                |--1--brk

Signed-off-by: Milian Wolff <milian.wolff@kdab.com>
---
 tools/perf/Documentation/perf-inject.txt |   5 +
 tools/perf/builtin-inject.c              | 157 +++++++++++++++++++++++++------
 2 files changed, 134 insertions(+), 28 deletions(-)

diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt
index 87b2588..ecfa1ea 100644
--- a/tools/perf/Documentation/perf-inject.txt
+++ b/tools/perf/Documentation/perf-inject.txt
@@ -41,6 +41,11 @@ OPTIONS
 	tasks slept. sched_switch contains a callchain where a task slept and
 	sched_stat contains a timeslice how long a task slept.
 
+-t::
+--trace::
+	Merge sys_enter and sys_exit trace events to measure how long the
+	individual syscalls took.
+
 --kallsyms=<file>::
 	kallsyms pathname
 
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 73c1c4c..8b959bf 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -31,6 +31,7 @@ struct perf_inject {
 	bool			have_auxtrace;
 	bool			strip;
 	bool			jit_mode;
+	bool			trace;
 	const char		*input_name;
 	struct perf_data_file	output;
 	u64			bytes_written;
@@ -451,17 +452,14 @@ repipe:
 	return 0;
 }
 
-static int perf_inject__sched_process_exit(struct perf_tool *tool,
-					   union perf_event *event __maybe_unused,
-					   struct perf_sample *sample,
-					   struct perf_evsel *evsel __maybe_unused,
-					   struct machine *machine __maybe_unused)
+static int perf_inject__free_cloned_event(struct perf_tool *tool,
+					  u32 tid)
 {
 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
 	struct event_entry *ent;
 
 	list_for_each_entry(ent, &inject->samples, node) {
-		if (sample->tid == ent->tid) {
+		if (tid == ent->tid) {
 			list_del_init(&ent->node);
 			free(ent);
 			break;
@@ -471,21 +469,18 @@ static int perf_inject__sched_process_exit(struct perf_tool *tool,
 	return 0;
 }
 
-static int perf_inject__sched_switch(struct perf_tool *tool,
-				     union perf_event *event,
-				     struct perf_sample *sample,
-				     struct perf_evsel *evsel,
-				     struct machine *machine)
+static int perf_inject__clone_event(struct perf_tool *tool,
+				    union perf_event *event,
+				    struct perf_sample *sample)
 {
 	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
 	struct event_entry *ent;
 
-	perf_inject__sched_process_exit(tool, event, sample, evsel, machine);
-
+	perf_inject__free_cloned_event(tool, sample->tid);
 	ent = malloc(event->header.size + sizeof(struct event_entry));
 	if (ent == NULL) {
 		color_fprintf(stderr, PERF_COLOR_RED,
-			     "Not enough memory to process sched switch event!");
+			     "Not enough memory to clone event!");
 		return -1;
 	}
 
@@ -495,35 +490,104 @@ static int perf_inject__sched_switch(struct perf_tool *tool,
 	return 0;
 }
 
+static struct event_entry *perf_inject__find_cloned_event(struct perf_tool *tool,
+							  u32 tid)
+{
+	struct event_entry *ent;
+	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+
+	list_for_each_entry(ent, &inject->samples, node) {
+		if (tid == ent->tid)
+			return ent;
+	}
+	return NULL;
+}
+
+static int perf_inject__repipe_cloned_event_sample(struct perf_tool *tool,
+						   struct perf_evsel *evsel,
+						   struct machine *machine,
+						   union perf_event *event_sw,
+						   struct perf_sample sample_sw)
+{
+	perf_event__synthesize_sample(event_sw, evsel->attr.sample_type,
+				      evsel->attr.read_format, &sample_sw,
+				      false);
+	build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
+	return perf_event__repipe(tool, event_sw, &sample_sw, machine);
+}
+
+static int perf_inject__sched_process_exit(struct perf_tool *tool,
+					   union perf_event *event __maybe_unused,
+					   struct perf_sample *sample,
+					   struct perf_evsel *evsel __maybe_unused,
+					   struct machine *machine __maybe_unused)
+{
+	perf_inject__free_cloned_event(tool, sample->tid);
+	return 0;
+}
+
+static int perf_inject__sched_switch(struct perf_tool *tool,
+				     union perf_event *event,
+				     struct perf_sample *sample,
+				     struct perf_evsel *evsel __maybe_unused,
+				     struct machine *machine __maybe_unused)
+{
+	return perf_inject__clone_event(tool, event, sample);
+}
+
 static int perf_inject__sched_stat(struct perf_tool *tool,
 				   union perf_event *event __maybe_unused,
 				   struct perf_sample *sample,
 				   struct perf_evsel *evsel,
 				   struct machine *machine)
 {
-	struct event_entry *ent;
 	union perf_event *event_sw;
 	struct perf_sample sample_sw;
-	struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
 	u32 pid = perf_evsel__intval(evsel, sample, "pid");
+	struct event_entry *ent = perf_inject__find_cloned_event(tool, pid);
 
-	list_for_each_entry(ent, &inject->samples, node) {
-		if (pid == ent->tid)
-			goto found;
-	}
+	if (ent == NULL)
+		return 0;
 
-	return 0;
-found:
 	event_sw = &ent->event[0];
 	perf_evsel__parse_sample(evsel, event_sw, &sample_sw);
 
 	sample_sw.period = sample->period;
 	sample_sw.time	 = sample->time;
-	perf_event__synthesize_sample(event_sw, evsel->attr.sample_type,
-				      evsel->attr.read_format, &sample_sw,
-				      false);
-	build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
-	return perf_event__repipe(tool, event_sw, &sample_sw, machine);
+
+	return perf_inject__repipe_cloned_event_sample(tool, evsel, machine,
+						       event_sw, sample_sw);
+}
+
+static int perf_inject__sys_enter(struct perf_tool *tool,
+				  union perf_event *event,
+				  struct perf_sample *sample,
+				  struct perf_evsel *evsel __maybe_unused,
+				  struct machine *machine __maybe_unused)
+{
+	return perf_inject__clone_event(tool, event, sample);
+}
+
+static int perf_inject__sys_exit(struct perf_tool *tool,
+				 union perf_event *event __maybe_unused,
+				 struct perf_sample *sample,
+				 struct perf_evsel *evsel,
+				 struct machine *machine)
+{
+	union perf_event *event_sw;
+	struct perf_sample sample_sw;
+	struct event_entry *ent = perf_inject__find_cloned_event(tool, sample->tid);
+
+	if (ent == NULL)
+		return 0;
+
+	event_sw = &ent->event[0];
+	perf_evsel__parse_sample(evsel, event_sw, &sample_sw);
+
+	sample_sw.period = sample->time - sample_sw.time;
+
+	return perf_inject__repipe_cloned_event_sample(tool, evsel, machine,
+						       event_sw, sample_sw);
 }
 
 static void sig_handler(int sig __maybe_unused)
@@ -656,6 +720,26 @@ static int __cmd_inject(struct perf_inject *inject)
 			else if (!strncmp(name, "sched:sched_stat_", 17))
 				evsel->handler = perf_inject__sched_stat;
 		}
+	} else if (inject->trace) {
+		struct perf_evsel *evsel;
+
+		evlist__for_each_entry(session->evlist, evsel) {
+			const char *name = perf_evsel__name(evsel);
+
+			if (!strcmp(name, "raw_syscalls:sys_exit") ||
+			    !strncmp(name, "syscalls:sys_exit_", sizeof("syscalls:sys_exit_") - 1)) {
+				if (perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
+					return -EINVAL;
+
+				evsel->handler = perf_inject__sys_exit;
+			} else if (!strcmp(name, "raw_syscalls:sys_enter") ||
+			    !strncmp(name, "syscalls:sys_enter_", sizeof("syscalls:sys_enter_") - 1)) {
+				if (perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
+					return -EINVAL;
+
+				evsel->handler = perf_inject__sys_enter;
+			}
+		}
 	} else if (inject->itrace_synth_opts.set) {
 		session->itrace_synth_opts = &inject->itrace_synth_opts;
 		inject->itrace_synth_opts.inject = true;
@@ -717,6 +801,20 @@ static int __cmd_inject(struct perf_inject *inject)
 			if (inject->strip)
 				strip_fini(inject);
 		}
+		if (inject->trace) {
+			// remove the now empty sys_exit event list
+			struct perf_evsel *evsel, *tmp;
+
+			evlist__for_each_entry_safe(session->evlist, tmp, evsel) {
+				const char *name = perf_evsel__name(evsel);
+
+				if (!strcmp(name, "raw_syscalls:sys_exit") ||
+				    !strncmp(name, "syscalls:sys_exit_", sizeof("syscalls:sys_exit_") - 1)) {
+				    perf_evlist__remove(session->evlist, evsel);
+				    perf_evsel__delete(evsel);
+				}
+			}
+		}
 		session->header.data_offset = output_data_offset;
 		session->header.data_size = inject->bytes_written;
 		perf_session__write_header(session, session->evlist, fd, true);
@@ -775,6 +873,9 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
 		OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat,
 			    "Merge sched-stat and sched-switch for getting events "
 			    "where and how long tasks slept"),
+		OPT_BOOLEAN('t', "trace", &inject.trace,
+			    "Merge sys_enter and sys_exit trace events to measure "
+			    "how long the individual syscalls took"),
 #ifdef HAVE_JITDUMP
 		OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"),
 #endif
@@ -815,7 +916,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
 		return -1;
 	}
 
-	inject.tool.ordered_events = inject.sched_stat;
+	inject.tool.ordered_events = inject.sched_stat || inject.trace;
 
 	file.path = inject.input_name;
 	inject.session = perf_session__new(&file, true, &inject.tool);
-- 
2.9.3

^ permalink raw reply related	[flat|nested] 4+ messages in thread

end of thread, other threads:[~2016-08-30 22:43 UTC | newest]

Thread overview: 4+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2016-04-01 23:50 [PATCH] perf inject: Make it possible to merge raw_syscalls:sys_enter/exit Milian Wolff
2016-05-09 17:56 ` Milian Wolff
2016-08-12 21:38 ` [PATCH v2] " Milian Wolff
2016-08-30 22:42 ` [PATCH v3] perf inject: Make it possible to merge sys_enter/exit events Milian Wolff

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).