* [PATCH] perf inject: Make it possible to merge raw_syscalls:sys_enter/exit
@ 2016-04-01 23:50 Milian Wolff
From: Milian Wolff @ 2016-04-01 23:50 UTC (permalink / raw)
To: linux-perf-users; +Cc: acme, Milian Wolff
This makes it much more useful to report data recorded with
`perf trace record`. E.g.:
#include <unistd.h>

int main()
{
        sleep(1);
        usleep(1000);
        usleep(1);
        return 0;
}
Compile and trace it:
# gcc -g sleep.c -o ex_sleep
# perf trace record --call-graph dwarf ex_sleep
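The resulting perf.data then contains raw_syscalls:sys_enter and
raw_syscalls:sys_exit events (cf. the reports below); this can be
double-checked with e.g.:

# perf evlist -i perf.data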
Without this patch, the report is not very useful, as it aggregates by
the number of syscalls:
# perf report --stdio --show-nr-samples --show-total-period \
--no-children --sort sym --max-stack 1
#
# Samples: 46 of event 'raw_syscalls:sys_enter'
# Event count (approx.): 46
#
# Overhead Samples Period Symbol
# ........ ............ ............ ..............................
#
100.00% 46 46 [k] syscall_trace_enter_phase2
|
|--30.43%--open64
|
|--17.39%--_xstat
|
|--15.22%--mmap64
|
|--8.70%--mprotect
|
|--6.52%--__nanosleep
|
|--4.35%--close
|
|--4.35%--_fxstat
|
|--2.17%--_exit
|
|--2.17%--munmap
|
|--2.17%--access
|
|--2.17%--read
|
|--2.17%--brk
|
|--2.17%--init_tls
The report for sys_exit looks the same. Note how the period equals the
sample count, i.e. every syscall contributes a period of 1. Yes, open,
stat and mmap are called often, but are these syscalls really
expensive? Hard to tell.
This patch adds a new option to perf inject that merges the sys_enter
and sys_exit events and sets the period of the merged sample to the
time elapsed between the two:
# perf inject -t -i perf.data -o perf.data.merged
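Conceptually, the new mode pairs each raw_syscalls:sys_enter sample
with the next raw_syscalls:sys_exit sample of the same thread, keeps
the enter sample (and thus its call chain) and rewrites its period to
the time between the two events. A minimal standalone sketch of that
pairing idea (all identifiers here, e.g. on_sys_enter, on_sys_exit and
enter_time, are made up for illustration; the real handlers are in the
diff below):

#include <stdio.h>
#include <stdint.h>

#define MAX_TIDS 1024

static uint64_t enter_time[MAX_TIDS];   /* 0 means: no pending sys_enter */

static void on_sys_enter(uint32_t tid, uint64_t time_ns)
{
        /* clone/remember the enter sample, keyed by thread id */
        enter_time[tid % MAX_TIDS] = time_ns;
}

static void on_sys_exit(uint32_t tid, uint64_t time_ns)
{
        uint64_t start = enter_time[tid % MAX_TIDS];

        if (!start)
                return; /* no matching sys_enter seen, skip */

        /* merged sample keeps the enter call chain; period = duration */
        printf("tid %u: syscall took %llu ns\n", (unsigned int)tid,
               (unsigned long long)(time_ns - start));
        enter_time[tid % MAX_TIDS] = 0;
}

int main(void)
{
        on_sys_enter(42, 1000);
        on_sys_exit(42, 1001500); /* tid 42: syscall took 1000500 ns */
        return 0;
}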
This makes the report much nicer:
# perf report --stdio --show-total-period --no-children --sort sym \
--max-stack 1 -i perf.data.merged
#
# Total Lost Samples: 0
#
# Samples: 45 of event 'raw_syscalls:sys_enter'
# Event count (approx.): 1001520568
#
# Overhead Period Symbol
# ........ ............ ..............................
#
100.00% 1001520568 [k] syscall_trace_enter_phase2
|
|--99.99%--__nanosleep
Note that the other syscalls in this example are so cheap that they
are filtered out of the report by default. You can bring them back,
e.g. via `--call-graph graph,0`, by omitting `--no-children`, or via
`--percent-limit 0`.
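For example, the full set of syscalls can be brought back with
something like:

# perf report --stdio --show-total-period --no-children --sort sym \
      --max-stack 1 -i perf.data.merged --percent-limit 0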
The report of the merged data can also aggregate by the raw number of
syscalls again, albeit with a pretty unwieldy command:
# perf report -i perf.data.merged --sort sym --show-nr-samples \
-g graph,0,caller,function,count --stdio --max-stack 1
#
# Samples: 45 of event 'raw_syscalls:sys_enter'
# Event count (approx.): 1001520568
#
# Children Self Samples Symbol
# ........ ........ ............ ..............................
#
100.00% 100.00% 45 [k] syscall_trace_enter_phase2
|
|--3--__nanosleep
|
|--14--open64
|
|--7--mmap64
|
|--8--_xstat
|
|--4--mprotect
|
|--1--munmap
|
|--2--_fxstat
|
|--1--access
|
|--2--close
|
|--1--read
|
|--1--init_tls
|
|--1--brk
Signed-off-by: Milian Wolff <milian.wolff@kdab.com>
---
tools/perf/builtin-inject.c | 141 +++++++++++++++++++++++++++++++++++---------
1 file changed, 113 insertions(+), 28 deletions(-)
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index e5afa8f..3bad330 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -31,6 +31,7 @@ struct perf_inject {
bool have_auxtrace;
bool strip;
bool jit_mode;
+ bool trace;
const char *input_name;
struct perf_data_file output;
u64 bytes_written;
@@ -451,17 +452,14 @@ repipe:
return 0;
}
-static int perf_inject__sched_process_exit(struct perf_tool *tool,
- union perf_event *event __maybe_unused,
- struct perf_sample *sample,
- struct perf_evsel *evsel __maybe_unused,
- struct machine *machine __maybe_unused)
+static int perf_inject__free_cloned_event(struct perf_tool *tool,
+ u32 tid)
{
struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
struct event_entry *ent;
list_for_each_entry(ent, &inject->samples, node) {
- if (sample->tid == ent->tid) {
+ if (tid == ent->tid) {
list_del_init(&ent->node);
free(ent);
break;
@@ -471,21 +469,18 @@ static int perf_inject__sched_process_exit(struct perf_tool *tool,
return 0;
}
-static int perf_inject__sched_switch(struct perf_tool *tool,
- union perf_event *event,
- struct perf_sample *sample,
- struct perf_evsel *evsel,
- struct machine *machine)
+static int perf_inject__clone_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample)
{
struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
struct event_entry *ent;
- perf_inject__sched_process_exit(tool, event, sample, evsel, machine);
-
+ perf_inject__free_cloned_event(tool, sample->tid);
ent = malloc(event->header.size + sizeof(struct event_entry));
if (ent == NULL) {
color_fprintf(stderr, PERF_COLOR_RED,
- "Not enough memory to process sched switch event!");
+ "Not enough memory to clone event!");
return -1;
}
@@ -495,35 +490,104 @@ static int perf_inject__sched_switch(struct perf_tool *tool,
return 0;
}
+static struct event_entry *perf_inject__find_cloned_event(struct perf_tool *tool,
+ u32 tid)
+{
+ struct event_entry *ent;
+ struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+
+ list_for_each_entry(ent, &inject->samples, node) {
+ if (tid == ent->tid)
+ return ent;
+ }
+ return NULL;
+}
+
+static int perf_inject__repipe_cloned_event_sample(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ struct machine *machine,
+ union perf_event *event_sw,
+ struct perf_sample sample_sw)
+{
+ perf_event__synthesize_sample(event_sw, evsel->attr.sample_type,
+ evsel->attr.read_format, &sample_sw,
+ false);
+ build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
+ return perf_event__repipe(tool, event_sw, &sample_sw, machine);
+}
+
+static int perf_inject__sched_process_exit(struct perf_tool *tool,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ perf_inject__free_cloned_event(tool, sample->tid);
+ return 0;
+}
+
+static int perf_inject__sched_switch(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ return perf_inject__clone_event(tool, event, sample);
+}
+
static int perf_inject__sched_stat(struct perf_tool *tool,
union perf_event *event __maybe_unused,
struct perf_sample *sample,
struct perf_evsel *evsel,
struct machine *machine)
{
- struct event_entry *ent;
union perf_event *event_sw;
struct perf_sample sample_sw;
- struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
u32 pid = perf_evsel__intval(evsel, sample, "pid");
+ struct event_entry *ent = perf_inject__find_cloned_event(tool, pid);
- list_for_each_entry(ent, &inject->samples, node) {
- if (pid == ent->tid)
- goto found;
- }
+ if (ent == NULL)
+ return 0;
- return 0;
-found:
event_sw = &ent->event[0];
perf_evsel__parse_sample(evsel, event_sw, &sample_sw);
sample_sw.period = sample->period;
sample_sw.time = sample->time;
- perf_event__synthesize_sample(event_sw, evsel->attr.sample_type,
- evsel->attr.read_format, &sample_sw,
- false);
- build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
- return perf_event__repipe(tool, event_sw, &sample_sw, machine);
+
+ return perf_inject__repipe_cloned_event_sample(tool, evsel, machine,
+ event_sw, sample_sw);
+}
+
+static int perf_inject__raw_syscalls_sys_enter(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ return perf_inject__clone_event(tool, event, sample);
+}
+
+static int perf_inject__raw_syscalls_sys_exit(struct perf_tool *tool,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct machine *machine)
+{
+ union perf_event *event_sw;
+ struct perf_sample sample_sw;
+ struct event_entry *ent = perf_inject__find_cloned_event(tool, sample->tid);
+
+ if (ent == NULL)
+ return 0;
+
+ event_sw = &ent->event[0];
+ perf_evsel__parse_sample(evsel, event_sw, &sample_sw);
+
+ sample_sw.period = sample->time - sample_sw.time;
+
+ return perf_inject__repipe_cloned_event_sample(tool, evsel, machine,
+ event_sw, sample_sw);
}
static void sig_handler(int sig __maybe_unused)
@@ -656,6 +720,24 @@ static int __cmd_inject(struct perf_inject *inject)
else if (!strncmp(name, "sched:sched_stat_", 17))
evsel->handler = perf_inject__sched_stat;
}
+ } else if (inject->trace) {
+ struct perf_evsel *evsel;
+
+ evlist__for_each(session->evlist, evsel) {
+ const char *name = perf_evsel__name(evsel);
+
+ if (!strcmp(name, "raw_syscalls:sys_exit")) {
+ if (perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
+ return -EINVAL;
+
+ evsel->handler = perf_inject__raw_syscalls_sys_exit;
+ } else if (!strcmp(name, "raw_syscalls:sys_enter")) {
+ if (perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
+ return -EINVAL;
+
+ evsel->handler = perf_inject__raw_syscalls_sys_enter;
+ }
+ }
} else if (inject->itrace_synth_opts.set) {
session->itrace_synth_opts = &inject->itrace_synth_opts;
inject->itrace_synth_opts.inject = true;
@@ -775,6 +857,9 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat,
"Merge sched-stat and sched-switch for getting events "
"where and how long tasks slept"),
+ OPT_BOOLEAN('t', "trace", &inject.trace,
+ "Merge raw_syscalls:sys_enter and raw_syscalls:sys_exit trace "
+ "events to measure how long the individual syscalls took"),
#ifdef HAVE_JITDUMP
OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"),
#endif
@@ -815,7 +900,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
return -1;
}
- inject.tool.ordered_events = inject.sched_stat;
+ inject.tool.ordered_events = inject.sched_stat || inject.trace;
file.path = inject.input_name;
inject.session = perf_session__new(&file, true, &inject.tool);
--
2.7.4
* Re: [PATCH] perf inject: Make it possible to merge raw_syscalls:sys_enter/exit
From: Milian Wolff @ 2016-05-09 17:56 UTC (permalink / raw)
To: linux-perf-users; +Cc: acme
Hey Arnaldo,
I think this patch got lost, as it is not yet applied upstream and I
never got any feedback on it. Could you please have a look at it?
Thanks
--
Milian Wolff | milian.wolff@kdab.com | Software Engineer
KDAB (Deutschland) GmbH&Co KG, a KDAB Group company
Tel: +49-30-521325470
KDAB - The Qt Experts
* [PATCH v2] perf inject: Make it possible to merge raw_syscalls:sys_enter/exit
From: Milian Wolff @ 2016-08-12 21:38 UTC (permalink / raw)
To: linux-perf-users; +Cc: acme, Milian Wolff
This makes it much more useful to report data recorded with
`perf trace record`. E.g.:
#include <unistd.h>

int main()
{
        sleep(1);
        usleep(1000);
        usleep(1);
        return 0;
}
Compile and trace it:
# gcc -g sleep.c -o ex_sleep
# perf trace record --call-graph dwarf ex_sleep
Without this patch, the report is not very useful, as it aggregates by
the number of syscalls:
# perf report --stdio --show-nr-samples --show-total-period \
--no-children --sort sym --max-stack 1
#
# Samples: 46 of event 'raw_syscalls:sys_enter'
# Event count (approx.): 46
#
# Overhead Samples Period Symbol
# ........ ............ ............ ..............................
#
100.00% 46 46 [k] syscall_trace_enter_phase2
|
|--30.43%--open64
|
|--17.39%--_xstat
|
|--15.22%--mmap64
|
|--8.70%--mprotect
|
|--6.52%--__nanosleep
|
|--4.35%--close
|
|--4.35%--_fxstat
|
|--2.17%--_exit
|
|--2.17%--munmap
|
|--2.17%--access
|
|--2.17%--read
|
|--2.17%--brk
|
|--2.17%--init_tls
The report for sys_exit looks the same. Note how the period equals the
sample count, i.e. every syscall contributes a period of 1. Yes, open,
stat and mmap are called often, but are these syscalls really
expensive? Hard to tell.
This patch adds a new option to perf inject that merges the sys_enter
and sys_exit events and sets the period of the merged sample to the
time elapsed between the two:
# perf inject -t -i perf.data -o perf.data.merged
This makes the report much nicer:
# perf report --stdio --show-total-period --no-children --sort sym \
--max-stack 1 -i perf.data.merged
#
# Total Lost Samples: 0
#
# Samples: 45 of event 'raw_syscalls:sys_enter'
# Event count (approx.): 1001520568
#
# Overhead Period Symbol
# ........ ............ ..............................
#
100.00% 1001520568 [k] syscall_trace_enter_phase2
|
|--99.99%--__nanosleep
Note that the other syscalls in this example are so cheap that they
are filtered out of the report by default. You can bring them back,
e.g. via `--call-graph graph,0`, by omitting `--no-children`, or via
`--percent-limit 0`.
The report of the merged data can also aggregate by the raw number of
syscalls again, albeit with a pretty unwieldy command:
# perf report -i perf.data.merged --sort sym --show-nr-samples \
-g graph,0,caller,function,count --stdio --max-stack 1
#
# Samples: 45 of event 'raw_syscalls:sys_enter'
# Event count (approx.): 1001520568
#
# Children Self Samples Symbol
# ........ ........ ............ ..............................
#
100.00% 100.00% 45 [k] syscall_trace_enter_phase2
|
|--3--__nanosleep
|
|--14--open64
|
|--7--mmap64
|
|--8--_xstat
|
|--4--mprotect
|
|--1--munmap
|
|--2--_fxstat
|
|--1--access
|
|--2--close
|
|--1--read
|
|--1--init_tls
|
|--1--brk
Signed-off-by: Milian Wolff <milian.wolff@kdab.com>
---
tools/perf/builtin-inject.c | 154 ++++++++++++++++++++++++++++++++++++--------
1 file changed, 126 insertions(+), 28 deletions(-)
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 73c1c4c..f1d5a8d 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -31,6 +31,7 @@ struct perf_inject {
bool have_auxtrace;
bool strip;
bool jit_mode;
+ bool trace;
const char *input_name;
struct perf_data_file output;
u64 bytes_written;
@@ -451,17 +452,14 @@ repipe:
return 0;
}
-static int perf_inject__sched_process_exit(struct perf_tool *tool,
- union perf_event *event __maybe_unused,
- struct perf_sample *sample,
- struct perf_evsel *evsel __maybe_unused,
- struct machine *machine __maybe_unused)
+static int perf_inject__free_cloned_event(struct perf_tool *tool,
+ u32 tid)
{
struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
struct event_entry *ent;
list_for_each_entry(ent, &inject->samples, node) {
- if (sample->tid == ent->tid) {
+ if (tid == ent->tid) {
list_del_init(&ent->node);
free(ent);
break;
@@ -471,21 +469,18 @@ static int perf_inject__sched_process_exit(struct perf_tool *tool,
return 0;
}
-static int perf_inject__sched_switch(struct perf_tool *tool,
- union perf_event *event,
- struct perf_sample *sample,
- struct perf_evsel *evsel,
- struct machine *machine)
+static int perf_inject__clone_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample)
{
struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
struct event_entry *ent;
- perf_inject__sched_process_exit(tool, event, sample, evsel, machine);
-
+ perf_inject__free_cloned_event(tool, sample->tid);
ent = malloc(event->header.size + sizeof(struct event_entry));
if (ent == NULL) {
color_fprintf(stderr, PERF_COLOR_RED,
- "Not enough memory to process sched switch event!");
+ "Not enough memory to clone event!");
return -1;
}
@@ -495,35 +490,104 @@ static int perf_inject__sched_switch(struct perf_tool *tool,
return 0;
}
+static struct event_entry *perf_inject__find_cloned_event(struct perf_tool *tool,
+ u32 tid)
+{
+ struct event_entry *ent;
+ struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+
+ list_for_each_entry(ent, &inject->samples, node) {
+ if (tid == ent->tid)
+ return ent;
+ }
+ return NULL;
+}
+
+static int perf_inject__repipe_cloned_event_sample(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ struct machine *machine,
+ union perf_event *event_sw,
+ struct perf_sample sample_sw)
+{
+ perf_event__synthesize_sample(event_sw, evsel->attr.sample_type,
+ evsel->attr.read_format, &sample_sw,
+ false);
+ build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
+ return perf_event__repipe(tool, event_sw, &sample_sw, machine);
+}
+
+static int perf_inject__sched_process_exit(struct perf_tool *tool,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ perf_inject__free_cloned_event(tool, sample->tid);
+ return 0;
+}
+
+static int perf_inject__sched_switch(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ return perf_inject__clone_event(tool, event, sample);
+}
+
static int perf_inject__sched_stat(struct perf_tool *tool,
union perf_event *event __maybe_unused,
struct perf_sample *sample,
struct perf_evsel *evsel,
struct machine *machine)
{
- struct event_entry *ent;
union perf_event *event_sw;
struct perf_sample sample_sw;
- struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
u32 pid = perf_evsel__intval(evsel, sample, "pid");
+ struct event_entry *ent = perf_inject__find_cloned_event(tool, pid);
- list_for_each_entry(ent, &inject->samples, node) {
- if (pid == ent->tid)
- goto found;
- }
+ if (ent == NULL)
+ return 0;
- return 0;
-found:
event_sw = &ent->event[0];
perf_evsel__parse_sample(evsel, event_sw, &sample_sw);
sample_sw.period = sample->period;
sample_sw.time = sample->time;
- perf_event__synthesize_sample(event_sw, evsel->attr.sample_type,
- evsel->attr.read_format, &sample_sw,
- false);
- build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
- return perf_event__repipe(tool, event_sw, &sample_sw, machine);
+
+ return perf_inject__repipe_cloned_event_sample(tool, evsel, machine,
+ event_sw, sample_sw);
+}
+
+static int perf_inject__raw_syscalls_sys_enter(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ return perf_inject__clone_event(tool, event, sample);
+}
+
+static int perf_inject__raw_syscalls_sys_exit(struct perf_tool *tool,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct machine *machine)
+{
+ union perf_event *event_sw;
+ struct perf_sample sample_sw;
+ struct event_entry *ent = perf_inject__find_cloned_event(tool, sample->tid);
+
+ if (ent == NULL)
+ return 0;
+
+ event_sw = &ent->event[0];
+ perf_evsel__parse_sample(evsel, event_sw, &sample_sw);
+
+ sample_sw.period = sample->time - sample_sw.time;
+
+ return perf_inject__repipe_cloned_event_sample(tool, evsel, machine,
+ event_sw, sample_sw);
}
static void sig_handler(int sig __maybe_unused)
@@ -656,6 +720,24 @@ static int __cmd_inject(struct perf_inject *inject)
else if (!strncmp(name, "sched:sched_stat_", 17))
evsel->handler = perf_inject__sched_stat;
}
+ } else if (inject->trace) {
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(session->evlist, evsel) {
+ const char *name = perf_evsel__name(evsel);
+
+ if (!strcmp(name, "raw_syscalls:sys_exit")) {
+ if (perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
+ return -EINVAL;
+
+ evsel->handler = perf_inject__raw_syscalls_sys_exit;
+ } else if (!strcmp(name, "raw_syscalls:sys_enter")) {
+ if (perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
+ return -EINVAL;
+
+ evsel->handler = perf_inject__raw_syscalls_sys_enter;
+ }
+ }
} else if (inject->itrace_synth_opts.set) {
session->itrace_synth_opts = &inject->itrace_synth_opts;
inject->itrace_synth_opts.inject = true;
@@ -717,6 +799,19 @@ static int __cmd_inject(struct perf_inject *inject)
if (inject->strip)
strip_fini(inject);
}
+ if (inject->trace) {
+ // remove the now empty sys_exit event list
+ struct perf_evsel *evsel, *tmp;
+
+ evlist__for_each_entry_safe(session->evlist, tmp, evsel) {
+ const char *name = perf_evsel__name(evsel);
+
+ if (!strcmp(name, "raw_syscalls:sys_exit")) {
+ perf_evlist__remove(session->evlist, evsel);
+ perf_evsel__delete(evsel);
+ }
+ }
+ }
session->header.data_offset = output_data_offset;
session->header.data_size = inject->bytes_written;
perf_session__write_header(session, session->evlist, fd, true);
@@ -775,6 +870,9 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat,
"Merge sched-stat and sched-switch for getting events "
"where and how long tasks slept"),
+ OPT_BOOLEAN('t', "trace", &inject.trace,
+ "Merge raw_syscalls:sys_enter and raw_syscalls:sys_exit trace "
+ "events to measure how long the individual syscalls took"),
#ifdef HAVE_JITDUMP
OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"),
#endif
@@ -815,7 +913,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
return -1;
}
- inject.tool.ordered_events = inject.sched_stat;
+ inject.tool.ordered_events = inject.sched_stat || inject.trace;
file.path = inject.input_name;
inject.session = perf_session__new(&file, true, &inject.tool);
--
2.9.2
* [PATCH v3] perf inject: Make it possible to merge sys_enter/exit events
From: Milian Wolff @ 2016-08-30 22:42 UTC (permalink / raw)
To: linux-perf-users; +Cc: linux-kernel, acme, Milian Wolff
This makes it much more useful to report data recorded with
`perf trace record`. E.g.:
#include <unistd.h>

int main()
{
        sleep(1);
        usleep(1000);
        usleep(1);
        return 0;
}
Compile and trace it:
# gcc -g sleep.c -o ex_sleep
# perf trace record --call-graph dwarf ex_sleep
Without this patch, the report is not very useful, as it aggregates by
the number of syscalls:
# perf report --stdio --show-nr-samples --show-total-period \
--no-children --sort sym --max-stack 1
#
# Samples: 46 of event 'raw_syscalls:sys_enter'
# Event count (approx.): 46
#
# Overhead Samples Period Symbol
# ........ ............ ............ ..............................
#
100.00% 46 46 [k] syscall_trace_enter_phase2
|
|--30.43%--open64
|
|--17.39%--_xstat
|
|--15.22%--mmap64
|
|--8.70%--mprotect
|
|--6.52%--__nanosleep
|
|--4.35%--close
|
|--4.35%--_fxstat
|
|--2.17%--_exit
|
|--2.17%--munmap
|
|--2.17%--access
|
|--2.17%--read
|
|--2.17%--brk
|
|--2.17%--init_tls
The report for sys_exit looks the same. Note how the period equals the
sample count, i.e. every syscall contributes a period of 1. Yes, open,
stat and mmap are called often, but are these syscalls really
expensive? Hard to tell.
This patch adds a new option to perf inject that merges the sys_enter
and sys_exit events and sets the period of the merged sample to the
time elapsed between the two:
# perf inject -t -i perf.data -o perf.data.merged
This makes the report much nicer:
# perf report --stdio --show-total-period --no-children --sort sym \
--max-stack 1 -i perf.data.merged
#
# Total Lost Samples: 0
#
# Samples: 45 of event 'raw_syscalls:sys_enter'
# Event count (approx.): 1001520568
#
# Overhead Period Symbol
# ........ ............ ..............................
#
100.00% 1001520568 [k] syscall_trace_enter_phase2
|
|--99.99%--__nanosleep
Note that the other syscalls in this example are so cheap that they
are filtered out of the report by default. You can bring them back,
e.g. via `--call-graph graph,0`, by omitting `--no-children`, or via
`--percent-limit 0`.
The report of the merged data can also aggregate by the raw number of
syscalls again, albeit with a pretty unwieldy command:
# perf report -i perf.data.merged --sort sym --show-nr-samples \
-g graph,0,caller,function,count --stdio --max-stack 1
#
# Samples: 45 of event 'raw_syscalls:sys_enter'
# Event count (approx.): 1001520568
#
# Children Self Samples Symbol
# ........ ........ ............ ..............................
#
100.00% 100.00% 45 [k] syscall_trace_enter_phase2
|
|--3--__nanosleep
|
|--14--open64
|
|--7--mmap64
|
|--8--_xstat
|
|--4--mprotect
|
|--1--munmap
|
|--2--_fxstat
|
|--1--access
|
|--2--close
|
|--1--read
|
|--1--init_tls
|
|--1--brk
Signed-off-by: Milian Wolff <milian.wolff@kdab.com>
---
tools/perf/Documentation/perf-inject.txt | 5 +
tools/perf/builtin-inject.c | 157 +++++++++++++++++++++++++------
2 files changed, 134 insertions(+), 28 deletions(-)
diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt
index 87b2588..ecfa1ea 100644
--- a/tools/perf/Documentation/perf-inject.txt
+++ b/tools/perf/Documentation/perf-inject.txt
@@ -41,6 +41,11 @@ OPTIONS
tasks slept. sched_switch contains a callchain where a task slept and
sched_stat contains a timeslice how long a task slept.
+-t::
+--trace::
+ Merge sys_enter and sys_exit trace events to measure how long the
+ individual syscalls took.
+
--kallsyms=<file>::
kallsyms pathname
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index 73c1c4c..8b959bf 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -31,6 +31,7 @@ struct perf_inject {
bool have_auxtrace;
bool strip;
bool jit_mode;
+ bool trace;
const char *input_name;
struct perf_data_file output;
u64 bytes_written;
@@ -451,17 +452,14 @@ repipe:
return 0;
}
-static int perf_inject__sched_process_exit(struct perf_tool *tool,
- union perf_event *event __maybe_unused,
- struct perf_sample *sample,
- struct perf_evsel *evsel __maybe_unused,
- struct machine *machine __maybe_unused)
+static int perf_inject__free_cloned_event(struct perf_tool *tool,
+ u32 tid)
{
struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
struct event_entry *ent;
list_for_each_entry(ent, &inject->samples, node) {
- if (sample->tid == ent->tid) {
+ if (tid == ent->tid) {
list_del_init(&ent->node);
free(ent);
break;
@@ -471,21 +469,18 @@ static int perf_inject__sched_process_exit(struct perf_tool *tool,
return 0;
}
-static int perf_inject__sched_switch(struct perf_tool *tool,
- union perf_event *event,
- struct perf_sample *sample,
- struct perf_evsel *evsel,
- struct machine *machine)
+static int perf_inject__clone_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample)
{
struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
struct event_entry *ent;
- perf_inject__sched_process_exit(tool, event, sample, evsel, machine);
-
+ perf_inject__free_cloned_event(tool, sample->tid);
ent = malloc(event->header.size + sizeof(struct event_entry));
if (ent == NULL) {
color_fprintf(stderr, PERF_COLOR_RED,
- "Not enough memory to process sched switch event!");
+ "Not enough memory to clone event!");
return -1;
}
@@ -495,35 +490,104 @@ static int perf_inject__sched_switch(struct perf_tool *tool,
return 0;
}
+static struct event_entry *perf_inject__find_cloned_event(struct perf_tool *tool,
+ u32 tid)
+{
+ struct event_entry *ent;
+ struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
+
+ list_for_each_entry(ent, &inject->samples, node) {
+ if (tid == ent->tid)
+ return ent;
+ }
+ return NULL;
+}
+
+static int perf_inject__repipe_cloned_event_sample(struct perf_tool *tool,
+ struct perf_evsel *evsel,
+ struct machine *machine,
+ union perf_event *event_sw,
+ struct perf_sample sample_sw)
+{
+ perf_event__synthesize_sample(event_sw, evsel->attr.sample_type,
+ evsel->attr.read_format, &sample_sw,
+ false);
+ build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
+ return perf_event__repipe(tool, event_sw, &sample_sw, machine);
+}
+
+static int perf_inject__sched_process_exit(struct perf_tool *tool,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ perf_inject__free_cloned_event(tool, sample->tid);
+ return 0;
+}
+
+static int perf_inject__sched_switch(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ return perf_inject__clone_event(tool, event, sample);
+}
+
static int perf_inject__sched_stat(struct perf_tool *tool,
union perf_event *event __maybe_unused,
struct perf_sample *sample,
struct perf_evsel *evsel,
struct machine *machine)
{
- struct event_entry *ent;
union perf_event *event_sw;
struct perf_sample sample_sw;
- struct perf_inject *inject = container_of(tool, struct perf_inject, tool);
u32 pid = perf_evsel__intval(evsel, sample, "pid");
+ struct event_entry *ent = perf_inject__find_cloned_event(tool, pid);
- list_for_each_entry(ent, &inject->samples, node) {
- if (pid == ent->tid)
- goto found;
- }
+ if (ent == NULL)
+ return 0;
- return 0;
-found:
event_sw = &ent->event[0];
perf_evsel__parse_sample(evsel, event_sw, &sample_sw);
sample_sw.period = sample->period;
sample_sw.time = sample->time;
- perf_event__synthesize_sample(event_sw, evsel->attr.sample_type,
- evsel->attr.read_format, &sample_sw,
- false);
- build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine);
- return perf_event__repipe(tool, event_sw, &sample_sw, machine);
+
+ return perf_inject__repipe_cloned_event_sample(tool, evsel, machine,
+ event_sw, sample_sw);
+}
+
+static int perf_inject__sys_enter(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+ return perf_inject__clone_event(tool, event, sample);
+}
+
+static int perf_inject__sys_exit(struct perf_tool *tool,
+ union perf_event *event __maybe_unused,
+ struct perf_sample *sample,
+ struct perf_evsel *evsel,
+ struct machine *machine)
+{
+ union perf_event *event_sw;
+ struct perf_sample sample_sw;
+ struct event_entry *ent = perf_inject__find_cloned_event(tool, sample->tid);
+
+ if (ent == NULL)
+ return 0;
+
+ event_sw = &ent->event[0];
+ perf_evsel__parse_sample(evsel, event_sw, &sample_sw);
+
+ sample_sw.period = sample->time - sample_sw.time;
+
+ return perf_inject__repipe_cloned_event_sample(tool, evsel, machine,
+ event_sw, sample_sw);
}
static void sig_handler(int sig __maybe_unused)
@@ -656,6 +720,26 @@ static int __cmd_inject(struct perf_inject *inject)
else if (!strncmp(name, "sched:sched_stat_", 17))
evsel->handler = perf_inject__sched_stat;
}
+ } else if (inject->trace) {
+ struct perf_evsel *evsel;
+
+ evlist__for_each_entry(session->evlist, evsel) {
+ const char *name = perf_evsel__name(evsel);
+
+ if (!strcmp(name, "raw_syscalls:sys_exit") ||
+ !strncmp(name, "syscalls:sys_exit_", sizeof("syscalls:sys_exit_") - 1)) {
+ if (perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
+ return -EINVAL;
+
+ evsel->handler = perf_inject__sys_exit;
+ } else if (!strcmp(name, "raw_syscalls:sys_enter") ||
+ !strncmp(name, "syscalls:sys_enter_", sizeof("syscalls:sys_enter_") - 1)) {
+ if (perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID"))
+ return -EINVAL;
+
+ evsel->handler = perf_inject__sys_enter;
+ }
+ }
} else if (inject->itrace_synth_opts.set) {
session->itrace_synth_opts = &inject->itrace_synth_opts;
inject->itrace_synth_opts.inject = true;
@@ -717,6 +801,20 @@ static int __cmd_inject(struct perf_inject *inject)
if (inject->strip)
strip_fini(inject);
}
+ if (inject->trace) {
+ // remove the now empty sys_exit event list
+ struct perf_evsel *evsel, *tmp;
+
+ evlist__for_each_entry_safe(session->evlist, tmp, evsel) {
+ const char *name = perf_evsel__name(evsel);
+
+ if (!strcmp(name, "raw_syscalls:sys_exit") ||
+ !strncmp(name, "syscalls:sys_exit_", sizeof("syscalls:sys_exit_") - 1)) {
+ perf_evlist__remove(session->evlist, evsel);
+ perf_evsel__delete(evsel);
+ }
+ }
+ }
session->header.data_offset = output_data_offset;
session->header.data_size = inject->bytes_written;
perf_session__write_header(session, session->evlist, fd, true);
@@ -775,6 +873,9 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
OPT_BOOLEAN('s', "sched-stat", &inject.sched_stat,
"Merge sched-stat and sched-switch for getting events "
"where and how long tasks slept"),
+ OPT_BOOLEAN('t', "trace", &inject.trace,
+ "Merge sys_enter and sys_exit trace events to measure "
+ "how long the individual syscalls took"),
#ifdef HAVE_JITDUMP
OPT_BOOLEAN('j', "jit", &inject.jit_mode, "merge jitdump files into perf.data file"),
#endif
@@ -815,7 +916,7 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused)
return -1;
}
- inject.tool.ordered_events = inject.sched_stat;
+ inject.tool.ordered_events = inject.sched_stat || inject.trace;
file.path = inject.input_name;
inject.session = perf_session__new(&file, true, &inject.tool);
--
2.9.3