* [PATCH 1/3] perf, tools: Add support for skipping itrace instructions
@ 2016-02-29 23:05 Andi Kleen
2016-02-29 23:05 ` [PATCH 2/3] perf, tools: Add support for probing for udev86 Andi Kleen
` (2 more replies)
0 siblings, 3 replies; 4+ messages in thread
From: Andi Kleen @ 2016-02-29 23:05 UTC (permalink / raw)
To: acme; +Cc: jolsa, linux-kernel, Andi Kleen, Adrian Hunter
From: Andi Kleen <ak@linux.intel.com>
When using perf script to look at PT traces it is often
useful to ignore the initialization code at the beginning.
On larger traces which may have many millions of instructions
in initialization code doing that in a pipeline can be very
slow, with perf script spending a lot of CPU time calling
printf and writing data.
This patch adds an extension to the --itrace argument
that skips 'n' events (instructions, branches or transactions)
at the beginning. This is much more efficient.
Cc: Adrian Hunter <adrian.hunter@intel.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
tools/perf/Documentation/intel-pt.txt | 7 +++++++
tools/perf/util/auxtrace.c | 7 +++++++
tools/perf/util/auxtrace.h | 2 ++
tools/perf/util/intel-pt.c | 14 ++++++++++++--
4 files changed, 28 insertions(+), 2 deletions(-)
diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
index be764f9..c6c8318 100644
--- a/tools/perf/Documentation/intel-pt.txt
+++ b/tools/perf/Documentation/intel-pt.txt
@@ -672,6 +672,7 @@ The letters are:
d create a debug log
g synthesize a call chain (use with i or x)
l synthesize last branch entries (use with i or x)
+ s skip initial number of events
"Instructions" events look like they were recorded by "perf record -e
instructions".
@@ -730,6 +731,12 @@ from one sample to the next.
To disable trace decoding entirely, use the option --no-itrace.
+It is also possible to skip events generated (instructions, branches, transactions)
+at the beginning. This is useful to ignore initialization code.
+
+ --itrace=i0nss1000000
+
+skips the first million instructions.
dump option
-----------
diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
index 360fda0..a013c1f 100644
--- a/tools/perf/util/auxtrace.c
+++ b/tools/perf/util/auxtrace.c
@@ -939,6 +939,7 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts)
synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
+ synth_opts->initial_skip = 0;
}
/*
@@ -1063,6 +1064,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
synth_opts->last_branch_sz = val;
}
break;
+ case 's':
+ synth_opts->initial_skip = strtoul(p, &endptr, 10);
+ if (p == endptr)
+ goto out_err;
+ p = endptr;
+ break;
case ' ':
case ',':
break;
diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
index b86f90db..235d3c3 100644
--- a/tools/perf/util/auxtrace.h
+++ b/tools/perf/util/auxtrace.h
@@ -68,6 +68,7 @@ enum itrace_period_type {
* @last_branch_sz: branch context size
* @period: 'instructions' events period
* @period_type: 'instructions' events period type
+ * @initial_skip: skip N events at the beginning.
*/
struct itrace_synth_opts {
bool set;
@@ -86,6 +87,7 @@ struct itrace_synth_opts {
unsigned int last_branch_sz;
unsigned long long period;
enum itrace_period_type period_type;
+ unsigned long initial_skip;
};
/**
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 05d8158..a0c5e7d 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -100,6 +100,8 @@ struct intel_pt {
u64 cyc_bit;
u64 noretcomp_bit;
unsigned max_non_turbo_ratio;
+
+ unsigned long num_events;
};
enum switch_state {
@@ -972,6 +974,10 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
return 0;
+ if (pt->synth_opts.initial_skip &&
+ pt->num_events++ >= pt->synth_opts.initial_skip)
+ return 0;
+
event->sample.header.type = PERF_RECORD_SAMPLE;
event->sample.header.misc = PERF_RECORD_MISC_USER;
event->sample.header.size = sizeof(struct perf_event_header);
@@ -1196,14 +1202,18 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
ptq->have_sample = false;
if (pt->sample_instructions &&
- (state->type & INTEL_PT_INSTRUCTION)) {
+ (state->type & INTEL_PT_INSTRUCTION) &&
+ (!pt->synth_opts.initial_skip ||
+ pt->num_events++ >= pt->synth_opts.initial_skip)) {
err = intel_pt_synth_instruction_sample(ptq);
if (err)
return err;
}
if (pt->sample_transactions &&
- (state->type & INTEL_PT_TRANSACTION)) {
+ (state->type & INTEL_PT_TRANSACTION) &&
+ (!pt->synth_opts.initial_skip ||
+ pt->num_events++ >= pt->synth_opts.initial_skip)) {
err = intel_pt_synth_transaction_sample(ptq);
if (err)
return err;
--
2.5.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 2/3] perf, tools: Add support for probing for udev86
2016-02-29 23:05 [PATCH 1/3] perf, tools: Add support for skipping itrace instructions Andi Kleen
@ 2016-02-29 23:05 ` Andi Kleen
2016-02-29 23:05 ` [PATCH 3/3] perf, tools, script: Add support for printing assembler Andi Kleen
2016-03-02 9:43 ` [PATCH 1/3] perf, tools: Add support for skipping itrace instructions Adrian Hunter
2 siblings, 0 replies; 4+ messages in thread
From: Andi Kleen @ 2016-02-29 23:05 UTC (permalink / raw)
To: acme; +Cc: jolsa, linux-kernel, Andi Kleen
From: Andi Kleen <ak@linux.intel.com>
Add support for autoprobing for the udis86 disassembler library.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
tools/build/Makefile.feature | 6 ++++--
tools/build/feature/Makefile | 8 ++++++--
tools/build/feature/test-all.c | 5 +++++
tools/perf/config/Makefile | 5 +++++
4 files changed, 20 insertions(+), 4 deletions(-)
diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature
index 02db3cd..7de4fcb 100644
--- a/tools/build/Makefile.feature
+++ b/tools/build/Makefile.feature
@@ -54,7 +54,8 @@ FEATURE_TESTS ?= \
zlib \
lzma \
get_cpuid \
- bpf
+ bpf \
+ udis86
FEATURE_DISPLAY ?= \
dwarf \
@@ -73,7 +74,8 @@ FEATURE_DISPLAY ?= \
zlib \
lzma \
get_cpuid \
- bpf
+ bpf \
+ udis86
# Set FEATURE_CHECK_(C|LD)FLAGS-all for all FEATURE_TESTS features.
# If in the future we need per-feature checks/flags for features not
diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile
index bf8f035..c89941e 100644
--- a/tools/build/feature/Makefile
+++ b/tools/build/feature/Makefile
@@ -35,7 +35,8 @@ FILES= \
test-zlib.bin \
test-lzma.bin \
test-bpf.bin \
- test-get_cpuid.bin
+ test-get_cpuid.bin \
+ test-udis86.bin
FILES := $(addprefix $(OUTPUT),$(FILES))
@@ -50,7 +51,7 @@ __BUILD = $(CC) $(CFLAGS) -Wall -Werror -o $@ $(patsubst %.bin,%.c,$(@F)) $(LDFL
###############################
$(OUTPUT)test-all.bin:
- $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -laudit -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma
+ $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -laudit -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma -ludis86
$(OUTPUT)test-hello.bin:
$(BUILD)
@@ -96,6 +97,9 @@ $(OUTPUT)test-numa_num_possible_cpus.bin:
$(OUTPUT)test-libunwind.bin:
$(BUILD) -lelf
+$(OUTPUT)test-udis86.bin:
+ $(BUILD) -ludis86
+
$(OUTPUT)test-libunwind-debug-frame.bin:
$(BUILD) -lelf
diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c
index 81025ca..d67b1d5 100644
--- a/tools/build/feature/test-all.c
+++ b/tools/build/feature/test-all.c
@@ -129,6 +129,10 @@
# include "test-bpf.c"
#undef main
+#define main main_test_udis86
+# include "test-udis86.c"
+#endif
+
int main(int argc, char *argv[])
{
main_test_libpython();
@@ -158,6 +162,7 @@ int main(int argc, char *argv[])
main_test_lzma();
main_test_get_cpuid();
main_test_bpf();
+ main_test_udis86();
return 0;
}
diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile
index 511141b..19bac7c 100644
--- a/tools/perf/config/Makefile
+++ b/tools/perf/config/Makefile
@@ -576,6 +576,11 @@ ifneq ($(filter -lbfd,$(EXTLIBS)),)
CFLAGS += -DHAVE_LIBBFD_SUPPORT
endif
+ifeq ($(feature-udis86), 1)
+ CFLAGS += -DHAVE_UDIS86
+ EXTLIBS += -ludis86
+endif
+
ifndef NO_ZLIB
ifeq ($(feature-zlib), 1)
CFLAGS += -DHAVE_ZLIB_SUPPORT
--
2.5.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* [PATCH 3/3] perf, tools, script: Add support for printing assembler
2016-02-29 23:05 [PATCH 1/3] perf, tools: Add support for skipping itrace instructions Andi Kleen
2016-02-29 23:05 ` [PATCH 2/3] perf, tools: Add support for probing for udev86 Andi Kleen
@ 2016-02-29 23:05 ` Andi Kleen
2016-03-02 9:43 ` [PATCH 1/3] perf, tools: Add support for skipping itrace instructions Adrian Hunter
2 siblings, 0 replies; 4+ messages in thread
From: Andi Kleen @ 2016-02-29 23:05 UTC (permalink / raw)
To: acme; +Cc: jolsa, linux-kernel, Andi Kleen, adrian.hunter
From: Andi Kleen <ak@linux.intel.com>
When dumping PT traces with perf script it is very useful to see the
assembler for each sample, so that it is easily possible to follow
the control flow.
As using objdump is difficult and inefficient from perf script this
patch uses the udis86 library to implement assembler output.
The library can be downloaded from http://udis86.sourceforge.net/
The library is probed as an external dependency in the usual way. Then perf
script calls into it when needed, and handles callbacks to resolve
symbols.
% perf record -e intel_pt//u true
% perf script -F sym,symoff,ip,asm --itrace=i0ns | head
7fc7188b4190 _start+0x0 mov %rsp, %rdi
7fc7188b4193 _start+0x3 call _dl_start
7fc7188b7710 _dl_start+0x0 push %rbp
7fc7188b7711 _dl_start+0x1 mov %rsp, %rbp
7fc7188b7714 _dl_start+0x4 push %r15
7fc7188b7716 _dl_start+0x6 push %r14
7fc7188b7718 _dl_start+0x8 push %r13
7fc7188b771a _dl_start+0xa push %r12
7fc7188b771c _dl_start+0xc mov %rdi, %r12
7fc7188b771f _dl_start+0xf push %rbx
Current issues:
- Some jump references do not get resolved to symbols.
- udis86 release does not support STAC/CLAC, which are used in the kernel,
but there is a pending patch for it.
Cc: adrian.hunter@intel.com
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
tools/perf/Documentation/perf-script.txt | 4 +-
tools/perf/builtin-script.c | 108 +++++++++++++++++++++++++++++--
2 files changed, 106 insertions(+), 6 deletions(-)
diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt
index 382ddfb..c834f4d 100644
--- a/tools/perf/Documentation/perf-script.txt
+++ b/tools/perf/Documentation/perf-script.txt
@@ -116,7 +116,7 @@ OPTIONS
--fields::
Comma separated list of fields to print. Options are:
comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff,
- srcline, period, iregs, brstack, brstacksym, flags.
+ srcline, period, iregs, brstack, brstacksym, flags, asm.
Field list can be prepended with the type, trace, sw or hw,
to indicate to which event type the field list applies.
e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace
@@ -185,6 +185,8 @@ OPTIONS
The brstacksym is identical to brstack, except that the FROM and TO addresses are printed in a symbolic form if possible.
+ When asm is specified the assembler instruction of each sample is printed in disassembled form.
+
-k::
--vmlinux=<file>::
vmlinux pathname
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index c691214..9fd8cb3 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -24,6 +24,10 @@
#include <linux/bitmap.h>
#include "asm/bug.h"
+#ifdef HAVE_UDIS86
+#include <udis86.h>
+#endif
+
static char const *script_name;
static char const *generate_script_lang;
static bool debug_mode;
@@ -58,6 +62,7 @@ enum perf_output_field {
PERF_OUTPUT_IREGS = 1U << 14,
PERF_OUTPUT_BRSTACK = 1U << 15,
PERF_OUTPUT_BRSTACKSYM = 1U << 16,
+ PERF_OUTPUT_ASM = 1U << 17,
};
struct output_option {
@@ -81,6 +86,7 @@ struct output_option {
{.str = "iregs", .field = PERF_OUTPUT_IREGS},
{.str = "brstack", .field = PERF_OUTPUT_BRSTACK},
{.str = "brstacksym", .field = PERF_OUTPUT_BRSTACKSYM},
+ {.str = "asm", .field = PERF_OUTPUT_ASM},
};
/* default set to maintain compatibility with current format */
@@ -264,7 +270,11 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
"selected. Hence, no address to lookup the source line number.\n");
return -EINVAL;
}
-
+ if (PRINT_FIELD(ASM) && !PRINT_FIELD(IP)) {
+ pr_err("Display of assembler requested but sample IP is not\n"
+ "selected.\n");
+ return -EINVAL;
+ }
if ((PRINT_FIELD(PID) || PRINT_FIELD(TID)) &&
perf_evsel__check_stype(evsel, PERF_SAMPLE_TID, "TID",
PERF_OUTPUT_TID|PERF_OUTPUT_PID))
@@ -405,6 +415,89 @@ static void print_sample_iregs(union perf_event *event __maybe_unused,
}
}
+#ifdef HAVE_UDIS86
+
+struct perf_ud {
+ ud_t ud_obj;
+ struct thread *thread;
+ u8 cpumode;
+ int cpu;
+};
+
+static const char *dis_resolve(struct ud *u, uint64_t addr, int64_t *off)
+{
+ struct perf_ud *ud = container_of(u, struct perf_ud, ud_obj);
+ struct addr_location al;
+
+ memset(&al, 0, sizeof(struct addr_location));
+
+ thread__find_addr_map(ud->thread, ud->cpumode, MAP__FUNCTION, addr, &al);
+ if (!al.map)
+ thread__find_addr_map(ud->thread, ud->cpumode, MAP__VARIABLE,
+ addr, &al);
+ al.cpu = ud->cpu;
+ al.sym = NULL;
+
+ if (al.map)
+ al.sym = map__find_symbol(al.map, al.addr, NULL);
+
+ if (!al.sym)
+ return NULL;
+
+ if (addr < al.sym->end)
+ *off = addr - al.sym->start;
+ else
+ *off = addr - al.map->start - al.sym->start;
+ return al.sym->name;
+}
+#endif
+
+static void print_sample_asm(union perf_event *event __maybe_unused,
+ struct perf_sample *sample __maybe_unused,
+ struct thread *thread __maybe_unused,
+ struct perf_event_attr *attr __maybe_unused,
+ struct addr_location *al __maybe_unused,
+ struct machine *machine __maybe_unused)
+{
+#ifdef HAVE_UDIS86
+ static bool ud_initialized = false;
+ static struct perf_ud ud;
+ u8 buffer[32];
+ int len;
+ u64 offset;
+
+ if (!ud_initialized) {
+ ud_initialized = true;
+ ud_init(&ud.ud_obj);
+ ud_set_syntax(&ud.ud_obj, UD_SYN_ATT);
+ ud_set_sym_resolver(&ud.ud_obj, dis_resolve);
+ }
+ ud.thread = thread;
+ ud.cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+ ud.cpu = sample->cpu;
+
+ if (!al->map || !al->map->dso)
+ return;
+ if (al->map->dso->data.status == DSO_DATA_STATUS_ERROR)
+ return;
+
+ /* Load maps to ensure dso->is_64_bit has been updated */
+ map__load(al->map, machine->symbol_filter);
+
+ offset = al->map->map_ip(al->map, sample->ip);
+ len = dso__data_read_offset(al->map->dso, machine,
+ offset, buffer, 32);
+ if (len <= 0)
+ return;
+
+ ud_set_mode(&ud.ud_obj, al->map->dso->is_64_bit ? 64 : 32);
+ ud_set_pc(&ud.ud_obj, sample->ip);
+ ud_set_input_buffer(&ud.ud_obj, buffer, len);
+ ud_disassemble(&ud.ud_obj);
+ printf("\t%s", ud_insn_asm(&ud.ud_obj));
+#endif
+}
+
static void print_sample_start(struct perf_sample *sample,
struct thread *thread,
struct perf_evsel *evsel)
@@ -636,7 +729,8 @@ static int perf_evlist__max_name_len(struct perf_evlist *evlist)
static void process_event(struct perf_script *script, union perf_event *event,
struct perf_sample *sample, struct perf_evsel *evsel,
- struct addr_location *al)
+ struct addr_location *al,
+ struct machine *machine)
{
struct thread *thread = al->thread;
struct perf_event_attr *attr = &evsel->attr;
@@ -664,7 +758,7 @@ static void process_event(struct perf_script *script, union perf_event *event,
if (is_bts_event(attr)) {
print_sample_bts(event, sample, evsel, thread, al);
- return;
+ goto print_rest;
}
if (PRINT_FIELD(TRACE))
@@ -687,11 +781,15 @@ static void process_event(struct perf_script *script, union perf_event *event,
if (PRINT_FIELD(IREGS))
print_sample_iregs(event, sample, thread, attr);
+print_rest:
if (PRINT_FIELD(BRSTACK))
print_sample_brstack(event, sample, thread, attr);
else if (PRINT_FIELD(BRSTACKSYM))
print_sample_brstacksym(event, sample, thread, attr);
+ if (PRINT_FIELD(ASM))
+ print_sample_asm(event, sample, thread, attr, al, machine);
+
printf("\n");
}
@@ -798,7 +896,7 @@ static int process_sample_event(struct perf_tool *tool,
if (scripting_ops)
scripting_ops->process_event(event, sample, evsel, &al);
else
- process_event(scr, event, sample, evsel, &al);
+ process_event(scr, event, sample, evsel, &al, machine);
out_put:
addr_location__put(&al);
@@ -1913,7 +2011,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
"comma separated output fields prepend with 'type:'. "
"Valid types: hw,sw,trace,raw. "
"Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso,"
- "addr,symoff,period,iregs,brstack,brstacksym,flags", parse_output_fields),
+ "addr,symoff,period,iregs,brstack,brstacksym,flags,asm", parse_output_fields),
OPT_BOOLEAN('a', "all-cpus", &system_wide,
"system-wide collection from all CPUs"),
OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]",
--
2.5.0
^ permalink raw reply related [flat|nested] 4+ messages in thread
* Re: [PATCH 1/3] perf, tools: Add support for skipping itrace instructions
2016-02-29 23:05 [PATCH 1/3] perf, tools: Add support for skipping itrace instructions Andi Kleen
2016-02-29 23:05 ` [PATCH 2/3] perf, tools: Add support for probing for udev86 Andi Kleen
2016-02-29 23:05 ` [PATCH 3/3] perf, tools, script: Add support for printing assembler Andi Kleen
@ 2016-03-02 9:43 ` Adrian Hunter
2 siblings, 0 replies; 4+ messages in thread
From: Adrian Hunter @ 2016-03-02 9:43 UTC (permalink / raw)
To: Andi Kleen, acme; +Cc: jolsa, linux-kernel, Andi Kleen
On 01/03/16 01:05, Andi Kleen wrote:
> From: Andi Kleen <ak@linux.intel.com>
>
> When using perf script to look at PT traces it is often
> useful to ignore the initialization code at the beginning.
>
> On larger traces which may have many millions of instructions
> in initialization code doing that in a pipeline can be very
> slow, with perf script spending a lot of CPU time calling
> printf and writing data.
>
> This patch adds an extension to the --itrace argument
> that skips 'n' events (instructions, branches or transactions)
> at the beginning. This is much more efficient.
>
> Cc: Adrian Hunter <adrian.hunter@intel.com>
> Signed-off-by: Andi Kleen <ak@linux.intel.com>
> ---
> tools/perf/Documentation/intel-pt.txt | 7 +++++++
> tools/perf/util/auxtrace.c | 7 +++++++
> tools/perf/util/auxtrace.h | 2 ++
> tools/perf/util/intel-pt.c | 14 ++++++++++++--
> 4 files changed, 28 insertions(+), 2 deletions(-)
>
> diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
> index be764f9..c6c8318 100644
> --- a/tools/perf/Documentation/intel-pt.txt
> +++ b/tools/perf/Documentation/intel-pt.txt
> @@ -672,6 +672,7 @@ The letters are:
> d create a debug log
> g synthesize a call chain (use with i or x)
> l synthesize last branch entries (use with i or x)
> + s skip initial number of events
>
> "Instructions" events look like they were recorded by "perf record -e
> instructions".
> @@ -730,6 +731,12 @@ from one sample to the next.
>
> To disable trace decoding entirely, use the option --no-itrace.
>
> +It is also possible to skip events generated (instructions, branches, transactions)
> +at the beginning. This is useful to ignore initialization code.
> +
> + --itrace=i0nss1000000
> +
> +skips the first million instructions.
>
> dump option
> -----------
Also need to update tools/perf/Documentation/itrace.txt
> diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
> index 360fda0..a013c1f 100644
> --- a/tools/perf/util/auxtrace.c
> +++ b/tools/perf/util/auxtrace.c
> @@ -939,6 +939,7 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts)
> synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
> synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
> synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
> + synth_opts->initial_skip = 0;
> }
>
> /*
> @@ -1063,6 +1064,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
> synth_opts->last_branch_sz = val;
> }
> break;
> + case 's':
> + synth_opts->initial_skip = strtoul(p, &endptr, 10);
> + if (p == endptr)
> + goto out_err;
> + p = endptr;
> + break;
> case ' ':
> case ',':
> break;
> diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
> index b86f90db..235d3c3 100644
> --- a/tools/perf/util/auxtrace.h
> +++ b/tools/perf/util/auxtrace.h
> @@ -68,6 +68,7 @@ enum itrace_period_type {
> * @last_branch_sz: branch context size
> * @period: 'instructions' events period
> * @period_type: 'instructions' events period type
> + * @initial_skip: skip N events at the beginning.
> */
> struct itrace_synth_opts {
> bool set;
> @@ -86,6 +87,7 @@ struct itrace_synth_opts {
> unsigned int last_branch_sz;
> unsigned long long period;
> enum itrace_period_type period_type;
> + unsigned long initial_skip;
> };
>
> /**
> diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
> index 05d8158..a0c5e7d 100644
> --- a/tools/perf/util/intel-pt.c
> +++ b/tools/perf/util/intel-pt.c
> @@ -100,6 +100,8 @@ struct intel_pt {
> u64 cyc_bit;
> u64 noretcomp_bit;
> unsigned max_non_turbo_ratio;
> +
> + unsigned long num_events;
> };
>
> enum switch_state {
> @@ -972,6 +974,10 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
> if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
> return 0;
>
> + if (pt->synth_opts.initial_skip &&
> + pt->num_events++ >= pt->synth_opts.initial_skip)
The '>' looks the wrong way around
> + return 0;
> +
For consistency, wouldn't it be better to do it this way for "instructions"
and "transactions" too, i.e. in intel_pt_synth_instruction_sample() and
intel_pt_synth_transaction_sample()?
> event->sample.header.type = PERF_RECORD_SAMPLE;
> event->sample.header.misc = PERF_RECORD_MISC_USER;
> event->sample.header.size = sizeof(struct perf_event_header);
> @@ -1196,14 +1202,18 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
> ptq->have_sample = false;
>
> if (pt->sample_instructions &&
> - (state->type & INTEL_PT_INSTRUCTION)) {
> + (state->type & INTEL_PT_INSTRUCTION) &&
> + (!pt->synth_opts.initial_skip ||
> + pt->num_events++ >= pt->synth_opts.initial_skip)) {
> err = intel_pt_synth_instruction_sample(ptq);
> if (err)
> return err;
> }
>
> if (pt->sample_transactions &&
> - (state->type & INTEL_PT_TRANSACTION)) {
> + (state->type & INTEL_PT_TRANSACTION) &&
> + (!pt->synth_opts.initial_skip ||
> + pt->num_events++ >= pt->synth_opts.initial_skip)) {
> err = intel_pt_synth_transaction_sample(ptq);
> if (err)
> return err;
>
The BTS code is almost the same, so I would suggest doing BTS as well as PT.
i.e.
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index eb0e7f8bf515..e3cc1057a097 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -66,6 +66,7 @@ struct intel_bts {
u64 branches_id;
size_t branches_event_size;
bool synth_needs_swap;
+ unsigned long num_events;
};
struct intel_bts_queue {
@@ -275,6 +276,10 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
union perf_event event;
struct perf_sample sample = { .ip = 0, };
+ if (bts->synth_opts.initial_skip &&
+ bts->num_events++ <= bts->synth_opts.initial_skip)
+ return 0;
+
event.sample.header.type = PERF_RECORD_SAMPLE;
event.sample.header.misc = PERF_RECORD_MISC_USER;
event.sample.header.size = sizeof(struct perf_event_header);
^ permalink raw reply related [flat|nested] 4+ messages in thread
end of thread, other threads:[~2016-03-02 9:47 UTC | newest]
Thread overview: 4+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2016-02-29 23:05 [PATCH 1/3] perf, tools: Add support for skipping itrace instructions Andi Kleen
2016-02-29 23:05 ` [PATCH 2/3] perf, tools: Add support for probing for udev86 Andi Kleen
2016-02-29 23:05 ` [PATCH 3/3] perf, tools, script: Add support for printing assembler Andi Kleen
2016-03-02 9:43 ` [PATCH 1/3] perf, tools: Add support for skipping itrace instructions Adrian Hunter
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox