From: Adrian Hunter <adrian.hunter@intel.com>
To: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>, Andi Kleen <ak@linux.intel.com>,
linux-kernel@vger.kernel.org
Subject: [PATCH 7/9] perf intel-pt: Add support for synthesizing branch stacks for regular events
Date: Wed, 29 Apr 2020 18:07:49 +0300 [thread overview]
Message-ID: <20200429150751.12570-8-adrian.hunter@intel.com> (raw)
In-Reply-To: <20200429150751.12570-1-adrian.hunter@intel.com>
Use the new thread_stack__br_sample_late() function to create a branch
stack for regular events.
Example:
# perf record --kcore --aux-sample -e '{intel_pt//,cycles:ppp}' -c 10000 uname
Linux
[ perf record: Woken up 2 times to write data ]
[ perf record: Captured and wrote 0.743 MB perf.data ]
# perf report --itrace=Le --stdio | head -30 | tail -18
# Samples: 11K of event 'cycles:ppp'
# Event count (approx.): 11648
#
# Overhead Command Source Shared Object Source Symbol Target Symbol Basic Block Cycles
# ........ ....... .................... .............................................. .............................................. ..................
#
5.49% uname libc-2.30.so [.] _dl_addr [.] _dl_addr -
2.41% uname ld-2.30.so [.] _dl_relocate_object [.] _dl_relocate_object -
2.31% uname ld-2.30.so [.] do_lookup_x [.] do_lookup_x -
2.17% uname [kernel.kallsyms] [k] unmap_page_range [k] unmap_page_range -
2.05% uname ld-2.30.so [k] _dl_start [k] _dl_start -
1.97% uname ld-2.30.so [.] _dl_lookup_symbol_x [.] _dl_lookup_symbol_x -
1.94% uname [kernel.kallsyms] [k] filemap_map_pages [k] filemap_map_pages -
1.60% uname [kernel.kallsyms] [k] __handle_mm_fault [k] __handle_mm_fault -
1.44% uname [kernel.kallsyms] [k] page_add_file_rmap [k] page_add_file_rmap -
1.12% uname [kernel.kallsyms] [k] vma_interval_tree_insert [k] vma_interval_tree_insert -
0.94% uname [kernel.kallsyms] [k] perf_iterate_ctx [k] perf_iterate_ctx -
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
---
tools/perf/util/intel-pt.c | 73 ++++++++++++++++++++++++++++++++++----
1 file changed, 66 insertions(+), 7 deletions(-)
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 248a39fd4d0e..7fb807b91f73 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -71,6 +71,7 @@ struct intel_pt {
bool use_thread_stack;
bool callstack;
unsigned int br_stack_sz;
+ unsigned int br_stack_sz_plus;
int have_sched_switch;
u32 pmu_type;
u64 kernel_start;
@@ -129,6 +130,7 @@ struct intel_pt {
unsigned int range_cnt;
struct ip_callchain *chain;
+ struct branch_stack *br_stack;
};
enum switch_state {
@@ -910,6 +912,44 @@ static void intel_pt_add_callchain(struct intel_pt *pt,
sample->callchain = pt->chain;
}
+/*
+ * Allocate a branch stack with room for pt->br_stack_sz entries.
+ *
+ * Returns whatever zalloc() returns; callers treat NULL as failure.
+ */
+static struct branch_stack *intel_pt_alloc_br_stack(struct intel_pt *pt)
+{
+	/* Space for the entries that follow the branch_stack header */
+	size_t entries_sz = pt->br_stack_sz * sizeof(struct branch_entry);
+
+	return zalloc(sizeof(struct branch_stack) + entries_sz);
+}
+
+/*
+ * Prepare for adding branch stacks to regular events.
+ *
+ * Every event in the session's evlist that does not already have
+ * PERF_SAMPLE_BRANCH_STACK in its sample_type gets that bit set in
+ * synth_sample_type. Then the single pt->br_stack buffer is allocated.
+ *
+ * Returns 0 on success or -ENOMEM if the buffer cannot be allocated.
+ */
+static int intel_pt_br_stack_init(struct intel_pt *pt)
+{
+	struct evsel *pos;
+
+	evlist__for_each_entry(pt->session->evlist, pos) {
+		/* Leave events that record their own branch stack alone */
+		if (pos->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK)
+			continue;
+		pos->synth_sample_type |= PERF_SAMPLE_BRANCH_STACK;
+	}
+
+	pt->br_stack = intel_pt_alloc_br_stack(pt);
+
+	return pt->br_stack ? 0 : -ENOMEM;
+}
+
+/*
+ * Attach a synthesized branch stack to a regular (non-synthesized) sample.
+ *
+ * The thread is looked up from the sample's pid/tid, and
+ * thread_stack__br_sample_late() fills pt->br_stack (up to pt->br_stack_sz
+ * entries) for sample->ip. The sample is then pointed at pt->br_stack, which
+ * is a single buffer reused on every call.
+ */
+static void intel_pt_add_br_stack(struct intel_pt *pt,
+				  struct perf_sample *sample)
+{
+	struct thread *thread = machine__findnew_thread(pt->machine,
+							sample->pid,
+							sample->tid);
+
+	thread_stack__br_sample_late(thread, sample->cpu, pt->br_stack,
+				     pt->br_stack_sz, sample->ip,
+				     pt->kernel_start);
+
+	sample->branch_stack = pt->br_stack;
+
+	/*
+	 * machine__findnew_thread() hands back a referenced thread; release
+	 * it here so that a reference is not leaked for every sample.
+	 */
+	thread__put(thread);
+}
+
static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
unsigned int queue_nr)
{
@@ -928,10 +968,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
}
if (pt->synth_opts.last_branch) {
- size_t sz = sizeof(struct branch_stack);
-
- sz += pt->br_stack_sz * sizeof(struct branch_entry);
- ptq->last_branch = zalloc(sz);
+ ptq->last_branch = intel_pt_alloc_br_stack(pt);
if (!ptq->last_branch)
goto out_free;
}
@@ -1962,7 +1999,7 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
thread_stack__event(ptq->thread, ptq->cpu, ptq->flags,
state->from_ip, state->to_ip, ptq->insn_len,
state->trace_nr, pt->callstack,
- pt->br_stack_sz,
+ pt->br_stack_sz_plus,
pt->mispred_all);
} else {
thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr);
@@ -2608,6 +2645,8 @@ static int intel_pt_process_event(struct perf_session *session,
if (event->header.type == PERF_RECORD_SAMPLE) {
if (pt->synth_opts.add_callchain && !sample->callchain)
intel_pt_add_callchain(pt, sample);
+ if (pt->synth_opts.add_last_branch && !sample->branch_stack)
+ intel_pt_add_br_stack(pt, sample);
}
if (event->header.type == PERF_RECORD_AUX &&
@@ -3369,13 +3408,33 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
goto err_delete_thread;
}
- if (pt->synth_opts.last_branch)
+ if (pt->synth_opts.last_branch || pt->synth_opts.add_last_branch) {
pt->br_stack_sz = pt->synth_opts.last_branch_sz;
+ pt->br_stack_sz_plus = pt->br_stack_sz;
+ }
+
+ if (pt->synth_opts.add_last_branch) {
+ err = intel_pt_br_stack_init(pt);
+ if (err)
+ goto err_delete_thread;
+ /*
+ * Additional branch stack size to cater for tracing from the
+ * actual sample ip to where the sample time is recorded.
+ * Measured at about 200 branches, but generously set to 1024.
+ * If kernel space is not being traced, then add just 1 for the
+ * branch to kernel space.
+ */
+ if (intel_pt_tracing_kernel(pt))
+ pt->br_stack_sz_plus += 1024;
+ else
+ pt->br_stack_sz_plus += 1;
+ }
pt->use_thread_stack = pt->synth_opts.callchain ||
pt->synth_opts.add_callchain ||
pt->synth_opts.thread_stack ||
- pt->synth_opts.last_branch;
+ pt->synth_opts.last_branch ||
+ pt->synth_opts.add_last_branch;
pt->callstack = pt->synth_opts.callchain ||
pt->synth_opts.add_callchain ||
--
2.17.1
next prev parent reply other threads:[~2020-04-29 15:07 UTC|newest]
Thread overview: 23+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-04-29 15:07 [PATCH 0/9] perf intel-pt: Add support for synthesizing branch stacks for regular events Adrian Hunter
2020-04-29 15:07 ` [PATCH 1/9] perf thread-stack: Add branch stack support Adrian Hunter
2020-05-08 13:04 ` [tip: perf/core] " tip-bot2 for Adrian Hunter
2020-04-29 15:07 ` [PATCH 2/9] perf intel-pt: Consolidate thread-stack use condition Adrian Hunter
2020-05-08 13:04 ` [tip: perf/core] " tip-bot2 for Adrian Hunter
2020-04-29 15:07 ` [PATCH 3/9] perf intel-pt: Change branch stack support to use thread-stacks Adrian Hunter
2020-05-08 13:04 ` [tip: perf/core] " tip-bot2 for Adrian Hunter
2020-04-29 15:07 ` [PATCH 4/9] perf auxtrace: Add option to synthesize branch stack for regular events Adrian Hunter
2020-05-08 13:04 ` [tip: perf/core] " tip-bot2 for Adrian Hunter
2020-04-29 15:07 ` [PATCH 5/9] perf evsel: Add support for synthesized branch stack sample type Adrian Hunter
2020-05-08 13:04 ` [tip: perf/core] " tip-bot2 for Adrian Hunter
2020-04-29 15:07 ` [PATCH 6/9] perf thread-stack: Add thread_stack__br_sample_late() Adrian Hunter
2020-05-08 13:04 ` [tip: perf/core] " tip-bot2 for Adrian Hunter
2020-04-29 15:07 ` Adrian Hunter [this message]
2020-05-08 13:04 ` [tip: perf/core] perf intel-pt: Add support for synthesizing branch stacks for regular events tip-bot2 for Adrian Hunter
2020-04-29 15:07 ` [PATCH 8/9] perf intel-pt: Update documentation about itrace G and L options Adrian Hunter
2020-04-29 23:03 ` Andi Kleen
2020-04-30 5:36 ` Adrian Hunter
2020-04-30 12:28 ` Andi Kleen
2020-05-08 13:04 ` [tip: perf/core] " tip-bot2 for Adrian Hunter
2020-04-29 15:07 ` [PATCH 9/9] perf intel-pt: Update documentation about using /proc/kcore Adrian Hunter
2020-05-08 13:04 ` [tip: perf/core] " tip-bot2 for Adrian Hunter
2020-04-29 18:29 ` [PATCH 0/9] perf intel-pt: Add support for synthesizing branch stacks for regular events Arnaldo Carvalho de Melo
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200429150751.12570-8-adrian.hunter@intel.com \
--to=adrian.hunter@intel.com \
--cc=acme@kernel.org \
--cc=ak@linux.intel.com \
--cc=jolsa@redhat.com \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox