From: Arnaldo Carvalho de Melo <acme@kernel.org>
To: Ingo Molnar <mingo@kernel.org>
Cc: linux-kernel@vger.kernel.org,
Adrian Hunter <adrian.hunter@intel.com>,
Jiri Olsa <jolsa@redhat.com>,
Arnaldo Carvalho de Melo <acme@redhat.com>
Subject: [PATCH 25/39] perf intel-pt: Support generating branch stack
Date: Mon, 28 Sep 2015 18:08:10 -0300 [thread overview]
Message-ID: <1443474504-16528-26-git-send-email-acme@kernel.org> (raw)
In-Reply-To: <1443474504-16528-1-git-send-email-acme@kernel.org>
From: Adrian Hunter <adrian.hunter@intel.com>
Add support for generating branch stack context for PT samples. The
decoder reports a configurable number of branches as branch context for
each sample. Internally it keeps track of them by using a simple sliding
window. We also flush the last branch buffer on each sample to avoid
overlapping intervals.
This is useful for:
- Reporting accurate basic block edge frequencies through the perf
report branch view
- Using with --branch-history to get the wider context of samples
- Other users of LBRs
Also the Documentation is updated.
Examples:
Record with Intel PT:
perf record -e intel_pt//u ls
Branch stacks are used by default if synthesized so:
perf report --itrace=ile
is the same as:
perf report --itrace=ile -b
Branch history can be requested also:
perf report --itrace=igle --branch-history
Based-on-patch-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/1443186956-18718-15-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
tools/perf/Documentation/intel-pt.txt | 10 +++
tools/perf/util/intel-pt.c | 115 ++++++++++++++++++++++++++++++++++
2 files changed, 125 insertions(+)
diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
index 886612b50961..a0fbb5d71f7d 100644
--- a/tools/perf/Documentation/intel-pt.txt
+++ b/tools/perf/Documentation/intel-pt.txt
@@ -671,6 +671,7 @@ The letters are:
e synthesize tracing error events
d create a debug log
g synthesize a call chain (use with i or x)
+ l synthesize last branch entries (use with i or x)
"Instructions" events look like they were recorded by "perf record -e
instructions".
@@ -718,6 +719,15 @@ transactions events can be specified. e.g.
--itrace=ig32
--itrace=xg32
+Also the number of last branch entries (default 64, max. 1024) for instructions or
+transactions events can be specified. e.g.
+
+ --itrace=il10
+ --itrace=xl10
+
+Note that last branch entries are cleared for each sample, so there is no overlap
+from one sample to the next.
+
To disable trace decoding entirely, use the option --no-itrace.
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 2c01e723826a..05e8fcc5188b 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -22,6 +22,7 @@
#include "../perf.h"
#include "session.h"
#include "machine.h"
+#include "sort.h"
#include "tool.h"
#include "event.h"
#include "evlist.h"
@@ -115,6 +116,9 @@ struct intel_pt_queue {
void *decoder;
const struct intel_pt_state *state;
struct ip_callchain *chain;
+ struct branch_stack *last_branch;
+ struct branch_stack *last_branch_rb;
+ size_t last_branch_pos;
union perf_event *event_buf;
bool on_heap;
bool stop;
@@ -675,6 +679,19 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
goto out_free;
}
+ if (pt->synth_opts.last_branch) {
+ size_t sz = sizeof(struct branch_stack);
+
+ sz += pt->synth_opts.last_branch_sz *
+ sizeof(struct branch_entry);
+ ptq->last_branch = zalloc(sz);
+ if (!ptq->last_branch)
+ goto out_free;
+ ptq->last_branch_rb = zalloc(sz);
+ if (!ptq->last_branch_rb)
+ goto out_free;
+ }
+
ptq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE);
if (!ptq->event_buf)
goto out_free;
@@ -732,6 +749,8 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
out_free:
zfree(&ptq->event_buf);
+ zfree(&ptq->last_branch);
+ zfree(&ptq->last_branch_rb);
zfree(&ptq->chain);
free(ptq);
return NULL;
@@ -746,6 +765,8 @@ static void intel_pt_free_queue(void *priv)
thread__zput(ptq->thread);
intel_pt_decoder_free(ptq->decoder);
zfree(&ptq->event_buf);
+ zfree(&ptq->last_branch);
+ zfree(&ptq->last_branch_rb);
zfree(&ptq->chain);
free(ptq);
}
@@ -876,6 +897,57 @@ static int intel_pt_setup_queues(struct intel_pt *pt)
return 0;
}
+/*
+ * Linearise the last-branch ring buffer (ptq->last_branch_rb) into
+ * ptq->last_branch so it can be attached to a sample.
+ *
+ * The destination's entry count is always set to the ring's count. When the
+ * ring is empty nothing else is copied. Otherwise the slots from
+ * last_branch_pos up to the end of the ring are copied first, and, only when
+ * the ring is full, the slots before last_branch_pos are appended after them.
+ */
+static inline void intel_pt_copy_last_branch_rb(struct intel_pt_queue *ptq)
+{
+	struct branch_stack *ring = ptq->last_branch_rb;
+	struct branch_stack *out = ptq->last_branch;
+	size_t head;
+	size_t upper_cnt;
+
+	out->nr = ring->nr;
+	if (ring->nr == 0)
+		return;
+
+	/* Slots [head, last_branch_sz) go to the front of the output. */
+	head = ptq->last_branch_pos;
+	upper_cnt = ptq->pt->synth_opts.last_branch_sz - head;
+	memcpy(out->entries, ring->entries + head,
+	       upper_cnt * sizeof(struct branch_entry));
+
+	/* The slots below head only hold data once the ring has filled up. */
+	if (ring->nr < ptq->pt->synth_opts.last_branch_sz)
+		return;
+
+	memcpy(out->entries + upper_cnt, ring->entries,
+	       head * sizeof(struct branch_entry));
+}
+
+/*
+ * Empty the last-branch ring buffer: zero its entry count and move the
+ * write position back to 0. The entries themselves are not cleared.
+ */
+static inline void intel_pt_reset_last_branch_rb(struct intel_pt_queue *ptq)
+{
+	struct branch_stack *ring = ptq->last_branch_rb;
+
+	ring->nr = 0;
+	ptq->last_branch_pos = 0;
+}
+
+/*
+ * Record the branch described by ptq->state in the last-branch ring buffer.
+ *
+ * The write position moves backwards by one slot per call, wrapping from 0
+ * to the last slot, and the entry count grows until it reaches
+ * synth_opts.last_branch_sz.
+ */
+static void intel_pt_update_last_branch_rb(struct intel_pt_queue *ptq)
+{
+	const struct intel_pt_state *st = ptq->state;
+	struct branch_stack *ring = ptq->last_branch_rb;
+	size_t slot = ptq->last_branch_pos;
+	struct branch_entry *entry;
+
+	/* Step back one slot, wrapping around at the start of the ring. */
+	if (slot == 0)
+		slot = ptq->pt->synth_opts.last_branch_sz;
+	slot--;
+	ptq->last_branch_pos = slot;
+
+	entry = ring->entries + slot;
+	entry->from = st->from_ip;
+	entry->to = st->to_ip;
+	entry->flags.abort = (st->flags & INTEL_PT_ABORT_TX) ? 1 : 0;
+	entry->flags.in_tx = (st->flags & INTEL_PT_IN_TX) ? 1 : 0;
+	/* No support for mispredict */
+
+	if (ring->nr < ptq->pt->synth_opts.last_branch_sz)
+		ring->nr++;
+}
+
static int intel_pt_inject_event(union perf_event *event,
struct perf_sample *sample, u64 type,
bool swapped)
@@ -890,6 +962,10 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
struct intel_pt *pt = ptq->pt;
union perf_event *event = ptq->event_buf;
struct perf_sample sample = { .ip = 0, };
+ struct dummy_branch_stack {
+ u64 nr;
+ struct branch_entry entries;
+ } dummy_bs;
if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
return 0;
@@ -912,6 +988,21 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
sample.flags = ptq->flags;
sample.insn_len = ptq->insn_len;
+ /*
+ * perf report cannot handle events without a branch stack when using
+ * SORT_MODE__BRANCH so make a dummy one.
+ */
+ if (pt->synth_opts.last_branch && sort__mode == SORT_MODE__BRANCH) {
+ dummy_bs = (struct dummy_branch_stack){
+ .nr = 1,
+ .entries = {
+ .from = sample.ip,
+ .to = sample.addr,
+ },
+ };
+ sample.branch_stack = (struct branch_stack *)&dummy_bs;
+ }
+
if (pt->synth_opts.inject) {
ret = intel_pt_inject_event(event, &sample,
pt->branches_sample_type,
@@ -961,6 +1052,11 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
sample.callchain = ptq->chain;
}
+ if (pt->synth_opts.last_branch) {
+ intel_pt_copy_last_branch_rb(ptq);
+ sample.branch_stack = ptq->last_branch;
+ }
+
if (pt->synth_opts.inject) {
ret = intel_pt_inject_event(event, &sample,
pt->instructions_sample_type,
@@ -974,6 +1070,9 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
pr_err("Intel Processor Trace: failed to deliver instruction event, error %d\n",
ret);
+ if (pt->synth_opts.last_branch)
+ intel_pt_reset_last_branch_rb(ptq);
+
return ret;
}
@@ -1008,6 +1107,11 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
sample.callchain = ptq->chain;
}
+ if (pt->synth_opts.last_branch) {
+ intel_pt_copy_last_branch_rb(ptq);
+ sample.branch_stack = ptq->last_branch;
+ }
+
if (pt->synth_opts.inject) {
ret = intel_pt_inject_event(event, &sample,
pt->transactions_sample_type,
@@ -1021,6 +1125,9 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
pr_err("Intel Processor Trace: failed to deliver transaction event, error %d\n",
ret);
+	/*
+	 * The last-branch ring buffer is only allocated when
+	 * synth_opts.last_branch is set, so the reset must be gated on that
+	 * flag (not on callchain), as the instruction sample path does.
+	 */
+	if (pt->synth_opts.last_branch)
+		intel_pt_reset_last_branch_rb(ptq);
+
return ret;
}
@@ -1116,6 +1223,9 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
return err;
}
+ if (pt->synth_opts.last_branch)
+ intel_pt_update_last_branch_rb(ptq);
+
if (!pt->sync_switch)
return 0;
@@ -1763,6 +1873,8 @@ static int intel_pt_synth_events(struct intel_pt *pt,
pt->instructions_sample_period = attr.sample_period;
if (pt->synth_opts.callchain)
attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
+ if (pt->synth_opts.last_branch)
+ attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
pr_debug("Synthesizing 'instructions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
id, (u64)attr.sample_type);
err = intel_pt_synth_event(session, &attr, id);
@@ -1782,6 +1894,8 @@ static int intel_pt_synth_events(struct intel_pt *pt,
attr.sample_period = 1;
if (pt->synth_opts.callchain)
attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
+ if (pt->synth_opts.last_branch)
+ attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
pr_debug("Synthesizing 'transactions' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
id, (u64)attr.sample_type);
err = intel_pt_synth_event(session, &attr, id);
@@ -1808,6 +1922,7 @@ static int intel_pt_synth_events(struct intel_pt *pt,
attr.sample_period = 1;
attr.sample_type |= PERF_SAMPLE_ADDR;
attr.sample_type &= ~(u64)PERF_SAMPLE_CALLCHAIN;
+ attr.sample_type &= ~(u64)PERF_SAMPLE_BRANCH_STACK;
pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
id, (u64)attr.sample_type);
err = intel_pt_synth_event(session, &attr, id);
--
2.1.0
next prev parent reply other threads:[~2015-09-28 21:21 UTC|newest]
Thread overview: 41+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-09-28 21:07 [GIT PULL 00/39] perf/core improvements and fixes Arnaldo Carvalho de Melo
2015-09-28 21:07 ` [PATCH 01/39] perf top: Filter symbols based on __map__is_kernel(map) Arnaldo Carvalho de Melo
2015-09-28 21:07 ` [PATCH 02/39] perf hists browser: Use the map to determine if a DSO is being used as a kernel Arnaldo Carvalho de Melo
2015-09-28 21:07 ` [PATCH 03/39] perf tools: Use __map__is_kernel() when synthesizing kernel module mmap records Arnaldo Carvalho de Melo
2015-09-28 21:07 ` [PATCH 04/39] tools lib api fs: Store tracing mountpoint for better error message Arnaldo Carvalho de Melo
2015-09-28 21:07 ` [PATCH 05/39] tools build: Add Makefile.include Arnaldo Carvalho de Melo
2015-09-28 21:07 ` [PATCH 06/39] tools build: Add test for missing include Arnaldo Carvalho de Melo
2015-09-28 21:07 ` [PATCH 07/39] tools build: Add fixdep dependency helper Arnaldo Carvalho de Melo
2015-09-28 21:07 ` [PATCH 08/39] tools build: Move dependency copy into function Arnaldo Carvalho de Melo
2015-09-28 21:07 ` [PATCH 09/39] tools build: Make the fixdep helper part of the build process Arnaldo Carvalho de Melo
2015-09-28 21:07 ` [PATCH 10/39] perf tools: Rename the 'single_dep' target to 'prepare' Arnaldo Carvalho de Melo
2015-09-28 21:07 ` [PATCH 11/39] tools build: Build fixdep helper from perf and basic libs Arnaldo Carvalho de Melo
2015-09-28 21:07 ` [PATCH 12/39] perf auxtrace: Fix 'instructions' period of zero Arnaldo Carvalho de Melo
2015-09-28 21:07 ` [PATCH 13/39] perf report: Fix sample type validation for synthesized callchains Arnaldo Carvalho de Melo
2015-09-28 21:07 ` [PATCH 14/39] perf intel-pt: Fix potential loop forever Arnaldo Carvalho de Melo
2015-09-28 21:08 ` [PATCH 15/39] perf intel-pt: Make logging slightly more efficient Arnaldo Carvalho de Melo
2015-09-28 21:08 ` [PATCH 16/39] perf script: Allow time to be displayed in nanoseconds Arnaldo Carvalho de Melo
2015-09-28 21:08 ` [PATCH 17/39] perf session: Warn when AUX data has been lost Arnaldo Carvalho de Melo
2015-09-28 21:08 ` [PATCH 18/39] perf tools: Add more documentation to export-to-postgresql.py script Arnaldo Carvalho de Melo
2015-09-28 21:08 ` [PATCH 19/39] perf auxtrace: Add option to synthesize branch stacks on samples Arnaldo Carvalho de Melo
2015-09-28 21:08 ` [PATCH 20/39] perf report: Adjust sample type validation for synthesized branch stacks Arnaldo Carvalho de Melo
2015-09-28 21:08 ` [PATCH 21/39] perf report: Also do default setup " Arnaldo Carvalho de Melo
2015-09-28 21:08 ` [PATCH 22/39] perf report: Skip events with null " Arnaldo Carvalho de Melo
2015-09-28 21:08 ` [PATCH 23/39] perf inject: Set branch stack feature flag when synthesizing " Arnaldo Carvalho de Melo
2015-09-28 21:08 ` [PATCH 24/39] perf intel-pt: Move branch filter logic Arnaldo Carvalho de Melo
2015-09-28 21:08 ` Arnaldo Carvalho de Melo [this message]
2015-09-28 21:08 ` [PATCH 26/39] perf report: Make max_stack value allow for synthesized callchains Arnaldo Carvalho de Melo
2015-09-28 21:08 ` [PATCH 27/39] perf hists: Allow for max_stack greater than PERF_MAX_STACK_DEPTH Arnaldo Carvalho de Melo
2015-09-28 21:08 ` [PATCH 28/39] perf script: Add a setting for maximum stack depth Arnaldo Carvalho de Melo
2015-09-28 21:08 ` [PATCH 29/39] perf scripting python: Allow for max_stack greater than PERF_MAX_STACK_DEPTH Arnaldo Carvalho de Melo
2015-09-28 21:08 ` [PATCH 30/39] perf script: Make scripting_max_stack value allow for synthesized callchains Arnaldo Carvalho de Melo
2015-09-28 21:08 ` [PATCH 31/39] perf evlist: Add perf_evlist__id2evsel_strict() Arnaldo Carvalho de Melo
2015-09-28 21:08 ` [PATCH 32/39] perf evlist: Add perf_evlist__remove() Arnaldo Carvalho de Melo
2015-09-28 21:08 ` [PATCH 33/39] perf inject: Remove more aux-related stuff when processing instruction traces Arnaldo Carvalho de Melo
2015-09-28 21:08 ` [PATCH 34/39] perf inject: Add --strip option to strip out non-synthesized events Arnaldo Carvalho de Melo
2015-09-28 21:08 ` [PATCH 35/39] perf intel-pt: Add mispred-all config option to aid use with autofdo Arnaldo Carvalho de Melo
2015-09-28 21:08 ` [PATCH 36/39] perf tools: Adds the config_term callback for different type events Arnaldo Carvalho de Melo
2015-09-28 21:08 ` [PATCH 37/39] perf tools: Show proper error message for wrong terms of hw/sw events Arnaldo Carvalho de Melo
2015-09-28 21:08 ` [PATCH 38/39] perf tools: Adds the tracepoint name parsing support Arnaldo Carvalho de Melo
2015-09-28 21:08 ` [PATCH 39/39] perf tools: Enable event_config terms to tracepoint events Arnaldo Carvalho de Melo
2015-09-29 7:47 ` [GIT PULL 00/39] perf/core improvements and fixes Ingo Molnar
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1443474504-16528-26-git-send-email-acme@kernel.org \
--to=acme@kernel.org \
--cc=acme@redhat.com \
--cc=adrian.hunter@intel.com \
--cc=jolsa@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.