From: Arnaldo Carvalho de Melo <acme@kernel.org>
To: Ingo Molnar <mingo@kernel.org>
Cc: linux-kernel@vger.kernel.org, Andi Kleen <ak@linux.intel.com>,
Adrian Hunter <adrian.hunter@intel.com>,
Arnaldo Carvalho de Melo <acme@redhat.com>
Subject: [PATCH 02/37] perf intel-pt/bts: Report instruction bytes and length in sample
Date: Mon, 24 Oct 2016 13:20:22 -0300 [thread overview]
Message-ID: <1477326057-24080-3-git-send-email-acme@kernel.org> (raw)
In-Reply-To: <1477326057-24080-1-git-send-email-acme@kernel.org>
From: Andi Kleen <ak@linux.intel.com>
Change Intel PT and BTS to pass up the length and the instruction
bytes of the decoded or sampled instruction in the perf sample.
The decoder already knows this information; we just need to pass it
up. Since it is only a couple of movs, it is not very expensive.
Handle the instruction cache too. Make sure the instruction length
(intel_pt_insn->length) is always initialized.
This is used in the next patch.
[Adrian: re-base on top (and adjust for) instruction buffer size tidy-up]
[Adrian: add BTS support and adjust commit message accordingly]
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Link: http://lkml.kernel.org/r/1475847747-30994-3-git-send-email-adrian.hunter@intel.com
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
tools/perf/util/event.h | 3 +++
tools/perf/util/intel-bts.c | 1 +
tools/perf/util/intel-pt-decoder/intel-pt-decoder.c | 2 ++
tools/perf/util/intel-pt-decoder/intel-pt-decoder.h | 1 +
tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c | 2 +-
tools/perf/util/intel-pt.c | 11 +++++++++++
6 files changed, 19 insertions(+), 1 deletion(-)
diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h
index 8d363d5e65a2..c735c53a26f8 100644
--- a/tools/perf/util/event.h
+++ b/tools/perf/util/event.h
@@ -177,6 +177,8 @@ enum {
PERF_IP_FLAG_TRACE_BEGIN |\
PERF_IP_FLAG_TRACE_END)
+#define MAX_INSN 16
+
struct perf_sample {
u64 ip;
u32 pid, tid;
@@ -193,6 +195,7 @@ struct perf_sample {
u32 flags;
u16 insn_len;
u8 cpumode;
+ char insn[MAX_INSN];
void *raw_data;
struct ip_callchain *callchain;
struct branch_stack *branch_stack;
diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index 8bc7fec817d7..6c2eb5da4afc 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -295,6 +295,7 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
sample.cpu = btsq->cpu;
sample.flags = btsq->sample_flags;
sample.insn_len = btsq->intel_pt_insn.length;
+ memcpy(sample.insn, btsq->intel_pt_insn.buf, INTEL_PT_INSN_BUF_SZ);
if (bts->synth_opts.inject) {
event.sample.header.size = bts->branches_event_size;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 16c06d3ae577..e4e7dc781d21 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -980,6 +980,8 @@ static int intel_pt_walk_insn(struct intel_pt_decoder *decoder,
out_no_progress:
decoder->state.insn_op = intel_pt_insn->op;
decoder->state.insn_len = intel_pt_insn->length;
+ memcpy(decoder->state.insn, intel_pt_insn->buf,
+ INTEL_PT_INSN_BUF_SZ);
if (decoder->tx_flags & INTEL_PT_IN_TX)
decoder->state.flags |= INTEL_PT_IN_TX;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
index 89399985fa4d..e90619a43c0c 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -66,6 +66,7 @@ struct intel_pt_state {
uint32_t flags;
enum intel_pt_insn_op insn_op;
int insn_len;
+ char insn[INTEL_PT_INSN_BUF_SZ];
};
struct intel_pt_insn;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
index 5f95cd442075..7913363bde5c 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c
@@ -27,7 +27,7 @@
#include "intel-pt-insn-decoder.h"
-#if INTEL_PT_INSN_BUF_SZ < MAX_INSN_SIZE
+#if INTEL_PT_INSN_BUF_SZ < MAX_INSN_SIZE || INTEL_PT_INSN_BUF_SZ > MAX_INSN
#error Instruction buffer size too small
#endif
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 815a14d8904b..85d5eeb66c75 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -143,6 +143,7 @@ struct intel_pt_queue {
u32 flags;
u16 insn_len;
u64 last_insn_cnt;
+ char insn[INTEL_PT_INSN_BUF_SZ];
};
static void intel_pt_dump(struct intel_pt *pt __maybe_unused,
@@ -315,6 +316,7 @@ struct intel_pt_cache_entry {
enum intel_pt_insn_branch branch;
int length;
int32_t rel;
+ char insn[INTEL_PT_INSN_BUF_SZ];
};
static int intel_pt_config_div(const char *var, const char *value, void *data)
@@ -400,6 +402,7 @@ static int intel_pt_cache_add(struct dso *dso, struct machine *machine,
e->branch = intel_pt_insn->branch;
e->length = intel_pt_insn->length;
e->rel = intel_pt_insn->rel;
+ memcpy(e->insn, intel_pt_insn->buf, INTEL_PT_INSN_BUF_SZ);
err = auxtrace_cache__add(c, offset, &e->entry);
if (err)
@@ -436,6 +439,8 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
u64 insn_cnt = 0;
bool one_map = true;
+ intel_pt_insn->length = 0;
+
if (to_ip && *ip == to_ip)
goto out_no_cache;
@@ -475,6 +480,8 @@ static int intel_pt_walk_next_insn(struct intel_pt_insn *intel_pt_insn,
intel_pt_insn->branch = e->branch;
intel_pt_insn->length = e->length;
intel_pt_insn->rel = e->rel;
+ memcpy(intel_pt_insn->buf, e->insn,
+ INTEL_PT_INSN_BUF_SZ);
intel_pt_log_insn_no_data(intel_pt_insn, *ip);
return 0;
}
@@ -898,6 +905,7 @@ static void intel_pt_sample_flags(struct intel_pt_queue *ptq)
if (ptq->state->flags & INTEL_PT_IN_TX)
ptq->flags |= PERF_IP_FLAG_IN_TX;
ptq->insn_len = ptq->state->insn_len;
+ memcpy(ptq->insn, ptq->state->insn, INTEL_PT_INSN_BUF_SZ);
}
}
@@ -1078,6 +1086,7 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
sample.cpu = ptq->cpu;
sample.flags = ptq->flags;
sample.insn_len = ptq->insn_len;
+ memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
/*
* perf report cannot handle events without a branch stack when using
@@ -1139,6 +1148,7 @@ static int intel_pt_synth_instruction_sample(struct intel_pt_queue *ptq)
sample.cpu = ptq->cpu;
sample.flags = ptq->flags;
sample.insn_len = ptq->insn_len;
+ memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
ptq->last_insn_cnt = ptq->state->tot_insn_cnt;
@@ -1201,6 +1211,7 @@ static int intel_pt_synth_transaction_sample(struct intel_pt_queue *ptq)
sample.cpu = ptq->cpu;
sample.flags = ptq->flags;
sample.insn_len = ptq->insn_len;
+ memcpy(sample.insn, ptq->insn, INTEL_PT_INSN_BUF_SZ);
if (pt->synth_opts.callchain) {
thread_stack__sample(ptq->thread, ptq->chain,
--
2.7.4
next prev parent reply other threads:[~2016-10-24 16:21 UTC|newest]
Thread overview: 42+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-10-24 16:20 [GIT PULL 00/37] perf/core improvements and fixes Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 01/37] perf intel-pt/bts: Tidy instruction buffer size usage Arnaldo Carvalho de Melo
2016-10-24 16:20 ` Arnaldo Carvalho de Melo [this message]
2016-10-24 16:20 ` [PATCH 03/37] perf script: Support insn and insnlen Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 04/37] perf tools: Sync copy of x86's syscall table Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 05/37] tools lib traceevent: Add install_headers target Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 06/37] tools lib traceevent: Add do_install_mkdir Makefile function Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 07/37] tools lib traceevent: Rename LIB_FILE to LIB_TARGET Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 08/37] tools lib traceevent: Add version for traceevent shared object Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 09/37] tools lib: Add for_each_clear_bit macro Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 10/37] perf report: Move captured info to generic header info Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 11/37] perf header: Display missing features Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 12/37] perf header: Display feature name on write failure Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 13/37] perf record: Improve documentation of event parameters Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 14/37] perf tools: Implement branch_type event parameter Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 15/37] perf jit: Avoid returning garbage for a ret variable Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 16/37] perf jit: Add NT_GNU_BUILD_ID definition for older distros Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 17/37] perf jit: Improve error messages from JVMTI Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 18/37] perf jit: Enable jitdump support without dwarf Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 19/37] perf jit: Remove unecessary padding in jitdump file Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 20/37] perf jit: Make perf skip unknown records Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 21/37] perf jit: Do not assume pgoff is zero Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 22/37] perf jit: Add unwinding support Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 23/37] perf jit: Generate .eh_frame/.eh_frame_hdr in DSO Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 24/37] perf jit: Check JITHEADER_VERSION Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 25/37] perf jit: Add jitdump format specification document Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 26/37] perf pmu: Only print Using CPUID message once Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 27/37] perf tools: Fix typo "No enough" to "Not enough" Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 28/37] perf hists browser: Dynamically change verbosity level Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 29/37] perf trace: Implement --delay Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 30/37] perf bench mem: Move boilerplate memory allocation to the infrastructure Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 31/37] perf tools: Normalize sq_quote_argv() error reporting Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 32/37] perf tools: Use normal error reporting when processing PERF_RECORD_READ events Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 33/37] perf bench futex: Cache align the worker struct Arnaldo Carvalho de Melo
2016-10-24 18:50 ` Davidlohr Bueso
2016-10-24 18:53 ` Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 34/37] perf trace: Remove thread_trace->exit_time Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 35/37] perf trace: Use the syscall raw_syscalls:sys_enter timestamp Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 36/37] perf list: Make vendor event matching case insensitive Arnaldo Carvalho de Melo
2016-10-24 16:20 ` [PATCH 37/37] perf coresight: Removing miscellaneous debug output Arnaldo Carvalho de Melo
2016-10-24 16:20 ` Arnaldo Carvalho de Melo
2016-10-24 18:44 ` [GIT PULL 00/37] perf/core improvements and fixes Ingo Molnar
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1477326057-24080-3-git-send-email-acme@kernel.org \
--to=acme@kernel.org \
--cc=acme@redhat.com \
--cc=adrian.hunter@intel.com \
--cc=ak@linux.intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.