linux-perf-users.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Arnaldo Carvalho de Melo <acme@kernel.org>
To: Ingo Molnar <mingo@kernel.org>, Thomas Gleixner <tglx@linutronix.de>
Cc: Jiri Olsa <jolsa@kernel.org>, Namhyung Kim <namhyung@kernel.org>,
	Clark Williams <williams@redhat.com>,
	linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org,
	Adrian Hunter <adrian.hunter@intel.com>,
	Jiri Olsa <jolsa@redhat.com>,
	Arnaldo Carvalho de Melo <acme@redhat.com>
Subject: [PATCH 05/25] perf intel-pt: Add decoder support for PEBS via PT
Date: Fri, 21 Jun 2019 14:38:11 -0300	[thread overview]
Message-ID: <20190621173831.13780-6-acme@kernel.org> (raw)
In-Reply-To: <20190621173831.13780-1-acme@kernel.org>

From: Adrian Hunter <adrian.hunter@intel.com>

PEBS data is encoded in Block Item Packets (BIP). Populate a new structure
intel_pt_blk_items with the values and, upon a Block End Packet (BEP),
report them as a new Intel PT sample type INTEL_PT_BLK_ITEMS.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/20190610072803.10456-4-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../util/intel-pt-decoder/intel-pt-decoder.c  |  78 +++++++++-
 .../util/intel-pt-decoder/intel-pt-decoder.h  | 137 ++++++++++++++++++
 2 files changed, 214 insertions(+), 1 deletion(-)

diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index 2f7791d4034f..f8b71bf2bb4c 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -134,6 +134,9 @@ struct intel_pt_decoder {
 	struct intel_pt_stack stack;
 	enum intel_pt_pkt_state pkt_state;
 	enum intel_pt_pkt_ctx pkt_ctx;
+	enum intel_pt_pkt_ctx prev_pkt_ctx;
+	enum intel_pt_blk_type blk_type;
+	int blk_type_pos;
 	struct intel_pt_pkt packet;
 	struct intel_pt_pkt tnt;
 	int pkt_step;
@@ -167,6 +170,7 @@ struct intel_pt_decoder {
 	bool set_fup_mwait;
 	bool set_fup_pwre;
 	bool set_fup_exstop;
+	bool set_fup_bep;
 	bool sample_cyc;
 	unsigned int fup_tx_flags;
 	unsigned int tx_flags;
@@ -560,6 +564,7 @@ static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder)
 	memcpy(buf + len, decoder->buf, n);
 	len += n;
 
+	decoder->prev_pkt_ctx = decoder->pkt_ctx;
 	ret = intel_pt_get_packet(buf, len, &decoder->packet, &decoder->pkt_ctx);
 	if (ret < (int)old_len) {
 		decoder->next_buf = decoder->buf;
@@ -885,6 +890,7 @@ static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder)
 				return ret;
 		}
 
+		decoder->prev_pkt_ctx = decoder->pkt_ctx;
 		ret = intel_pt_get_packet(decoder->buf, decoder->len,
 					  &decoder->packet, &decoder->pkt_ctx);
 		if (ret == INTEL_PT_NEED_MORE_BYTES && BITS_PER_LONG == 32 &&
@@ -1124,6 +1130,14 @@ static bool intel_pt_fup_event(struct intel_pt_decoder *decoder)
 		decoder->state.to_ip = 0;
 		ret = true;
 	}
+	if (decoder->set_fup_bep) {
+		decoder->set_fup_bep = false;
+		decoder->state.type |= INTEL_PT_BLK_ITEMS;
+		decoder->state.type &= ~INTEL_PT_BRANCH;
+		decoder->state.from_ip = decoder->ip;
+		decoder->state.to_ip = 0;
+		ret = true;
+	}
 	return ret;
 }
 
@@ -1609,6 +1623,46 @@ static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
 	intel_pt_log_to("Setting timestamp", decoder->timestamp);
 }
 
+static void intel_pt_bbp(struct intel_pt_decoder *decoder)
+{
+	if (decoder->prev_pkt_ctx == INTEL_PT_NO_CTX) {
+		memset(decoder->state.items.mask, 0, sizeof(decoder->state.items.mask));
+		decoder->state.items.is_32_bit = false;
+	}
+	decoder->blk_type = decoder->packet.payload;
+	decoder->blk_type_pos = intel_pt_blk_type_pos(decoder->blk_type);
+	if (decoder->blk_type == INTEL_PT_GP_REGS)
+		decoder->state.items.is_32_bit = decoder->packet.count;
+	if (decoder->blk_type_pos < 0) {
+		intel_pt_log("WARNING: Unknown block type %u\n",
+			     decoder->blk_type);
+	} else if (decoder->state.items.mask[decoder->blk_type_pos]) {
+		intel_pt_log("WARNING: Duplicate block type %u\n",
+			     decoder->blk_type);
+	}
+}
+
+static void intel_pt_bip(struct intel_pt_decoder *decoder)
+{
+	uint32_t id = decoder->packet.count;
+	uint32_t bit = 1 << id;
+	int pos = decoder->blk_type_pos;
+
+	if (pos < 0 || id >= INTEL_PT_BLK_ITEM_ID_CNT) {
+		intel_pt_log("WARNING: Unknown block item %u type %d\n",
+			     id, decoder->blk_type);
+		return;
+	}
+
+	if (decoder->state.items.mask[pos] & bit) {
+		intel_pt_log("WARNING: Duplicate block item %u type %d\n",
+			     id, decoder->blk_type);
+	}
+
+	decoder->state.items.mask[pos] |= bit;
+	decoder->state.items.val[pos][id] = decoder->packet.payload;
+}
+
 /* Walk PSB+ packets when already in sync. */
 static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
 {
@@ -2063,10 +2117,31 @@ static int intel_pt_walk_trace(struct intel_pt_decoder *decoder)
 			return 0;
 
 		case INTEL_PT_BBP:
+			intel_pt_bbp(decoder);
+			break;
+
 		case INTEL_PT_BIP:
+			intel_pt_bip(decoder);
+			break;
+
 		case INTEL_PT_BEP:
+			decoder->state.type = INTEL_PT_BLK_ITEMS;
+			decoder->state.from_ip = decoder->ip;
+			decoder->state.to_ip = 0;
+			return 0;
+
 		case INTEL_PT_BEP_IP:
-			break;
+			err = intel_pt_get_next_packet(decoder);
+			if (err)
+				return err;
+			if (decoder->packet.type == INTEL_PT_FUP) {
+				decoder->set_fup_bep = true;
+				no_tip = true;
+			} else {
+				intel_pt_log_at("ERROR: Missing FUP after BEP",
+						decoder->pos);
+			}
+			goto next;
 
 		default:
 			return intel_pt_bug(decoder);
@@ -2335,6 +2410,7 @@ static int intel_pt_sync_ip(struct intel_pt_decoder *decoder)
 	decoder->set_fup_mwait = false;
 	decoder->set_fup_pwre = false;
 	decoder->set_fup_exstop = false;
+	decoder->set_fup_bep = false;
 
 	if (!decoder->branch_enable) {
 		decoder->pkt_state = INTEL_PT_STATE_IN_SYNC;
diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
index 754efa8b501f..9957f2ccdca8 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.h
@@ -30,6 +30,7 @@ enum intel_pt_sample_type {
 	INTEL_PT_CBR_CHG	= 1 << 8,
 	INTEL_PT_TRACE_BEGIN	= 1 << 9,
 	INTEL_PT_TRACE_END	= 1 << 10,
+	INTEL_PT_BLK_ITEMS	= 1 << 11,
 };
 
 enum intel_pt_period_type {
@@ -61,6 +62,141 @@ enum intel_pt_param_flags {
 	INTEL_PT_FUP_WITH_NLIP	= 1 << 0,
 };
 
+enum intel_pt_blk_type {
+	INTEL_PT_GP_REGS	= 1,
+	INTEL_PT_PEBS_BASIC	= 4,
+	INTEL_PT_PEBS_MEM	= 5,
+	INTEL_PT_LBR_0		= 8,
+	INTEL_PT_LBR_1		= 9,
+	INTEL_PT_LBR_2		= 10,
+	INTEL_PT_XMM		= 16,
+	INTEL_PT_BLK_TYPE_MAX
+};
+
+/*
+ * The block type numbers are not sequential but here they are given sequential
+ * positions to avoid wasting space for array placement.
+ */
+enum intel_pt_blk_type_pos {
+	INTEL_PT_GP_REGS_POS,
+	INTEL_PT_PEBS_BASIC_POS,
+	INTEL_PT_PEBS_MEM_POS,
+	INTEL_PT_LBR_0_POS,
+	INTEL_PT_LBR_1_POS,
+	INTEL_PT_LBR_2_POS,
+	INTEL_PT_XMM_POS,
+	INTEL_PT_BLK_TYPE_CNT
+};
+
+/* Get the array position for a block type */
+static inline int intel_pt_blk_type_pos(enum intel_pt_blk_type blk_type)
+{
+#define BLK_TYPE(bt) [INTEL_PT_##bt] = INTEL_PT_##bt##_POS + 1
+	const int map[INTEL_PT_BLK_TYPE_MAX] = {
+		BLK_TYPE(GP_REGS),
+		BLK_TYPE(PEBS_BASIC),
+		BLK_TYPE(PEBS_MEM),
+		BLK_TYPE(LBR_0),
+		BLK_TYPE(LBR_1),
+		BLK_TYPE(LBR_2),
+		BLK_TYPE(XMM),
+	};
+#undef BLK_TYPE
+
+	return blk_type < INTEL_PT_BLK_TYPE_MAX ? map[blk_type] - 1 : -1;
+}
+
+#define INTEL_PT_BLK_ITEM_ID_CNT	32
+
+/*
+ * Use unions so that the block items can be accessed by name or by array index.
+ * There is an array of 32-bit masks for each block type, which indicate which
+ * values are present. Then arrays of 32 64-bit values for each block type.
+ */
+struct intel_pt_blk_items {
+	union {
+		uint32_t mask[INTEL_PT_BLK_TYPE_CNT];
+		struct {
+			uint32_t has_rflags:1;
+			uint32_t has_rip:1;
+			uint32_t has_rax:1;
+			uint32_t has_rcx:1;
+			uint32_t has_rdx:1;
+			uint32_t has_rbx:1;
+			uint32_t has_rsp:1;
+			uint32_t has_rbp:1;
+			uint32_t has_rsi:1;
+			uint32_t has_rdi:1;
+			uint32_t has_r8:1;
+			uint32_t has_r9:1;
+			uint32_t has_r10:1;
+			uint32_t has_r11:1;
+			uint32_t has_r12:1;
+			uint32_t has_r13:1;
+			uint32_t has_r14:1;
+			uint32_t has_r15:1;
+			uint32_t has_unused_0:14;
+			uint32_t has_ip:1;
+			uint32_t has_applicable_counters:1;
+			uint32_t has_timestamp:1;
+			uint32_t has_unused_1:29;
+			uint32_t has_mem_access_address:1;
+			uint32_t has_mem_aux_info:1;
+			uint32_t has_mem_access_latency:1;
+			uint32_t has_tsx_aux_info:1;
+			uint32_t has_unused_2:28;
+			uint32_t has_lbr_0;
+			uint32_t has_lbr_1;
+			uint32_t has_lbr_2;
+			uint32_t has_xmm;
+		};
+	};
+	union {
+		uint64_t val[INTEL_PT_BLK_TYPE_CNT][INTEL_PT_BLK_ITEM_ID_CNT];
+		struct {
+			struct {
+				uint64_t rflags;
+				uint64_t rip;
+				uint64_t rax;
+				uint64_t rcx;
+				uint64_t rdx;
+				uint64_t rbx;
+				uint64_t rsp;
+				uint64_t rbp;
+				uint64_t rsi;
+				uint64_t rdi;
+				uint64_t r8;
+				uint64_t r9;
+				uint64_t r10;
+				uint64_t r11;
+				uint64_t r12;
+				uint64_t r13;
+				uint64_t r14;
+				uint64_t r15;
+				uint64_t unused_0[INTEL_PT_BLK_ITEM_ID_CNT - 18];
+			};
+			struct {
+				uint64_t ip;
+				uint64_t applicable_counters;
+				uint64_t timestamp;
+				uint64_t unused_1[INTEL_PT_BLK_ITEM_ID_CNT - 3];
+			};
+			struct {
+				uint64_t mem_access_address;
+				uint64_t mem_aux_info;
+				uint64_t mem_access_latency;
+				uint64_t tsx_aux_info;
+				uint64_t unused_2[INTEL_PT_BLK_ITEM_ID_CNT - 4];
+			};
+			uint64_t lbr_0[INTEL_PT_BLK_ITEM_ID_CNT];
+			uint64_t lbr_1[INTEL_PT_BLK_ITEM_ID_CNT];
+			uint64_t lbr_2[INTEL_PT_BLK_ITEM_ID_CNT];
+			uint64_t xmm[INTEL_PT_BLK_ITEM_ID_CNT];
+		};
+	};
+	bool is_32_bit;
+};
+
 struct intel_pt_state {
 	enum intel_pt_sample_type type;
 	int err;
@@ -81,6 +217,7 @@ struct intel_pt_state {
 	enum intel_pt_insn_op insn_op;
 	int insn_len;
 	char insn[INTEL_PT_INSN_BUF_SZ];
+	struct intel_pt_blk_items items;
 };
 
 struct intel_pt_insn;
-- 
2.20.1

  parent reply	other threads:[~2019-06-21 17:38 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2019-06-21 17:38 [GIT PULL] perf/core improvements and fixes Arnaldo Carvalho de Melo
2019-06-21 17:38 ` [PATCH 01/25] perf tests arm64: Compile tests unconditionally Arnaldo Carvalho de Melo
2019-06-21 17:38 ` [PATCH 02/25] perf: cs-etm: Optimize option setup for CPU-wide sessions Arnaldo Carvalho de Melo
2019-06-21 17:38 ` [PATCH 03/25] perf intel-pt: Add new packets for PEBS via PT Arnaldo Carvalho de Melo
2019-06-21 17:38 ` [PATCH 04/25] perf intel-pt: Add Intel PT packet decoder test Arnaldo Carvalho de Melo
2019-06-21 17:38 ` Arnaldo Carvalho de Melo [this message]
2019-06-21 17:38 ` [PATCH 06/25] perf intel-pt: Prepare to synthesize PEBS samples Arnaldo Carvalho de Melo
2019-06-21 17:38 ` [PATCH 07/25] perf intel-pt: Factor out common sample preparation for re-use Arnaldo Carvalho de Melo
2019-06-21 17:38 ` [PATCH 08/25] perf intel-pt: Synthesize PEBS sample basic information Arnaldo Carvalho de Melo
2019-06-21 17:38 ` [PATCH 09/25] perf intel-pt: Add gp registers to synthesized PEBS sample Arnaldo Carvalho de Melo
2019-06-21 17:38 ` [PATCH 10/25] perf intel-pt: Add XMM " Arnaldo Carvalho de Melo
2019-06-21 17:38 ` [PATCH 11/25] perf intel-pt: Add LBR information " Arnaldo Carvalho de Melo
2019-06-21 17:38 ` [PATCH 12/25] perf intel-pt: Add memory " Arnaldo Carvalho de Melo
2019-06-21 17:38 ` [PATCH 13/25] perf intel-pt: Add callchain " Arnaldo Carvalho de Melo
2019-06-21 17:38 ` [PATCH 14/25] tools build: Check if gettid() is available before providing helper Arnaldo Carvalho de Melo
2019-06-21 17:38 ` [PATCH 15/25] perf trace: Fix exclusion of not available syscall names from selector list Arnaldo Carvalho de Melo
2019-06-21 17:38 ` [PATCH 16/25] perf trace: Streamline validation of select syscall names list Arnaldo Carvalho de Melo
2019-06-21 17:38 ` [PATCH 17/25] tools build feature tests: Add missing SPDX headers Arnaldo Carvalho de Melo
2019-06-21 17:38 ` [PATCH 18/25] perf " Arnaldo Carvalho de Melo
2019-06-21 17:38 ` [PATCH 19/25] perf pmu: Fix uncore PMU alias list for ARM64 Arnaldo Carvalho de Melo
2019-06-21 17:38 ` [PATCH 20/25] perf trace: Fixup pointer arithmetic when consuming augmented syscall args Arnaldo Carvalho de Melo
2019-06-21 17:38 ` [PATCH 21/25] perf evsel: Make perf_evsel__name() accept a NULL argument Arnaldo Carvalho de Melo
2019-06-21 17:38 ` [PATCH 22/25] perf tools: Don't hardcode host include path for libslang Arnaldo Carvalho de Melo
2019-06-21 17:38 ` [PATCH 23/25] tools build: Add test to check if slang.h is in /usr/include/slang/ Arnaldo Carvalho de Melo
2019-06-21 17:38 ` [PATCH 24/25] perf build: Handle slang being in /usr/include and " Arnaldo Carvalho de Melo
2019-06-21 17:38 ` [PATCH 25/25] tools build: Fix the zstd test in the test-all.c common case feature test Arnaldo Carvalho de Melo
2019-06-22  6:28 ` [GIT PULL] perf/core improvements and fixes Ingo Molnar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20190621173831.13780-6-acme@kernel.org \
    --to=acme@kernel.org \
    --cc=acme@redhat.com \
    --cc=adrian.hunter@intel.com \
    --cc=jolsa@kernel.org \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=namhyung@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=williams@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).