All of lore.kernel.org
 help / color / mirror / Atom feed
From: Arnaldo Carvalho de Melo <acme@kernel.org>
To: Ingo Molnar <mingo@kernel.org>
Cc: linux-kernel@vger.kernel.org,
	Adrian Hunter <adrian.hunter@intel.com>,
	Jiri Olsa <jolsa@redhat.com>,
	Arnaldo Carvalho de Melo <acme@redhat.com>
Subject: [PATCH 11/22] perf tools: Add Intel PT support for decoding CYC packets
Date: Wed, 26 Aug 2015 12:58:01 -0300	[thread overview]
Message-ID: <1440604692-26918-12-git-send-email-acme@kernel.org> (raw)
In-Reply-To: <1440604692-26918-1-git-send-email-acme@kernel.org>

From: Adrian Hunter <adrian.hunter@intel.com>

CYC packets provide even finer-grained timestamp information than MTC and
TSC packets.  A CYC packet contains the number of CPU cycles since the
last CYC packet.

This patch just adds decoder support.  The CPU frequency can be related
to TSC using the Maximum Non-Turbo Ratio in combination with the CBR
(core-to-bus ratio) packet.  However, more accuracy is achieved by simply
interpolating the number of cycles between other timing packets like MTC
or TSC.  This patch takes the latter approach.

Support for a default value and validation of values is provided by a
later patch.  Documentation is also updated in a separate patch.

For details refer to the June 2015 or later Intel 64 and IA-32
Architectures SDM Chapter 36 Intel Processor Trace.

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/1437150840-31811-23-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 .../perf/util/intel-pt-decoder/intel-pt-decoder.c  | 311 ++++++++++++++++++++-
 1 file changed, 306 insertions(+), 5 deletions(-)

diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
index f7119a11a4b6..0845c5e6ad1d 100644
--- a/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
+++ b/tools/perf/util/intel-pt-decoder/intel-pt-decoder.c
@@ -88,6 +88,7 @@ struct intel_pt_decoder {
 	bool mtc_insn;
 	bool pge;
 	bool have_tma;
+	bool have_cyc;
 	uint64_t pos;
 	uint64_t last_ip;
 	uint64_t ip;
@@ -98,6 +99,8 @@ struct intel_pt_decoder {
 	uint64_t ret_addr;
 	uint64_t ctc_timestamp;
 	uint64_t ctc_delta;
+	uint64_t cycle_cnt;
+	uint64_t cyc_ref_timestamp;
 	uint32_t last_mtc;
 	uint32_t tsc_ctc_ratio_n;
 	uint32_t tsc_ctc_ratio_d;
@@ -111,8 +114,13 @@ struct intel_pt_decoder {
 	struct intel_pt_pkt tnt;
 	int pkt_step;
 	int pkt_len;
+	int last_packet_type;
 	unsigned int cbr;
 	unsigned int max_non_turbo_ratio;
+	double max_non_turbo_ratio_fp;
+	double cbr_cyc_to_tsc;
+	double calc_cyc_to_tsc;
+	bool have_calc_cyc_to_tsc;
 	int exec_mode;
 	unsigned int insn_bytes;
 	uint64_t sign_bit;
@@ -189,7 +197,8 @@ struct intel_pt_decoder *intel_pt_decoder_new(struct intel_pt_params *params)
 	decoder->period             = params->period;
 	decoder->period_type        = params->period_type;
 
-	decoder->max_non_turbo_ratio = params->max_non_turbo_ratio;
+	decoder->max_non_turbo_ratio    = params->max_non_turbo_ratio;
+	decoder->max_non_turbo_ratio_fp = params->max_non_turbo_ratio;
 
 	intel_pt_setup_period(decoder);
 
@@ -514,10 +523,247 @@ static int intel_pt_get_split_packet(struct intel_pt_decoder *decoder)
 	return ret;
 }
 
+struct intel_pt_pkt_info { /* state handed to a lookahead callback for each packet */
+	struct intel_pt_decoder	  *decoder; /* decoder on whose buffer the lookahead runs */
+	struct intel_pt_pkt       packet; /* packet most recently read by the lookahead */
+	uint64_t                  pos; /* position of 'packet'; starts from decoder->pos */
+	int                       pkt_len; /* length of 'packet' from intel_pt_get_packet() */
+	int                       last_packet_type; /* type of the preceding packet; seeded from decoder->last_packet_type */
+	void                      *data; /* caller's private data, passed through unchanged */
+};
+
+typedef int (*intel_pt_pkt_cb_t)(struct intel_pt_pkt_info *pkt_info); /* return non-zero to stop the lookahead, 0 to continue */
+
+/* Look ahead at packets in the current buffer; cb() returns non-zero to stop */
+static int intel_pt_pkt_lookahead(struct intel_pt_decoder *decoder,
+				  intel_pt_pkt_cb_t cb, void *data)
+{
+	struct intel_pt_pkt_info pkt_info;
+	const unsigned char *buf = decoder->buf; /* local cursor: decoder->buf is not advanced here */
+	size_t len = decoder->len; /* local count: decoder->len is not changed here */
+	int ret;
+
+	pkt_info.decoder          = decoder;
+	pkt_info.pos              = decoder->pos;
+	pkt_info.pkt_len          = decoder->pkt_step; /* first advance below uses the decoder's current step */
+	pkt_info.last_packet_type = decoder->last_packet_type;
+	pkt_info.data             = data;
+
+	while (1) {
+		do {
+			pkt_info.pos += pkt_info.pkt_len; /* step past the previous packet */
+			buf          += pkt_info.pkt_len;
+			len          -= pkt_info.pkt_len;
+
+			if (!len) /* reached the end of the current buffer */
+				return INTEL_PT_NEED_MORE_BYTES;
+
+			ret = intel_pt_get_packet(buf, len, &pkt_info.packet);
+			if (!ret) /* a zero result is also reported as needing more bytes */
+				return INTEL_PT_NEED_MORE_BYTES;
+			if (ret < 0) /* negative results are passed back to the caller */
+				return ret;
+
+			pkt_info.pkt_len = ret; /* positive result is used as the packet length */
+		} while (pkt_info.packet.type == INTEL_PT_PAD); /* PAD packets are skipped, never given to cb */
+
+		ret = cb(&pkt_info);
+		if (ret) /* callback asked to stop: lookahead ends with 0 */
+			return 0;
+
+		pkt_info.last_packet_type = pkt_info.packet.type; /* remembered for the next callback */
+	}
+}
+
+struct intel_pt_calc_cyc_to_tsc_info { /* private lookahead state for intel_pt_calc_cyc_cb() */
+	uint64_t        cycle_cnt; /* sum of CYC payloads seen during the lookahead */
+	unsigned int    cbr; /* CBR payload seen during the lookahead, 0 if none yet */
+	uint32_t        last_mtc; /* copy of decoder->last_mtc, updated by MTC/TMA */
+	uint64_t        ctc_timestamp; /* copy of decoder->ctc_timestamp, updated by TMA */
+	uint64_t        ctc_delta; /* copy of decoder->ctc_delta, updated by MTC/TMA */
+	uint64_t        tsc_timestamp; /* copy of decoder->tsc_timestamp, updated by TSC */
+	uint64_t        timestamp; /* copy of decoder->timestamp, advanced as the lookahead proceeds */
+	bool            have_tma; /* copy of decoder->have_tma, set when a TMA is processed */
+	bool            from_mtc; /* true when the lookahead was started from MTC handling */
+	double          cbr_cyc_to_tsc; /* max_non_turbo_ratio_fp / cbr for the CBR above */
+};
+
+static int intel_pt_calc_cyc_cb(struct intel_pt_pkt_info *pkt_info)
+{ /* Lookahead callback: sums CYC payloads up to the next usable MTC/TSC and derives TSC ticks per cycle. Returns 0 to continue, 1 to stop. */
+	struct intel_pt_decoder *decoder = pkt_info->decoder;
+	struct intel_pt_calc_cyc_to_tsc_info *data = pkt_info->data;
+	uint64_t timestamp;
+	double cyc_to_tsc;
+	unsigned int cbr;
+	uint32_t mtc, mtc_delta, ctc, fc, ctc_rem;
+
+	switch (pkt_info->packet.type) {
+	case INTEL_PT_TNT:
+	case INTEL_PT_TIP_PGE:
+	case INTEL_PT_TIP:
+	case INTEL_PT_FUP:
+	case INTEL_PT_PSB:
+	case INTEL_PT_PIP:
+	case INTEL_PT_MODE_EXEC:
+	case INTEL_PT_MODE_TSX:
+	case INTEL_PT_PSBEND:
+	case INTEL_PT_PAD:
+	case INTEL_PT_VMCS:
+	case INTEL_PT_MNT:
+		return 0; /* ignored here: continue the lookahead */
+
+	case INTEL_PT_MTC:
+		if (!data->have_tma) /* MTC is ignored while have_tma is false */
+			return 0;
+
+		mtc = pkt_info->packet.payload;
+		if (mtc > data->last_mtc)
+			mtc_delta = mtc - data->last_mtc;
+		else
+			mtc_delta = mtc + 256 - data->last_mtc; /* mtc <= last_mtc: treated as a wrap of the 8-bit value */
+		data->ctc_delta += mtc_delta << decoder->mtc_shift;
+		data->last_mtc = mtc;
+
+		if (decoder->tsc_ctc_mult) { /* integer multiplier available: use it instead of multdiv() */
+			timestamp = data->ctc_timestamp +
+				data->ctc_delta * decoder->tsc_ctc_mult;
+		} else {
+			timestamp = data->ctc_timestamp +
+				multdiv(data->ctc_delta,
+					decoder->tsc_ctc_ratio_n,
+					decoder->tsc_ctc_ratio_d);
+		}
+
+		if (timestamp < data->timestamp) /* would move time backwards: stop */
+			return 1;
+
+		if (pkt_info->last_packet_type != INTEL_PT_CYC) { /* not preceded by CYC: just track the time and continue */
+			data->timestamp = timestamp;
+			return 0;
+		}
+
+		break; /* MTC right after a CYC: go on to calculate the ratio */
+
+	case INTEL_PT_TSC:
+		timestamp = pkt_info->packet.payload |
+			    (data->timestamp & (0xffULL << 56)); /* top 8 bits come from the running timestamp */
+		if (data->from_mtc && timestamp < data->timestamp &&
+		    data->timestamp - timestamp < decoder->tsc_slip)
+			return 1; /* slightly behind (within tsc_slip) an MTC-derived time: stop */
+		while (timestamp < data->timestamp)
+			timestamp += (1ULL << 56); /* add 2^56 until not behind the running timestamp */
+		if (pkt_info->last_packet_type != INTEL_PT_CYC) {
+			if (data->from_mtc) /* lookahead started from MTC stops at a TSC not preceded by CYC */
+				return 1;
+			data->tsc_timestamp = timestamp;
+			data->timestamp = timestamp;
+			return 0;
+		}
+		break; /* TSC right after a CYC: go on to calculate the ratio */
+
+	case INTEL_PT_TMA:
+		if (data->from_mtc) /* lookahead started from MTC stops at any TMA */
+			return 1;
+
+		if (!decoder->tsc_ctc_ratio_d) /* tsc_ctc_ratio_d is zero: TMA is ignored */
+			return 0;
+
+		ctc = pkt_info->packet.payload;
+		fc = pkt_info->packet.count;
+		ctc_rem = ctc & decoder->ctc_rem_mask;
+
+		data->last_mtc = (ctc >> decoder->mtc_shift) & 0xff;
+
+		data->ctc_timestamp = data->tsc_timestamp - fc;
+		if (decoder->tsc_ctc_mult) {
+			data->ctc_timestamp -= ctc_rem * decoder->tsc_ctc_mult;
+		} else {
+			data->ctc_timestamp -=
+				multdiv(ctc_rem, decoder->tsc_ctc_ratio_n,
+					decoder->tsc_ctc_ratio_d);
+		}
+
+		data->ctc_delta = 0;
+		data->have_tma = true; /* later MTC packets in this lookahead are now processed */
+
+		return 0;
+
+	case INTEL_PT_CYC:
+		data->cycle_cnt += pkt_info->packet.payload; /* accumulate cycles for the ratio below */
+		return 0;
+
+	case INTEL_PT_CBR:
+		cbr = pkt_info->packet.payload;
+		if (data->cbr && data->cbr != cbr) /* CBR differs from the one already seen: stop */
+			return 1;
+		data->cbr = cbr;
+		data->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr; /* NOTE(review): a zero cbr payload is not guarded against here */
+		return 0;
+
+	case INTEL_PT_TIP_PGD:
+	case INTEL_PT_TRACESTOP:
+	case INTEL_PT_OVF:
+	case INTEL_PT_BAD: /* Does not happen */
+	default:
+		return 1; /* stop the lookahead without calculating anything */
+	}
+
+	if (!data->cbr && decoder->cbr) { /* no CBR seen during the lookahead: use the decoder's */
+		data->cbr = decoder->cbr;
+		data->cbr_cyc_to_tsc = decoder->cbr_cyc_to_tsc;
+	}
+
+	if (!data->cycle_cnt) /* no cycles counted: nothing to divide by */
+		return 1;
+
+	cyc_to_tsc = (double)(timestamp - decoder->timestamp) / data->cycle_cnt; /* timestamp advance per counted cycle */
+
+	if (data->cbr && cyc_to_tsc > data->cbr_cyc_to_tsc &&
+	    cyc_to_tsc / data->cbr_cyc_to_tsc > 1.25) { /* more than 25% above the CBR-based value: reject */
+		intel_pt_log("Timestamp: calculated %g TSC ticks per cycle too big (c.f. CBR-based value %g), pos " x64_fmt "\n",
+			     cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos);
+		return 1;
+	}
+
+	decoder->calc_cyc_to_tsc = cyc_to_tsc; /* the result is stored on the decoder itself */
+	decoder->have_calc_cyc_to_tsc = true;
+
+	if (data->cbr) {
+		intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. CBR-based value %g, pos " x64_fmt "\n",
+			     cyc_to_tsc, data->cbr_cyc_to_tsc, pkt_info->pos);
+	} else {
+		intel_pt_log("Timestamp: calculated %g TSC ticks per cycle c.f. unknown CBR-based value, pos " x64_fmt "\n",
+			     cyc_to_tsc, pkt_info->pos);
+	}
+
+	return 1; /* ratio calculated: lookahead is finished */
+}
+
+static void intel_pt_calc_cyc_to_tsc(struct intel_pt_decoder *decoder,
+				     bool from_mtc)
+{ /* Runs a lookahead with intel_pt_calc_cyc_cb(), which may set decoder->calc_cyc_to_tsc. */
+	struct intel_pt_calc_cyc_to_tsc_info data = {
+		.cycle_cnt      = 0, /* cycles are counted from the start of the lookahead */
+		.cbr            = 0, /* 0 means no CBR seen yet in the lookahead */
+		.last_mtc       = decoder->last_mtc, /* the remaining fields start as copies of decoder state */
+		.ctc_timestamp  = decoder->ctc_timestamp,
+		.ctc_delta      = decoder->ctc_delta,
+		.tsc_timestamp  = decoder->tsc_timestamp,
+		.timestamp      = decoder->timestamp,
+		.have_tma       = decoder->have_tma,
+		.from_mtc       = from_mtc, /* tells the callback which packet handling started the lookahead */
+		.cbr_cyc_to_tsc = 0,
+	};
+
+	intel_pt_pkt_lookahead(decoder, intel_pt_calc_cyc_cb, &data); /* return value is deliberately unused here */
+}
+
 static int intel_pt_get_next_packet(struct intel_pt_decoder *decoder)
 {
 	int ret;
 
+	decoder->last_packet_type = decoder->packet.type;
+
 	do {
 		decoder->pos += decoder->pkt_step;
 		decoder->buf += decoder->pkt_step;
@@ -954,6 +1200,13 @@ static void intel_pt_calc_tsc_timestamp(struct intel_pt_decoder *decoder)
 		decoder->timestamp_insn_cnt = 0;
 	}
 
+	if (decoder->last_packet_type == INTEL_PT_CYC) {
+		decoder->cyc_ref_timestamp = decoder->timestamp;
+		decoder->cycle_cnt = 0;
+		decoder->have_calc_cyc_to_tsc = false;
+		intel_pt_calc_cyc_to_tsc(decoder, false);
+	}
+
 	intel_pt_log_to("Setting timestamp", decoder->timestamp);
 }
 
@@ -962,6 +1215,7 @@ static int intel_pt_overflow(struct intel_pt_decoder *decoder)
 	intel_pt_log("ERROR: Buffer overflow\n");
 	intel_pt_clear_tx_flags(decoder);
 	decoder->have_tma = false;
+	decoder->cbr = 0;
 	decoder->pkt_state = INTEL_PT_STATE_ERR_RESYNC;
 	decoder->overflow = true;
 	return -EOVERFLOW;
@@ -1026,6 +1280,49 @@ static void intel_pt_calc_mtc_timestamp(struct intel_pt_decoder *decoder)
 
 	decoder->timestamp_insn_cnt = 0;
 	decoder->last_mtc = mtc;
+
+	if (decoder->last_packet_type == INTEL_PT_CYC) {
+		decoder->cyc_ref_timestamp = decoder->timestamp;
+		decoder->cycle_cnt = 0;
+		decoder->have_calc_cyc_to_tsc = false;
+		intel_pt_calc_cyc_to_tsc(decoder, true);
+	}
+}
+
+static void intel_pt_calc_cbr(struct intel_pt_decoder *decoder)
+{ /* Records the CBR packet payload and the CBR-based cycles-to-TSC ratio. */
+	unsigned int cbr = decoder->packet.payload;
+
+	if (decoder->cbr == cbr) /* unchanged: keep the existing ratio */
+		return;
+
+	decoder->cbr = cbr;
+	decoder->cbr_cyc_to_tsc = decoder->max_non_turbo_ratio_fp / cbr; /* NOTE(review): a zero cbr payload is not guarded against here */
+}
+
+static void intel_pt_calc_cyc_timestamp(struct intel_pt_decoder *decoder)
+{ /* Handles a CYC packet: adds its payload to cycle_cnt and, when possible, advances decoder->timestamp. */
+	uint64_t timestamp = decoder->cyc_ref_timestamp; /* estimate starts from the reference timestamp */
+
+	decoder->have_cyc = true;
+
+	decoder->cycle_cnt += decoder->packet.payload; /* cycles counted since cycle_cnt was last reset */
+
+	if (!decoder->cyc_ref_timestamp) /* no reference timestamp set: only count cycles */
+		return;
+
+	if (decoder->have_calc_cyc_to_tsc) /* prefer the ratio calculated by the lookahead */
+		timestamp += decoder->cycle_cnt * decoder->calc_cyc_to_tsc;
+	else if (decoder->cbr) /* otherwise use the CBR-based ratio */
+		timestamp += decoder->cycle_cnt * decoder->cbr_cyc_to_tsc;
+	else
+		return; /* no ratio available: timestamp is left unchanged */
+
+	if (timestamp < decoder->timestamp) /* never move decoder->timestamp backwards */
+		intel_pt_log("Suppressing CYC timestamp " x64_fmt " less than current timestamp " x64_fmt "\n",
+			     timestamp, decoder->timestamp);
+	else
+		decoder->timestamp = timestamp;
+}
 
 /* Walk PSB+ packets when already in sync. */
@@ -1065,7 +1362,7 @@ static int intel_pt_walk_psbend(struct intel_pt_decoder *decoder)
 			break;
 
 		case INTEL_PT_CBR:
-			decoder->cbr = decoder->packet.payload;
+			intel_pt_calc_cbr(decoder);
 			break;
 
 		case INTEL_PT_MODE_EXEC:
@@ -1182,6 +1479,7 @@ static int intel_pt_walk_fup_tip(struct intel_pt_decoder *decoder)
 			break;
 
 		case INTEL_PT_CYC:
+			intel_pt_calc_cyc_timestamp(decoder);
 			break;
 
 		case INTEL_PT_MODE_EXEC:
@@ -1318,10 +1616,11 @@ next:
 			break;
 
 		case INTEL_PT_CYC:
+			intel_pt_calc_cyc_timestamp(decoder);
 			break;
 
 		case INTEL_PT_CBR:
-			decoder->cbr = decoder->packet.payload;
+			intel_pt_calc_cbr(decoder);
 			break;
 
 		case INTEL_PT_MODE_EXEC:
@@ -1398,10 +1697,11 @@ static int intel_pt_walk_psb(struct intel_pt_decoder *decoder)
 			break;
 
 		case INTEL_PT_CYC:
+			intel_pt_calc_cyc_timestamp(decoder);
 			break;
 
 		case INTEL_PT_CBR:
-			decoder->cbr = decoder->packet.payload;
+			intel_pt_calc_cbr(decoder);
 			break;
 
 		case INTEL_PT_PIP:
@@ -1493,10 +1793,11 @@ static int intel_pt_walk_to_ip(struct intel_pt_decoder *decoder)
 			break;
 
 		case INTEL_PT_CYC:
+			intel_pt_calc_cyc_timestamp(decoder);
 			break;
 
 		case INTEL_PT_CBR:
-			decoder->cbr = decoder->packet.payload;
+			intel_pt_calc_cbr(decoder);
 			break;
 
 		case INTEL_PT_PIP:
-- 
2.1.0


  parent reply	other threads:[~2015-08-26 16:05 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-08-26 15:57 [GIT PULL 00/22] perf/core improvements and fixes Arnaldo Carvalho de Melo
2015-08-26 15:57 ` [PATCH 01/22] perf tools: Fix tarball build broken by pt/bts Arnaldo Carvalho de Melo
2015-08-26 15:57 ` [PATCH 02/22] perf annotate: Reset the dso find_symbol cache when removing symbols Arnaldo Carvalho de Melo
2015-08-26 15:57 ` [PATCH 03/22] perf ui tui progress: Implement the ui_progress_ops->finish() method Arnaldo Carvalho de Melo
2015-08-26 15:57 ` [PATCH 04/22] perf ordered_events: Clear the progress bar at the end of a flush Arnaldo Carvalho de Melo
2015-08-26 15:57 ` [PATCH 05/22] perf tools: Fix Intel PT 'instructions' sample period Arnaldo Carvalho de Melo
2015-08-26 15:57 ` [PATCH 06/22] perf tools: Add Intel PT support for PSB periods Arnaldo Carvalho de Melo
2015-08-26 15:57 ` [PATCH 07/22] perf tools: Add new Intel PT packet definitions Arnaldo Carvalho de Melo
2015-08-26 15:57 ` [PATCH 08/22] perf tools: Pass Intel PT information for decoding MTC and CYC Arnaldo Carvalho de Melo
2015-08-26 15:57 ` [PATCH 09/22] perf tools: Add Intel PT support for decoding MTC packets Arnaldo Carvalho de Melo
2015-08-26 15:58 ` [PATCH 10/22] perf tools: Add Intel PT support for using " Arnaldo Carvalho de Melo
2015-08-26 15:58 ` Arnaldo Carvalho de Melo [this message]
2015-08-26 15:58 ` [PATCH 12/22] perf tools: Add Intel PT support for using CYC packets Arnaldo Carvalho de Melo
2015-08-26 15:58 ` [PATCH 13/22] perf tools: Add Intel PT support for decoding TRACESTOP packets Arnaldo Carvalho de Melo
2015-08-26 15:58 ` [PATCH 14/22] perf tools: Update Intel PT documentation Arnaldo Carvalho de Melo
2015-08-26 15:58 ` [PATCH 15/22] perf probe: Prevent segfault when reading probe point with absolute address Arnaldo Carvalho de Melo
2015-08-26 15:58 ` [PATCH 16/22] perf tools: Remove export.h from MANIFEST Arnaldo Carvalho de Melo
2015-08-26 15:58 ` [PATCH 17/22] tools build: Allow duplicate objects in the object list Arnaldo Carvalho de Melo
2015-08-26 15:58 ` [PATCH 18/22] perf probe: Fix list result when symbol can't be found Arnaldo Carvalho de Melo
2015-08-26 15:58 ` [PATCH 19/22] perf probe: Fix list result when address is zero Arnaldo Carvalho de Melo
2015-08-26 15:58 ` [PATCH 20/22] perf probe: Fix error reported when offset without function Arnaldo Carvalho de Melo
2015-08-26 15:58 ` [PATCH 21/22] perf probe: Support probing at absolute address Arnaldo Carvalho de Melo
2015-08-26 15:58 ` [PATCH 22/22] tracing/uprobes: Do not print '0x (null)' when offset is 0 Arnaldo Carvalho de Melo
2015-08-28  6:24 ` [GIT PULL 00/22] perf/core improvements and fixes Ingo Molnar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1440604692-26918-12-git-send-email-acme@kernel.org \
    --to=acme@kernel.org \
    --cc=acme@redhat.com \
    --cc=adrian.hunter@intel.com \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.