All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH v5 RESEND] perf: Support for Arm A32/T32 instruction sets in CoreSight trace
@ 2018-12-03 12:18 ` Robert Walker
  0 siblings, 0 replies; 6+ messages in thread
From: Robert Walker @ 2018-12-03 12:18 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Mathieu Poirier, Peter Zijlstra, CoreSight, linux-kernel,
	Alexander Shishkin, Ingo Molnar, Namhyung Kim, Robert Walker,
	Jiri Olsa, linux-arm-kernel

This patch adds support for generating instruction samples from trace of
AArch32 programs using the A32 and T32 instruction sets.

T32 has variable 2 or 4 byte instruction size, so the conversion between
addresses and instruction counts requires extra information from the trace
decoder, requiring version 0.10.0 of OpenCSD.  A check for the OpenCSD
library version has been added to the feature check for OpenCSD.

Signed-off-by: Robert Walker <robert.walker@arm.com>
---

Hi Arnaldo,

Please add this patch to the queue for 4.21 (originally sent as https://lkml.org/lkml/2018/11/9/965)

Mathieu has reviewed it: https://lkml.org/lkml/2018/11/9/1432

Thanks

Robert Walker


Changes since v4:
 Formatting of comment block

 tools/build/feature/test-libopencsd.c           |  8 +++
 tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 29 ++++++++++
 tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 10 ++++
 tools/perf/util/cs-etm.c                        | 70 +++++++++++--------------
 4 files changed, 78 insertions(+), 39 deletions(-)

diff --git a/tools/build/feature/test-libopencsd.c b/tools/build/feature/test-libopencsd.c
index 5ff1246..d68eb4f 100644
--- a/tools/build/feature/test-libopencsd.c
+++ b/tools/build/feature/test-libopencsd.c
@@ -1,6 +1,14 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <opencsd/c_api/opencsd_c_api.h>
 
+/*
+ * Check OpenCSD library version is sufficient to provide required features
+ */
+#define OCSD_MIN_VER ((0 << 16) | (10 << 8) | (0))
+#if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER)
+#error "OpenCSD >= 0.10.0 is required"
+#endif
+
 int main(void)
 {
 	(void)ocsd_get_version();
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index 938def6..5efb616 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -263,9 +263,12 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder)
 	decoder->tail = 0;
 	decoder->packet_count = 0;
 	for (i = 0; i < MAX_BUFFER; i++) {
+		decoder->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
 		decoder->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
 		decoder->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
+		decoder->packet_buffer[i].instr_count = 0;
 		decoder->packet_buffer[i].last_instr_taken_branch = false;
+		decoder->packet_buffer[i].last_instr_size = 0;
 		decoder->packet_buffer[i].exc = false;
 		decoder->packet_buffer[i].exc_ret = false;
 		decoder->packet_buffer[i].cpu = INT_MIN;
@@ -294,11 +297,15 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder,
 	decoder->packet_count++;
 
 	decoder->packet_buffer[et].sample_type = sample_type;
+	decoder->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN;
 	decoder->packet_buffer[et].exc = false;
 	decoder->packet_buffer[et].exc_ret = false;
 	decoder->packet_buffer[et].cpu = *((int *)inode->priv);
 	decoder->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR;
 	decoder->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR;
+	decoder->packet_buffer[et].instr_count = 0;
+	decoder->packet_buffer[et].last_instr_taken_branch = false;
+	decoder->packet_buffer[et].last_instr_size = 0;
 
 	if (decoder->packet_count == MAX_BUFFER - 1)
 		return OCSD_RESP_WAIT;
@@ -321,8 +328,28 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
 
 	packet = &decoder->packet_buffer[decoder->tail];
 
+	switch (elem->isa) {
+	case ocsd_isa_aarch64:
+		packet->isa = CS_ETM_ISA_A64;
+		break;
+	case ocsd_isa_arm:
+		packet->isa = CS_ETM_ISA_A32;
+		break;
+	case ocsd_isa_thumb2:
+		packet->isa = CS_ETM_ISA_T32;
+		break;
+	case ocsd_isa_tee:
+	case ocsd_isa_jazelle:
+	case ocsd_isa_custom:
+	case ocsd_isa_unknown:
+	default:
+		packet->isa = CS_ETM_ISA_UNKNOWN;
+	}
+
 	packet->start_addr = elem->st_addr;
 	packet->end_addr = elem->en_addr;
+	packet->instr_count = elem->num_instr_range;
+
 	switch (elem->last_i_type) {
 	case OCSD_INSTR_BR:
 	case OCSD_INSTR_BR_INDIRECT:
@@ -336,6 +363,8 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
 		break;
 	}
 
+	packet->last_instr_size = elem->last_instr_sz;
+
 	return ret;
 }
 
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
index 612b575..9351bd1 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
@@ -28,11 +28,21 @@ enum cs_etm_sample_type {
 	CS_ETM_TRACE_ON = 1 << 1,
 };
 
+enum cs_etm_isa {
+	CS_ETM_ISA_UNKNOWN,
+	CS_ETM_ISA_A64,
+	CS_ETM_ISA_A32,
+	CS_ETM_ISA_T32,
+};
+
 struct cs_etm_packet {
 	enum cs_etm_sample_type sample_type;
+	enum cs_etm_isa isa;
 	u64 start_addr;
 	u64 end_addr;
+	u32 instr_count;
 	u8 last_instr_taken_branch;
+	u8 last_instr_size;
 	u8 exc;
 	u8 exc_ret;
 	int cpu;
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 73430b7..48ad217 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -31,14 +31,6 @@
 
 #define MAX_TIMESTAMP (~0ULL)
 
-/*
- * A64 instructions are always 4 bytes
- *
- * Only A64 is supported, so can use this constant for converting between
- * addresses and instruction counts, calculting offsets etc
- */
-#define A64_INSTR_SIZE 4
-
 struct cs_etm_auxtrace {
 	struct auxtrace auxtrace;
 	struct auxtrace_queues queues;
@@ -510,21 +502,17 @@ static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq)
 	etmq->last_branch_rb->nr = 0;
 }
 
-static inline u64 cs_etm__last_executed_instr(struct cs_etm_packet *packet)
-{
-	/* Returns 0 for the CS_ETM_TRACE_ON packet */
-	if (packet->sample_type == CS_ETM_TRACE_ON)
-		return 0;
+static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
+					 u64 addr) {
+	u8 instrBytes[2];
 
+	cs_etm__mem_access(etmq, addr, ARRAY_SIZE(instrBytes), instrBytes);
 	/*
-	 * The packet records the execution range with an exclusive end address
-	 *
-	 * A64 instructions are constant size, so the last executed
-	 * instruction is A64_INSTR_SIZE before the end address
-	 * Will need to do instruction level decode for T32 instructions as
-	 * they can be variable size (not yet supported).
+	 * T32 instruction size is indicated by bits[15:11] of the first
+	 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
+	 * denote a 32-bit instruction.
 	 */
-	return packet->end_addr - A64_INSTR_SIZE;
+	return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
 }
 
 static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
@@ -536,27 +524,32 @@ static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
 	return packet->start_addr;
 }
 
-static inline u64 cs_etm__instr_count(const struct cs_etm_packet *packet)
+static inline
+u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
 {
-	/*
-	 * Only A64 instructions are currently supported, so can get
-	 * instruction count by dividing.
-	 * Will need to do instruction level decode for T32 instructions as
-	 * they can be variable size (not yet supported).
-	 */
-	return (packet->end_addr - packet->start_addr) / A64_INSTR_SIZE;
+	/* Returns 0 for the CS_ETM_TRACE_ON packet */
+	if (packet->sample_type == CS_ETM_TRACE_ON)
+		return 0;
+
+	return packet->end_addr - packet->last_instr_size;
 }
 
-static inline u64 cs_etm__instr_addr(const struct cs_etm_packet *packet,
+static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
+				     const struct cs_etm_packet *packet,
 				     u64 offset)
 {
-	/*
-	 * Only A64 instructions are currently supported, so can get
-	 * instruction address by muliplying.
-	 * Will need to do instruction level decode for T32 instructions as
-	 * they can be variable size (not yet supported).
-	 */
-	return packet->start_addr + offset * A64_INSTR_SIZE;
+	if (packet->isa == CS_ETM_ISA_T32) {
+		u64 addr = packet->start_addr;
+
+		while (offset > 0) {
+			addr += cs_etm__t32_instr_size(etmq, addr);
+			offset--;
+		}
+		return addr;
+	}
+
+	/* Assume a 4 byte instruction size (A32/A64) */
+	return packet->start_addr + offset * 4;
 }
 
 static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq)
@@ -888,9 +881,8 @@ static int cs_etm__sample(struct cs_etm_queue *etmq)
 	struct cs_etm_auxtrace *etm = etmq->etm;
 	struct cs_etm_packet *tmp;
 	int ret;
-	u64 instrs_executed;
+	u64 instrs_executed = etmq->packet->instr_count;
 
-	instrs_executed = cs_etm__instr_count(etmq->packet);
 	etmq->period_instructions += instrs_executed;
 
 	/*
@@ -920,7 +912,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq)
 		 * executed, but PC has not advanced to next instruction)
 		 */
 		u64 offset = (instrs_executed - instrs_over - 1);
-		u64 addr = cs_etm__instr_addr(etmq->packet, offset);
+		u64 addr = cs_etm__instr_addr(etmq, etmq->packet, offset);
 
 		ret = cs_etm__synth_instruction_sample(
 			etmq, addr, etm->instructions_sample_period);
-- 
2.7.4


_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH v5 RESEND] perf: Support for Arm A32/T32 instruction sets in CoreSight trace
@ 2018-12-03 12:18 ` Robert Walker
  0 siblings, 0 replies; 6+ messages in thread
From: Robert Walker @ 2018-12-03 12:18 UTC (permalink / raw)
  To: Arnaldo Carvalho de Melo
  Cc: Robert Walker, Mathieu Poirier, Peter Zijlstra, Ingo Molnar,
	Alexander Shishkin, Jiri Olsa, Namhyung Kim, linux-kernel,
	linux-arm-kernel, CoreSight

This patch adds support for generating instruction samples from trace of
AArch32 programs using the A32 and T32 instruction sets.

T32 has variable 2 or 4 byte instruction size, so the conversion between
addresses and instruction counts requires extra information from the trace
decoder, requiring version 0.10.0 of OpenCSD.  A check for the OpenCSD
library version has been added to the feature check for OpenCSD.

Signed-off-by: Robert Walker <robert.walker@arm.com>
---

Hi Arnaldo,

Please add this patch to the queue for 4.21 (originally sent as https://lkml.org/lkml/2018/11/9/965)

Mathieu has reviewed it: https://lkml.org/lkml/2018/11/9/1432

Thanks

Robert Walker


Changes since v4:
 Formatting of comment block

 tools/build/feature/test-libopencsd.c           |  8 +++
 tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 29 ++++++++++
 tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 10 ++++
 tools/perf/util/cs-etm.c                        | 70 +++++++++++--------------
 4 files changed, 78 insertions(+), 39 deletions(-)

diff --git a/tools/build/feature/test-libopencsd.c b/tools/build/feature/test-libopencsd.c
index 5ff1246..d68eb4f 100644
--- a/tools/build/feature/test-libopencsd.c
+++ b/tools/build/feature/test-libopencsd.c
@@ -1,6 +1,14 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <opencsd/c_api/opencsd_c_api.h>
 
+/*
+ * Check OpenCSD library version is sufficient to provide required features
+ */
+#define OCSD_MIN_VER ((0 << 16) | (10 << 8) | (0))
+#if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER)
+#error "OpenCSD >= 0.10.0 is required"
+#endif
+
 int main(void)
 {
 	(void)ocsd_get_version();
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index 938def6..5efb616 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -263,9 +263,12 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder)
 	decoder->tail = 0;
 	decoder->packet_count = 0;
 	for (i = 0; i < MAX_BUFFER; i++) {
+		decoder->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
 		decoder->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
 		decoder->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
+		decoder->packet_buffer[i].instr_count = 0;
 		decoder->packet_buffer[i].last_instr_taken_branch = false;
+		decoder->packet_buffer[i].last_instr_size = 0;
 		decoder->packet_buffer[i].exc = false;
 		decoder->packet_buffer[i].exc_ret = false;
 		decoder->packet_buffer[i].cpu = INT_MIN;
@@ -294,11 +297,15 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder,
 	decoder->packet_count++;
 
 	decoder->packet_buffer[et].sample_type = sample_type;
+	decoder->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN;
 	decoder->packet_buffer[et].exc = false;
 	decoder->packet_buffer[et].exc_ret = false;
 	decoder->packet_buffer[et].cpu = *((int *)inode->priv);
 	decoder->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR;
 	decoder->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR;
+	decoder->packet_buffer[et].instr_count = 0;
+	decoder->packet_buffer[et].last_instr_taken_branch = false;
+	decoder->packet_buffer[et].last_instr_size = 0;
 
 	if (decoder->packet_count == MAX_BUFFER - 1)
 		return OCSD_RESP_WAIT;
@@ -321,8 +328,28 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
 
 	packet = &decoder->packet_buffer[decoder->tail];
 
+	switch (elem->isa) {
+	case ocsd_isa_aarch64:
+		packet->isa = CS_ETM_ISA_A64;
+		break;
+	case ocsd_isa_arm:
+		packet->isa = CS_ETM_ISA_A32;
+		break;
+	case ocsd_isa_thumb2:
+		packet->isa = CS_ETM_ISA_T32;
+		break;
+	case ocsd_isa_tee:
+	case ocsd_isa_jazelle:
+	case ocsd_isa_custom:
+	case ocsd_isa_unknown:
+	default:
+		packet->isa = CS_ETM_ISA_UNKNOWN;
+	}
+
 	packet->start_addr = elem->st_addr;
 	packet->end_addr = elem->en_addr;
+	packet->instr_count = elem->num_instr_range;
+
 	switch (elem->last_i_type) {
 	case OCSD_INSTR_BR:
 	case OCSD_INSTR_BR_INDIRECT:
@@ -336,6 +363,8 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
 		break;
 	}
 
+	packet->last_instr_size = elem->last_instr_sz;
+
 	return ret;
 }
 
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
index 612b575..9351bd1 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
@@ -28,11 +28,21 @@ enum cs_etm_sample_type {
 	CS_ETM_TRACE_ON = 1 << 1,
 };
 
+enum cs_etm_isa {
+	CS_ETM_ISA_UNKNOWN,
+	CS_ETM_ISA_A64,
+	CS_ETM_ISA_A32,
+	CS_ETM_ISA_T32,
+};
+
 struct cs_etm_packet {
 	enum cs_etm_sample_type sample_type;
+	enum cs_etm_isa isa;
 	u64 start_addr;
 	u64 end_addr;
+	u32 instr_count;
 	u8 last_instr_taken_branch;
+	u8 last_instr_size;
 	u8 exc;
 	u8 exc_ret;
 	int cpu;
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 73430b7..48ad217 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -31,14 +31,6 @@
 
 #define MAX_TIMESTAMP (~0ULL)
 
-/*
- * A64 instructions are always 4 bytes
- *
- * Only A64 is supported, so can use this constant for converting between
- * addresses and instruction counts, calculting offsets etc
- */
-#define A64_INSTR_SIZE 4
-
 struct cs_etm_auxtrace {
 	struct auxtrace auxtrace;
 	struct auxtrace_queues queues;
@@ -510,21 +502,17 @@ static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq)
 	etmq->last_branch_rb->nr = 0;
 }
 
-static inline u64 cs_etm__last_executed_instr(struct cs_etm_packet *packet)
-{
-	/* Returns 0 for the CS_ETM_TRACE_ON packet */
-	if (packet->sample_type == CS_ETM_TRACE_ON)
-		return 0;
+static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
+					 u64 addr) {
+	u8 instrBytes[2];
 
+	cs_etm__mem_access(etmq, addr, ARRAY_SIZE(instrBytes), instrBytes);
 	/*
-	 * The packet records the execution range with an exclusive end address
-	 *
-	 * A64 instructions are constant size, so the last executed
-	 * instruction is A64_INSTR_SIZE before the end address
-	 * Will need to do instruction level decode for T32 instructions as
-	 * they can be variable size (not yet supported).
+	 * T32 instruction size is indicated by bits[15:11] of the first
+	 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
+	 * denote a 32-bit instruction.
 	 */
-	return packet->end_addr - A64_INSTR_SIZE;
+	return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
 }
 
 static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
@@ -536,27 +524,32 @@ static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
 	return packet->start_addr;
 }
 
-static inline u64 cs_etm__instr_count(const struct cs_etm_packet *packet)
+static inline
+u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
 {
-	/*
-	 * Only A64 instructions are currently supported, so can get
-	 * instruction count by dividing.
-	 * Will need to do instruction level decode for T32 instructions as
-	 * they can be variable size (not yet supported).
-	 */
-	return (packet->end_addr - packet->start_addr) / A64_INSTR_SIZE;
+	/* Returns 0 for the CS_ETM_TRACE_ON packet */
+	if (packet->sample_type == CS_ETM_TRACE_ON)
+		return 0;
+
+	return packet->end_addr - packet->last_instr_size;
 }
 
-static inline u64 cs_etm__instr_addr(const struct cs_etm_packet *packet,
+static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
+				     const struct cs_etm_packet *packet,
 				     u64 offset)
 {
-	/*
-	 * Only A64 instructions are currently supported, so can get
-	 * instruction address by muliplying.
-	 * Will need to do instruction level decode for T32 instructions as
-	 * they can be variable size (not yet supported).
-	 */
-	return packet->start_addr + offset * A64_INSTR_SIZE;
+	if (packet->isa == CS_ETM_ISA_T32) {
+		u64 addr = packet->start_addr;
+
+		while (offset > 0) {
+			addr += cs_etm__t32_instr_size(etmq, addr);
+			offset--;
+		}
+		return addr;
+	}
+
+	/* Assume a 4 byte instruction size (A32/A64) */
+	return packet->start_addr + offset * 4;
 }
 
 static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq)
@@ -888,9 +881,8 @@ static int cs_etm__sample(struct cs_etm_queue *etmq)
 	struct cs_etm_auxtrace *etm = etmq->etm;
 	struct cs_etm_packet *tmp;
 	int ret;
-	u64 instrs_executed;
+	u64 instrs_executed = etmq->packet->instr_count;
 
-	instrs_executed = cs_etm__instr_count(etmq->packet);
 	etmq->period_instructions += instrs_executed;
 
 	/*
@@ -920,7 +912,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq)
 		 * executed, but PC has not advanced to next instruction)
 		 */
 		u64 offset = (instrs_executed - instrs_over - 1);
-		u64 addr = cs_etm__instr_addr(etmq->packet, offset);
+		u64 addr = cs_etm__instr_addr(etmq, etmq->packet, offset);
 
 		ret = cs_etm__synth_instruction_sample(
 			etmq, addr, etm->instructions_sample_period);
-- 
2.7.4


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH v5 RESEND] perf: Support for Arm A32/T32 instruction sets in CoreSight trace
  2018-12-03 12:18 ` Robert Walker
@ 2018-12-03 14:53   ` Arnaldo Carvalho de Melo
  -1 siblings, 0 replies; 6+ messages in thread
From: Arnaldo Carvalho de Melo @ 2018-12-03 14:53 UTC (permalink / raw)
  To: Robert Walker
  Cc: Mathieu Poirier, Peter Zijlstra, CoreSight, linux-kernel,
	Alexander Shishkin, Ingo Molnar, Namhyung Kim, Jiri Olsa,
	linux-arm-kernel

Em Mon, Dec 03, 2018 at 12:18:46PM +0000, Robert Walker escreveu:
> This patch adds support for generating instruction samples from trace of
> AArch32 programs using the A32 and T32 instruction sets.
> 
> T32 has variable 2 or 4 byte instruction size, so the conversion between
> addresses and instruction counts requires extra information from the trace
> decoder, requiring version 0.10.0 of OpenCSD.  A check for the OpenCSD
> library version has been added to the feature check for OpenCSD.
> 
> Signed-off-by: Robert Walker <robert.walker@arm.com>
> ---
> 
> Hi Arnaldo,
> 
> Please add this patch to the queue for 4.21 (originally sent as https://lkml.org/lkml/2018/11/9/965)
> 
> Mathieu has reviewed it: https://lkml.org/lkml/2018/11/9/1432

OK. Reading the responses to previous submissions of this patch, I notice
that it received some Reviewed-by and Tested-by tags. Please collect those
tags when reposting, provided, of course, that the patch contents are unchanged.

Thanks, applied.

- Arnaldo
 
> Thanks
> 
> Robert Walker
> 
> 
> Changes since v4:
>  Formatting of comment block
> 
>  tools/build/feature/test-libopencsd.c           |  8 +++
>  tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 29 ++++++++++
>  tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 10 ++++
>  tools/perf/util/cs-etm.c                        | 70 +++++++++++--------------
>  4 files changed, 78 insertions(+), 39 deletions(-)
> 
> diff --git a/tools/build/feature/test-libopencsd.c b/tools/build/feature/test-libopencsd.c
> index 5ff1246..d68eb4f 100644
> --- a/tools/build/feature/test-libopencsd.c
> +++ b/tools/build/feature/test-libopencsd.c
> @@ -1,6 +1,14 @@
>  // SPDX-License-Identifier: GPL-2.0
>  #include <opencsd/c_api/opencsd_c_api.h>
>  
> +/*
> + * Check OpenCSD library version is sufficient to provide required features
> + */
> +#define OCSD_MIN_VER ((0 << 16) | (10 << 8) | (0))
> +#if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER)
> +#error "OpenCSD >= 0.10.0 is required"
> +#endif
> +
>  int main(void)
>  {
>  	(void)ocsd_get_version();
> diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> index 938def6..5efb616 100644
> --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> @@ -263,9 +263,12 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder)
>  	decoder->tail = 0;
>  	decoder->packet_count = 0;
>  	for (i = 0; i < MAX_BUFFER; i++) {
> +		decoder->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
>  		decoder->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
>  		decoder->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
> +		decoder->packet_buffer[i].instr_count = 0;
>  		decoder->packet_buffer[i].last_instr_taken_branch = false;
> +		decoder->packet_buffer[i].last_instr_size = 0;
>  		decoder->packet_buffer[i].exc = false;
>  		decoder->packet_buffer[i].exc_ret = false;
>  		decoder->packet_buffer[i].cpu = INT_MIN;
> @@ -294,11 +297,15 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder,
>  	decoder->packet_count++;
>  
>  	decoder->packet_buffer[et].sample_type = sample_type;
> +	decoder->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN;
>  	decoder->packet_buffer[et].exc = false;
>  	decoder->packet_buffer[et].exc_ret = false;
>  	decoder->packet_buffer[et].cpu = *((int *)inode->priv);
>  	decoder->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR;
>  	decoder->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR;
> +	decoder->packet_buffer[et].instr_count = 0;
> +	decoder->packet_buffer[et].last_instr_taken_branch = false;
> +	decoder->packet_buffer[et].last_instr_size = 0;
>  
>  	if (decoder->packet_count == MAX_BUFFER - 1)
>  		return OCSD_RESP_WAIT;
> @@ -321,8 +328,28 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
>  
>  	packet = &decoder->packet_buffer[decoder->tail];
>  
> +	switch (elem->isa) {
> +	case ocsd_isa_aarch64:
> +		packet->isa = CS_ETM_ISA_A64;
> +		break;
> +	case ocsd_isa_arm:
> +		packet->isa = CS_ETM_ISA_A32;
> +		break;
> +	case ocsd_isa_thumb2:
> +		packet->isa = CS_ETM_ISA_T32;
> +		break;
> +	case ocsd_isa_tee:
> +	case ocsd_isa_jazelle:
> +	case ocsd_isa_custom:
> +	case ocsd_isa_unknown:
> +	default:
> +		packet->isa = CS_ETM_ISA_UNKNOWN;
> +	}
> +
>  	packet->start_addr = elem->st_addr;
>  	packet->end_addr = elem->en_addr;
> +	packet->instr_count = elem->num_instr_range;
> +
>  	switch (elem->last_i_type) {
>  	case OCSD_INSTR_BR:
>  	case OCSD_INSTR_BR_INDIRECT:
> @@ -336,6 +363,8 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
>  		break;
>  	}
>  
> +	packet->last_instr_size = elem->last_instr_sz;
> +
>  	return ret;
>  }
>  
> diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
> index 612b575..9351bd1 100644
> --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
> +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
> @@ -28,11 +28,21 @@ enum cs_etm_sample_type {
>  	CS_ETM_TRACE_ON = 1 << 1,
>  };
>  
> +enum cs_etm_isa {
> +	CS_ETM_ISA_UNKNOWN,
> +	CS_ETM_ISA_A64,
> +	CS_ETM_ISA_A32,
> +	CS_ETM_ISA_T32,
> +};
> +
>  struct cs_etm_packet {
>  	enum cs_etm_sample_type sample_type;
> +	enum cs_etm_isa isa;
>  	u64 start_addr;
>  	u64 end_addr;
> +	u32 instr_count;
>  	u8 last_instr_taken_branch;
> +	u8 last_instr_size;
>  	u8 exc;
>  	u8 exc_ret;
>  	int cpu;
> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
> index 73430b7..48ad217 100644
> --- a/tools/perf/util/cs-etm.c
> +++ b/tools/perf/util/cs-etm.c
> @@ -31,14 +31,6 @@
>  
>  #define MAX_TIMESTAMP (~0ULL)
>  
> -/*
> - * A64 instructions are always 4 bytes
> - *
> - * Only A64 is supported, so can use this constant for converting between
> - * addresses and instruction counts, calculting offsets etc
> - */
> -#define A64_INSTR_SIZE 4
> -
>  struct cs_etm_auxtrace {
>  	struct auxtrace auxtrace;
>  	struct auxtrace_queues queues;
> @@ -510,21 +502,17 @@ static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq)
>  	etmq->last_branch_rb->nr = 0;
>  }
>  
> -static inline u64 cs_etm__last_executed_instr(struct cs_etm_packet *packet)
> -{
> -	/* Returns 0 for the CS_ETM_TRACE_ON packet */
> -	if (packet->sample_type == CS_ETM_TRACE_ON)
> -		return 0;
> +static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
> +					 u64 addr) {
> +	u8 instrBytes[2];
>  
> +	cs_etm__mem_access(etmq, addr, ARRAY_SIZE(instrBytes), instrBytes);
>  	/*
> -	 * The packet records the execution range with an exclusive end address
> -	 *
> -	 * A64 instructions are constant size, so the last executed
> -	 * instruction is A64_INSTR_SIZE before the end address
> -	 * Will need to do instruction level decode for T32 instructions as
> -	 * they can be variable size (not yet supported).
> +	 * T32 instruction size is indicated by bits[15:11] of the first
> +	 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
> +	 * denote a 32-bit instruction.
>  	 */
> -	return packet->end_addr - A64_INSTR_SIZE;
> +	return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
>  }
>  
>  static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
> @@ -536,27 +524,32 @@ static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
>  	return packet->start_addr;
>  }
>  
> -static inline u64 cs_etm__instr_count(const struct cs_etm_packet *packet)
> +static inline
> +u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
>  {
> -	/*
> -	 * Only A64 instructions are currently supported, so can get
> -	 * instruction count by dividing.
> -	 * Will need to do instruction level decode for T32 instructions as
> -	 * they can be variable size (not yet supported).
> -	 */
> -	return (packet->end_addr - packet->start_addr) / A64_INSTR_SIZE;
> +	/* Returns 0 for the CS_ETM_TRACE_ON packet */
> +	if (packet->sample_type == CS_ETM_TRACE_ON)
> +		return 0;
> +
> +	return packet->end_addr - packet->last_instr_size;
>  }
>  
> -static inline u64 cs_etm__instr_addr(const struct cs_etm_packet *packet,
> +static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
> +				     const struct cs_etm_packet *packet,
>  				     u64 offset)
>  {
> -	/*
> -	 * Only A64 instructions are currently supported, so can get
> -	 * instruction address by muliplying.
> -	 * Will need to do instruction level decode for T32 instructions as
> -	 * they can be variable size (not yet supported).
> -	 */
> -	return packet->start_addr + offset * A64_INSTR_SIZE;
> +	if (packet->isa == CS_ETM_ISA_T32) {
> +		u64 addr = packet->start_addr;
> +
> +		while (offset > 0) {
> +			addr += cs_etm__t32_instr_size(etmq, addr);
> +			offset--;
> +		}
> +		return addr;
> +	}
> +
> +	/* Assume a 4 byte instruction size (A32/A64) */
> +	return packet->start_addr + offset * 4;
>  }
>  
>  static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq)
> @@ -888,9 +881,8 @@ static int cs_etm__sample(struct cs_etm_queue *etmq)
>  	struct cs_etm_auxtrace *etm = etmq->etm;
>  	struct cs_etm_packet *tmp;
>  	int ret;
> -	u64 instrs_executed;
> +	u64 instrs_executed = etmq->packet->instr_count;
>  
> -	instrs_executed = cs_etm__instr_count(etmq->packet);
>  	etmq->period_instructions += instrs_executed;
>  
>  	/*
> @@ -920,7 +912,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq)
>  		 * executed, but PC has not advanced to next instruction)
>  		 */
>  		u64 offset = (instrs_executed - instrs_over - 1);
> -		u64 addr = cs_etm__instr_addr(etmq->packet, offset);
> +		u64 addr = cs_etm__instr_addr(etmq, etmq->packet, offset);
>  
>  		ret = cs_etm__synth_instruction_sample(
>  			etmq, addr, etm->instructions_sample_period);
> -- 
> 2.7.4

-- 

- Arnaldo

_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v5 RESEND] perf: Support for Arm A32/T32 instruction sets in CoreSight trace
@ 2018-12-03 14:53   ` Arnaldo Carvalho de Melo
  0 siblings, 0 replies; 6+ messages in thread
From: Arnaldo Carvalho de Melo @ 2018-12-03 14:53 UTC (permalink / raw)
  To: Robert Walker
  Cc: Mathieu Poirier, Peter Zijlstra, Ingo Molnar, Alexander Shishkin,
	Jiri Olsa, Namhyung Kim, linux-kernel, linux-arm-kernel,
	CoreSight

Em Mon, Dec 03, 2018 at 12:18:46PM +0000, Robert Walker escreveu:
> This patch adds support for generating instruction samples from trace of
> AArch32 programs using the A32 and T32 instruction sets.
> 
> T32 has variable 2 or 4 byte instruction size, so the conversion between
> addresses and instruction counts requires extra information from the trace
> decoder, requiring version 0.10.0 of OpenCSD.  A check for the OpenCSD
> library version has been added to the feature check for OpenCSD.
> 
> Signed-off-by: Robert Walker <robert.walker@arm.com>
> ---
> 
> Hi Arnaldo,
> 
> Please add this patch to the queue for 4.21 (originally sent as https://lkml.org/lkml/2018/11/9/965)
> 
> Mathieu has reviewed it: https://lkml.org/lkml/2018/11/9/1432

OK. Reading the responses to previous submissions of this patch, I notice
that it got some Reviewed-by and Tested-by tags; please collect those tags
when reposting, provided, of course, that the patch contents are the same.

Thanks, applied.

- Arnaldo
 
> Thanks
> 
> Robert Walker
> 
> 
> Changes since v4:
>  Formatting of comment block
> 
>  tools/build/feature/test-libopencsd.c           |  8 +++
>  tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 29 ++++++++++
>  tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 10 ++++
>  tools/perf/util/cs-etm.c                        | 70 +++++++++++--------------
>  4 files changed, 78 insertions(+), 39 deletions(-)
> 
> diff --git a/tools/build/feature/test-libopencsd.c b/tools/build/feature/test-libopencsd.c
> index 5ff1246..d68eb4f 100644
> --- a/tools/build/feature/test-libopencsd.c
> +++ b/tools/build/feature/test-libopencsd.c
> @@ -1,6 +1,14 @@
>  // SPDX-License-Identifier: GPL-2.0
>  #include <opencsd/c_api/opencsd_c_api.h>
>  
> +/*
> + * Check OpenCSD library version is sufficient to provide required features
> + */
> +#define OCSD_MIN_VER ((0 << 16) | (10 << 8) | (0))
> +#if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER)
> +#error "OpenCSD >= 0.10.0 is required"
> +#endif
> +
>  int main(void)
>  {
>  	(void)ocsd_get_version();
> diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> index 938def6..5efb616 100644
> --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> @@ -263,9 +263,12 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder)
>  	decoder->tail = 0;
>  	decoder->packet_count = 0;
>  	for (i = 0; i < MAX_BUFFER; i++) {
> +		decoder->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
>  		decoder->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
>  		decoder->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
> +		decoder->packet_buffer[i].instr_count = 0;
>  		decoder->packet_buffer[i].last_instr_taken_branch = false;
> +		decoder->packet_buffer[i].last_instr_size = 0;
>  		decoder->packet_buffer[i].exc = false;
>  		decoder->packet_buffer[i].exc_ret = false;
>  		decoder->packet_buffer[i].cpu = INT_MIN;
> @@ -294,11 +297,15 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder,
>  	decoder->packet_count++;
>  
>  	decoder->packet_buffer[et].sample_type = sample_type;
> +	decoder->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN;
>  	decoder->packet_buffer[et].exc = false;
>  	decoder->packet_buffer[et].exc_ret = false;
>  	decoder->packet_buffer[et].cpu = *((int *)inode->priv);
>  	decoder->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR;
>  	decoder->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR;
> +	decoder->packet_buffer[et].instr_count = 0;
> +	decoder->packet_buffer[et].last_instr_taken_branch = false;
> +	decoder->packet_buffer[et].last_instr_size = 0;
>  
>  	if (decoder->packet_count == MAX_BUFFER - 1)
>  		return OCSD_RESP_WAIT;
> @@ -321,8 +328,28 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
>  
>  	packet = &decoder->packet_buffer[decoder->tail];
>  
> +	switch (elem->isa) {
> +	case ocsd_isa_aarch64:
> +		packet->isa = CS_ETM_ISA_A64;
> +		break;
> +	case ocsd_isa_arm:
> +		packet->isa = CS_ETM_ISA_A32;
> +		break;
> +	case ocsd_isa_thumb2:
> +		packet->isa = CS_ETM_ISA_T32;
> +		break;
> +	case ocsd_isa_tee:
> +	case ocsd_isa_jazelle:
> +	case ocsd_isa_custom:
> +	case ocsd_isa_unknown:
> +	default:
> +		packet->isa = CS_ETM_ISA_UNKNOWN;
> +	}
> +
>  	packet->start_addr = elem->st_addr;
>  	packet->end_addr = elem->en_addr;
> +	packet->instr_count = elem->num_instr_range;
> +
>  	switch (elem->last_i_type) {
>  	case OCSD_INSTR_BR:
>  	case OCSD_INSTR_BR_INDIRECT:
> @@ -336,6 +363,8 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
>  		break;
>  	}
>  
> +	packet->last_instr_size = elem->last_instr_sz;
> +
>  	return ret;
>  }
>  
> diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
> index 612b575..9351bd1 100644
> --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
> +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
> @@ -28,11 +28,21 @@ enum cs_etm_sample_type {
>  	CS_ETM_TRACE_ON = 1 << 1,
>  };
>  
> +enum cs_etm_isa {
> +	CS_ETM_ISA_UNKNOWN,
> +	CS_ETM_ISA_A64,
> +	CS_ETM_ISA_A32,
> +	CS_ETM_ISA_T32,
> +};
> +
>  struct cs_etm_packet {
>  	enum cs_etm_sample_type sample_type;
> +	enum cs_etm_isa isa;
>  	u64 start_addr;
>  	u64 end_addr;
> +	u32 instr_count;
>  	u8 last_instr_taken_branch;
> +	u8 last_instr_size;
>  	u8 exc;
>  	u8 exc_ret;
>  	int cpu;
> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
> index 73430b7..48ad217 100644
> --- a/tools/perf/util/cs-etm.c
> +++ b/tools/perf/util/cs-etm.c
> @@ -31,14 +31,6 @@
>  
>  #define MAX_TIMESTAMP (~0ULL)
>  
> -/*
> - * A64 instructions are always 4 bytes
> - *
> - * Only A64 is supported, so can use this constant for converting between
> - * addresses and instruction counts, calculting offsets etc
> - */
> -#define A64_INSTR_SIZE 4
> -
>  struct cs_etm_auxtrace {
>  	struct auxtrace auxtrace;
>  	struct auxtrace_queues queues;
> @@ -510,21 +502,17 @@ static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq)
>  	etmq->last_branch_rb->nr = 0;
>  }
>  
> -static inline u64 cs_etm__last_executed_instr(struct cs_etm_packet *packet)
> -{
> -	/* Returns 0 for the CS_ETM_TRACE_ON packet */
> -	if (packet->sample_type == CS_ETM_TRACE_ON)
> -		return 0;
> +static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
> +					 u64 addr) {
> +	u8 instrBytes[2];
>  
> +	cs_etm__mem_access(etmq, addr, ARRAY_SIZE(instrBytes), instrBytes);
>  	/*
> -	 * The packet records the execution range with an exclusive end address
> -	 *
> -	 * A64 instructions are constant size, so the last executed
> -	 * instruction is A64_INSTR_SIZE before the end address
> -	 * Will need to do instruction level decode for T32 instructions as
> -	 * they can be variable size (not yet supported).
> +	 * T32 instruction size is indicated by bits[15:11] of the first
> +	 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
> +	 * denote a 32-bit instruction.
>  	 */
> -	return packet->end_addr - A64_INSTR_SIZE;
> +	return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
>  }
>  
>  static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
> @@ -536,27 +524,32 @@ static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
>  	return packet->start_addr;
>  }
>  
> -static inline u64 cs_etm__instr_count(const struct cs_etm_packet *packet)
> +static inline
> +u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
>  {
> -	/*
> -	 * Only A64 instructions are currently supported, so can get
> -	 * instruction count by dividing.
> -	 * Will need to do instruction level decode for T32 instructions as
> -	 * they can be variable size (not yet supported).
> -	 */
> -	return (packet->end_addr - packet->start_addr) / A64_INSTR_SIZE;
> +	/* Returns 0 for the CS_ETM_TRACE_ON packet */
> +	if (packet->sample_type == CS_ETM_TRACE_ON)
> +		return 0;
> +
> +	return packet->end_addr - packet->last_instr_size;
>  }
>  
> -static inline u64 cs_etm__instr_addr(const struct cs_etm_packet *packet,
> +static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
> +				     const struct cs_etm_packet *packet,
>  				     u64 offset)
>  {
> -	/*
> -	 * Only A64 instructions are currently supported, so can get
> -	 * instruction address by muliplying.
> -	 * Will need to do instruction level decode for T32 instructions as
> -	 * they can be variable size (not yet supported).
> -	 */
> -	return packet->start_addr + offset * A64_INSTR_SIZE;
> +	if (packet->isa == CS_ETM_ISA_T32) {
> +		u64 addr = packet->start_addr;
> +
> +		while (offset > 0) {
> +			addr += cs_etm__t32_instr_size(etmq, addr);
> +			offset--;
> +		}
> +		return addr;
> +	}
> +
> +	/* Assume a 4 byte instruction size (A32/A64) */
> +	return packet->start_addr + offset * 4;
>  }
>  
>  static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq)
> @@ -888,9 +881,8 @@ static int cs_etm__sample(struct cs_etm_queue *etmq)
>  	struct cs_etm_auxtrace *etm = etmq->etm;
>  	struct cs_etm_packet *tmp;
>  	int ret;
> -	u64 instrs_executed;
> +	u64 instrs_executed = etmq->packet->instr_count;
>  
> -	instrs_executed = cs_etm__instr_count(etmq->packet);
>  	etmq->period_instructions += instrs_executed;
>  
>  	/*
> @@ -920,7 +912,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq)
>  		 * executed, but PC has not advanced to next instruction)
>  		 */
>  		u64 offset = (instrs_executed - instrs_over - 1);
> -		u64 addr = cs_etm__instr_addr(etmq->packet, offset);
> +		u64 addr = cs_etm__instr_addr(etmq, etmq->packet, offset);
>  
>  		ret = cs_etm__synth_instruction_sample(
>  			etmq, addr, etm->instructions_sample_period);
> -- 
> 2.7.4

-- 

- Arnaldo

^ permalink raw reply	[flat|nested] 6+ messages in thread

* [tip:perf/core] perf cs-etm: Support for ARM A32/T32 instruction sets in CoreSight trace
  2018-12-03 12:18 ` Robert Walker
  (?)
  (?)
@ 2018-12-14 20:38 ` tip-bot for Robert Walker
  -1 siblings, 0 replies; 6+ messages in thread
From: tip-bot for Robert Walker @ 2018-12-14 20:38 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: mingo, acme, robert.walker, peterz, linux-kernel, leo.yan, jolsa,
	mathieu.poirier, hpa, namhyung, tglx, alexander.shishkin

Commit-ID:  c7d74f860abca8027ec77e0801a95a90a5d9bf90
Gitweb:     https://git.kernel.org/tip/c7d74f860abca8027ec77e0801a95a90a5d9bf90
Author:     Robert Walker <robert.walker@arm.com>
AuthorDate: Mon, 3 Dec 2018 12:18:46 +0000
Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Mon, 3 Dec 2018 12:02:51 -0300

perf cs-etm: Support for ARM A32/T32 instruction sets in CoreSight trace

This patch adds support for generating instruction samples from trace of
AArch32 programs using the A32 and T32 instruction sets.

T32 has variable 2 or 4 byte instruction size, so the conversion between
addresses and instruction counts requires extra information from the
trace decoder, requiring version 0.10.0 of OpenCSD.  A check for the
OpenCSD library version has been added to the feature check for OpenCSD.

Signed-off-by: Robert Walker <robert.walker@arm.com>
Reviewed-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Tested-by: Leo Yan <leo.yan@linaro.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: coresight@lists.linaro.org
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lkml.kernel.org/r/1543839526-30348-1-git-send-email-robert.walker@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/build/feature/test-libopencsd.c           |  8 +++
 tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 29 ++++++++++
 tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 10 ++++
 tools/perf/util/cs-etm.c                        | 70 +++++++++++--------------
 4 files changed, 78 insertions(+), 39 deletions(-)

diff --git a/tools/build/feature/test-libopencsd.c b/tools/build/feature/test-libopencsd.c
index 5ff1246e6194..d68eb4fb40cc 100644
--- a/tools/build/feature/test-libopencsd.c
+++ b/tools/build/feature/test-libopencsd.c
@@ -1,6 +1,14 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <opencsd/c_api/opencsd_c_api.h>
 
+/*
+ * Check OpenCSD library version is sufficient to provide required features
+ */
+#define OCSD_MIN_VER ((0 << 16) | (10 << 8) | (0))
+#if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER)
+#error "OpenCSD >= 0.10.0 is required"
+#endif
+
 int main(void)
 {
 	(void)ocsd_get_version();
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index 938def6d0bb9..5efb616bd609 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -263,9 +263,12 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder)
 	decoder->tail = 0;
 	decoder->packet_count = 0;
 	for (i = 0; i < MAX_BUFFER; i++) {
+		decoder->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
 		decoder->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
 		decoder->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
+		decoder->packet_buffer[i].instr_count = 0;
 		decoder->packet_buffer[i].last_instr_taken_branch = false;
+		decoder->packet_buffer[i].last_instr_size = 0;
 		decoder->packet_buffer[i].exc = false;
 		decoder->packet_buffer[i].exc_ret = false;
 		decoder->packet_buffer[i].cpu = INT_MIN;
@@ -294,11 +297,15 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder,
 	decoder->packet_count++;
 
 	decoder->packet_buffer[et].sample_type = sample_type;
+	decoder->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN;
 	decoder->packet_buffer[et].exc = false;
 	decoder->packet_buffer[et].exc_ret = false;
 	decoder->packet_buffer[et].cpu = *((int *)inode->priv);
 	decoder->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR;
 	decoder->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR;
+	decoder->packet_buffer[et].instr_count = 0;
+	decoder->packet_buffer[et].last_instr_taken_branch = false;
+	decoder->packet_buffer[et].last_instr_size = 0;
 
 	if (decoder->packet_count == MAX_BUFFER - 1)
 		return OCSD_RESP_WAIT;
@@ -321,8 +328,28 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
 
 	packet = &decoder->packet_buffer[decoder->tail];
 
+	switch (elem->isa) {
+	case ocsd_isa_aarch64:
+		packet->isa = CS_ETM_ISA_A64;
+		break;
+	case ocsd_isa_arm:
+		packet->isa = CS_ETM_ISA_A32;
+		break;
+	case ocsd_isa_thumb2:
+		packet->isa = CS_ETM_ISA_T32;
+		break;
+	case ocsd_isa_tee:
+	case ocsd_isa_jazelle:
+	case ocsd_isa_custom:
+	case ocsd_isa_unknown:
+	default:
+		packet->isa = CS_ETM_ISA_UNKNOWN;
+	}
+
 	packet->start_addr = elem->st_addr;
 	packet->end_addr = elem->en_addr;
+	packet->instr_count = elem->num_instr_range;
+
 	switch (elem->last_i_type) {
 	case OCSD_INSTR_BR:
 	case OCSD_INSTR_BR_INDIRECT:
@@ -336,6 +363,8 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
 		break;
 	}
 
+	packet->last_instr_size = elem->last_instr_sz;
+
 	return ret;
 }
 
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
index 612b5755f742..9351bd10d864 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
@@ -28,11 +28,21 @@ enum cs_etm_sample_type {
 	CS_ETM_TRACE_ON = 1 << 1,
 };
 
+enum cs_etm_isa {
+	CS_ETM_ISA_UNKNOWN,
+	CS_ETM_ISA_A64,
+	CS_ETM_ISA_A32,
+	CS_ETM_ISA_T32,
+};
+
 struct cs_etm_packet {
 	enum cs_etm_sample_type sample_type;
+	enum cs_etm_isa isa;
 	u64 start_addr;
 	u64 end_addr;
+	u32 instr_count;
 	u8 last_instr_taken_branch;
+	u8 last_instr_size;
 	u8 exc;
 	u8 exc_ret;
 	int cpu;
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 73430b73570d..48ad217bf0df 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -31,14 +31,6 @@
 
 #define MAX_TIMESTAMP (~0ULL)
 
-/*
- * A64 instructions are always 4 bytes
- *
- * Only A64 is supported, so can use this constant for converting between
- * addresses and instruction counts, calculting offsets etc
- */
-#define A64_INSTR_SIZE 4
-
 struct cs_etm_auxtrace {
 	struct auxtrace auxtrace;
 	struct auxtrace_queues queues;
@@ -510,21 +502,17 @@ static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq)
 	etmq->last_branch_rb->nr = 0;
 }
 
-static inline u64 cs_etm__last_executed_instr(struct cs_etm_packet *packet)
-{
-	/* Returns 0 for the CS_ETM_TRACE_ON packet */
-	if (packet->sample_type == CS_ETM_TRACE_ON)
-		return 0;
+static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
+					 u64 addr) {
+	u8 instrBytes[2];
 
+	cs_etm__mem_access(etmq, addr, ARRAY_SIZE(instrBytes), instrBytes);
 	/*
-	 * The packet records the execution range with an exclusive end address
-	 *
-	 * A64 instructions are constant size, so the last executed
-	 * instruction is A64_INSTR_SIZE before the end address
-	 * Will need to do instruction level decode for T32 instructions as
-	 * they can be variable size (not yet supported).
+	 * T32 instruction size is indicated by bits[15:11] of the first
+	 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
+	 * denote a 32-bit instruction.
 	 */
-	return packet->end_addr - A64_INSTR_SIZE;
+	return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
 }
 
 static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
@@ -536,27 +524,32 @@ static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
 	return packet->start_addr;
 }
 
-static inline u64 cs_etm__instr_count(const struct cs_etm_packet *packet)
+static inline
+u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
 {
-	/*
-	 * Only A64 instructions are currently supported, so can get
-	 * instruction count by dividing.
-	 * Will need to do instruction level decode for T32 instructions as
-	 * they can be variable size (not yet supported).
-	 */
-	return (packet->end_addr - packet->start_addr) / A64_INSTR_SIZE;
+	/* Returns 0 for the CS_ETM_TRACE_ON packet */
+	if (packet->sample_type == CS_ETM_TRACE_ON)
+		return 0;
+
+	return packet->end_addr - packet->last_instr_size;
 }
 
-static inline u64 cs_etm__instr_addr(const struct cs_etm_packet *packet,
+static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
+				     const struct cs_etm_packet *packet,
 				     u64 offset)
 {
-	/*
-	 * Only A64 instructions are currently supported, so can get
-	 * instruction address by muliplying.
-	 * Will need to do instruction level decode for T32 instructions as
-	 * they can be variable size (not yet supported).
-	 */
-	return packet->start_addr + offset * A64_INSTR_SIZE;
+	if (packet->isa == CS_ETM_ISA_T32) {
+		u64 addr = packet->start_addr;
+
+		while (offset > 0) {
+			addr += cs_etm__t32_instr_size(etmq, addr);
+			offset--;
+		}
+		return addr;
+	}
+
+	/* Assume a 4 byte instruction size (A32/A64) */
+	return packet->start_addr + offset * 4;
 }
 
 static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq)
@@ -888,9 +881,8 @@ static int cs_etm__sample(struct cs_etm_queue *etmq)
 	struct cs_etm_auxtrace *etm = etmq->etm;
 	struct cs_etm_packet *tmp;
 	int ret;
-	u64 instrs_executed;
+	u64 instrs_executed = etmq->packet->instr_count;
 
-	instrs_executed = cs_etm__instr_count(etmq->packet);
 	etmq->period_instructions += instrs_executed;
 
 	/*
@@ -920,7 +912,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq)
 		 * executed, but PC has not advanced to next instruction)
 		 */
 		u64 offset = (instrs_executed - instrs_over - 1);
-		u64 addr = cs_etm__instr_addr(etmq->packet, offset);
+		u64 addr = cs_etm__instr_addr(etmq, etmq->packet, offset);
 
 		ret = cs_etm__synth_instruction_sample(
 			etmq, addr, etm->instructions_sample_period);

^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [tip:perf/core] perf cs-etm: Support for ARM A32/T32 instruction sets in CoreSight trace
  2018-12-03 12:18 ` Robert Walker
                   ` (2 preceding siblings ...)
  (?)
@ 2018-12-18 14:05 ` tip-bot for Robert Walker
  -1 siblings, 0 replies; 6+ messages in thread
From: tip-bot for Robert Walker @ 2018-12-18 14:05 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: robert.walker, tglx, acme, namhyung, mathieu.poirier, jolsa,
	alexander.shishkin, hpa, mingo, linux-kernel, peterz, leo.yan

Commit-ID:  a7ee4d625ede4f62146ff3bb2aeee074e4cf5fa1
Gitweb:     https://git.kernel.org/tip/a7ee4d625ede4f62146ff3bb2aeee074e4cf5fa1
Author:     Robert Walker <robert.walker@arm.com>
AuthorDate: Mon, 3 Dec 2018 12:18:46 +0000
Committer:  Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Mon, 17 Dec 2018 14:56:18 -0300

perf cs-etm: Support for ARM A32/T32 instruction sets in CoreSight trace

This patch adds support for generating instruction samples from trace of
AArch32 programs using the A32 and T32 instruction sets.

T32 has variable 2 or 4 byte instruction size, so the conversion between
addresses and instruction counts requires extra information from the
trace decoder, requiring version 0.10.0 of OpenCSD.  A check for the
OpenCSD library version has been added to the feature check for OpenCSD.

Signed-off-by: Robert Walker <robert.walker@arm.com>
Reviewed-by: Mathieu Poirier <mathieu.poirier@linaro.org>
Tested-by: Leo Yan <leo.yan@linaro.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: coresight@lists.linaro.org
Cc: linux-arm-kernel@lists.infradead.org
Link: http://lkml.kernel.org/r/1543839526-30348-1-git-send-email-robert.walker@arm.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/build/feature/test-libopencsd.c           |  8 +++
 tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 29 ++++++++++
 tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 10 ++++
 tools/perf/util/cs-etm.c                        | 70 +++++++++++--------------
 4 files changed, 78 insertions(+), 39 deletions(-)

diff --git a/tools/build/feature/test-libopencsd.c b/tools/build/feature/test-libopencsd.c
index 5ff1246e6194..d68eb4fb40cc 100644
--- a/tools/build/feature/test-libopencsd.c
+++ b/tools/build/feature/test-libopencsd.c
@@ -1,6 +1,14 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <opencsd/c_api/opencsd_c_api.h>
 
+/*
+ * Check OpenCSD library version is sufficient to provide required features
+ */
+#define OCSD_MIN_VER ((0 << 16) | (10 << 8) | (0))
+#if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER)
+#error "OpenCSD >= 0.10.0 is required"
+#endif
+
 int main(void)
 {
 	(void)ocsd_get_version();
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index 938def6d0bb9..5efb616bd609 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -263,9 +263,12 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder)
 	decoder->tail = 0;
 	decoder->packet_count = 0;
 	for (i = 0; i < MAX_BUFFER; i++) {
+		decoder->packet_buffer[i].isa = CS_ETM_ISA_UNKNOWN;
 		decoder->packet_buffer[i].start_addr = CS_ETM_INVAL_ADDR;
 		decoder->packet_buffer[i].end_addr = CS_ETM_INVAL_ADDR;
+		decoder->packet_buffer[i].instr_count = 0;
 		decoder->packet_buffer[i].last_instr_taken_branch = false;
+		decoder->packet_buffer[i].last_instr_size = 0;
 		decoder->packet_buffer[i].exc = false;
 		decoder->packet_buffer[i].exc_ret = false;
 		decoder->packet_buffer[i].cpu = INT_MIN;
@@ -294,11 +297,15 @@ cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder,
 	decoder->packet_count++;
 
 	decoder->packet_buffer[et].sample_type = sample_type;
+	decoder->packet_buffer[et].isa = CS_ETM_ISA_UNKNOWN;
 	decoder->packet_buffer[et].exc = false;
 	decoder->packet_buffer[et].exc_ret = false;
 	decoder->packet_buffer[et].cpu = *((int *)inode->priv);
 	decoder->packet_buffer[et].start_addr = CS_ETM_INVAL_ADDR;
 	decoder->packet_buffer[et].end_addr = CS_ETM_INVAL_ADDR;
+	decoder->packet_buffer[et].instr_count = 0;
+	decoder->packet_buffer[et].last_instr_taken_branch = false;
+	decoder->packet_buffer[et].last_instr_size = 0;
 
 	if (decoder->packet_count == MAX_BUFFER - 1)
 		return OCSD_RESP_WAIT;
@@ -321,8 +328,28 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
 
 	packet = &decoder->packet_buffer[decoder->tail];
 
+	switch (elem->isa) {
+	case ocsd_isa_aarch64:
+		packet->isa = CS_ETM_ISA_A64;
+		break;
+	case ocsd_isa_arm:
+		packet->isa = CS_ETM_ISA_A32;
+		break;
+	case ocsd_isa_thumb2:
+		packet->isa = CS_ETM_ISA_T32;
+		break;
+	case ocsd_isa_tee:
+	case ocsd_isa_jazelle:
+	case ocsd_isa_custom:
+	case ocsd_isa_unknown:
+	default:
+		packet->isa = CS_ETM_ISA_UNKNOWN;
+	}
+
 	packet->start_addr = elem->st_addr;
 	packet->end_addr = elem->en_addr;
+	packet->instr_count = elem->num_instr_range;
+
 	switch (elem->last_i_type) {
 	case OCSD_INSTR_BR:
 	case OCSD_INSTR_BR_INDIRECT:
@@ -336,6 +363,8 @@ cs_etm_decoder__buffer_range(struct cs_etm_decoder *decoder,
 		break;
 	}
 
+	packet->last_instr_size = elem->last_instr_sz;
+
 	return ret;
 }
 
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
index 612b5755f742..9351bd10d864 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
@@ -28,11 +28,21 @@ enum cs_etm_sample_type {
 	CS_ETM_TRACE_ON = 1 << 1,
 };
 
+enum cs_etm_isa {
+	CS_ETM_ISA_UNKNOWN,
+	CS_ETM_ISA_A64,
+	CS_ETM_ISA_A32,
+	CS_ETM_ISA_T32,
+};
+
 struct cs_etm_packet {
 	enum cs_etm_sample_type sample_type;
+	enum cs_etm_isa isa;
 	u64 start_addr;
 	u64 end_addr;
+	u32 instr_count;
 	u8 last_instr_taken_branch;
+	u8 last_instr_size;
 	u8 exc;
 	u8 exc_ret;
 	int cpu;
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 73430b73570d..48ad217bf0df 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -31,14 +31,6 @@
 
 #define MAX_TIMESTAMP (~0ULL)
 
-/*
- * A64 instructions are always 4 bytes
- *
- * Only A64 is supported, so can use this constant for converting between
- * addresses and instruction counts, calculting offsets etc
- */
-#define A64_INSTR_SIZE 4
-
 struct cs_etm_auxtrace {
 	struct auxtrace auxtrace;
 	struct auxtrace_queues queues;
@@ -510,21 +502,17 @@ static inline void cs_etm__reset_last_branch_rb(struct cs_etm_queue *etmq)
 	etmq->last_branch_rb->nr = 0;
 }
 
-static inline u64 cs_etm__last_executed_instr(struct cs_etm_packet *packet)
-{
-	/* Returns 0 for the CS_ETM_TRACE_ON packet */
-	if (packet->sample_type == CS_ETM_TRACE_ON)
-		return 0;
+static inline int cs_etm__t32_instr_size(struct cs_etm_queue *etmq,
+					 u64 addr) {
+	u8 instrBytes[2];
 
+	cs_etm__mem_access(etmq, addr, ARRAY_SIZE(instrBytes), instrBytes);
 	/*
-	 * The packet records the execution range with an exclusive end address
-	 *
-	 * A64 instructions are constant size, so the last executed
-	 * instruction is A64_INSTR_SIZE before the end address
-	 * Will need to do instruction level decode for T32 instructions as
-	 * they can be variable size (not yet supported).
+	 * T32 instruction size is indicated by bits[15:11] of the first
+	 * 16-bit word of the instruction: 0b11101, 0b11110 and 0b11111
+	 * denote a 32-bit instruction.
 	 */
-	return packet->end_addr - A64_INSTR_SIZE;
+	return ((instrBytes[1] & 0xF8) >= 0xE8) ? 4 : 2;
 }
 
 static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
@@ -536,27 +524,32 @@ static inline u64 cs_etm__first_executed_instr(struct cs_etm_packet *packet)
 	return packet->start_addr;
 }
 
-static inline u64 cs_etm__instr_count(const struct cs_etm_packet *packet)
+static inline
+u64 cs_etm__last_executed_instr(const struct cs_etm_packet *packet)
 {
-	/*
-	 * Only A64 instructions are currently supported, so can get
-	 * instruction count by dividing.
-	 * Will need to do instruction level decode for T32 instructions as
-	 * they can be variable size (not yet supported).
-	 */
-	return (packet->end_addr - packet->start_addr) / A64_INSTR_SIZE;
+	/* Returns 0 for the CS_ETM_TRACE_ON packet */
+	if (packet->sample_type == CS_ETM_TRACE_ON)
+		return 0;
+
+	return packet->end_addr - packet->last_instr_size;
 }
 
-static inline u64 cs_etm__instr_addr(const struct cs_etm_packet *packet,
+static inline u64 cs_etm__instr_addr(struct cs_etm_queue *etmq,
+				     const struct cs_etm_packet *packet,
 				     u64 offset)
 {
-	/*
-	 * Only A64 instructions are currently supported, so can get
-	 * instruction address by muliplying.
-	 * Will need to do instruction level decode for T32 instructions as
-	 * they can be variable size (not yet supported).
-	 */
-	return packet->start_addr + offset * A64_INSTR_SIZE;
+	if (packet->isa == CS_ETM_ISA_T32) {
+		u64 addr = packet->start_addr;
+
+		while (offset > 0) {
+			addr += cs_etm__t32_instr_size(etmq, addr);
+			offset--;
+		}
+		return addr;
+	}
+
+	/* Assume a 4 byte instruction size (A32/A64) */
+	return packet->start_addr + offset * 4;
 }
 
 static void cs_etm__update_last_branch_rb(struct cs_etm_queue *etmq)
@@ -888,9 +881,8 @@ static int cs_etm__sample(struct cs_etm_queue *etmq)
 	struct cs_etm_auxtrace *etm = etmq->etm;
 	struct cs_etm_packet *tmp;
 	int ret;
-	u64 instrs_executed;
+	u64 instrs_executed = etmq->packet->instr_count;
 
-	instrs_executed = cs_etm__instr_count(etmq->packet);
 	etmq->period_instructions += instrs_executed;
 
 	/*
@@ -920,7 +912,7 @@ static int cs_etm__sample(struct cs_etm_queue *etmq)
 		 * executed, but PC has not advanced to next instruction)
 		 */
 		u64 offset = (instrs_executed - instrs_over - 1);
-		u64 addr = cs_etm__instr_addr(etmq->packet, offset);
+		u64 addr = cs_etm__instr_addr(etmq, etmq->packet, offset);
 
 		ret = cs_etm__synth_instruction_sample(
 			etmq, addr, etm->instructions_sample_period);

^ permalink raw reply related	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2018-12-18 14:06 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2018-12-03 12:18 [PATCH v5 RESEND] perf: Support for Arm A32/T32 instruction sets in CoreSight trace Robert Walker
2018-12-03 12:18 ` Robert Walker
2018-12-03 14:53 ` Arnaldo Carvalho de Melo
2018-12-03 14:53   ` Arnaldo Carvalho de Melo
2018-12-14 20:38 ` [tip:perf/core] perf cs-etm: Support for ARM " tip-bot for Robert Walker
2018-12-18 14:05 ` tip-bot for Robert Walker

This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.