* [PATCH 1/4] perf event: Add simd_flags field to perf_sample
2022-12-19 16:12 [PATCH 0/4] Enable display of partial and empty SVE predicates from Arm SPE data James Clark
@ 2022-12-19 16:12 ` James Clark
2022-12-19 18:21 ` Namhyung Kim
2022-12-19 16:12 ` [PATCH 2/4] perf arm-spe: Refactor arm-spe to support operation packet type James Clark
` (2 subsequent siblings)
3 siblings, 1 reply; 7+ messages in thread
From: James Clark @ 2022-12-19 16:12 UTC (permalink / raw)
To: linux-perf-users
Cc: robh, German Gomez, James Clark, Peter Zijlstra, Ingo Molnar,
Arnaldo Carvalho de Melo, Mark Rutland, Alexander Shishkin,
Jiri Olsa, Namhyung Kim, John Garry, Will Deacon, Mike Leach,
Leo Yan, linux-kernel, linux-arm-kernel
From: German Gomez <german.gomez@arm.com>
Add new field to the struct perf_sample to store flags related to SIMD
ops.
It will be used to store SIMD information from SVE and NEON when
profiling using ARM SPE.
Signed-off-by: German Gomez <german.gomez@arm.com>
Signed-off-by: James Clark <james.clark@arm.com>
---
tools/perf/util/sample.h | 13 +++++++++++++
1 file changed, 13 insertions(+)
diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
index 60ec79d4eea4..bdf52faf165f 100644
--- a/tools/perf/util/sample.h
+++ b/tools/perf/util/sample.h
@@ -66,6 +66,18 @@ struct aux_sample {
void *data;
};
+struct simd_flags {
+ u64 arch:1, /* architecture (isa) */
+ pred:2; /* predication */
+};
+
+/* simd architecture flags */
+#define SIMD_OP_FLAGS_ARCH_SVE 0x01 /* ARM SVE */
+
+/* simd predicate flags */
+#define SIMD_OP_FLAGS_PRED_PARTIAL 0x01 /* partial predicate */
+#define SIMD_OP_FLAGS_PRED_EMPTY 0x02 /* empty predicate */
+
struct perf_sample {
u64 ip;
u32 pid, tid;
@@ -103,6 +115,7 @@ struct perf_sample {
struct stack_dump user_stack;
struct sample_read read;
struct aux_sample aux_sample;
+ struct simd_flags simd_flags;
};
/*
--
2.25.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* Re: [PATCH 1/4] perf event: Add simd_flags field to perf_sample
2022-12-19 16:12 ` [PATCH 1/4] perf event: Add simd_flags field to perf_sample James Clark
@ 2022-12-19 18:21 ` Namhyung Kim
2022-12-20 10:22 ` James Clark
0 siblings, 1 reply; 7+ messages in thread
From: Namhyung Kim @ 2022-12-19 18:21 UTC (permalink / raw)
To: James Clark
Cc: linux-perf-users, robh, German Gomez, Peter Zijlstra, Ingo Molnar,
Arnaldo Carvalho de Melo, Mark Rutland, Alexander Shishkin,
Jiri Olsa, John Garry, Will Deacon, Mike Leach, Leo Yan,
linux-kernel, linux-arm-kernel
Hi James,
On Mon, Dec 19, 2022 at 8:13 AM James Clark <james.clark@arm.com> wrote:
>
> From: German Gomez <german.gomez@arm.com>
>
> Add new field to the struct perf_sample to store flags related to SIMD
> ops.
>
> It will be used to store SIMD information from SVE and NEON when
> profiling using ARM SPE.
>
> Signed-off-by: German Gomez <german.gomez@arm.com>
> Signed-off-by: James Clark <james.clark@arm.com>
> ---
> tools/perf/util/sample.h | 13 +++++++++++++
> 1 file changed, 13 insertions(+)
>
> diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
> index 60ec79d4eea4..bdf52faf165f 100644
> --- a/tools/perf/util/sample.h
> +++ b/tools/perf/util/sample.h
> @@ -66,6 +66,18 @@ struct aux_sample {
> void *data;
> };
>
> +struct simd_flags {
> + u64 arch:1, /* architecture (isa) */
> + pred:2; /* predication */
Can we reserve more bits for possible future extension or
other arch support? It seems to be too tight for each field.
Do you plan to add more info to the struct in the future?
Thanks,
Namhyung
> +};
> +
> +/* simd architecture flags */
> +#define SIMD_OP_FLAGS_ARCH_SVE 0x01 /* ARM SVE */
> +
> +/* simd predicate flags */
> +#define SIMD_OP_FLAGS_PRED_PARTIAL 0x01 /* partial predicate */
> +#define SIMD_OP_FLAGS_PRED_EMPTY 0x02 /* empty predicate */
> +
> struct perf_sample {
> u64 ip;
> u32 pid, tid;
> @@ -103,6 +115,7 @@ struct perf_sample {
> struct stack_dump user_stack;
> struct sample_read read;
> struct aux_sample aux_sample;
> + struct simd_flags simd_flags;
> };
>
> /*
> --
> 2.25.1
>
^ permalink raw reply [flat|nested] 7+ messages in thread
* Re: [PATCH 1/4] perf event: Add simd_flags field to perf_sample
2022-12-19 18:21 ` Namhyung Kim
@ 2022-12-20 10:22 ` James Clark
0 siblings, 0 replies; 7+ messages in thread
From: James Clark @ 2022-12-20 10:22 UTC (permalink / raw)
To: Namhyung Kim
Cc: linux-perf-users, robh, German Gomez, Peter Zijlstra, Ingo Molnar,
Arnaldo Carvalho de Melo, Mark Rutland, Alexander Shishkin,
Jiri Olsa, John Garry, Will Deacon, Mike Leach, Leo Yan,
linux-kernel, linux-arm-kernel
On 19/12/2022 18:21, Namhyung Kim wrote:
> Hi James,
>
> On Mon, Dec 19, 2022 at 8:13 AM James Clark <james.clark@arm.com> wrote:
>>
>> From: German Gomez <german.gomez@arm.com>
>>
>> Add new field to the struct perf_sample to store flags related to SIMD
>> ops.
>>
>> It will be used to store SIMD information from SVE and NEON when
>> profiling using ARM SPE.
>>
>> Signed-off-by: German Gomez <german.gomez@arm.com>
>> Signed-off-by: James Clark <james.clark@arm.com>
>> ---
>> tools/perf/util/sample.h | 13 +++++++++++++
>> 1 file changed, 13 insertions(+)
>>
>> diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h
>> index 60ec79d4eea4..bdf52faf165f 100644
>> --- a/tools/perf/util/sample.h
>> +++ b/tools/perf/util/sample.h
>> @@ -66,6 +66,18 @@ struct aux_sample {
>> void *data;
>> };
>>
>> +struct simd_flags {
>> + u64 arch:1, /* architecture (isa) */
>> + pred:2; /* predication */
>
> Can we reserve more bits for possible future extension or
> other arch support? It seems to be too tight for each field.
> Do you plan to add more info to the struct in the future?
As far as I can see because this is userspace only, reserving bits
doesn't be done ahead of time. When we need more bits we can just add
it. It never gets written to a file either so there is no need for
backwards compatibility.
>
> Thanks,
> Namhyung
>
>
>> +};
>> +
>> +/* simd architecture flags */
>> +#define SIMD_OP_FLAGS_ARCH_SVE 0x01 /* ARM SVE */
>> +
>> +/* simd predicate flags */
>> +#define SIMD_OP_FLAGS_PRED_PARTIAL 0x01 /* partial predicate */
>> +#define SIMD_OP_FLAGS_PRED_EMPTY 0x02 /* empty predicate */
>> +
>> struct perf_sample {
>> u64 ip;
>> u32 pid, tid;
>> @@ -103,6 +115,7 @@ struct perf_sample {
>> struct stack_dump user_stack;
>> struct sample_read read;
>> struct aux_sample aux_sample;
>> + struct simd_flags simd_flags;
>> };
>>
>> /*
>> --
>> 2.25.1
>>
^ permalink raw reply [flat|nested] 7+ messages in thread
* [PATCH 2/4] perf arm-spe: Refactor arm-spe to support operation packet type
2022-12-19 16:12 [PATCH 0/4] Enable display of partial and empty SVE predicates from Arm SPE data James Clark
2022-12-19 16:12 ` [PATCH 1/4] perf event: Add simd_flags field to perf_sample James Clark
@ 2022-12-19 16:12 ` James Clark
2022-12-19 16:12 ` [PATCH 3/4] perf arm-spe: Add SVE flags to the SPE samples James Clark
2022-12-19 16:12 ` [PATCH 4/4] perf report: Add 'simd' sort field James Clark
3 siblings, 0 replies; 7+ messages in thread
From: James Clark @ 2022-12-19 16:12 UTC (permalink / raw)
To: linux-perf-users
Cc: robh, German Gomez, Leo Yan, James Clark, Peter Zijlstra,
Ingo Molnar, Arnaldo Carvalho de Melo, Mark Rutland,
Alexander Shishkin, Jiri Olsa, Namhyung Kim, John Garry,
Will Deacon, Mike Leach, linux-kernel, linux-arm-kernel
From: German Gomez <german.gomez@arm.com>
Extend the decoder of Arm SPE records to support more fields from the
operation packet type.
Not all fields are being decoded by this commit. Only those needed to
support the use-case SVE load/store/other operations.
Suggested-by: Leo Yan <leo.yan@linaro.org>
Signed-off-by: German Gomez <german.gomez@arm.com>
Signed-off-by: James Clark <james.clark@arm.com>
---
.../util/arm-spe-decoder/arm-spe-decoder.c | 30 ++++++++++--
.../util/arm-spe-decoder/arm-spe-decoder.h | 47 +++++++++++++++----
tools/perf/util/arm-spe.c | 8 ++--
3 files changed, 67 insertions(+), 18 deletions(-)
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
index 091987dd3966..709d3f6b58c6 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.c
@@ -186,11 +186,27 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
decoder->record.context_id = payload;
break;
case ARM_SPE_OP_TYPE:
- if (idx == SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC) {
- if (payload & 0x1)
- decoder->record.op = ARM_SPE_ST;
+ switch (idx) {
+ case SPE_OP_PKT_HDR_CLASS_LD_ST_ATOMIC:
+ decoder->record.op |= ARM_SPE_OP_LDST;
+ if (payload & SPE_OP_PKT_ST)
+ decoder->record.op |= ARM_SPE_OP_ST;
else
- decoder->record.op = ARM_SPE_LD;
+ decoder->record.op |= ARM_SPE_OP_LD;
+ if (SPE_OP_PKT_IS_LDST_SVE(payload))
+ decoder->record.op |= ARM_SPE_OP_SVE_LDST;
+ break;
+ case SPE_OP_PKT_HDR_CLASS_OTHER:
+ decoder->record.op |= ARM_SPE_OP_OTHER;
+ if (SPE_OP_PKT_IS_OTHER_SVE_OP(payload))
+ decoder->record.op |= ARM_SPE_OP_SVE_OTHER;
+ break;
+ case SPE_OP_PKT_HDR_CLASS_BR_ERET:
+ decoder->record.op |= ARM_SPE_OP_BRANCH_ERET;
+ break;
+ default:
+ pr_err("Get packet error!\n");
+ return -1;
}
break;
case ARM_SPE_EVENTS:
@@ -218,6 +234,12 @@ static int arm_spe_read_record(struct arm_spe_decoder *decoder)
if (payload & BIT(EV_MISPRED))
decoder->record.type |= ARM_SPE_BRANCH_MISS;
+ if (payload & BIT(EV_PARTIAL_PREDICATE))
+ decoder->record.type |= ARM_SPE_SVE_PARTIAL_PRED;
+
+ if (payload & BIT(EV_EMPTY_PREDICATE))
+ decoder->record.type |= ARM_SPE_SVE_EMPTY_PRED;
+
break;
case ARM_SPE_DATA_SOURCE:
decoder->record.source = payload;
diff --git a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
index 46a61df1145b..1443c28545a9 100644
--- a/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
+++ b/tools/perf/util/arm-spe-decoder/arm-spe-decoder.h
@@ -14,19 +14,46 @@
#include "arm-spe-pkt-decoder.h"
enum arm_spe_sample_type {
- ARM_SPE_L1D_ACCESS = 1 << 0,
- ARM_SPE_L1D_MISS = 1 << 1,
- ARM_SPE_LLC_ACCESS = 1 << 2,
- ARM_SPE_LLC_MISS = 1 << 3,
- ARM_SPE_TLB_ACCESS = 1 << 4,
- ARM_SPE_TLB_MISS = 1 << 5,
- ARM_SPE_BRANCH_MISS = 1 << 6,
- ARM_SPE_REMOTE_ACCESS = 1 << 7,
+ ARM_SPE_L1D_ACCESS = 1 << 0,
+ ARM_SPE_L1D_MISS = 1 << 1,
+ ARM_SPE_LLC_ACCESS = 1 << 2,
+ ARM_SPE_LLC_MISS = 1 << 3,
+ ARM_SPE_TLB_ACCESS = 1 << 4,
+ ARM_SPE_TLB_MISS = 1 << 5,
+ ARM_SPE_BRANCH_MISS = 1 << 6,
+ ARM_SPE_REMOTE_ACCESS = 1 << 7,
+ ARM_SPE_SVE_PARTIAL_PRED = 1 << 8,
+ ARM_SPE_SVE_EMPTY_PRED = 1 << 9,
};
enum arm_spe_op_type {
- ARM_SPE_LD = 1 << 0,
- ARM_SPE_ST = 1 << 1,
+ /* First level operation type */
+ ARM_SPE_OP_OTHER = 1 << 0,
+ ARM_SPE_OP_LDST = 1 << 1,
+ ARM_SPE_OP_BRANCH_ERET = 1 << 2,
+
+ /* Second level operation type for OTHER */
+ ARM_SPE_OP_SVE_OTHER = 1 << 16,
+ ARM_SPE_OP_SVE_FP = 1 << 17,
+ ARM_SPE_OP_SVE_PRED_OTHER = 1 << 18,
+
+ /* Second level operation type for LDST */
+ ARM_SPE_OP_LD = 1 << 16,
+ ARM_SPE_OP_ST = 1 << 17,
+ ARM_SPE_OP_ATOMIC = 1 << 18,
+ ARM_SPE_OP_EXCL = 1 << 19,
+ ARM_SPE_OP_AR = 1 << 20,
+ ARM_SPE_OP_SIMD_FP = 1 << 21,
+ ARM_SPE_OP_GP_REG = 1 << 22,
+ ARM_SPE_OP_UNSPEC_REG = 1 << 23,
+ ARM_SPE_OP_NV_SYSREG = 1 << 24,
+ ARM_SPE_OP_SVE_LDST = 1 << 25,
+ ARM_SPE_OP_SVE_PRED_LDST = 1 << 26,
+ ARM_SPE_OP_SVE_SG = 1 << 27,
+
+ /* Second level operation type for BRANCH_ERET */
+ ARM_SPE_OP_BR_COND = 1 << 16,
+ ARM_SPE_OP_BR_INDIRECT = 1 << 17,
};
enum arm_spe_neoverse_data_source {
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index 906476a839e1..bfae4731a47a 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -411,7 +411,7 @@ static void arm_spe__synth_data_source_neoverse(const struct arm_spe_record *rec
* We have no data on the hit level or data source for stores in the
* Neoverse SPE records.
*/
- if (record->op & ARM_SPE_ST) {
+ if (record->op & ARM_SPE_OP_ST) {
data_src->mem_lvl = PERF_MEM_LVL_NA;
data_src->mem_lvl_num = PERF_MEM_LVLNUM_NA;
data_src->mem_snoop = PERF_MEM_SNOOP_NA;
@@ -497,12 +497,12 @@ static void arm_spe__synth_data_source_generic(const struct arm_spe_record *reco
static u64 arm_spe__synth_data_source(const struct arm_spe_record *record, u64 midr)
{
- union perf_mem_data_src data_src = { 0 };
+ union perf_mem_data_src data_src = { .mem_op = PERF_MEM_OP_NA };
bool is_neoverse = is_midr_in_range_list(midr, neoverse_spe);
- if (record->op == ARM_SPE_LD)
+ if (record->op & ARM_SPE_OP_LD)
data_src.mem_op = PERF_MEM_OP_LOAD;
- else if (record->op == ARM_SPE_ST)
+ else if (record->op & ARM_SPE_OP_ST)
data_src.mem_op = PERF_MEM_OP_STORE;
else
return 0;
--
2.25.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 3/4] perf arm-spe: Add SVE flags to the SPE samples
2022-12-19 16:12 [PATCH 0/4] Enable display of partial and empty SVE predicates from Arm SPE data James Clark
2022-12-19 16:12 ` [PATCH 1/4] perf event: Add simd_flags field to perf_sample James Clark
2022-12-19 16:12 ` [PATCH 2/4] perf arm-spe: Refactor arm-spe to support operation packet type James Clark
@ 2022-12-19 16:12 ` James Clark
2022-12-19 16:12 ` [PATCH 4/4] perf report: Add 'simd' sort field James Clark
3 siblings, 0 replies; 7+ messages in thread
From: James Clark @ 2022-12-19 16:12 UTC (permalink / raw)
To: linux-perf-users
Cc: robh, German Gomez, James Clark, Peter Zijlstra, Ingo Molnar,
Arnaldo Carvalho de Melo, Mark Rutland, Alexander Shishkin,
Jiri Olsa, Namhyung Kim, John Garry, Will Deacon, Mike Leach,
Leo Yan, linux-kernel, linux-arm-kernel
From: German Gomez <german.gomez@arm.com>
Add flags from the Scalable Vector Extension (SVE) to the SPE samples
which are available from Armv8.3 (FEAT_SPEv1p1).
These will be displayed in a new SIMD sort field in a later commit.
Signed-off-by: German Gomez <german.gomez@arm.com>
Signed-off-by: James Clark <james.clark@arm.com>
---
tools/perf/util/arm-spe.c | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
index bfae4731a47a..7b36ba6b4079 100644
--- a/tools/perf/util/arm-spe.c
+++ b/tools/perf/util/arm-spe.c
@@ -273,6 +273,25 @@ static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid)
return 0;
}
+static struct simd_flags arm_spe__synth_simd_flags(const struct arm_spe_record *record)
+{
+ struct simd_flags simd_flags = {};
+
+ if ((record->op & ARM_SPE_OP_LDST) && (record->op & ARM_SPE_OP_SVE_LDST))
+ simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;
+
+ if ((record->op & ARM_SPE_OP_OTHER) && (record->op & ARM_SPE_OP_SVE_OTHER))
+ simd_flags.arch |= SIMD_OP_FLAGS_ARCH_SVE;
+
+ if (record->type & ARM_SPE_SVE_PARTIAL_PRED)
+ simd_flags.pred |= SIMD_OP_FLAGS_PRED_PARTIAL;
+
+ if (record->type & ARM_SPE_SVE_EMPTY_PRED)
+ simd_flags.pred |= SIMD_OP_FLAGS_PRED_EMPTY;
+
+ return simd_flags;
+}
+
static void arm_spe_prep_sample(struct arm_spe *spe,
struct arm_spe_queue *speq,
union perf_event *event,
@@ -289,6 +308,7 @@ static void arm_spe_prep_sample(struct arm_spe *spe,
sample->tid = speq->tid;
sample->period = 1;
sample->cpu = speq->cpu;
+ sample->simd_flags = arm_spe__synth_simd_flags(record);
event->sample.header.type = PERF_RECORD_SAMPLE;
event->sample.header.misc = sample->cpumode;
--
2.25.1
^ permalink raw reply related [flat|nested] 7+ messages in thread
* [PATCH 4/4] perf report: Add 'simd' sort field
2022-12-19 16:12 [PATCH 0/4] Enable display of partial and empty SVE predicates from Arm SPE data James Clark
` (2 preceding siblings ...)
2022-12-19 16:12 ` [PATCH 3/4] perf arm-spe: Add SVE flags to the SPE samples James Clark
@ 2022-12-19 16:12 ` James Clark
3 siblings, 0 replies; 7+ messages in thread
From: James Clark @ 2022-12-19 16:12 UTC (permalink / raw)
To: linux-perf-users
Cc: robh, German Gomez, James Clark, Peter Zijlstra, Ingo Molnar,
Arnaldo Carvalho de Melo, Mark Rutland, Alexander Shishkin,
Jiri Olsa, Namhyung Kim, John Garry, Will Deacon, Mike Leach,
Leo Yan, linux-kernel, linux-arm-kernel
From: German Gomez <german.gomez@arm.com>
Add 'simd' sort field to visualize SIMD ops in perf-report.
Rows are labeled with the SIMD isa, and the type of predicate (if any):
- [p] partial predicate
- [e] empty predicate (no elements in the vector being used)
Example with Arm SPE and SVE (Scalable Vector Extension):
#include <arm_sve.h>
double src[1025], dst[1025];
int main(void) {
svfloat64_t vc = svdup_f64(1);
for(;;)
for(int i = 0; i < 1025; i += svcntd())
{
svbool_t pg = svwhilelt_b64(i, 1025);
svfloat64_t vsrc = svld1(pg, &src[i]);
svfloat64_t vdst = svadd_x(pg, vsrc, vc);
svst1(pg, &dst[i], vdst);
}
return 0;
}
... compiled using "gcc-11 -march=armv8-a+sve -O3"
Profiling on a platform that implements FEAT_SVE and FEAT_SPEv1p1:
$ perf record -e arm_spe_0// -- ./a.out
$ perf report --itrace=i1i -s overhead,pid,simd,sym
Overhead Pid:Command Simd Symbol
........ ................ ....... ......................
53.76% 10758:program [.] main
46.14% 10758:program [.] SVE [.] main
0.09% 10758:program [p] SVE [.] main
The report shows 0.09% of the sampled SVE operations use partial
predicates due to src and dst arrays not being multiples of the vector
register lengths.
Signed-off-by: German Gomez <german.gomez@arm.com>
Signed-off-by: James Clark <james.clark@arm.com>
---
tools/perf/Documentation/perf-report.txt | 1 +
tools/perf/util/hist.c | 1 +
tools/perf/util/hist.h | 1 +
tools/perf/util/sort.c | 47 ++++++++++++++++++++++++
tools/perf/util/sort.h | 2 +
5 files changed, 52 insertions(+)
diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt
index 4fa509b15948..ff524d83a4a7 100644
--- a/tools/perf/Documentation/perf-report.txt
+++ b/tools/perf/Documentation/perf-report.txt
@@ -115,6 +115,7 @@ OPTIONS
- p_stage_cyc: On powerpc, this presents the number of cycles spent in a
pipeline stage. And currently supported only on powerpc.
- addr: (Full) virtual address of the sampled instruction
+ - simd: Flags describing a SIMD operation. "e" for empty Arm SVE predicate. "p" for partial Arm SVE predicate
By default, comm, dso and symbol keys are used.
(i.e. --sort comm,dso,symbol)
diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c
index 17a05e943b44..e2390114c495 100644
--- a/tools/perf/util/hist.c
+++ b/tools/perf/util/hist.c
@@ -742,6 +742,7 @@ __hists__add_entry(struct hists *hists,
.weight = sample->weight,
.ins_lat = sample->ins_lat,
.p_stage_cyc = sample->p_stage_cyc,
+ .simd_flags = sample->simd_flags,
}, *he = hists__findnew_entry(hists, &entry, al, sample_self);
if (!hists->has_callchains && he && he->callchain_size != 0)
diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h
index ebd8a8f783ee..e6ecb4453053 100644
--- a/tools/perf/util/hist.h
+++ b/tools/perf/util/hist.h
@@ -80,6 +80,7 @@ enum hist_column {
HISTC_ADDR_FROM,
HISTC_ADDR_TO,
HISTC_ADDR,
+ HISTC_SIMD,
HISTC_NR_COLS, /* Last entry */
};
diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c
index 0ecc2cb13792..5c8bfea2ce34 100644
--- a/tools/perf/util/sort.c
+++ b/tools/perf/util/sort.c
@@ -131,6 +131,52 @@ struct sort_entry sort_thread = {
.se_width_idx = HISTC_THREAD,
};
+/* --sort simd */
+
+static int64_t
+sort__simd_cmp(struct hist_entry *left, struct hist_entry *right)
+{
+ if (left->simd_flags.arch != right->simd_flags.arch)
+ return (int64_t) left->simd_flags.arch - right->simd_flags.arch;
+
+ return (int64_t) left->simd_flags.pred - right->simd_flags.pred;
+}
+
+static const char *hist_entry__get_simd_name(struct simd_flags *simd_flags)
+{
+ u64 arch = simd_flags->arch;
+
+ if (arch & SIMD_OP_FLAGS_ARCH_SVE)
+ return "SVE";
+ else
+ return "n/a";
+}
+
+static int hist_entry__simd_snprintf(struct hist_entry *he, char *bf,
+ size_t size, unsigned int width __maybe_unused)
+{
+ const char *name;
+
+ if (!he->simd_flags.arch)
+ return repsep_snprintf(bf, size, "");
+
+ name = hist_entry__get_simd_name(&he->simd_flags);
+
+ if (he->simd_flags.pred & SIMD_OP_FLAGS_PRED_EMPTY)
+ return repsep_snprintf(bf, size, "[e] %s", name);
+ else if (he->simd_flags.pred & SIMD_OP_FLAGS_PRED_PARTIAL)
+ return repsep_snprintf(bf, size, "[p] %s", name);
+
+ return repsep_snprintf(bf, size, "[.] %s", name);
+}
+
+struct sort_entry sort_simd = {
+ .se_header = "Simd ",
+ .se_cmp = sort__simd_cmp,
+ .se_snprintf = hist_entry__simd_snprintf,
+ .se_width_idx = HISTC_SIMD,
+};
+
/* --sort comm */
/*
@@ -2042,6 +2088,7 @@ static struct sort_dimension common_sort_dimensions[] = {
DIM(SORT_LOCAL_PIPELINE_STAGE_CYC, "local_p_stage_cyc", sort_local_p_stage_cyc),
DIM(SORT_GLOBAL_PIPELINE_STAGE_CYC, "p_stage_cyc", sort_global_p_stage_cyc),
DIM(SORT_ADDR, "addr", sort_addr),
+ DIM(SORT_SIMD, "simd", sort_simd)
};
#undef DIM
diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h
index 04ff8b61a2a7..8e69a2a53dc1 100644
--- a/tools/perf/util/sort.h
+++ b/tools/perf/util/sort.h
@@ -110,6 +110,7 @@ struct hist_entry {
u64 p_stage_cyc;
u8 cpumode;
u8 depth;
+ struct simd_flags simd_flags;
/* We are added by hists__add_dummy_entry. */
bool dummy;
@@ -237,6 +238,7 @@ enum sort_type {
SORT_LOCAL_PIPELINE_STAGE_CYC,
SORT_GLOBAL_PIPELINE_STAGE_CYC,
SORT_ADDR,
+ SORT_SIMD,
/* branch stack specific sort keys */
__SORT_BRANCH_STACK,
--
2.25.1
^ permalink raw reply related [flat|nested] 7+ messages in thread