From: James Clark <james.clark@arm.com>
To: Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com>
Cc: mathieu.poirier@linaro.org, acme@kernel.org,
darren@os.amperecomputing.com, scott@os.amperecomputing.com,
scclevenger@os.amperecomputing.com, linux-kernel@vger.kernel.org,
coresight@lists.linaro.org, linux-arm-kernel@lists.infradead.org,
mike.leach@linaro.org
Subject: Re: [PATCH] perf cs-etm: Add support for coresight trace for any range of CPUs
Date: Thu, 20 Apr 2023 11:14:47 +0100 [thread overview]
Message-ID: <c64df0c7-1117-19ff-e94a-41b07a78c496@arm.com> (raw)
In-Reply-To: <d758c5e2-aa32-d829-35ee-a685bdb56f75@arm.com>
On 20/04/2023 10:43, James Clark wrote:
>
>
> On 19/04/2023 18:21, Ganapatrao Kulkarni wrote:
>> The current implementation supports coresight trace for a range of
>> CPUs, if the first CPU is CPU0.
>>
>> Adding changes to enable coresight trace for any range of CPUs by
>> decoding the first CPU also from the header.
>> Later, first CPU id is used instead of CPU0 across the decoder functions.
>>
>> Signed-off-by: Ganapatrao Kulkarni <gankulkarni@os.amperecomputing.com>
>> ---
>> .../perf/util/cs-etm-decoder/cs-etm-decoder.c | 4 +-
>> .../perf/util/cs-etm-decoder/cs-etm-decoder.h | 3 +-
>> tools/perf/util/cs-etm.c | 62 ++++++++++++-------
>> 3 files changed, 42 insertions(+), 27 deletions(-)
>>
>> diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
>> index 82a27ab90c8b..41ab299b643b 100644
>> --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
>> +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
>> @@ -724,7 +724,7 @@ cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params,
>> }
>>
>> struct cs_etm_decoder *
>> -cs_etm_decoder__new(int decoders, struct cs_etm_decoder_params *d_params,
>> +cs_etm_decoder__new(int first_decoder, int decoders, struct cs_etm_decoder_params *d_params,
>> struct cs_etm_trace_params t_params[])
>> {
>> struct cs_etm_decoder *decoder;
>> @@ -769,7 +769,7 @@ cs_etm_decoder__new(int decoders, struct cs_etm_decoder_params *d_params,
>> /* init raw frame logging if required */
>> cs_etm_decoder__init_raw_frame_logging(d_params, decoder);
>>
>> - for (i = 0; i < decoders; i++) {
>> + for (i = first_decoder; i < decoders; i++) {
>> ret = cs_etm_decoder__create_etm_decoder(d_params,
>> &t_params[i],
>> decoder);
>> diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
>> index 92a855fbe5b8..b06193fc75b4 100644
>> --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
>> +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
>> @@ -90,7 +90,8 @@ int cs_etm_decoder__process_data_block(struct cs_etm_decoder *decoder,
>> size_t len, size_t *consumed);
>>
>> struct cs_etm_decoder *
>> -cs_etm_decoder__new(int num_cpu,
>> +cs_etm_decoder__new(int first_decoder,
>> + int decoders,
>> struct cs_etm_decoder_params *d_params,
>> struct cs_etm_trace_params t_params[]);
>>
>> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
>> index 94e2d02009eb..2619513ae088 100644
>> --- a/tools/perf/util/cs-etm.c
>> +++ b/tools/perf/util/cs-etm.c
>> @@ -55,6 +55,8 @@ struct cs_etm_auxtrace {
>> u8 has_virtual_ts; /* Virtual/Kernel timestamps in the trace. */
>>
>> int num_cpu;
>> + int first_cpu;
>> + int last_cpu;
>> u64 latest_kernel_timestamp;
>> u32 auxtrace_type;
>> u64 branches_sample_type;
>> @@ -638,14 +640,13 @@ static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
>> }
>>
>> static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
>> - struct cs_etm_auxtrace *etm,
>> - int decoders)
>> + struct cs_etm_auxtrace *etm)
>> {
>> int i;
>> u32 etmidr;
>> u64 architecture;
>>
>> - for (i = 0; i < decoders; i++) {
>> + for (i = etm->first_cpu; i < etm->last_cpu; i++) {
>> architecture = etm->metadata[i][CS_ETM_MAGIC];
>>
>> switch (architecture) {
>> @@ -817,7 +818,7 @@ static void cs_etm__free(struct perf_session *session)
>> /* Then the RB tree itself */
>> intlist__delete(traceid_list);
>>
>> - for (i = 0; i < aux->num_cpu; i++)
>> + for (i = aux->first_cpu; i < aux->last_cpu; i++)
>> zfree(&aux->metadata[i]);
>>
>> thread__zput(aux->unknown_thread);
>> @@ -921,7 +922,8 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
>> * Each queue can only contain data from one CPU when unformatted, so only one decoder is
>> * needed.
>> */
>> - int decoders = formatted ? etm->num_cpu : 1;
>> + int first_decoder = formatted ? etm->first_cpu : 0;
>> + int decoders = first_decoder + (formatted ? etm->num_cpu : 1);
>>
>> etmq = zalloc(sizeof(*etmq));
>> if (!etmq)
>> @@ -937,7 +939,7 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
>> if (!t_params)
>> goto out_free;
>>
>> - if (cs_etm__init_trace_params(t_params, etm, decoders))
>> + if (cs_etm__init_trace_params(t_params, etm))
>> goto out_free;
>>
>> /* Set decoder parameters to decode trace packets */
>> @@ -947,8 +949,7 @@ static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
>> formatted))
>> goto out_free;
>>
>> - etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
>> - t_params);
>> + etmq->decoder = cs_etm_decoder__new(first_decoder, decoders, &d_params, t_params);
>>
>> if (!etmq->decoder)
>> goto out_free;
>> @@ -2959,11 +2960,11 @@ static int cs_etm__queue_aux_records(struct perf_session *session)
>> * Loop through the ETMs and complain if we find at least one where ts_source != 1 (virtual
>> * timestamps).
>> */
>> -static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
>> +static bool cs_etm__has_virtual_ts(u64 **metadata, struct cs_etm_auxtrace *etm)
>> {
>> int j;
>>
>> - for (j = 0; j < num_cpu; j++) {
>> + for (j = etm->first_cpu; j < etm->last_cpu; j++) {
>> switch (metadata[j][CS_ETM_MAGIC]) {
>> case __perf_cs_etmv4_magic:
>> if (HAS_PARAM(j, ETMV4, TS_SOURCE) || metadata[j][CS_ETMV4_TS_SOURCE] != 1)
>> @@ -2982,13 +2983,14 @@ static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
>> }
>>
>> /* map trace ids to correct metadata block, from information in metadata */
>> -static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
>> +static int cs_etm__map_trace_ids_metadata(struct cs_etm_auxtrace *etm)
>> {
>> u64 cs_etm_magic;
>> + u64 **metadata = etm->metadata;
>> u8 trace_chan_id;
>> int i, err;
>>
>> - for (i = 0; i < num_cpu; i++) {
>> + for (i = etm->first_cpu; i < etm->last_cpu; i++) {
>> cs_etm_magic = metadata[i][CS_ETM_MAGIC];
>> switch (cs_etm_magic) {
>> case __perf_cs_etmv3_magic:
>> @@ -3015,12 +3017,13 @@ static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
>> * If we found AUX_HW_ID packets, then set any metadata marked as unused to the
>> * unused value to reduce the number of unneeded decoders created.
>> */
>> -static int cs_etm__clear_unused_trace_ids_metadata(int num_cpu, u64 **metadata)
>> +static int cs_etm__clear_unused_trace_ids_metadata(struct cs_etm_auxtrace *etm)
>> {
>> u64 cs_etm_magic;
>> + u64 **metadata = etm->metadata;
>> int i;
>>
>> - for (i = 0; i < num_cpu; i++) {
>> + for (i = etm->first_cpu; i < etm->last_cpu; i++) {
>> cs_etm_magic = metadata[i][CS_ETM_MAGIC];
>> switch (cs_etm_magic) {
>> case __perf_cs_etmv3_magic:
>> @@ -3049,7 +3052,7 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
>> int event_header_size = sizeof(struct perf_event_header);
>> int total_size = auxtrace_info->header.size;
>> int priv_size = 0;
>> - int num_cpu;
>> + int num_cpu, first_cpu = 0, last_cpu;
>> int err = 0;
>> int aux_hw_id_found;
>> int i, j;
>> @@ -3068,22 +3071,31 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
>> /* First the global part */
>> ptr = (u64 *) auxtrace_info->priv;
>> num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
>> - metadata = zalloc(sizeof(*metadata) * num_cpu);
>> +
>> + /* Start parsing after the common part of the header */
>> + i = CS_HEADER_VERSION_MAX;
>> +
>> + /*Get CPU id of first event */
>> + first_cpu = ptr[i + CS_ETM_CPU];
>> + last_cpu = first_cpu + num_cpu;
>> +
>> + if (first_cpu > cpu__max_cpu().cpu ||
>> + last_cpu > cpu__max_cpu().cpu)
>> + return -EINVAL;
>> +
>> + metadata = zalloc(sizeof(*metadata) * last_cpu);
>
> Hi Ganapatrao,
>
> I think I see what the problem is, but I'm wondering if a better fix
> would be to further decouple the CPU ID from the index in the array.
>
> With your change it's not clear what happens with sparse recordings, for
> example 'perf record -e cs_etm// -C 1,3,5'. And it seems like there is
> some wastage in the zalloc here for example if only CPU 256 is traced
> then we'd still make 256 decoders but 255 of them would be unused?
>
> I tried to test sparse recordings, but your change doesn't apply to the
> latest coresight/next branch. I did notice that 'perf report -D' doesn't
> work with them on coresight/next (it just quits), but I couldn't see if
> that's fixed with your change.
>
> Would a better fix not be to keep the metadata loops from 0-N and
> instead save the CPU ID in cs_etm_decoder_params or the decoder. That
> way it would support both sparse and not starting from 0 cases? I think
> the code would be better if it's worded like "i < recorded_cpus" rather
> than "i < cpu" to make it clear that i isn't actually the CPU ID it's
> just an index.
>
> Also a new test for this scenario would probably be a good idea.
>
> Thanks
> James
>
BTW for the test, I'm currently working on something where I've added
something like this to test_arm_coresight.sh:
arm_cs_etm_basic_test() {
echo "Recording trace with $@"
perf record -o ${perfdata} "$@" -- ls > /dev/null 2>&1
perf_script_branch_samples ls &&
perf_report_branch_samples ls &&
perf_report_instruction_samples ls
err=$?
arm_cs_report "CoreSight basic testing with %@" $err
}
# Test all combinations of per-thread, system-wide and normal mode with
# and without timestamps
arm_cs_etm_basic_test -e cs_etm/timestamp=0/ --per-thread
arm_cs_etm_basic_test -e cs_etm/timestamp=1/ --per-thread
arm_cs_etm_basic_test -e cs_etm/timestamp=0/ -a
arm_cs_etm_basic_test -e cs_etm/timestamp=1/ -a
arm_cs_etm_basic_test -e cs_etm/timestamp=0/
arm_cs_etm_basic_test -e cs_etm/timestamp=1/
I think you should be able to add this to cover your cases as well:
# Test non-zero indexed and sparse CPU lists
arm_cs_etm_basic_test -e cs_etm// -C 1,2
arm_cs_etm_basic_test -e cs_etm// -C 0,2
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
next prev parent reply other threads:[~2023-04-20 10:15 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2023-04-19 17:21 [PATCH] perf cs-etm: Add support for coresight trace for any range of CPUs Ganapatrao Kulkarni
2023-04-20 9:43 ` James Clark
2023-04-20 10:14 ` James Clark [this message]
2023-04-20 11:47 ` Ganapatrao Kulkarni
2023-04-20 12:30 ` James Clark
2023-04-20 12:37 ` Ganapatrao Kulkarni
2023-04-20 13:03 ` Suzuki K Poulose
2023-04-20 15:44 ` James Clark
2023-04-20 18:33 ` Arnaldo Carvalho de Melo
2023-04-21 8:55 ` Suzuki K Poulose
2023-04-20 14:30 ` Ganapatrao Kulkarni
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=c64df0c7-1117-19ff-e94a-41b07a78c496@arm.com \
--to=james.clark@arm.com \
--cc=acme@kernel.org \
--cc=coresight@lists.linaro.org \
--cc=darren@os.amperecomputing.com \
--cc=gankulkarni@os.amperecomputing.com \
--cc=linux-arm-kernel@lists.infradead.org \
--cc=linux-kernel@vger.kernel.org \
--cc=mathieu.poirier@linaro.org \
--cc=mike.leach@linaro.org \
--cc=scclevenger@os.amperecomputing.com \
--cc=scott@os.amperecomputing.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox