* [PATCH v5 01/17] perf: cs-etm: Create decoders after both AUX and HW_ID search passes
2024-07-12 10:20 [PATCH v5 00/17] coresight: Use per-sink trace ID maps for Perf sessions James Clark
@ 2024-07-12 10:20 ` James Clark
2024-07-18 13:23 ` Mike Leach
2024-07-12 10:20 ` [PATCH v5 02/17] perf: cs-etm: Allocate queues for all CPUs James Clark
` (15 subsequent siblings)
16 siblings, 1 reply; 40+ messages in thread
From: James Clark @ 2024-07-12 10:20 UTC (permalink / raw)
To: coresight, suzuki.poulose, gankulkarni, mike.leach, leo.yan,
anshuman.khandual
Cc: James Clark, James Clark, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, John Garry, Will Deacon, Peter Zijlstra,
Ingo Molnar, Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
Jiri Olsa, Ian Rogers, Adrian Hunter, Liang, Kan, linux-kernel,
linux-arm-kernel, linux-stm32, linux-perf-users
From: James Clark <james.clark@arm.com>
Both of these passes gather information about how to create the
decoders. AUX records determine formatted/unformatted, and the HW_IDs
determine the traceID/metadata mappings. Therefore it makes sense to
cache the information and wait until both passes are over before creating
the decoders, rather than creating them at the first HW_ID found. This
will allow a simplification of the creation process where
cs_etm_queue->traceid_list will exclusively be used to create the decoders,
rather than the current two methods depending on whether the trace is
formatted or not.
Previously the sample CPU from the AUX record was used to initialize
the decoder CPU, but actually sample CPU == AUX queue index in per-CPU
mode, so saving the sample CPU isn't required. Similarly
formatted/unformatted was used upfront to create the decoders, but now
it's cached until later.
Signed-off-by: James Clark <james.clark@arm.com>
Signed-off-by: James Clark <james.clark@linaro.org>
---
tools/perf/util/cs-etm.c | 167 ++++++++++++++++++++++++---------------
1 file changed, 102 insertions(+), 65 deletions(-)
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 32818bd7cd17..f09004c4ba44 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -103,6 +103,7 @@ struct cs_etm_queue {
struct auxtrace_buffer *buffer;
unsigned int queue_nr;
u8 pending_timestamp_chan_id;
+ bool formatted;
u64 offset;
const unsigned char *buf;
size_t buf_len, buf_used;
@@ -738,8 +739,7 @@ static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
struct cs_etm_queue *etmq,
- enum cs_etm_decoder_operation mode,
- bool formatted)
+ enum cs_etm_decoder_operation mode)
{
int ret = -EINVAL;
@@ -749,7 +749,7 @@ static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
d_params->packet_printer = cs_etm__packet_dump;
d_params->operation = mode;
d_params->data = etmq;
- d_params->formatted = formatted;
+ d_params->formatted = etmq->formatted;
d_params->fsyncs = false;
d_params->hsyncs = false;
d_params->frame_aligned = true;
@@ -1041,81 +1041,34 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
return ret;
}
-static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
- bool formatted, int sample_cpu)
+static struct cs_etm_queue *cs_etm__alloc_queue(void)
{
- struct cs_etm_decoder_params d_params;
- struct cs_etm_trace_params *t_params = NULL;
- struct cs_etm_queue *etmq;
- /*
- * Each queue can only contain data from one CPU when unformatted, so only one decoder is
- * needed.
- */
- int decoders = formatted ? etm->num_cpu : 1;
-
- etmq = zalloc(sizeof(*etmq));
+ struct cs_etm_queue *etmq = zalloc(sizeof(*etmq));
if (!etmq)
return NULL;
etmq->traceid_queues_list = intlist__new(NULL);
if (!etmq->traceid_queues_list)
- goto out_free;
-
- /* Use metadata to fill in trace parameters for trace decoder */
- t_params = zalloc(sizeof(*t_params) * decoders);
+ free(etmq);
- if (!t_params)
- goto out_free;
-
- if (cs_etm__init_trace_params(t_params, etm, formatted, sample_cpu, decoders))
- goto out_free;
-
- /* Set decoder parameters to decode trace packets */
- if (cs_etm__init_decoder_params(&d_params, etmq,
- dump_trace ? CS_ETM_OPERATION_PRINT :
- CS_ETM_OPERATION_DECODE,
- formatted))
- goto out_free;
-
- etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
- t_params);
-
- if (!etmq->decoder)
- goto out_free;
-
- /*
- * Register a function to handle all memory accesses required by
- * the trace decoder library.
- */
- if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
- 0x0L, ((u64) -1L),
- cs_etm__mem_access))
- goto out_free_decoder;
-
- zfree(&t_params);
return etmq;
-
-out_free_decoder:
- cs_etm_decoder__free(etmq->decoder);
-out_free:
- intlist__delete(etmq->traceid_queues_list);
- free(etmq);
-
- return NULL;
}
static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
struct auxtrace_queue *queue,
- unsigned int queue_nr,
- bool formatted,
- int sample_cpu)
+ unsigned int queue_nr, bool formatted)
{
struct cs_etm_queue *etmq = queue->priv;
+ if (etmq && formatted != etmq->formatted) {
+ pr_err("CS_ETM: mixed formatted and unformatted trace not supported\n");
+ return -EINVAL;
+ }
+
if (list_empty(&queue->head) || etmq)
return 0;
- etmq = cs_etm__alloc_queue(etm, formatted, sample_cpu);
+ etmq = cs_etm__alloc_queue();
if (!etmq)
return -ENOMEM;
@@ -1123,7 +1076,9 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
queue->priv = etmq;
etmq->etm = etm;
etmq->queue_nr = queue_nr;
+ queue->cpu = queue_nr; /* Placeholder, may be reset to -1 in per-thread mode */
etmq->offset = 0;
+ etmq->formatted = formatted;
return 0;
}
@@ -2843,7 +2798,7 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session,
* formatted in piped mode (true).
*/
err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
- idx, true, -1);
+ idx, true);
if (err)
return err;
@@ -3048,8 +3003,8 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o
idx = auxtrace_event->idx;
formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
- return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
- idx, formatted, sample->cpu);
+
+ return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx], idx, formatted);
}
/* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
@@ -3233,6 +3188,84 @@ static int cs_etm__clear_unused_trace_ids_metadata(int num_cpu, u64 **metadata)
return 0;
}
+/*
+ * Use the data gathered by the peeks for HW_ID (trace ID mappings) and AUX
+ * (formatted or not) packets to create the decoders.
+ */
+static int cs_etm__create_queue_decoders(struct cs_etm_queue *etmq)
+{
+ struct cs_etm_decoder_params d_params;
+
+ /*
+ * Each queue can only contain data from one CPU when unformatted, so only one decoder is
+ * needed.
+ */
+ int decoders = etmq->formatted ? etmq->etm->num_cpu : 1;
+
+ /* Use metadata to fill in trace parameters for trace decoder */
+ struct cs_etm_trace_params *t_params = zalloc(sizeof(*t_params) * decoders);
+
+ if (!t_params)
+ goto out_free;
+
+ if (cs_etm__init_trace_params(t_params, etmq->etm, etmq->formatted,
+ etmq->queue_nr, decoders))
+ goto out_free;
+
+ /* Set decoder parameters to decode trace packets */
+ if (cs_etm__init_decoder_params(&d_params, etmq,
+ dump_trace ? CS_ETM_OPERATION_PRINT :
+ CS_ETM_OPERATION_DECODE))
+ goto out_free;
+
+ etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
+ t_params);
+
+ if (!etmq->decoder)
+ goto out_free;
+
+ /*
+ * Register a function to handle all memory accesses required by
+ * the trace decoder library.
+ */
+ if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
+ 0x0L, ((u64) -1L),
+ cs_etm__mem_access))
+ goto out_free_decoder;
+
+ zfree(&t_params);
+ return 0;
+
+out_free_decoder:
+ cs_etm_decoder__free(etmq->decoder);
+out_free:
+ zfree(&t_params);
+ return -EINVAL;
+}
+
+static int cs_etm__create_decoders(struct cs_etm_auxtrace *etm)
+{
+ struct auxtrace_queues *queues = &etm->queues;
+
+ for (unsigned int i = 0; i < queues->nr_queues; i++) {
+ bool empty = list_empty(&queues->queue_array[i].head);
+ struct cs_etm_queue *etmq = queues->queue_array[i].priv;
+ int ret;
+
+ /*
+ * Don't create decoders for empty queues, mainly because
+ * etmq->formatted is unknown for empty queues.
+ */
+ if (empty)
+ continue;
+
+ ret = cs_etm__create_queue_decoders(etmq);
+ if (ret)
+ return ret;
+ }
+ return 0;
+}
+
int cs_etm__process_auxtrace_info_full(union perf_event *event,
struct perf_session *session)
{
@@ -3396,6 +3429,10 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
if (err)
goto err_free_queues;
+ err = cs_etm__queue_aux_records(session);
+ if (err)
+ goto err_free_queues;
+
/*
* Map Trace ID values to CPU metadata.
*
@@ -3418,7 +3455,7 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
* flags if present.
*/
- /* first scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
+ /* Scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
aux_hw_id_found = 0;
err = perf_session__peek_events(session, session->header.data_offset,
session->header.data_size,
@@ -3436,7 +3473,7 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
if (err)
goto err_free_queues;
- err = cs_etm__queue_aux_records(session);
+ err = cs_etm__create_decoders(etm);
if (err)
goto err_free_queues;
--
2.34.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* Re: [PATCH v5 01/17] perf: cs-etm: Create decoders after both AUX and HW_ID search passes
2024-07-12 10:20 ` [PATCH v5 01/17] perf: cs-etm: Create decoders after both AUX and HW_ID search passes James Clark
@ 2024-07-18 13:23 ` Mike Leach
0 siblings, 0 replies; 40+ messages in thread
From: Mike Leach @ 2024-07-18 13:23 UTC (permalink / raw)
To: James Clark
Cc: coresight, suzuki.poulose, gankulkarni, leo.yan,
anshuman.khandual, James Clark, Alexander Shishkin,
Maxime Coquelin, Alexandre Torgue, John Garry, Will Deacon,
Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Jiri Olsa, Ian Rogers, Adrian Hunter,
Liang, Kan, linux-kernel, linux-arm-kernel, linux-stm32,
linux-perf-users
On Fri, 12 Jul 2024 at 11:22, James Clark <james.clark@linaro.org> wrote:
>
> From: James Clark <james.clark@arm.com>
>
> Both of these passes gather information about how to create the
> decoders. AUX records determine formatted/unformatted, and the HW_IDs
> determine the traceID/metadata mappings. Therefore it makes sense to
> cache the information and wait until both passes are over until creating
> the decoders, rather than creating them at the first HW_ID found. This
> will allow a simplification of the creation process where
> cs_etm_queue->traceid_list will exclusively used to create the decoders,
> rather than the current two methods depending on whether the trace is
> formatted or not.
>
> Previously the sample CPU from the AUX record was used to initialize
> the decoder CPU, but actually sample CPU == AUX queue index in per-CPU
> mode, so saving the sample CPU isn't required. Similarly
> formatted/unformatted was used upfront to create the decoders, but now
> it's cached until later.
>
> Signed-off-by: James Clark <james.clark@arm.com>
> Signed-off-by: James Clark <james.clark@linaro.org>
> ---
> tools/perf/util/cs-etm.c | 167 ++++++++++++++++++++++++---------------
> 1 file changed, 102 insertions(+), 65 deletions(-)
>
> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
> index 32818bd7cd17..f09004c4ba44 100644
> --- a/tools/perf/util/cs-etm.c
> +++ b/tools/perf/util/cs-etm.c
> @@ -103,6 +103,7 @@ struct cs_etm_queue {
> struct auxtrace_buffer *buffer;
> unsigned int queue_nr;
> u8 pending_timestamp_chan_id;
> + bool formatted;
> u64 offset;
> const unsigned char *buf;
> size_t buf_len, buf_used;
> @@ -738,8 +739,7 @@ static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
>
> static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
> struct cs_etm_queue *etmq,
> - enum cs_etm_decoder_operation mode,
> - bool formatted)
> + enum cs_etm_decoder_operation mode)
> {
> int ret = -EINVAL;
>
> @@ -749,7 +749,7 @@ static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
> d_params->packet_printer = cs_etm__packet_dump;
> d_params->operation = mode;
> d_params->data = etmq;
> - d_params->formatted = formatted;
> + d_params->formatted = etmq->formatted;
> d_params->fsyncs = false;
> d_params->hsyncs = false;
> d_params->frame_aligned = true;
> @@ -1041,81 +1041,34 @@ static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u8 trace_chan_id,
> return ret;
> }
>
> -static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm,
> - bool formatted, int sample_cpu)
> +static struct cs_etm_queue *cs_etm__alloc_queue(void)
> {
> - struct cs_etm_decoder_params d_params;
> - struct cs_etm_trace_params *t_params = NULL;
> - struct cs_etm_queue *etmq;
> - /*
> - * Each queue can only contain data from one CPU when unformatted, so only one decoder is
> - * needed.
> - */
> - int decoders = formatted ? etm->num_cpu : 1;
> -
> - etmq = zalloc(sizeof(*etmq));
> + struct cs_etm_queue *etmq = zalloc(sizeof(*etmq));
> if (!etmq)
> return NULL;
>
> etmq->traceid_queues_list = intlist__new(NULL);
> if (!etmq->traceid_queues_list)
> - goto out_free;
> -
> - /* Use metadata to fill in trace parameters for trace decoder */
> - t_params = zalloc(sizeof(*t_params) * decoders);
> + free(etmq);
>
> - if (!t_params)
> - goto out_free;
> -
> - if (cs_etm__init_trace_params(t_params, etm, formatted, sample_cpu, decoders))
> - goto out_free;
> -
> - /* Set decoder parameters to decode trace packets */
> - if (cs_etm__init_decoder_params(&d_params, etmq,
> - dump_trace ? CS_ETM_OPERATION_PRINT :
> - CS_ETM_OPERATION_DECODE,
> - formatted))
> - goto out_free;
> -
> - etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
> - t_params);
> -
> - if (!etmq->decoder)
> - goto out_free;
> -
> - /*
> - * Register a function to handle all memory accesses required by
> - * the trace decoder library.
> - */
> - if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
> - 0x0L, ((u64) -1L),
> - cs_etm__mem_access))
> - goto out_free_decoder;
> -
> - zfree(&t_params);
> return etmq;
> -
> -out_free_decoder:
> - cs_etm_decoder__free(etmq->decoder);
> -out_free:
> - intlist__delete(etmq->traceid_queues_list);
> - free(etmq);
> -
> - return NULL;
> }
>
> static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
> struct auxtrace_queue *queue,
> - unsigned int queue_nr,
> - bool formatted,
> - int sample_cpu)
> + unsigned int queue_nr, bool formatted)
> {
> struct cs_etm_queue *etmq = queue->priv;
>
> + if (etmq && formatted != etmq->formatted) {
> + pr_err("CS_ETM: mixed formatted and unformatted trace not supported\n");
> + return -EINVAL;
> + }
> +
> if (list_empty(&queue->head) || etmq)
> return 0;
>
> - etmq = cs_etm__alloc_queue(etm, formatted, sample_cpu);
> + etmq = cs_etm__alloc_queue();
>
> if (!etmq)
> return -ENOMEM;
> @@ -1123,7 +1076,9 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
> queue->priv = etmq;
> etmq->etm = etm;
> etmq->queue_nr = queue_nr;
> + queue->cpu = queue_nr; /* Placeholder, may be reset to -1 in per-thread mode */
> etmq->offset = 0;
> + etmq->formatted = formatted;
>
> return 0;
> }
> @@ -2843,7 +2798,7 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session,
> * formatted in piped mode (true).
> */
> err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
> - idx, true, -1);
> + idx, true);
> if (err)
> return err;
>
> @@ -3048,8 +3003,8 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o
>
> idx = auxtrace_event->idx;
> formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
> - return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
> - idx, formatted, sample->cpu);
> +
> + return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx], idx, formatted);
> }
>
> /* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
> @@ -3233,6 +3188,84 @@ static int cs_etm__clear_unused_trace_ids_metadata(int num_cpu, u64 **metadata)
> return 0;
> }
>
> +/*
> + * Use the data gathered by the peeks for HW_ID (trace ID mappings) and AUX
> + * (formatted or not) packets to create the decoders.
> + */
> +static int cs_etm__create_queue_decoders(struct cs_etm_queue *etmq)
> +{
> + struct cs_etm_decoder_params d_params;
> +
> + /*
> + * Each queue can only contain data from one CPU when unformatted, so only one decoder is
> + * needed.
> + */
> + int decoders = etmq->formatted ? etmq->etm->num_cpu : 1;
> +
> + /* Use metadata to fill in trace parameters for trace decoder */
> + struct cs_etm_trace_params *t_params = zalloc(sizeof(*t_params) * decoders);
> +
> + if (!t_params)
> + goto out_free;
> +
> + if (cs_etm__init_trace_params(t_params, etmq->etm, etmq->formatted,
> + etmq->queue_nr, decoders))
> + goto out_free;
> +
> + /* Set decoder parameters to decode trace packets */
> + if (cs_etm__init_decoder_params(&d_params, etmq,
> + dump_trace ? CS_ETM_OPERATION_PRINT :
> + CS_ETM_OPERATION_DECODE))
> + goto out_free;
> +
> + etmq->decoder = cs_etm_decoder__new(decoders, &d_params,
> + t_params);
> +
> + if (!etmq->decoder)
> + goto out_free;
> +
> + /*
> + * Register a function to handle all memory accesses required by
> + * the trace decoder library.
> + */
> + if (cs_etm_decoder__add_mem_access_cb(etmq->decoder,
> + 0x0L, ((u64) -1L),
> + cs_etm__mem_access))
> + goto out_free_decoder;
> +
> + zfree(&t_params);
> + return 0;
> +
> +out_free_decoder:
> + cs_etm_decoder__free(etmq->decoder);
> +out_free:
> + zfree(&t_params);
> + return -EINVAL;
> +}
> +
> +static int cs_etm__create_decoders(struct cs_etm_auxtrace *etm)
> +{
> + struct auxtrace_queues *queues = &etm->queues;
> +
> + for (unsigned int i = 0; i < queues->nr_queues; i++) {
> + bool empty = list_empty(&queues->queue_array[i].head);
> + struct cs_etm_queue *etmq = queues->queue_array[i].priv;
> + int ret;
> +
> + /*
> + * Don't create decoders for empty queues, mainly because
> + * etmq->formatted is unknown for empty queues.
> + */
> + if (empty)
> + continue;
> +
> + ret = cs_etm__create_queue_decoders(etmq);
> + if (ret)
> + return ret;
> + }
> + return 0;
> +}
> +
> int cs_etm__process_auxtrace_info_full(union perf_event *event,
> struct perf_session *session)
> {
> @@ -3396,6 +3429,10 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
> if (err)
> goto err_free_queues;
>
> + err = cs_etm__queue_aux_records(session);
> + if (err)
> + goto err_free_queues;
> +
> /*
> * Map Trace ID values to CPU metadata.
> *
> @@ -3418,7 +3455,7 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
> * flags if present.
> */
>
> - /* first scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
> + /* Scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
> aux_hw_id_found = 0;
> err = perf_session__peek_events(session, session->header.data_offset,
> session->header.data_size,
> @@ -3436,7 +3473,7 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
> if (err)
> goto err_free_queues;
>
> - err = cs_etm__queue_aux_records(session);
> + err = cs_etm__create_decoders(etm);
> if (err)
> goto err_free_queues;
>
> --
> 2.34.1
>
Reviewed-by: Mike Leach <mike.leach@linaro.org>
--
Mike Leach
Principal Engineer, ARM Ltd.
Manchester Design Centre. UK
^ permalink raw reply [flat|nested] 40+ messages in thread
* [PATCH v5 02/17] perf: cs-etm: Allocate queues for all CPUs
2024-07-12 10:20 [PATCH v5 00/17] coresight: Use per-sink trace ID maps for Perf sessions James Clark
2024-07-12 10:20 ` [PATCH v5 01/17] perf: cs-etm: Create decoders after both AUX and HW_ID search passes James Clark
@ 2024-07-12 10:20 ` James Clark
2024-07-18 13:24 ` Mike Leach
2024-07-12 10:20 ` [PATCH v5 03/17] perf: cs-etm: Move traceid_list to each queue James Clark
` (14 subsequent siblings)
16 siblings, 1 reply; 40+ messages in thread
From: James Clark @ 2024-07-12 10:20 UTC (permalink / raw)
To: coresight, suzuki.poulose, gankulkarni, mike.leach, leo.yan,
anshuman.khandual
Cc: James Clark, James Clark, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, John Garry, Will Deacon, Peter Zijlstra,
Ingo Molnar, Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
Jiri Olsa, Ian Rogers, Adrian Hunter, Liang, Kan, linux-kernel,
linux-arm-kernel, linux-stm32, linux-perf-users
From: James Clark <james.clark@arm.com>
Make cs_etm__setup_queue() set up a queue even if it's empty, and
pre-allocate queues based on the max CPU that was recorded. In per-CPU
mode aux queues are indexed based on CPU ID even if not all CPUs are
recorded; sparse queue arrays aren't used.
This will allow HW_IDs to be saved even if no aux data was received in
that queue without having to call cs_etm__setup_queue() from two
different places.
Signed-off-by: James Clark <james.clark@arm.com>
Signed-off-by: James Clark <james.clark@linaro.org>
---
tools/perf/util/cs-etm.c | 76 +++++++++++++++++++++-------------------
1 file changed, 40 insertions(+), 36 deletions(-)
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index f09004c4ba44..1a95c4bb898f 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -97,13 +97,19 @@ struct cs_etm_traceid_queue {
struct cs_etm_packet_queue packet_queue;
};
+enum cs_etm_format {
+ UNSET,
+ FORMATTED,
+ UNFORMATTED
+};
+
struct cs_etm_queue {
struct cs_etm_auxtrace *etm;
struct cs_etm_decoder *decoder;
struct auxtrace_buffer *buffer;
unsigned int queue_nr;
u8 pending_timestamp_chan_id;
- bool formatted;
+ enum cs_etm_format format;
u64 offset;
const unsigned char *buf;
size_t buf_len, buf_used;
@@ -697,7 +703,7 @@ static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
struct cs_etm_auxtrace *etm,
- bool formatted,
+ enum cs_etm_format format,
int sample_cpu,
int decoders)
{
@@ -706,7 +712,7 @@ static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
u64 architecture;
for (t_idx = 0; t_idx < decoders; t_idx++) {
- if (formatted)
+ if (format == FORMATTED)
m_idx = t_idx;
else {
m_idx = get_cpu_data_idx(etm, sample_cpu);
@@ -749,7 +755,7 @@ static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
d_params->packet_printer = cs_etm__packet_dump;
d_params->operation = mode;
d_params->data = etmq;
- d_params->formatted = etmq->formatted;
+ d_params->formatted = etmq->format == FORMATTED;
d_params->fsyncs = false;
d_params->hsyncs = false;
d_params->frame_aligned = true;
@@ -1056,16 +1062,11 @@ static struct cs_etm_queue *cs_etm__alloc_queue(void)
static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
struct auxtrace_queue *queue,
- unsigned int queue_nr, bool formatted)
+ unsigned int queue_nr)
{
struct cs_etm_queue *etmq = queue->priv;
- if (etmq && formatted != etmq->formatted) {
- pr_err("CS_ETM: mixed formatted and unformatted trace not supported\n");
- return -EINVAL;
- }
-
- if (list_empty(&queue->head) || etmq)
+ if (etmq)
return 0;
etmq = cs_etm__alloc_queue();
@@ -1078,7 +1079,6 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
etmq->queue_nr = queue_nr;
queue->cpu = queue_nr; /* Placeholder, may be reset to -1 in per-thread mode */
etmq->offset = 0;
- etmq->formatted = formatted;
return 0;
}
@@ -2791,17 +2791,6 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session,
if (err)
return err;
- /*
- * Knowing if the trace is formatted or not requires a lookup of
- * the aux record so only works in non-piped mode where data is
- * queued in cs_etm__queue_aux_records(). Always assume
- * formatted in piped mode (true).
- */
- err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
- idx, true);
- if (err)
- return err;
-
if (dump_trace)
if (auxtrace_buffer__get_data(buffer, fd)) {
cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer);
@@ -2918,8 +2907,7 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o
struct perf_record_auxtrace *auxtrace_event;
union perf_event auxtrace_fragment;
__u64 aux_offset, aux_size;
- __u32 idx;
- bool formatted;
+ enum cs_etm_format format;
struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
struct cs_etm_auxtrace,
@@ -2985,6 +2973,8 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o
if (aux_offset >= auxtrace_event->offset &&
aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) {
+ struct cs_etm_queue *etmq = etm->queues.queue_array[auxtrace_event->idx].priv;
+
/*
* If this AUX event was inside this buffer somewhere, create a new auxtrace event
* based on the sizes of the aux event, and queue that fragment.
@@ -3001,10 +2991,14 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o
if (err)
return err;
- idx = auxtrace_event->idx;
- formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
-
- return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx], idx, formatted);
+ format = (aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW) ?
+ UNFORMATTED : FORMATTED;
+ if (etmq->format != UNSET && format != etmq->format) {
+ pr_err("CS_ETM: mixed formatted and unformatted trace not supported\n");
+ return -EINVAL;
+ }
+ etmq->format = format;
+ return 0;
}
/* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
@@ -3200,7 +3194,7 @@ static int cs_etm__create_queue_decoders(struct cs_etm_queue *etmq)
* Each queue can only contain data from one CPU when unformatted, so only one decoder is
* needed.
*/
- int decoders = etmq->formatted ? etmq->etm->num_cpu : 1;
+ int decoders = etmq->format == FORMATTED ? etmq->etm->num_cpu : 1;
/* Use metadata to fill in trace parameters for trace decoder */
struct cs_etm_trace_params *t_params = zalloc(sizeof(*t_params) * decoders);
@@ -3208,7 +3202,7 @@ static int cs_etm__create_queue_decoders(struct cs_etm_queue *etmq)
if (!t_params)
goto out_free;
- if (cs_etm__init_trace_params(t_params, etmq->etm, etmq->formatted,
+ if (cs_etm__init_trace_params(t_params, etmq->etm, etmq->format,
etmq->queue_nr, decoders))
goto out_free;
@@ -3256,6 +3250,7 @@ static int cs_etm__create_decoders(struct cs_etm_auxtrace *etm)
* Don't create decoders for empty queues, mainly because
* etmq->formatted is unknown for empty queues.
*/
+ assert(empty == (etmq->format == UNSET));
if (empty)
continue;
@@ -3275,10 +3270,10 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
int event_header_size = sizeof(struct perf_event_header);
int total_size = auxtrace_info->header.size;
int priv_size = 0;
- int num_cpu;
+ int num_cpu, max_cpu = 0;
int err = 0;
int aux_hw_id_found;
- int i, j;
+ int i;
u64 *ptr = NULL;
u64 **metadata = NULL;
@@ -3309,7 +3304,7 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
* required by the trace decoder to properly decode the trace due
* to its highly compressed nature.
*/
- for (j = 0; j < num_cpu; j++) {
+ for (int j = 0; j < num_cpu; j++) {
if (ptr[i] == __perf_cs_etmv3_magic) {
metadata[j] =
cs_etm__create_meta_blk(ptr, &i,
@@ -3333,6 +3328,9 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
err = -ENOMEM;
goto err_free_metadata;
}
+
+ if ((int) metadata[j][CS_ETM_CPU] > max_cpu)
+ max_cpu = metadata[j][CS_ETM_CPU];
}
/*
@@ -3362,10 +3360,16 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
*/
etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]);
- err = auxtrace_queues__init(&etm->queues);
+ err = auxtrace_queues__init_nr(&etm->queues, max_cpu + 1);
if (err)
goto err_free_etm;
+ for (unsigned int j = 0; j < etm->queues.nr_queues; ++j) {
+ err = cs_etm__setup_queue(etm, &etm->queues.queue_array[j], j);
+ if (err)
+ goto err_free_queues;
+ }
+
if (session->itrace_synth_opts->set) {
etm->synth_opts = *session->itrace_synth_opts;
} else {
@@ -3487,7 +3491,7 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
zfree(&etm);
err_free_metadata:
/* No need to check @metadata[j], free(NULL) is supported */
- for (j = 0; j < num_cpu; j++)
+ for (int j = 0; j < num_cpu; j++)
zfree(&metadata[j]);
zfree(&metadata);
err_free_traceid_list:
--
2.34.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* Re: [PATCH v5 02/17] perf: cs-etm: Allocate queues for all CPUs
2024-07-12 10:20 ` [PATCH v5 02/17] perf: cs-etm: Allocate queues for all CPUs James Clark
@ 2024-07-18 13:24 ` Mike Leach
2024-07-19 9:36 ` James Clark
0 siblings, 1 reply; 40+ messages in thread
From: Mike Leach @ 2024-07-18 13:24 UTC (permalink / raw)
To: James Clark
Cc: coresight, suzuki.poulose, gankulkarni, leo.yan,
anshuman.khandual, James Clark, Alexander Shishkin,
Maxime Coquelin, Alexandre Torgue, John Garry, Will Deacon,
Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Jiri Olsa, Ian Rogers, Adrian Hunter,
Liang, Kan, linux-kernel, linux-arm-kernel, linux-stm32,
linux-perf-users
Hi James
On Fri, 12 Jul 2024 at 11:22, James Clark <james.clark@linaro.org> wrote:
>
> From: James Clark <james.clark@arm.com>
>
> Make cs_etm__setup_queue() setup a queue even if it's empty, and
> pre-allocate queues based on the max CPU that was recorded. In per-CPU
> mode aux queues are indexed based on CPU ID even if all CPUs aren't
> recorded, sparse queue arrays aren't used.
>
> This will allow HW_IDs to be saved even if no aux data was received in
> that queue without having to call cs_etm__setup_queue() from two
> different places.
>
> Signed-off-by: James Clark <james.clark@arm.com>
> Signed-off-by: James Clark <james.clark@linaro.org>
> ---
> tools/perf/util/cs-etm.c | 76 +++++++++++++++++++++-------------------
> 1 file changed, 40 insertions(+), 36 deletions(-)
>
> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
> index f09004c4ba44..1a95c4bb898f 100644
> --- a/tools/perf/util/cs-etm.c
> +++ b/tools/perf/util/cs-etm.c
> @@ -97,13 +97,19 @@ struct cs_etm_traceid_queue {
> struct cs_etm_packet_queue packet_queue;
> };
>
> +enum cs_etm_format {
> + UNSET,
> + FORMATTED,
> + UNFORMATTED
> +};
> +
Minor Nit: Could this...
> struct cs_etm_queue {
> struct cs_etm_auxtrace *etm;
> struct cs_etm_decoder *decoder;
> struct auxtrace_buffer *buffer;
> unsigned int queue_nr;
> u8 pending_timestamp_chan_id;
> - bool formatted;
> + enum cs_etm_format format;
and this be introduced in the previous set? Avoid a bit of churn?
> u64 offset;
> const unsigned char *buf;
> size_t buf_len, buf_used;
> @@ -697,7 +703,7 @@ static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
>
> static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
> struct cs_etm_auxtrace *etm,
> - bool formatted,
> + enum cs_etm_format format,
> int sample_cpu,
> int decoders)
> {
> @@ -706,7 +712,7 @@ static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
> u64 architecture;
>
> for (t_idx = 0; t_idx < decoders; t_idx++) {
> - if (formatted)
> + if (format == FORMATTED)
> m_idx = t_idx;
> else {
> m_idx = get_cpu_data_idx(etm, sample_cpu);
> @@ -749,7 +755,7 @@ static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
> d_params->packet_printer = cs_etm__packet_dump;
> d_params->operation = mode;
> d_params->data = etmq;
> - d_params->formatted = etmq->formatted;
> + d_params->formatted = etmq->format == FORMATTED;
> d_params->fsyncs = false;
> d_params->hsyncs = false;
> d_params->frame_aligned = true;
> @@ -1056,16 +1062,11 @@ static struct cs_etm_queue *cs_etm__alloc_queue(void)
>
> static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
> struct auxtrace_queue *queue,
> - unsigned int queue_nr, bool formatted)
> + unsigned int queue_nr)
> {
> struct cs_etm_queue *etmq = queue->priv;
>
> - if (etmq && formatted != etmq->formatted) {
> - pr_err("CS_ETM: mixed formatted and unformatted trace not supported\n");
> - return -EINVAL;
> - }
> -
> - if (list_empty(&queue->head) || etmq)
> + if (etmq)
> return 0;
>
> etmq = cs_etm__alloc_queue();
> @@ -1078,7 +1079,6 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
> etmq->queue_nr = queue_nr;
> queue->cpu = queue_nr; /* Placeholder, may be reset to -1 in per-thread mode */
> etmq->offset = 0;
> - etmq->formatted = formatted;
>
> return 0;
> }
> @@ -2791,17 +2791,6 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session,
> if (err)
> return err;
>
> - /*
> - * Knowing if the trace is formatted or not requires a lookup of
> - * the aux record so only works in non-piped mode where data is
> - * queued in cs_etm__queue_aux_records(). Always assume
> - * formatted in piped mode (true).
> - */
> - err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
> - idx, true);
> - if (err)
> - return err;
> -
> if (dump_trace)
> if (auxtrace_buffer__get_data(buffer, fd)) {
> cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer);
> @@ -2918,8 +2907,7 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o
> struct perf_record_auxtrace *auxtrace_event;
> union perf_event auxtrace_fragment;
> __u64 aux_offset, aux_size;
> - __u32 idx;
> - bool formatted;
> + enum cs_etm_format format;
>
> struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
> struct cs_etm_auxtrace,
> @@ -2985,6 +2973,8 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o
>
> if (aux_offset >= auxtrace_event->offset &&
> aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) {
> + struct cs_etm_queue *etmq = etm->queues.queue_array[auxtrace_event->idx].priv;
> +
> /*
> * If this AUX event was inside this buffer somewhere, create a new auxtrace event
> * based on the sizes of the aux event, and queue that fragment.
> @@ -3001,10 +2991,14 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o
> if (err)
> return err;
>
> - idx = auxtrace_event->idx;
> - formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
> -
> - return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx], idx, formatted);
> + format = (aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW) ?
> + UNFORMATTED : FORMATTED;
> + if (etmq->format != UNSET && format != etmq->format) {
> + pr_err("CS_ETM: mixed formatted and unformatted trace not supported\n");
> + return -EINVAL;
> + }
> + etmq->format = format;
> + return 0;
> }
>
> /* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
> @@ -3200,7 +3194,7 @@ static int cs_etm__create_queue_decoders(struct cs_etm_queue *etmq)
> * Each queue can only contain data from one CPU when unformatted, so only one decoder is
> * needed.
> */
> - int decoders = etmq->formatted ? etmq->etm->num_cpu : 1;
> + int decoders = etmq->format == FORMATTED ? etmq->etm->num_cpu : 1;
>
> /* Use metadata to fill in trace parameters for trace decoder */
> struct cs_etm_trace_params *t_params = zalloc(sizeof(*t_params) * decoders);
> @@ -3208,7 +3202,7 @@ static int cs_etm__create_queue_decoders(struct cs_etm_queue *etmq)
> if (!t_params)
> goto out_free;
>
> - if (cs_etm__init_trace_params(t_params, etmq->etm, etmq->formatted,
> + if (cs_etm__init_trace_params(t_params, etmq->etm, etmq->format,
> etmq->queue_nr, decoders))
> goto out_free;
>
> @@ -3256,6 +3250,7 @@ static int cs_etm__create_decoders(struct cs_etm_auxtrace *etm)
> * Don't create decoders for empty queues, mainly because
> * etmq->formatted is unknown for empty queues.
> */
> + assert(empty == (etmq->format == UNSET));
> if (empty)
> continue;
>
> @@ -3275,10 +3270,10 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
> int event_header_size = sizeof(struct perf_event_header);
> int total_size = auxtrace_info->header.size;
> int priv_size = 0;
> - int num_cpu;
> + int num_cpu, max_cpu = 0;
> int err = 0;
> int aux_hw_id_found;
> - int i, j;
> + int i;
> u64 *ptr = NULL;
> u64 **metadata = NULL;
>
> @@ -3309,7 +3304,7 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
> * required by the trace decoder to properly decode the trace due
> * to its highly compressed nature.
> */
> - for (j = 0; j < num_cpu; j++) {
> + for (int j = 0; j < num_cpu; j++) {
> if (ptr[i] == __perf_cs_etmv3_magic) {
> metadata[j] =
> cs_etm__create_meta_blk(ptr, &i,
> @@ -3333,6 +3328,9 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
> err = -ENOMEM;
> goto err_free_metadata;
> }
> +
> + if ((int) metadata[j][CS_ETM_CPU] > max_cpu)
> + max_cpu = metadata[j][CS_ETM_CPU];
> }
>
> /*
> @@ -3362,10 +3360,16 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
> */
> etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]);
>
> - err = auxtrace_queues__init(&etm->queues);
> + err = auxtrace_queues__init_nr(&etm->queues, max_cpu + 1);
> if (err)
> goto err_free_etm;
>
> + for (unsigned int j = 0; j < etm->queues.nr_queues; ++j) {
> + err = cs_etm__setup_queue(etm, &etm->queues.queue_array[j], j);
> + if (err)
> + goto err_free_queues;
> + }
> +
> if (session->itrace_synth_opts->set) {
> etm->synth_opts = *session->itrace_synth_opts;
> } else {
> @@ -3487,7 +3491,7 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
> zfree(&etm);
> err_free_metadata:
> /* No need to check @metadata[j], free(NULL) is supported */
> - for (j = 0; j < num_cpu; j++)
> + for (int j = 0; j < num_cpu; j++)
> zfree(&metadata[j]);
> zfree(&metadata);
> err_free_traceid_list:
> --
> 2.34.1
>
Reviewed-by: Mike Leach <mike.leach@linaro.org>
--
Mike Leach
Principal Engineer, ARM Ltd.
Manchester Design Centre. UK
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [PATCH v5 02/17] perf: cs-etm: Allocate queues for all CPUs
2024-07-18 13:24 ` Mike Leach
@ 2024-07-19 9:36 ` James Clark
0 siblings, 0 replies; 40+ messages in thread
From: James Clark @ 2024-07-19 9:36 UTC (permalink / raw)
To: Mike Leach
Cc: coresight, suzuki.poulose, gankulkarni, leo.yan,
anshuman.khandual, James Clark, Alexander Shishkin,
Maxime Coquelin, Alexandre Torgue, John Garry, Will Deacon,
Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Jiri Olsa, Ian Rogers, Adrian Hunter,
Liang, Kan, linux-kernel, linux-arm-kernel, linux-stm32,
linux-perf-users
On 18/07/2024 2:24 pm, Mike Leach wrote:
> Hi James
>
> On Fri, 12 Jul 2024 at 11:22, James Clark <james.clark@linaro.org> wrote:
>>
>> From: James Clark <james.clark@arm.com>
>>
>> Make cs_etm__setup_queue() setup a queue even if it's empty, and
>> pre-allocate queues based on the max CPU that was recorded. In per-CPU
>> mode aux queues are indexed based on CPU ID even if all CPUs aren't
>> recorded, sparse queue arrays aren't used.
>>
>> This will allow HW_IDs to be saved even if no aux data was received in
>> that queue without having to call cs_etm__setup_queue() from two
>> different places.
>>
>> Signed-off-by: James Clark <james.clark@arm.com>
>> Signed-off-by: James Clark <james.clark@linaro.org>
>> ---
>> tools/perf/util/cs-etm.c | 76 +++++++++++++++++++++-------------------
>> 1 file changed, 40 insertions(+), 36 deletions(-)
>>
>> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
>> index f09004c4ba44..1a95c4bb898f 100644
>> --- a/tools/perf/util/cs-etm.c
>> +++ b/tools/perf/util/cs-etm.c
>> @@ -97,13 +97,19 @@ struct cs_etm_traceid_queue {
>> struct cs_etm_packet_queue packet_queue;
>> };
>>
>> +enum cs_etm_format {
>> + UNSET,
>> + FORMATTED,
>> + UNFORMATTED
>> +};
>> +
>
> Minor Nit: Could this...
>
>> struct cs_etm_queue {
>> struct cs_etm_auxtrace *etm;
>> struct cs_etm_decoder *decoder;
>> struct auxtrace_buffer *buffer;
>> unsigned int queue_nr;
>> u8 pending_timestamp_chan_id;
>> - bool formatted;
>> + enum cs_etm_format format;
>
> and this be introduced in the previous set? Avoid a bit of churn?
>
Yep, I'll move it
>> u64 offset;
>> const unsigned char *buf;
>> size_t buf_len, buf_used;
>> @@ -697,7 +703,7 @@ static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
>>
>> static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
>> struct cs_etm_auxtrace *etm,
>> - bool formatted,
>> + enum cs_etm_format format,
>> int sample_cpu,
>> int decoders)
>> {
>> @@ -706,7 +712,7 @@ static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
>> u64 architecture;
>>
>> for (t_idx = 0; t_idx < decoders; t_idx++) {
>> - if (formatted)
>> + if (format == FORMATTED)
>> m_idx = t_idx;
>> else {
>> m_idx = get_cpu_data_idx(etm, sample_cpu);
>> @@ -749,7 +755,7 @@ static int cs_etm__init_decoder_params(struct cs_etm_decoder_params *d_params,
>> d_params->packet_printer = cs_etm__packet_dump;
>> d_params->operation = mode;
>> d_params->data = etmq;
>> - d_params->formatted = etmq->formatted;
>> + d_params->formatted = etmq->format == FORMATTED;
>> d_params->fsyncs = false;
>> d_params->hsyncs = false;
>> d_params->frame_aligned = true;
>> @@ -1056,16 +1062,11 @@ static struct cs_etm_queue *cs_etm__alloc_queue(void)
>>
>> static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
>> struct auxtrace_queue *queue,
>> - unsigned int queue_nr, bool formatted)
>> + unsigned int queue_nr)
>> {
>> struct cs_etm_queue *etmq = queue->priv;
>>
>> - if (etmq && formatted != etmq->formatted) {
>> - pr_err("CS_ETM: mixed formatted and unformatted trace not supported\n");
>> - return -EINVAL;
>> - }
>> -
>> - if (list_empty(&queue->head) || etmq)
>> + if (etmq)
>> return 0;
>>
>> etmq = cs_etm__alloc_queue();
>> @@ -1078,7 +1079,6 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
>> etmq->queue_nr = queue_nr;
>> queue->cpu = queue_nr; /* Placeholder, may be reset to -1 in per-thread mode */
>> etmq->offset = 0;
>> - etmq->formatted = formatted;
>>
>> return 0;
>> }
>> @@ -2791,17 +2791,6 @@ static int cs_etm__process_auxtrace_event(struct perf_session *session,
>> if (err)
>> return err;
>>
>> - /*
>> - * Knowing if the trace is formatted or not requires a lookup of
>> - * the aux record so only works in non-piped mode where data is
>> - * queued in cs_etm__queue_aux_records(). Always assume
>> - * formatted in piped mode (true).
>> - */
>> - err = cs_etm__setup_queue(etm, &etm->queues.queue_array[idx],
>> - idx, true);
>> - if (err)
>> - return err;
>> -
>> if (dump_trace)
>> if (auxtrace_buffer__get_data(buffer, fd)) {
>> cs_etm__dump_event(etm->queues.queue_array[idx].priv, buffer);
>> @@ -2918,8 +2907,7 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o
>> struct perf_record_auxtrace *auxtrace_event;
>> union perf_event auxtrace_fragment;
>> __u64 aux_offset, aux_size;
>> - __u32 idx;
>> - bool formatted;
>> + enum cs_etm_format format;
>>
>> struct cs_etm_auxtrace *etm = container_of(session->auxtrace,
>> struct cs_etm_auxtrace,
>> @@ -2985,6 +2973,8 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o
>>
>> if (aux_offset >= auxtrace_event->offset &&
>> aux_offset + aux_size <= auxtrace_event->offset + auxtrace_event->size) {
>> + struct cs_etm_queue *etmq = etm->queues.queue_array[auxtrace_event->idx].priv;
>> +
>> /*
>> * If this AUX event was inside this buffer somewhere, create a new auxtrace event
>> * based on the sizes of the aux event, and queue that fragment.
>> @@ -3001,10 +2991,14 @@ static int cs_etm__queue_aux_fragment(struct perf_session *session, off_t file_o
>> if (err)
>> return err;
>>
>> - idx = auxtrace_event->idx;
>> - formatted = !(aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW);
>> -
>> - return cs_etm__setup_queue(etm, &etm->queues.queue_array[idx], idx, formatted);
>> + format = (aux_event->flags & PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW) ?
>> + UNFORMATTED : FORMATTED;
>> + if (etmq->format != UNSET && format != etmq->format) {
>> + pr_err("CS_ETM: mixed formatted and unformatted trace not supported\n");
>> + return -EINVAL;
>> + }
>> + etmq->format = format;
>> + return 0;
>> }
>>
>> /* Wasn't inside this buffer, but there were no parse errors. 1 == 'not found' */
>> @@ -3200,7 +3194,7 @@ static int cs_etm__create_queue_decoders(struct cs_etm_queue *etmq)
>> * Each queue can only contain data from one CPU when unformatted, so only one decoder is
>> * needed.
>> */
>> - int decoders = etmq->formatted ? etmq->etm->num_cpu : 1;
>> + int decoders = etmq->format == FORMATTED ? etmq->etm->num_cpu : 1;
>>
>> /* Use metadata to fill in trace parameters for trace decoder */
>> struct cs_etm_trace_params *t_params = zalloc(sizeof(*t_params) * decoders);
>> @@ -3208,7 +3202,7 @@ static int cs_etm__create_queue_decoders(struct cs_etm_queue *etmq)
>> if (!t_params)
>> goto out_free;
>>
>> - if (cs_etm__init_trace_params(t_params, etmq->etm, etmq->formatted,
>> + if (cs_etm__init_trace_params(t_params, etmq->etm, etmq->format,
>> etmq->queue_nr, decoders))
>> goto out_free;
>>
>> @@ -3256,6 +3250,7 @@ static int cs_etm__create_decoders(struct cs_etm_auxtrace *etm)
>> * Don't create decoders for empty queues, mainly because
>> * etmq->formatted is unknown for empty queues.
>> */
>> + assert(empty == (etmq->format == UNSET));
>> if (empty)
>> continue;
>>
>> @@ -3275,10 +3270,10 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
>> int event_header_size = sizeof(struct perf_event_header);
>> int total_size = auxtrace_info->header.size;
>> int priv_size = 0;
>> - int num_cpu;
>> + int num_cpu, max_cpu = 0;
>> int err = 0;
>> int aux_hw_id_found;
>> - int i, j;
>> + int i;
>> u64 *ptr = NULL;
>> u64 **metadata = NULL;
>>
>> @@ -3309,7 +3304,7 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
>> * required by the trace decoder to properly decode the trace due
>> * to its highly compressed nature.
>> */
>> - for (j = 0; j < num_cpu; j++) {
>> + for (int j = 0; j < num_cpu; j++) {
>> if (ptr[i] == __perf_cs_etmv3_magic) {
>> metadata[j] =
>> cs_etm__create_meta_blk(ptr, &i,
>> @@ -3333,6 +3328,9 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
>> err = -ENOMEM;
>> goto err_free_metadata;
>> }
>> +
>> + if ((int) metadata[j][CS_ETM_CPU] > max_cpu)
>> + max_cpu = metadata[j][CS_ETM_CPU];
>> }
>>
>> /*
>> @@ -3362,10 +3360,16 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
>> */
>> etm->pid_fmt = cs_etm__init_pid_fmt(metadata[0]);
>>
>> - err = auxtrace_queues__init(&etm->queues);
>> + err = auxtrace_queues__init_nr(&etm->queues, max_cpu + 1);
>> if (err)
>> goto err_free_etm;
>>
>> + for (unsigned int j = 0; j < etm->queues.nr_queues; ++j) {
>> + err = cs_etm__setup_queue(etm, &etm->queues.queue_array[j], j);
>> + if (err)
>> + goto err_free_queues;
>> + }
>> +
>> if (session->itrace_synth_opts->set) {
>> etm->synth_opts = *session->itrace_synth_opts;
>> } else {
>> @@ -3487,7 +3491,7 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
>> zfree(&etm);
>> err_free_metadata:
>> /* No need to check @metadata[j], free(NULL) is supported */
>> - for (j = 0; j < num_cpu; j++)
>> + for (int j = 0; j < num_cpu; j++)
>> zfree(&metadata[j]);
>> zfree(&metadata);
>> err_free_traceid_list:
>> --
>> 2.34.1
>>
>
> Reviewed-by: Mike Leach <mike.leach@linaro.org>
>
>
> --
> Mike Leach
> Principal Engineer, ARM Ltd.
> Manchester Design Centre. UK
^ permalink raw reply [flat|nested] 40+ messages in thread
* [PATCH v5 03/17] perf: cs-etm: Move traceid_list to each queue
2024-07-12 10:20 [PATCH v5 00/17] coresight: Use per-sink trace ID maps for Perf sessions James Clark
2024-07-12 10:20 ` [PATCH v5 01/17] perf: cs-etm: Create decoders after both AUX and HW_ID search passes James Clark
2024-07-12 10:20 ` [PATCH v5 02/17] perf: cs-etm: Allocate queues for all CPUs James Clark
@ 2024-07-12 10:20 ` James Clark
2024-07-18 13:24 ` Mike Leach
2024-07-12 10:20 ` [PATCH v5 04/17] perf: cs-etm: Create decoders based on the trace ID mappings James Clark
` (13 subsequent siblings)
16 siblings, 1 reply; 40+ messages in thread
From: James Clark @ 2024-07-12 10:20 UTC (permalink / raw)
To: coresight, suzuki.poulose, gankulkarni, mike.leach, leo.yan,
anshuman.khandual
Cc: James Clark, James Clark, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, John Garry, Will Deacon, Peter Zijlstra,
Ingo Molnar, Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
Jiri Olsa, Ian Rogers, Adrian Hunter, Liang, Kan, linux-kernel,
linux-arm-kernel, linux-stm32, linux-perf-users
From: James Clark <james.clark@arm.com>
The global list won't work for per-sink trace ID allocations, so put a
list in each queue where the IDs will be unique to that queue.
To keep the same behavior as before, for version 0 of the HW_ID packets,
copy all the HW_ID mappings into all queues.
This change doesn't affect the decoders, only trace ID lookups on the
Perf side. The decoders are still created with global mappings which
will be fixed in a later commit.
Signed-off-by: James Clark <james.clark@arm.com>
Signed-off-by: James Clark <james.clark@linaro.org>
---
.../perf/util/cs-etm-decoder/cs-etm-decoder.c | 28 ++-
tools/perf/util/cs-etm.c | 215 +++++++++++-------
tools/perf/util/cs-etm.h | 2 +-
3 files changed, 147 insertions(+), 98 deletions(-)
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index e917985bbbe6..0c9c48cedbf1 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -388,7 +388,8 @@ cs_etm_decoder__reset_timestamp(struct cs_etm_packet_queue *packet_queue)
}
static ocsd_datapath_resp_t
-cs_etm_decoder__buffer_packet(struct cs_etm_packet_queue *packet_queue,
+cs_etm_decoder__buffer_packet(struct cs_etm_queue *etmq,
+ struct cs_etm_packet_queue *packet_queue,
const u8 trace_chan_id,
enum cs_etm_sample_type sample_type)
{
@@ -398,7 +399,7 @@ cs_etm_decoder__buffer_packet(struct cs_etm_packet_queue *packet_queue,
if (packet_queue->packet_count >= CS_ETM_PACKET_MAX_BUFFER - 1)
return OCSD_RESP_FATAL_SYS_ERR;
- if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0)
+ if (cs_etm__get_cpu(etmq, trace_chan_id, &cpu) < 0)
return OCSD_RESP_FATAL_SYS_ERR;
et = packet_queue->tail;
@@ -436,7 +437,7 @@ cs_etm_decoder__buffer_range(struct cs_etm_queue *etmq,
int ret = 0;
struct cs_etm_packet *packet;
- ret = cs_etm_decoder__buffer_packet(packet_queue, trace_chan_id,
+ ret = cs_etm_decoder__buffer_packet(etmq, packet_queue, trace_chan_id,
CS_ETM_RANGE);
if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT)
return ret;
@@ -496,7 +497,8 @@ cs_etm_decoder__buffer_range(struct cs_etm_queue *etmq,
}
static ocsd_datapath_resp_t
-cs_etm_decoder__buffer_discontinuity(struct cs_etm_packet_queue *queue,
+cs_etm_decoder__buffer_discontinuity(struct cs_etm_queue *etmq,
+ struct cs_etm_packet_queue *queue,
const uint8_t trace_chan_id)
{
/*
@@ -504,18 +506,19 @@ cs_etm_decoder__buffer_discontinuity(struct cs_etm_packet_queue *queue,
* reset time statistics.
*/
cs_etm_decoder__reset_timestamp(queue);
- return cs_etm_decoder__buffer_packet(queue, trace_chan_id,
+ return cs_etm_decoder__buffer_packet(etmq, queue, trace_chan_id,
CS_ETM_DISCONTINUITY);
}
static ocsd_datapath_resp_t
-cs_etm_decoder__buffer_exception(struct cs_etm_packet_queue *queue,
+cs_etm_decoder__buffer_exception(struct cs_etm_queue *etmq,
+ struct cs_etm_packet_queue *queue,
const ocsd_generic_trace_elem *elem,
const uint8_t trace_chan_id)
{ int ret = 0;
struct cs_etm_packet *packet;
- ret = cs_etm_decoder__buffer_packet(queue, trace_chan_id,
+ ret = cs_etm_decoder__buffer_packet(etmq, queue, trace_chan_id,
CS_ETM_EXCEPTION);
if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT)
return ret;
@@ -527,10 +530,11 @@ cs_etm_decoder__buffer_exception(struct cs_etm_packet_queue *queue,
}
static ocsd_datapath_resp_t
-cs_etm_decoder__buffer_exception_ret(struct cs_etm_packet_queue *queue,
+cs_etm_decoder__buffer_exception_ret(struct cs_etm_queue *etmq,
+ struct cs_etm_packet_queue *queue,
const uint8_t trace_chan_id)
{
- return cs_etm_decoder__buffer_packet(queue, trace_chan_id,
+ return cs_etm_decoder__buffer_packet(etmq, queue, trace_chan_id,
CS_ETM_EXCEPTION_RET);
}
@@ -599,7 +603,7 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
case OCSD_GEN_TRC_ELEM_EO_TRACE:
case OCSD_GEN_TRC_ELEM_NO_SYNC:
case OCSD_GEN_TRC_ELEM_TRACE_ON:
- resp = cs_etm_decoder__buffer_discontinuity(packet_queue,
+ resp = cs_etm_decoder__buffer_discontinuity(etmq, packet_queue,
trace_chan_id);
break;
case OCSD_GEN_TRC_ELEM_INSTR_RANGE:
@@ -607,11 +611,11 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
trace_chan_id);
break;
case OCSD_GEN_TRC_ELEM_EXCEPTION:
- resp = cs_etm_decoder__buffer_exception(packet_queue, elem,
+ resp = cs_etm_decoder__buffer_exception(etmq, packet_queue, elem,
trace_chan_id);
break;
case OCSD_GEN_TRC_ELEM_EXCEPTION_RET:
- resp = cs_etm_decoder__buffer_exception_ret(packet_queue,
+ resp = cs_etm_decoder__buffer_exception_ret(etmq, packet_queue,
trace_chan_id);
break;
case OCSD_GEN_TRC_ELEM_TIMESTAMP:
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 1a95c4bb898f..0cd7d3843411 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -116,16 +116,18 @@ struct cs_etm_queue {
/* Conversion between traceID and index in traceid_queues array */
struct intlist *traceid_queues_list;
struct cs_etm_traceid_queue **traceid_queues;
+ /* Conversion between traceID and metadata pointers */
+ struct intlist *traceid_list;
};
-/* RB tree for quick conversion between traceID and metadata pointers */
-static struct intlist *traceid_list;
-
static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm);
static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
pid_t tid);
static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);
+static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata);
+static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu);
+static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata);
/* PTMs ETMIDR [11:8] set to b0011 */
#define ETMIDR_PTM_VERSION 0x00000300
@@ -151,12 +153,12 @@ static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
return CS_ETM_PROTO_ETMV3;
}
-static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic)
+static int cs_etm__get_magic(struct cs_etm_queue *etmq, u8 trace_chan_id, u64 *magic)
{
struct int_node *inode;
u64 *metadata;
- inode = intlist__find(traceid_list, trace_chan_id);
+ inode = intlist__find(etmq->traceid_list, trace_chan_id);
if (!inode)
return -EINVAL;
@@ -165,12 +167,12 @@ static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic)
return 0;
}
-int cs_etm__get_cpu(u8 trace_chan_id, int *cpu)
+int cs_etm__get_cpu(struct cs_etm_queue *etmq, u8 trace_chan_id, int *cpu)
{
struct int_node *inode;
u64 *metadata;
- inode = intlist__find(traceid_list, trace_chan_id);
+ inode = intlist__find(etmq->traceid_list, trace_chan_id);
if (!inode)
return -EINVAL;
@@ -222,30 +224,108 @@ enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq)
return etmq->etm->pid_fmt;
}
-static int cs_etm__map_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
+static int cs_etm__insert_trace_id_node(struct cs_etm_queue *etmq,
+ u8 trace_chan_id, u64 *cpu_metadata)
{
- struct int_node *inode;
-
/* Get an RB node for this CPU */
- inode = intlist__findnew(traceid_list, trace_chan_id);
+ struct int_node *inode = intlist__findnew(etmq->traceid_list, trace_chan_id);
/* Something went wrong, no need to continue */
if (!inode)
return -ENOMEM;
+ /* Disallow re-mapping a different traceID to metadata pair. */
+ if (inode->priv) {
+ u64 *curr_cpu_data = inode->priv;
+ u8 curr_chan_id;
+ int err;
+
+ if (curr_cpu_data[CS_ETM_CPU] != cpu_metadata[CS_ETM_CPU]) {
+ pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
+ return -EINVAL;
+ }
+
+ /* check that the mapped ID matches */
+ err = cs_etm__metadata_get_trace_id(&curr_chan_id, curr_cpu_data);
+ if (err)
+ return err;
+
+ if (curr_chan_id != trace_chan_id) {
+ pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
+ return -EINVAL;
+ }
+
+ /* Skip re-adding the same mappings if everything matched */
+ return 0;
+ }
+
+ /* Not one we've seen before, associate the traceID with the metadata pointer */
+ inode->priv = cpu_metadata;
+
+ return 0;
+}
+
+static struct cs_etm_queue *cs_etm__get_queue(struct cs_etm_auxtrace *etm, int cpu)
+{
+ if (etm->per_thread_decoding)
+ return etm->queues.queue_array[0].priv;
+ else
+ return etm->queues.queue_array[cpu].priv;
+}
+
+static int cs_etm__map_trace_id_v0(struct cs_etm_auxtrace *etm, u8 trace_chan_id,
+ u64 *cpu_metadata)
+{
+ struct cs_etm_queue *etmq;
+
/*
- * The node for that CPU should not be taken.
- * Back out if that's the case.
+ * If the queue is unformatted then only save one mapping in the
+ * queue associated with that CPU so only one decoder is made.
*/
- if (inode->priv)
- return -EINVAL;
+ etmq = cs_etm__get_queue(etm, cpu_metadata[CS_ETM_CPU]);
+ if (etmq->format == UNFORMATTED)
+ return cs_etm__insert_trace_id_node(etmq, trace_chan_id,
+ cpu_metadata);
- /* All good, associate the traceID with the metadata pointer */
- inode->priv = cpu_metadata;
+ /*
+ * Otherwise, version 0 trace IDs are global so save them into every
+ * queue.
+ */
+ for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) {
+ int ret;
+
+ etmq = etm->queues.queue_array[i].priv;
+ ret = cs_etm__insert_trace_id_node(etmq, trace_chan_id,
+ cpu_metadata);
+ if (ret)
+ return ret;
+ }
return 0;
}
+static int cs_etm__process_trace_id_v0(struct cs_etm_auxtrace *etm, int cpu,
+ u64 hw_id)
+{
+ int err;
+ u64 *cpu_data;
+ u8 trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
+
+ cpu_data = get_cpu_data(etm, cpu);
+ if (cpu_data == NULL)
+ return -EINVAL;
+
+ err = cs_etm__map_trace_id_v0(etm, trace_chan_id, cpu_data);
+ if (err)
+ return err;
+
+ /*
+ * if we are picking up the association from the packet, need to plug
+ * the correct trace ID into the metadata for setting up decoders later.
+ */
+ return cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
+}
+
static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata)
{
u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
@@ -329,17 +409,13 @@ static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
{
struct cs_etm_auxtrace *etm;
struct perf_sample sample;
- struct int_node *inode;
struct evsel *evsel;
- u64 *cpu_data;
u64 hw_id;
int cpu, version, err;
- u8 trace_chan_id, curr_chan_id;
/* extract and parse the HW ID */
hw_id = event->aux_output_hw_id.hw_id;
version = FIELD_GET(CS_AUX_HW_ID_VERSION_MASK, hw_id);
- trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
/* check that we can handle this version */
if (version > CS_AUX_HW_ID_CURR_VERSION) {
@@ -367,43 +443,7 @@ static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
return -EINVAL;
}
- /* See if the ID is mapped to a CPU, and it matches the current CPU */
- inode = intlist__find(traceid_list, trace_chan_id);
- if (inode) {
- cpu_data = inode->priv;
- if ((int)cpu_data[CS_ETM_CPU] != cpu) {
- pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
- return -EINVAL;
- }
-
- /* check that the mapped ID matches */
- err = cs_etm__metadata_get_trace_id(&curr_chan_id, cpu_data);
- if (err)
- return err;
- if (curr_chan_id != trace_chan_id) {
- pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
- return -EINVAL;
- }
-
- /* mapped and matched - return OK */
- return 0;
- }
-
- cpu_data = get_cpu_data(etm, cpu);
- if (cpu_data == NULL)
- return err;
-
- /* not one we've seen before - lets map it */
- err = cs_etm__map_trace_id(trace_chan_id, cpu_data);
- if (err)
- return err;
-
- /*
- * if we are picking up the association from the packet, need to plug
- * the correct trace ID into the metadata for setting up decoders later.
- */
- err = cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
- return err;
+ return cs_etm__process_trace_id_v0(etm, cpu, hw_id);
}
void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
@@ -856,6 +896,7 @@ static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
static void cs_etm__free_queue(void *priv)
{
+ struct int_node *inode, *tmp;
struct cs_etm_queue *etmq = priv;
if (!etmq)
@@ -863,6 +904,14 @@ static void cs_etm__free_queue(void *priv)
cs_etm_decoder__free(etmq->decoder);
cs_etm__free_traceid_queues(etmq);
+
+ /* First remove all traceID/metadata nodes for the RB tree */
+ intlist__for_each_entry_safe(inode, tmp, etmq->traceid_list)
+ intlist__remove(etmq->traceid_list, inode);
+
+ /* Then the RB tree itself */
+ intlist__delete(etmq->traceid_list);
+
free(etmq);
}
@@ -885,19 +934,12 @@ static void cs_etm__free_events(struct perf_session *session)
static void cs_etm__free(struct perf_session *session)
{
int i;
- struct int_node *inode, *tmp;
struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
struct cs_etm_auxtrace,
auxtrace);
cs_etm__free_events(session);
session->auxtrace = NULL;
- /* First remove all traceID/metadata nodes for the RB tree */
- intlist__for_each_entry_safe(inode, tmp, traceid_list)
- intlist__remove(traceid_list, inode);
- /* Then the RB tree itself */
- intlist__delete(traceid_list);
-
for (i = 0; i < aux->num_cpu; i++)
zfree(&aux->metadata[i]);
@@ -1055,9 +1097,24 @@ static struct cs_etm_queue *cs_etm__alloc_queue(void)
etmq->traceid_queues_list = intlist__new(NULL);
if (!etmq->traceid_queues_list)
- free(etmq);
+ goto out_free;
+
+ /*
+ * Create an RB tree for traceID-metadata tuple. Since the conversion
+ * has to be made for each packet that gets decoded, optimizing access
+ * in anything other than a sequential array is worth doing.
+ */
+ etmq->traceid_list = intlist__new(NULL);
+ if (!etmq->traceid_list)
+ goto out_free;
return etmq;
+
+out_free:
+ intlist__delete(etmq->traceid_queues_list);
+ free(etmq);
+
+ return NULL;
}
static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
@@ -2207,7 +2264,7 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
PERF_IP_FLAG_TRACE_END;
break;
case CS_ETM_EXCEPTION:
- ret = cs_etm__get_magic(packet->trace_chan_id, &magic);
+ ret = cs_etm__get_magic(etmq, packet->trace_chan_id, &magic);
if (ret)
return ret;
@@ -3124,7 +3181,8 @@ static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
}
/* map trace ids to correct metadata block, from information in metadata */
-static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
+static int cs_etm__map_trace_ids_metadata(struct cs_etm_auxtrace *etm, int num_cpu,
+ u64 **metadata)
{
u64 cs_etm_magic;
u8 trace_chan_id;
@@ -3146,7 +3204,7 @@ static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
/* unknown magic number */
return -EINVAL;
}
- err = cs_etm__map_trace_id(trace_chan_id, metadata[i]);
+ err = cs_etm__map_trace_id_v0(etm, trace_chan_id, metadata[i]);
if (err)
return err;
}
@@ -3277,23 +3335,12 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
u64 *ptr = NULL;
u64 **metadata = NULL;
- /*
- * Create an RB tree for traceID-metadata tuple. Since the conversion
- * has to be made for each packet that gets decoded, optimizing access
- * in anything other than a sequential array is worth doing.
- */
- traceid_list = intlist__new(NULL);
- if (!traceid_list)
- return -ENOMEM;
-
/* First the global part */
ptr = (u64 *) auxtrace_info->priv;
num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
metadata = zalloc(sizeof(*metadata) * num_cpu);
- if (!metadata) {
- err = -ENOMEM;
- goto err_free_traceid_list;
- }
+ if (!metadata)
+ return -ENOMEM;
/* Start parsing after the common part of the header */
i = CS_HEADER_VERSION_MAX;
@@ -3472,7 +3519,7 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
err = cs_etm__clear_unused_trace_ids_metadata(num_cpu, metadata);
/* otherwise, this is a file with metadata values only, map from metadata */
else
- err = cs_etm__map_trace_ids_metadata(num_cpu, metadata);
+ err = cs_etm__map_trace_ids_metadata(etm, num_cpu, metadata);
if (err)
goto err_free_queues;
@@ -3494,7 +3541,5 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
for (int j = 0; j < num_cpu; j++)
zfree(&metadata[j]);
zfree(&metadata);
-err_free_traceid_list:
- intlist__delete(traceid_list);
return err;
}
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
index 4696267a32f0..f4f69f7cc0f3 100644
--- a/tools/perf/util/cs-etm.h
+++ b/tools/perf/util/cs-etm.h
@@ -252,7 +252,7 @@ enum cs_etm_pid_fmt {
#ifdef HAVE_CSTRACE_SUPPORT
#include <opencsd/ocsd_if_types.h>
-int cs_etm__get_cpu(u8 trace_chan_id, int *cpu);
+int cs_etm__get_cpu(struct cs_etm_queue *etmq, u8 trace_chan_id, int *cpu);
enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq);
int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid,
u8 trace_chan_id, ocsd_ex_level el);
--
2.34.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* Re: [PATCH v5 03/17] perf: cs-etm: Move traceid_list to each queue
2024-07-12 10:20 ` [PATCH v5 03/17] perf: cs-etm: Move traceid_list to each queue James Clark
@ 2024-07-18 13:24 ` Mike Leach
0 siblings, 0 replies; 40+ messages in thread
From: Mike Leach @ 2024-07-18 13:24 UTC (permalink / raw)
To: James Clark
Cc: coresight, suzuki.poulose, gankulkarni, leo.yan,
anshuman.khandual, James Clark, Alexander Shishkin,
Maxime Coquelin, Alexandre Torgue, John Garry, Will Deacon,
Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Jiri Olsa, Ian Rogers, Adrian Hunter,
Liang, Kan, linux-kernel, linux-arm-kernel, linux-stm32,
linux-perf-users
On Fri, 12 Jul 2024 at 11:22, James Clark <james.clark@linaro.org> wrote:
>
> From: James Clark <james.clark@arm.com>
>
> The global list won't work for per-sink trace ID allocations, so put a
> list in each queue where the IDs will be unique to that queue.
>
> To keep the same behavior as before, for version 0 of the HW_ID packets,
> copy all the HW_ID mappings into all queues.
>
> This change doesn't affect the decoders, only trace ID lookups on the
> Perf side. The decoders are still created with global mappings which
> will be fixed in a later commit.
>
> Signed-off-by: James Clark <james.clark@arm.com>
> Signed-off-by: James Clark <james.clark@linaro.org>
> ---
> .../perf/util/cs-etm-decoder/cs-etm-decoder.c | 28 ++-
> tools/perf/util/cs-etm.c | 215 +++++++++++-------
> tools/perf/util/cs-etm.h | 2 +-
> 3 files changed, 147 insertions(+), 98 deletions(-)
>
> diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> index e917985bbbe6..0c9c48cedbf1 100644
> --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> @@ -388,7 +388,8 @@ cs_etm_decoder__reset_timestamp(struct cs_etm_packet_queue *packet_queue)
> }
>
> static ocsd_datapath_resp_t
> -cs_etm_decoder__buffer_packet(struct cs_etm_packet_queue *packet_queue,
> +cs_etm_decoder__buffer_packet(struct cs_etm_queue *etmq,
> + struct cs_etm_packet_queue *packet_queue,
> const u8 trace_chan_id,
> enum cs_etm_sample_type sample_type)
> {
> @@ -398,7 +399,7 @@ cs_etm_decoder__buffer_packet(struct cs_etm_packet_queue *packet_queue,
> if (packet_queue->packet_count >= CS_ETM_PACKET_MAX_BUFFER - 1)
> return OCSD_RESP_FATAL_SYS_ERR;
>
> - if (cs_etm__get_cpu(trace_chan_id, &cpu) < 0)
> + if (cs_etm__get_cpu(etmq, trace_chan_id, &cpu) < 0)
> return OCSD_RESP_FATAL_SYS_ERR;
>
> et = packet_queue->tail;
> @@ -436,7 +437,7 @@ cs_etm_decoder__buffer_range(struct cs_etm_queue *etmq,
> int ret = 0;
> struct cs_etm_packet *packet;
>
> - ret = cs_etm_decoder__buffer_packet(packet_queue, trace_chan_id,
> + ret = cs_etm_decoder__buffer_packet(etmq, packet_queue, trace_chan_id,
> CS_ETM_RANGE);
> if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT)
> return ret;
> @@ -496,7 +497,8 @@ cs_etm_decoder__buffer_range(struct cs_etm_queue *etmq,
> }
>
> static ocsd_datapath_resp_t
> -cs_etm_decoder__buffer_discontinuity(struct cs_etm_packet_queue *queue,
> +cs_etm_decoder__buffer_discontinuity(struct cs_etm_queue *etmq,
> + struct cs_etm_packet_queue *queue,
> const uint8_t trace_chan_id)
> {
> /*
> @@ -504,18 +506,19 @@ cs_etm_decoder__buffer_discontinuity(struct cs_etm_packet_queue *queue,
> * reset time statistics.
> */
> cs_etm_decoder__reset_timestamp(queue);
> - return cs_etm_decoder__buffer_packet(queue, trace_chan_id,
> + return cs_etm_decoder__buffer_packet(etmq, queue, trace_chan_id,
> CS_ETM_DISCONTINUITY);
> }
>
> static ocsd_datapath_resp_t
> -cs_etm_decoder__buffer_exception(struct cs_etm_packet_queue *queue,
> +cs_etm_decoder__buffer_exception(struct cs_etm_queue *etmq,
> + struct cs_etm_packet_queue *queue,
> const ocsd_generic_trace_elem *elem,
> const uint8_t trace_chan_id)
> { int ret = 0;
> struct cs_etm_packet *packet;
>
> - ret = cs_etm_decoder__buffer_packet(queue, trace_chan_id,
> + ret = cs_etm_decoder__buffer_packet(etmq, queue, trace_chan_id,
> CS_ETM_EXCEPTION);
> if (ret != OCSD_RESP_CONT && ret != OCSD_RESP_WAIT)
> return ret;
> @@ -527,10 +530,11 @@ cs_etm_decoder__buffer_exception(struct cs_etm_packet_queue *queue,
> }
>
> static ocsd_datapath_resp_t
> -cs_etm_decoder__buffer_exception_ret(struct cs_etm_packet_queue *queue,
> +cs_etm_decoder__buffer_exception_ret(struct cs_etm_queue *etmq,
> + struct cs_etm_packet_queue *queue,
> const uint8_t trace_chan_id)
> {
> - return cs_etm_decoder__buffer_packet(queue, trace_chan_id,
> + return cs_etm_decoder__buffer_packet(etmq, queue, trace_chan_id,
> CS_ETM_EXCEPTION_RET);
> }
>
> @@ -599,7 +603,7 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
> case OCSD_GEN_TRC_ELEM_EO_TRACE:
> case OCSD_GEN_TRC_ELEM_NO_SYNC:
> case OCSD_GEN_TRC_ELEM_TRACE_ON:
> - resp = cs_etm_decoder__buffer_discontinuity(packet_queue,
> + resp = cs_etm_decoder__buffer_discontinuity(etmq, packet_queue,
> trace_chan_id);
> break;
> case OCSD_GEN_TRC_ELEM_INSTR_RANGE:
> @@ -607,11 +611,11 @@ static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer(
> trace_chan_id);
> break;
> case OCSD_GEN_TRC_ELEM_EXCEPTION:
> - resp = cs_etm_decoder__buffer_exception(packet_queue, elem,
> + resp = cs_etm_decoder__buffer_exception(etmq, packet_queue, elem,
> trace_chan_id);
> break;
> case OCSD_GEN_TRC_ELEM_EXCEPTION_RET:
> - resp = cs_etm_decoder__buffer_exception_ret(packet_queue,
> + resp = cs_etm_decoder__buffer_exception_ret(etmq, packet_queue,
> trace_chan_id);
> break;
> case OCSD_GEN_TRC_ELEM_TIMESTAMP:
> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
> index 1a95c4bb898f..0cd7d3843411 100644
> --- a/tools/perf/util/cs-etm.c
> +++ b/tools/perf/util/cs-etm.c
> @@ -116,16 +116,18 @@ struct cs_etm_queue {
> /* Conversion between traceID and index in traceid_queues array */
> struct intlist *traceid_queues_list;
> struct cs_etm_traceid_queue **traceid_queues;
> + /* Conversion between traceID and metadata pointers */
> + struct intlist *traceid_list;
> };
>
> -/* RB tree for quick conversion between traceID and metadata pointers */
> -static struct intlist *traceid_list;
> -
> static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm);
> static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm,
> pid_t tid);
> static int cs_etm__get_data_block(struct cs_etm_queue *etmq);
> static int cs_etm__decode_data_block(struct cs_etm_queue *etmq);
> +static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata);
> +static u64 *get_cpu_data(struct cs_etm_auxtrace *etm, int cpu);
> +static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata);
>
> /* PTMs ETMIDR [11:8] set to b0011 */
> #define ETMIDR_PTM_VERSION 0x00000300
> @@ -151,12 +153,12 @@ static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
> return CS_ETM_PROTO_ETMV3;
> }
>
> -static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic)
> +static int cs_etm__get_magic(struct cs_etm_queue *etmq, u8 trace_chan_id, u64 *magic)
> {
> struct int_node *inode;
> u64 *metadata;
>
> - inode = intlist__find(traceid_list, trace_chan_id);
> + inode = intlist__find(etmq->traceid_list, trace_chan_id);
> if (!inode)
> return -EINVAL;
>
> @@ -165,12 +167,12 @@ static int cs_etm__get_magic(u8 trace_chan_id, u64 *magic)
> return 0;
> }
>
> -int cs_etm__get_cpu(u8 trace_chan_id, int *cpu)
> +int cs_etm__get_cpu(struct cs_etm_queue *etmq, u8 trace_chan_id, int *cpu)
> {
> struct int_node *inode;
> u64 *metadata;
>
> - inode = intlist__find(traceid_list, trace_chan_id);
> + inode = intlist__find(etmq->traceid_list, trace_chan_id);
> if (!inode)
> return -EINVAL;
>
> @@ -222,30 +224,108 @@ enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq)
> return etmq->etm->pid_fmt;
> }
>
> -static int cs_etm__map_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
> +static int cs_etm__insert_trace_id_node(struct cs_etm_queue *etmq,
> + u8 trace_chan_id, u64 *cpu_metadata)
> {
> - struct int_node *inode;
> -
> /* Get an RB node for this CPU */
> - inode = intlist__findnew(traceid_list, trace_chan_id);
> + struct int_node *inode = intlist__findnew(etmq->traceid_list, trace_chan_id);
>
> /* Something went wrong, no need to continue */
> if (!inode)
> return -ENOMEM;
>
> + /* Disallow re-mapping a different traceID to metadata pair. */
> + if (inode->priv) {
> + u64 *curr_cpu_data = inode->priv;
> + u8 curr_chan_id;
> + int err;
> +
> + if (curr_cpu_data[CS_ETM_CPU] != cpu_metadata[CS_ETM_CPU]) {
> + pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
> + return -EINVAL;
> + }
> +
> + /* check that the mapped ID matches */
> + err = cs_etm__metadata_get_trace_id(&curr_chan_id, curr_cpu_data);
> + if (err)
> + return err;
> +
> + if (curr_chan_id != trace_chan_id) {
> + pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
> + return -EINVAL;
> + }
> +
> + /* Skip re-adding the same mappings if everything matched */
> + return 0;
> + }
> +
> + /* Not one we've seen before, associate the traceID with the metadata pointer */
> + inode->priv = cpu_metadata;
> +
> + return 0;
> +}
> +
> +static struct cs_etm_queue *cs_etm__get_queue(struct cs_etm_auxtrace *etm, int cpu)
> +{
> + if (etm->per_thread_decoding)
> + return etm->queues.queue_array[0].priv;
> + else
> + return etm->queues.queue_array[cpu].priv;
> +}
> +
> +static int cs_etm__map_trace_id_v0(struct cs_etm_auxtrace *etm, u8 trace_chan_id,
> + u64 *cpu_metadata)
> +{
> + struct cs_etm_queue *etmq;
> +
> /*
> - * The node for that CPU should not be taken.
> - * Back out if that's the case.
> + * If the queue is unformatted then only save one mapping in the
> + * queue associated with that CPU so only one decoder is made.
> */
> - if (inode->priv)
> - return -EINVAL;
> + etmq = cs_etm__get_queue(etm, cpu_metadata[CS_ETM_CPU]);
> + if (etmq->format == UNFORMATTED)
> + return cs_etm__insert_trace_id_node(etmq, trace_chan_id,
> + cpu_metadata);
>
> - /* All good, associate the traceID with the metadata pointer */
> - inode->priv = cpu_metadata;
> + /*
> + * Otherwise, version 0 trace IDs are global so save them into every
> + * queue.
> + */
> + for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) {
> + int ret;
> +
> + etmq = etm->queues.queue_array[i].priv;
> + ret = cs_etm__insert_trace_id_node(etmq, trace_chan_id,
> + cpu_metadata);
> + if (ret)
> + return ret;
> + }
>
> return 0;
> }
>
> +static int cs_etm__process_trace_id_v0(struct cs_etm_auxtrace *etm, int cpu,
> + u64 hw_id)
> +{
> + int err;
> + u64 *cpu_data;
> + u8 trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
> +
> + cpu_data = get_cpu_data(etm, cpu);
> + if (cpu_data == NULL)
> + return -EINVAL;
> +
> + err = cs_etm__map_trace_id_v0(etm, trace_chan_id, cpu_data);
> + if (err)
> + return err;
> +
> + /*
> + * if we are picking up the association from the packet, need to plug
> + * the correct trace ID into the metadata for setting up decoders later.
> + */
> + return cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
> +}
> +
> static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata)
> {
> u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
> @@ -329,17 +409,13 @@ static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
> {
> struct cs_etm_auxtrace *etm;
> struct perf_sample sample;
> - struct int_node *inode;
> struct evsel *evsel;
> - u64 *cpu_data;
> u64 hw_id;
> int cpu, version, err;
> - u8 trace_chan_id, curr_chan_id;
>
> /* extract and parse the HW ID */
> hw_id = event->aux_output_hw_id.hw_id;
> version = FIELD_GET(CS_AUX_HW_ID_VERSION_MASK, hw_id);
> - trace_chan_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
>
> /* check that we can handle this version */
> if (version > CS_AUX_HW_ID_CURR_VERSION) {
> @@ -367,43 +443,7 @@ static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
> return -EINVAL;
> }
>
> - /* See if the ID is mapped to a CPU, and it matches the current CPU */
> - inode = intlist__find(traceid_list, trace_chan_id);
> - if (inode) {
> - cpu_data = inode->priv;
> - if ((int)cpu_data[CS_ETM_CPU] != cpu) {
> - pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
> - return -EINVAL;
> - }
> -
> - /* check that the mapped ID matches */
> - err = cs_etm__metadata_get_trace_id(&curr_chan_id, cpu_data);
> - if (err)
> - return err;
> - if (curr_chan_id != trace_chan_id) {
> - pr_err("CS_ETM: mismatch between CPU trace ID and HW_ID packet ID\n");
> - return -EINVAL;
> - }
> -
> - /* mapped and matched - return OK */
> - return 0;
> - }
> -
> - cpu_data = get_cpu_data(etm, cpu);
> - if (cpu_data == NULL)
> - return err;
> -
> - /* not one we've seen before - lets map it */
> - err = cs_etm__map_trace_id(trace_chan_id, cpu_data);
> - if (err)
> - return err;
> -
> - /*
> - * if we are picking up the association from the packet, need to plug
> - * the correct trace ID into the metadata for setting up decoders later.
> - */
> - err = cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
> - return err;
> + return cs_etm__process_trace_id_v0(etm, cpu, hw_id);
> }
>
> void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
> @@ -856,6 +896,7 @@ static void cs_etm__free_traceid_queues(struct cs_etm_queue *etmq)
>
> static void cs_etm__free_queue(void *priv)
> {
> + struct int_node *inode, *tmp;
> struct cs_etm_queue *etmq = priv;
>
> if (!etmq)
> @@ -863,6 +904,14 @@ static void cs_etm__free_queue(void *priv)
>
> cs_etm_decoder__free(etmq->decoder);
> cs_etm__free_traceid_queues(etmq);
> +
> + /* First remove all traceID/metadata nodes for the RB tree */
> + intlist__for_each_entry_safe(inode, tmp, etmq->traceid_list)
> + intlist__remove(etmq->traceid_list, inode);
> +
> + /* Then the RB tree itself */
> + intlist__delete(etmq->traceid_list);
> +
> free(etmq);
> }
>
> @@ -885,19 +934,12 @@ static void cs_etm__free_events(struct perf_session *session)
> static void cs_etm__free(struct perf_session *session)
> {
> int i;
> - struct int_node *inode, *tmp;
> struct cs_etm_auxtrace *aux = container_of(session->auxtrace,
> struct cs_etm_auxtrace,
> auxtrace);
> cs_etm__free_events(session);
> session->auxtrace = NULL;
>
> - /* First remove all traceID/metadata nodes for the RB tree */
> - intlist__for_each_entry_safe(inode, tmp, traceid_list)
> - intlist__remove(traceid_list, inode);
> - /* Then the RB tree itself */
> - intlist__delete(traceid_list);
> -
> for (i = 0; i < aux->num_cpu; i++)
> zfree(&aux->metadata[i]);
>
> @@ -1055,9 +1097,24 @@ static struct cs_etm_queue *cs_etm__alloc_queue(void)
>
> etmq->traceid_queues_list = intlist__new(NULL);
> if (!etmq->traceid_queues_list)
> - free(etmq);
> + goto out_free;
> +
> + /*
> + * Create an RB tree for traceID-metadata tuple. Since the conversion
> + * has to be made for each packet that gets decoded, optimizing access
> + * in anything other than a sequential array is worth doing.
> + */
> + etmq->traceid_list = intlist__new(NULL);
> + if (!etmq->traceid_list)
> + goto out_free;
>
> return etmq;
> +
> +out_free:
> + intlist__delete(etmq->traceid_queues_list);
> + free(etmq);
> +
> + return NULL;
> }
>
> static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
> @@ -2207,7 +2264,7 @@ static int cs_etm__set_sample_flags(struct cs_etm_queue *etmq,
> PERF_IP_FLAG_TRACE_END;
> break;
> case CS_ETM_EXCEPTION:
> - ret = cs_etm__get_magic(packet->trace_chan_id, &magic);
> + ret = cs_etm__get_magic(etmq, packet->trace_chan_id, &magic);
> if (ret)
> return ret;
>
> @@ -3124,7 +3181,8 @@ static bool cs_etm__has_virtual_ts(u64 **metadata, int num_cpu)
> }
>
> /* map trace ids to correct metadata block, from information in metadata */
> -static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
> +static int cs_etm__map_trace_ids_metadata(struct cs_etm_auxtrace *etm, int num_cpu,
> + u64 **metadata)
> {
> u64 cs_etm_magic;
> u8 trace_chan_id;
> @@ -3146,7 +3204,7 @@ static int cs_etm__map_trace_ids_metadata(int num_cpu, u64 **metadata)
> /* unknown magic number */
> return -EINVAL;
> }
> - err = cs_etm__map_trace_id(trace_chan_id, metadata[i]);
> + err = cs_etm__map_trace_id_v0(etm, trace_chan_id, metadata[i]);
> if (err)
> return err;
> }
> @@ -3277,23 +3335,12 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
> u64 *ptr = NULL;
> u64 **metadata = NULL;
>
> - /*
> - * Create an RB tree for traceID-metadata tuple. Since the conversion
> - * has to be made for each packet that gets decoded, optimizing access
> - * in anything other than a sequential array is worth doing.
> - */
> - traceid_list = intlist__new(NULL);
> - if (!traceid_list)
> - return -ENOMEM;
> -
> /* First the global part */
> ptr = (u64 *) auxtrace_info->priv;
> num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
> metadata = zalloc(sizeof(*metadata) * num_cpu);
> - if (!metadata) {
> - err = -ENOMEM;
> - goto err_free_traceid_list;
> - }
> + if (!metadata)
> + return -ENOMEM;
>
> /* Start parsing after the common part of the header */
> i = CS_HEADER_VERSION_MAX;
> @@ -3472,7 +3519,7 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
> err = cs_etm__clear_unused_trace_ids_metadata(num_cpu, metadata);
> /* otherwise, this is a file with metadata values only, map from metadata */
> else
> - err = cs_etm__map_trace_ids_metadata(num_cpu, metadata);
> + err = cs_etm__map_trace_ids_metadata(etm, num_cpu, metadata);
>
> if (err)
> goto err_free_queues;
> @@ -3494,7 +3541,5 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
> for (int j = 0; j < num_cpu; j++)
> zfree(&metadata[j]);
> zfree(&metadata);
> -err_free_traceid_list:
> - intlist__delete(traceid_list);
> return err;
> }
> diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
> index 4696267a32f0..f4f69f7cc0f3 100644
> --- a/tools/perf/util/cs-etm.h
> +++ b/tools/perf/util/cs-etm.h
> @@ -252,7 +252,7 @@ enum cs_etm_pid_fmt {
>
> #ifdef HAVE_CSTRACE_SUPPORT
> #include <opencsd/ocsd_if_types.h>
> -int cs_etm__get_cpu(u8 trace_chan_id, int *cpu);
> +int cs_etm__get_cpu(struct cs_etm_queue *etmq, u8 trace_chan_id, int *cpu);
> enum cs_etm_pid_fmt cs_etm__get_pid_fmt(struct cs_etm_queue *etmq);
> int cs_etm__etmq_set_tid_el(struct cs_etm_queue *etmq, pid_t tid,
> u8 trace_chan_id, ocsd_ex_level el);
> --
> 2.34.1
>
Reviewed-by: Mike Leach <mike.leach@linaro.org>
--
Mike Leach
Principal Engineer, ARM Ltd.
Manchester Design Centre. UK
^ permalink raw reply [flat|nested] 40+ messages in thread
* [PATCH v5 04/17] perf: cs-etm: Create decoders based on the trace ID mappings
2024-07-12 10:20 [PATCH v5 00/17] coresight: Use per-sink trace ID maps for Perf sessions James Clark
` (2 preceding siblings ...)
2024-07-12 10:20 ` [PATCH v5 03/17] perf: cs-etm: Move traceid_list to each queue James Clark
@ 2024-07-12 10:20 ` James Clark
2024-07-18 13:24 ` Mike Leach
2024-07-12 10:20 ` [PATCH v5 05/17] perf: cs-etm: Only save valid trace IDs into files James Clark
` (12 subsequent siblings)
16 siblings, 1 reply; 40+ messages in thread
From: James Clark @ 2024-07-12 10:20 UTC (permalink / raw)
To: coresight, suzuki.poulose, gankulkarni, mike.leach, leo.yan,
anshuman.khandual
Cc: James Clark, James Clark, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, John Garry, Will Deacon, Peter Zijlstra,
Ingo Molnar, Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
Jiri Olsa, Ian Rogers, Adrian Hunter, Liang, Kan, linux-kernel,
linux-arm-kernel, linux-stm32, linux-perf-users
From: James Clark <james.clark@arm.com>
Now that each queue has a unique set of trace ID mappings, use this
list to create the decoders. In unformatted mode just add a single
mapping so only one decoder is made.
Previously each queue would have a decoder created for each traced CPU
on the system but this won't work anymore because CPUs can have
overlapping trace IDs.
This also means that the CORESIGHT_TRACE_ID_UNUSED_FLAG isn't needed
any more. If mappings aren't added then decoders aren't created, rather
than needing a flag to suppress creation.
Signed-off-by: James Clark <james.clark@arm.com>
Signed-off-by: James Clark <james.clark@linaro.org>
---
tools/perf/arch/arm/util/cs-etm.c | 8 +-
.../perf/util/cs-etm-decoder/cs-etm-decoder.c | 4 -
tools/perf/util/cs-etm.c | 155 ++++++------------
tools/perf/util/cs-etm.h | 10 --
4 files changed, 55 insertions(+), 122 deletions(-)
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index da6231367993..b0118546cd4d 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -654,8 +654,7 @@ static void cs_etm_save_etmv4_header(__u64 data[], struct auxtrace_record *itr,
/* Get trace configuration register */
data[CS_ETMV4_TRCCONFIGR] = cs_etmv4_get_config(itr);
/* traceID set to legacy version, in case new perf running on older system */
- data[CS_ETMV4_TRCTRACEIDR] = cs_etm_get_legacy_trace_id(cpu) |
- CORESIGHT_TRACE_ID_UNUSED_FLAG;
+ data[CS_ETMV4_TRCTRACEIDR] = cs_etm_get_legacy_trace_id(cpu);
/* Get read-only information from sysFS */
cs_etm_get_ro(cs_etm_pmu, cpu, metadata_etmv4_ro[CS_ETMV4_TRCIDR0],
@@ -687,7 +686,7 @@ static void cs_etm_save_ete_header(__u64 data[], struct auxtrace_record *itr, st
/* Get trace configuration register */
data[CS_ETE_TRCCONFIGR] = cs_etmv4_get_config(itr);
/* traceID set to legacy version, in case new perf running on older system */
- data[CS_ETE_TRCTRACEIDR] = cs_etm_get_legacy_trace_id(cpu) | CORESIGHT_TRACE_ID_UNUSED_FLAG;
+ data[CS_ETE_TRCTRACEIDR] = cs_etm_get_legacy_trace_id(cpu);
/* Get read-only information from sysFS */
cs_etm_get_ro(cs_etm_pmu, cpu, metadata_ete_ro[CS_ETE_TRCIDR0], &data[CS_ETE_TRCIDR0]);
@@ -743,8 +742,7 @@ static void cs_etm_get_metadata(struct perf_cpu cpu, u32 *offset,
/* Get configuration register */
info->priv[*offset + CS_ETM_ETMCR] = cs_etm_get_config(itr);
/* traceID set to legacy value in case new perf running on old system */
- info->priv[*offset + CS_ETM_ETMTRACEIDR] = cs_etm_get_legacy_trace_id(cpu) |
- CORESIGHT_TRACE_ID_UNUSED_FLAG;
+ info->priv[*offset + CS_ETM_ETMTRACEIDR] = cs_etm_get_legacy_trace_id(cpu);
/* Get read-only information from sysFS */
cs_etm_get_ro(cs_etm_pmu, cpu, metadata_etmv3_ro[CS_ETM_ETMCCER],
&info->priv[*offset + CS_ETM_ETMCCER]);
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index 0c9c48cedbf1..d49c3e9c7c21 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -684,10 +684,6 @@ cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params,
return -1;
}
- /* if the CPU has no trace ID associated, no decoder needed */
- if (csid == CORESIGHT_TRACE_ID_UNUSED_VAL)
- return 0;
-
if (d_params->operation == CS_ETM_OPERATION_DECODE) {
if (ocsd_dt_create_decoder(decoder->dcd_tree,
decoder->decoder_name,
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 0cd7d3843411..954a6f7bedf3 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -348,7 +348,6 @@ static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata)
/*
* update metadata trace ID from the value found in the AUX_HW_INFO packet.
- * This will also clear the CORESIGHT_TRACE_ID_UNUSED_FLAG flag if present.
*/
static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
{
@@ -700,80 +699,58 @@ static void cs_etm__packet_dump(const char *pkt_string)
}
static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params,
- struct cs_etm_auxtrace *etm, int t_idx,
- int m_idx, u32 etmidr)
+ u64 *metadata, u32 etmidr)
{
- u64 **metadata = etm->metadata;
-
- t_params[t_idx].protocol = cs_etm__get_v7_protocol_version(etmidr);
- t_params[t_idx].etmv3.reg_ctrl = metadata[m_idx][CS_ETM_ETMCR];
- t_params[t_idx].etmv3.reg_trc_id = metadata[m_idx][CS_ETM_ETMTRACEIDR];
+ t_params->protocol = cs_etm__get_v7_protocol_version(etmidr);
+ t_params->etmv3.reg_ctrl = metadata[CS_ETM_ETMCR];
+ t_params->etmv3.reg_trc_id = metadata[CS_ETM_ETMTRACEIDR];
}
static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
- struct cs_etm_auxtrace *etm, int t_idx,
- int m_idx)
+ u64 *metadata)
{
- u64 **metadata = etm->metadata;
-
- t_params[t_idx].protocol = CS_ETM_PROTO_ETMV4i;
- t_params[t_idx].etmv4.reg_idr0 = metadata[m_idx][CS_ETMV4_TRCIDR0];
- t_params[t_idx].etmv4.reg_idr1 = metadata[m_idx][CS_ETMV4_TRCIDR1];
- t_params[t_idx].etmv4.reg_idr2 = metadata[m_idx][CS_ETMV4_TRCIDR2];
- t_params[t_idx].etmv4.reg_idr8 = metadata[m_idx][CS_ETMV4_TRCIDR8];
- t_params[t_idx].etmv4.reg_configr = metadata[m_idx][CS_ETMV4_TRCCONFIGR];
- t_params[t_idx].etmv4.reg_traceidr = metadata[m_idx][CS_ETMV4_TRCTRACEIDR];
+ t_params->protocol = CS_ETM_PROTO_ETMV4i;
+ t_params->etmv4.reg_idr0 = metadata[CS_ETMV4_TRCIDR0];
+ t_params->etmv4.reg_idr1 = metadata[CS_ETMV4_TRCIDR1];
+ t_params->etmv4.reg_idr2 = metadata[CS_ETMV4_TRCIDR2];
+ t_params->etmv4.reg_idr8 = metadata[CS_ETMV4_TRCIDR8];
+ t_params->etmv4.reg_configr = metadata[CS_ETMV4_TRCCONFIGR];
+ t_params->etmv4.reg_traceidr = metadata[CS_ETMV4_TRCTRACEIDR];
}
static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
- struct cs_etm_auxtrace *etm, int t_idx,
- int m_idx)
+ u64 *metadata)
{
- u64 **metadata = etm->metadata;
-
- t_params[t_idx].protocol = CS_ETM_PROTO_ETE;
- t_params[t_idx].ete.reg_idr0 = metadata[m_idx][CS_ETE_TRCIDR0];
- t_params[t_idx].ete.reg_idr1 = metadata[m_idx][CS_ETE_TRCIDR1];
- t_params[t_idx].ete.reg_idr2 = metadata[m_idx][CS_ETE_TRCIDR2];
- t_params[t_idx].ete.reg_idr8 = metadata[m_idx][CS_ETE_TRCIDR8];
- t_params[t_idx].ete.reg_configr = metadata[m_idx][CS_ETE_TRCCONFIGR];
- t_params[t_idx].ete.reg_traceidr = metadata[m_idx][CS_ETE_TRCTRACEIDR];
- t_params[t_idx].ete.reg_devarch = metadata[m_idx][CS_ETE_TRCDEVARCH];
+ t_params->protocol = CS_ETM_PROTO_ETE;
+ t_params->ete.reg_idr0 = metadata[CS_ETE_TRCIDR0];
+ t_params->ete.reg_idr1 = metadata[CS_ETE_TRCIDR1];
+ t_params->ete.reg_idr2 = metadata[CS_ETE_TRCIDR2];
+ t_params->ete.reg_idr8 = metadata[CS_ETE_TRCIDR8];
+ t_params->ete.reg_configr = metadata[CS_ETE_TRCCONFIGR];
+ t_params->ete.reg_traceidr = metadata[CS_ETE_TRCTRACEIDR];
+ t_params->ete.reg_devarch = metadata[CS_ETE_TRCDEVARCH];
}
static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
- struct cs_etm_auxtrace *etm,
- enum cs_etm_format format,
- int sample_cpu,
- int decoders)
-{
- int t_idx, m_idx;
- u32 etmidr;
- u64 architecture;
-
- for (t_idx = 0; t_idx < decoders; t_idx++) {
- if (format == FORMATTED)
- m_idx = t_idx;
- else {
- m_idx = get_cpu_data_idx(etm, sample_cpu);
- if (m_idx == -1) {
- pr_warning("CS_ETM: unknown CPU, falling back to first metadata\n");
- m_idx = 0;
- }
- }
+ struct cs_etm_queue *etmq)
+{
+ struct int_node *inode;
- architecture = etm->metadata[m_idx][CS_ETM_MAGIC];
+ intlist__for_each_entry(inode, etmq->traceid_list) {
+ u64 *metadata = inode->priv;
+ u64 architecture = metadata[CS_ETM_MAGIC];
+ u32 etmidr;
switch (architecture) {
case __perf_cs_etmv3_magic:
- etmidr = etm->metadata[m_idx][CS_ETM_ETMIDR];
- cs_etm__set_trace_param_etmv3(t_params, etm, t_idx, m_idx, etmidr);
+ etmidr = metadata[CS_ETM_ETMIDR];
+ cs_etm__set_trace_param_etmv3(t_params++, metadata, etmidr);
break;
case __perf_cs_etmv4_magic:
- cs_etm__set_trace_param_etmv4(t_params, etm, t_idx, m_idx);
+ cs_etm__set_trace_param_etmv4(t_params++, metadata);
break;
case __perf_cs_ete_magic:
- cs_etm__set_trace_param_ete(t_params, etm, t_idx, m_idx);
+ cs_etm__set_trace_param_ete(t_params++, metadata);
break;
default:
return -EINVAL;
@@ -3211,35 +3188,6 @@ static int cs_etm__map_trace_ids_metadata(struct cs_etm_auxtrace *etm, int num_c
return 0;
}
-/*
- * If we found AUX_HW_ID packets, then set any metadata marked as unused to the
- * unused value to reduce the number of unneeded decoders created.
- */
-static int cs_etm__clear_unused_trace_ids_metadata(int num_cpu, u64 **metadata)
-{
- u64 cs_etm_magic;
- int i;
-
- for (i = 0; i < num_cpu; i++) {
- cs_etm_magic = metadata[i][CS_ETM_MAGIC];
- switch (cs_etm_magic) {
- case __perf_cs_etmv3_magic:
- if (metadata[i][CS_ETM_ETMTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
- metadata[i][CS_ETM_ETMTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
- break;
- case __perf_cs_etmv4_magic:
- case __perf_cs_ete_magic:
- if (metadata[i][CS_ETMV4_TRCTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
- metadata[i][CS_ETMV4_TRCTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
- break;
- default:
- /* unknown magic number */
- return -EINVAL;
- }
- }
- return 0;
-}
-
/*
* Use the data gathered by the peeks for HW_ID (trace ID mappings) and AUX
* (formatted or not) packets to create the decoders.
@@ -3247,21 +3195,26 @@ static int cs_etm__clear_unused_trace_ids_metadata(int num_cpu, u64 **metadata)
static int cs_etm__create_queue_decoders(struct cs_etm_queue *etmq)
{
struct cs_etm_decoder_params d_params;
+ struct cs_etm_trace_params *t_params;
+ int decoders = intlist__nr_entries(etmq->traceid_list);
+
+ if (decoders == 0)
+ return 0;
/*
* Each queue can only contain data from one CPU when unformatted, so only one decoder is
* needed.
*/
- int decoders = etmq->format == FORMATTED ? etmq->etm->num_cpu : 1;
+ if (etmq->format == UNFORMATTED)
+ assert(decoders == 1);
/* Use metadata to fill in trace parameters for trace decoder */
- struct cs_etm_trace_params *t_params = zalloc(sizeof(*t_params) * decoders);
+ t_params = zalloc(sizeof(*t_params) * decoders);
if (!t_params)
goto out_free;
- if (cs_etm__init_trace_params(t_params, etmq->etm, etmq->format,
- etmq->queue_nr, decoders))
+ if (cs_etm__init_trace_params(t_params, etmq))
goto out_free;
/* Set decoder parameters to decode trace packets */
@@ -3487,9 +3440,9 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
/*
* Map Trace ID values to CPU metadata.
*
- * Trace metadata will always contain Trace ID values from the legacy algorithm. If the
- * files has been recorded by a "new" perf updated to handle AUX_HW_ID then the metadata
- * ID value will also have the CORESIGHT_TRACE_ID_UNUSED_FLAG set.
+ * Trace metadata will always contain Trace ID values from the legacy algorithm
+ * in case it's read by a version of Perf that doesn't know about HW_ID packets
+ * or the kernel doesn't emit them.
*
* The updated kernel drivers that use AUX_HW_ID to sent Trace IDs will attempt to use
* the same IDs as the old algorithm as far as is possible, unless there are clashes
@@ -3498,12 +3451,11 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
*
* For a perf able to interpret AUX_HW_ID packets we first check for the presence of
* those packets. If they are there then the values will be mapped and plugged into
- * the metadata. We then set any remaining metadata values with the used flag to a
- * value CORESIGHT_TRACE_ID_UNUSED_VAL - which indicates no decoder is required.
+ * the metadata and decoders are only created for each mapping received.
*
* If no AUX_HW_ID packets are present - which means a file recorded on an old kernel
- * then we map Trace ID values to CPU directly from the metadata - clearing any unused
- * flags if present.
+ * then we map Trace ID values to CPU directly from the metadata and create decoders
+ * for all mappings.
*/
/* Scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
@@ -3514,15 +3466,12 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
if (err)
goto err_free_queues;
- /* if HW ID found then clear any unused metadata ID values */
- if (aux_hw_id_found)
- err = cs_etm__clear_unused_trace_ids_metadata(num_cpu, metadata);
- /* otherwise, this is a file with metadata values only, map from metadata */
- else
+ /* if no HW ID found this is a file with metadata values only, map from metadata */
+ if (!aux_hw_id_found) {
err = cs_etm__map_trace_ids_metadata(etm, num_cpu, metadata);
-
- if (err)
- goto err_free_queues;
+ if (err)
+ goto err_free_queues;
+ }
err = cs_etm__create_decoders(etm);
if (err)
diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
index f4f69f7cc0f3..a8caeea720aa 100644
--- a/tools/perf/util/cs-etm.h
+++ b/tools/perf/util/cs-etm.h
@@ -230,16 +230,6 @@ struct cs_etm_packet_queue {
/* CoreSight trace ID is currently the bottom 7 bits of the value */
#define CORESIGHT_TRACE_ID_VAL_MASK GENMASK(6, 0)
-/*
- * perf record will set the legacy meta data values as unused initially.
- * This allows perf report to manage the decoders created when dynamic
- * allocation in operation.
- */
-#define CORESIGHT_TRACE_ID_UNUSED_FLAG BIT(31)
-
-/* Value to set for unused trace ID values */
-#define CORESIGHT_TRACE_ID_UNUSED_VAL 0x7F
-
int cs_etm__process_auxtrace_info(union perf_event *event,
struct perf_session *session);
void cs_etm_get_default_config(const struct perf_pmu *pmu, struct perf_event_attr *attr);
--
2.34.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* Re: [PATCH v5 04/17] perf: cs-etm: Create decoders based on the trace ID mappings
2024-07-12 10:20 ` [PATCH v5 04/17] perf: cs-etm: Create decoders based on the trace ID mappings James Clark
@ 2024-07-18 13:24 ` Mike Leach
0 siblings, 0 replies; 40+ messages in thread
From: Mike Leach @ 2024-07-18 13:24 UTC (permalink / raw)
To: James Clark
Cc: coresight, suzuki.poulose, gankulkarni, leo.yan,
anshuman.khandual, James Clark, Alexander Shishkin,
Maxime Coquelin, Alexandre Torgue, John Garry, Will Deacon,
Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Jiri Olsa, Ian Rogers, Adrian Hunter,
Liang, Kan, linux-kernel, linux-arm-kernel, linux-stm32,
linux-perf-users
On Fri, 12 Jul 2024 at 11:22, James Clark <james.clark@linaro.org> wrote:
>
> From: James Clark <james.clark@arm.com>
>
> Now that each queue has a unique set of trace ID mappings, use this
> list to create the decoders. In unformatted mode just add a single
> mapping so only one decoder is made.
>
> Previously each queue would have a decoder created for each traced CPU
> on the system but this won't work anymore because CPUs can have
> overlapping trace IDs.
>
> This also means that the CORESIGHT_TRACE_ID_UNUSED_FLAG isn't needed
> any more. If mappings aren't added then decoders aren't created, rather
> than needing a flag to suppress creation.
>
> Signed-off-by: James Clark <james.clark@arm.com>
> Signed-off-by: James Clark <james.clark@linaro.org>
> ---
> tools/perf/arch/arm/util/cs-etm.c | 8 +-
> .../perf/util/cs-etm-decoder/cs-etm-decoder.c | 4 -
> tools/perf/util/cs-etm.c | 155 ++++++------------
> tools/perf/util/cs-etm.h | 10 --
> 4 files changed, 55 insertions(+), 122 deletions(-)
>
> diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
> index da6231367993..b0118546cd4d 100644
> --- a/tools/perf/arch/arm/util/cs-etm.c
> +++ b/tools/perf/arch/arm/util/cs-etm.c
> @@ -654,8 +654,7 @@ static void cs_etm_save_etmv4_header(__u64 data[], struct auxtrace_record *itr,
> /* Get trace configuration register */
> data[CS_ETMV4_TRCCONFIGR] = cs_etmv4_get_config(itr);
> /* traceID set to legacy version, in case new perf running on older system */
> - data[CS_ETMV4_TRCTRACEIDR] = cs_etm_get_legacy_trace_id(cpu) |
> - CORESIGHT_TRACE_ID_UNUSED_FLAG;
> + data[CS_ETMV4_TRCTRACEIDR] = cs_etm_get_legacy_trace_id(cpu);
>
> /* Get read-only information from sysFS */
> cs_etm_get_ro(cs_etm_pmu, cpu, metadata_etmv4_ro[CS_ETMV4_TRCIDR0],
> @@ -687,7 +686,7 @@ static void cs_etm_save_ete_header(__u64 data[], struct auxtrace_record *itr, st
> /* Get trace configuration register */
> data[CS_ETE_TRCCONFIGR] = cs_etmv4_get_config(itr);
> /* traceID set to legacy version, in case new perf running on older system */
> - data[CS_ETE_TRCTRACEIDR] = cs_etm_get_legacy_trace_id(cpu) | CORESIGHT_TRACE_ID_UNUSED_FLAG;
> + data[CS_ETE_TRCTRACEIDR] = cs_etm_get_legacy_trace_id(cpu);
>
> /* Get read-only information from sysFS */
> cs_etm_get_ro(cs_etm_pmu, cpu, metadata_ete_ro[CS_ETE_TRCIDR0], &data[CS_ETE_TRCIDR0]);
> @@ -743,8 +742,7 @@ static void cs_etm_get_metadata(struct perf_cpu cpu, u32 *offset,
> /* Get configuration register */
> info->priv[*offset + CS_ETM_ETMCR] = cs_etm_get_config(itr);
> /* traceID set to legacy value in case new perf running on old system */
> - info->priv[*offset + CS_ETM_ETMTRACEIDR] = cs_etm_get_legacy_trace_id(cpu) |
> - CORESIGHT_TRACE_ID_UNUSED_FLAG;
> + info->priv[*offset + CS_ETM_ETMTRACEIDR] = cs_etm_get_legacy_trace_id(cpu);
> /* Get read-only information from sysFS */
> cs_etm_get_ro(cs_etm_pmu, cpu, metadata_etmv3_ro[CS_ETM_ETMCCER],
> &info->priv[*offset + CS_ETM_ETMCCER]);
> diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> index 0c9c48cedbf1..d49c3e9c7c21 100644
> --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> @@ -684,10 +684,6 @@ cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params,
> return -1;
> }
>
> - /* if the CPU has no trace ID associated, no decoder needed */
> - if (csid == CORESIGHT_TRACE_ID_UNUSED_VAL)
> - return 0;
> -
> if (d_params->operation == CS_ETM_OPERATION_DECODE) {
> if (ocsd_dt_create_decoder(decoder->dcd_tree,
> decoder->decoder_name,
> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
> index 0cd7d3843411..954a6f7bedf3 100644
> --- a/tools/perf/util/cs-etm.c
> +++ b/tools/perf/util/cs-etm.c
> @@ -348,7 +348,6 @@ static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata)
>
> /*
> * update metadata trace ID from the value found in the AUX_HW_INFO packet.
> - * This will also clear the CORESIGHT_TRACE_ID_UNUSED_FLAG flag if present.
> */
> static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata)
> {
> @@ -700,80 +699,58 @@ static void cs_etm__packet_dump(const char *pkt_string)
> }
>
> static void cs_etm__set_trace_param_etmv3(struct cs_etm_trace_params *t_params,
> - struct cs_etm_auxtrace *etm, int t_idx,
> - int m_idx, u32 etmidr)
> + u64 *metadata, u32 etmidr)
> {
> - u64 **metadata = etm->metadata;
> -
> - t_params[t_idx].protocol = cs_etm__get_v7_protocol_version(etmidr);
> - t_params[t_idx].etmv3.reg_ctrl = metadata[m_idx][CS_ETM_ETMCR];
> - t_params[t_idx].etmv3.reg_trc_id = metadata[m_idx][CS_ETM_ETMTRACEIDR];
> + t_params->protocol = cs_etm__get_v7_protocol_version(etmidr);
> + t_params->etmv3.reg_ctrl = metadata[CS_ETM_ETMCR];
> + t_params->etmv3.reg_trc_id = metadata[CS_ETM_ETMTRACEIDR];
> }
>
> static void cs_etm__set_trace_param_etmv4(struct cs_etm_trace_params *t_params,
> - struct cs_etm_auxtrace *etm, int t_idx,
> - int m_idx)
> + u64 *metadata)
> {
> - u64 **metadata = etm->metadata;
> -
> - t_params[t_idx].protocol = CS_ETM_PROTO_ETMV4i;
> - t_params[t_idx].etmv4.reg_idr0 = metadata[m_idx][CS_ETMV4_TRCIDR0];
> - t_params[t_idx].etmv4.reg_idr1 = metadata[m_idx][CS_ETMV4_TRCIDR1];
> - t_params[t_idx].etmv4.reg_idr2 = metadata[m_idx][CS_ETMV4_TRCIDR2];
> - t_params[t_idx].etmv4.reg_idr8 = metadata[m_idx][CS_ETMV4_TRCIDR8];
> - t_params[t_idx].etmv4.reg_configr = metadata[m_idx][CS_ETMV4_TRCCONFIGR];
> - t_params[t_idx].etmv4.reg_traceidr = metadata[m_idx][CS_ETMV4_TRCTRACEIDR];
> + t_params->protocol = CS_ETM_PROTO_ETMV4i;
> + t_params->etmv4.reg_idr0 = metadata[CS_ETMV4_TRCIDR0];
> + t_params->etmv4.reg_idr1 = metadata[CS_ETMV4_TRCIDR1];
> + t_params->etmv4.reg_idr2 = metadata[CS_ETMV4_TRCIDR2];
> + t_params->etmv4.reg_idr8 = metadata[CS_ETMV4_TRCIDR8];
> + t_params->etmv4.reg_configr = metadata[CS_ETMV4_TRCCONFIGR];
> + t_params->etmv4.reg_traceidr = metadata[CS_ETMV4_TRCTRACEIDR];
> }
>
> static void cs_etm__set_trace_param_ete(struct cs_etm_trace_params *t_params,
> - struct cs_etm_auxtrace *etm, int t_idx,
> - int m_idx)
> + u64 *metadata)
> {
> - u64 **metadata = etm->metadata;
> -
> - t_params[t_idx].protocol = CS_ETM_PROTO_ETE;
> - t_params[t_idx].ete.reg_idr0 = metadata[m_idx][CS_ETE_TRCIDR0];
> - t_params[t_idx].ete.reg_idr1 = metadata[m_idx][CS_ETE_TRCIDR1];
> - t_params[t_idx].ete.reg_idr2 = metadata[m_idx][CS_ETE_TRCIDR2];
> - t_params[t_idx].ete.reg_idr8 = metadata[m_idx][CS_ETE_TRCIDR8];
> - t_params[t_idx].ete.reg_configr = metadata[m_idx][CS_ETE_TRCCONFIGR];
> - t_params[t_idx].ete.reg_traceidr = metadata[m_idx][CS_ETE_TRCTRACEIDR];
> - t_params[t_idx].ete.reg_devarch = metadata[m_idx][CS_ETE_TRCDEVARCH];
> + t_params->protocol = CS_ETM_PROTO_ETE;
> + t_params->ete.reg_idr0 = metadata[CS_ETE_TRCIDR0];
> + t_params->ete.reg_idr1 = metadata[CS_ETE_TRCIDR1];
> + t_params->ete.reg_idr2 = metadata[CS_ETE_TRCIDR2];
> + t_params->ete.reg_idr8 = metadata[CS_ETE_TRCIDR8];
> + t_params->ete.reg_configr = metadata[CS_ETE_TRCCONFIGR];
> + t_params->ete.reg_traceidr = metadata[CS_ETE_TRCTRACEIDR];
> + t_params->ete.reg_devarch = metadata[CS_ETE_TRCDEVARCH];
> }
>
> static int cs_etm__init_trace_params(struct cs_etm_trace_params *t_params,
> - struct cs_etm_auxtrace *etm,
> - enum cs_etm_format format,
> - int sample_cpu,
> - int decoders)
> -{
> - int t_idx, m_idx;
> - u32 etmidr;
> - u64 architecture;
> -
> - for (t_idx = 0; t_idx < decoders; t_idx++) {
> - if (format == FORMATTED)
> - m_idx = t_idx;
> - else {
> - m_idx = get_cpu_data_idx(etm, sample_cpu);
> - if (m_idx == -1) {
> - pr_warning("CS_ETM: unknown CPU, falling back to first metadata\n");
> - m_idx = 0;
> - }
> - }
> + struct cs_etm_queue *etmq)
> +{
> + struct int_node *inode;
>
> - architecture = etm->metadata[m_idx][CS_ETM_MAGIC];
> + intlist__for_each_entry(inode, etmq->traceid_list) {
> + u64 *metadata = inode->priv;
> + u64 architecture = metadata[CS_ETM_MAGIC];
> + u32 etmidr;
>
> switch (architecture) {
> case __perf_cs_etmv3_magic:
> - etmidr = etm->metadata[m_idx][CS_ETM_ETMIDR];
> - cs_etm__set_trace_param_etmv3(t_params, etm, t_idx, m_idx, etmidr);
> + etmidr = metadata[CS_ETM_ETMIDR];
> + cs_etm__set_trace_param_etmv3(t_params++, metadata, etmidr);
> break;
> case __perf_cs_etmv4_magic:
> - cs_etm__set_trace_param_etmv4(t_params, etm, t_idx, m_idx);
> + cs_etm__set_trace_param_etmv4(t_params++, metadata);
> break;
> case __perf_cs_ete_magic:
> - cs_etm__set_trace_param_ete(t_params, etm, t_idx, m_idx);
> + cs_etm__set_trace_param_ete(t_params++, metadata);
> break;
> default:
> return -EINVAL;
> @@ -3211,35 +3188,6 @@ static int cs_etm__map_trace_ids_metadata(struct cs_etm_auxtrace *etm, int num_c
> return 0;
> }
>
> -/*
> - * If we found AUX_HW_ID packets, then set any metadata marked as unused to the
> - * unused value to reduce the number of unneeded decoders created.
> - */
> -static int cs_etm__clear_unused_trace_ids_metadata(int num_cpu, u64 **metadata)
> -{
> - u64 cs_etm_magic;
> - int i;
> -
> - for (i = 0; i < num_cpu; i++) {
> - cs_etm_magic = metadata[i][CS_ETM_MAGIC];
> - switch (cs_etm_magic) {
> - case __perf_cs_etmv3_magic:
> - if (metadata[i][CS_ETM_ETMTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
> - metadata[i][CS_ETM_ETMTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
> - break;
> - case __perf_cs_etmv4_magic:
> - case __perf_cs_ete_magic:
> - if (metadata[i][CS_ETMV4_TRCTRACEIDR] & CORESIGHT_TRACE_ID_UNUSED_FLAG)
> - metadata[i][CS_ETMV4_TRCTRACEIDR] = CORESIGHT_TRACE_ID_UNUSED_VAL;
> - break;
> - default:
> - /* unknown magic number */
> - return -EINVAL;
> - }
> - }
> - return 0;
> -}
> -
> /*
> * Use the data gathered by the peeks for HW_ID (trace ID mappings) and AUX
> * (formatted or not) packets to create the decoders.
> @@ -3247,21 +3195,26 @@ static int cs_etm__clear_unused_trace_ids_metadata(int num_cpu, u64 **metadata)
> static int cs_etm__create_queue_decoders(struct cs_etm_queue *etmq)
> {
> struct cs_etm_decoder_params d_params;
> + struct cs_etm_trace_params *t_params;
> + int decoders = intlist__nr_entries(etmq->traceid_list);
> +
> + if (decoders == 0)
> + return 0;
>
> /*
> * Each queue can only contain data from one CPU when unformatted, so only one decoder is
> * needed.
> */
> - int decoders = etmq->format == FORMATTED ? etmq->etm->num_cpu : 1;
> + if (etmq->format == UNFORMATTED)
> + assert(decoders == 1);
>
> /* Use metadata to fill in trace parameters for trace decoder */
> - struct cs_etm_trace_params *t_params = zalloc(sizeof(*t_params) * decoders);
> + t_params = zalloc(sizeof(*t_params) * decoders);
>
> if (!t_params)
> goto out_free;
>
> - if (cs_etm__init_trace_params(t_params, etmq->etm, etmq->format,
> - etmq->queue_nr, decoders))
> + if (cs_etm__init_trace_params(t_params, etmq))
> goto out_free;
>
> /* Set decoder parameters to decode trace packets */
> @@ -3487,9 +3440,9 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
> /*
> * Map Trace ID values to CPU metadata.
> *
> - * Trace metadata will always contain Trace ID values from the legacy algorithm. If the
> - * files has been recorded by a "new" perf updated to handle AUX_HW_ID then the metadata
> - * ID value will also have the CORESIGHT_TRACE_ID_UNUSED_FLAG set.
> + * Trace metadata will always contain Trace ID values from the legacy algorithm
> + * in case it's read by a version of Perf that doesn't know about HW_ID packets
> + * or the kernel doesn't emit them.
> *
> * The updated kernel drivers that use AUX_HW_ID to sent Trace IDs will attempt to use
> * the same IDs as the old algorithm as far as is possible, unless there are clashes
> @@ -3498,12 +3451,11 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
> *
> * For a perf able to interpret AUX_HW_ID packets we first check for the presence of
> * those packets. If they are there then the values will be mapped and plugged into
> - * the metadata. We then set any remaining metadata values with the used flag to a
> - * value CORESIGHT_TRACE_ID_UNUSED_VAL - which indicates no decoder is required.
> + * the metadata and decoders are only created for each mapping received.
> *
> * If no AUX_HW_ID packets are present - which means a file recorded on an old kernel
> - * then we map Trace ID values to CPU directly from the metadata - clearing any unused
> - * flags if present.
> + * then we map Trace ID values to CPU directly from the metadata and create decoders
> + * for all mappings.
> */
>
> /* Scan for AUX_OUTPUT_HW_ID records to map trace ID values to CPU metadata */
> @@ -3514,15 +3466,12 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
> if (err)
> goto err_free_queues;
>
> - /* if HW ID found then clear any unused metadata ID values */
> - if (aux_hw_id_found)
> - err = cs_etm__clear_unused_trace_ids_metadata(num_cpu, metadata);
> - /* otherwise, this is a file with metadata values only, map from metadata */
> - else
> + /* if no HW ID found this is a file with metadata values only, map from metadata */
> + if (!aux_hw_id_found) {
> err = cs_etm__map_trace_ids_metadata(etm, num_cpu, metadata);
> -
> - if (err)
> - goto err_free_queues;
> + if (err)
> + goto err_free_queues;
> + }
>
> err = cs_etm__create_decoders(etm);
> if (err)
> diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h
> index f4f69f7cc0f3..a8caeea720aa 100644
> --- a/tools/perf/util/cs-etm.h
> +++ b/tools/perf/util/cs-etm.h
> @@ -230,16 +230,6 @@ struct cs_etm_packet_queue {
> /* CoreSight trace ID is currently the bottom 7 bits of the value */
> #define CORESIGHT_TRACE_ID_VAL_MASK GENMASK(6, 0)
>
> -/*
> - * perf record will set the legacy meta data values as unused initially.
> - * This allows perf report to manage the decoders created when dynamic
> - * allocation in operation.
> - */
> -#define CORESIGHT_TRACE_ID_UNUSED_FLAG BIT(31)
> -
> -/* Value to set for unused trace ID values */
> -#define CORESIGHT_TRACE_ID_UNUSED_VAL 0x7F
> -
> int cs_etm__process_auxtrace_info(union perf_event *event,
> struct perf_session *session);
> void cs_etm_get_default_config(const struct perf_pmu *pmu, struct perf_event_attr *attr);
> --
> 2.34.1
>
Reviewed-by: Mike Leach <mike.leach@linaro.org>
--
Mike Leach
Principal Engineer, ARM Ltd.
Manchester Design Centre. UK
^ permalink raw reply [flat|nested] 40+ messages in thread
* [PATCH v5 05/17] perf: cs-etm: Only save valid trace IDs into files
2024-07-12 10:20 [PATCH v5 00/17] coresight: Use per-sink trace ID maps for Perf sessions James Clark
` (3 preceding siblings ...)
2024-07-12 10:20 ` [PATCH v5 04/17] perf: cs-etm: Create decoders based on the trace ID mappings James Clark
@ 2024-07-12 10:20 ` James Clark
2024-07-18 13:24 ` Mike Leach
2024-07-12 10:20 ` [PATCH v5 06/17] perf: cs-etm: Support version 0.1 of HW_ID packets James Clark
` (11 subsequent siblings)
16 siblings, 1 reply; 40+ messages in thread
From: James Clark @ 2024-07-12 10:20 UTC (permalink / raw)
To: coresight, suzuki.poulose, gankulkarni, mike.leach, leo.yan,
anshuman.khandual
Cc: James Clark, James Clark, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, John Garry, Will Deacon, Peter Zijlstra,
Ingo Molnar, Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
Jiri Olsa, Ian Rogers, Adrian Hunter, Liang, Kan, linux-kernel,
linux-arm-kernel, linux-stm32, linux-perf-users
From: James Clark <james.clark@arm.com>
Saving an invalid trace ID isn't a bug because Perf always masks with
CORESIGHT_TRACE_ID_VAL_MASK before using these values, but to avoid it
looking like it could be, make an effort to not save bad values.
Signed-off-by: James Clark <james.clark@arm.com>
Signed-off-by: James Clark <james.clark@linaro.org>
---
tools/perf/arch/arm/util/cs-etm.c | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
index b0118546cd4d..14b8afabce3a 100644
--- a/tools/perf/arch/arm/util/cs-etm.c
+++ b/tools/perf/arch/arm/util/cs-etm.c
@@ -643,7 +643,8 @@ static bool cs_etm_is_ete(struct perf_pmu *cs_etm_pmu, struct perf_cpu cpu)
static __u64 cs_etm_get_legacy_trace_id(struct perf_cpu cpu)
{
- return CORESIGHT_LEGACY_CPU_TRACE_ID(cpu.cpu);
+ /* Wrap at 48 so that invalid trace IDs aren't saved into files. */
+ return CORESIGHT_LEGACY_CPU_TRACE_ID(cpu.cpu % 48);
}
static void cs_etm_save_etmv4_header(__u64 data[], struct auxtrace_record *itr, struct perf_cpu cpu)
--
2.34.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* Re: [PATCH v5 05/17] perf: cs-etm: Only save valid trace IDs into files
2024-07-12 10:20 ` [PATCH v5 05/17] perf: cs-etm: Only save valid trace IDs into files James Clark
@ 2024-07-18 13:24 ` Mike Leach
0 siblings, 0 replies; 40+ messages in thread
From: Mike Leach @ 2024-07-18 13:24 UTC (permalink / raw)
To: James Clark
Cc: coresight, suzuki.poulose, gankulkarni, leo.yan,
anshuman.khandual, James Clark, Alexander Shishkin,
Maxime Coquelin, Alexandre Torgue, John Garry, Will Deacon,
Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Jiri Olsa, Ian Rogers, Adrian Hunter,
Liang, Kan, linux-kernel, linux-arm-kernel, linux-stm32,
linux-perf-users
On Fri, 12 Jul 2024 at 11:22, James Clark <james.clark@linaro.org> wrote:
>
> From: James Clark <james.clark@arm.com>
>
> This isn't a bug because Perf always masks with
> CORESIGHT_TRACE_ID_VAL_MASK before using these values, but to avoid it
> looking like it could be, make an effort to not save bad values.
>
> Signed-off-by: James Clark <james.clark@arm.com>
> Signed-off-by: James Clark <james.clark@linaro.org>
> ---
> tools/perf/arch/arm/util/cs-etm.c | 3 ++-
> 1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
> index b0118546cd4d..14b8afabce3a 100644
> --- a/tools/perf/arch/arm/util/cs-etm.c
> +++ b/tools/perf/arch/arm/util/cs-etm.c
> @@ -643,7 +643,8 @@ static bool cs_etm_is_ete(struct perf_pmu *cs_etm_pmu, struct perf_cpu cpu)
>
> static __u64 cs_etm_get_legacy_trace_id(struct perf_cpu cpu)
> {
> - return CORESIGHT_LEGACY_CPU_TRACE_ID(cpu.cpu);
> + /* Wrap at 48 so that invalid trace IDs aren't saved into files. */
> + return CORESIGHT_LEGACY_CPU_TRACE_ID(cpu.cpu % 48);
> }
>
> static void cs_etm_save_etmv4_header(__u64 data[], struct auxtrace_record *itr, struct perf_cpu cpu)
> --
> 2.34.1
>
Reviewed-by: Mike Leach <mike.leach@linaro.org>
--
Mike Leach
Principal Engineer, ARM Ltd.
Manchester Design Centre. UK
^ permalink raw reply [flat|nested] 40+ messages in thread
* [PATCH v5 06/17] perf: cs-etm: Support version 0.1 of HW_ID packets
2024-07-12 10:20 [PATCH v5 00/17] coresight: Use per-sink trace ID maps for Perf sessions James Clark
` (4 preceding siblings ...)
2024-07-12 10:20 ` [PATCH v5 05/17] perf: cs-etm: Only save valid trace IDs into files James Clark
@ 2024-07-12 10:20 ` James Clark
2024-07-18 13:24 ` Mike Leach
2024-07-12 10:20 ` [PATCH v5 07/17] perf: cs-etm: Print queue number in raw trace dump James Clark
` (10 subsequent siblings)
16 siblings, 1 reply; 40+ messages in thread
From: James Clark @ 2024-07-12 10:20 UTC (permalink / raw)
To: coresight, suzuki.poulose, gankulkarni, mike.leach, leo.yan,
anshuman.khandual
Cc: James Clark, James Clark, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, John Garry, Will Deacon, Peter Zijlstra,
Ingo Molnar, Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
Jiri Olsa, Ian Rogers, Adrian Hunter, Liang, Kan, linux-kernel,
linux-arm-kernel, linux-stm32, linux-perf-users
From: James Clark <james.clark@arm.com>
v0.1 HW_ID packets have a new field that describes which sink each CPU
writes to. Use the sink ID to link trace ID maps to each other so that
mappings are shared wherever the sink is shared.
Also update the error message to show that overlapping IDs aren't an
error in per-thread mode, just not supported. In the future we can
use the CPU ID from the AUX records, or watch for changing sink IDs on
HW_ID packets to use the correct decoders.
Signed-off-by: James Clark <james.clark@arm.com>
Signed-off-by: James Clark <james.clark@linaro.org>
---
tools/include/linux/coresight-pmu.h | 17 +++--
tools/perf/util/cs-etm.c | 100 +++++++++++++++++++++++++---
2 files changed, 103 insertions(+), 14 deletions(-)
diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h
index 51ac441a37c3..89b0ac0014b0 100644
--- a/tools/include/linux/coresight-pmu.h
+++ b/tools/include/linux/coresight-pmu.h
@@ -49,12 +49,21 @@
* Interpretation of the PERF_RECORD_AUX_OUTPUT_HW_ID payload.
* Used to associate a CPU with the CoreSight Trace ID.
* [07:00] - Trace ID - uses 8 bits to make value easy to read in file.
- * [59:08] - Unused (SBZ)
- * [63:60] - Version
+ * [39:08] - Sink ID - as reported in /sys/bus/event_source/devices/cs_etm/sinks/
+ * Added in minor version 1.
+ * [55:40] - Unused (SBZ)
+ * [59:56] - Minor Version - previously existing fields are compatible with
+ * all minor versions.
+ * [63:60] - Major Version - previously existing fields mean different things
+ * in new major versions.
*/
#define CS_AUX_HW_ID_TRACE_ID_MASK GENMASK_ULL(7, 0)
-#define CS_AUX_HW_ID_VERSION_MASK GENMASK_ULL(63, 60)
+#define CS_AUX_HW_ID_SINK_ID_MASK GENMASK_ULL(39, 8)
-#define CS_AUX_HW_ID_CURR_VERSION 0
+#define CS_AUX_HW_ID_MINOR_VERSION_MASK GENMASK_ULL(59, 56)
+#define CS_AUX_HW_ID_MAJOR_VERSION_MASK GENMASK_ULL(63, 60)
+
+#define CS_AUX_HW_ID_MAJOR_VERSION 0
+#define CS_AUX_HW_ID_MINOR_VERSION 1
#endif
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 954a6f7bedf3..87e983da19be 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -118,6 +118,12 @@ struct cs_etm_queue {
struct cs_etm_traceid_queue **traceid_queues;
/* Conversion between traceID and metadata pointers */
struct intlist *traceid_list;
+ /*
+ * Same as traceid_list, but traceid_list may be a reference to the list
+ * of another queue which has a matching sink ID.
+ */
+ struct intlist *own_traceid_list;
+ u32 sink_id;
};
static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm);
@@ -142,6 +148,7 @@ static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata);
(queue_nr << 16 | trace_chan_id)
#define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16)
#define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff)
+#define SINK_UNSET ((u32) -1)
static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
{
@@ -241,7 +248,16 @@ static int cs_etm__insert_trace_id_node(struct cs_etm_queue *etmq,
int err;
if (curr_cpu_data[CS_ETM_CPU] != cpu_metadata[CS_ETM_CPU]) {
- pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
+ /*
+ * With > CORESIGHT_TRACE_IDS_MAX ETMs, overlapping IDs
+ * are expected (but not supported) in per-thread mode,
+ * rather than signifying an error.
+ */
+ if (etmq->etm->per_thread_decoding)
+ pr_err("CS_ETM: overlapping Trace IDs aren't currently supported in per-thread mode\n");
+ else
+ pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
+
return -EINVAL;
}
@@ -326,6 +342,64 @@ static int cs_etm__process_trace_id_v0(struct cs_etm_auxtrace *etm, int cpu,
return cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
}
+static int cs_etm__process_trace_id_v0_1(struct cs_etm_auxtrace *etm, int cpu,
+ u64 hw_id)
+{
+ struct cs_etm_queue *etmq = cs_etm__get_queue(etm, cpu);
+ int ret;
+ u64 *cpu_data;
+ u32 sink_id = FIELD_GET(CS_AUX_HW_ID_SINK_ID_MASK, hw_id);
+ u8 trace_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
+
+ /*
+ * Check sink id hasn't changed in per-cpu mode. In per-thread mode,
+ * let it pass for now until an actual overlapping trace ID is hit. In
+ * most cases IDs won't overlap even if the sink changes.
+ */
+ if (!etmq->etm->per_thread_decoding && etmq->sink_id != SINK_UNSET &&
+ etmq->sink_id != sink_id) {
+ pr_err("CS_ETM: mismatch between sink IDs\n");
+ return -EINVAL;
+ }
+
+ etmq->sink_id = sink_id;
+
+ /* Find which other queues use this sink and link their ID maps */
+ for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) {
+ struct cs_etm_queue *other_etmq = etm->queues.queue_array[i].priv;
+
+ /* Different sinks, skip */
+ if (other_etmq->sink_id != etmq->sink_id)
+ continue;
+
+ /* Already linked, skip */
+ if (other_etmq->traceid_list == etmq->traceid_list)
+ continue;
+
+ /* At the point of first linking, this one should be empty */
+ if (!intlist__empty(etmq->traceid_list)) {
+ pr_err("CS_ETM: Can't link populated trace ID lists\n");
+ return -EINVAL;
+ }
+
+ etmq->own_traceid_list = NULL;
+ intlist__delete(etmq->traceid_list);
+ etmq->traceid_list = other_etmq->traceid_list;
+ break;
+ }
+
+ cpu_data = get_cpu_data(etm, cpu);
+ ret = cs_etm__insert_trace_id_node(etmq, trace_id, cpu_data);
+ if (ret)
+ return ret;
+
+ ret = cs_etm__metadata_set_trace_id(trace_id, cpu_data);
+ if (ret)
+ return ret;
+
+ return 0;
+}
+
static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata)
{
u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
@@ -414,10 +488,10 @@ static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
/* extract and parse the HW ID */
hw_id = event->aux_output_hw_id.hw_id;
- version = FIELD_GET(CS_AUX_HW_ID_VERSION_MASK, hw_id);
+ version = FIELD_GET(CS_AUX_HW_ID_MAJOR_VERSION_MASK, hw_id);
/* check that we can handle this version */
- if (version > CS_AUX_HW_ID_CURR_VERSION) {
+ if (version > CS_AUX_HW_ID_MAJOR_VERSION) {
pr_err("CS ETM Trace: PERF_RECORD_AUX_OUTPUT_HW_ID version %d not supported. Please update Perf.\n",
version);
return -EINVAL;
@@ -442,7 +516,10 @@ static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
return -EINVAL;
}
- return cs_etm__process_trace_id_v0(etm, cpu, hw_id);
+ if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0)
+ return cs_etm__process_trace_id_v0(etm, cpu, hw_id);
+ else
+ return cs_etm__process_trace_id_v0_1(etm, cpu, hw_id);
}
void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
@@ -882,12 +959,14 @@ static void cs_etm__free_queue(void *priv)
cs_etm_decoder__free(etmq->decoder);
cs_etm__free_traceid_queues(etmq);
- /* First remove all traceID/metadata nodes for the RB tree */
- intlist__for_each_entry_safe(inode, tmp, etmq->traceid_list)
- intlist__remove(etmq->traceid_list, inode);
+ if (etmq->own_traceid_list) {
+ /* First remove all traceID/metadata nodes for the RB tree */
+ intlist__for_each_entry_safe(inode, tmp, etmq->own_traceid_list)
+ intlist__remove(etmq->own_traceid_list, inode);
- /* Then the RB tree itself */
- intlist__delete(etmq->traceid_list);
+ /* Then the RB tree itself */
+ intlist__delete(etmq->own_traceid_list);
+ }
free(etmq);
}
@@ -1081,7 +1160,7 @@ static struct cs_etm_queue *cs_etm__alloc_queue(void)
* has to be made for each packet that gets decoded, optimizing access
* in anything other than a sequential array is worth doing.
*/
- etmq->traceid_list = intlist__new(NULL);
+ etmq->traceid_list = etmq->own_traceid_list = intlist__new(NULL);
if (!etmq->traceid_list)
goto out_free;
@@ -1113,6 +1192,7 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
etmq->queue_nr = queue_nr;
queue->cpu = queue_nr; /* Placeholder, may be reset to -1 in per-thread mode */
etmq->offset = 0;
+ etmq->sink_id = SINK_UNSET;
return 0;
}
--
2.34.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* Re: [PATCH v5 06/17] perf: cs-etm: Support version 0.1 of HW_ID packets
2024-07-12 10:20 ` [PATCH v5 06/17] perf: cs-etm: Support version 0.1 of HW_ID packets James Clark
@ 2024-07-18 13:24 ` Mike Leach
2024-07-19 10:48 ` James Clark
0 siblings, 1 reply; 40+ messages in thread
From: Mike Leach @ 2024-07-18 13:24 UTC (permalink / raw)
To: James Clark
Cc: coresight, suzuki.poulose, gankulkarni, leo.yan,
anshuman.khandual, James Clark, Alexander Shishkin,
Maxime Coquelin, Alexandre Torgue, John Garry, Will Deacon,
Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Jiri Olsa, Ian Rogers, Adrian Hunter,
Liang, Kan, linux-kernel, linux-arm-kernel, linux-stm32,
linux-perf-users
On Fri, 12 Jul 2024 at 11:22, James Clark <james.clark@linaro.org> wrote:
>
> From: James Clark <james.clark@arm.com>
>
> v0.1 HW_ID packets have a new field that describes which sink each CPU
> writes to. Use the sink ID to link trace ID maps to each other so that
> mappings are shared wherever the sink is shared.
>
> Also update the error message to show that overlapping IDs aren't an
> error in per-thread mode, just not supported. In the future we can
> use the CPU ID from the AUX records, or watch for changing sink IDs on
> HW_ID packets to use the correct decoders.
>
> Signed-off-by: James Clark <james.clark@arm.com>
> Signed-off-by: James Clark <james.clark@linaro.org>
> ---
> tools/include/linux/coresight-pmu.h | 17 +++--
> tools/perf/util/cs-etm.c | 100 +++++++++++++++++++++++++---
> 2 files changed, 103 insertions(+), 14 deletions(-)
>
> diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h
> index 51ac441a37c3..89b0ac0014b0 100644
> --- a/tools/include/linux/coresight-pmu.h
> +++ b/tools/include/linux/coresight-pmu.h
> @@ -49,12 +49,21 @@
> * Interpretation of the PERF_RECORD_AUX_OUTPUT_HW_ID payload.
> * Used to associate a CPU with the CoreSight Trace ID.
> * [07:00] - Trace ID - uses 8 bits to make value easy to read in file.
> - * [59:08] - Unused (SBZ)
> - * [63:60] - Version
> + * [39:08] - Sink ID - as reported in /sys/bus/event_source/devices/cs_etm/sinks/
> + * Added in minor version 1.
> + * [55:40] - Unused (SBZ)
> + * [59:56] - Minor Version - previously existing fields are compatible with
> + * all minor versions.
> + * [63:60] - Major Version - previously existing fields mean different things
> + * in new major versions.
> */
> #define CS_AUX_HW_ID_TRACE_ID_MASK GENMASK_ULL(7, 0)
> -#define CS_AUX_HW_ID_VERSION_MASK GENMASK_ULL(63, 60)
> +#define CS_AUX_HW_ID_SINK_ID_MASK GENMASK_ULL(39, 8)
>
> -#define CS_AUX_HW_ID_CURR_VERSION 0
> +#define CS_AUX_HW_ID_MINOR_VERSION_MASK GENMASK_ULL(59, 56)
> +#define CS_AUX_HW_ID_MAJOR_VERSION_MASK GENMASK_ULL(63, 60)
> +
> +#define CS_AUX_HW_ID_MAJOR_VERSION 0
> +#define CS_AUX_HW_ID_MINOR_VERSION 1
>
> #endif
> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
> index 954a6f7bedf3..87e983da19be 100644
> --- a/tools/perf/util/cs-etm.c
> +++ b/tools/perf/util/cs-etm.c
> @@ -118,6 +118,12 @@ struct cs_etm_queue {
> struct cs_etm_traceid_queue **traceid_queues;
> /* Conversion between traceID and metadata pointers */
> struct intlist *traceid_list;
> + /*
> + * Same as traceid_list, but traceid_list may be a reference to another
> + * queue's which has a matching sink ID.
> + */
> + struct intlist *own_traceid_list;
> + u32 sink_id;
> };
>
> static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm);
> @@ -142,6 +148,7 @@ static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata);
> (queue_nr << 16 | trace_chan_id)
> #define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16)
> #define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff)
> +#define SINK_UNSET ((u32) -1)
>
> static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
> {
> @@ -241,7 +248,16 @@ static int cs_etm__insert_trace_id_node(struct cs_etm_queue *etmq,
> int err;
>
> if (curr_cpu_data[CS_ETM_CPU] != cpu_metadata[CS_ETM_CPU]) {
> - pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
> + /*
> + * With > CORESIGHT_TRACE_IDS_MAX ETMs, overlapping IDs
> + * are expected (but not supported) in per-thread mode,
> + * rather than signifying an error.
> + */
> + if (etmq->etm->per_thread_decoding)
> + pr_err("CS_ETM: overlapping Trace IDs aren't currently supported in per-thread mode\n");
> + else
> + pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
> +
> return -EINVAL;
> }
>
> @@ -326,6 +342,64 @@ static int cs_etm__process_trace_id_v0(struct cs_etm_auxtrace *etm, int cpu,
> return cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
> }
>
> +static int cs_etm__process_trace_id_v0_1(struct cs_etm_auxtrace *etm, int cpu,
> + u64 hw_id)
> +{
> + struct cs_etm_queue *etmq = cs_etm__get_queue(etm, cpu);
> + int ret;
> + u64 *cpu_data;
> + u32 sink_id = FIELD_GET(CS_AUX_HW_ID_SINK_ID_MASK, hw_id);
> + u8 trace_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
> +
> + /*
> + * Check sink id hasn't changed in per-cpu mode. In per-thread mode,
> + * let it pass for now until an actual overlapping trace ID is hit. In
> + * most cases IDs won't overlap even if the sink changes.
> + */
> + if (!etmq->etm->per_thread_decoding && etmq->sink_id != SINK_UNSET &&
> + etmq->sink_id != sink_id) {
> + pr_err("CS_ETM: mismatch between sink IDs\n");
> + return -EINVAL;
> + }
> +
> + etmq->sink_id = sink_id;
> +
> + /* Find which other queues use this sink and link their ID maps */
> + for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) {
> + struct cs_etm_queue *other_etmq = etm->queues.queue_array[i].priv;
> +
> + /* Different sinks, skip */
> + if (other_etmq->sink_id != etmq->sink_id)
> + continue;
> +
> + /* Already linked, skip */
> + if (other_etmq->traceid_list == etmq->traceid_list)
> + continue;
> +
> + /* At the point of first linking, this one should be empty */
> + if (!intlist__empty(etmq->traceid_list)) {
> + pr_err("CS_ETM: Can't link populated trace ID lists\n");
> + return -EINVAL;
> + }
> +
> + etmq->own_traceid_list = NULL;
> + intlist__delete(etmq->traceid_list);
> + etmq->traceid_list = other_etmq->traceid_list;
> + break;
> + }
> +
> + cpu_data = get_cpu_data(etm, cpu);
> + ret = cs_etm__insert_trace_id_node(etmq, trace_id, cpu_data);
> + if (ret)
> + return ret;
> +
> + ret = cs_etm__metadata_set_trace_id(trace_id, cpu_data);
> + if (ret)
> + return ret;
> +
> + return 0;
> +}
> +
> static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata)
> {
> u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
> @@ -414,10 +488,10 @@ static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
>
> /* extract and parse the HW ID */
> hw_id = event->aux_output_hw_id.hw_id;
> - version = FIELD_GET(CS_AUX_HW_ID_VERSION_MASK, hw_id);
> + version = FIELD_GET(CS_AUX_HW_ID_MAJOR_VERSION_MASK, hw_id);
>
> /* check that we can handle this version */
> - if (version > CS_AUX_HW_ID_CURR_VERSION) {
> + if (version > CS_AUX_HW_ID_MAJOR_VERSION) {
> pr_err("CS ETM Trace: PERF_RECORD_AUX_OUTPUT_HW_ID version %d not supported. Please update Perf.\n",
> version);
> return -EINVAL;
> @@ -442,7 +516,10 @@ static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
> return -EINVAL;
> }
>
> - return cs_etm__process_trace_id_v0(etm, cpu, hw_id);
Perhaps leave this as the final statement of the function
> + if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0)
this could be moved before and be
if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 1)
return cs_etm__process_trace_id_v0_1(etm, cpu, hw_id);
> + return cs_etm__process_trace_id_v0(etm, cpu, hw_id);
> + else
> + return cs_etm__process_trace_id_v0_1(etm, cpu, hw_id);
> }
>
> void cs_etm__etmq_set_traceid_queue_timestamp(struct cs_etm_queue *etmq,
> @@ -882,12 +959,14 @@ static void cs_etm__free_queue(void *priv)
> cs_etm_decoder__free(etmq->decoder);
> cs_etm__free_traceid_queues(etmq);
>
> - /* First remove all traceID/metadata nodes for the RB tree */
> - intlist__for_each_entry_safe(inode, tmp, etmq->traceid_list)
> - intlist__remove(etmq->traceid_list, inode);
> + if (etmq->own_traceid_list) {
> + /* First remove all traceID/metadata nodes for the RB tree */
> + intlist__for_each_entry_safe(inode, tmp, etmq->own_traceid_list)
> + intlist__remove(etmq->own_traceid_list, inode);
>
> - /* Then the RB tree itself */
> - intlist__delete(etmq->traceid_list);
> + /* Then the RB tree itself */
> + intlist__delete(etmq->own_traceid_list);
> + }
>
> free(etmq);
> }
> @@ -1081,7 +1160,7 @@ static struct cs_etm_queue *cs_etm__alloc_queue(void)
> * has to be made for each packet that gets decoded, optimizing access
> * in anything other than a sequential array is worth doing.
> */
> - etmq->traceid_list = intlist__new(NULL);
> + etmq->traceid_list = etmq->own_traceid_list = intlist__new(NULL);
> if (!etmq->traceid_list)
> goto out_free;
>
> @@ -1113,6 +1192,7 @@ static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm,
> etmq->queue_nr = queue_nr;
> queue->cpu = queue_nr; /* Placeholder, may be reset to -1 in per-thread mode */
> etmq->offset = 0;
> + etmq->sink_id = SINK_UNSET;
>
> return 0;
> }
> --
> 2.34.1
>
Reviewed-by: Mike Leach <mike.leach@linaro.org>
--
Mike Leach
Principal Engineer, ARM Ltd.
Manchester Design Centre. UK
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [PATCH v5 06/17] perf: cs-etm: Support version 0.1 of HW_ID packets
2024-07-18 13:24 ` Mike Leach
@ 2024-07-19 10:48 ` James Clark
2024-07-19 10:49 ` James Clark
0 siblings, 1 reply; 40+ messages in thread
From: James Clark @ 2024-07-19 10:48 UTC (permalink / raw)
To: Mike Leach
Cc: coresight, suzuki.poulose, gankulkarni, leo.yan,
anshuman.khandual, James Clark, Alexander Shishkin,
Maxime Coquelin, Alexandre Torgue, John Garry, Will Deacon,
Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Jiri Olsa, Ian Rogers, Adrian Hunter,
Liang, Kan, linux-kernel, linux-arm-kernel, linux-stm32,
linux-perf-users
On 18/07/2024 2:24 pm, Mike Leach wrote:
> On Fri, 12 Jul 2024 at 11:22, James Clark <james.clark@linaro.org> wrote:
>>
>> From: James Clark <james.clark@arm.com>
>>
>> v0.1 HW_ID packets have a new field that describes which sink each CPU
>> writes to. Use the sink ID to link trace ID maps to each other so that
>> mappings are shared wherever the sink is shared.
>>
>> Also update the error message to show that overlapping IDs aren't an
>> error in per-thread mode, just not supported. In the future we can
>> use the CPU ID from the AUX records, or watch for changing sink IDs on
>> HW_ID packets to use the correct decoders.
>>
>> Signed-off-by: James Clark <james.clark@arm.com>
>> Signed-off-by: James Clark <james.clark@linaro.org>
>> ---
>> tools/include/linux/coresight-pmu.h | 17 +++--
>> tools/perf/util/cs-etm.c | 100 +++++++++++++++++++++++++---
>> 2 files changed, 103 insertions(+), 14 deletions(-)
>>
>> diff --git a/tools/include/linux/coresight-pmu.h b/tools/include/linux/coresight-pmu.h
>> index 51ac441a37c3..89b0ac0014b0 100644
>> --- a/tools/include/linux/coresight-pmu.h
>> +++ b/tools/include/linux/coresight-pmu.h
>> @@ -49,12 +49,21 @@
>> * Interpretation of the PERF_RECORD_AUX_OUTPUT_HW_ID payload.
>> * Used to associate a CPU with the CoreSight Trace ID.
>> * [07:00] - Trace ID - uses 8 bits to make value easy to read in file.
>> - * [59:08] - Unused (SBZ)
>> - * [63:60] - Version
>> + * [39:08] - Sink ID - as reported in /sys/bus/event_source/devices/cs_etm/sinks/
>> + * Added in minor version 1.
>> + * [55:40] - Unused (SBZ)
>> + * [59:56] - Minor Version - previously existing fields are compatible with
>> + * all minor versions.
>> + * [63:60] - Major Version - previously existing fields mean different things
>> + * in new major versions.
>> */
>> #define CS_AUX_HW_ID_TRACE_ID_MASK GENMASK_ULL(7, 0)
>> -#define CS_AUX_HW_ID_VERSION_MASK GENMASK_ULL(63, 60)
>> +#define CS_AUX_HW_ID_SINK_ID_MASK GENMASK_ULL(39, 8)
>>
>> -#define CS_AUX_HW_ID_CURR_VERSION 0
>> +#define CS_AUX_HW_ID_MINOR_VERSION_MASK GENMASK_ULL(59, 56)
>> +#define CS_AUX_HW_ID_MAJOR_VERSION_MASK GENMASK_ULL(63, 60)
>> +
>> +#define CS_AUX_HW_ID_MAJOR_VERSION 0
>> +#define CS_AUX_HW_ID_MINOR_VERSION 1
>>
>> #endif
>> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
>> index 954a6f7bedf3..87e983da19be 100644
>> --- a/tools/perf/util/cs-etm.c
>> +++ b/tools/perf/util/cs-etm.c
>> @@ -118,6 +118,12 @@ struct cs_etm_queue {
>> struct cs_etm_traceid_queue **traceid_queues;
>> /* Conversion between traceID and metadata pointers */
>> struct intlist *traceid_list;
>> + /*
>> + * Same as traceid_list, but traceid_list may be a reference to another
>> + * queue's which has a matching sink ID.
>> + */
>> + struct intlist *own_traceid_list;
>> + u32 sink_id;
>> };
>>
>> static int cs_etm__process_timestamped_queues(struct cs_etm_auxtrace *etm);
>> @@ -142,6 +148,7 @@ static int cs_etm__metadata_set_trace_id(u8 trace_chan_id, u64 *cpu_metadata);
>> (queue_nr << 16 | trace_chan_id)
>> #define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16)
>> #define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff)
>> +#define SINK_UNSET ((u32) -1)
>>
>> static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
>> {
>> @@ -241,7 +248,16 @@ static int cs_etm__insert_trace_id_node(struct cs_etm_queue *etmq,
>> int err;
>>
>> if (curr_cpu_data[CS_ETM_CPU] != cpu_metadata[CS_ETM_CPU]) {
>> - pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
>> + /*
>> + * With > CORESIGHT_TRACE_IDS_MAX ETMs, overlapping IDs
>> + * are expected (but not supported) in per-thread mode,
>> + * rather than signifying an error.
>> + */
>> + if (etmq->etm->per_thread_decoding)
>> + pr_err("CS_ETM: overlapping Trace IDs aren't currently supported in per-thread mode\n");
>> + else
>> + pr_err("CS_ETM: map mismatch between HW_ID packet CPU and Trace ID\n");
>> +
>> return -EINVAL;
>> }
>>
>> @@ -326,6 +342,64 @@ static int cs_etm__process_trace_id_v0(struct cs_etm_auxtrace *etm, int cpu,
>> return cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
>> }
>>
>> +static int cs_etm__process_trace_id_v0_1(struct cs_etm_auxtrace *etm, int cpu,
>> + u64 hw_id)
>> +{
>> + struct cs_etm_queue *etmq = cs_etm__get_queue(etm, cpu);
>> + int ret;
>> + u64 *cpu_data;
>> + u32 sink_id = FIELD_GET(CS_AUX_HW_ID_SINK_ID_MASK, hw_id);
>> + u8 trace_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
>> +
>> + /*
>> + * Check sink id hasn't changed in per-cpu mode. In per-thread mode,
>> + * let it pass for now until an actual overlapping trace ID is hit. In
>> + * most cases IDs won't overlap even if the sink changes.
>> + */
>> + if (!etmq->etm->per_thread_decoding && etmq->sink_id != SINK_UNSET &&
>> + etmq->sink_id != sink_id) {
>> + pr_err("CS_ETM: mismatch between sink IDs\n");
>> + return -EINVAL;
>> + }
>> +
>> + etmq->sink_id = sink_id;
>> +
>> + /* Find which other queues use this sink and link their ID maps */
>> + for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) {
>> + struct cs_etm_queue *other_etmq = etm->queues.queue_array[i].priv;
>> +
>> + /* Different sinks, skip */
>> + if (other_etmq->sink_id != etmq->sink_id)
>> + continue;
>> +
>> + /* Already linked, skip */
>> + if (other_etmq->traceid_list == etmq->traceid_list)
>> + continue;
>> +
>> + /* At the point of first linking, this one should be empty */
>> + if (!intlist__empty(etmq->traceid_list)) {
>> + pr_err("CS_ETM: Can't link populated trace ID lists\n");
>> + return -EINVAL;
>> + }
>> +
>> + etmq->own_traceid_list = NULL;
>> + intlist__delete(etmq->traceid_list);
>> + etmq->traceid_list = other_etmq->traceid_list;
>> + break;
>> + }
>> +
>> + cpu_data = get_cpu_data(etm, cpu);
>> + ret = cs_etm__insert_trace_id_node(etmq, trace_id, cpu_data);
>> + if (ret)
>> + return ret;
>> +
>> + ret = cs_etm__metadata_set_trace_id(trace_id, cpu_data);
>> + if (ret)
>> + return ret;
>> +
>> + return 0;
>> +}
>> +
>> static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64 *cpu_metadata)
>> {
>> u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
>> @@ -414,10 +488,10 @@ static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
>>
>> /* extract and parse the HW ID */
>> hw_id = event->aux_output_hw_id.hw_id;
>> - version = FIELD_GET(CS_AUX_HW_ID_VERSION_MASK, hw_id);
>> + version = FIELD_GET(CS_AUX_HW_ID_MAJOR_VERSION_MASK, hw_id);
>>
>> /* check that we can handle this version */
>> - if (version > CS_AUX_HW_ID_CURR_VERSION) {
>> + if (version > CS_AUX_HW_ID_MAJOR_VERSION) {
>> pr_err("CS ETM Trace: PERF_RECORD_AUX_OUTPUT_HW_ID version %d not supported. Please update Perf.\n",
>> version);
>> return -EINVAL;
>> @@ -442,7 +516,10 @@ static int cs_etm__process_aux_output_hw_id(struct perf_session *session,
>> return -EINVAL;
>> }
>>
>> - return cs_etm__process_trace_id_v0(etm, cpu, hw_id);
>
> Perhaps leave this as the final statement of the function
>
>> + if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0)
>
> this could be moved before and be
>
> if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 1)
> return cs_etm__process_trace_id_v0_1(etm, cpu, hw_id);
>
>
Because I was intending minor version changes to be backwards compatible,
I have it so that any value other than 0 is treated as v0.1. Otherwise,
version updates will break old versions of Perf. And then if we added a
v0.3 it would look like this:
if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0)
return cs_etm__process_trace_id_v0(etm, cpu, hw_id);
else if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 1)
return cs_etm__process_trace_id_v0_1(etm, cpu, hw_id);
else
return cs_etm__process_trace_id_v0_2(etm, cpu, hw_id);
Based on that I'm not sure if you still think it should be changed?
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [PATCH v5 06/17] perf: cs-etm: Support version 0.1 of HW_ID packets
2024-07-19 10:48 ` James Clark
@ 2024-07-19 10:49 ` James Clark
2024-07-19 13:45 ` Mike Leach
0 siblings, 1 reply; 40+ messages in thread
From: James Clark @ 2024-07-19 10:49 UTC (permalink / raw)
To: Mike Leach
Cc: coresight, suzuki.poulose, gankulkarni, leo.yan,
anshuman.khandual, James Clark, Alexander Shishkin,
Maxime Coquelin, Alexandre Torgue, John Garry, Will Deacon,
Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Jiri Olsa, Ian Rogers, Adrian Hunter,
Liang, Kan, linux-kernel, linux-arm-kernel, linux-stm32,
linux-perf-users
On 19/07/2024 11:48 am, James Clark wrote:
>
>
> On 18/07/2024 2:24 pm, Mike Leach wrote:
>> On Fri, 12 Jul 2024 at 11:22, James Clark <james.clark@linaro.org> wrote:
>>>
>>> From: James Clark <james.clark@arm.com>
>>>
>>> v0.1 HW_ID packets have a new field that describes which sink each CPU
>>> writes to. Use the sink ID to link trace ID maps to each other so that
>>> mappings are shared wherever the sink is shared.
>>>
>>> Also update the error message to show that overlapping IDs aren't an
>>> error in per-thread mode, just not supported. In the future we can
>>> use the CPU ID from the AUX records, or watch for changing sink IDs on
>>> HW_ID packets to use the correct decoders.
>>>
>>> Signed-off-by: James Clark <james.clark@arm.com>
>>> Signed-off-by: James Clark <james.clark@linaro.org>
>>> ---
>>> tools/include/linux/coresight-pmu.h | 17 +++--
>>> tools/perf/util/cs-etm.c | 100 +++++++++++++++++++++++++---
>>> 2 files changed, 103 insertions(+), 14 deletions(-)
>>>
>>> diff --git a/tools/include/linux/coresight-pmu.h
>>> b/tools/include/linux/coresight-pmu.h
>>> index 51ac441a37c3..89b0ac0014b0 100644
>>> --- a/tools/include/linux/coresight-pmu.h
>>> +++ b/tools/include/linux/coresight-pmu.h
>>> @@ -49,12 +49,21 @@
>>> * Interpretation of the PERF_RECORD_AUX_OUTPUT_HW_ID payload.
>>> * Used to associate a CPU with the CoreSight Trace ID.
>>> * [07:00] - Trace ID - uses 8 bits to make value easy to read in
>>> file.
>>> - * [59:08] - Unused (SBZ)
>>> - * [63:60] - Version
>>> + * [39:08] - Sink ID - as reported in
>>> /sys/bus/event_source/devices/cs_etm/sinks/
>>> + * Added in minor version 1.
>>> + * [55:40] - Unused (SBZ)
>>> + * [59:56] - Minor Version - previously existing fields are
>>> compatible with
>>> + * all minor versions.
>>> + * [63:60] - Major Version - previously existing fields mean
>>> different things
>>> + * in new major versions.
>>> */
>>> #define CS_AUX_HW_ID_TRACE_ID_MASK GENMASK_ULL(7, 0)
>>> -#define CS_AUX_HW_ID_VERSION_MASK GENMASK_ULL(63, 60)
>>> +#define CS_AUX_HW_ID_SINK_ID_MASK GENMASK_ULL(39, 8)
>>>
>>> -#define CS_AUX_HW_ID_CURR_VERSION 0
>>> +#define CS_AUX_HW_ID_MINOR_VERSION_MASK GENMASK_ULL(59, 56)
>>> +#define CS_AUX_HW_ID_MAJOR_VERSION_MASK GENMASK_ULL(63, 60)
>>> +
>>> +#define CS_AUX_HW_ID_MAJOR_VERSION 0
>>> +#define CS_AUX_HW_ID_MINOR_VERSION 1
>>>
>>> #endif
>>> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
>>> index 954a6f7bedf3..87e983da19be 100644
>>> --- a/tools/perf/util/cs-etm.c
>>> +++ b/tools/perf/util/cs-etm.c
>>> @@ -118,6 +118,12 @@ struct cs_etm_queue {
>>> struct cs_etm_traceid_queue **traceid_queues;
>>> /* Conversion between traceID and metadata pointers */
>>> struct intlist *traceid_list;
>>> + /*
>>> + * Same as traceid_list, but traceid_list may be a reference
>>> to another
>>> + * queue's which has a matching sink ID.
>>> + */
>>> + struct intlist *own_traceid_list;
>>> + u32 sink_id;
>>> };
>>>
>>> static int cs_etm__process_timestamped_queues(struct
>>> cs_etm_auxtrace *etm);
>>> @@ -142,6 +148,7 @@ static int cs_etm__metadata_set_trace_id(u8
>>> trace_chan_id, u64 *cpu_metadata);
>>> (queue_nr << 16 | trace_chan_id)
>>> #define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16)
>>> #define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff)
>>> +#define SINK_UNSET ((u32) -1)
>>>
>>> static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
>>> {
>>> @@ -241,7 +248,16 @@ static int cs_etm__insert_trace_id_node(struct
>>> cs_etm_queue *etmq,
>>> int err;
>>>
>>> if (curr_cpu_data[CS_ETM_CPU] !=
>>> cpu_metadata[CS_ETM_CPU]) {
>>> - pr_err("CS_ETM: map mismatch between HW_ID
>>> packet CPU and Trace ID\n");
>>> + /*
>>> + * With > CORESIGHT_TRACE_IDS_MAX ETMs,
>>> overlapping IDs
>>> + * are expected (but not supported) in
>>> per-thread mode,
>>> + * rather than signifying an error.
>>> + */
>>> + if (etmq->etm->per_thread_decoding)
>>> + pr_err("CS_ETM: overlapping Trace IDs
>>> aren't currently supported in per-thread mode\n");
>>> + else
>>> + pr_err("CS_ETM: map mismatch between
>>> HW_ID packet CPU and Trace ID\n");
>>> +
>>> return -EINVAL;
>>> }
>>>
>>> @@ -326,6 +342,64 @@ static int cs_etm__process_trace_id_v0(struct
>>> cs_etm_auxtrace *etm, int cpu,
>>> return cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
>>> }
>>>
>>> +static int cs_etm__process_trace_id_v0_1(struct cs_etm_auxtrace
>>> *etm, int cpu,
>>> + u64 hw_id)
>>> +{
>>> + struct cs_etm_queue *etmq = cs_etm__get_queue(etm, cpu);
>>> + int ret;
>>> + u64 *cpu_data;
>>> + u32 sink_id = FIELD_GET(CS_AUX_HW_ID_SINK_ID_MASK, hw_id);
>>> + u8 trace_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
>>> +
>>> + /*
>>> + * Check sink id hasn't changed in per-cpu mode. In
>>> per-thread mode,
>>> + * let it pass for now until an actual overlapping trace ID
>>> is hit. In
>>> + * most cases IDs won't overlap even if the sink changes.
>>> + */
>>> + if (!etmq->etm->per_thread_decoding && etmq->sink_id !=
>>> SINK_UNSET &&
>>> + etmq->sink_id != sink_id) {
>>> + pr_err("CS_ETM: mismatch between sink IDs\n");
>>> + return -EINVAL;
>>> + }
>>> +
>>> + etmq->sink_id = sink_id;
>>> +
>>> + /* Find which other queues use this sink and link their ID
>>> maps */
>>> + for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) {
>>> + struct cs_etm_queue *other_etmq =
>>> etm->queues.queue_array[i].priv;
>>> +
>>> + /* Different sinks, skip */
>>> + if (other_etmq->sink_id != etmq->sink_id)
>>> + continue;
>>> +
>>> + /* Already linked, skip */
>>> + if (other_etmq->traceid_list == etmq->traceid_list)
>>> + continue;
>>> +
>>> + /* At the point of first linking, this one should be
>>> empty */
>>> + if (!intlist__empty(etmq->traceid_list)) {
>>> + pr_err("CS_ETM: Can't link populated trace ID
>>> lists\n");
>>> + return -EINVAL;
>>> + }
>>> +
>>> + etmq->own_traceid_list = NULL;
>>> + intlist__delete(etmq->traceid_list);
>>> + etmq->traceid_list = other_etmq->traceid_list;
>>> + break;
>>> + }
>>> +
>>> + cpu_data = get_cpu_data(etm, cpu);
>>> + ret = cs_etm__insert_trace_id_node(etmq, trace_id, cpu_data);
>>> + if (ret)
>>> + return ret;
>>> +
>>> + ret = cs_etm__metadata_set_trace_id(trace_id, cpu_data);
>>> + if (ret)
>>> + return ret;
>>> +
>>> + return 0;
>>> +}
>>> +
>>> static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64
>>> *cpu_metadata)
>>> {
>>> u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
>>> @@ -414,10 +488,10 @@ static int
>>> cs_etm__process_aux_output_hw_id(struct perf_session *session,
>>>
>>> /* extract and parse the HW ID */
>>> hw_id = event->aux_output_hw_id.hw_id;
>>> - version = FIELD_GET(CS_AUX_HW_ID_VERSION_MASK, hw_id);
>>> + version = FIELD_GET(CS_AUX_HW_ID_MAJOR_VERSION_MASK, hw_id);
>>>
>>> /* check that we can handle this version */
>>> - if (version > CS_AUX_HW_ID_CURR_VERSION) {
>>> + if (version > CS_AUX_HW_ID_MAJOR_VERSION) {
>>> pr_err("CS ETM Trace: PERF_RECORD_AUX_OUTPUT_HW_ID
>>> version %d not supported. Please update Perf.\n",
>>> version);
>>> return -EINVAL;
>>> @@ -442,7 +516,10 @@ static int
>>> cs_etm__process_aux_output_hw_id(struct perf_session *session,
>>> return -EINVAL;
>>> }
>>>
>>> - return cs_etm__process_trace_id_v0(etm, cpu, hw_id);
>>
>> Perhaps leave this as the final statement of the function
>>
>>> + if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0)
>>
>> this could be moved before and be
>>
>> if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 1)
>> return cs_etm__process_trace_id_v0_1(etm, cpu, hw_id);
>>
>>
>
> Because I was intending minor version changes to be backwards compatible
> I have it so that any value other than 0 is treated as v0.1. Otherwise
> version updates will break old versions of Perf. And then if we added a
> v0.3 it would look like this:
That should have said v0.2 ^
>
> if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0)
> return cs_etm__process_trace_id_v0(etm, cpu, hw_id);
> else if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 1)
> return cs_etm__process_trace_id_v0_1(etm, cpu, hw_id);
> else
> return cs_etm__process_trace_id_v0_2(etm, cpu, hw_id);
>
> Based on that I'm not sure if you still think it should be changed?
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [PATCH v5 06/17] perf: cs-etm: Support version 0.1 of HW_ID packets
2024-07-19 10:49 ` James Clark
@ 2024-07-19 13:45 ` Mike Leach
2024-07-19 13:57 ` James Clark
0 siblings, 1 reply; 40+ messages in thread
From: Mike Leach @ 2024-07-19 13:45 UTC (permalink / raw)
To: James Clark
Cc: coresight, suzuki.poulose, gankulkarni, leo.yan,
anshuman.khandual, James Clark, Alexander Shishkin,
Maxime Coquelin, Alexandre Torgue, John Garry, Will Deacon,
Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Jiri Olsa, Ian Rogers, Adrian Hunter,
Liang, Kan, linux-kernel, linux-arm-kernel, linux-stm32,
linux-perf-users
Fair enough - I'm less worried about the ordering than about the final:
else
return fn()
}
where there's no unconditional return at the end of the function. The
last else looks redundant to me. It's more a stylistic thing; I'm not sure
if there is a hard and fast rule either way.
Mike
On Fri, 19 Jul 2024 at 11:49, James Clark <james.clark@linaro.org> wrote:
>
>
>
> On 19/07/2024 11:48 am, James Clark wrote:
> >
> >
> > On 18/07/2024 2:24 pm, Mike Leach wrote:
> >> On Fri, 12 Jul 2024 at 11:22, James Clark <james.clark@linaro.org> wrote:
> >>>
> >>> From: James Clark <james.clark@arm.com>
> >>>
> >>> v0.1 HW_ID packets have a new field that describes which sink each CPU
> >>> writes to. Use the sink ID to link trace ID maps to each other so that
> >>> mappings are shared wherever the sink is shared.
> >>>
> >>> Also update the error message to show that overlapping IDs aren't an
> >>> error in per-thread mode, just not supported. In the future we can
> >>> use the CPU ID from the AUX records, or watch for changing sink IDs on
> >>> HW_ID packets to use the correct decoders.
> >>>
> >>> Signed-off-by: James Clark <james.clark@arm.com>
> >>> Signed-off-by: James Clark <james.clark@linaro.org>
> >>> ---
> >>> tools/include/linux/coresight-pmu.h | 17 +++--
> >>> tools/perf/util/cs-etm.c | 100 +++++++++++++++++++++++++---
> >>> 2 files changed, 103 insertions(+), 14 deletions(-)
> >>>
> >>> diff --git a/tools/include/linux/coresight-pmu.h
> >>> b/tools/include/linux/coresight-pmu.h
> >>> index 51ac441a37c3..89b0ac0014b0 100644
> >>> --- a/tools/include/linux/coresight-pmu.h
> >>> +++ b/tools/include/linux/coresight-pmu.h
> >>> @@ -49,12 +49,21 @@
> >>> * Interpretation of the PERF_RECORD_AUX_OUTPUT_HW_ID payload.
> >>> * Used to associate a CPU with the CoreSight Trace ID.
> >>> * [07:00] - Trace ID - uses 8 bits to make value easy to read in
> >>> file.
> >>> - * [59:08] - Unused (SBZ)
> >>> - * [63:60] - Version
> >>> + * [39:08] - Sink ID - as reported in
> >>> /sys/bus/event_source/devices/cs_etm/sinks/
> >>> + * Added in minor version 1.
> >>> + * [55:40] - Unused (SBZ)
> >>> + * [59:56] - Minor Version - previously existing fields are
> >>> compatible with
> >>> + * all minor versions.
> >>> + * [63:60] - Major Version - previously existing fields mean
> >>> different things
> >>> + * in new major versions.
> >>> */
> >>> #define CS_AUX_HW_ID_TRACE_ID_MASK GENMASK_ULL(7, 0)
> >>> -#define CS_AUX_HW_ID_VERSION_MASK GENMASK_ULL(63, 60)
> >>> +#define CS_AUX_HW_ID_SINK_ID_MASK GENMASK_ULL(39, 8)
> >>>
> >>> -#define CS_AUX_HW_ID_CURR_VERSION 0
> >>> +#define CS_AUX_HW_ID_MINOR_VERSION_MASK GENMASK_ULL(59, 56)
> >>> +#define CS_AUX_HW_ID_MAJOR_VERSION_MASK GENMASK_ULL(63, 60)
> >>> +
> >>> +#define CS_AUX_HW_ID_MAJOR_VERSION 0
> >>> +#define CS_AUX_HW_ID_MINOR_VERSION 1
> >>>
> >>> #endif
> >>> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
> >>> index 954a6f7bedf3..87e983da19be 100644
> >>> --- a/tools/perf/util/cs-etm.c
> >>> +++ b/tools/perf/util/cs-etm.c
> >>> @@ -118,6 +118,12 @@ struct cs_etm_queue {
> >>> struct cs_etm_traceid_queue **traceid_queues;
> >>> /* Conversion between traceID and metadata pointers */
> >>> struct intlist *traceid_list;
> >>> + /*
> >>> + * Same as traceid_list, but traceid_list may be a reference
> >>> to another
> >>> + * queue's which has a matching sink ID.
> >>> + */
> >>> + struct intlist *own_traceid_list;
> >>> + u32 sink_id;
> >>> };
> >>>
> >>> static int cs_etm__process_timestamped_queues(struct
> >>> cs_etm_auxtrace *etm);
> >>> @@ -142,6 +148,7 @@ static int cs_etm__metadata_set_trace_id(u8
> >>> trace_chan_id, u64 *cpu_metadata);
> >>> (queue_nr << 16 | trace_chan_id)
> >>> #define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16)
> >>> #define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff)
> >>> +#define SINK_UNSET ((u32) -1)
> >>>
> >>> static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
> >>> {
> >>> @@ -241,7 +248,16 @@ static int cs_etm__insert_trace_id_node(struct
> >>> cs_etm_queue *etmq,
> >>> int err;
> >>>
> >>> if (curr_cpu_data[CS_ETM_CPU] !=
> >>> cpu_metadata[CS_ETM_CPU]) {
> >>> - pr_err("CS_ETM: map mismatch between HW_ID
> >>> packet CPU and Trace ID\n");
> >>> + /*
> >>> + * With > CORESIGHT_TRACE_IDS_MAX ETMs,
> >>> overlapping IDs
> >>> + * are expected (but not supported) in
> >>> per-thread mode,
> >>> + * rather than signifying an error.
> >>> + */
> >>> + if (etmq->etm->per_thread_decoding)
> >>> + pr_err("CS_ETM: overlapping Trace IDs
> >>> aren't currently supported in per-thread mode\n");
> >>> + else
> >>> + pr_err("CS_ETM: map mismatch between
> >>> HW_ID packet CPU and Trace ID\n");
> >>> +
> >>> return -EINVAL;
> >>> }
> >>>
> >>> @@ -326,6 +342,64 @@ static int cs_etm__process_trace_id_v0(struct
> >>> cs_etm_auxtrace *etm, int cpu,
> >>> return cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
> >>> }
> >>>
> >>> +static int cs_etm__process_trace_id_v0_1(struct cs_etm_auxtrace
> >>> *etm, int cpu,
> >>> + u64 hw_id)
> >>> +{
> >>> + struct cs_etm_queue *etmq = cs_etm__get_queue(etm, cpu);
> >>> + int ret;
> >>> + u64 *cpu_data;
> >>> + u32 sink_id = FIELD_GET(CS_AUX_HW_ID_SINK_ID_MASK, hw_id);
> >>> + u8 trace_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
> >>> +
> >>> + /*
> >>> + * Check sink id hasn't changed in per-cpu mode. In
> >>> per-thread mode,
> >>> + * let it pass for now until an actual overlapping trace ID
> >>> is hit. In
> >>> + * most cases IDs won't overlap even if the sink changes.
> >>> + */
> >>> + if (!etmq->etm->per_thread_decoding && etmq->sink_id !=
> >>> SINK_UNSET &&
> >>> + etmq->sink_id != sink_id) {
> >>> + pr_err("CS_ETM: mismatch between sink IDs\n");
> >>> + return -EINVAL;
> >>> + }
> >>> +
> >>> + etmq->sink_id = sink_id;
> >>> +
> >>> + /* Find which other queues use this sink and link their ID
> >>> maps */
> >>> + for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) {
> >>> + struct cs_etm_queue *other_etmq =
> >>> etm->queues.queue_array[i].priv;
> >>> +
> >>> + /* Different sinks, skip */
> >>> + if (other_etmq->sink_id != etmq->sink_id)
> >>> + continue;
> >>> +
> >>> + /* Already linked, skip */
> >>> + if (other_etmq->traceid_list == etmq->traceid_list)
> >>> + continue;
> >>> +
> >>> + /* At the point of first linking, this one should be
> >>> empty */
> >>> + if (!intlist__empty(etmq->traceid_list)) {
> >>> + pr_err("CS_ETM: Can't link populated trace ID
> >>> lists\n");
> >>> + return -EINVAL;
> >>> + }
> >>> +
> >>> + etmq->own_traceid_list = NULL;
> >>> + intlist__delete(etmq->traceid_list);
> >>> + etmq->traceid_list = other_etmq->traceid_list;
> >>> + break;
> >>> + }
> >>> +
> >>> + cpu_data = get_cpu_data(etm, cpu);
> >>> + ret = cs_etm__insert_trace_id_node(etmq, trace_id, cpu_data);
> >>> + if (ret)
> >>> + return ret;
> >>> +
> >>> + ret = cs_etm__metadata_set_trace_id(trace_id, cpu_data);
> >>> + if (ret)
> >>> + return ret;
> >>> +
> >>> + return 0;
> >>> +}
> >>> +
> >>> static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64
> >>> *cpu_metadata)
> >>> {
> >>> u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
> >>> @@ -414,10 +488,10 @@ static int
> >>> cs_etm__process_aux_output_hw_id(struct perf_session *session,
> >>>
> >>> /* extract and parse the HW ID */
> >>> hw_id = event->aux_output_hw_id.hw_id;
> >>> - version = FIELD_GET(CS_AUX_HW_ID_VERSION_MASK, hw_id);
> >>> + version = FIELD_GET(CS_AUX_HW_ID_MAJOR_VERSION_MASK, hw_id);
> >>>
> >>> /* check that we can handle this version */
> >>> - if (version > CS_AUX_HW_ID_CURR_VERSION) {
> >>> + if (version > CS_AUX_HW_ID_MAJOR_VERSION) {
> >>> pr_err("CS ETM Trace: PERF_RECORD_AUX_OUTPUT_HW_ID
> >>> version %d not supported. Please update Perf.\n",
> >>> version);
> >>> return -EINVAL;
> >>> @@ -442,7 +516,10 @@ static int
> >>> cs_etm__process_aux_output_hw_id(struct perf_session *session,
> >>> return -EINVAL;
> >>> }
> >>>
> >>> - return cs_etm__process_trace_id_v0(etm, cpu, hw_id);
> >>
> >> Perhaps leave this as the final statement of the function
> >>
> >>> + if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0)
> >>
> >> this could be moved before and be
> >>
> >> if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 1)
> >> return cs_etm__process_trace_id_v0_1(etm, cpu, hw_id);
> >>
> >>
> >
> > Because I was intending minor version changes to be backwards compatible
> > I have it so that any value other than 0 is treated as v0.1. Otherwise
> > version updates will break old versions of Perf. And then if we added a
> > v0.3 it would look like this:
>
> That should have said v0.2 ^
>
> >
> > if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0)
> > return cs_etm__process_trace_id_v0(etm, cpu, hw_id);
> > else if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 1)
> > return cs_etm__process_trace_id_v0_1(etm, cpu, hw_id);
> > else
> > return cs_etm__process_trace_id_v0_2(etm, cpu, hw_id);
> >
> > Based on that I'm not sure if you still think it should be changed?
--
Mike Leach
Principal Engineer, ARM Ltd.
Manchester Design Centre. UK
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [PATCH v5 06/17] perf: cs-etm: Support version 0.1 of HW_ID packets
2024-07-19 13:45 ` Mike Leach
@ 2024-07-19 13:57 ` James Clark
0 siblings, 0 replies; 40+ messages in thread
From: James Clark @ 2024-07-19 13:57 UTC (permalink / raw)
To: Mike Leach
Cc: coresight, suzuki.poulose, gankulkarni, leo.yan,
anshuman.khandual, James Clark, Alexander Shishkin,
Maxime Coquelin, Alexandre Torgue, John Garry, Will Deacon,
Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Jiri Olsa, Ian Rogers, Adrian Hunter,
Liang, Kan, linux-kernel, linux-arm-kernel, linux-stm32,
linux-perf-users
On 19/07/2024 2:45 pm, Mike Leach wrote:
> Fair enough - less worried about the ordering as the final :
>
> else
> return fn()
> }
>
> where there's no unconditional return at the end of the function. The
> last else looks redundant to me. More a stylistic thing, not sure if
> there is a hard and fast rule either way
>
> Mike
>
>
>
Ok yeah I can update that.
> On Fri, 19 Jul 2024 at 11:49, James Clark <james.clark@linaro.org> wrote:
>>
>>
>>
>> On 19/07/2024 11:48 am, James Clark wrote:
>>>
>>>
>>> On 18/07/2024 2:24 pm, Mike Leach wrote:
>>>> On Fri, 12 Jul 2024 at 11:22, James Clark <james.clark@linaro.org> wrote:
>>>>>
>>>>> From: James Clark <james.clark@arm.com>
>>>>>
>>>>> v0.1 HW_ID packets have a new field that describes which sink each CPU
>>>>> writes to. Use the sink ID to link trace ID maps to each other so that
>>>>> mappings are shared wherever the sink is shared.
>>>>>
>>>>> Also update the error message to show that overlapping IDs aren't an
>>>>> error in per-thread mode, just not supported. In the future we can
>>>>> use the CPU ID from the AUX records, or watch for changing sink IDs on
>>>>> HW_ID packets to use the correct decoders.
>>>>>
>>>>> Signed-off-by: James Clark <james.clark@arm.com>
>>>>> Signed-off-by: James Clark <james.clark@linaro.org>
>>>>> ---
>>>>> tools/include/linux/coresight-pmu.h | 17 +++--
>>>>> tools/perf/util/cs-etm.c | 100 +++++++++++++++++++++++++---
>>>>> 2 files changed, 103 insertions(+), 14 deletions(-)
>>>>>
>>>>> diff --git a/tools/include/linux/coresight-pmu.h
>>>>> b/tools/include/linux/coresight-pmu.h
>>>>> index 51ac441a37c3..89b0ac0014b0 100644
>>>>> --- a/tools/include/linux/coresight-pmu.h
>>>>> +++ b/tools/include/linux/coresight-pmu.h
>>>>> @@ -49,12 +49,21 @@
>>>>> * Interpretation of the PERF_RECORD_AUX_OUTPUT_HW_ID payload.
>>>>> * Used to associate a CPU with the CoreSight Trace ID.
>>>>> * [07:00] - Trace ID - uses 8 bits to make value easy to read in
>>>>> file.
>>>>> - * [59:08] - Unused (SBZ)
>>>>> - * [63:60] - Version
>>>>> + * [39:08] - Sink ID - as reported in
>>>>> /sys/bus/event_source/devices/cs_etm/sinks/
>>>>> + * Added in minor version 1.
>>>>> + * [55:40] - Unused (SBZ)
>>>>> + * [59:56] - Minor Version - previously existing fields are
>>>>> compatible with
>>>>> + * all minor versions.
>>>>> + * [63:60] - Major Version - previously existing fields mean
>>>>> different things
>>>>> + * in new major versions.
>>>>> */
>>>>> #define CS_AUX_HW_ID_TRACE_ID_MASK GENMASK_ULL(7, 0)
>>>>> -#define CS_AUX_HW_ID_VERSION_MASK GENMASK_ULL(63, 60)
>>>>> +#define CS_AUX_HW_ID_SINK_ID_MASK GENMASK_ULL(39, 8)
>>>>>
>>>>> -#define CS_AUX_HW_ID_CURR_VERSION 0
>>>>> +#define CS_AUX_HW_ID_MINOR_VERSION_MASK GENMASK_ULL(59, 56)
>>>>> +#define CS_AUX_HW_ID_MAJOR_VERSION_MASK GENMASK_ULL(63, 60)
>>>>> +
>>>>> +#define CS_AUX_HW_ID_MAJOR_VERSION 0
>>>>> +#define CS_AUX_HW_ID_MINOR_VERSION 1
>>>>>
>>>>> #endif
>>>>> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
>>>>> index 954a6f7bedf3..87e983da19be 100644
>>>>> --- a/tools/perf/util/cs-etm.c
>>>>> +++ b/tools/perf/util/cs-etm.c
>>>>> @@ -118,6 +118,12 @@ struct cs_etm_queue {
>>>>> struct cs_etm_traceid_queue **traceid_queues;
>>>>> /* Conversion between traceID and metadata pointers */
>>>>> struct intlist *traceid_list;
>>>>> + /*
>>>>> + * Same as traceid_list, but traceid_list may be a reference
>>>>> to another
>>>>> + * queue's which has a matching sink ID.
>>>>> + */
>>>>> + struct intlist *own_traceid_list;
>>>>> + u32 sink_id;
>>>>> };
>>>>>
>>>>> static int cs_etm__process_timestamped_queues(struct
>>>>> cs_etm_auxtrace *etm);
>>>>> @@ -142,6 +148,7 @@ static int cs_etm__metadata_set_trace_id(u8
>>>>> trace_chan_id, u64 *cpu_metadata);
>>>>> (queue_nr << 16 | trace_chan_id)
>>>>> #define TO_QUEUE_NR(cs_queue_nr) (cs_queue_nr >> 16)
>>>>> #define TO_TRACE_CHAN_ID(cs_queue_nr) (cs_queue_nr & 0x0000ffff)
>>>>> +#define SINK_UNSET ((u32) -1)
>>>>>
>>>>> static u32 cs_etm__get_v7_protocol_version(u32 etmidr)
>>>>> {
>>>>> @@ -241,7 +248,16 @@ static int cs_etm__insert_trace_id_node(struct
>>>>> cs_etm_queue *etmq,
>>>>> int err;
>>>>>
>>>>> if (curr_cpu_data[CS_ETM_CPU] !=
>>>>> cpu_metadata[CS_ETM_CPU]) {
>>>>> - pr_err("CS_ETM: map mismatch between HW_ID
>>>>> packet CPU and Trace ID\n");
>>>>> + /*
>>>>> + * With > CORESIGHT_TRACE_IDS_MAX ETMs,
>>>>> overlapping IDs
>>>>> + * are expected (but not supported) in
>>>>> per-thread mode,
>>>>> + * rather than signifying an error.
>>>>> + */
>>>>> + if (etmq->etm->per_thread_decoding)
>>>>> + pr_err("CS_ETM: overlapping Trace IDs
>>>>> aren't currently supported in per-thread mode\n");
>>>>> + else
>>>>> + pr_err("CS_ETM: map mismatch between
>>>>> HW_ID packet CPU and Trace ID\n");
>>>>> +
>>>>> return -EINVAL;
>>>>> }
>>>>>
>>>>> @@ -326,6 +342,64 @@ static int cs_etm__process_trace_id_v0(struct
>>>>> cs_etm_auxtrace *etm, int cpu,
>>>>> return cs_etm__metadata_set_trace_id(trace_chan_id, cpu_data);
>>>>> }
>>>>>
>>>>> +static int cs_etm__process_trace_id_v0_1(struct cs_etm_auxtrace
>>>>> *etm, int cpu,
>>>>> + u64 hw_id)
>>>>> +{
>>>>> + struct cs_etm_queue *etmq = cs_etm__get_queue(etm, cpu);
>>>>> + int ret;
>>>>> + u64 *cpu_data;
>>>>> + u32 sink_id = FIELD_GET(CS_AUX_HW_ID_SINK_ID_MASK, hw_id);
>>>>> + u8 trace_id = FIELD_GET(CS_AUX_HW_ID_TRACE_ID_MASK, hw_id);
>>>>> +
>>>>> + /*
>>>>> + * Check sink id hasn't changed in per-cpu mode. In
>>>>> per-thread mode,
>>>>> + * let it pass for now until an actual overlapping trace ID
>>>>> is hit. In
>>>>> + * most cases IDs won't overlap even if the sink changes.
>>>>> + */
>>>>> + if (!etmq->etm->per_thread_decoding && etmq->sink_id !=
>>>>> SINK_UNSET &&
>>>>> + etmq->sink_id != sink_id) {
>>>>> + pr_err("CS_ETM: mismatch between sink IDs\n");
>>>>> + return -EINVAL;
>>>>> + }
>>>>> +
>>>>> + etmq->sink_id = sink_id;
>>>>> +
>>>>> + /* Find which other queues use this sink and link their ID
>>>>> maps */
>>>>> + for (unsigned int i = 0; i < etm->queues.nr_queues; ++i) {
>>>>> + struct cs_etm_queue *other_etmq =
>>>>> etm->queues.queue_array[i].priv;
>>>>> +
>>>>> + /* Different sinks, skip */
>>>>> + if (other_etmq->sink_id != etmq->sink_id)
>>>>> + continue;
>>>>> +
>>>>> + /* Already linked, skip */
>>>>> + if (other_etmq->traceid_list == etmq->traceid_list)
>>>>> + continue;
>>>>> +
>>>>> + /* At the point of first linking, this one should be
>>>>> empty */
>>>>> + if (!intlist__empty(etmq->traceid_list)) {
>>>>> + pr_err("CS_ETM: Can't link populated trace ID
>>>>> lists\n");
>>>>> + return -EINVAL;
>>>>> + }
>>>>> +
>>>>> + etmq->own_traceid_list = NULL;
>>>>> + intlist__delete(etmq->traceid_list);
>>>>> + etmq->traceid_list = other_etmq->traceid_list;
>>>>> + break;
>>>>> + }
>>>>> +
>>>>> + cpu_data = get_cpu_data(etm, cpu);
>>>>> + ret = cs_etm__insert_trace_id_node(etmq, trace_id, cpu_data);
>>>>> + if (ret)
>>>>> + return ret;
>>>>> +
>>>>> + ret = cs_etm__metadata_set_trace_id(trace_id, cpu_data);
>>>>> + if (ret)
>>>>> + return ret;
>>>>> +
>>>>> + return 0;
>>>>> +}
>>>>> +
>>>>> static int cs_etm__metadata_get_trace_id(u8 *trace_chan_id, u64
>>>>> *cpu_metadata)
>>>>> {
>>>>> u64 cs_etm_magic = cpu_metadata[CS_ETM_MAGIC];
>>>>> @@ -414,10 +488,10 @@ static int
>>>>> cs_etm__process_aux_output_hw_id(struct perf_session *session,
>>>>>
>>>>> /* extract and parse the HW ID */
>>>>> hw_id = event->aux_output_hw_id.hw_id;
>>>>> - version = FIELD_GET(CS_AUX_HW_ID_VERSION_MASK, hw_id);
>>>>> + version = FIELD_GET(CS_AUX_HW_ID_MAJOR_VERSION_MASK, hw_id);
>>>>>
>>>>> /* check that we can handle this version */
>>>>> - if (version > CS_AUX_HW_ID_CURR_VERSION) {
>>>>> + if (version > CS_AUX_HW_ID_MAJOR_VERSION) {
>>>>> pr_err("CS ETM Trace: PERF_RECORD_AUX_OUTPUT_HW_ID
>>>>> version %d not supported. Please update Perf.\n",
>>>>> version);
>>>>> return -EINVAL;
>>>>> @@ -442,7 +516,10 @@ static int
>>>>> cs_etm__process_aux_output_hw_id(struct perf_session *session,
>>>>> return -EINVAL;
>>>>> }
>>>>>
>>>>> - return cs_etm__process_trace_id_v0(etm, cpu, hw_id);
>>>>
>>>> Perhaps leave this as the final statement of the function
>>>>
>>>>> + if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0)
>>>>
>>>> this could be moved before and be
>>>>
>>>> if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 1)
>>>> return cs_etm__process_trace_id_v0_1(etm, cpu, hw_id);
>>>>
>>>>
>>>
>>> Because I was intending minor version changes to be backwards compatible
>>> I have it so that any value other than 0 is treated as v0.1. Otherwise
>>> version updates will break old versions of Perf. And then if we added a
>>> v0.3 it would look like this:
>>
>> That should have said v0.2 ^
>>
>>>
>>> if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 0)
>>> return cs_etm__process_trace_id_v0(etm, cpu, hw_id);
>>> else if (FIELD_GET(CS_AUX_HW_ID_MINOR_VERSION_MASK, hw_id) == 1)
>>> return cs_etm__process_trace_id_v0_1(etm, cpu, hw_id);
>>> else
>>> return cs_etm__process_trace_id_v0_2(etm, cpu, hw_id);
>>>
>>> Based on that I'm not sure if you still think it should be changed?
>
>
>
^ permalink raw reply [flat|nested] 40+ messages in thread
* [PATCH v5 07/17] perf: cs-etm: Print queue number in raw trace dump
2024-07-12 10:20 [PATCH v5 00/17] coresight: Use per-sink trace ID maps for Perf sessions James Clark
` (5 preceding siblings ...)
2024-07-12 10:20 ` [PATCH v5 06/17] perf: cs-etm: Support version 0.1 of HW_ID packets James Clark
@ 2024-07-12 10:20 ` James Clark
2024-07-18 13:25 ` Mike Leach
2024-07-12 10:20 ` [PATCH v5 08/17] perf: cs-etm: Add runtime version check for OpenCSD James Clark
` (9 subsequent siblings)
16 siblings, 1 reply; 40+ messages in thread
From: James Clark @ 2024-07-12 10:20 UTC (permalink / raw)
To: coresight, suzuki.poulose, gankulkarni, mike.leach, leo.yan,
anshuman.khandual
Cc: James Clark, James Clark, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, John Garry, Will Deacon, Peter Zijlstra,
Ingo Molnar, Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
Jiri Olsa, Ian Rogers, Adrian Hunter, Liang, Kan, linux-kernel,
linux-arm-kernel, linux-stm32, linux-perf-users
From: James Clark <james.clark@arm.com>
Now that we have overlapping trace IDs, it's also useful to know the
queue number in order to distinguish the source of the trace, so
print it inline.
Signed-off-by: James Clark <james.clark@arm.com>
Signed-off-by: James Clark <james.clark@linaro.org>
---
tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 4 ++--
tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 2 +-
tools/perf/util/cs-etm.c | 7 ++++---
3 files changed, 7 insertions(+), 6 deletions(-)
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index d49c3e9c7c21..b78ef0262135 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -41,7 +41,7 @@ const u32 INSTR_PER_NS = 10;
struct cs_etm_decoder {
void *data;
- void (*packet_printer)(const char *msg);
+ void (*packet_printer)(const char *msg, void *data);
bool suppress_printing;
dcd_tree_handle_t dcd_tree;
cs_etm_mem_cb_type mem_access;
@@ -202,7 +202,7 @@ static void cs_etm_decoder__print_str_cb(const void *p_context,
const struct cs_etm_decoder *decoder = p_context;
if (p_context && str_len && !decoder->suppress_printing)
- decoder->packet_printer(msg);
+ decoder->packet_printer(msg, decoder->data);
}
static int
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
index 272c2efe78ee..12c782fa6db2 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
@@ -60,7 +60,7 @@ struct cs_etm_trace_params {
struct cs_etm_decoder_params {
int operation;
- void (*packet_printer)(const char *msg);
+ void (*packet_printer)(const char *msg, void *data);
cs_etm_mem_cb_type mem_acc_cb;
bool formatted;
bool fsyncs;
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 87e983da19be..49fadf46f42b 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -762,15 +762,16 @@ static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
}
}
-static void cs_etm__packet_dump(const char *pkt_string)
+static void cs_etm__packet_dump(const char *pkt_string, void *data)
{
const char *color = PERF_COLOR_BLUE;
int len = strlen(pkt_string);
+ struct cs_etm_queue *etmq = data;
if (len && (pkt_string[len-1] == '\n'))
- color_fprintf(stdout, color, " %s", pkt_string);
+ color_fprintf(stdout, color, " Qnr:%d; %s", etmq->queue_nr, pkt_string);
else
- color_fprintf(stdout, color, " %s\n", pkt_string);
+ color_fprintf(stdout, color, " Qnr:%d; %s\n", etmq->queue_nr, pkt_string);
fflush(stdout);
}
--
2.34.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* Re: [PATCH v5 07/17] perf: cs-etm: Print queue number in raw trace dump
2024-07-12 10:20 ` [PATCH v5 07/17] perf: cs-etm: Print queue number in raw trace dump James Clark
@ 2024-07-18 13:25 ` Mike Leach
2024-07-18 14:30 ` James Clark
0 siblings, 1 reply; 40+ messages in thread
From: Mike Leach @ 2024-07-18 13:25 UTC (permalink / raw)
To: James Clark
Cc: coresight, suzuki.poulose, gankulkarni, leo.yan,
anshuman.khandual, James Clark, Alexander Shishkin,
Maxime Coquelin, Alexandre Torgue, John Garry, Will Deacon,
Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Jiri Olsa, Ian Rogers, Adrian Hunter,
Liang, Kan, linux-kernel, linux-arm-kernel, linux-stm32,
linux-perf-users
Hi James
On Fri, 12 Jul 2024 at 11:22, James Clark <james.clark@linaro.org> wrote:
>
> From: James Clark <james.clark@arm.com>
>
> Now that we have overlapping trace IDs it's also useful to know what the
> queue number is to be able to distinguish the source of the trace so
> print it inline.
>
Not sure queue number is meaningful to anyone other than someone
debugging the etm decode in perf. Perhaps cpu number?
Moreover - other additional debugging in the trace output is
controlled with build options.
See:-
Makefile.config -> ifdef CSTRACE_RAW,
thence:-
#ifdef CS_DEBUG_RAW in cs-etm-decoder.c
which adds in the raw byte data from the trace dump.
Could we make this additional info dependent on either the standard
DEBUG macro, or an additional build macro?
> Signed-off-by: James Clark <james.clark@arm.com>
> Signed-off-by: James Clark <james.clark@linaro.org>
> ---
> tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 4 ++--
> tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 2 +-
> tools/perf/util/cs-etm.c | 7 ++++---
> 3 files changed, 7 insertions(+), 6 deletions(-)
>
> diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> index d49c3e9c7c21..b78ef0262135 100644
> --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> @@ -41,7 +41,7 @@ const u32 INSTR_PER_NS = 10;
>
> struct cs_etm_decoder {
> void *data;
> - void (*packet_printer)(const char *msg);
> + void (*packet_printer)(const char *msg, void *data);
> bool suppress_printing;
> dcd_tree_handle_t dcd_tree;
> cs_etm_mem_cb_type mem_access;
> @@ -202,7 +202,7 @@ static void cs_etm_decoder__print_str_cb(const void *p_context,
> const struct cs_etm_decoder *decoder = p_context;
>
> if (p_context && str_len && !decoder->suppress_printing)
> - decoder->packet_printer(msg);
> + decoder->packet_printer(msg, decoder->data);
> }
>
> static int
> diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
> index 272c2efe78ee..12c782fa6db2 100644
> --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
> +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
> @@ -60,7 +60,7 @@ struct cs_etm_trace_params {
>
> struct cs_etm_decoder_params {
> int operation;
> - void (*packet_printer)(const char *msg);
> + void (*packet_printer)(const char *msg, void *data);
> cs_etm_mem_cb_type mem_acc_cb;
> bool formatted;
> bool fsyncs;
> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
> index 87e983da19be..49fadf46f42b 100644
> --- a/tools/perf/util/cs-etm.c
> +++ b/tools/perf/util/cs-etm.c
> @@ -762,15 +762,16 @@ static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
> }
> }
>
> -static void cs_etm__packet_dump(const char *pkt_string)
> +static void cs_etm__packet_dump(const char *pkt_string, void *data)
> {
> const char *color = PERF_COLOR_BLUE;
> int len = strlen(pkt_string);
> + struct cs_etm_queue *etmq = data;
>
> if (len && (pkt_string[len-1] == '\n'))
> - color_fprintf(stdout, color, " %s", pkt_string);
> + color_fprintf(stdout, color, " Qnr:%d; %s", etmq->queue_nr, pkt_string);
> else
> - color_fprintf(stdout, color, " %s\n", pkt_string);
> + color_fprintf(stdout, color, " Qnr:%d; %s\n", etmq->queue_nr, pkt_string);
>
> fflush(stdout);
> }
> --
> 2.34.1
>
Mike
--
Mike Leach
Principal Engineer, ARM Ltd.
Manchester Design Centre. UK
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [PATCH v5 07/17] perf: cs-etm: Print queue number in raw trace dump
2024-07-18 13:25 ` Mike Leach
@ 2024-07-18 14:30 ` James Clark
0 siblings, 0 replies; 40+ messages in thread
From: James Clark @ 2024-07-18 14:30 UTC (permalink / raw)
To: Mike Leach
Cc: coresight, suzuki.poulose, gankulkarni, leo.yan,
anshuman.khandual, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, John Garry, Will Deacon, Peter Zijlstra,
Ingo Molnar, Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
Jiri Olsa, Ian Rogers, Adrian Hunter, Liang, Kan, linux-kernel,
linux-arm-kernel, linux-stm32, linux-perf-users
On 18/07/2024 2:25 pm, Mike Leach wrote:
> Hi James
>
> On Fri, 12 Jul 2024 at 11:22, James Clark <james.clark@linaro.org> wrote:
>>
>> From: James Clark <james.clark@arm.com>
>>
>> Now that we have overlapping trace IDs it's also useful to know what the
>> queue number is to be able to distinguish the source of the trace so
>> print it inline.
>>
>
> Not sure queue number is meaningful to anyone other than someone
> debugging the etm decode in perf. Perhaps cpu number?
It's more than just for debugging Perf: anyone who was previously
reading the raw trace would probably have grepped for, or be looking at,
the "ID:" field. Now that doesn't identify trace from a single source
anymore, due to the overlapping IDs. The same applies if you want to
process the output in some way per-line.
With ETE technically it is CPU number, but I didn't want to name it like
that because it's overloaded: With ETM it's the "collection CPU" which
would be too misleading to label as CPU, and in per-thread mode it's
always 0 and not a CPU at all.
Although I suppose it's already labeled as CPU on the
PERF_RECORD_AUXTRACE output, and this just duplicates that so it should
be called CPU. Maybe it is ok to drop this one because the info already
exists in the PERF_RECORD_AUXTRACE output.
>
> Moreover - other additional debugging in the trace output is
> controlled with build options.
> See:-
> Makefile.config -> ifdef CSTRACE_RAW,
> thence:-
> #ifdef CS_DEBUG_RAW in cs-etm-decoder.c
>
> which adds in the raw byte data from the trace dump.
>
> Could we make this addtional info dependent on either the standard
> DEBUG macro, or an additional build macro.
>
>
What about behind the verbose argument to Perf?
>
>> Signed-off-by: James Clark <james.clark@arm.com>
>> Signed-off-by: James Clark <james.clark@linaro.org>
>> ---
>> tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 4 ++--
>> tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 2 +-
>> tools/perf/util/cs-etm.c | 7 ++++---
>> 3 files changed, 7 insertions(+), 6 deletions(-)
>>
>> diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
>> index d49c3e9c7c21..b78ef0262135 100644
>> --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
>> +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
>> @@ -41,7 +41,7 @@ const u32 INSTR_PER_NS = 10;
>>
>> struct cs_etm_decoder {
>> void *data;
>> - void (*packet_printer)(const char *msg);
>> + void (*packet_printer)(const char *msg, void *data);
>> bool suppress_printing;
>> dcd_tree_handle_t dcd_tree;
>> cs_etm_mem_cb_type mem_access;
>> @@ -202,7 +202,7 @@ static void cs_etm_decoder__print_str_cb(const void *p_context,
>> const struct cs_etm_decoder *decoder = p_context;
>>
>> if (p_context && str_len && !decoder->suppress_printing)
>> - decoder->packet_printer(msg);
>> + decoder->packet_printer(msg, decoder->data);
>> }
>>
>> static int
>> diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
>> index 272c2efe78ee..12c782fa6db2 100644
>> --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
>> +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
>> @@ -60,7 +60,7 @@ struct cs_etm_trace_params {
>>
>> struct cs_etm_decoder_params {
>> int operation;
>> - void (*packet_printer)(const char *msg);
>> + void (*packet_printer)(const char *msg, void *data);
>> cs_etm_mem_cb_type mem_acc_cb;
>> bool formatted;
>> bool fsyncs;
>> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
>> index 87e983da19be..49fadf46f42b 100644
>> --- a/tools/perf/util/cs-etm.c
>> +++ b/tools/perf/util/cs-etm.c
>> @@ -762,15 +762,16 @@ static void cs_etm__packet_swap(struct cs_etm_auxtrace *etm,
>> }
>> }
>>
>> -static void cs_etm__packet_dump(const char *pkt_string)
>> +static void cs_etm__packet_dump(const char *pkt_string, void *data)
>> {
>> const char *color = PERF_COLOR_BLUE;
>> int len = strlen(pkt_string);
>> + struct cs_etm_queue *etmq = data;
>>
>> if (len && (pkt_string[len-1] == '\n'))
>> - color_fprintf(stdout, color, " %s", pkt_string);
>> + color_fprintf(stdout, color, " Qnr:%d; %s", etmq->queue_nr, pkt_string);
>> else
>> - color_fprintf(stdout, color, " %s\n", pkt_string);
>> + color_fprintf(stdout, color, " Qnr:%d; %s\n", etmq->queue_nr, pkt_string);
>>
>> fflush(stdout);
>> }
>> --
>> 2.34.1
>>
>
> Mike
> --
> Mike Leach
> Principal Engineer, ARM Ltd.
> Manchester Design Centre. UK
^ permalink raw reply [flat|nested] 40+ messages in thread
* [PATCH v5 08/17] perf: cs-etm: Add runtime version check for OpenCSD
2024-07-12 10:20 [PATCH v5 00/17] coresight: Use per-sink trace ID maps for Perf sessions James Clark
` (6 preceding siblings ...)
2024-07-12 10:20 ` [PATCH v5 07/17] perf: cs-etm: Print queue number in raw trace dump James Clark
@ 2024-07-12 10:20 ` James Clark
2024-07-18 13:25 ` Mike Leach
2024-07-12 10:20 ` [PATCH v5 09/17] coresight: Remove unused ETM Perf stubs James Clark
` (8 subsequent siblings)
16 siblings, 1 reply; 40+ messages in thread
From: James Clark @ 2024-07-12 10:20 UTC (permalink / raw)
To: coresight, suzuki.poulose, gankulkarni, mike.leach, leo.yan,
anshuman.khandual
Cc: James Clark, James Clark, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, John Garry, Will Deacon, Peter Zijlstra,
Ingo Molnar, Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
Jiri Olsa, Ian Rogers, Adrian Hunter, Liang, Kan, linux-kernel,
linux-arm-kernel, linux-stm32, linux-perf-users
From: James Clark <james.clark@arm.com>
OpenCSD is dynamically linked, so although there is a build-time check,
the user might still have the wrong version at runtime. To avoid
hard-to-debug errors, add a runtime version check.
Signed-off-by: James Clark <james.clark@arm.com>
Signed-off-by: James Clark <james.clark@linaro.org>
---
tools/build/feature/test-libopencsd.c | 4 ++--
tools/perf/Makefile.config | 2 +-
tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 13 +++++++++++++
tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 1 +
tools/perf/util/cs-etm-decoder/cs-etm-min-version.h | 13 +++++++++++++
tools/perf/util/cs-etm.c | 3 +++
6 files changed, 33 insertions(+), 3 deletions(-)
create mode 100644 tools/perf/util/cs-etm-decoder/cs-etm-min-version.h
diff --git a/tools/build/feature/test-libopencsd.c b/tools/build/feature/test-libopencsd.c
index 4cfcef9da3e4..d092a0c662f4 100644
--- a/tools/build/feature/test-libopencsd.c
+++ b/tools/build/feature/test-libopencsd.c
@@ -1,12 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
#include <opencsd/c_api/opencsd_c_api.h>
+#include "cs-etm-decoder/cs-etm-min-version.h"
/*
* Check OpenCSD library version is sufficient to provide required features
*/
-#define OCSD_MIN_VER ((1 << 16) | (2 << 8) | (1))
#if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER)
-#error "OpenCSD >= 1.2.1 is required"
+#error "OpenCSD minimum version (OCSD_MIN_VER) not met."
#endif
int main(void)
diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
index 7f1e016a9253..2d21be42820e 100644
--- a/tools/perf/Makefile.config
+++ b/tools/perf/Makefile.config
@@ -141,7 +141,7 @@ endif
ifdef CSLIBS
LIBOPENCSD_LDFLAGS := -L$(CSLIBS)
endif
-FEATURE_CHECK_CFLAGS-libopencsd := $(LIBOPENCSD_CFLAGS)
+FEATURE_CHECK_CFLAGS-libopencsd := $(LIBOPENCSD_CFLAGS) -I$(src-perf)/util
FEATURE_CHECK_LDFLAGS-libopencsd := $(LIBOPENCSD_LDFLAGS) $(OPENCSDLIBS)
# for linking with debug library, run like:
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
index b78ef0262135..5e1b4503aab1 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
@@ -16,6 +16,7 @@
#include "cs-etm.h"
#include "cs-etm-decoder.h"
+#include "cs-etm-min-version.h"
#include "debug.h"
#include "intlist.h"
@@ -835,3 +836,15 @@ const char *cs_etm_decoder__get_name(struct cs_etm_decoder *decoder)
{
return decoder->decoder_name;
}
+
+int cs_etm_decoder__check_ver(void)
+{
+ if (ocsd_get_version() < OCSD_MIN_VER) {
+ pr_err("OpenCSD >= %d.%d.%d is required\n", OCSD_MIN_MAJOR,
+ OCSD_MIN_MINOR,
+ OCSD_MIN_PATCH);
+ return -EINVAL;
+ }
+
+ return 0;
+}
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
index 12c782fa6db2..2ec426ee16dc 100644
--- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
@@ -107,5 +107,6 @@ int cs_etm_decoder__get_packet(struct cs_etm_packet_queue *packet_queue,
int cs_etm_decoder__reset(struct cs_etm_decoder *decoder);
const char *cs_etm_decoder__get_name(struct cs_etm_decoder *decoder);
+int cs_etm_decoder__check_ver(void);
#endif /* INCLUDE__CS_ETM_DECODER_H__ */
diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-min-version.h b/tools/perf/util/cs-etm-decoder/cs-etm-min-version.h
new file mode 100644
index 000000000000..c69597e9d0af
--- /dev/null
+++ b/tools/perf/util/cs-etm-decoder/cs-etm-min-version.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef INCLUDE__CS_ETM_MIN_VERSION_H__
+#define INCLUDE__CS_ETM_MIN_VERSION_H__
+
+#define OCSD_MIN_MAJOR 1
+#define OCSD_MIN_MINOR 2
+#define OCSD_MIN_PATCH 1
+
+#define OCSD_MIN_VER ((OCSD_MIN_MAJOR << 16) | \
+ (OCSD_MIN_MINOR << 8) | \
+ (OCSD_MIN_PATCH))
+
+#endif /* INCLUDE__CS_ETM_MIN_VERSION_H__ */
diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
index 49fadf46f42b..2385d5ed5ea5 100644
--- a/tools/perf/util/cs-etm.c
+++ b/tools/perf/util/cs-etm.c
@@ -3369,6 +3369,9 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
u64 *ptr = NULL;
u64 **metadata = NULL;
+ if (cs_etm_decoder__check_ver())
+ return -EINVAL;
+
/* First the global part */
ptr = (u64 *) auxtrace_info->priv;
num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
--
2.34.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* Re: [PATCH v5 08/17] perf: cs-etm: Add runtime version check for OpenCSD
2024-07-12 10:20 ` [PATCH v5 08/17] perf: cs-etm: Add runtime version check for OpenCSD James Clark
@ 2024-07-18 13:25 ` Mike Leach
0 siblings, 0 replies; 40+ messages in thread
From: Mike Leach @ 2024-07-18 13:25 UTC (permalink / raw)
To: James Clark
Cc: coresight, suzuki.poulose, gankulkarni, leo.yan,
anshuman.khandual, James Clark, Alexander Shishkin,
Maxime Coquelin, Alexandre Torgue, John Garry, Will Deacon,
Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Jiri Olsa, Ian Rogers, Adrian Hunter,
Liang, Kan, linux-kernel, linux-arm-kernel, linux-stm32,
linux-perf-users
On Fri, 12 Jul 2024 at 11:22, James Clark <james.clark@linaro.org> wrote:
>
> From: James Clark <james.clark@arm.com>
>
> OpenCSD is dynamically linked so although there is a build time check,
> at runtime the user might still have the wrong version. To avoid hard
> to debug errors, add a runtime version check.
>
> Signed-off-by: James Clark <james.clark@arm.com>
> Signed-off-by: James Clark <james.clark@linaro.org>
> ---
> tools/build/feature/test-libopencsd.c | 4 ++--
> tools/perf/Makefile.config | 2 +-
> tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 13 +++++++++++++
> tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 1 +
> tools/perf/util/cs-etm-decoder/cs-etm-min-version.h | 13 +++++++++++++
> tools/perf/util/cs-etm.c | 3 +++
> 6 files changed, 33 insertions(+), 3 deletions(-)
> create mode 100644 tools/perf/util/cs-etm-decoder/cs-etm-min-version.h
>
> diff --git a/tools/build/feature/test-libopencsd.c b/tools/build/feature/test-libopencsd.c
> index 4cfcef9da3e4..d092a0c662f4 100644
> --- a/tools/build/feature/test-libopencsd.c
> +++ b/tools/build/feature/test-libopencsd.c
> @@ -1,12 +1,12 @@
> // SPDX-License-Identifier: GPL-2.0
> #include <opencsd/c_api/opencsd_c_api.h>
> +#include "cs-etm-decoder/cs-etm-min-version.h"
>
> /*
> * Check OpenCSD library version is sufficient to provide required features
> */
> -#define OCSD_MIN_VER ((1 << 16) | (2 << 8) | (1))
> #if !defined(OCSD_VER_NUM) || (OCSD_VER_NUM < OCSD_MIN_VER)
> -#error "OpenCSD >= 1.2.1 is required"
> +#error "OpenCSD minimum version (OCSD_MIN_VER) not met."
> #endif
>
> int main(void)
> diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config
> index 7f1e016a9253..2d21be42820e 100644
> --- a/tools/perf/Makefile.config
> +++ b/tools/perf/Makefile.config
> @@ -141,7 +141,7 @@ endif
> ifdef CSLIBS
> LIBOPENCSD_LDFLAGS := -L$(CSLIBS)
> endif
> -FEATURE_CHECK_CFLAGS-libopencsd := $(LIBOPENCSD_CFLAGS)
> +FEATURE_CHECK_CFLAGS-libopencsd := $(LIBOPENCSD_CFLAGS) -I$(src-perf)/util
> FEATURE_CHECK_LDFLAGS-libopencsd := $(LIBOPENCSD_LDFLAGS) $(OPENCSDLIBS)
>
> # for linking with debug library, run like:
> diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> index b78ef0262135..5e1b4503aab1 100644
> --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c
> @@ -16,6 +16,7 @@
>
> #include "cs-etm.h"
> #include "cs-etm-decoder.h"
> +#include "cs-etm-min-version.h"
> #include "debug.h"
> #include "intlist.h"
>
> @@ -835,3 +836,15 @@ const char *cs_etm_decoder__get_name(struct cs_etm_decoder *decoder)
> {
> return decoder->decoder_name;
> }
> +
> +int cs_etm_decoder__check_ver(void)
> +{
> + if (ocsd_get_version() < OCSD_MIN_VER) {
> + pr_err("OpenCSD >= %d.%d.%d is required\n", OCSD_MIN_MAJOR,
> + OCSD_MIN_MINOR,
> + OCSD_MIN_PATCH);
> + return -EINVAL;
> + }
> +
> + return 0;
> +}
> diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
> index 12c782fa6db2..2ec426ee16dc 100644
> --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
> +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h
> @@ -107,5 +107,6 @@ int cs_etm_decoder__get_packet(struct cs_etm_packet_queue *packet_queue,
>
> int cs_etm_decoder__reset(struct cs_etm_decoder *decoder);
> const char *cs_etm_decoder__get_name(struct cs_etm_decoder *decoder);
> +int cs_etm_decoder__check_ver(void);
>
> #endif /* INCLUDE__CS_ETM_DECODER_H__ */
> diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-min-version.h b/tools/perf/util/cs-etm-decoder/cs-etm-min-version.h
> new file mode 100644
> index 000000000000..c69597e9d0af
> --- /dev/null
> +++ b/tools/perf/util/cs-etm-decoder/cs-etm-min-version.h
> @@ -0,0 +1,13 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +#ifndef INCLUDE__CS_ETM_MIN_VERSION_H__
> +#define INCLUDE__CS_ETM_MIN_VERSION_H__
> +
> +#define OCSD_MIN_MAJOR 1
> +#define OCSD_MIN_MINOR 2
> +#define OCSD_MIN_PATCH 1
> +
> +#define OCSD_MIN_VER ((OCSD_MIN_MAJOR << 16) | \
> + (OCSD_MIN_MINOR << 8) | \
> + (OCSD_MIN_PATCH))
> +
> +#endif /* INCLUDE__CS_ETM_MIN_VERSION_H__ */
> diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c
> index 49fadf46f42b..2385d5ed5ea5 100644
> --- a/tools/perf/util/cs-etm.c
> +++ b/tools/perf/util/cs-etm.c
> @@ -3369,6 +3369,9 @@ int cs_etm__process_auxtrace_info_full(union perf_event *event,
> u64 *ptr = NULL;
> u64 **metadata = NULL;
>
> + if (cs_etm_decoder__check_ver())
> + return -EINVAL;
> +
> /* First the global part */
> ptr = (u64 *) auxtrace_info->priv;
> num_cpu = ptr[CS_PMU_TYPE_CPUS] & 0xffffffff;
> --
> 2.34.1
>
Reviewed-by: Mike Leach <mike.leach@linaro.org>
--
Mike Leach
Principal Engineer, ARM Ltd.
Manchester Design Centre. UK
^ permalink raw reply [flat|nested] 40+ messages in thread
* [PATCH v5 09/17] coresight: Remove unused ETM Perf stubs
2024-07-12 10:20 [PATCH v5 00/17] coresight: Use per-sink trace ID maps for Perf sessions James Clark
` (7 preceding siblings ...)
2024-07-12 10:20 ` [PATCH v5 08/17] perf: cs-etm: Add runtime version check for OpenCSD James Clark
@ 2024-07-12 10:20 ` James Clark
2024-07-12 10:20 ` [PATCH v5 10/17] coresight: Clarify comments around the PID of the sink owner James Clark
` (7 subsequent siblings)
16 siblings, 0 replies; 40+ messages in thread
From: James Clark @ 2024-07-12 10:20 UTC (permalink / raw)
To: coresight, suzuki.poulose, gankulkarni, mike.leach, leo.yan,
anshuman.khandual
Cc: James Clark, James Clark, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, John Garry, Will Deacon, Peter Zijlstra,
Ingo Molnar, Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
Jiri Olsa, Ian Rogers, Adrian Hunter, Liang, Kan, linux-kernel,
linux-arm-kernel, linux-stm32, linux-perf-users
From: James Clark <james.clark@arm.com>
This file is never included anywhere if CONFIG_CORESIGHT is not set, so
the stubs in the #else branch are unused and aren't currently compile
tested with any config. Remove them.
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Reviewed-by: Mike Leach <mike.leach@linaro.org>
Signed-off-by: James Clark <james.clark@arm.com>
Signed-off-by: James Clark <james.clark@linaro.org>
---
.../hwtracing/coresight/coresight-etm-perf.h | 18 ------------------
1 file changed, 18 deletions(-)
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.h b/drivers/hwtracing/coresight/coresight-etm-perf.h
index bebbadee2ceb..744531158d6b 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.h
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.h
@@ -62,7 +62,6 @@ struct etm_event_data {
struct list_head * __percpu *path;
};
-#if IS_ENABLED(CONFIG_CORESIGHT)
int etm_perf_symlink(struct coresight_device *csdev, bool link);
int etm_perf_add_symlink_sink(struct coresight_device *csdev);
void etm_perf_del_symlink_sink(struct coresight_device *csdev);
@@ -77,23 +76,6 @@ static inline void *etm_perf_sink_config(struct perf_output_handle *handle)
int etm_perf_add_symlink_cscfg(struct device *dev,
struct cscfg_config_desc *config_desc);
void etm_perf_del_symlink_cscfg(struct cscfg_config_desc *config_desc);
-#else
-static inline int etm_perf_symlink(struct coresight_device *csdev, bool link)
-{ return -EINVAL; }
-int etm_perf_add_symlink_sink(struct coresight_device *csdev)
-{ return -EINVAL; }
-void etm_perf_del_symlink_sink(struct coresight_device *csdev) {}
-static inline void *etm_perf_sink_config(struct perf_output_handle *handle)
-{
- return NULL;
-}
-int etm_perf_add_symlink_cscfg(struct device *dev,
- struct cscfg_config_desc *config_desc)
-{ return -EINVAL; }
-void etm_perf_del_symlink_cscfg(struct cscfg_config_desc *config_desc) {}
-
-#endif /* CONFIG_CORESIGHT */
-
int __init etm_perf_init(void);
void etm_perf_exit(void);
--
2.34.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* [PATCH v5 10/17] coresight: Clarify comments around the PID of the sink owner
2024-07-12 10:20 [PATCH v5 00/17] coresight: Use per-sink trace ID maps for Perf sessions James Clark
` (8 preceding siblings ...)
2024-07-12 10:20 ` [PATCH v5 09/17] coresight: Remove unused ETM Perf stubs James Clark
@ 2024-07-12 10:20 ` James Clark
2024-07-12 10:20 ` [PATCH v5 11/17] coresight: Move struct coresight_trace_id_map to common header James Clark
` (6 subsequent siblings)
16 siblings, 0 replies; 40+ messages in thread
From: James Clark @ 2024-07-12 10:20 UTC (permalink / raw)
To: coresight, suzuki.poulose, gankulkarni, mike.leach, leo.yan,
anshuman.khandual
Cc: James Clark, James Clark, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, John Garry, Will Deacon, Peter Zijlstra,
Ingo Molnar, Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
Jiri Olsa, Ian Rogers, Adrian Hunter, Liang, Kan, linux-kernel,
linux-arm-kernel, linux-stm32, linux-perf-users
From: James Clark <james.clark@arm.com>
"Process being monitored" and "pid of the process to monitor" imply that
this would be the same PID if there were two sessions targeting the same
process. But this is actually the PID of the process that did the Perf
event open call, rather than the target of the session. So update the
comments to make this clearer.
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Reviewed-by: Mike Leach <mike.leach@linaro.org>
Signed-off-by: James Clark <james.clark@arm.com>
Signed-off-by: James Clark <james.clark@linaro.org>
---
drivers/hwtracing/coresight/coresight-tmc-etr.c | 5 +++--
drivers/hwtracing/coresight/coresight-tmc.h | 5 +++--
2 files changed, 6 insertions(+), 4 deletions(-)
diff --git a/drivers/hwtracing/coresight/coresight-tmc-etr.c b/drivers/hwtracing/coresight/coresight-tmc-etr.c
index e75428fa1592..8962fc27d04f 100644
--- a/drivers/hwtracing/coresight/coresight-tmc-etr.c
+++ b/drivers/hwtracing/coresight/coresight-tmc-etr.c
@@ -36,7 +36,8 @@ struct etr_buf_hw {
* etr_perf_buffer - Perf buffer used for ETR
* @drvdata - The ETR drvdaga this buffer has been allocated for.
* @etr_buf - Actual buffer used by the ETR
- * @pid - The PID this etr_perf_buffer belongs to.
+ * @pid - The PID of the session owner that etr_perf_buffer
+ * belongs to.
* @snaphost - Perf session mode
* @nr_pages - Number of pages in the ring buffer.
* @pages - Array of Pages in the ring buffer.
@@ -1662,7 +1663,7 @@ static int tmc_enable_etr_sink_perf(struct coresight_device *csdev, void *data)
goto unlock_out;
}
- /* Get a handle on the pid of the process to monitor */
+ /* Get a handle on the pid of the session owner */
pid = etr_perf->pid;
/* Do not proceed if this device is associated with another session */
diff --git a/drivers/hwtracing/coresight/coresight-tmc.h b/drivers/hwtracing/coresight/coresight-tmc.h
index c77763b49de0..2671926be62a 100644
--- a/drivers/hwtracing/coresight/coresight-tmc.h
+++ b/drivers/hwtracing/coresight/coresight-tmc.h
@@ -171,8 +171,9 @@ struct etr_buf {
* @csdev: component vitals needed by the framework.
* @miscdev: specifics to handle "/dev/xyz.tmc" entry.
* @spinlock: only one at a time pls.
- * @pid: Process ID of the process being monitored by the session
- * that is using this component.
+ * @pid: Process ID of the process that owns the session that is using
+ * this component. For example this would be the pid of the Perf
+ * process.
* @buf: Snapshot of the trace data for ETF/ETB.
* @etr_buf: details of buffer used in TMC-ETR
* @len: size of the available trace for ETF/ETB.
--
2.34.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* [PATCH v5 11/17] coresight: Move struct coresight_trace_id_map to common header
2024-07-12 10:20 [PATCH v5 00/17] coresight: Use per-sink trace ID maps for Perf sessions James Clark
` (9 preceding siblings ...)
2024-07-12 10:20 ` [PATCH v5 10/17] coresight: Clarify comments around the PID of the sink owner James Clark
@ 2024-07-12 10:20 ` James Clark
2024-07-12 10:20 ` [PATCH v5 12/17] coresight: Expose map arguments in trace ID API James Clark
` (5 subsequent siblings)
16 siblings, 0 replies; 40+ messages in thread
From: James Clark @ 2024-07-12 10:20 UTC (permalink / raw)
To: coresight, suzuki.poulose, gankulkarni, mike.leach, leo.yan,
anshuman.khandual
Cc: James Clark, James Clark, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, John Garry, Will Deacon, Peter Zijlstra,
Ingo Molnar, Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
Jiri Olsa, Ian Rogers, Adrian Hunter, Liang, Kan, linux-kernel,
linux-arm-kernel, linux-stm32, linux-perf-users
From: James Clark <james.clark@arm.com>
The trace ID maps will need to be created and stored by the core and
Perf code, so move the definition up to the common header.
Reviewed-by: Anshuman Khandual <anshuman.khandual@arm.com>
Reviewed-by: Mike Leach <mike.leach@linaro.org>
Signed-off-by: James Clark <james.clark@arm.com>
Signed-off-by: James Clark <james.clark@linaro.org>
---
.../hwtracing/coresight/coresight-trace-id.c | 1 +
.../hwtracing/coresight/coresight-trace-id.h | 19 -------------------
include/linux/coresight.h | 18 ++++++++++++++++++
3 files changed, 19 insertions(+), 19 deletions(-)
diff --git a/drivers/hwtracing/coresight/coresight-trace-id.c b/drivers/hwtracing/coresight/coresight-trace-id.c
index af5b4ef59cea..19005b5b4dc4 100644
--- a/drivers/hwtracing/coresight/coresight-trace-id.c
+++ b/drivers/hwtracing/coresight/coresight-trace-id.c
@@ -3,6 +3,7 @@
* Copyright (c) 2022, Linaro Limited, All rights reserved.
* Author: Mike Leach <mike.leach@linaro.org>
*/
+#include <linux/coresight.h>
#include <linux/coresight-pmu.h>
#include <linux/cpumask.h>
#include <linux/kernel.h>
diff --git a/drivers/hwtracing/coresight/coresight-trace-id.h b/drivers/hwtracing/coresight/coresight-trace-id.h
index 3797777d367e..49438a96fcc6 100644
--- a/drivers/hwtracing/coresight/coresight-trace-id.h
+++ b/drivers/hwtracing/coresight/coresight-trace-id.h
@@ -32,10 +32,6 @@
#include <linux/bitops.h>
#include <linux/types.h>
-
-/* architecturally we have 128 IDs some of which are reserved */
-#define CORESIGHT_TRACE_IDS_MAX 128
-
/* ID 0 is reserved */
#define CORESIGHT_TRACE_ID_RES_0 0
@@ -46,21 +42,6 @@
#define IS_VALID_CS_TRACE_ID(id) \
((id > CORESIGHT_TRACE_ID_RES_0) && (id < CORESIGHT_TRACE_ID_RES_TOP))
-/**
- * Trace ID map.
- *
- * @used_ids: Bitmap to register available (bit = 0) and in use (bit = 1) IDs.
- * Initialised so that the reserved IDs are permanently marked as
- * in use.
- * @pend_rel_ids: CPU IDs that have been released by the trace source but not
- * yet marked as available, to allow re-allocation to the same
- * CPU during a perf session.
- */
-struct coresight_trace_id_map {
- DECLARE_BITMAP(used_ids, CORESIGHT_TRACE_IDS_MAX);
- DECLARE_BITMAP(pend_rel_ids, CORESIGHT_TRACE_IDS_MAX);
-};
-
/* Allocate and release IDs for a single default trace ID map */
/**
diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index f09ace92176e..c16c61a8411d 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -218,6 +218,24 @@ struct coresight_sysfs_link {
const char *target_name;
};
+/* architecturally we have 128 IDs some of which are reserved */
+#define CORESIGHT_TRACE_IDS_MAX 128
+
+/**
+ * Trace ID map.
+ *
+ * @used_ids: Bitmap to register available (bit = 0) and in use (bit = 1) IDs.
+ * Initialised so that the reserved IDs are permanently marked as
+ * in use.
+ * @pend_rel_ids: CPU IDs that have been released by the trace source but not
+ * yet marked as available, to allow re-allocation to the same
+ * CPU during a perf session.
+ */
+struct coresight_trace_id_map {
+ DECLARE_BITMAP(used_ids, CORESIGHT_TRACE_IDS_MAX);
+ DECLARE_BITMAP(pend_rel_ids, CORESIGHT_TRACE_IDS_MAX);
+};
+
/**
* struct coresight_device - representation of a device as used by the framework
* @pdata: Platform data with device connections associated to this device.
--
2.34.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* [PATCH v5 12/17] coresight: Expose map arguments in trace ID API
2024-07-12 10:20 [PATCH v5 00/17] coresight: Use per-sink trace ID maps for Perf sessions James Clark
` (10 preceding siblings ...)
2024-07-12 10:20 ` [PATCH v5 11/17] coresight: Move struct coresight_trace_id_map to common header James Clark
@ 2024-07-12 10:20 ` James Clark
2024-07-17 15:03 ` Mike Leach
2024-07-12 10:20 ` [PATCH v5 13/17] coresight: Make CPU id map a property of a trace ID map James Clark
` (4 subsequent siblings)
16 siblings, 1 reply; 40+ messages in thread
From: James Clark @ 2024-07-12 10:20 UTC (permalink / raw)
To: coresight, suzuki.poulose, gankulkarni, mike.leach, leo.yan,
anshuman.khandual
Cc: James Clark, James Clark, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, John Garry, Will Deacon, Peter Zijlstra,
Ingo Molnar, Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
Jiri Olsa, Ian Rogers, Adrian Hunter, Liang, Kan, linux-kernel,
linux-arm-kernel, linux-stm32, linux-perf-users
From: James Clark <james.clark@arm.com>
The trace ID API is currently hard coded to always use the global map.
Add public versions that allow the map to be passed in so that Perf
mode can use per-sink maps. Keep the non-map versions so that sysfs
mode can continue to use the default global map.
System ID functions are unchanged because they will always use the
default map.
Signed-off-by: James Clark <james.clark@arm.com>
Signed-off-by: James Clark <james.clark@linaro.org>
---
.../hwtracing/coresight/coresight-trace-id.c | 36 ++++++++++++++-----
.../hwtracing/coresight/coresight-trace-id.h | 20 +++++++++--
2 files changed, 45 insertions(+), 11 deletions(-)
diff --git a/drivers/hwtracing/coresight/coresight-trace-id.c b/drivers/hwtracing/coresight/coresight-trace-id.c
index 19005b5b4dc4..5561989a03fa 100644
--- a/drivers/hwtracing/coresight/coresight-trace-id.c
+++ b/drivers/hwtracing/coresight/coresight-trace-id.c
@@ -12,7 +12,7 @@
#include "coresight-trace-id.h"
-/* Default trace ID map. Used on systems that don't require per sink mappings */
+/* Default trace ID map. Used in sysfs mode and for system sources */
static struct coresight_trace_id_map id_map_default;
/* maintain a record of the mapping of IDs and pending releases per cpu */
@@ -47,7 +47,7 @@ static void coresight_trace_id_dump_table(struct coresight_trace_id_map *id_map,
#endif
/* unlocked read of current trace ID value for given CPU */
-static int _coresight_trace_id_read_cpu_id(int cpu)
+static int _coresight_trace_id_read_cpu_id(int cpu, struct coresight_trace_id_map *id_map)
{
return atomic_read(&per_cpu(cpu_id, cpu));
}
@@ -152,7 +152,7 @@ static void coresight_trace_id_release_all_pending(void)
DUMP_ID_MAP(id_map);
}
-static int coresight_trace_id_map_get_cpu_id(int cpu, struct coresight_trace_id_map *id_map)
+static int _coresight_trace_id_get_cpu_id(int cpu, struct coresight_trace_id_map *id_map)
{
unsigned long flags;
int id;
@@ -160,7 +160,7 @@ static int coresight_trace_id_map_get_cpu_id(int cpu, struct coresight_trace_id_
spin_lock_irqsave(&id_map_lock, flags);
/* check for existing allocation for this CPU */
- id = _coresight_trace_id_read_cpu_id(cpu);
+ id = _coresight_trace_id_read_cpu_id(cpu, id_map);
if (id)
goto get_cpu_id_clr_pend;
@@ -196,13 +196,13 @@ static int coresight_trace_id_map_get_cpu_id(int cpu, struct coresight_trace_id_
return id;
}
-static void coresight_trace_id_map_put_cpu_id(int cpu, struct coresight_trace_id_map *id_map)
+static void _coresight_trace_id_put_cpu_id(int cpu, struct coresight_trace_id_map *id_map)
{
unsigned long flags;
int id;
/* check for existing allocation for this CPU */
- id = _coresight_trace_id_read_cpu_id(cpu);
+ id = _coresight_trace_id_read_cpu_id(cpu, id_map);
if (!id)
return;
@@ -254,22 +254,40 @@ static void coresight_trace_id_map_put_system_id(struct coresight_trace_id_map *
int coresight_trace_id_get_cpu_id(int cpu)
{
- return coresight_trace_id_map_get_cpu_id(cpu, &id_map_default);
+ return _coresight_trace_id_get_cpu_id(cpu, &id_map_default);
}
EXPORT_SYMBOL_GPL(coresight_trace_id_get_cpu_id);
+int coresight_trace_id_get_cpu_id_map(int cpu, struct coresight_trace_id_map *id_map)
+{
+ return _coresight_trace_id_get_cpu_id(cpu, id_map);
+}
+EXPORT_SYMBOL_GPL(coresight_trace_id_get_cpu_id_map);
+
void coresight_trace_id_put_cpu_id(int cpu)
{
- coresight_trace_id_map_put_cpu_id(cpu, &id_map_default);
+ _coresight_trace_id_put_cpu_id(cpu, &id_map_default);
}
EXPORT_SYMBOL_GPL(coresight_trace_id_put_cpu_id);
+void coresight_trace_id_put_cpu_id_map(int cpu, struct coresight_trace_id_map *id_map)
+{
+ _coresight_trace_id_put_cpu_id(cpu, id_map);
+}
+EXPORT_SYMBOL_GPL(coresight_trace_id_put_cpu_id_map);
+
int coresight_trace_id_read_cpu_id(int cpu)
{
- return _coresight_trace_id_read_cpu_id(cpu);
+ return _coresight_trace_id_read_cpu_id(cpu, &id_map_default);
}
EXPORT_SYMBOL_GPL(coresight_trace_id_read_cpu_id);
+int coresight_trace_id_read_cpu_id_map(int cpu, struct coresight_trace_id_map *id_map)
+{
+ return _coresight_trace_id_read_cpu_id(cpu, id_map);
+}
+EXPORT_SYMBOL_GPL(coresight_trace_id_read_cpu_id_map);
+
int coresight_trace_id_get_system_id(void)
{
return coresight_trace_id_map_get_system_id(&id_map_default);
diff --git a/drivers/hwtracing/coresight/coresight-trace-id.h b/drivers/hwtracing/coresight/coresight-trace-id.h
index 49438a96fcc6..840babdd0794 100644
--- a/drivers/hwtracing/coresight/coresight-trace-id.h
+++ b/drivers/hwtracing/coresight/coresight-trace-id.h
@@ -42,8 +42,6 @@
#define IS_VALID_CS_TRACE_ID(id) \
((id > CORESIGHT_TRACE_ID_RES_0) && (id < CORESIGHT_TRACE_ID_RES_TOP))
-/* Allocate and release IDs for a single default trace ID map */
-
/**
* Read and optionally allocate a CoreSight trace ID and associate with a CPU.
*
@@ -59,6 +57,12 @@
*/
int coresight_trace_id_get_cpu_id(int cpu);
+/**
+ * Version of coresight_trace_id_get_cpu_id() that allows the ID map to operate
+ * on to be provided.
+ */
+int coresight_trace_id_get_cpu_id_map(int cpu, struct coresight_trace_id_map *id_map);
+
/**
* Release an allocated trace ID associated with the CPU.
*
@@ -72,6 +76,12 @@ int coresight_trace_id_get_cpu_id(int cpu);
*/
void coresight_trace_id_put_cpu_id(int cpu);
+/**
+ * Version of coresight_trace_id_put_cpu_id() that allows the ID map to operate
+ * on to be provided.
+ */
+void coresight_trace_id_put_cpu_id_map(int cpu, struct coresight_trace_id_map *id_map);
+
/**
* Read the current allocated CoreSight Trace ID value for the CPU.
*
@@ -92,6 +102,12 @@ void coresight_trace_id_put_cpu_id(int cpu);
*/
int coresight_trace_id_read_cpu_id(int cpu);
+/**
+ * Version of coresight_trace_id_read_cpu_id() that allows the ID map to operate
+ * on to be provided.
+ */
+int coresight_trace_id_read_cpu_id_map(int cpu, struct coresight_trace_id_map *id_map);
+
/**
* Allocate a CoreSight trace ID for a system component.
*
--
2.34.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* Re: [PATCH v5 12/17] coresight: Expose map arguments in trace ID API
2024-07-12 10:20 ` [PATCH v5 12/17] coresight: Expose map arguments in trace ID API James Clark
@ 2024-07-17 15:03 ` Mike Leach
0 siblings, 0 replies; 40+ messages in thread
From: Mike Leach @ 2024-07-17 15:03 UTC (permalink / raw)
To: James Clark
Cc: coresight, suzuki.poulose, gankulkarni, leo.yan,
anshuman.khandual, James Clark, Alexander Shishkin,
Maxime Coquelin, Alexandre Torgue, John Garry, Will Deacon,
Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Jiri Olsa, Ian Rogers, Adrian Hunter,
Liang, Kan, linux-kernel, linux-arm-kernel, linux-stm32,
linux-perf-users
On Fri, 12 Jul 2024 at 11:23, James Clark <james.clark@linaro.org> wrote:
>
> From: James Clark <james.clark@arm.com>
>
> The trace ID API is currently hard coded to always use the global map.
> Add public versions that allow the map to be passed in so that Perf
> mode can use per-sink maps. Keep the non-map versions so that sysfs
> mode can continue to use the default global map.
>
> System ID functions are unchanged because they will always use the
> default map.
>
> Signed-off-by: James Clark <james.clark@arm.com>
> Signed-off-by: James Clark <james.clark@linaro.org>
> ---
> .../hwtracing/coresight/coresight-trace-id.c | 36 ++++++++++++++-----
> .../hwtracing/coresight/coresight-trace-id.h | 20 +++++++++--
> 2 files changed, 45 insertions(+), 11 deletions(-)
>
> diff --git a/drivers/hwtracing/coresight/coresight-trace-id.c b/drivers/hwtracing/coresight/coresight-trace-id.c
> index 19005b5b4dc4..5561989a03fa 100644
> --- a/drivers/hwtracing/coresight/coresight-trace-id.c
> +++ b/drivers/hwtracing/coresight/coresight-trace-id.c
> @@ -12,7 +12,7 @@
>
> #include "coresight-trace-id.h"
>
> -/* Default trace ID map. Used on systems that don't require per sink mappings */
> +/* Default trace ID map. Used in sysfs mode and for system sources */
> static struct coresight_trace_id_map id_map_default;
>
> /* maintain a record of the mapping of IDs and pending releases per cpu */
> @@ -47,7 +47,7 @@ static void coresight_trace_id_dump_table(struct coresight_trace_id_map *id_map,
> #endif
>
> /* unlocked read of current trace ID value for given CPU */
> -static int _coresight_trace_id_read_cpu_id(int cpu)
> +static int _coresight_trace_id_read_cpu_id(int cpu, struct coresight_trace_id_map *id_map)
> {
> return atomic_read(&per_cpu(cpu_id, cpu));
> }
> @@ -152,7 +152,7 @@ static void coresight_trace_id_release_all_pending(void)
> DUMP_ID_MAP(id_map);
> }
>
> -static int coresight_trace_id_map_get_cpu_id(int cpu, struct coresight_trace_id_map *id_map)
> +static int _coresight_trace_id_get_cpu_id(int cpu, struct coresight_trace_id_map *id_map)
> {
> unsigned long flags;
> int id;
> @@ -160,7 +160,7 @@ static int coresight_trace_id_map_get_cpu_id(int cpu, struct coresight_trace_id_
> spin_lock_irqsave(&id_map_lock, flags);
>
> /* check for existing allocation for this CPU */
> - id = _coresight_trace_id_read_cpu_id(cpu);
> + id = _coresight_trace_id_read_cpu_id(cpu, id_map);
> if (id)
> goto get_cpu_id_clr_pend;
>
> @@ -196,13 +196,13 @@ static int coresight_trace_id_map_get_cpu_id(int cpu, struct coresight_trace_id_
> return id;
> }
>
> -static void coresight_trace_id_map_put_cpu_id(int cpu, struct coresight_trace_id_map *id_map)
> +static void _coresight_trace_id_put_cpu_id(int cpu, struct coresight_trace_id_map *id_map)
> {
> unsigned long flags;
> int id;
>
> /* check for existing allocation for this CPU */
> - id = _coresight_trace_id_read_cpu_id(cpu);
> + id = _coresight_trace_id_read_cpu_id(cpu, id_map);
> if (!id)
> return;
>
> @@ -254,22 +254,40 @@ static void coresight_trace_id_map_put_system_id(struct coresight_trace_id_map *
>
> int coresight_trace_id_get_cpu_id(int cpu)
> {
> - return coresight_trace_id_map_get_cpu_id(cpu, &id_map_default);
> + return _coresight_trace_id_get_cpu_id(cpu, &id_map_default);
> }
> EXPORT_SYMBOL_GPL(coresight_trace_id_get_cpu_id);
>
> +int coresight_trace_id_get_cpu_id_map(int cpu, struct coresight_trace_id_map *id_map)
> +{
> + return _coresight_trace_id_get_cpu_id(cpu, id_map);
> +}
> +EXPORT_SYMBOL_GPL(coresight_trace_id_get_cpu_id_map);
> +
> void coresight_trace_id_put_cpu_id(int cpu)
> {
> - coresight_trace_id_map_put_cpu_id(cpu, &id_map_default);
> + _coresight_trace_id_put_cpu_id(cpu, &id_map_default);
> }
> EXPORT_SYMBOL_GPL(coresight_trace_id_put_cpu_id);
>
> +void coresight_trace_id_put_cpu_id_map(int cpu, struct coresight_trace_id_map *id_map)
> +{
> + _coresight_trace_id_put_cpu_id(cpu, id_map);
> +}
> +EXPORT_SYMBOL_GPL(coresight_trace_id_put_cpu_id_map);
> +
> int coresight_trace_id_read_cpu_id(int cpu)
> {
> - return _coresight_trace_id_read_cpu_id(cpu);
> + return _coresight_trace_id_read_cpu_id(cpu, &id_map_default);
> }
> EXPORT_SYMBOL_GPL(coresight_trace_id_read_cpu_id);
>
> +int coresight_trace_id_read_cpu_id_map(int cpu, struct coresight_trace_id_map *id_map)
> +{
> + return _coresight_trace_id_read_cpu_id(cpu, id_map);
> +}
> +EXPORT_SYMBOL_GPL(coresight_trace_id_read_cpu_id_map);
> +
> int coresight_trace_id_get_system_id(void)
> {
> return coresight_trace_id_map_get_system_id(&id_map_default);
> diff --git a/drivers/hwtracing/coresight/coresight-trace-id.h b/drivers/hwtracing/coresight/coresight-trace-id.h
> index 49438a96fcc6..840babdd0794 100644
> --- a/drivers/hwtracing/coresight/coresight-trace-id.h
> +++ b/drivers/hwtracing/coresight/coresight-trace-id.h
> @@ -42,8 +42,6 @@
> #define IS_VALID_CS_TRACE_ID(id) \
> ((id > CORESIGHT_TRACE_ID_RES_0) && (id < CORESIGHT_TRACE_ID_RES_TOP))
>
> -/* Allocate and release IDs for a single default trace ID map */
> -
> /**
> * Read and optionally allocate a CoreSight trace ID and associate with a CPU.
> *
> @@ -59,6 +57,12 @@
> */
> int coresight_trace_id_get_cpu_id(int cpu);
>
> +/**
> + * Version of coresight_trace_id_get_cpu_id() that allows the ID map to operate
> + * on to be provided.
> + */
> +int coresight_trace_id_get_cpu_id_map(int cpu, struct coresight_trace_id_map *id_map);
> +
> /**
> * Release an allocated trace ID associated with the CPU.
> *
> @@ -72,6 +76,12 @@ int coresight_trace_id_get_cpu_id(int cpu);
> */
> void coresight_trace_id_put_cpu_id(int cpu);
>
> +/**
> + * Version of coresight_trace_id_put_cpu_id() that allows the ID map to operate
> + * on to be provided.
> + */
> +void coresight_trace_id_put_cpu_id_map(int cpu, struct coresight_trace_id_map *id_map);
> +
> /**
> * Read the current allocated CoreSight Trace ID value for the CPU.
> *
> @@ -92,6 +102,12 @@ void coresight_trace_id_put_cpu_id(int cpu);
> */
> int coresight_trace_id_read_cpu_id(int cpu);
>
> +/**
> + * Version of coresight_trace_id_read_cpu_id() that allows the ID map to operate
> + * on to be provided.
> + */
> +int coresight_trace_id_read_cpu_id_map(int cpu, struct coresight_trace_id_map *id_map);
> +
> /**
> * Allocate a CoreSight trace ID for a system component.
> *
> --
> 2.34.1
>
Reviewed-by: Mike Leach <mike.leach@linaro.org>
--
Mike Leach
Principal Engineer, ARM Ltd.
Manchester Design Centre. UK
^ permalink raw reply [flat|nested] 40+ messages in thread
* [PATCH v5 13/17] coresight: Make CPU id map a property of a trace ID map
2024-07-12 10:20 [PATCH v5 00/17] coresight: Use per-sink trace ID maps for Perf sessions James Clark
` (11 preceding siblings ...)
2024-07-12 10:20 ` [PATCH v5 12/17] coresight: Expose map arguments in trace ID API James Clark
@ 2024-07-12 10:20 ` James Clark
2024-07-17 15:03 ` Mike Leach
2024-07-12 10:20 ` [PATCH v5 14/17] coresight: Use per-sink trace ID maps for Perf sessions James Clark
` (3 subsequent siblings)
16 siblings, 1 reply; 40+ messages in thread
From: James Clark @ 2024-07-12 10:20 UTC (permalink / raw)
To: coresight, suzuki.poulose, gankulkarni, mike.leach, leo.yan,
anshuman.khandual
Cc: James Clark, James Clark, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, John Garry, Will Deacon, Peter Zijlstra,
Ingo Molnar, Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
Jiri Olsa, Ian Rogers, Adrian Hunter, Liang, Kan, linux-kernel,
linux-arm-kernel, linux-stm32, linux-perf-users
From: James Clark <james.clark@arm.com>
The global CPU ID mappings won't work for per-sink ID maps so move them to
the ID map struct. coresight_trace_id_release_all_pending() is hard
coded to operate on the default map, but once Perf sessions use their
own maps the pending release mechanism will be deleted. So it doesn't
need to be extended to accept a trace ID map argument at this point.
Signed-off-by: James Clark <james.clark@arm.com>
Signed-off-by: James Clark <james.clark@linaro.org>
---
drivers/hwtracing/coresight/coresight-trace-id.c | 16 +++++++++-------
include/linux/coresight.h | 1 +
2 files changed, 10 insertions(+), 7 deletions(-)
diff --git a/drivers/hwtracing/coresight/coresight-trace-id.c b/drivers/hwtracing/coresight/coresight-trace-id.c
index 5561989a03fa..8a777c0af6ea 100644
--- a/drivers/hwtracing/coresight/coresight-trace-id.c
+++ b/drivers/hwtracing/coresight/coresight-trace-id.c
@@ -13,10 +13,12 @@
#include "coresight-trace-id.h"
/* Default trace ID map. Used in sysfs mode and for system sources */
-static struct coresight_trace_id_map id_map_default;
+static DEFINE_PER_CPU(atomic_t, id_map_default_cpu_ids) = ATOMIC_INIT(0);
+static struct coresight_trace_id_map id_map_default = {
+ .cpu_map = &id_map_default_cpu_ids
+};
-/* maintain a record of the mapping of IDs and pending releases per cpu */
-static DEFINE_PER_CPU(atomic_t, cpu_id) = ATOMIC_INIT(0);
+/* maintain a record of the pending releases per cpu */
static cpumask_t cpu_id_release_pending;
/* perf session active counter */
@@ -49,7 +51,7 @@ static void coresight_trace_id_dump_table(struct coresight_trace_id_map *id_map,
/* unlocked read of current trace ID value for given CPU */
static int _coresight_trace_id_read_cpu_id(int cpu, struct coresight_trace_id_map *id_map)
{
- return atomic_read(&per_cpu(cpu_id, cpu));
+ return atomic_read(per_cpu_ptr(id_map->cpu_map, cpu));
}
/* look for next available odd ID, return 0 if none found */
@@ -145,7 +147,7 @@ static void coresight_trace_id_release_all_pending(void)
clear_bit(bit, id_map->pend_rel_ids);
}
for_each_cpu(cpu, &cpu_id_release_pending) {
- atomic_set(&per_cpu(cpu_id, cpu), 0);
+ atomic_set(per_cpu_ptr(id_map_default.cpu_map, cpu), 0);
cpumask_clear_cpu(cpu, &cpu_id_release_pending);
}
spin_unlock_irqrestore(&id_map_lock, flags);
@@ -181,7 +183,7 @@ static int _coresight_trace_id_get_cpu_id(int cpu, struct coresight_trace_id_map
goto get_cpu_id_out_unlock;
/* allocate the new id to the cpu */
- atomic_set(&per_cpu(cpu_id, cpu), id);
+ atomic_set(per_cpu_ptr(id_map->cpu_map, cpu), id);
get_cpu_id_clr_pend:
/* we are (re)using this ID - so ensure it is not marked for release */
@@ -215,7 +217,7 @@ static void _coresight_trace_id_put_cpu_id(int cpu, struct coresight_trace_id_ma
} else {
/* otherwise clear id */
coresight_trace_id_free(id, id_map);
- atomic_set(&per_cpu(cpu_id, cpu), 0);
+ atomic_set(per_cpu_ptr(id_map->cpu_map, cpu), 0);
}
spin_unlock_irqrestore(&id_map_lock, flags);
diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index c16c61a8411d..7d62b88bfb5c 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -234,6 +234,7 @@ struct coresight_sysfs_link {
struct coresight_trace_id_map {
DECLARE_BITMAP(used_ids, CORESIGHT_TRACE_IDS_MAX);
DECLARE_BITMAP(pend_rel_ids, CORESIGHT_TRACE_IDS_MAX);
+ atomic_t __percpu *cpu_map;
};
/**
--
2.34.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* Re: [PATCH v5 13/17] coresight: Make CPU id map a property of a trace ID map
2024-07-12 10:20 ` [PATCH v5 13/17] coresight: Make CPU id map a property of a trace ID map James Clark
@ 2024-07-17 15:03 ` Mike Leach
0 siblings, 0 replies; 40+ messages in thread
From: Mike Leach @ 2024-07-17 15:03 UTC (permalink / raw)
To: James Clark
Cc: coresight, suzuki.poulose, gankulkarni, leo.yan,
anshuman.khandual, James Clark, Alexander Shishkin,
Maxime Coquelin, Alexandre Torgue, John Garry, Will Deacon,
Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Jiri Olsa, Ian Rogers, Adrian Hunter,
Liang, Kan, linux-kernel, linux-arm-kernel, linux-stm32,
linux-perf-users
On Fri, 12 Jul 2024 at 11:23, James Clark <james.clark@linaro.org> wrote:
>
> From: James Clark <james.clark@arm.com>
>
> The global CPU ID mappings won't work for per-sink ID maps so move them to
> the ID map struct. coresight_trace_id_release_all_pending() is hard
> coded to operate on the default map, but once Perf sessions use their
> own maps the pending release mechanism will be deleted. So it doesn't
> need to be extended to accept a trace ID map argument at this point.
>
> Signed-off-by: James Clark <james.clark@arm.com>
> Signed-off-by: James Clark <james.clark@linaro.org>
> ---
> drivers/hwtracing/coresight/coresight-trace-id.c | 16 +++++++++-------
> include/linux/coresight.h | 1 +
> 2 files changed, 10 insertions(+), 7 deletions(-)
>
> diff --git a/drivers/hwtracing/coresight/coresight-trace-id.c b/drivers/hwtracing/coresight/coresight-trace-id.c
> index 5561989a03fa..8a777c0af6ea 100644
> --- a/drivers/hwtracing/coresight/coresight-trace-id.c
> +++ b/drivers/hwtracing/coresight/coresight-trace-id.c
> @@ -13,10 +13,12 @@
> #include "coresight-trace-id.h"
>
> /* Default trace ID map. Used in sysfs mode and for system sources */
> -static struct coresight_trace_id_map id_map_default;
> +static DEFINE_PER_CPU(atomic_t, id_map_default_cpu_ids) = ATOMIC_INIT(0);
> +static struct coresight_trace_id_map id_map_default = {
> + .cpu_map = &id_map_default_cpu_ids
> +};
>
> -/* maintain a record of the mapping of IDs and pending releases per cpu */
> -static DEFINE_PER_CPU(atomic_t, cpu_id) = ATOMIC_INIT(0);
> +/* maintain a record of the pending releases per cpu */
> static cpumask_t cpu_id_release_pending;
>
> /* perf session active counter */
> @@ -49,7 +51,7 @@ static void coresight_trace_id_dump_table(struct coresight_trace_id_map *id_map,
> /* unlocked read of current trace ID value for given CPU */
> static int _coresight_trace_id_read_cpu_id(int cpu, struct coresight_trace_id_map *id_map)
> {
> - return atomic_read(&per_cpu(cpu_id, cpu));
> + return atomic_read(per_cpu_ptr(id_map->cpu_map, cpu));
> }
>
> /* look for next available odd ID, return 0 if none found */
> @@ -145,7 +147,7 @@ static void coresight_trace_id_release_all_pending(void)
> clear_bit(bit, id_map->pend_rel_ids);
> }
> for_each_cpu(cpu, &cpu_id_release_pending) {
> - atomic_set(&per_cpu(cpu_id, cpu), 0);
> + atomic_set(per_cpu_ptr(id_map_default.cpu_map, cpu), 0);
> cpumask_clear_cpu(cpu, &cpu_id_release_pending);
> }
> spin_unlock_irqrestore(&id_map_lock, flags);
> @@ -181,7 +183,7 @@ static int _coresight_trace_id_get_cpu_id(int cpu, struct coresight_trace_id_map
> goto get_cpu_id_out_unlock;
>
> /* allocate the new id to the cpu */
> - atomic_set(&per_cpu(cpu_id, cpu), id);
> + atomic_set(per_cpu_ptr(id_map->cpu_map, cpu), id);
>
> get_cpu_id_clr_pend:
> /* we are (re)using this ID - so ensure it is not marked for release */
> @@ -215,7 +217,7 @@ static void _coresight_trace_id_put_cpu_id(int cpu, struct coresight_trace_id_ma
> } else {
> /* otherwise clear id */
> coresight_trace_id_free(id, id_map);
> - atomic_set(&per_cpu(cpu_id, cpu), 0);
> + atomic_set(per_cpu_ptr(id_map->cpu_map, cpu), 0);
> }
>
> spin_unlock_irqrestore(&id_map_lock, flags);
> diff --git a/include/linux/coresight.h b/include/linux/coresight.h
> index c16c61a8411d..7d62b88bfb5c 100644
> --- a/include/linux/coresight.h
> +++ b/include/linux/coresight.h
> @@ -234,6 +234,7 @@ struct coresight_sysfs_link {
> struct coresight_trace_id_map {
> DECLARE_BITMAP(used_ids, CORESIGHT_TRACE_IDS_MAX);
> DECLARE_BITMAP(pend_rel_ids, CORESIGHT_TRACE_IDS_MAX);
> + atomic_t __percpu *cpu_map;
> };
>
> /**
> --
> 2.34.1
>
Reviewed-by: Mike Leach <mike.leach@linaro.org>
--
Mike Leach
Principal Engineer, ARM Ltd.
Manchester Design Centre. UK
^ permalink raw reply [flat|nested] 40+ messages in thread
* [PATCH v5 14/17] coresight: Use per-sink trace ID maps for Perf sessions
2024-07-12 10:20 [PATCH v5 00/17] coresight: Use per-sink trace ID maps for Perf sessions James Clark
` (12 preceding siblings ...)
2024-07-12 10:20 ` [PATCH v5 13/17] coresight: Make CPU id map a property of a trace ID map James Clark
@ 2024-07-12 10:20 ` James Clark
2024-07-17 15:03 ` Mike Leach
2024-07-12 10:20 ` [PATCH v5 15/17] coresight: Remove pending trace ID release mechanism James Clark
` (2 subsequent siblings)
16 siblings, 1 reply; 40+ messages in thread
From: James Clark @ 2024-07-12 10:20 UTC (permalink / raw)
To: coresight, suzuki.poulose, gankulkarni, mike.leach, leo.yan,
anshuman.khandual
Cc: James Clark, James Clark, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, John Garry, Will Deacon, Peter Zijlstra,
Ingo Molnar, Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
Jiri Olsa, Ian Rogers, Adrian Hunter, Liang, Kan, linux-kernel,
linux-arm-kernel, linux-stm32, linux-perf-users
From: James Clark <james.clark@arm.com>
This will allow sessions with more than CORESIGHT_TRACE_IDS_MAX ETMs
as long as there are fewer than that many ETMs connected to each sink.
Each sink owns its own trace ID map, and any Perf session connecting to
that sink will allocate from it, even if the sink is currently in use by
other users. This is similar to the existing behavior where the dynamic
trace IDs are constant as long as there is any concurrent Perf session
active. It's not completely optimal because slightly more IDs will be
used than necessary, but the optimal solution involves tracking the PIDs
of each session and allocating ID maps based on the session owner. This
is difficult to do with the combination of per-thread and per-cpu modes
and some scheduling issues. The complexity of this isn't likely to be worth
it because even with multiple users they'd just see a difference in the
ordering of ID allocations rather than hitting any limits (unless the
hardware does have too many ETMs connected to one sink).
Signed-off-by: James Clark <james.clark@arm.com>
Signed-off-by: James Clark <james.clark@linaro.org>
---
drivers/hwtracing/coresight/coresight-core.c | 10 ++++++++++
drivers/hwtracing/coresight/coresight-dummy.c | 3 ++-
drivers/hwtracing/coresight/coresight-etm-perf.c | 15 ++++++++++-----
.../hwtracing/coresight/coresight-etm3x-core.c | 9 +++++----
.../hwtracing/coresight/coresight-etm4x-core.c | 9 +++++----
drivers/hwtracing/coresight/coresight-stm.c | 3 ++-
drivers/hwtracing/coresight/coresight-sysfs.c | 3 ++-
drivers/hwtracing/coresight/coresight-tpdm.c | 3 ++-
include/linux/coresight.h | 3 ++-
9 files changed, 40 insertions(+), 18 deletions(-)
diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c
index 9fc6f6b863e0..faf560ba8d64 100644
--- a/drivers/hwtracing/coresight/coresight-core.c
+++ b/drivers/hwtracing/coresight/coresight-core.c
@@ -902,6 +902,7 @@ static void coresight_device_release(struct device *dev)
struct coresight_device *csdev = to_coresight_device(dev);
fwnode_handle_put(csdev->dev.fwnode);
+ free_percpu(csdev->perf_sink_id_map.cpu_map);
kfree(csdev);
}
@@ -1159,6 +1160,15 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
csdev->dev.fwnode = fwnode_handle_get(dev_fwnode(desc->dev));
dev_set_name(&csdev->dev, "%s", desc->name);
+ if (csdev->type == CORESIGHT_DEV_TYPE_SINK ||
+ csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) {
+ csdev->perf_sink_id_map.cpu_map = alloc_percpu(atomic_t);
+ if (!csdev->perf_sink_id_map.cpu_map) {
+ kfree(csdev);
+ ret = -ENOMEM;
+ goto err_out;
+ }
+ }
/*
* Make sure the device registration and the connection fixup
* are synchronised, so that we don't see uninitialised devices
diff --git a/drivers/hwtracing/coresight/coresight-dummy.c b/drivers/hwtracing/coresight/coresight-dummy.c
index ac70c0b491be..1f1b9ad160f6 100644
--- a/drivers/hwtracing/coresight/coresight-dummy.c
+++ b/drivers/hwtracing/coresight/coresight-dummy.c
@@ -21,7 +21,8 @@ DEFINE_CORESIGHT_DEVLIST(source_devs, "dummy_source");
DEFINE_CORESIGHT_DEVLIST(sink_devs, "dummy_sink");
static int dummy_source_enable(struct coresight_device *csdev,
- struct perf_event *event, enum cs_mode mode)
+ struct perf_event *event, enum cs_mode mode,
+ __maybe_unused struct coresight_trace_id_map *id_map)
{
dev_dbg(csdev->dev.parent, "Dummy source enabled\n");
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
index c0c60e6a1703..7fb55dafb639 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -229,10 +229,13 @@ static void free_event_data(struct work_struct *work)
struct list_head **ppath;
ppath = etm_event_cpu_path_ptr(event_data, cpu);
- if (!(IS_ERR_OR_NULL(*ppath)))
+ if (!(IS_ERR_OR_NULL(*ppath))) {
+ struct coresight_device *sink = coresight_get_sink(*ppath);
+
+ coresight_trace_id_put_cpu_id_map(cpu, &sink->perf_sink_id_map);
coresight_release_path(*ppath);
+ }
*ppath = NULL;
- coresight_trace_id_put_cpu_id(cpu);
}
/* mark perf event as done for trace id allocator */
@@ -401,7 +404,7 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,
}
/* ensure we can allocate a trace ID for this CPU */
- trace_id = coresight_trace_id_get_cpu_id(cpu);
+ trace_id = coresight_trace_id_get_cpu_id_map(cpu, &sink->perf_sink_id_map);
if (!IS_VALID_CS_TRACE_ID(trace_id)) {
cpumask_clear_cpu(cpu, mask);
coresight_release_path(path);
@@ -495,7 +498,8 @@ static void etm_event_start(struct perf_event *event, int flags)
goto fail_end_stop;
/* Finally enable the tracer */
- if (source_ops(csdev)->enable(csdev, event, CS_MODE_PERF))
+ if (source_ops(csdev)->enable(csdev, event, CS_MODE_PERF,
+ &sink->perf_sink_id_map))
goto fail_disable_path;
/*
@@ -507,7 +511,8 @@ static void etm_event_start(struct perf_event *event, int flags)
hw_id = FIELD_PREP(CS_AUX_HW_ID_VERSION_MASK,
CS_AUX_HW_ID_CURR_VERSION);
hw_id |= FIELD_PREP(CS_AUX_HW_ID_TRACE_ID_MASK,
- coresight_trace_id_read_cpu_id(cpu));
+ coresight_trace_id_read_cpu_id_map(cpu,
+ &sink->perf_sink_id_map));
perf_report_aux_output_id(event, hw_id);
}
diff --git a/drivers/hwtracing/coresight/coresight-etm3x-core.c b/drivers/hwtracing/coresight/coresight-etm3x-core.c
index 8b362605d242..c103f4c70f5d 100644
--- a/drivers/hwtracing/coresight/coresight-etm3x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm3x-core.c
@@ -481,7 +481,8 @@ void etm_release_trace_id(struct etm_drvdata *drvdata)
}
static int etm_enable_perf(struct coresight_device *csdev,
- struct perf_event *event)
+ struct perf_event *event,
+ struct coresight_trace_id_map *id_map)
{
struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
int trace_id;
@@ -500,7 +501,7 @@ static int etm_enable_perf(struct coresight_device *csdev,
* with perf locks - we know the ID cannot change until perf shuts down
* the session
*/
- trace_id = coresight_trace_id_read_cpu_id(drvdata->cpu);
+ trace_id = coresight_trace_id_read_cpu_id_map(drvdata->cpu, id_map);
if (!IS_VALID_CS_TRACE_ID(trace_id)) {
dev_err(&drvdata->csdev->dev, "Failed to set trace ID for %s on CPU%d\n",
dev_name(&drvdata->csdev->dev), drvdata->cpu);
@@ -553,7 +554,7 @@ static int etm_enable_sysfs(struct coresight_device *csdev)
}
static int etm_enable(struct coresight_device *csdev, struct perf_event *event,
- enum cs_mode mode)
+ enum cs_mode mode, struct coresight_trace_id_map *id_map)
{
int ret;
struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
@@ -568,7 +569,7 @@ static int etm_enable(struct coresight_device *csdev, struct perf_event *event,
ret = etm_enable_sysfs(csdev);
break;
case CS_MODE_PERF:
- ret = etm_enable_perf(csdev, event);
+ ret = etm_enable_perf(csdev, event, id_map);
break;
default:
ret = -EINVAL;
diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c
index bf01f01964cf..66d44a404ad0 100644
--- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
+++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
@@ -752,7 +752,8 @@ static int etm4_parse_event_config(struct coresight_device *csdev,
}
static int etm4_enable_perf(struct coresight_device *csdev,
- struct perf_event *event)
+ struct perf_event *event,
+ struct coresight_trace_id_map *id_map)
{
int ret = 0, trace_id;
struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
@@ -775,7 +776,7 @@ static int etm4_enable_perf(struct coresight_device *csdev,
* with perf locks - we know the ID cannot change until perf shuts down
* the session
*/
- trace_id = coresight_trace_id_read_cpu_id(drvdata->cpu);
+ trace_id = coresight_trace_id_read_cpu_id_map(drvdata->cpu, id_map);
if (!IS_VALID_CS_TRACE_ID(trace_id)) {
dev_err(&drvdata->csdev->dev, "Failed to set trace ID for %s on CPU%d\n",
dev_name(&drvdata->csdev->dev), drvdata->cpu);
@@ -837,7 +838,7 @@ static int etm4_enable_sysfs(struct coresight_device *csdev)
}
static int etm4_enable(struct coresight_device *csdev, struct perf_event *event,
- enum cs_mode mode)
+ enum cs_mode mode, struct coresight_trace_id_map *id_map)
{
int ret;
@@ -851,7 +852,7 @@ static int etm4_enable(struct coresight_device *csdev, struct perf_event *event,
ret = etm4_enable_sysfs(csdev);
break;
case CS_MODE_PERF:
- ret = etm4_enable_perf(csdev, event);
+ ret = etm4_enable_perf(csdev, event, id_map);
break;
default:
ret = -EINVAL;
diff --git a/drivers/hwtracing/coresight/coresight-stm.c b/drivers/hwtracing/coresight/coresight-stm.c
index 117dbb484543..cb3e04755c99 100644
--- a/drivers/hwtracing/coresight/coresight-stm.c
+++ b/drivers/hwtracing/coresight/coresight-stm.c
@@ -194,7 +194,8 @@ static void stm_enable_hw(struct stm_drvdata *drvdata)
}
static int stm_enable(struct coresight_device *csdev, struct perf_event *event,
- enum cs_mode mode)
+ enum cs_mode mode,
+ __maybe_unused struct coresight_trace_id_map *trace_id)
{
struct stm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
diff --git a/drivers/hwtracing/coresight/coresight-sysfs.c b/drivers/hwtracing/coresight/coresight-sysfs.c
index 1e67cc7758d7..a01c9e54e2ed 100644
--- a/drivers/hwtracing/coresight/coresight-sysfs.c
+++ b/drivers/hwtracing/coresight/coresight-sysfs.c
@@ -9,6 +9,7 @@
#include <linux/kernel.h>
#include "coresight-priv.h"
+#include "coresight-trace-id.h"
/*
* Use IDR to map the hash of the source's device name
@@ -63,7 +64,7 @@ static int coresight_enable_source_sysfs(struct coresight_device *csdev,
*/
lockdep_assert_held(&coresight_mutex);
if (coresight_get_mode(csdev) != CS_MODE_SYSFS) {
- ret = source_ops(csdev)->enable(csdev, data, mode);
+ ret = source_ops(csdev)->enable(csdev, data, mode, NULL);
if (ret)
return ret;
}
diff --git a/drivers/hwtracing/coresight/coresight-tpdm.c b/drivers/hwtracing/coresight/coresight-tpdm.c
index 0726f8842552..0a5e20cf23e8 100644
--- a/drivers/hwtracing/coresight/coresight-tpdm.c
+++ b/drivers/hwtracing/coresight/coresight-tpdm.c
@@ -439,7 +439,8 @@ static void __tpdm_enable(struct tpdm_drvdata *drvdata)
}
static int tpdm_enable(struct coresight_device *csdev, struct perf_event *event,
- enum cs_mode mode)
+ enum cs_mode mode,
+ __maybe_unused struct coresight_trace_id_map *id_map)
{
struct tpdm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index 7d62b88bfb5c..9c3067e2e38b 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -290,6 +290,7 @@ struct coresight_device {
bool sysfs_sink_activated;
struct dev_ext_attribute *ea;
struct coresight_device *def_sink;
+ struct coresight_trace_id_map perf_sink_id_map;
/* sysfs links between components */
int nr_links;
bool has_conns_grp;
@@ -384,7 +385,7 @@ struct coresight_ops_link {
struct coresight_ops_source {
int (*cpu_id)(struct coresight_device *csdev);
int (*enable)(struct coresight_device *csdev, struct perf_event *event,
- enum cs_mode mode);
+ enum cs_mode mode, struct coresight_trace_id_map *id_map);
void (*disable)(struct coresight_device *csdev,
struct perf_event *event);
};
--
2.34.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* Re: [PATCH v5 14/17] coresight: Use per-sink trace ID maps for Perf sessions
2024-07-12 10:20 ` [PATCH v5 14/17] coresight: Use per-sink trace ID maps for Perf sessions James Clark
@ 2024-07-17 15:03 ` Mike Leach
0 siblings, 0 replies; 40+ messages in thread
From: Mike Leach @ 2024-07-17 15:03 UTC (permalink / raw)
To: James Clark
Cc: coresight, suzuki.poulose, gankulkarni, leo.yan,
anshuman.khandual, James Clark, Alexander Shishkin,
Maxime Coquelin, Alexandre Torgue, John Garry, Will Deacon,
Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Jiri Olsa, Ian Rogers, Adrian Hunter,
Liang, Kan, linux-kernel, linux-arm-kernel, linux-stm32,
linux-perf-users
On Fri, 12 Jul 2024 at 11:23, James Clark <james.clark@linaro.org> wrote:
>
> From: James Clark <james.clark@arm.com>
>
> This will allow sessions with more than CORESIGHT_TRACE_IDS_MAX ETMs
> as long as there are fewer than that many ETMs connected to each sink.
>
> Each sink owns its own trace ID map, and any Perf session connecting to
> that sink will allocate from it, even if the sink is currently in use by
> other users. This is similar to the existing behavior where the dynamic
> trace IDs are constant as long as there is any concurrent Perf session
> active. It's not completely optimal because slightly more IDs will be
> used than necessary, but the optimal solution involves tracking the PIDs
> of each session and allocating ID maps based on the session owner. This
> is difficult to do with the combination of per-thread and per-cpu modes
> and some scheduling issues. The complexity of this isn't likely to be worth
> it because even with multiple users they'd just see a difference in the
> ordering of ID allocations rather than hitting any limits (unless the
> hardware does have too many ETMs connected to one sink).
>
> Signed-off-by: James Clark <james.clark@arm.com>
> Signed-off-by: James Clark <james.clark@linaro.org>
> ---
> drivers/hwtracing/coresight/coresight-core.c | 10 ++++++++++
> drivers/hwtracing/coresight/coresight-dummy.c | 3 ++-
> drivers/hwtracing/coresight/coresight-etm-perf.c | 15 ++++++++++-----
> .../hwtracing/coresight/coresight-etm3x-core.c | 9 +++++----
> .../hwtracing/coresight/coresight-etm4x-core.c | 9 +++++----
> drivers/hwtracing/coresight/coresight-stm.c | 3 ++-
> drivers/hwtracing/coresight/coresight-sysfs.c | 3 ++-
> drivers/hwtracing/coresight/coresight-tpdm.c | 3 ++-
> include/linux/coresight.h | 3 ++-
> 9 files changed, 40 insertions(+), 18 deletions(-)
>
> diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c
> index 9fc6f6b863e0..faf560ba8d64 100644
> --- a/drivers/hwtracing/coresight/coresight-core.c
> +++ b/drivers/hwtracing/coresight/coresight-core.c
> @@ -902,6 +902,7 @@ static void coresight_device_release(struct device *dev)
> struct coresight_device *csdev = to_coresight_device(dev);
>
> fwnode_handle_put(csdev->dev.fwnode);
> + free_percpu(csdev->perf_sink_id_map.cpu_map);
> kfree(csdev);
> }
>
> @@ -1159,6 +1160,15 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
> csdev->dev.fwnode = fwnode_handle_get(dev_fwnode(desc->dev));
> dev_set_name(&csdev->dev, "%s", desc->name);
>
> + if (csdev->type == CORESIGHT_DEV_TYPE_SINK ||
> + csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) {
> + csdev->perf_sink_id_map.cpu_map = alloc_percpu(atomic_t);
> + if (!csdev->perf_sink_id_map.cpu_map) {
> + kfree(csdev);
> + ret = -ENOMEM;
> + goto err_out;
> + }
> + }
> /*
> * Make sure the device registration and the connection fixup
> * are synchronised, so that we don't see uninitialised devices
> diff --git a/drivers/hwtracing/coresight/coresight-dummy.c b/drivers/hwtracing/coresight/coresight-dummy.c
> index ac70c0b491be..1f1b9ad160f6 100644
> --- a/drivers/hwtracing/coresight/coresight-dummy.c
> +++ b/drivers/hwtracing/coresight/coresight-dummy.c
> @@ -21,7 +21,8 @@ DEFINE_CORESIGHT_DEVLIST(source_devs, "dummy_source");
> DEFINE_CORESIGHT_DEVLIST(sink_devs, "dummy_sink");
>
> static int dummy_source_enable(struct coresight_device *csdev,
> - struct perf_event *event, enum cs_mode mode)
> + struct perf_event *event, enum cs_mode mode,
> + __maybe_unused struct coresight_trace_id_map *id_map)
> {
> dev_dbg(csdev->dev.parent, "Dummy source enabled\n");
>
> diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
> index c0c60e6a1703..7fb55dafb639 100644
> --- a/drivers/hwtracing/coresight/coresight-etm-perf.c
> +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
> @@ -229,10 +229,13 @@ static void free_event_data(struct work_struct *work)
> struct list_head **ppath;
>
> ppath = etm_event_cpu_path_ptr(event_data, cpu);
> - if (!(IS_ERR_OR_NULL(*ppath)))
> + if (!(IS_ERR_OR_NULL(*ppath))) {
> + struct coresight_device *sink = coresight_get_sink(*ppath);
> +
> + coresight_trace_id_put_cpu_id_map(cpu, &sink->perf_sink_id_map);
> coresight_release_path(*ppath);
> + }
> *ppath = NULL;
> - coresight_trace_id_put_cpu_id(cpu);
> }
>
> /* mark perf event as done for trace id allocator */
> @@ -401,7 +404,7 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,
> }
>
> /* ensure we can allocate a trace ID for this CPU */
> - trace_id = coresight_trace_id_get_cpu_id(cpu);
> + trace_id = coresight_trace_id_get_cpu_id_map(cpu, &sink->perf_sink_id_map);
> if (!IS_VALID_CS_TRACE_ID(trace_id)) {
> cpumask_clear_cpu(cpu, mask);
> coresight_release_path(path);
> @@ -495,7 +498,8 @@ static void etm_event_start(struct perf_event *event, int flags)
> goto fail_end_stop;
>
> /* Finally enable the tracer */
> - if (source_ops(csdev)->enable(csdev, event, CS_MODE_PERF))
> + if (source_ops(csdev)->enable(csdev, event, CS_MODE_PERF,
> + &sink->perf_sink_id_map))
> goto fail_disable_path;
>
> /*
> @@ -507,7 +511,8 @@ static void etm_event_start(struct perf_event *event, int flags)
> hw_id = FIELD_PREP(CS_AUX_HW_ID_VERSION_MASK,
> CS_AUX_HW_ID_CURR_VERSION);
> hw_id |= FIELD_PREP(CS_AUX_HW_ID_TRACE_ID_MASK,
> - coresight_trace_id_read_cpu_id(cpu));
> + coresight_trace_id_read_cpu_id_map(cpu,
> + &sink->perf_sink_id_map));
> perf_report_aux_output_id(event, hw_id);
> }
>
> diff --git a/drivers/hwtracing/coresight/coresight-etm3x-core.c b/drivers/hwtracing/coresight/coresight-etm3x-core.c
> index 8b362605d242..c103f4c70f5d 100644
> --- a/drivers/hwtracing/coresight/coresight-etm3x-core.c
> +++ b/drivers/hwtracing/coresight/coresight-etm3x-core.c
> @@ -481,7 +481,8 @@ void etm_release_trace_id(struct etm_drvdata *drvdata)
> }
>
> static int etm_enable_perf(struct coresight_device *csdev,
> - struct perf_event *event)
> + struct perf_event *event,
> + struct coresight_trace_id_map *id_map)
> {
> struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
> int trace_id;
> @@ -500,7 +501,7 @@ static int etm_enable_perf(struct coresight_device *csdev,
> * with perf locks - we know the ID cannot change until perf shuts down
> * the session
> */
> - trace_id = coresight_trace_id_read_cpu_id(drvdata->cpu);
> + trace_id = coresight_trace_id_read_cpu_id_map(drvdata->cpu, id_map);
> if (!IS_VALID_CS_TRACE_ID(trace_id)) {
> dev_err(&drvdata->csdev->dev, "Failed to set trace ID for %s on CPU%d\n",
> dev_name(&drvdata->csdev->dev), drvdata->cpu);
> @@ -553,7 +554,7 @@ static int etm_enable_sysfs(struct coresight_device *csdev)
> }
>
> static int etm_enable(struct coresight_device *csdev, struct perf_event *event,
> - enum cs_mode mode)
> + enum cs_mode mode, struct coresight_trace_id_map *id_map)
> {
> int ret;
> struct etm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
> @@ -568,7 +569,7 @@ static int etm_enable(struct coresight_device *csdev, struct perf_event *event,
> ret = etm_enable_sysfs(csdev);
> break;
> case CS_MODE_PERF:
> - ret = etm_enable_perf(csdev, event);
> + ret = etm_enable_perf(csdev, event, id_map);
> break;
> default:
> ret = -EINVAL;
> diff --git a/drivers/hwtracing/coresight/coresight-etm4x-core.c b/drivers/hwtracing/coresight/coresight-etm4x-core.c
> index bf01f01964cf..66d44a404ad0 100644
> --- a/drivers/hwtracing/coresight/coresight-etm4x-core.c
> +++ b/drivers/hwtracing/coresight/coresight-etm4x-core.c
> @@ -752,7 +752,8 @@ static int etm4_parse_event_config(struct coresight_device *csdev,
> }
>
> static int etm4_enable_perf(struct coresight_device *csdev,
> - struct perf_event *event)
> + struct perf_event *event,
> + struct coresight_trace_id_map *id_map)
> {
> int ret = 0, trace_id;
> struct etmv4_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
> @@ -775,7 +776,7 @@ static int etm4_enable_perf(struct coresight_device *csdev,
> * with perf locks - we know the ID cannot change until perf shuts down
> * the session
> */
> - trace_id = coresight_trace_id_read_cpu_id(drvdata->cpu);
> + trace_id = coresight_trace_id_read_cpu_id_map(drvdata->cpu, id_map);
> if (!IS_VALID_CS_TRACE_ID(trace_id)) {
> dev_err(&drvdata->csdev->dev, "Failed to set trace ID for %s on CPU%d\n",
> dev_name(&drvdata->csdev->dev), drvdata->cpu);
> @@ -837,7 +838,7 @@ static int etm4_enable_sysfs(struct coresight_device *csdev)
> }
>
> static int etm4_enable(struct coresight_device *csdev, struct perf_event *event,
> - enum cs_mode mode)
> + enum cs_mode mode, struct coresight_trace_id_map *id_map)
> {
> int ret;
>
> @@ -851,7 +852,7 @@ static int etm4_enable(struct coresight_device *csdev, struct perf_event *event,
> ret = etm4_enable_sysfs(csdev);
> break;
> case CS_MODE_PERF:
> - ret = etm4_enable_perf(csdev, event);
> + ret = etm4_enable_perf(csdev, event, id_map);
> break;
> default:
> ret = -EINVAL;
> diff --git a/drivers/hwtracing/coresight/coresight-stm.c b/drivers/hwtracing/coresight/coresight-stm.c
> index 117dbb484543..cb3e04755c99 100644
> --- a/drivers/hwtracing/coresight/coresight-stm.c
> +++ b/drivers/hwtracing/coresight/coresight-stm.c
> @@ -194,7 +194,8 @@ static void stm_enable_hw(struct stm_drvdata *drvdata)
> }
>
> static int stm_enable(struct coresight_device *csdev, struct perf_event *event,
> - enum cs_mode mode)
> + enum cs_mode mode,
> + __maybe_unused struct coresight_trace_id_map *trace_id)
> {
> struct stm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
>
> diff --git a/drivers/hwtracing/coresight/coresight-sysfs.c b/drivers/hwtracing/coresight/coresight-sysfs.c
> index 1e67cc7758d7..a01c9e54e2ed 100644
> --- a/drivers/hwtracing/coresight/coresight-sysfs.c
> +++ b/drivers/hwtracing/coresight/coresight-sysfs.c
> @@ -9,6 +9,7 @@
> #include <linux/kernel.h>
>
> #include "coresight-priv.h"
> +#include "coresight-trace-id.h"
>
> /*
> * Use IDR to map the hash of the source's device name
> @@ -63,7 +64,7 @@ static int coresight_enable_source_sysfs(struct coresight_device *csdev,
> */
> lockdep_assert_held(&coresight_mutex);
> if (coresight_get_mode(csdev) != CS_MODE_SYSFS) {
> - ret = source_ops(csdev)->enable(csdev, data, mode);
> + ret = source_ops(csdev)->enable(csdev, data, mode, NULL);
> if (ret)
> return ret;
> }
> diff --git a/drivers/hwtracing/coresight/coresight-tpdm.c b/drivers/hwtracing/coresight/coresight-tpdm.c
> index 0726f8842552..0a5e20cf23e8 100644
> --- a/drivers/hwtracing/coresight/coresight-tpdm.c
> +++ b/drivers/hwtracing/coresight/coresight-tpdm.c
> @@ -439,7 +439,8 @@ static void __tpdm_enable(struct tpdm_drvdata *drvdata)
> }
>
> static int tpdm_enable(struct coresight_device *csdev, struct perf_event *event,
> - enum cs_mode mode)
> + enum cs_mode mode,
> + __maybe_unused struct coresight_trace_id_map *id_map)
> {
> struct tpdm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
>
> diff --git a/include/linux/coresight.h b/include/linux/coresight.h
> index 7d62b88bfb5c..9c3067e2e38b 100644
> --- a/include/linux/coresight.h
> +++ b/include/linux/coresight.h
> @@ -290,6 +290,7 @@ struct coresight_device {
> bool sysfs_sink_activated;
> struct dev_ext_attribute *ea;
> struct coresight_device *def_sink;
> + struct coresight_trace_id_map perf_sink_id_map;
> /* sysfs links between components */
> int nr_links;
> bool has_conns_grp;
> @@ -384,7 +385,7 @@ struct coresight_ops_link {
> struct coresight_ops_source {
> int (*cpu_id)(struct coresight_device *csdev);
> int (*enable)(struct coresight_device *csdev, struct perf_event *event,
> - enum cs_mode mode);
> + enum cs_mode mode, struct coresight_trace_id_map *id_map);
> void (*disable)(struct coresight_device *csdev,
> struct perf_event *event);
> };
> --
> 2.34.1
>
Reviewed-by: Mike Leach <mike.leach@linaro.org>
--
Mike Leach
Principal Engineer, ARM Ltd.
Manchester Design Centre. UK
^ permalink raw reply [flat|nested] 40+ messages in thread
* [PATCH v5 15/17] coresight: Remove pending trace ID release mechanism
2024-07-12 10:20 [PATCH v5 00/17] coresight: Use per-sink trace ID maps for Perf sessions James Clark
` (13 preceding siblings ...)
2024-07-12 10:20 ` [PATCH v5 14/17] coresight: Use per-sink trace ID maps for Perf sessions James Clark
@ 2024-07-12 10:20 ` James Clark
2024-07-17 15:03 ` Mike Leach
2024-07-12 10:20 ` [PATCH v5 16/17] coresight: Emit sink ID in the HW_ID packets James Clark
2024-07-12 10:20 ` [PATCH v5 17/17] coresight: Make trace ID map spinlock local to the map James Clark
16 siblings, 1 reply; 40+ messages in thread
From: James Clark @ 2024-07-12 10:20 UTC (permalink / raw)
To: coresight, suzuki.poulose, gankulkarni, mike.leach, leo.yan,
anshuman.khandual
Cc: James Clark, James Clark, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, John Garry, Will Deacon, Peter Zijlstra,
Ingo Molnar, Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
Jiri Olsa, Ian Rogers, Adrian Hunter, Liang, Kan, linux-kernel,
linux-arm-kernel, linux-stm32, linux-perf-users
From: James Clark <james.clark@arm.com>
Pending the release of IDs was a way of managing concurrent sysfs and
Perf sessions in a single global ID map. Perf may have finished while
sysfs hadn't, and Perf shouldn't release the IDs in use by sysfs and
vice versa.
Now that Perf uses its own exclusive ID maps, pending release doesn't
result in any different behavior than just releasing all IDs when the
last Perf session finishes. As part of the per-sink trace ID change, we
would have still had to make the pending mechanism work on a per-sink
basis, due to the overlapping ID allocations, so instead of making that
more complicated, just remove it.
Signed-off-by: James Clark <james.clark@arm.com>
Signed-off-by: James Clark <james.clark@linaro.org>
---
.../hwtracing/coresight/coresight-etm-perf.c | 18 +++--
.../hwtracing/coresight/coresight-trace-id.c | 67 +++++--------------
.../hwtracing/coresight/coresight-trace-id.h | 31 ++++-----
include/linux/coresight.h | 6 +-
4 files changed, 43 insertions(+), 79 deletions(-)
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
index 7fb55dafb639..70c99f0409b2 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -232,15 +232,21 @@ static void free_event_data(struct work_struct *work)
if (!(IS_ERR_OR_NULL(*ppath))) {
struct coresight_device *sink = coresight_get_sink(*ppath);
- coresight_trace_id_put_cpu_id_map(cpu, &sink->perf_sink_id_map);
+ /*
+ * Mark perf event as done for trace id allocator, but don't call
+ * coresight_trace_id_put_cpu_id_map() on individual IDs. Perf sessions
+ * never free trace IDs to ensure that the ID associated with a CPU
+ * cannot change during their and others' concurrent sessions. Instead,
+ * a refcount is used so that the last event to call
+ * coresight_trace_id_perf_stop() frees all IDs.
+ */
+ coresight_trace_id_perf_stop(&sink->perf_sink_id_map);
+
coresight_release_path(*ppath);
}
*ppath = NULL;
}
- /* mark perf event as done for trace id allocator */
- coresight_trace_id_perf_stop();
-
free_percpu(event_data->path);
kfree(event_data);
}
@@ -328,9 +334,6 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,
sink = user_sink = coresight_get_sink_by_id(id);
}
- /* tell the trace ID allocator that a perf event is starting up */
- coresight_trace_id_perf_start();
-
/* check if user wants a coresight configuration selected */
cfg_hash = (u32)((event->attr.config2 & GENMASK_ULL(63, 32)) >> 32);
if (cfg_hash) {
@@ -411,6 +414,7 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,
continue;
}
+ coresight_trace_id_perf_start(&sink->perf_sink_id_map);
*etm_event_cpu_path_ptr(event_data, cpu) = path;
}
diff --git a/drivers/hwtracing/coresight/coresight-trace-id.c b/drivers/hwtracing/coresight/coresight-trace-id.c
index 8a777c0af6ea..bddaed3e5cf8 100644
--- a/drivers/hwtracing/coresight/coresight-trace-id.c
+++ b/drivers/hwtracing/coresight/coresight-trace-id.c
@@ -18,12 +18,6 @@ static struct coresight_trace_id_map id_map_default = {
.cpu_map = &id_map_default_cpu_ids
};
-/* maintain a record of the pending releases per cpu */
-static cpumask_t cpu_id_release_pending;
-
-/* perf session active counter */
-static atomic_t perf_cs_etm_session_active = ATOMIC_INIT(0);
-
/* lock to protect id_map and cpu data */
static DEFINE_SPINLOCK(id_map_lock);
@@ -35,7 +29,6 @@ static void coresight_trace_id_dump_table(struct coresight_trace_id_map *id_map,
{
pr_debug("%s id_map::\n", func_name);
pr_debug("Used = %*pb\n", CORESIGHT_TRACE_IDS_MAX, id_map->used_ids);
- pr_debug("Pend = %*pb\n", CORESIGHT_TRACE_IDS_MAX, id_map->pend_rel_ids);
}
#define DUMP_ID_MAP(map) coresight_trace_id_dump_table(map, __func__)
#define DUMP_ID_CPU(cpu, id) pr_debug("%s called; cpu=%d, id=%d\n", __func__, cpu, id)
@@ -122,34 +115,18 @@ static void coresight_trace_id_free(int id, struct coresight_trace_id_map *id_ma
clear_bit(id, id_map->used_ids);
}
-static void coresight_trace_id_set_pend_rel(int id, struct coresight_trace_id_map *id_map)
-{
- if (WARN(!IS_VALID_CS_TRACE_ID(id), "Invalid Trace ID %d\n", id))
- return;
- set_bit(id, id_map->pend_rel_ids);
-}
-
/*
- * release all pending IDs for all current maps & clear CPU associations
- *
- * This currently operates on the default id map, but may be extended to
- * operate on all registered id maps if per sink id maps are used.
+ * Release all IDs and clear CPU associations.
*/
-static void coresight_trace_id_release_all_pending(void)
+static void coresight_trace_id_release_all(struct coresight_trace_id_map *id_map)
{
- struct coresight_trace_id_map *id_map = &id_map_default;
unsigned long flags;
- int cpu, bit;
+ int cpu;
spin_lock_irqsave(&id_map_lock, flags);
- for_each_set_bit(bit, id_map->pend_rel_ids, CORESIGHT_TRACE_ID_RES_TOP) {
- clear_bit(bit, id_map->used_ids);
- clear_bit(bit, id_map->pend_rel_ids);
- }
- for_each_cpu(cpu, &cpu_id_release_pending) {
- atomic_set(per_cpu_ptr(id_map_default.cpu_map, cpu), 0);
- cpumask_clear_cpu(cpu, &cpu_id_release_pending);
- }
+ bitmap_zero(id_map->used_ids, CORESIGHT_TRACE_IDS_MAX);
+ for_each_possible_cpu(cpu)
+ atomic_set(per_cpu_ptr(id_map->cpu_map, cpu), 0);
spin_unlock_irqrestore(&id_map_lock, flags);
DUMP_ID_MAP(id_map);
}
@@ -164,7 +141,7 @@ static int _coresight_trace_id_get_cpu_id(int cpu, struct coresight_trace_id_map
/* check for existing allocation for this CPU */
id = _coresight_trace_id_read_cpu_id(cpu, id_map);
if (id)
- goto get_cpu_id_clr_pend;
+ goto get_cpu_id_out_unlock;
/*
* Find a new ID.
@@ -185,11 +162,6 @@ static int _coresight_trace_id_get_cpu_id(int cpu, struct coresight_trace_id_map
/* allocate the new id to the cpu */
atomic_set(per_cpu_ptr(id_map->cpu_map, cpu), id);
-get_cpu_id_clr_pend:
- /* we are (re)using this ID - so ensure it is not marked for release */
- cpumask_clear_cpu(cpu, &cpu_id_release_pending);
- clear_bit(id, id_map->pend_rel_ids);
-
get_cpu_id_out_unlock:
spin_unlock_irqrestore(&id_map_lock, flags);
@@ -210,15 +182,8 @@ static void _coresight_trace_id_put_cpu_id(int cpu, struct coresight_trace_id_ma
spin_lock_irqsave(&id_map_lock, flags);
- if (atomic_read(&perf_cs_etm_session_active)) {
- /* set release at pending if perf still active */
- coresight_trace_id_set_pend_rel(id, id_map);
- cpumask_set_cpu(cpu, &cpu_id_release_pending);
- } else {
- /* otherwise clear id */
- coresight_trace_id_free(id, id_map);
- atomic_set(per_cpu_ptr(id_map->cpu_map, cpu), 0);
- }
+ coresight_trace_id_free(id, id_map);
+ atomic_set(per_cpu_ptr(id_map->cpu_map, cpu), 0);
spin_unlock_irqrestore(&id_map_lock, flags);
DUMP_ID_CPU(cpu, id);
@@ -302,17 +267,17 @@ void coresight_trace_id_put_system_id(int id)
}
EXPORT_SYMBOL_GPL(coresight_trace_id_put_system_id);
-void coresight_trace_id_perf_start(void)
+void coresight_trace_id_perf_start(struct coresight_trace_id_map *id_map)
{
- atomic_inc(&perf_cs_etm_session_active);
- PERF_SESSION(atomic_read(&perf_cs_etm_session_active));
+ atomic_inc(&id_map->perf_cs_etm_session_active);
+ PERF_SESSION(atomic_read(&id_map->perf_cs_etm_session_active));
}
EXPORT_SYMBOL_GPL(coresight_trace_id_perf_start);
-void coresight_trace_id_perf_stop(void)
+void coresight_trace_id_perf_stop(struct coresight_trace_id_map *id_map)
{
- if (!atomic_dec_return(&perf_cs_etm_session_active))
- coresight_trace_id_release_all_pending();
- PERF_SESSION(atomic_read(&perf_cs_etm_session_active));
+ if (!atomic_dec_return(&id_map->perf_cs_etm_session_active))
+ coresight_trace_id_release_all(id_map);
+ PERF_SESSION(atomic_read(&id_map->perf_cs_etm_session_active));
}
EXPORT_SYMBOL_GPL(coresight_trace_id_perf_stop);
diff --git a/drivers/hwtracing/coresight/coresight-trace-id.h b/drivers/hwtracing/coresight/coresight-trace-id.h
index 840babdd0794..9aae50a553ca 100644
--- a/drivers/hwtracing/coresight/coresight-trace-id.h
+++ b/drivers/hwtracing/coresight/coresight-trace-id.h
@@ -17,9 +17,10 @@
* released when done.
*
* In order to ensure that a consistent cpu / ID matching is maintained
- * throughout a perf cs_etm event session - a session in progress flag will
- * be maintained, and released IDs not cleared until the perf session is
- * complete. This allows the same CPU to be re-allocated its prior ID.
+ * throughout a perf cs_etm event session - a session in progress flag will be
+ * maintained for each sink, and IDs are cleared when all the perf sessions
+ * complete. This allows the same CPU to be re-allocated its prior ID when
+ * events are scheduled in and out.
*
*
* Trace ID maps will be created and initialised to prevent architecturally
@@ -66,11 +67,7 @@ int coresight_trace_id_get_cpu_id_map(int cpu, struct coresight_trace_id_map *id
/**
* Release an allocated trace ID associated with the CPU.
*
- * This will release the CoreSight trace ID associated with the CPU,
- * unless a perf session is in operation.
- *
- * If a perf session is in operation then the ID will be marked as pending
- * release.
+ * This will release the CoreSight trace ID associated with the CPU.
*
* @cpu: The CPU index to release the associated trace ID.
*/
@@ -133,21 +130,21 @@ void coresight_trace_id_put_system_id(int id);
/**
* Notify the Trace ID allocator that a perf session is starting.
*
- * Increase the perf session reference count - called by perf when setting up
- * a trace event.
+ * Increase the perf session reference count - called by perf when setting up a
+ * trace event.
*
- * This reference count is used by the ID allocator to ensure that trace IDs
- * associated with a CPU cannot change or be released during a perf session.
+ * Perf sessions never free trace IDs to ensure that the ID associated with a
+ * CPU cannot change during their and others' concurrent sessions. Instead,
+ * this refcount is used so that the last event to finish always frees all IDs.
*/
-void coresight_trace_id_perf_start(void);
+void coresight_trace_id_perf_start(struct coresight_trace_id_map *id_map);
/**
* Notify the ID allocator that a perf session is stopping.
*
- * Decrease the perf session reference count.
- * if this causes the count to go to zero, then all Trace IDs marked as pending
- * release, will be released.
+ * Decrease the perf session reference count. If this causes the count to go to
+ * zero, then all Trace IDs will be released.
*/
-void coresight_trace_id_perf_stop(void);
+void coresight_trace_id_perf_stop(struct coresight_trace_id_map *id_map);
#endif /* _CORESIGHT_TRACE_ID_H */
diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index 9c3067e2e38b..197949fd2c35 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -227,14 +227,12 @@ struct coresight_sysfs_link {
* @used_ids: Bitmap to register available (bit = 0) and in use (bit = 1) IDs.
* Initialised so that the reserved IDs are permanently marked as
* in use.
- * @pend_rel_ids: CPU IDs that have been released by the trace source but not
- * yet marked as available, to allow re-allocation to the same
- * CPU during a perf session.
+ * @perf_cs_etm_session_active: Number of Perf sessions using this ID map.
*/
struct coresight_trace_id_map {
DECLARE_BITMAP(used_ids, CORESIGHT_TRACE_IDS_MAX);
- DECLARE_BITMAP(pend_rel_ids, CORESIGHT_TRACE_IDS_MAX);
atomic_t __percpu *cpu_map;
+ atomic_t perf_cs_etm_session_active;
};
/**
--
2.34.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* Re: [PATCH v5 15/17] coresight: Remove pending trace ID release mechanism
2024-07-12 10:20 ` [PATCH v5 15/17] coresight: Remove pending trace ID release mechanism James Clark
@ 2024-07-17 15:03 ` Mike Leach
0 siblings, 0 replies; 40+ messages in thread
From: Mike Leach @ 2024-07-17 15:03 UTC (permalink / raw)
To: James Clark
Cc: coresight, suzuki.poulose, gankulkarni, leo.yan,
anshuman.khandual, James Clark, Alexander Shishkin,
Maxime Coquelin, Alexandre Torgue, John Garry, Will Deacon,
Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Jiri Olsa, Ian Rogers, Adrian Hunter,
Liang, Kan, linux-kernel, linux-arm-kernel, linux-stm32,
linux-perf-users
On Fri, 12 Jul 2024 at 11:23, James Clark <james.clark@linaro.org> wrote:
>
> From: James Clark <james.clark@arm.com>
>
> Pending the release of IDs was a way of managing concurrent sysfs and
> Perf sessions in a single global ID map. Perf may have finished while
> sysfs hadn't, and Perf shouldn't release the IDs in use by sysfs and
> vice versa.
>
> Now that Perf uses its own exclusive ID maps, pending release doesn't
> result in any different behavior than just releasing all IDs when the
> last Perf session finishes. As part of the per-sink trace ID change, we
> would have still had to make the pending mechanism work on a per-sink
> basis, due to the overlapping ID allocations, so instead of making that
> more complicated, just remove it.
>
> Signed-off-by: James Clark <james.clark@arm.com>
> Signed-off-by: James Clark <james.clark@linaro.org>
> ---
> .../hwtracing/coresight/coresight-etm-perf.c | 18 +++--
> .../hwtracing/coresight/coresight-trace-id.c | 67 +++++--------------
> .../hwtracing/coresight/coresight-trace-id.h | 31 ++++-----
> include/linux/coresight.h | 6 +-
> 4 files changed, 43 insertions(+), 79 deletions(-)
>
> diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
> index 7fb55dafb639..70c99f0409b2 100644
> --- a/drivers/hwtracing/coresight/coresight-etm-perf.c
> +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
> @@ -232,15 +232,21 @@ static void free_event_data(struct work_struct *work)
> if (!(IS_ERR_OR_NULL(*ppath))) {
> struct coresight_device *sink = coresight_get_sink(*ppath);
>
> - coresight_trace_id_put_cpu_id_map(cpu, &sink->perf_sink_id_map);
> + /*
> + * Mark perf event as done for trace id allocator, but don't call
> + * coresight_trace_id_put_cpu_id_map() on individual IDs. Perf sessions
> + * never free trace IDs to ensure that the ID associated with a CPU
> + * cannot change during their and others' concurrent sessions. Instead,
> + * a refcount is used so that the last event to call
> + * coresight_trace_id_perf_stop() frees all IDs.
> + */
> + coresight_trace_id_perf_stop(&sink->perf_sink_id_map);
> +
> coresight_release_path(*ppath);
> }
> *ppath = NULL;
> }
>
> - /* mark perf event as done for trace id allocator */
> - coresight_trace_id_perf_stop();
> -
> free_percpu(event_data->path);
> kfree(event_data);
> }
> @@ -328,9 +334,6 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,
> sink = user_sink = coresight_get_sink_by_id(id);
> }
>
> - /* tell the trace ID allocator that a perf event is starting up */
> - coresight_trace_id_perf_start();
> -
> /* check if user wants a coresight configuration selected */
> cfg_hash = (u32)((event->attr.config2 & GENMASK_ULL(63, 32)) >> 32);
> if (cfg_hash) {
> @@ -411,6 +414,7 @@ static void *etm_setup_aux(struct perf_event *event, void **pages,
> continue;
> }
>
> + coresight_trace_id_perf_start(&sink->perf_sink_id_map);
> *etm_event_cpu_path_ptr(event_data, cpu) = path;
> }
>
> diff --git a/drivers/hwtracing/coresight/coresight-trace-id.c b/drivers/hwtracing/coresight/coresight-trace-id.c
> index 8a777c0af6ea..bddaed3e5cf8 100644
> --- a/drivers/hwtracing/coresight/coresight-trace-id.c
> +++ b/drivers/hwtracing/coresight/coresight-trace-id.c
> @@ -18,12 +18,6 @@ static struct coresight_trace_id_map id_map_default = {
> .cpu_map = &id_map_default_cpu_ids
> };
>
> -/* maintain a record of the pending releases per cpu */
> -static cpumask_t cpu_id_release_pending;
> -
> -/* perf session active counter */
> -static atomic_t perf_cs_etm_session_active = ATOMIC_INIT(0);
> -
> /* lock to protect id_map and cpu data */
> static DEFINE_SPINLOCK(id_map_lock);
>
> @@ -35,7 +29,6 @@ static void coresight_trace_id_dump_table(struct coresight_trace_id_map *id_map,
> {
> pr_debug("%s id_map::\n", func_name);
> pr_debug("Used = %*pb\n", CORESIGHT_TRACE_IDS_MAX, id_map->used_ids);
> - pr_debug("Pend = %*pb\n", CORESIGHT_TRACE_IDS_MAX, id_map->pend_rel_ids);
> }
> #define DUMP_ID_MAP(map) coresight_trace_id_dump_table(map, __func__)
> #define DUMP_ID_CPU(cpu, id) pr_debug("%s called; cpu=%d, id=%d\n", __func__, cpu, id)
> @@ -122,34 +115,18 @@ static void coresight_trace_id_free(int id, struct coresight_trace_id_map *id_ma
> clear_bit(id, id_map->used_ids);
> }
>
> -static void coresight_trace_id_set_pend_rel(int id, struct coresight_trace_id_map *id_map)
> -{
> - if (WARN(!IS_VALID_CS_TRACE_ID(id), "Invalid Trace ID %d\n", id))
> - return;
> - set_bit(id, id_map->pend_rel_ids);
> -}
> -
> /*
> - * release all pending IDs for all current maps & clear CPU associations
> - *
> - * This currently operates on the default id map, but may be extended to
> - * operate on all registered id maps if per sink id maps are used.
> + * Release all IDs and clear CPU associations.
> */
> -static void coresight_trace_id_release_all_pending(void)
> +static void coresight_trace_id_release_all(struct coresight_trace_id_map *id_map)
> {
> - struct coresight_trace_id_map *id_map = &id_map_default;
> unsigned long flags;
> - int cpu, bit;
> + int cpu;
>
> spin_lock_irqsave(&id_map_lock, flags);
> - for_each_set_bit(bit, id_map->pend_rel_ids, CORESIGHT_TRACE_ID_RES_TOP) {
> - clear_bit(bit, id_map->used_ids);
> - clear_bit(bit, id_map->pend_rel_ids);
> - }
> - for_each_cpu(cpu, &cpu_id_release_pending) {
> - atomic_set(per_cpu_ptr(id_map_default.cpu_map, cpu), 0);
> - cpumask_clear_cpu(cpu, &cpu_id_release_pending);
> - }
> + bitmap_zero(id_map->used_ids, CORESIGHT_TRACE_IDS_MAX);
> + for_each_possible_cpu(cpu)
> + atomic_set(per_cpu_ptr(id_map->cpu_map, cpu), 0);
> spin_unlock_irqrestore(&id_map_lock, flags);
> DUMP_ID_MAP(id_map);
> }
> @@ -164,7 +141,7 @@ static int _coresight_trace_id_get_cpu_id(int cpu, struct coresight_trace_id_map
> /* check for existing allocation for this CPU */
> id = _coresight_trace_id_read_cpu_id(cpu, id_map);
> if (id)
> - goto get_cpu_id_clr_pend;
> + goto get_cpu_id_out_unlock;
>
> /*
> * Find a new ID.
> @@ -185,11 +162,6 @@ static int _coresight_trace_id_get_cpu_id(int cpu, struct coresight_trace_id_map
> /* allocate the new id to the cpu */
> atomic_set(per_cpu_ptr(id_map->cpu_map, cpu), id);
>
> -get_cpu_id_clr_pend:
> - /* we are (re)using this ID - so ensure it is not marked for release */
> - cpumask_clear_cpu(cpu, &cpu_id_release_pending);
> - clear_bit(id, id_map->pend_rel_ids);
> -
> get_cpu_id_out_unlock:
> spin_unlock_irqrestore(&id_map_lock, flags);
>
> @@ -210,15 +182,8 @@ static void _coresight_trace_id_put_cpu_id(int cpu, struct coresight_trace_id_ma
>
> spin_lock_irqsave(&id_map_lock, flags);
>
> - if (atomic_read(&perf_cs_etm_session_active)) {
> - /* set release at pending if perf still active */
> - coresight_trace_id_set_pend_rel(id, id_map);
> - cpumask_set_cpu(cpu, &cpu_id_release_pending);
> - } else {
> - /* otherwise clear id */
> - coresight_trace_id_free(id, id_map);
> - atomic_set(per_cpu_ptr(id_map->cpu_map, cpu), 0);
> - }
> + coresight_trace_id_free(id, id_map);
> + atomic_set(per_cpu_ptr(id_map->cpu_map, cpu), 0);
>
> spin_unlock_irqrestore(&id_map_lock, flags);
> DUMP_ID_CPU(cpu, id);
> @@ -302,17 +267,17 @@ void coresight_trace_id_put_system_id(int id)
> }
> EXPORT_SYMBOL_GPL(coresight_trace_id_put_system_id);
>
> -void coresight_trace_id_perf_start(void)
> +void coresight_trace_id_perf_start(struct coresight_trace_id_map *id_map)
> {
> - atomic_inc(&perf_cs_etm_session_active);
> - PERF_SESSION(atomic_read(&perf_cs_etm_session_active));
> + atomic_inc(&id_map->perf_cs_etm_session_active);
> + PERF_SESSION(atomic_read(&id_map->perf_cs_etm_session_active));
> }
> EXPORT_SYMBOL_GPL(coresight_trace_id_perf_start);
>
> -void coresight_trace_id_perf_stop(void)
> +void coresight_trace_id_perf_stop(struct coresight_trace_id_map *id_map)
> {
> - if (!atomic_dec_return(&perf_cs_etm_session_active))
> - coresight_trace_id_release_all_pending();
> - PERF_SESSION(atomic_read(&perf_cs_etm_session_active));
> + if (!atomic_dec_return(&id_map->perf_cs_etm_session_active))
> + coresight_trace_id_release_all(id_map);
> + PERF_SESSION(atomic_read(&id_map->perf_cs_etm_session_active));
> }
> EXPORT_SYMBOL_GPL(coresight_trace_id_perf_stop);
> diff --git a/drivers/hwtracing/coresight/coresight-trace-id.h b/drivers/hwtracing/coresight/coresight-trace-id.h
> index 840babdd0794..9aae50a553ca 100644
> --- a/drivers/hwtracing/coresight/coresight-trace-id.h
> +++ b/drivers/hwtracing/coresight/coresight-trace-id.h
> @@ -17,9 +17,10 @@
> * released when done.
> *
> * In order to ensure that a consistent cpu / ID matching is maintained
> - * throughout a perf cs_etm event session - a session in progress flag will
> - * be maintained, and released IDs not cleared until the perf session is
> - * complete. This allows the same CPU to be re-allocated its prior ID.
> + * throughout a perf cs_etm event session - a session in progress flag will be
> + * maintained for each sink, and IDs are cleared when all the perf sessions
> + * complete. This allows the same CPU to be re-allocated its prior ID when
> + * events are scheduled in and out.
> *
> *
> * Trace ID maps will be created and initialised to prevent architecturally
> @@ -66,11 +67,7 @@ int coresight_trace_id_get_cpu_id_map(int cpu, struct coresight_trace_id_map *id
> /**
> * Release an allocated trace ID associated with the CPU.
> *
> - * This will release the CoreSight trace ID associated with the CPU,
> - * unless a perf session is in operation.
> - *
> - * If a perf session is in operation then the ID will be marked as pending
> - * release.
> + * This will release the CoreSight trace ID associated with the CPU.
> *
> * @cpu: The CPU index to release the associated trace ID.
> */
> @@ -133,21 +130,21 @@ void coresight_trace_id_put_system_id(int id);
> /**
> * Notify the Trace ID allocator that a perf session is starting.
> *
> - * Increase the perf session reference count - called by perf when setting up
> - * a trace event.
> + * Increase the perf session reference count - called by perf when setting up a
> + * trace event.
> *
> - * This reference count is used by the ID allocator to ensure that trace IDs
> - * associated with a CPU cannot change or be released during a perf session.
> + * Perf sessions never free trace IDs to ensure that the ID associated with a
> + * CPU cannot change during their and others' concurrent sessions. Instead,
> + * this refcount is used so that the last event to finish always frees all IDs.
> */
> -void coresight_trace_id_perf_start(void);
> +void coresight_trace_id_perf_start(struct coresight_trace_id_map *id_map);
>
> /**
> * Notify the ID allocator that a perf session is stopping.
> *
> - * Decrease the perf session reference count.
> - * if this causes the count to go to zero, then all Trace IDs marked as pending
> - * release, will be released.
> + * Decrease the perf session reference count. If this causes the count to go to
> + * zero, then all Trace IDs will be released.
> */
> -void coresight_trace_id_perf_stop(void);
> +void coresight_trace_id_perf_stop(struct coresight_trace_id_map *id_map);
>
> #endif /* _CORESIGHT_TRACE_ID_H */
> diff --git a/include/linux/coresight.h b/include/linux/coresight.h
> index 9c3067e2e38b..197949fd2c35 100644
> --- a/include/linux/coresight.h
> +++ b/include/linux/coresight.h
> @@ -227,14 +227,12 @@ struct coresight_sysfs_link {
> * @used_ids: Bitmap to register available (bit = 0) and in use (bit = 1) IDs.
> * Initialised so that the reserved IDs are permanently marked as
> * in use.
> - * @pend_rel_ids: CPU IDs that have been released by the trace source but not
> - * yet marked as available, to allow re-allocation to the same
> - * CPU during a perf session.
> + * @perf_cs_etm_session_active: Number of Perf sessions using this ID map.
> */
> struct coresight_trace_id_map {
> DECLARE_BITMAP(used_ids, CORESIGHT_TRACE_IDS_MAX);
> - DECLARE_BITMAP(pend_rel_ids, CORESIGHT_TRACE_IDS_MAX);
> atomic_t __percpu *cpu_map;
> + atomic_t perf_cs_etm_session_active;
> };
>
> /**
> --
> 2.34.1
>
Reviewed-by: Mike Leach <mike.leach@linaro.org>
--
Mike Leach
Principal Engineer, ARM Ltd.
Manchester Design Centre. UK
^ permalink raw reply [flat|nested] 40+ messages in thread
* [PATCH v5 16/17] coresight: Emit sink ID in the HW_ID packets
2024-07-12 10:20 [PATCH v5 00/17] coresight: Use per-sink trace ID maps for Perf sessions James Clark
` (14 preceding siblings ...)
2024-07-12 10:20 ` [PATCH v5 15/17] coresight: Remove pending trace ID release mechanism James Clark
@ 2024-07-12 10:20 ` James Clark
2024-07-17 15:03 ` Mike Leach
2024-07-12 10:20 ` [PATCH v5 17/17] coresight: Make trace ID map spinlock local to the map James Clark
16 siblings, 1 reply; 40+ messages in thread
From: James Clark @ 2024-07-12 10:20 UTC (permalink / raw)
To: coresight, suzuki.poulose, gankulkarni, mike.leach, leo.yan,
anshuman.khandual
Cc: James Clark, James Clark, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, John Garry, Will Deacon, Peter Zijlstra,
Ingo Molnar, Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
Jiri Olsa, Ian Rogers, Adrian Hunter, Liang, Kan, linux-kernel,
linux-arm-kernel, linux-stm32, linux-perf-users
From: James Clark <james.clark@arm.com>
For Perf to be able to decode when per-sink trace IDs are used, emit the
sink that's being written to for each ETM.
Perf currently errors out if it sees a newer packet version so instead
of bumping it, add a new minor version field. This can be used to
signify new versions that have backwards compatible fields. Considering
this change is only for high core count machines, it doesn't make sense
to make a breaking change for everyone.
Signed-off-by: James Clark <james.clark@arm.com>
Signed-off-by: James Clark <james.clark@linaro.org>
---
drivers/hwtracing/coresight/coresight-core.c | 26 ++++++++++---------
.../hwtracing/coresight/coresight-etm-perf.c | 16 ++++++++----
drivers/hwtracing/coresight/coresight-priv.h | 1 +
include/linux/coresight-pmu.h | 17 +++++++++---
4 files changed, 39 insertions(+), 21 deletions(-)
diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c
index faf560ba8d64..c427e9344a84 100644
--- a/drivers/hwtracing/coresight/coresight-core.c
+++ b/drivers/hwtracing/coresight/coresight-core.c
@@ -487,23 +487,25 @@ struct coresight_device *coresight_get_sink(struct list_head *path)
return csdev;
}
+u32 coresight_get_sink_id(struct coresight_device *csdev)
+{
+ if (!csdev->ea)
+ return 0;
+
+ /*
+ * See function etm_perf_add_symlink_sink() to know where
+ * this comes from.
+ */
+ return (u32) (unsigned long) csdev->ea->var;
+}
+
static int coresight_sink_by_id(struct device *dev, const void *data)
{
struct coresight_device *csdev = to_coresight_device(dev);
- unsigned long hash;
if (csdev->type == CORESIGHT_DEV_TYPE_SINK ||
- csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) {
-
- if (!csdev->ea)
- return 0;
- /*
- * See function etm_perf_add_symlink_sink() to know where
- * this comes from.
- */
- hash = (unsigned long)csdev->ea->var;
-
- if ((u32)hash == *(u32 *)data)
+ csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) {
+ if (coresight_get_sink_id(csdev) == *(u32 *)data)
return 1;
}
diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
index 70c99f0409b2..ad6a8f4b70b6 100644
--- a/drivers/hwtracing/coresight/coresight-etm-perf.c
+++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
@@ -460,6 +460,7 @@ static void etm_event_start(struct perf_event *event, int flags)
struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu);
struct list_head *path;
u64 hw_id;
+ u8 trace_id;
if (!csdev)
goto fail;
@@ -512,11 +513,16 @@ static void etm_event_start(struct perf_event *event, int flags)
*/
if (!cpumask_test_cpu(cpu, &event_data->aux_hwid_done)) {
cpumask_set_cpu(cpu, &event_data->aux_hwid_done);
- hw_id = FIELD_PREP(CS_AUX_HW_ID_VERSION_MASK,
- CS_AUX_HW_ID_CURR_VERSION);
- hw_id |= FIELD_PREP(CS_AUX_HW_ID_TRACE_ID_MASK,
- coresight_trace_id_read_cpu_id_map(cpu,
- &sink->perf_sink_id_map));
+
+ trace_id = coresight_trace_id_read_cpu_id_map(cpu, &sink->perf_sink_id_map);
+
+ hw_id = FIELD_PREP(CS_AUX_HW_ID_MAJOR_VERSION_MASK,
+ CS_AUX_HW_ID_MAJOR_VERSION);
+ hw_id |= FIELD_PREP(CS_AUX_HW_ID_MINOR_VERSION_MASK,
+ CS_AUX_HW_ID_MINOR_VERSION);
+ hw_id |= FIELD_PREP(CS_AUX_HW_ID_TRACE_ID_MASK, trace_id);
+ hw_id |= FIELD_PREP(CS_AUX_HW_ID_SINK_ID_MASK, coresight_get_sink_id(sink));
+
perf_report_aux_output_id(event, hw_id);
}
diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h
index 61a46d3bdcc8..05f891ca6b5c 100644
--- a/drivers/hwtracing/coresight/coresight-priv.h
+++ b/drivers/hwtracing/coresight/coresight-priv.h
@@ -148,6 +148,7 @@ int coresight_make_links(struct coresight_device *orig,
struct coresight_device *target);
void coresight_remove_links(struct coresight_device *orig,
struct coresight_connection *conn);
+u32 coresight_get_sink_id(struct coresight_device *csdev);
#if IS_ENABLED(CONFIG_CORESIGHT_SOURCE_ETM3X)
extern int etm_readl_cp14(u32 off, unsigned int *val);
diff --git a/include/linux/coresight-pmu.h b/include/linux/coresight-pmu.h
index 51ac441a37c3..89b0ac0014b0 100644
--- a/include/linux/coresight-pmu.h
+++ b/include/linux/coresight-pmu.h
@@ -49,12 +49,21 @@
* Interpretation of the PERF_RECORD_AUX_OUTPUT_HW_ID payload.
* Used to associate a CPU with the CoreSight Trace ID.
* [07:00] - Trace ID - uses 8 bits to make value easy to read in file.
- * [59:08] - Unused (SBZ)
- * [63:60] - Version
+ * [39:08] - Sink ID - as reported in /sys/bus/event_source/devices/cs_etm/sinks/
+ * Added in minor version 1.
+ * [55:40] - Unused (SBZ)
+ * [59:56] - Minor Version - previously existing fields are compatible with
+ * all minor versions.
+ * [63:60] - Major Version - previously existing fields mean different things
+ * in new major versions.
*/
#define CS_AUX_HW_ID_TRACE_ID_MASK GENMASK_ULL(7, 0)
-#define CS_AUX_HW_ID_VERSION_MASK GENMASK_ULL(63, 60)
+#define CS_AUX_HW_ID_SINK_ID_MASK GENMASK_ULL(39, 8)
-#define CS_AUX_HW_ID_CURR_VERSION 0
+#define CS_AUX_HW_ID_MINOR_VERSION_MASK GENMASK_ULL(59, 56)
+#define CS_AUX_HW_ID_MAJOR_VERSION_MASK GENMASK_ULL(63, 60)
+
+#define CS_AUX_HW_ID_MAJOR_VERSION 0
+#define CS_AUX_HW_ID_MINOR_VERSION 1
#endif
--
2.34.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* Re: [PATCH v5 16/17] coresight: Emit sink ID in the HW_ID packets
2024-07-12 10:20 ` [PATCH v5 16/17] coresight: Emit sink ID in the HW_ID packets James Clark
@ 2024-07-17 15:03 ` Mike Leach
2024-07-19 9:29 ` James Clark
0 siblings, 1 reply; 40+ messages in thread
From: Mike Leach @ 2024-07-17 15:03 UTC (permalink / raw)
To: James Clark
Cc: coresight, suzuki.poulose, gankulkarni, leo.yan,
anshuman.khandual, James Clark, Alexander Shishkin,
Maxime Coquelin, Alexandre Torgue, John Garry, Will Deacon,
Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Jiri Olsa, Ian Rogers, Adrian Hunter,
Liang, Kan, linux-kernel, linux-arm-kernel, linux-stm32,
linux-perf-users
On Fri, 12 Jul 2024 at 11:23, James Clark <james.clark@linaro.org> wrote:
>
> From: James Clark <james.clark@arm.com>
>
> For Perf to be able to decode when per-sink trace IDs are used, emit the
> sink that's being written to for each ETM.
>
> Perf currently errors out if it sees a newer packet version so instead
> of bumping it, add a new minor version field. This can be used to
> signify new versions that have backwards compatible fields. Considering
> this change is only for high core count machines, it doesn't make sense
> to make a breaking change for everyone.
>
> Signed-off-by: James Clark <james.clark@arm.com>
> Signed-off-by: James Clark <james.clark@linaro.org>
> ---
> drivers/hwtracing/coresight/coresight-core.c | 26 ++++++++++---------
> .../hwtracing/coresight/coresight-etm-perf.c | 16 ++++++++----
> drivers/hwtracing/coresight/coresight-priv.h | 1 +
> include/linux/coresight-pmu.h | 17 +++++++++---
> 4 files changed, 39 insertions(+), 21 deletions(-)
>
> diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c
> index faf560ba8d64..c427e9344a84 100644
> --- a/drivers/hwtracing/coresight/coresight-core.c
> +++ b/drivers/hwtracing/coresight/coresight-core.c
> @@ -487,23 +487,25 @@ struct coresight_device *coresight_get_sink(struct list_head *path)
> return csdev;
> }
>
> +u32 coresight_get_sink_id(struct coresight_device *csdev)
> +{
> + if (!csdev->ea)
> + return 0;
> +
> + /*
> + * See function etm_perf_add_symlink_sink() to know where
> + * this comes from.
> + */
> + return (u32) (unsigned long) csdev->ea->var;
> +}
> +
> static int coresight_sink_by_id(struct device *dev, const void *data)
> {
> struct coresight_device *csdev = to_coresight_device(dev);
> - unsigned long hash;
>
> if (csdev->type == CORESIGHT_DEV_TYPE_SINK ||
> - csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) {
> -
> - if (!csdev->ea)
> - return 0;
> - /*
> - * See function etm_perf_add_symlink_sink() to know where
> - * this comes from.
> - */
> - hash = (unsigned long)csdev->ea->var;
> -
> - if ((u32)hash == *(u32 *)data)
> + csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) {
> + if (coresight_get_sink_id(csdev) == *(u32 *)data)
> return 1;
> }
>
> diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
> index 70c99f0409b2..ad6a8f4b70b6 100644
> --- a/drivers/hwtracing/coresight/coresight-etm-perf.c
> +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
> @@ -460,6 +460,7 @@ static void etm_event_start(struct perf_event *event, int flags)
> struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu);
> struct list_head *path;
> u64 hw_id;
> + u8 trace_id;
>
> if (!csdev)
> goto fail;
> @@ -512,11 +513,16 @@ static void etm_event_start(struct perf_event *event, int flags)
> */
> if (!cpumask_test_cpu(cpu, &event_data->aux_hwid_done)) {
> cpumask_set_cpu(cpu, &event_data->aux_hwid_done);
> - hw_id = FIELD_PREP(CS_AUX_HW_ID_VERSION_MASK,
> - CS_AUX_HW_ID_CURR_VERSION);
> - hw_id |= FIELD_PREP(CS_AUX_HW_ID_TRACE_ID_MASK,
> - coresight_trace_id_read_cpu_id_map(cpu,
> - &sink->perf_sink_id_map));
> +
> + trace_id = coresight_trace_id_read_cpu_id_map(cpu, &sink->perf_sink_id_map);
> +
> + hw_id = FIELD_PREP(CS_AUX_HW_ID_MAJOR_VERSION_MASK,
> + CS_AUX_HW_ID_MAJOR_VERSION);
> + hw_id |= FIELD_PREP(CS_AUX_HW_ID_MINOR_VERSION_MASK,
> + CS_AUX_HW_ID_MINOR_VERSION);
> + hw_id |= FIELD_PREP(CS_AUX_HW_ID_TRACE_ID_MASK, trace_id);
> + hw_id |= FIELD_PREP(CS_AUX_HW_ID_SINK_ID_MASK, coresight_get_sink_id(sink));
> +
> perf_report_aux_output_id(event, hw_id);
> }
>
> diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h
> index 61a46d3bdcc8..05f891ca6b5c 100644
> --- a/drivers/hwtracing/coresight/coresight-priv.h
> +++ b/drivers/hwtracing/coresight/coresight-priv.h
> @@ -148,6 +148,7 @@ int coresight_make_links(struct coresight_device *orig,
> struct coresight_device *target);
> void coresight_remove_links(struct coresight_device *orig,
> struct coresight_connection *conn);
> +u32 coresight_get_sink_id(struct coresight_device *csdev);
>
> #if IS_ENABLED(CONFIG_CORESIGHT_SOURCE_ETM3X)
> extern int etm_readl_cp14(u32 off, unsigned int *val);
> diff --git a/include/linux/coresight-pmu.h b/include/linux/coresight-pmu.h
> index 51ac441a37c3..89b0ac0014b0 100644
> --- a/include/linux/coresight-pmu.h
> +++ b/include/linux/coresight-pmu.h
> @@ -49,12 +49,21 @@
> * Interpretation of the PERF_RECORD_AUX_OUTPUT_HW_ID payload.
> * Used to associate a CPU with the CoreSight Trace ID.
> * [07:00] - Trace ID - uses 8 bits to make value easy to read in file.
> - * [59:08] - Unused (SBZ)
> - * [63:60] - Version
> + * [39:08] - Sink ID - as reported in /sys/bus/event_source/devices/cs_etm/sinks/
> + * Added in minor version 1.
> + * [55:40] - Unused (SBZ)
> + * [59:56] - Minor Version - previously existing fields are compatible with
> + * all minor versions.
> + * [63:60] - Major Version - previously existing fields mean different things
> + * in new major versions.
> */
> #define CS_AUX_HW_ID_TRACE_ID_MASK GENMASK_ULL(7, 0)
> -#define CS_AUX_HW_ID_VERSION_MASK GENMASK_ULL(63, 60)
> +#define CS_AUX_HW_ID_SINK_ID_MASK GENMASK_ULL(39, 8)
>
> -#define CS_AUX_HW_ID_CURR_VERSION 0
> +#define CS_AUX_HW_ID_MINOR_VERSION_MASK GENMASK_ULL(59, 56)
> +#define CS_AUX_HW_ID_MAJOR_VERSION_MASK GENMASK_ULL(63, 60)
> +
> +#define CS_AUX_HW_ID_MAJOR_VERSION 0
> +#define CS_AUX_HW_ID_MINOR_VERSION 1
>
> #endif
> --
> 2.34.1
>
--
Mike Leach
Principal Engineer, ARM Ltd.
Manchester Design Centre. UK
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [PATCH v5 16/17] coresight: Emit sink ID in the HW_ID packets
2024-07-17 15:03 ` Mike Leach
@ 2024-07-19 9:29 ` James Clark
2024-07-19 9:48 ` Mike Leach
0 siblings, 1 reply; 40+ messages in thread
From: James Clark @ 2024-07-19 9:29 UTC (permalink / raw)
To: Mike Leach
Cc: coresight, suzuki.poulose, gankulkarni, leo.yan,
anshuman.khandual, James Clark, Alexander Shishkin,
Maxime Coquelin, Alexandre Torgue, John Garry, Will Deacon,
Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Jiri Olsa, Ian Rogers, Adrian Hunter,
Liang, Kan, linux-kernel, linux-arm-kernel, linux-stm32,
linux-perf-users
On 17/07/2024 4:03 pm, Mike Leach wrote:
> On Fri, 12 Jul 2024 at 11:23, James Clark <james.clark@linaro.org> wrote:
>>
>> From: James Clark <james.clark@arm.com>
>>
>> For Perf to be able to decode when per-sink trace IDs are used, emit the
>> sink that's being written to for each ETM.
>>
>> Perf currently errors out if it sees a newer packet version so instead
>> of bumping it, add a new minor version field. This can be used to
>> signify new versions that have backwards compatible fields. Considering
>> this change is only for high core count machines, it doesn't make sense
>> to make a breaking change for everyone.
>>
>> Signed-off-by: James Clark <james.clark@arm.com>
>> Signed-off-by: James Clark <james.clark@linaro.org>
>> ---
>> drivers/hwtracing/coresight/coresight-core.c | 26 ++++++++++---------
>> .../hwtracing/coresight/coresight-etm-perf.c | 16 ++++++++----
>> drivers/hwtracing/coresight/coresight-priv.h | 1 +
>> include/linux/coresight-pmu.h | 17 +++++++++---
>> 4 files changed, 39 insertions(+), 21 deletions(-)
>>
>> diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c
>> index faf560ba8d64..c427e9344a84 100644
>> --- a/drivers/hwtracing/coresight/coresight-core.c
>> +++ b/drivers/hwtracing/coresight/coresight-core.c
>> @@ -487,23 +487,25 @@ struct coresight_device *coresight_get_sink(struct list_head *path)
>> return csdev;
>> }
>>
>> +u32 coresight_get_sink_id(struct coresight_device *csdev)
>> +{
>> + if (!csdev->ea)
>> + return 0;
>> +
>> + /*
>> + * See function etm_perf_add_symlink_sink() to know where
>> + * this comes from.
>> + */
>> + return (u32) (unsigned long) csdev->ea->var;
>> +}
>> +
>> static int coresight_sink_by_id(struct device *dev, const void *data)
>> {
>> struct coresight_device *csdev = to_coresight_device(dev);
>> - unsigned long hash;
>>
>> if (csdev->type == CORESIGHT_DEV_TYPE_SINK ||
>> - csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) {
>> -
>> - if (!csdev->ea)
>> - return 0;
>> - /*
>> - * See function etm_perf_add_symlink_sink() to know where
>> - * this comes from.
>> - */
>> - hash = (unsigned long)csdev->ea->var;
>> -
>> - if ((u32)hash == *(u32 *)data)
>> + csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) {
>> + if (coresight_get_sink_id(csdev) == *(u32 *)data)
>> return 1;
>> }
>>
>> diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
>> index 70c99f0409b2..ad6a8f4b70b6 100644
>> --- a/drivers/hwtracing/coresight/coresight-etm-perf.c
>> +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
>> @@ -460,6 +460,7 @@ static void etm_event_start(struct perf_event *event, int flags)
>> struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu);
>> struct list_head *path;
>> u64 hw_id;
>> + u8 trace_id;
>>
>> if (!csdev)
>> goto fail;
>> @@ -512,11 +513,16 @@ static void etm_event_start(struct perf_event *event, int flags)
>> */
>> if (!cpumask_test_cpu(cpu, &event_data->aux_hwid_done)) {
>> cpumask_set_cpu(cpu, &event_data->aux_hwid_done);
>> - hw_id = FIELD_PREP(CS_AUX_HW_ID_VERSION_MASK,
>> - CS_AUX_HW_ID_CURR_VERSION);
>> - hw_id |= FIELD_PREP(CS_AUX_HW_ID_TRACE_ID_MASK,
>> - coresight_trace_id_read_cpu_id_map(cpu,
>> - &sink->perf_sink_id_map));
>> +
>> + trace_id = coresight_trace_id_read_cpu_id_map(cpu, &sink->perf_sink_id_map);
>> +
>> + hw_id = FIELD_PREP(CS_AUX_HW_ID_MAJOR_VERSION_MASK,
>> + CS_AUX_HW_ID_MAJOR_VERSION);
>> + hw_id |= FIELD_PREP(CS_AUX_HW_ID_MINOR_VERSION_MASK,
>> + CS_AUX_HW_ID_MINOR_VERSION);
>> + hw_id |= FIELD_PREP(CS_AUX_HW_ID_TRACE_ID_MASK, trace_id);
>> + hw_id |= FIELD_PREP(CS_AUX_HW_ID_SINK_ID_MASK, coresight_get_sink_id(sink));
>> +
>> perf_report_aux_output_id(event, hw_id);
>> }
>>
>> diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h
>> index 61a46d3bdcc8..05f891ca6b5c 100644
>> --- a/drivers/hwtracing/coresight/coresight-priv.h
>> +++ b/drivers/hwtracing/coresight/coresight-priv.h
>> @@ -148,6 +148,7 @@ int coresight_make_links(struct coresight_device *orig,
>> struct coresight_device *target);
>> void coresight_remove_links(struct coresight_device *orig,
>> struct coresight_connection *conn);
>> +u32 coresight_get_sink_id(struct coresight_device *csdev);
>>
>> #if IS_ENABLED(CONFIG_CORESIGHT_SOURCE_ETM3X)
>> extern int etm_readl_cp14(u32 off, unsigned int *val);
>> diff --git a/include/linux/coresight-pmu.h b/include/linux/coresight-pmu.h
>> index 51ac441a37c3..89b0ac0014b0 100644
>> --- a/include/linux/coresight-pmu.h
>> +++ b/include/linux/coresight-pmu.h
>> @@ -49,12 +49,21 @@
>> * Interpretation of the PERF_RECORD_AUX_OUTPUT_HW_ID payload.
>> * Used to associate a CPU with the CoreSight Trace ID.
>> * [07:00] - Trace ID - uses 8 bits to make value easy to read in file.
>> - * [59:08] - Unused (SBZ)
>> - * [63:60] - Version
>> + * [39:08] - Sink ID - as reported in /sys/bus/event_source/devices/cs_etm/sinks/
>> + * Added in minor version 1.
>> + * [55:40] - Unused (SBZ)
>> + * [59:56] - Minor Version - previously existing fields are compatible with
>> + * all minor versions.
>> + * [63:60] - Major Version - previously existing fields mean different things
>> + * in new major versions.
>> */
>> #define CS_AUX_HW_ID_TRACE_ID_MASK GENMASK_ULL(7, 0)
>> -#define CS_AUX_HW_ID_VERSION_MASK GENMASK_ULL(63, 60)
>> +#define CS_AUX_HW_ID_SINK_ID_MASK GENMASK_ULL(39, 8)
>>
>> -#define CS_AUX_HW_ID_CURR_VERSION 0
>> +#define CS_AUX_HW_ID_MINOR_VERSION_MASK GENMASK_ULL(59, 56)
>> +#define CS_AUX_HW_ID_MAJOR_VERSION_MASK GENMASK_ULL(63, 60)
>> +
>> +#define CS_AUX_HW_ID_MAJOR_VERSION 0
>> +#define CS_AUX_HW_ID_MINOR_VERSION 1
>>
>> #endif
>> --
>> 2.34.1
>>
>
>
> --
> Mike Leach
> Principal Engineer, ARM Ltd.
> Manchester Design Centre. UK
Hi Mike,
I think you mis-sent this one
^ permalink raw reply [flat|nested] 40+ messages in thread
* Re: [PATCH v5 16/17] coresight: Emit sink ID in the HW_ID packets
2024-07-19 9:29 ` James Clark
@ 2024-07-19 9:48 ` Mike Leach
0 siblings, 0 replies; 40+ messages in thread
From: Mike Leach @ 2024-07-19 9:48 UTC (permalink / raw)
To: James Clark
Cc: coresight, suzuki.poulose, gankulkarni, leo.yan,
anshuman.khandual, James Clark, Alexander Shishkin,
Maxime Coquelin, Alexandre Torgue, John Garry, Will Deacon,
Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Jiri Olsa, Ian Rogers, Adrian Hunter,
Liang, Kan, linux-kernel, linux-arm-kernel, linux-stm32,
linux-perf-users
Indeed! missed out the:
Reviewed-by: Mike Leach <mike.leach@linaro.org>
On Fri, 19 Jul 2024 at 10:29, James Clark <james.clark@linaro.org> wrote:
>
>
>
> On 17/07/2024 4:03 pm, Mike Leach wrote:
> > On Fri, 12 Jul 2024 at 11:23, James Clark <james.clark@linaro.org> wrote:
> >>
> >> From: James Clark <james.clark@arm.com>
> >>
> >> For Perf to be able to decode when per-sink trace IDs are used, emit the
> >> sink that's being written to for each ETM.
> >>
> >> Perf currently errors out if it sees a newer packet version so instead
> >> of bumping it, add a new minor version field. This can be used to
> >> signify new versions that have backwards compatible fields. Considering
> >> this change is only for high core count machines, it doesn't make sense
> >> to make a breaking change for everyone.
> >>
> >> Signed-off-by: James Clark <james.clark@arm.com>
> >> Signed-off-by: James Clark <james.clark@linaro.org>
> >> ---
> >> drivers/hwtracing/coresight/coresight-core.c | 26 ++++++++++---------
> >> .../hwtracing/coresight/coresight-etm-perf.c | 16 ++++++++----
> >> drivers/hwtracing/coresight/coresight-priv.h | 1 +
> >> include/linux/coresight-pmu.h | 17 +++++++++---
> >> 4 files changed, 39 insertions(+), 21 deletions(-)
> >>
> >> diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c
> >> index faf560ba8d64..c427e9344a84 100644
> >> --- a/drivers/hwtracing/coresight/coresight-core.c
> >> +++ b/drivers/hwtracing/coresight/coresight-core.c
> >> @@ -487,23 +487,25 @@ struct coresight_device *coresight_get_sink(struct list_head *path)
> >> return csdev;
> >> }
> >>
> >> +u32 coresight_get_sink_id(struct coresight_device *csdev)
> >> +{
> >> + if (!csdev->ea)
> >> + return 0;
> >> +
> >> + /*
> >> + * See function etm_perf_add_symlink_sink() to know where
> >> + * this comes from.
> >> + */
> >> + return (u32) (unsigned long) csdev->ea->var;
> >> +}
> >> +
> >> static int coresight_sink_by_id(struct device *dev, const void *data)
> >> {
> >> struct coresight_device *csdev = to_coresight_device(dev);
> >> - unsigned long hash;
> >>
> >> if (csdev->type == CORESIGHT_DEV_TYPE_SINK ||
> >> - csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) {
> >> -
> >> - if (!csdev->ea)
> >> - return 0;
> >> - /*
> >> - * See function etm_perf_add_symlink_sink() to know where
> >> - * this comes from.
> >> - */
> >> - hash = (unsigned long)csdev->ea->var;
> >> -
> >> - if ((u32)hash == *(u32 *)data)
> >> + csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) {
> >> + if (coresight_get_sink_id(csdev) == *(u32 *)data)
> >> return 1;
> >> }
> >>
> >> diff --git a/drivers/hwtracing/coresight/coresight-etm-perf.c b/drivers/hwtracing/coresight/coresight-etm-perf.c
> >> index 70c99f0409b2..ad6a8f4b70b6 100644
> >> --- a/drivers/hwtracing/coresight/coresight-etm-perf.c
> >> +++ b/drivers/hwtracing/coresight/coresight-etm-perf.c
> >> @@ -460,6 +460,7 @@ static void etm_event_start(struct perf_event *event, int flags)
> >> struct coresight_device *sink, *csdev = per_cpu(csdev_src, cpu);
> >> struct list_head *path;
> >> u64 hw_id;
> >> + u8 trace_id;
> >>
> >> if (!csdev)
> >> goto fail;
> >> @@ -512,11 +513,16 @@ static void etm_event_start(struct perf_event *event, int flags)
> >> */
> >> if (!cpumask_test_cpu(cpu, &event_data->aux_hwid_done)) {
> >> cpumask_set_cpu(cpu, &event_data->aux_hwid_done);
> >> - hw_id = FIELD_PREP(CS_AUX_HW_ID_VERSION_MASK,
> >> - CS_AUX_HW_ID_CURR_VERSION);
> >> - hw_id |= FIELD_PREP(CS_AUX_HW_ID_TRACE_ID_MASK,
> >> - coresight_trace_id_read_cpu_id_map(cpu,
> >> - &sink->perf_sink_id_map));
> >> +
> >> + trace_id = coresight_trace_id_read_cpu_id_map(cpu, &sink->perf_sink_id_map);
> >> +
> >> + hw_id = FIELD_PREP(CS_AUX_HW_ID_MAJOR_VERSION_MASK,
> >> + CS_AUX_HW_ID_MAJOR_VERSION);
> >> + hw_id |= FIELD_PREP(CS_AUX_HW_ID_MINOR_VERSION_MASK,
> >> + CS_AUX_HW_ID_MINOR_VERSION);
> >> + hw_id |= FIELD_PREP(CS_AUX_HW_ID_TRACE_ID_MASK, trace_id);
> >> + hw_id |= FIELD_PREP(CS_AUX_HW_ID_SINK_ID_MASK, coresight_get_sink_id(sink));
> >> +
> >> perf_report_aux_output_id(event, hw_id);
> >> }
> >>
> >> diff --git a/drivers/hwtracing/coresight/coresight-priv.h b/drivers/hwtracing/coresight/coresight-priv.h
> >> index 61a46d3bdcc8..05f891ca6b5c 100644
> >> --- a/drivers/hwtracing/coresight/coresight-priv.h
> >> +++ b/drivers/hwtracing/coresight/coresight-priv.h
> >> @@ -148,6 +148,7 @@ int coresight_make_links(struct coresight_device *orig,
> >> struct coresight_device *target);
> >> void coresight_remove_links(struct coresight_device *orig,
> >> struct coresight_connection *conn);
> >> +u32 coresight_get_sink_id(struct coresight_device *csdev);
> >>
> >> #if IS_ENABLED(CONFIG_CORESIGHT_SOURCE_ETM3X)
> >> extern int etm_readl_cp14(u32 off, unsigned int *val);
> >> diff --git a/include/linux/coresight-pmu.h b/include/linux/coresight-pmu.h
> >> index 51ac441a37c3..89b0ac0014b0 100644
> >> --- a/include/linux/coresight-pmu.h
> >> +++ b/include/linux/coresight-pmu.h
> >> @@ -49,12 +49,21 @@
> >> * Interpretation of the PERF_RECORD_AUX_OUTPUT_HW_ID payload.
> >> * Used to associate a CPU with the CoreSight Trace ID.
> >> * [07:00] - Trace ID - uses 8 bits to make value easy to read in file.
> >> - * [59:08] - Unused (SBZ)
> >> - * [63:60] - Version
> >> + * [39:08] - Sink ID - as reported in /sys/bus/event_source/devices/cs_etm/sinks/
> >> + * Added in minor version 1.
> >> + * [55:40] - Unused (SBZ)
> >> + * [59:56] - Minor Version - previously existing fields are compatible with
> >> + * all minor versions.
> >> + * [63:60] - Major Version - previously existing fields mean different things
> >> + * in new major versions.
> >> */
> >> #define CS_AUX_HW_ID_TRACE_ID_MASK GENMASK_ULL(7, 0)
> >> -#define CS_AUX_HW_ID_VERSION_MASK GENMASK_ULL(63, 60)
> >> +#define CS_AUX_HW_ID_SINK_ID_MASK GENMASK_ULL(39, 8)
> >>
> >> -#define CS_AUX_HW_ID_CURR_VERSION 0
> >> +#define CS_AUX_HW_ID_MINOR_VERSION_MASK GENMASK_ULL(59, 56)
> >> +#define CS_AUX_HW_ID_MAJOR_VERSION_MASK GENMASK_ULL(63, 60)
> >> +
> >> +#define CS_AUX_HW_ID_MAJOR_VERSION 0
> >> +#define CS_AUX_HW_ID_MINOR_VERSION 1
> >>
> >> #endif
> >> --
> >> 2.34.1
> >>
> >
> >
> > --
> > Mike Leach
> > Principal Engineer, ARM Ltd.
> > Manchester Design Centre. UK
>
> Hi Mike,
>
> > I think you mis-sent this one
>
--
Mike Leach
Principal Engineer, ARM Ltd.
Manchester Design Centre. UK
^ permalink raw reply [flat|nested] 40+ messages in thread
* [PATCH v5 17/17] coresight: Make trace ID map spinlock local to the map
2024-07-12 10:20 [PATCH v5 00/17] coresight: Use per-sink trace ID maps for Perf sessions James Clark
` (15 preceding siblings ...)
2024-07-12 10:20 ` [PATCH v5 16/17] coresight: Emit sink ID in the HW_ID packets James Clark
@ 2024-07-12 10:20 ` James Clark
2024-07-17 15:03 ` Mike Leach
16 siblings, 1 reply; 40+ messages in thread
From: James Clark @ 2024-07-12 10:20 UTC (permalink / raw)
To: coresight, suzuki.poulose, gankulkarni, mike.leach, leo.yan,
anshuman.khandual
Cc: James Clark, James Clark, Alexander Shishkin, Maxime Coquelin,
Alexandre Torgue, John Garry, Will Deacon, Peter Zijlstra,
Ingo Molnar, Arnaldo Carvalho de Melo, Namhyung Kim, Mark Rutland,
Jiri Olsa, Ian Rogers, Adrian Hunter, Liang, Kan, linux-kernel,
linux-arm-kernel, linux-stm32, linux-perf-users
From: James Clark <james.clark@arm.com>
Reduce contention on the lock by replacing the global lock with one for
each map.
Signed-off-by: James Clark <james.clark@arm.com>
Signed-off-by: James Clark <james.clark@linaro.org>
---
drivers/hwtracing/coresight/coresight-core.c | 1 +
.../hwtracing/coresight/coresight-trace-id.c | 26 +++++++++----------
include/linux/coresight.h | 1 +
3 files changed, 14 insertions(+), 14 deletions(-)
diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c
index c427e9344a84..ea38ecf26fcb 100644
--- a/drivers/hwtracing/coresight/coresight-core.c
+++ b/drivers/hwtracing/coresight/coresight-core.c
@@ -1164,6 +1164,7 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
if (csdev->type == CORESIGHT_DEV_TYPE_SINK ||
csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) {
+ spin_lock_init(&csdev->perf_sink_id_map.lock);
csdev->perf_sink_id_map.cpu_map = alloc_percpu(atomic_t);
if (!csdev->perf_sink_id_map.cpu_map) {
kfree(csdev);
diff --git a/drivers/hwtracing/coresight/coresight-trace-id.c b/drivers/hwtracing/coresight/coresight-trace-id.c
index bddaed3e5cf8..d98e12cb30ec 100644
--- a/drivers/hwtracing/coresight/coresight-trace-id.c
+++ b/drivers/hwtracing/coresight/coresight-trace-id.c
@@ -15,12 +15,10 @@
/* Default trace ID map. Used in sysfs mode and for system sources */
static DEFINE_PER_CPU(atomic_t, id_map_default_cpu_ids) = ATOMIC_INIT(0);
static struct coresight_trace_id_map id_map_default = {
- .cpu_map = &id_map_default_cpu_ids
+ .cpu_map = &id_map_default_cpu_ids,
+ .lock = __SPIN_LOCK_UNLOCKED(id_map_default.lock)
};
-/* lock to protect id_map and cpu data */
-static DEFINE_SPINLOCK(id_map_lock);
-
/* #define TRACE_ID_DEBUG 1 */
#if defined(TRACE_ID_DEBUG) || defined(CONFIG_COMPILE_TEST)
@@ -123,11 +121,11 @@ static void coresight_trace_id_release_all(struct coresight_trace_id_map *id_map
unsigned long flags;
int cpu;
- spin_lock_irqsave(&id_map_lock, flags);
+ spin_lock_irqsave(&id_map->lock, flags);
bitmap_zero(id_map->used_ids, CORESIGHT_TRACE_IDS_MAX);
for_each_possible_cpu(cpu)
atomic_set(per_cpu_ptr(id_map->cpu_map, cpu), 0);
- spin_unlock_irqrestore(&id_map_lock, flags);
+ spin_unlock_irqrestore(&id_map->lock, flags);
DUMP_ID_MAP(id_map);
}
@@ -136,7 +134,7 @@ static int _coresight_trace_id_get_cpu_id(int cpu, struct coresight_trace_id_map
unsigned long flags;
int id;
- spin_lock_irqsave(&id_map_lock, flags);
+ spin_lock_irqsave(&id_map->lock, flags);
/* check for existing allocation for this CPU */
id = _coresight_trace_id_read_cpu_id(cpu, id_map);
@@ -163,7 +161,7 @@ static int _coresight_trace_id_get_cpu_id(int cpu, struct coresight_trace_id_map
atomic_set(per_cpu_ptr(id_map->cpu_map, cpu), id);
get_cpu_id_out_unlock:
- spin_unlock_irqrestore(&id_map_lock, flags);
+ spin_unlock_irqrestore(&id_map->lock, flags);
DUMP_ID_CPU(cpu, id);
DUMP_ID_MAP(id_map);
@@ -180,12 +178,12 @@ static void _coresight_trace_id_put_cpu_id(int cpu, struct coresight_trace_id_ma
if (!id)
return;
- spin_lock_irqsave(&id_map_lock, flags);
+ spin_lock_irqsave(&id_map->lock, flags);
coresight_trace_id_free(id, id_map);
atomic_set(per_cpu_ptr(id_map->cpu_map, cpu), 0);
- spin_unlock_irqrestore(&id_map_lock, flags);
+ spin_unlock_irqrestore(&id_map->lock, flags);
DUMP_ID_CPU(cpu, id);
DUMP_ID_MAP(id_map);
}
@@ -195,10 +193,10 @@ static int coresight_trace_id_map_get_system_id(struct coresight_trace_id_map *i
unsigned long flags;
int id;
- spin_lock_irqsave(&id_map_lock, flags);
+ spin_lock_irqsave(&id_map->lock, flags);
/* prefer odd IDs for system components to avoid legacy CPU IDS */
id = coresight_trace_id_alloc_new_id(id_map, 0, true);
- spin_unlock_irqrestore(&id_map_lock, flags);
+ spin_unlock_irqrestore(&id_map->lock, flags);
DUMP_ID(id);
DUMP_ID_MAP(id_map);
@@ -209,9 +207,9 @@ static void coresight_trace_id_map_put_system_id(struct coresight_trace_id_map *
{
unsigned long flags;
- spin_lock_irqsave(&id_map_lock, flags);
+ spin_lock_irqsave(&id_map->lock, flags);
coresight_trace_id_free(id, id_map);
- spin_unlock_irqrestore(&id_map_lock, flags);
+ spin_unlock_irqrestore(&id_map->lock, flags);
DUMP_ID(id);
DUMP_ID_MAP(id_map);
diff --git a/include/linux/coresight.h b/include/linux/coresight.h
index 197949fd2c35..c13342594278 100644
--- a/include/linux/coresight.h
+++ b/include/linux/coresight.h
@@ -233,6 +233,7 @@ struct coresight_trace_id_map {
DECLARE_BITMAP(used_ids, CORESIGHT_TRACE_IDS_MAX);
atomic_t __percpu *cpu_map;
atomic_t perf_cs_etm_session_active;
+ spinlock_t lock;
};
/**
--
2.34.1
^ permalink raw reply related [flat|nested] 40+ messages in thread
* Re: [PATCH v5 17/17] coresight: Make trace ID map spinlock local to the map
2024-07-12 10:20 ` [PATCH v5 17/17] coresight: Make trace ID map spinlock local to the map James Clark
@ 2024-07-17 15:03 ` Mike Leach
0 siblings, 0 replies; 40+ messages in thread
From: Mike Leach @ 2024-07-17 15:03 UTC (permalink / raw)
To: James Clark
Cc: coresight, suzuki.poulose, gankulkarni, leo.yan,
anshuman.khandual, James Clark, Alexander Shishkin,
Maxime Coquelin, Alexandre Torgue, John Garry, Will Deacon,
Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Mark Rutland, Jiri Olsa, Ian Rogers, Adrian Hunter,
Liang, Kan, linux-kernel, linux-arm-kernel, linux-stm32,
linux-perf-users
Reviewed-by: Mike Leach <mike.leach@linaro.org>
On Fri, 12 Jul 2024 at 11:23, James Clark <james.clark@linaro.org> wrote:
>
> From: James Clark <james.clark@arm.com>
>
> Reduce contention on the lock by replacing the global lock with one for
> each map.
>
> Signed-off-by: James Clark <james.clark@arm.com>
> Signed-off-by: James Clark <james.clark@linaro.org>
> ---
> drivers/hwtracing/coresight/coresight-core.c | 1 +
> .../hwtracing/coresight/coresight-trace-id.c | 26 +++++++++----------
> include/linux/coresight.h | 1 +
> 3 files changed, 14 insertions(+), 14 deletions(-)
>
> diff --git a/drivers/hwtracing/coresight/coresight-core.c b/drivers/hwtracing/coresight/coresight-core.c
> index c427e9344a84..ea38ecf26fcb 100644
> --- a/drivers/hwtracing/coresight/coresight-core.c
> +++ b/drivers/hwtracing/coresight/coresight-core.c
> @@ -1164,6 +1164,7 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
>
> if (csdev->type == CORESIGHT_DEV_TYPE_SINK ||
> csdev->type == CORESIGHT_DEV_TYPE_LINKSINK) {
> + spin_lock_init(&csdev->perf_sink_id_map.lock);
> csdev->perf_sink_id_map.cpu_map = alloc_percpu(atomic_t);
> if (!csdev->perf_sink_id_map.cpu_map) {
> kfree(csdev);
> diff --git a/drivers/hwtracing/coresight/coresight-trace-id.c b/drivers/hwtracing/coresight/coresight-trace-id.c
> index bddaed3e5cf8..d98e12cb30ec 100644
> --- a/drivers/hwtracing/coresight/coresight-trace-id.c
> +++ b/drivers/hwtracing/coresight/coresight-trace-id.c
> @@ -15,12 +15,10 @@
> /* Default trace ID map. Used in sysfs mode and for system sources */
> static DEFINE_PER_CPU(atomic_t, id_map_default_cpu_ids) = ATOMIC_INIT(0);
> static struct coresight_trace_id_map id_map_default = {
> - .cpu_map = &id_map_default_cpu_ids
> + .cpu_map = &id_map_default_cpu_ids,
> + .lock = __SPIN_LOCK_UNLOCKED(id_map_default.lock)
> };
>
> -/* lock to protect id_map and cpu data */
> -static DEFINE_SPINLOCK(id_map_lock);
> -
> /* #define TRACE_ID_DEBUG 1 */
> #if defined(TRACE_ID_DEBUG) || defined(CONFIG_COMPILE_TEST)
>
> @@ -123,11 +121,11 @@ static void coresight_trace_id_release_all(struct coresight_trace_id_map *id_map
> unsigned long flags;
> int cpu;
>
> - spin_lock_irqsave(&id_map_lock, flags);
> + spin_lock_irqsave(&id_map->lock, flags);
> bitmap_zero(id_map->used_ids, CORESIGHT_TRACE_IDS_MAX);
> for_each_possible_cpu(cpu)
> atomic_set(per_cpu_ptr(id_map->cpu_map, cpu), 0);
> - spin_unlock_irqrestore(&id_map_lock, flags);
> + spin_unlock_irqrestore(&id_map->lock, flags);
> DUMP_ID_MAP(id_map);
> }
>
> @@ -136,7 +134,7 @@ static int _coresight_trace_id_get_cpu_id(int cpu, struct coresight_trace_id_map
> unsigned long flags;
> int id;
>
> - spin_lock_irqsave(&id_map_lock, flags);
> + spin_lock_irqsave(&id_map->lock, flags);
>
> /* check for existing allocation for this CPU */
> id = _coresight_trace_id_read_cpu_id(cpu, id_map);
> @@ -163,7 +161,7 @@ static int _coresight_trace_id_get_cpu_id(int cpu, struct coresight_trace_id_map
> atomic_set(per_cpu_ptr(id_map->cpu_map, cpu), id);
>
> get_cpu_id_out_unlock:
> - spin_unlock_irqrestore(&id_map_lock, flags);
> + spin_unlock_irqrestore(&id_map->lock, flags);
>
> DUMP_ID_CPU(cpu, id);
> DUMP_ID_MAP(id_map);
> @@ -180,12 +178,12 @@ static void _coresight_trace_id_put_cpu_id(int cpu, struct coresight_trace_id_ma
> if (!id)
> return;
>
> - spin_lock_irqsave(&id_map_lock, flags);
> + spin_lock_irqsave(&id_map->lock, flags);
>
> coresight_trace_id_free(id, id_map);
> atomic_set(per_cpu_ptr(id_map->cpu_map, cpu), 0);
>
> - spin_unlock_irqrestore(&id_map_lock, flags);
> + spin_unlock_irqrestore(&id_map->lock, flags);
> DUMP_ID_CPU(cpu, id);
> DUMP_ID_MAP(id_map);
> }
> @@ -195,10 +193,10 @@ static int coresight_trace_id_map_get_system_id(struct coresight_trace_id_map *i
> unsigned long flags;
> int id;
>
> - spin_lock_irqsave(&id_map_lock, flags);
> + spin_lock_irqsave(&id_map->lock, flags);
> /* prefer odd IDs for system components to avoid legacy CPU IDS */
> id = coresight_trace_id_alloc_new_id(id_map, 0, true);
> - spin_unlock_irqrestore(&id_map_lock, flags);
> + spin_unlock_irqrestore(&id_map->lock, flags);
>
> DUMP_ID(id);
> DUMP_ID_MAP(id_map);
> @@ -209,9 +207,9 @@ static void coresight_trace_id_map_put_system_id(struct coresight_trace_id_map *
> {
> unsigned long flags;
>
> - spin_lock_irqsave(&id_map_lock, flags);
> + spin_lock_irqsave(&id_map->lock, flags);
> coresight_trace_id_free(id, id_map);
> - spin_unlock_irqrestore(&id_map_lock, flags);
> + spin_unlock_irqrestore(&id_map->lock, flags);
>
> DUMP_ID(id);
> DUMP_ID_MAP(id_map);
> diff --git a/include/linux/coresight.h b/include/linux/coresight.h
> index 197949fd2c35..c13342594278 100644
> --- a/include/linux/coresight.h
> +++ b/include/linux/coresight.h
> @@ -233,6 +233,7 @@ struct coresight_trace_id_map {
> DECLARE_BITMAP(used_ids, CORESIGHT_TRACE_IDS_MAX);
> atomic_t __percpu *cpu_map;
> atomic_t perf_cs_etm_session_active;
> + spinlock_t lock;
> };
>
> /**
> --
> 2.34.1
>
--
Mike Leach
Principal Engineer, ARM Ltd.
Manchester Design Centre. UK
^ permalink raw reply [flat|nested] 40+ messages in thread