* Re: [PATCH] perf tools: Properly set the value of 'old' and 'head' in snapshot mode
@ 2019-06-06 20:10 ` Arnaldo Carvalho de Melo
0 siblings, 0 replies; 13+ messages in thread
From: Arnaldo Carvalho de Melo @ 2019-06-06 20:10 UTC (permalink / raw)
To: Mathieu Poirier
Cc: suzuki.poulose, leo.yan, peterz, mingo, alexander.shishkin, jolsa,
linux-arm-kernel, linux-kernel
Em Wed, Jun 05, 2019 at 10:16:33AM -0600, Mathieu Poirier escreveu:
> This patch adds the necessary intelligence to properly compute the value
> of 'old' and 'head' when operating in snapshot mode. That way we can get
> the latest information in the AUX buffer and be compatible with the
> generic AUX ring buffer mechanic.
Leo, have you had the chance to test/review this one? Suzuki?
I also changed the subject to:
[PATCH] perf cs-etm: Properly set the value of 'old' and 'head' in snapshot mode
So that when looking at a 'git log --oneline' one can have the proper
context and know that it's about cs-etm.
- Arnaldo
> Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
> ---
> tools/perf/arch/arm/util/cs-etm.c | 127 +++++++++++++++++++++++++++++-
> 1 file changed, 123 insertions(+), 4 deletions(-)
>
> diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
> index 911426721170..0a278bbcaba6 100644
> --- a/tools/perf/arch/arm/util/cs-etm.c
> +++ b/tools/perf/arch/arm/util/cs-etm.c
> @@ -31,6 +31,8 @@ struct cs_etm_recording {
> struct auxtrace_record itr;
> struct perf_pmu *cs_etm_pmu;
> struct perf_evlist *evlist;
> + int wrapped_cnt;
> + bool *wrapped;
> bool snapshot_mode;
> size_t snapshot_size;
> };
> @@ -536,16 +538,131 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
> return 0;
> }
>
> -static int cs_etm_find_snapshot(struct auxtrace_record *itr __maybe_unused,
> +static int cs_etm_alloc_wrapped_array(struct cs_etm_recording *ptr, int idx)
> +{
> + bool *wrapped;
> + int cnt = ptr->wrapped_cnt;
> +
> + /* Make @ptr->wrapped as big as @idx */
> + while (cnt <= idx)
> + cnt++;
> +
> + /*
> + * Free'ed in cs_etm_recording_free(). Using realloc() to avoid
> + * cross compilation problems where the host's system supports
> + * reallocarray() but not the target.
> + */
> + wrapped = realloc(ptr->wrapped, cnt * sizeof(bool));
> + if (!wrapped)
> + return -ENOMEM;
> +
> + wrapped[cnt - 1] = false;
> + ptr->wrapped_cnt = cnt;
> + ptr->wrapped = wrapped;
> +
> + return 0;
> +}
> +
> +static bool cs_etm_buffer_has_wrapped(unsigned char *buffer,
> + size_t buffer_size, u64 head)
> +{
> + u64 i, watermark;
> + u64 *buf = (u64 *)buffer;
> + size_t buf_size = buffer_size;
> +
> + /*
> + * We want to look the very last 512 byte (chosen arbitrarily) in
> + * the ring buffer.
> + */
> + watermark = buf_size - 512;
> +
> + /*
> + * @head is continuously increasing - if its value is equal or greater
> + * than the size of the ring buffer, it has wrapped around.
> + */
> + if (head >= buffer_size)
> + return true;
> +
> + /*
> + * The value of @head is somewhere within the size of the ring buffer.
> + * This can be that there hasn't been enough data to fill the ring
> + * buffer yet or the trace time was so long that @head has numerically
> + * wrapped around. To find we need to check if we have data at the very
> + * end of the ring buffer. We can reliably do this because mmap'ed
> + * pages are zeroed out and there is a fresh mapping with every new
> + * session.
> + */
> +
> + /* @head is less than 512 byte from the end of the ring buffer */
> + if (head > watermark)
> + watermark = head;
> +
> + /*
> + * Speed things up by using 64 bit transactions (see "u64 *buf" above)
> + */
> + watermark >>= 3;
> + buf_size >>= 3;
> +
> + /*
> + * If we find trace data at the end of the ring buffer, @head has
> + * been there and has numerically wrapped around at least once.
> + */
> + for (i = watermark; i < buf_size; i++)
> + if (buf[i])
> + return true;
> +
> + return false;
> +}
> +
> +static int cs_etm_find_snapshot(struct auxtrace_record *itr,
> int idx, struct auxtrace_mmap *mm,
> - unsigned char *data __maybe_unused,
> + unsigned char *data,
> u64 *head, u64 *old)
> {
> + int err;
> + bool wrapped;
> + struct cs_etm_recording *ptr =
> + container_of(itr, struct cs_etm_recording, itr);
> +
> + /*
> + * Allocate memory to keep track of wrapping if this is the first
> + * time we deal with this *mm.
> + */
> + if (idx >= ptr->wrapped_cnt) {
> + err = cs_etm_alloc_wrapped_array(ptr, idx);
> + if (err)
> + return err;
> + }
> +
> + /*
> + * Check to see if *head has wrapped around. If it hasn't only the
> + * amount of data between *head and *old is snapshot'ed to avoid
> + * bloating the perf.data file with zeros. But as soon as *head has
> + * wrapped around the entire size of the AUX ring buffer it taken.
> + */
> + wrapped = ptr->wrapped[idx];
> + if (!wrapped && cs_etm_buffer_has_wrapped(data, mm->len, *head)) {
> + wrapped = true;
> + ptr->wrapped[idx] = true;
> + }
> +
> pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n",
> __func__, idx, (size_t)*old, (size_t)*head, mm->len);
>
> - *old = *head;
> - *head += mm->len;
> + /* No wrap has occurred, we can just use *head and *old. */
> + if (!wrapped)
> + return 0;
> +
> + /*
> + * *head has wrapped around - adjust *head and *old to pickup the
> + * entire content of the AUX buffer.
> + */
> + if (*head >= mm->len) {
> + *old = *head - mm->len;
> + } else {
> + *head += mm->len;
> + *old = *head - mm->len;
> + }
>
> return 0;
> }
> @@ -586,6 +703,8 @@ static void cs_etm_recording_free(struct auxtrace_record *itr)
> {
> struct cs_etm_recording *ptr =
> container_of(itr, struct cs_etm_recording, itr);
> +
> + zfree(&ptr->wrapped);
> free(ptr);
> }
>
> --
> 2.17.1
--
- Arnaldo
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [PATCH] perf tools: Properly set the value of 'old' and 'head' in snapshot mode
2019-06-06 20:10 ` Arnaldo Carvalho de Melo
@ 2019-06-07 6:44 ` Leo Yan
-1 siblings, 0 replies; 13+ messages in thread
From: Leo Yan @ 2019-06-07 6:44 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Mathieu Poirier, suzuki.poulose, peterz, linux-kernel,
alexander.shishkin, mingo, jolsa, linux-arm-kernel
On Thu, Jun 06, 2019 at 05:10:56PM -0300, Arnaldo Carvalho de Melo wrote:
> Em Wed, Jun 05, 2019 at 10:16:33AM -0600, Mathieu Poirier escreveu:
> > This patch adds the necessary intelligence to properly compute the value
> > of 'old' and 'head' when operating in snapshot mode. That way we can get
> > the latest information in the AUX buffer and be compatible with the
> > generic AUX ring buffer mechanic.
>
> Leo, have you had the chance to test/review this one? Suzuki?
Sure. I applied this patch on the perf/core branch (with latest
commit 3e4fbf36c1e3 'perf augmented_raw_syscalls: Move reading
filename to the loop') and passed testing with below steps:
# perf record -e cs_etm/@tmc_etr0/ -S -m,64 --per-thread ./sort &
[1] 19097
Bubble sorting array of 30000 elements
# kill -USR2 19097
# kill -USR2 19097
# kill -USR2 19097
[ perf record: Woken up 4 times to write data ]
[ perf record: Captured and wrote 0.753 MB perf.data ]
FWIW:
Tested-by: Leo Yan <leo.yan@linaro.org>
> I also changed the subject to:
>
> [PATCH] perf cs-etm: Properly set the value of 'old' and 'head' in snapshot mode
>
> So that when looking at a 'git log --oneline' one can have the proper
> context and know that it's about cs-etm.
>
> - Arnaldo
>
> > Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
> > ---
> > tools/perf/arch/arm/util/cs-etm.c | 127 +++++++++++++++++++++++++++++-
> > 1 file changed, 123 insertions(+), 4 deletions(-)
> >
> > diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
> > index 911426721170..0a278bbcaba6 100644
> > --- a/tools/perf/arch/arm/util/cs-etm.c
> > +++ b/tools/perf/arch/arm/util/cs-etm.c
> > @@ -31,6 +31,8 @@ struct cs_etm_recording {
> > struct auxtrace_record itr;
> > struct perf_pmu *cs_etm_pmu;
> > struct perf_evlist *evlist;
> > + int wrapped_cnt;
> > + bool *wrapped;
> > bool snapshot_mode;
> > size_t snapshot_size;
> > };
> > @@ -536,16 +538,131 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
> > return 0;
> > }
> >
> > -static int cs_etm_find_snapshot(struct auxtrace_record *itr __maybe_unused,
> > +static int cs_etm_alloc_wrapped_array(struct cs_etm_recording *ptr, int idx)
> > +{
> > + bool *wrapped;
> > + int cnt = ptr->wrapped_cnt;
> > +
> > + /* Make @ptr->wrapped as big as @idx */
> > + while (cnt <= idx)
> > + cnt++;
> > +
> > + /*
> > + * Free'ed in cs_etm_recording_free(). Using realloc() to avoid
> > + * cross compilation problems where the host's system supports
> > + * reallocarray() but not the target.
> > + */
> > + wrapped = realloc(ptr->wrapped, cnt * sizeof(bool));
> > + if (!wrapped)
> > + return -ENOMEM;
> > +
> > + wrapped[cnt - 1] = false;
> > + ptr->wrapped_cnt = cnt;
> > + ptr->wrapped = wrapped;
> > +
> > + return 0;
> > +}
> > +
> > +static bool cs_etm_buffer_has_wrapped(unsigned char *buffer,
> > + size_t buffer_size, u64 head)
> > +{
> > + u64 i, watermark;
> > + u64 *buf = (u64 *)buffer;
> > + size_t buf_size = buffer_size;
> > +
> > + /*
> > + * We want to look the very last 512 byte (chosen arbitrarily) in
> > + * the ring buffer.
> > + */
> > + watermark = buf_size - 512;
> > +
> > + /*
> > + * @head is continuously increasing - if its value is equal or greater
> > + * than the size of the ring buffer, it has wrapped around.
> > + */
> > + if (head >= buffer_size)
> > + return true;
> > +
> > + /*
> > + * The value of @head is somewhere within the size of the ring buffer.
> > + * This can be that there hasn't been enough data to fill the ring
> > + * buffer yet or the trace time was so long that @head has numerically
> > + * wrapped around. To find we need to check if we have data at the very
> > + * end of the ring buffer. We can reliably do this because mmap'ed
> > + * pages are zeroed out and there is a fresh mapping with every new
> > + * session.
> > + */
> > +
> > + /* @head is less than 512 byte from the end of the ring buffer */
> > + if (head > watermark)
> > + watermark = head;
> > +
> > + /*
> > + * Speed things up by using 64 bit transactions (see "u64 *buf" above)
> > + */
> > + watermark >>= 3;
> > + buf_size >>= 3;
> > +
> > + /*
> > + * If we find trace data at the end of the ring buffer, @head has
> > + * been there and has numerically wrapped around at least once.
> > + */
> > + for (i = watermark; i < buf_size; i++)
> > + if (buf[i])
> > + return true;
> > +
> > + return false;
> > +}
> > +
> > +static int cs_etm_find_snapshot(struct auxtrace_record *itr,
> > int idx, struct auxtrace_mmap *mm,
> > - unsigned char *data __maybe_unused,
> > + unsigned char *data,
> > u64 *head, u64 *old)
> > {
> > + int err;
> > + bool wrapped;
> > + struct cs_etm_recording *ptr =
> > + container_of(itr, struct cs_etm_recording, itr);
> > +
> > + /*
> > + * Allocate memory to keep track of wrapping if this is the first
> > + * time we deal with this *mm.
> > + */
> > + if (idx >= ptr->wrapped_cnt) {
> > + err = cs_etm_alloc_wrapped_array(ptr, idx);
> > + if (err)
> > + return err;
> > + }
> > +
> > + /*
> > + * Check to see if *head has wrapped around. If it hasn't only the
> > + * amount of data between *head and *old is snapshot'ed to avoid
> > + * bloating the perf.data file with zeros. But as soon as *head has
> > + * wrapped around the entire size of the AUX ring buffer it taken.
> > + */
> > + wrapped = ptr->wrapped[idx];
> > + if (!wrapped && cs_etm_buffer_has_wrapped(data, mm->len, *head)) {
> > + wrapped = true;
> > + ptr->wrapped[idx] = true;
> > + }
> > +
> > pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n",
> > __func__, idx, (size_t)*old, (size_t)*head, mm->len);
> >
> > - *old = *head;
> > - *head += mm->len;
> > + /* No wrap has occurred, we can just use *head and *old. */
> > + if (!wrapped)
> > + return 0;
> > +
> > + /*
> > + * *head has wrapped around - adjust *head and *old to pickup the
> > + * entire content of the AUX buffer.
> > + */
> > + if (*head >= mm->len) {
> > + *old = *head - mm->len;
> > + } else {
> > + *head += mm->len;
> > + *old = *head - mm->len;
> > + }
> >
> > return 0;
> > }
> > @@ -586,6 +703,8 @@ static void cs_etm_recording_free(struct auxtrace_record *itr)
> > {
> > struct cs_etm_recording *ptr =
> > container_of(itr, struct cs_etm_recording, itr);
> > +
> > + zfree(&ptr->wrapped);
> > free(ptr);
> > }
> >
> > --
> > 2.17.1
>
> --
>
> - Arnaldo
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [PATCH] perf tools: Properly set the value of 'old' and 'head' in snapshot mode
@ 2019-06-07 6:44 ` Leo Yan
0 siblings, 0 replies; 13+ messages in thread
From: Leo Yan @ 2019-06-07 6:44 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Mathieu Poirier, suzuki.poulose, peterz, mingo,
alexander.shishkin, jolsa, linux-arm-kernel, linux-kernel
On Thu, Jun 06, 2019 at 05:10:56PM -0300, Arnaldo Carvalho de Melo wrote:
> Em Wed, Jun 05, 2019 at 10:16:33AM -0600, Mathieu Poirier escreveu:
> > This patch adds the necessary intelligence to properly compute the value
> > of 'old' and 'head' when operating in snapshot mode. That way we can get
> > the latest information in the AUX buffer and be compatible with the
> > generic AUX ring buffer mechanic.
>
> Leo, have you had the chance to test/review this one? Suzuki?
Sure. I applied this patch on the perf/core branch (with latest
commit 3e4fbf36c1e3 'perf augmented_raw_syscalls: Move reading
filename to the loop') and passed testing with below steps:
# perf record -e cs_etm/@tmc_etr0/ -S -m,64 --per-thread ./sort &
[1] 19097
Bubble sorting array of 30000 elements
# kill -USR2 19097
# kill -USR2 19097
# kill -USR2 19097
[ perf record: Woken up 4 times to write data ]
[ perf record: Captured and wrote 0.753 MB perf.data ]
FWIW:
Tested-by: Leo Yan <leo.yan@linaro.org>
> I also changed the subject to:
>
> [PATCH] perf cs-etm: Properly set the value of 'old' and 'head' in snapshot mode
>
> So that when looking at a 'git log --oneline' one can have the proper
> context and know that it's about cs-etm.
>
> - Arnaldo
>
> > Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
> > ---
> > tools/perf/arch/arm/util/cs-etm.c | 127 +++++++++++++++++++++++++++++-
> > 1 file changed, 123 insertions(+), 4 deletions(-)
> >
> > diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
> > index 911426721170..0a278bbcaba6 100644
> > --- a/tools/perf/arch/arm/util/cs-etm.c
> > +++ b/tools/perf/arch/arm/util/cs-etm.c
> > @@ -31,6 +31,8 @@ struct cs_etm_recording {
> > struct auxtrace_record itr;
> > struct perf_pmu *cs_etm_pmu;
> > struct perf_evlist *evlist;
> > + int wrapped_cnt;
> > + bool *wrapped;
> > bool snapshot_mode;
> > size_t snapshot_size;
> > };
> > @@ -536,16 +538,131 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
> > return 0;
> > }
> >
> > -static int cs_etm_find_snapshot(struct auxtrace_record *itr __maybe_unused,
> > +static int cs_etm_alloc_wrapped_array(struct cs_etm_recording *ptr, int idx)
> > +{
> > + bool *wrapped;
> > + int cnt = ptr->wrapped_cnt;
> > +
> > + /* Make @ptr->wrapped as big as @idx */
> > + while (cnt <= idx)
> > + cnt++;
> > +
> > + /*
> > + * Free'ed in cs_etm_recording_free(). Using realloc() to avoid
> > + * cross compilation problems where the host's system supports
> > + * reallocarray() but not the target.
> > + */
> > + wrapped = realloc(ptr->wrapped, cnt * sizeof(bool));
> > + if (!wrapped)
> > + return -ENOMEM;
> > +
> > + wrapped[cnt - 1] = false;
> > + ptr->wrapped_cnt = cnt;
> > + ptr->wrapped = wrapped;
> > +
> > + return 0;
> > +}
> > +
> > +static bool cs_etm_buffer_has_wrapped(unsigned char *buffer,
> > + size_t buffer_size, u64 head)
> > +{
> > + u64 i, watermark;
> > + u64 *buf = (u64 *)buffer;
> > + size_t buf_size = buffer_size;
> > +
> > + /*
> > + * We want to look the very last 512 byte (chosen arbitrarily) in
> > + * the ring buffer.
> > + */
> > + watermark = buf_size - 512;
> > +
> > + /*
> > + * @head is continuously increasing - if its value is equal or greater
> > + * than the size of the ring buffer, it has wrapped around.
> > + */
> > + if (head >= buffer_size)
> > + return true;
> > +
> > + /*
> > + * The value of @head is somewhere within the size of the ring buffer.
> > + * This can be that there hasn't been enough data to fill the ring
> > + * buffer yet or the trace time was so long that @head has numerically
> > + * wrapped around. To find we need to check if we have data at the very
> > + * end of the ring buffer. We can reliably do this because mmap'ed
> > + * pages are zeroed out and there is a fresh mapping with every new
> > + * session.
> > + */
> > +
> > + /* @head is less than 512 byte from the end of the ring buffer */
> > + if (head > watermark)
> > + watermark = head;
> > +
> > + /*
> > + * Speed things up by using 64 bit transactions (see "u64 *buf" above)
> > + */
> > + watermark >>= 3;
> > + buf_size >>= 3;
> > +
> > + /*
> > + * If we find trace data at the end of the ring buffer, @head has
> > + * been there and has numerically wrapped around at least once.
> > + */
> > + for (i = watermark; i < buf_size; i++)
> > + if (buf[i])
> > + return true;
> > +
> > + return false;
> > +}
> > +
> > +static int cs_etm_find_snapshot(struct auxtrace_record *itr,
> > int idx, struct auxtrace_mmap *mm,
> > - unsigned char *data __maybe_unused,
> > + unsigned char *data,
> > u64 *head, u64 *old)
> > {
> > + int err;
> > + bool wrapped;
> > + struct cs_etm_recording *ptr =
> > + container_of(itr, struct cs_etm_recording, itr);
> > +
> > + /*
> > + * Allocate memory to keep track of wrapping if this is the first
> > + * time we deal with this *mm.
> > + */
> > + if (idx >= ptr->wrapped_cnt) {
> > + err = cs_etm_alloc_wrapped_array(ptr, idx);
> > + if (err)
> > + return err;
> > + }
> > +
> > + /*
> > + * Check to see if *head has wrapped around. If it hasn't only the
> > + * amount of data between *head and *old is snapshot'ed to avoid
> > + * bloating the perf.data file with zeros. But as soon as *head has
> > + * wrapped around the entire size of the AUX ring buffer it taken.
> > + */
> > + wrapped = ptr->wrapped[idx];
> > + if (!wrapped && cs_etm_buffer_has_wrapped(data, mm->len, *head)) {
> > + wrapped = true;
> > + ptr->wrapped[idx] = true;
> > + }
> > +
> > pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n",
> > __func__, idx, (size_t)*old, (size_t)*head, mm->len);
> >
> > - *old = *head;
> > - *head += mm->len;
> > + /* No wrap has occurred, we can just use *head and *old. */
> > + if (!wrapped)
> > + return 0;
> > +
> > + /*
> > + * *head has wrapped around - adjust *head and *old to pickup the
> > + * entire content of the AUX buffer.
> > + */
> > + if (*head >= mm->len) {
> > + *old = *head - mm->len;
> > + } else {
> > + *head += mm->len;
> > + *old = *head - mm->len;
> > + }
> >
> > return 0;
> > }
> > @@ -586,6 +703,8 @@ static void cs_etm_recording_free(struct auxtrace_record *itr)
> > {
> > struct cs_etm_recording *ptr =
> > container_of(itr, struct cs_etm_recording, itr);
> > +
> > + zfree(&ptr->wrapped);
> > free(ptr);
> > }
> >
> > --
> > 2.17.1
>
> --
>
> - Arnaldo
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [PATCH] perf tools: Properly set the value of 'old' and 'head' in snapshot mode
2019-06-07 6:44 ` Leo Yan
@ 2019-06-07 18:23 ` Arnaldo Carvalho de Melo
-1 siblings, 0 replies; 13+ messages in thread
From: Arnaldo Carvalho de Melo @ 2019-06-07 18:23 UTC (permalink / raw)
To: Leo Yan
Cc: Mathieu Poirier, suzuki.poulose, peterz, Arnaldo Carvalho de Melo,
linux-kernel, alexander.shishkin, mingo, jolsa, linux-arm-kernel
Em Fri, Jun 07, 2019 at 02:44:25PM +0800, Leo Yan escreveu:
> On Thu, Jun 06, 2019 at 05:10:56PM -0300, Arnaldo Carvalho de Melo wrote:
> > Em Wed, Jun 05, 2019 at 10:16:33AM -0600, Mathieu Poirier escreveu:
> > > This patch adds the necessary intelligence to properly compute the value
> > > of 'old' and 'head' when operating in snapshot mode. That way we can get
> > > the latest information in the AUX buffer and be compatible with the
> > > generic AUX ring buffer mechanic.
> >
> > Leo, have you had the chance to test/review this one? Suzuki?
>
> Sure. I applied this patch on the perf/core branch (with latest
> commit 3e4fbf36c1e3 'perf augmented_raw_syscalls: Move reading
> filename to the loop') and passed testing with below steps:
>
> # perf record -e cs_etm/@tmc_etr0/ -S -m,64 --per-thread ./sort &
> [1] 19097
> Bubble sorting array of 30000 elements
>
> # kill -USR2 19097
> # kill -USR2 19097
> # kill -USR2 19097
> [ perf record: Woken up 4 times to write data ]
> [ perf record: Captured and wrote 0.753 MB perf.data ]
>
> FWIW:
>
> Tested-by: Leo Yan <leo.yan@linaro.org>
Thanks a lot, I've added your "Tester notes:" and also your Tested-by:.
As I don't have hardware (yet) to test these patches, tests by people
who can test on real hardware are always super appreciated.
Any suggestions for a SBC that I could buy to be able to do so?
Regards,
- Arnaldo
> > I also changed the subject to:
> >
> > [PATCH] perf cs-etm: Properly set the value of 'old' and 'head' in snapshot mode
> >
> > So that when looking at a 'git log --oneline' one can have the proper
> > context and know that it's about cs-etm.
> >
> > - Arnaldo
> >
> > > Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
> > > ---
> > > tools/perf/arch/arm/util/cs-etm.c | 127 +++++++++++++++++++++++++++++-
> > > 1 file changed, 123 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
> > > index 911426721170..0a278bbcaba6 100644
> > > --- a/tools/perf/arch/arm/util/cs-etm.c
> > > +++ b/tools/perf/arch/arm/util/cs-etm.c
> > > @@ -31,6 +31,8 @@ struct cs_etm_recording {
> > > struct auxtrace_record itr;
> > > struct perf_pmu *cs_etm_pmu;
> > > struct perf_evlist *evlist;
> > > + int wrapped_cnt;
> > > + bool *wrapped;
> > > bool snapshot_mode;
> > > size_t snapshot_size;
> > > };
> > > @@ -536,16 +538,131 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
> > > return 0;
> > > }
> > >
> > > -static int cs_etm_find_snapshot(struct auxtrace_record *itr __maybe_unused,
> > > +static int cs_etm_alloc_wrapped_array(struct cs_etm_recording *ptr, int idx)
> > > +{
> > > + bool *wrapped;
> > > + int cnt = ptr->wrapped_cnt;
> > > +
> > > + /* Make @ptr->wrapped as big as @idx */
> > > + while (cnt <= idx)
> > > + cnt++;
> > > +
> > > + /*
> > > + * Free'ed in cs_etm_recording_free(). Using realloc() to avoid
> > > + * cross compilation problems where the host's system supports
> > > + * reallocarray() but not the target.
> > > + */
> > > + wrapped = realloc(ptr->wrapped, cnt * sizeof(bool));
> > > + if (!wrapped)
> > > + return -ENOMEM;
> > > +
> > > + wrapped[cnt - 1] = false;
> > > + ptr->wrapped_cnt = cnt;
> > > + ptr->wrapped = wrapped;
> > > +
> > > + return 0;
> > > +}
> > > +
> > > +static bool cs_etm_buffer_has_wrapped(unsigned char *buffer,
> > > + size_t buffer_size, u64 head)
> > > +{
> > > + u64 i, watermark;
> > > + u64 *buf = (u64 *)buffer;
> > > + size_t buf_size = buffer_size;
> > > +
> > > + /*
> > > + * We want to look the very last 512 byte (chosen arbitrarily) in
> > > + * the ring buffer.
> > > + */
> > > + watermark = buf_size - 512;
> > > +
> > > + /*
> > > + * @head is continuously increasing - if its value is equal or greater
> > > + * than the size of the ring buffer, it has wrapped around.
> > > + */
> > > + if (head >= buffer_size)
> > > + return true;
> > > +
> > > + /*
> > > + * The value of @head is somewhere within the size of the ring buffer.
> > > + * This can be that there hasn't been enough data to fill the ring
> > > + * buffer yet or the trace time was so long that @head has numerically
> > > + * wrapped around. To find we need to check if we have data at the very
> > > + * end of the ring buffer. We can reliably do this because mmap'ed
> > > + * pages are zeroed out and there is a fresh mapping with every new
> > > + * session.
> > > + */
> > > +
> > > + /* @head is less than 512 byte from the end of the ring buffer */
> > > + if (head > watermark)
> > > + watermark = head;
> > > +
> > > + /*
> > > + * Speed things up by using 64 bit transactions (see "u64 *buf" above)
> > > + */
> > > + watermark >>= 3;
> > > + buf_size >>= 3;
> > > +
> > > + /*
> > > + * If we find trace data at the end of the ring buffer, @head has
> > > + * been there and has numerically wrapped around at least once.
> > > + */
> > > + for (i = watermark; i < buf_size; i++)
> > > + if (buf[i])
> > > + return true;
> > > +
> > > + return false;
> > > +}
> > > +
> > > +static int cs_etm_find_snapshot(struct auxtrace_record *itr,
> > > int idx, struct auxtrace_mmap *mm,
> > > - unsigned char *data __maybe_unused,
> > > + unsigned char *data,
> > > u64 *head, u64 *old)
> > > {
> > > + int err;
> > > + bool wrapped;
> > > + struct cs_etm_recording *ptr =
> > > + container_of(itr, struct cs_etm_recording, itr);
> > > +
> > > + /*
> > > + * Allocate memory to keep track of wrapping if this is the first
> > > + * time we deal with this *mm.
> > > + */
> > > + if (idx >= ptr->wrapped_cnt) {
> > > + err = cs_etm_alloc_wrapped_array(ptr, idx);
> > > + if (err)
> > > + return err;
> > > + }
> > > +
> > > + /*
> > > + * Check to see if *head has wrapped around. If it hasn't only the
> > > + * amount of data between *head and *old is snapshot'ed to avoid
> > > + * bloating the perf.data file with zeros. But as soon as *head has
> > > + * wrapped around the entire size of the AUX ring buffer it taken.
> > > + */
> > > + wrapped = ptr->wrapped[idx];
> > > + if (!wrapped && cs_etm_buffer_has_wrapped(data, mm->len, *head)) {
> > > + wrapped = true;
> > > + ptr->wrapped[idx] = true;
> > > + }
> > > +
> > > pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n",
> > > __func__, idx, (size_t)*old, (size_t)*head, mm->len);
> > >
> > > - *old = *head;
> > > - *head += mm->len;
> > > + /* No wrap has occurred, we can just use *head and *old. */
> > > + if (!wrapped)
> > > + return 0;
> > > +
> > > + /*
> > > + * *head has wrapped around - adjust *head and *old to pickup the
> > > + * entire content of the AUX buffer.
> > > + */
> > > + if (*head >= mm->len) {
> > > + *old = *head - mm->len;
> > > + } else {
> > > + *head += mm->len;
> > > + *old = *head - mm->len;
> > > + }
> > >
> > > return 0;
> > > }
> > > @@ -586,6 +703,8 @@ static void cs_etm_recording_free(struct auxtrace_record *itr)
> > > {
> > > struct cs_etm_recording *ptr =
> > > container_of(itr, struct cs_etm_recording, itr);
> > > +
> > > + zfree(&ptr->wrapped);
> > > free(ptr);
> > > }
> > >
> > > --
> > > 2.17.1
> >
> > --
> >
> > - Arnaldo
--
- Arnaldo
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [PATCH] perf tools: Properly set the value of 'old' and 'head' in snapshot mode
@ 2019-06-07 18:23 ` Arnaldo Carvalho de Melo
0 siblings, 0 replies; 13+ messages in thread
From: Arnaldo Carvalho de Melo @ 2019-06-07 18:23 UTC (permalink / raw)
To: Leo Yan
Cc: Arnaldo Carvalho de Melo, Mathieu Poirier, suzuki.poulose, peterz,
mingo, alexander.shishkin, jolsa, linux-arm-kernel, linux-kernel
Em Fri, Jun 07, 2019 at 02:44:25PM +0800, Leo Yan escreveu:
> On Thu, Jun 06, 2019 at 05:10:56PM -0300, Arnaldo Carvalho de Melo wrote:
> > Em Wed, Jun 05, 2019 at 10:16:33AM -0600, Mathieu Poirier escreveu:
> > > This patch adds the necessary intelligence to properly compute the value
> > > of 'old' and 'head' when operating in snapshot mode. That way we can get
> > > the latest information in the AUX buffer and be compatible with the
> > > generic AUX ring buffer mechanic.
> >
> > Leo, have you had the chance to test/review this one? Suzuki?
>
> Sure. I applied this patch on the perf/core branch (with latest
> commit 3e4fbf36c1e3 'perf augmented_raw_syscalls: Move reading
> filename to the loop') and passed testing with below steps:
>
> # perf record -e cs_etm/@tmc_etr0/ -S -m,64 --per-thread ./sort &
> [1] 19097
> Bubble sorting array of 30000 elements
>
> # kill -USR2 19097
> # kill -USR2 19097
> # kill -USR2 19097
> [ perf record: Woken up 4 times to write data ]
> [ perf record: Captured and wrote 0.753 MB perf.data ]
>
> FWIW:
>
> Tested-by: Leo Yan <leo.yan@linaro.org>
Thanks a lot, I've added your "Tester notes:" and also your Tested-by:.
As I don't have hardware (yet) to test these patches, tests by people
who can test on real hardware are always super appreciated.
Any suggestions for a SBC that I could buy to be able to do so?
Regards,
- Arnaldo
> > I also changed the subject to:
> >
> > [PATCH] perf cs-etm: Properly set the value of 'old' and 'head' in snapshot mode
> >
> > So that when looking at a 'git log --oneline' one can have the proper
> > context and know that it's about cs-etm.
> >
> > - Arnaldo
> >
> > > Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
> > > ---
> > > tools/perf/arch/arm/util/cs-etm.c | 127 +++++++++++++++++++++++++++++-
> > > 1 file changed, 123 insertions(+), 4 deletions(-)
> > >
> > > diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
> > > index 911426721170..0a278bbcaba6 100644
> > > --- a/tools/perf/arch/arm/util/cs-etm.c
> > > +++ b/tools/perf/arch/arm/util/cs-etm.c
> > > @@ -31,6 +31,8 @@ struct cs_etm_recording {
> > > struct auxtrace_record itr;
> > > struct perf_pmu *cs_etm_pmu;
> > > struct perf_evlist *evlist;
> > > + int wrapped_cnt;
> > > + bool *wrapped;
> > > bool snapshot_mode;
> > > size_t snapshot_size;
> > > };
> > > @@ -536,16 +538,131 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
> > > return 0;
> > > }
> > >
> > > -static int cs_etm_find_snapshot(struct auxtrace_record *itr __maybe_unused,
> > > +static int cs_etm_alloc_wrapped_array(struct cs_etm_recording *ptr, int idx)
> > > +{
> > > + bool *wrapped;
> > > + int cnt = ptr->wrapped_cnt;
> > > +
> > > + /* Make @ptr->wrapped as big as @idx */
> > > + while (cnt <= idx)
> > > + cnt++;
> > > +
> > > + /*
> > > + * Free'ed in cs_etm_recording_free(). Using realloc() to avoid
> > > + * cross compilation problems where the host's system supports
> > > + * reallocarray() but not the target.
> > > + */
> > > + wrapped = realloc(ptr->wrapped, cnt * sizeof(bool));
> > > + if (!wrapped)
> > > + return -ENOMEM;
> > > +
> > > + wrapped[cnt - 1] = false;
> > > + ptr->wrapped_cnt = cnt;
> > > + ptr->wrapped = wrapped;
> > > +
> > > + return 0;
> > > +}
> > > +
> > > +static bool cs_etm_buffer_has_wrapped(unsigned char *buffer,
> > > + size_t buffer_size, u64 head)
> > > +{
> > > + u64 i, watermark;
> > > + u64 *buf = (u64 *)buffer;
> > > + size_t buf_size = buffer_size;
> > > +
> > > + /*
> > > + * We want to look the very last 512 byte (chosen arbitrarily) in
> > > + * the ring buffer.
> > > + */
> > > + watermark = buf_size - 512;
> > > +
> > > + /*
> > > + * @head is continuously increasing - if its value is equal or greater
> > > + * than the size of the ring buffer, it has wrapped around.
> > > + */
> > > + if (head >= buffer_size)
> > > + return true;
> > > +
> > > + /*
> > > + * The value of @head is somewhere within the size of the ring buffer.
> > > + * This can be that there hasn't been enough data to fill the ring
> > > + * buffer yet or the trace time was so long that @head has numerically
> > > + * wrapped around. To find we need to check if we have data at the very
> > > + * end of the ring buffer. We can reliably do this because mmap'ed
> > > + * pages are zeroed out and there is a fresh mapping with every new
> > > + * session.
> > > + */
> > > +
> > > + /* @head is less than 512 byte from the end of the ring buffer */
> > > + if (head > watermark)
> > > + watermark = head;
> > > +
> > > + /*
> > > + * Speed things up by using 64 bit transactions (see "u64 *buf" above)
> > > + */
> > > + watermark >>= 3;
> > > + buf_size >>= 3;
> > > +
> > > + /*
> > > + * If we find trace data at the end of the ring buffer, @head has
> > > + * been there and has numerically wrapped around at least once.
> > > + */
> > > + for (i = watermark; i < buf_size; i++)
> > > + if (buf[i])
> > > + return true;
> > > +
> > > + return false;
> > > +}
> > > +
> > > +static int cs_etm_find_snapshot(struct auxtrace_record *itr,
> > > int idx, struct auxtrace_mmap *mm,
> > > - unsigned char *data __maybe_unused,
> > > + unsigned char *data,
> > > u64 *head, u64 *old)
> > > {
> > > + int err;
> > > + bool wrapped;
> > > + struct cs_etm_recording *ptr =
> > > + container_of(itr, struct cs_etm_recording, itr);
> > > +
> > > + /*
> > > + * Allocate memory to keep track of wrapping if this is the first
> > > + * time we deal with this *mm.
> > > + */
> > > + if (idx >= ptr->wrapped_cnt) {
> > > + err = cs_etm_alloc_wrapped_array(ptr, idx);
> > > + if (err)
> > > + return err;
> > > + }
> > > +
> > > + /*
> > > + * Check to see if *head has wrapped around. If it hasn't only the
> > > + * amount of data between *head and *old is snapshot'ed to avoid
> > > + * bloating the perf.data file with zeros. But as soon as *head has
> > > + * wrapped around the entire size of the AUX ring buffer it taken.
> > > + */
> > > + wrapped = ptr->wrapped[idx];
> > > + if (!wrapped && cs_etm_buffer_has_wrapped(data, mm->len, *head)) {
> > > + wrapped = true;
> > > + ptr->wrapped[idx] = true;
> > > + }
> > > +
> > > pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n",
> > > __func__, idx, (size_t)*old, (size_t)*head, mm->len);
> > >
> > > - *old = *head;
> > > - *head += mm->len;
> > > + /* No wrap has occurred, we can just use *head and *old. */
> > > + if (!wrapped)
> > > + return 0;
> > > +
> > > + /*
> > > + * *head has wrapped around - adjust *head and *old to pickup the
> > > + * entire content of the AUX buffer.
> > > + */
> > > + if (*head >= mm->len) {
> > > + *old = *head - mm->len;
> > > + } else {
> > > + *head += mm->len;
> > > + *old = *head - mm->len;
> > > + }
> > >
> > > return 0;
> > > }
> > > @@ -586,6 +703,8 @@ static void cs_etm_recording_free(struct auxtrace_record *itr)
> > > {
> > > struct cs_etm_recording *ptr =
> > > container_of(itr, struct cs_etm_recording, itr);
> > > +
> > > + zfree(&ptr->wrapped);
> > > free(ptr);
> > > }
> > >
> > > --
> > > 2.17.1
> >
> > --
> >
> > - Arnaldo
--
- Arnaldo
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [PATCH] perf tools: Properly set the value of 'old' and 'head' in snapshot mode
2019-06-07 18:23 ` Arnaldo Carvalho de Melo
@ 2019-06-09 6:51 ` Leo Yan
-1 siblings, 0 replies; 13+ messages in thread
From: Leo Yan @ 2019-06-09 6:51 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Mathieu Poirier, suzuki.poulose, peterz, linux-kernel,
alexander.shishkin, mingo, jolsa, linux-arm-kernel
Hi Arnaldo,
On Fri, Jun 07, 2019 at 03:23:25PM -0300, Arnaldo Carvalho de Melo wrote:
> Em Fri, Jun 07, 2019 at 02:44:25PM +0800, Leo Yan escreveu:
> > On Thu, Jun 06, 2019 at 05:10:56PM -0300, Arnaldo Carvalho de Melo wrote:
> > > Em Wed, Jun 05, 2019 at 10:16:33AM -0600, Mathieu Poirier escreveu:
> > > > This patch adds the necessay intelligence to properly compute the value
> > > > of 'old' and 'head' when operating in snapshot mode. That way we can get
> > > > the latest information in the AUX buffer and be compatible with the
> > > > generic AUX ring buffer mechanic.
> > >
> > > Leo, have you had the chance to test/review this one? Suzuki?
> >
> > Sure. I applied this patch on the perf/core branch (with latest
> > commit 3e4fbf36c1e3 'perf augmented_raw_syscalls: Move reading
> > filename to the loop') and passed testing with below steps:
> >
> > # perf record -e cs_etm/@tmc_etr0/ -S -m,64 --per-thread ./sort &
> > [1] 19097
> > Bubble sorting array of 30000 elements
> >
> > # kill -USR2 19097
> > # kill -USR2 19097
> > # kill -USR2 19097
> > [ perf record: Woken up 4 times to write data ]
> > [ perf record: Captured and wrote 0.753 MB perf.data ]
> >
> > FWIW:
> >
> > Tested-by: Leo Yan <leo.yan@linaro.org>
>
> Thanks a lot, I've added your "Tester notes:" and also your Tested-by:.
>
> As I don't have hardware (yet) to test these patches, tests by people
> who can test on real hardware is always super appreciated.
You are very welcome and it's my pleasure :)
> Any suggestions for a SBC that I could buy to be able to do so?
Below are several Arm development boards for reference:
- DB410c [1]: This board is the first choice for myself, since this
board provides Debian (and Fedora :) support and it supports the
mainline kernel pretty well; the CoreSight also is well supported.
This board is about 80 USD so the cost is not expensive; on the
other hand, please note one drawback is that the SDRAM is only 1GB, which
makes it impossible to build some big projects (e.g. LLVM/Clang and
BCC); but it's sufficient for perf related development and
verification.
- There are several other boards I have in mind:
Raspberry Pi3 [2] and Hikey960 [3].
Raspberry Pi3 misses some features in the mainline kernel [4] and it
has not enabled the CoreSight hardware tracing feature; Hikey960 also
has some patches that are out of the mainline kernel.
Unless you have special requirements (e.g. you want to use the board to
build LLVM/Clang/BCC with big DDR size, etc), these two boards can be
secondary choices.
Please feel free to let me know if you have questions about the boards.
Thanks,
Leo Yan
[1] https://www.96boards.org/product/dragonboard410c/
[2] https://www.raspberrypi.org/products/raspberry-pi-3-model-b-plus/
[3] https://www.96boards.org/product/hikey960/
[4] https://www.raspberrypi.org/forums/viewtopic.php?t=236568
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] perf tools: Properly set the value of 'old' and 'head' in snapshot mode
@ 2019-06-09 6:51 ` Leo Yan
0 siblings, 0 replies; 13+ messages in thread
From: Leo Yan @ 2019-06-09 6:51 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Mathieu Poirier, suzuki.poulose, peterz, mingo,
alexander.shishkin, jolsa, linux-arm-kernel, linux-kernel
Hi Arnaldo,
On Fri, Jun 07, 2019 at 03:23:25PM -0300, Arnaldo Carvalho de Melo wrote:
> Em Fri, Jun 07, 2019 at 02:44:25PM +0800, Leo Yan escreveu:
> > On Thu, Jun 06, 2019 at 05:10:56PM -0300, Arnaldo Carvalho de Melo wrote:
> > > Em Wed, Jun 05, 2019 at 10:16:33AM -0600, Mathieu Poirier escreveu:
> > > > This patch adds the necessay intelligence to properly compute the value
> > > > of 'old' and 'head' when operating in snapshot mode. That way we can get
> > > > the latest information in the AUX buffer and be compatible with the
> > > > generic AUX ring buffer mechanic.
> > >
> > > Leo, have you had the chance to test/review this one? Suzuki?
> >
> > Sure. I applied this patch on the perf/core branch (with latest
> > commit 3e4fbf36c1e3 'perf augmented_raw_syscalls: Move reading
> > filename to the loop') and passed testing with below steps:
> >
> > # perf record -e cs_etm/@tmc_etr0/ -S -m,64 --per-thread ./sort &
> > [1] 19097
> > Bubble sorting array of 30000 elements
> >
> > # kill -USR2 19097
> > # kill -USR2 19097
> > # kill -USR2 19097
> > [ perf record: Woken up 4 times to write data ]
> > [ perf record: Captured and wrote 0.753 MB perf.data ]
> >
> > FWIW:
> >
> > Tested-by: Leo Yan <leo.yan@linaro.org>
>
> Thanks a lot, I've added your "Tester notes:" and also your Tested-by:.
>
> As I don't have hardware (yet) to test these patches, tests by people
> who can test on real hardware is always super appreciated.
You are very welcome and it's my pleasure :)
> Any suggestions for a SBC that I could buy to be able to do so?
Below are several Arm development boards for reference:
- DB410c [1]: This board is the first choice for myself, since this
board provides Debian (and Fedora :) support and it supports the
mainline kernel pretty well; the CoreSight also is well supported.
This board is about 80 USD so the cost is not expensive; on the
other hand, please note one drawback is that the SDRAM is only 1GB, which
makes it impossible to build some big projects (e.g. LLVM/Clang and
BCC); but it's sufficient for perf related development and
verification.
- There are several other boards I have in mind:
Raspberry Pi3 [2] and Hikey960 [3].
Raspberry Pi3 misses some features in the mainline kernel [4] and it
has not enabled the CoreSight hardware tracing feature; Hikey960 also
has some patches that are out of the mainline kernel.
Unless you have special requirements (e.g. you want to use the board to
build LLVM/Clang/BCC with big DDR size, etc), these two boards can be
secondary choices.
Please feel free to let me know if you have questions about the boards.
Thanks,
Leo Yan
[1] https://www.96boards.org/product/dragonboard410c/
[2] https://www.raspberrypi.org/products/raspberry-pi-3-model-b-plus/
[3] https://www.96boards.org/product/hikey960/
[4] https://www.raspberrypi.org/forums/viewtopic.php?t=236568
^ permalink raw reply [flat|nested] 13+ messages in thread
* Re: [PATCH] perf tools: Properly set the value of 'old' and 'head' in snapshot mode
2019-06-06 20:10 ` Arnaldo Carvalho de Melo
@ 2019-06-07 14:44 ` Mathieu Poirier
-1 siblings, 0 replies; 13+ messages in thread
From: Mathieu Poirier @ 2019-06-07 14:44 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Suzuki K. Poulose, Peter Zijlstra, Linux Kernel Mailing List,
Alexander Shishkin, Ingo Molnar, Leo Yan, Jiri Olsa,
linux-arm-kernel
On Thu, 6 Jun 2019 at 14:11, Arnaldo Carvalho de Melo
<arnaldo.melo@gmail.com> wrote:
>
> Em Wed, Jun 05, 2019 at 10:16:33AM -0600, Mathieu Poirier escreveu:
> > This patch adds the necessay intelligence to properly compute the value
> > of 'old' and 'head' when operating in snapshot mode. That way we can get
> > the latest information in the AUX buffer and be compatible with the
> > generic AUX ring buffer mechanic.
>
> Leo, have you had the chance to test/review this one? Suzuki?
Leo did test this before and added his Tested-by on the Coresight
mailing list. I did not carry it here because I changed the call to
reallocarray() to realloc() in order to avoid cross compilation
problems. I think it is safe enough but other people's opinion may
differ so I played it safe. Leo, please test this again if/when you
have the time.
>
> I also changed the subject to:
>
> [PATCH] perf cs-etm: Properly set the value of 'old' and 'head' in snapshot mode
>
> So that when looking at a 'git log --oneline' one can have the proper
> context and know that its about cs-etm.
Very well.
Mathieu
>
> - Arnaldo
>
> > Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
> > ---
> > tools/perf/arch/arm/util/cs-etm.c | 127 +++++++++++++++++++++++++++++-
> > 1 file changed, 123 insertions(+), 4 deletions(-)
> >
> > diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
> > index 911426721170..0a278bbcaba6 100644
> > --- a/tools/perf/arch/arm/util/cs-etm.c
> > +++ b/tools/perf/arch/arm/util/cs-etm.c
> > @@ -31,6 +31,8 @@ struct cs_etm_recording {
> > struct auxtrace_record itr;
> > struct perf_pmu *cs_etm_pmu;
> > struct perf_evlist *evlist;
> > + int wrapped_cnt;
> > + bool *wrapped;
> > bool snapshot_mode;
> > size_t snapshot_size;
> > };
> > @@ -536,16 +538,131 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
> > return 0;
> > }
> >
> > -static int cs_etm_find_snapshot(struct auxtrace_record *itr __maybe_unused,
> > +static int cs_etm_alloc_wrapped_array(struct cs_etm_recording *ptr, int idx)
> > +{
> > + bool *wrapped;
> > + int cnt = ptr->wrapped_cnt;
> > +
> > + /* Make @ptr->wrapped as big as @idx */
> > + while (cnt <= idx)
> > + cnt++;
> > +
> > + /*
> > + * Free'ed in cs_etm_recording_free(). Using realloc() to avoid
> > + * cross compilation problems where the host's system supports
> > + * reallocarray() but not the target.
> > + */
> > + wrapped = realloc(ptr->wrapped, cnt * sizeof(bool));
> > + if (!wrapped)
> > + return -ENOMEM;
> > +
> > + wrapped[cnt - 1] = false;
> > + ptr->wrapped_cnt = cnt;
> > + ptr->wrapped = wrapped;
> > +
> > + return 0;
> > +}
> > +
> > +static bool cs_etm_buffer_has_wrapped(unsigned char *buffer,
> > + size_t buffer_size, u64 head)
> > +{
> > + u64 i, watermark;
> > + u64 *buf = (u64 *)buffer;
> > + size_t buf_size = buffer_size;
> > +
> > + /*
> > + * We want to look the very last 512 byte (chosen arbitrarily) in
> > + * the ring buffer.
> > + */
> > + watermark = buf_size - 512;
> > +
> > + /*
> > + * @head is continuously increasing - if its value is equal or greater
> > + * than the size of the ring buffer, it has wrapped around.
> > + */
> > + if (head >= buffer_size)
> > + return true;
> > +
> > + /*
> > + * The value of @head is somewhere within the size of the ring buffer.
> > + * This can be that there hasn't been enough data to fill the ring
> > + * buffer yet or the trace time was so long that @head has numerically
> > + * wrapped around. To find we need to check if we have data at the very
> > + * end of the ring buffer. We can reliably do this because mmap'ed
> > + * pages are zeroed out and there is a fresh mapping with every new
> > + * session.
> > + */
> > +
> > + /* @head is less than 512 byte from the end of the ring buffer */
> > + if (head > watermark)
> > + watermark = head;
> > +
> > + /*
> > + * Speed things up by using 64 bit transactions (see "u64 *buf" above)
> > + */
> > + watermark >>= 3;
> > + buf_size >>= 3;
> > +
> > + /*
> > + * If we find trace data at the end of the ring buffer, @head has
> > + * been there and has numerically wrapped around at least once.
> > + */
> > + for (i = watermark; i < buf_size; i++)
> > + if (buf[i])
> > + return true;
> > +
> > + return false;
> > +}
> > +
> > +static int cs_etm_find_snapshot(struct auxtrace_record *itr,
> > int idx, struct auxtrace_mmap *mm,
> > - unsigned char *data __maybe_unused,
> > + unsigned char *data,
> > u64 *head, u64 *old)
> > {
> > + int err;
> > + bool wrapped;
> > + struct cs_etm_recording *ptr =
> > + container_of(itr, struct cs_etm_recording, itr);
> > +
> > + /*
> > + * Allocate memory to keep track of wrapping if this is the first
> > + * time we deal with this *mm.
> > + */
> > + if (idx >= ptr->wrapped_cnt) {
> > + err = cs_etm_alloc_wrapped_array(ptr, idx);
> > + if (err)
> > + return err;
> > + }
> > +
> > + /*
> > + * Check to see if *head has wrapped around. If it hasn't only the
> > + * amount of data between *head and *old is snapshot'ed to avoid
> > + * bloating the perf.data file with zeros. But as soon as *head has
> > + * wrapped around the entire size of the AUX ring buffer it taken.
> > + */
> > + wrapped = ptr->wrapped[idx];
> > + if (!wrapped && cs_etm_buffer_has_wrapped(data, mm->len, *head)) {
> > + wrapped = true;
> > + ptr->wrapped[idx] = true;
> > + }
> > +
> > pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n",
> > __func__, idx, (size_t)*old, (size_t)*head, mm->len);
> >
> > - *old = *head;
> > - *head += mm->len;
> > + /* No wrap has occurred, we can just use *head and *old. */
> > + if (!wrapped)
> > + return 0;
> > +
> > + /*
> > + * *head has wrapped around - adjust *head and *old to pickup the
> > + * entire content of the AUX buffer.
> > + */
> > + if (*head >= mm->len) {
> > + *old = *head - mm->len;
> > + } else {
> > + *head += mm->len;
> > + *old = *head - mm->len;
> > + }
> >
> > return 0;
> > }
> > @@ -586,6 +703,8 @@ static void cs_etm_recording_free(struct auxtrace_record *itr)
> > {
> > struct cs_etm_recording *ptr =
> > container_of(itr, struct cs_etm_recording, itr);
> > +
> > + zfree(&ptr->wrapped);
> > free(ptr);
> > }
> >
> > --
> > 2.17.1
>
> --
>
> - Arnaldo
_______________________________________________
linux-arm-kernel mailing list
linux-arm-kernel@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-arm-kernel
^ permalink raw reply [flat|nested] 13+ messages in thread* Re: [PATCH] perf tools: Properly set the value of 'old' and 'head' in snapshot mode
@ 2019-06-07 14:44 ` Mathieu Poirier
0 siblings, 0 replies; 13+ messages in thread
From: Mathieu Poirier @ 2019-06-07 14:44 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: Suzuki K. Poulose, Leo Yan, Peter Zijlstra, Ingo Molnar,
Alexander Shishkin, Jiri Olsa, linux-arm-kernel,
Linux Kernel Mailing List
On Thu, 6 Jun 2019 at 14:11, Arnaldo Carvalho de Melo
<arnaldo.melo@gmail.com> wrote:
>
> Em Wed, Jun 05, 2019 at 10:16:33AM -0600, Mathieu Poirier escreveu:
> > This patch adds the necessay intelligence to properly compute the value
> > of 'old' and 'head' when operating in snapshot mode. That way we can get
> > the latest information in the AUX buffer and be compatible with the
> > generic AUX ring buffer mechanic.
>
> Leo, have you had the chance to test/review this one? Suzuki?
Leo did test this before and added his Tested-by on the Coresight
mailing list. I did not carry it here because I changed the call to
reallocarray() to realloc() in order to avoid cross compilation
problems. I think it is safe enough but other people's opinion may
differ so I played it safe. Leo, please test this again if/when you
have the time.
>
> I also changed the subject to:
>
> [PATCH] perf cs-etm: Properly set the value of 'old' and 'head' in snapshot mode
>
> So that when looking at a 'git log --oneline' one can have the proper
> context and know that its about cs-etm.
Very well.
Mathieu
>
> - Arnaldo
>
> > Signed-off-by: Mathieu Poirier <mathieu.poirier@linaro.org>
> > ---
> > tools/perf/arch/arm/util/cs-etm.c | 127 +++++++++++++++++++++++++++++-
> > 1 file changed, 123 insertions(+), 4 deletions(-)
> >
> > diff --git a/tools/perf/arch/arm/util/cs-etm.c b/tools/perf/arch/arm/util/cs-etm.c
> > index 911426721170..0a278bbcaba6 100644
> > --- a/tools/perf/arch/arm/util/cs-etm.c
> > +++ b/tools/perf/arch/arm/util/cs-etm.c
> > @@ -31,6 +31,8 @@ struct cs_etm_recording {
> > struct auxtrace_record itr;
> > struct perf_pmu *cs_etm_pmu;
> > struct perf_evlist *evlist;
> > + int wrapped_cnt;
> > + bool *wrapped;
> > bool snapshot_mode;
> > size_t snapshot_size;
> > };
> > @@ -536,16 +538,131 @@ static int cs_etm_info_fill(struct auxtrace_record *itr,
> > return 0;
> > }
> >
> > -static int cs_etm_find_snapshot(struct auxtrace_record *itr __maybe_unused,
> > +static int cs_etm_alloc_wrapped_array(struct cs_etm_recording *ptr, int idx)
> > +{
> > + bool *wrapped;
> > + int cnt = ptr->wrapped_cnt;
> > +
> > + /* Make @ptr->wrapped as big as @idx */
> > + while (cnt <= idx)
> > + cnt++;
> > +
> > + /*
> > + * Free'ed in cs_etm_recording_free(). Using realloc() to avoid
> > + * cross compilation problems where the host's system supports
> > + * reallocarray() but not the target.
> > + */
> > + wrapped = realloc(ptr->wrapped, cnt * sizeof(bool));
> > + if (!wrapped)
> > + return -ENOMEM;
> > +
> > + wrapped[cnt - 1] = false;
> > + ptr->wrapped_cnt = cnt;
> > + ptr->wrapped = wrapped;
> > +
> > + return 0;
> > +}
> > +
> > +static bool cs_etm_buffer_has_wrapped(unsigned char *buffer,
> > + size_t buffer_size, u64 head)
> > +{
> > + u64 i, watermark;
> > + u64 *buf = (u64 *)buffer;
> > + size_t buf_size = buffer_size;
> > +
> > + /*
> > + * We want to look the very last 512 byte (chosen arbitrarily) in
> > + * the ring buffer.
> > + */
> > + watermark = buf_size - 512;
> > +
> > + /*
> > + * @head is continuously increasing - if its value is equal or greater
> > + * than the size of the ring buffer, it has wrapped around.
> > + */
> > + if (head >= buffer_size)
> > + return true;
> > +
> > + /*
> > + * The value of @head is somewhere within the size of the ring buffer.
> > + * This can be that there hasn't been enough data to fill the ring
> > + * buffer yet or the trace time was so long that @head has numerically
> > + * wrapped around. To find we need to check if we have data at the very
> > + * end of the ring buffer. We can reliably do this because mmap'ed
> > + * pages are zeroed out and there is a fresh mapping with every new
> > + * session.
> > + */
> > +
> > + /* @head is less than 512 byte from the end of the ring buffer */
> > + if (head > watermark)
> > + watermark = head;
> > +
> > + /*
> > + * Speed things up by using 64 bit transactions (see "u64 *buf" above)
> > + */
> > + watermark >>= 3;
> > + buf_size >>= 3;
> > +
> > + /*
> > + * If we find trace data at the end of the ring buffer, @head has
> > + * been there and has numerically wrapped around at least once.
> > + */
> > + for (i = watermark; i < buf_size; i++)
> > + if (buf[i])
> > + return true;
> > +
> > + return false;
> > +}
> > +
> > +static int cs_etm_find_snapshot(struct auxtrace_record *itr,
> > int idx, struct auxtrace_mmap *mm,
> > - unsigned char *data __maybe_unused,
> > + unsigned char *data,
> > u64 *head, u64 *old)
> > {
> > + int err;
> > + bool wrapped;
> > + struct cs_etm_recording *ptr =
> > + container_of(itr, struct cs_etm_recording, itr);
> > +
> > + /*
> > + * Allocate memory to keep track of wrapping if this is the first
> > + * time we deal with this *mm.
> > + */
> > + if (idx >= ptr->wrapped_cnt) {
> > + err = cs_etm_alloc_wrapped_array(ptr, idx);
> > + if (err)
> > + return err;
> > + }
> > +
> > + /*
> > + * Check to see if *head has wrapped around. If it hasn't only the
> > + * amount of data between *head and *old is snapshot'ed to avoid
> > + * bloating the perf.data file with zeros. But as soon as *head has
> > + * wrapped around the entire size of the AUX ring buffer it taken.
> > + */
> > + wrapped = ptr->wrapped[idx];
> > + if (!wrapped && cs_etm_buffer_has_wrapped(data, mm->len, *head)) {
> > + wrapped = true;
> > + ptr->wrapped[idx] = true;
> > + }
> > +
> > pr_debug3("%s: mmap index %d old head %zu new head %zu size %zu\n",
> > __func__, idx, (size_t)*old, (size_t)*head, mm->len);
> >
> > - *old = *head;
> > - *head += mm->len;
> > + /* No wrap has occurred, we can just use *head and *old. */
> > + if (!wrapped)
> > + return 0;
> > +
> > + /*
> > + * *head has wrapped around - adjust *head and *old to pickup the
> > + * entire content of the AUX buffer.
> > + */
> > + if (*head >= mm->len) {
> > + *old = *head - mm->len;
> > + } else {
> > + *head += mm->len;
> > + *old = *head - mm->len;
> > + }
> >
> > return 0;
> > }
> > @@ -586,6 +703,8 @@ static void cs_etm_recording_free(struct auxtrace_record *itr)
> > {
> > struct cs_etm_recording *ptr =
> > container_of(itr, struct cs_etm_recording, itr);
> > +
> > + zfree(&ptr->wrapped);
> > free(ptr);
> > }
> >
> > --
> > 2.17.1
>
> --
>
> - Arnaldo
^ permalink raw reply [flat|nested] 13+ messages in thread