* [RFC PATCH] perf: workaround unaligned NEON vector load
@ 2019-03-06 14:01 Lucas Stach
2019-03-06 20:14 ` Arnaldo Carvalho de Melo
0 siblings, 1 reply; 2+ messages in thread
From: Lucas Stach @ 2019-03-06 14:01 UTC (permalink / raw)
To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Alexander Shishkin, Jiri Olsa, Namhyung Kim
Cc: linux-kernel, kernel, patchwork-lst
The mmap event buffer may end up in a location that violates the
alignment requirements for a NEON vector load, which GCC generates to
load consecutive values from the event structure. Fix this by copying
the event structure into a properly aligned buffer.
Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
---
tools/perf/util/machine.c | 29 ++++++++++++++++-------------
1 file changed, 16 insertions(+), 13 deletions(-)
diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 143f7057d581..ab5500e85173 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1565,37 +1565,40 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
}
int machine__process_mmap2_event(struct machine *machine,
- union perf_event *event,
+ union perf_event *event_in,
struct perf_sample *sample)
{
+ union perf_event event;
struct thread *thread;
struct map *map;
int ret = 0;
+ memcpy(&event, event_in, sizeof(union perf_event));
+
if (dump_trace)
- perf_event__fprintf_mmap2(event, stdout);
+ perf_event__fprintf_mmap2(&event, stdout);
if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
sample->cpumode == PERF_RECORD_MISC_KERNEL) {
- ret = machine__process_kernel_mmap_event(machine, event);
+ ret = machine__process_kernel_mmap_event(machine, &event);
if (ret < 0)
goto out_problem;
return 0;
}
- thread = machine__findnew_thread(machine, event->mmap2.pid,
- event->mmap2.tid);
+ thread = machine__findnew_thread(machine, event.mmap2.pid,
+ event.mmap2.tid);
if (thread == NULL)
goto out_problem;
- map = map__new(machine, event->mmap2.start,
- event->mmap2.len, event->mmap2.pgoff,
- event->mmap2.maj,
- event->mmap2.min, event->mmap2.ino,
- event->mmap2.ino_generation,
- event->mmap2.prot,
- event->mmap2.flags,
- event->mmap2.filename, thread);
+ map = map__new(machine, event.mmap2.start,
+ event.mmap2.len, event.mmap2.pgoff,
+ event.mmap2.maj,
+ event.mmap2.min, event.mmap2.ino,
+ event.mmap2.ino_generation,
+ event.mmap2.prot,
+ event.mmap2.flags,
+ event.mmap2.filename, thread);
if (map == NULL)
goto out_problem_map;
--
2.20.1
^ permalink raw reply related [flat|nested] 2+ messages in thread
* Re: [RFC PATCH] perf: workaround unaligned NEON vector load
2019-03-06 14:01 [RFC PATCH] perf: workaround unaligned NEON vector load Lucas Stach
@ 2019-03-06 20:14 ` Arnaldo Carvalho de Melo
0 siblings, 0 replies; 2+ messages in thread
From: Arnaldo Carvalho de Melo @ 2019-03-06 20:14 UTC (permalink / raw)
To: Lucas Stach
Cc: Peter Zijlstra, Ingo Molnar, Alexander Shishkin, Jiri Olsa,
Namhyung Kim, linux-kernel, kernel, patchwork-lst
Em Wed, Mar 06, 2019 at 03:01:16PM +0100, Lucas Stach escreveu:
> The mmap event buffer may end up in a location that violates the
> alignment requirements for a NEON vector load,
which are?
> which GCC generates to load consecutive values from the event
> structure. Fix this by copying the event structure into a properly
> aligned buffer.
At a minimum this would be done only for such arch (is that an arch?),
so that the rest of the world doesn't have to eat this extra cost?
What is it that perf_event_mmap_event() is doing to
mmap_event->event_id.header.size that this NEON vector load dislikes?
- Arnaldo
> Signed-off-by: Lucas Stach <l.stach@pengutronix.de>
> ---
> tools/perf/util/machine.c | 29 ++++++++++++++++-------------
> 1 file changed, 16 insertions(+), 13 deletions(-)
>
> diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
> index 143f7057d581..ab5500e85173 100644
> --- a/tools/perf/util/machine.c
> +++ b/tools/perf/util/machine.c
> @@ -1565,37 +1565,40 @@ static int machine__process_kernel_mmap_event(struct machine *machine,
> }
>
> int machine__process_mmap2_event(struct machine *machine,
> - union perf_event *event,
> + union perf_event *event_in,
> struct perf_sample *sample)
> {
> + union perf_event event;
> struct thread *thread;
> struct map *map;
> int ret = 0;
>
> + memcpy(&event, event_in, sizeof(union perf_event));
> +
> if (dump_trace)
> - perf_event__fprintf_mmap2(event, stdout);
> + perf_event__fprintf_mmap2(&event, stdout);
>
> if (sample->cpumode == PERF_RECORD_MISC_GUEST_KERNEL ||
> sample->cpumode == PERF_RECORD_MISC_KERNEL) {
> - ret = machine__process_kernel_mmap_event(machine, event);
> + ret = machine__process_kernel_mmap_event(machine, &event);
> if (ret < 0)
> goto out_problem;
> return 0;
> }
>
> - thread = machine__findnew_thread(machine, event->mmap2.pid,
> - event->mmap2.tid);
> + thread = machine__findnew_thread(machine, event.mmap2.pid,
> + event.mmap2.tid);
> if (thread == NULL)
> goto out_problem;
>
> - map = map__new(machine, event->mmap2.start,
> - event->mmap2.len, event->mmap2.pgoff,
> - event->mmap2.maj,
> - event->mmap2.min, event->mmap2.ino,
> - event->mmap2.ino_generation,
> - event->mmap2.prot,
> - event->mmap2.flags,
> - event->mmap2.filename, thread);
> + map = map__new(machine, event.mmap2.start,
> + event.mmap2.len, event.mmap2.pgoff,
> + event.mmap2.maj,
> + event.mmap2.min, event.mmap2.ino,
> + event.mmap2.ino_generation,
> + event.mmap2.prot,
> + event.mmap2.flags,
> + event.mmap2.filename, thread);
>
> if (map == NULL)
> goto out_problem_map;
> --
> 2.20.1
--
- Arnaldo
^ permalink raw reply [flat|nested] 2+ messages in thread
end of thread, other threads:[~2019-03-06 20:14 UTC | newest]
Thread overview: 2+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2019-03-06 14:01 [RFC PATCH] perf: workaround unaligned NEON vector load Lucas Stach
2019-03-06 20:14 ` Arnaldo Carvalho de Melo
This is a public inbox; see the mirroring instructions
for how to clone and mirror all data and code used for this inbox.