* [PATCH v1 1/2] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses
@ 2026-04-24 22:05 Ian Rogers
2026-04-24 22:05 ` [PATCH v1 2/2] perf test: Add inject ASLR test Ian Rogers
` (2 more replies)
0 siblings, 3 replies; 183+ messages in thread
From: Ian Rogers @ 2026-04-24 22:05 UTC (permalink / raw)
To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo,
Namhyung Kim, Jiri Olsa, Ian Rogers, Adrian Hunter, James Clark,
Gabriel Marin, linux-kernel, linux-perf-users
If perf.data files are taken from one machine to another they may
leak virtual addresses and so weaken ASLR on the machine they are
coming from. Add a '--aslr' option for perf inject that remaps all
virtual addresses, or drops data/events, so that the virtual address
information isn't leaked.
When events are not known/handled by the tool they are dropped. This
makes the tool conservative and it should never leak ASLR information,
but it means virtual address remapping is needed for cases like
auxtrace.
Signed-off-by: Ian Rogers <irogers@google.com>
Co-developed-by: Gabriel Marin <gmx@google.com>
Signed-off-by: Gabriel Marin <gmx@google.com>
---
tools/perf/builtin-inject.c | 11 +-
tools/perf/util/Build | 1 +
tools/perf/util/aslr.c | 752 ++++++++++++++++++++++++++++++++++++
tools/perf/util/aslr.h | 10 +
4 files changed, 773 insertions(+), 1 deletion(-)
create mode 100644 tools/perf/util/aslr.c
create mode 100644 tools/perf/util/aslr.h
diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c
index f174bc69cec4..fa3a71e23f1b 100644
--- a/tools/perf/builtin-inject.c
+++ b/tools/perf/builtin-inject.c
@@ -8,6 +8,7 @@
*/
#include "builtin.h"
+#include "util/aslr.h"
#include "util/color.h"
#include "util/dso.h"
#include "util/vdso.h"
@@ -123,6 +124,7 @@ struct perf_inject {
bool in_place_update_dry_run;
bool copy_kcore_dir;
bool convert_callchain;
+ bool aslr;
const char *input_name;
struct perf_data output;
u64 bytes_written;
@@ -2564,6 +2566,8 @@ int cmd_inject(int argc, const char **argv)
" instance has a subdir"),
OPT_BOOLEAN(0, "convert-callchain", &inject.convert_callchain,
"Generate callchains using DWARF and drop register/stack data"),
+ OPT_BOOLEAN(0, "aslr", &inject.aslr,
+ "Remap virtual memory addresses similar to ASLR"),
OPT_END()
};
const char * const inject_usage[] = {
@@ -2571,6 +2575,7 @@ int cmd_inject(int argc, const char **argv)
NULL
};
bool ordered_events;
+ struct perf_tool *tool = &inject.tool;
if (!inject.itrace_synth_opts.set) {
/* Disable eager loading of kernel symbols that adds overhead to perf inject. */
@@ -2684,7 +2689,9 @@ int cmd_inject(int argc, const char **argv)
inject.tool.schedstat_domain = perf_event__repipe_op2_synth;
inject.tool.dont_split_sample_group = true;
inject.tool.merge_deferred_callchains = false;
- inject.session = __perf_session__new(&data, &inject.tool,
+ if (inject.aslr)
+ tool = aslr_tool__new(&inject.tool);
+ inject.session = __perf_session__new(&data, tool,
/*trace_event_repipe=*/inject.output.is_pipe,
/*host_env=*/NULL);
@@ -2789,6 +2796,8 @@ int cmd_inject(int argc, const char **argv)
strlist__delete(inject.known_build_ids);
zstd_fini(&(inject.session->zstd_data));
perf_session__delete(inject.session);
+ if (inject.aslr)
+ aslr_tool__delete(tool);
out_close_output:
if (!inject.in_place_update)
perf_data__close(&inject.output);
diff --git a/tools/perf/util/Build b/tools/perf/util/Build
index 70cc91d00804..65b96f3b87e2 100644
--- a/tools/perf/util/Build
+++ b/tools/perf/util/Build
@@ -6,6 +6,7 @@ perf-util-y += arm64-frame-pointer-unwind-support.o
perf-util-y += addr2line.o
perf-util-y += addr_location.o
perf-util-y += annotate.o
+perf-util-y += aslr.o
perf-util-y += blake2s.o
perf-util-y += block-info.o
perf-util-y += block-range.o
diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c
new file mode 100644
index 000000000000..23ef7b68896c
--- /dev/null
+++ b/tools/perf/util/aslr.c
@@ -0,0 +1,752 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "aslr.h"
+
+#include "addr_location.h"
+#include "debug.h"
+#include "event.h"
+#include "evsel.h"
+#include "machine.h"
+#include "map.h"
+#include "thread.h"
+#include "tool.h"
+
+#include <internal/lib.h> // page_size
+#include <linux/compiler.h>
+#include <errno.h>
+#include <inttypes.h>
+
+/*
+ * Key of aslr_tool::remap_addresses: identifies one original mapping by its
+ * start address and the pid it is recorded under.
+ */
+struct remap_addresses_key {
+	/* Start address of the mapping in the original (input) address space. */
+	u64 start_addr;
+	/* Owning pid; kernel mappings are recorded under kernel_pid. */
+	pid_t pid;
+};
+
+/*
+ * State of the ASLR remapping tool: a delegate_tool wrapper, a private
+ * machine modelling the original address space, a scratch buffer for the
+ * rewritten events and the tables describing the remapped layout.
+ */
+struct aslr_tool {
+	/** @tool: The tool implemented here and a pointer to a delegate to process the data. */
+	struct delegate_tool tool;
+	/** @machine: The machine with the input, not remapped, virtual address layout. */
+	struct machine machine;
+	/** @event_copy: Buffer used to create an event to pass to the delegate. */
+	char event_copy[PERF_SAMPLE_MAX_SIZE];
+	/** @remap_addresses: mapping from remap_addresses_key to remapped address. */
+	struct hashmap remap_addresses;
+	/** @top_addresses: mapping from process to max remapped address. */
+	struct hashmap top_addresses;
+	/** @first_kernel_mapping: flag indicating if we are still to process any kernel mapping. */
+	bool first_kernel_mapping;
+};
+
+/* Pid under which kernel mappings and ksymbols are keyed in the remap tables. */
+static const pid_t kernel_pid = -1;
+
+/* Start remapping user processes from a small non-zero offset. */
+static const u64 user_space_start = 0x200000;
+/* Base address used for the first remapped kernel mapping or ksymbol. */
+static const u64 kernel_space_start = 0xffff800010000000;
+
+/* Hash callback for remap_addresses: mixes the mapping start address with the pid. */
+static size_t remap_addresses__hash(long _key, void *ctx __maybe_unused)
+{
+	const struct remap_addresses_key *entry = (const struct remap_addresses_key *)_key;
+	const u64 start = entry->start_addr;
+
+	return start ^ (start >> 12) ^ entry->pid;
+}
+
+/* Equality callback for remap_addresses: keys match when pid and start address match. */
+static bool remap_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused)
+{
+	const struct remap_addresses_key *lhs = (const struct remap_addresses_key *)_key1;
+	const struct remap_addresses_key *rhs = (const struct remap_addresses_key *)_key2;
+
+	if (lhs->pid != rhs->pid)
+		return false;
+	return lhs->start_addr == rhs->start_addr;
+}
+
+/* Hash callback for top_addresses: the key (a pid) is used as its own hash. */
+static size_t top_addresses__hash(long key, void *ctx __maybe_unused)
+{
+	const size_t hash = key;
+
+	return hash;
+}
+
+/* Equality callback for top_addresses: plain comparison of the two keys. */
+static bool top_addresses__equal(long key1, long key2, void *ctx __maybe_unused)
+{
+	if (key1 != key2)
+		return false;
+	return true;
+}
+
+/* Round @addr up to the next multiple of page_size (unchanged if already aligned). */
+static u64 round_up_to_page_size(u64 addr)
+{
+	const u64 mask = (u64)page_size - 1;
+
+	return (addr + mask) & ~mask;
+}
+
+/*
+ * Translate an original virtual address into the remapped address space.
+ *
+ * The mapping containing @addr is looked up in @aslr_thread, first with
+ * @cpumode and then with the opposite user/kernel mode. The remapped start
+ * recorded for that mapping in aslr->remap_addresses is then offset by the
+ * position of @addr inside the mapping.
+ *
+ * Returns the remapped address, or 0 when there is no thread, no mapping
+ * covering @addr, or no remapped entry for the mapping; the original address
+ * is never returned.
+ */
+static u64 aslr_tool__remap_address(struct aslr_tool *aslr,
+				    struct thread *aslr_thread,
+				    u8 cpumode,
+				    u64 addr)
+{
+	struct addr_location al;
+	struct remap_addresses_key key;
+	u64 remap_start = 0;
+	u64 remap_addr = 0;
+	u8 effective_cpumode = cpumode;
+
+	if (!aslr_thread)
+		return 0; // No thread.
+
+	addr_location__init(&al);
+	if (!thread__find_map(aslr_thread, cpumode, addr, &al)) {
+		/*
+		 * If lookup fails with specified cpumode, try fallback to the other space
+		 * to be robust against bad cpumode in samples.
+		 */
+		effective_cpumode = (cpumode == PERF_RECORD_MISC_KERNEL) ?
+			PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL;
+		if (!thread__find_map(aslr_thread, effective_cpumode, addr, &al)) {
+			pr_debug("Cannot find mmap for address %" PRIx64 " in either space, pid=%d\n",
+				 addr, thread__pid(aslr_thread));
+			goto out; // No mmap.
+		}
+	}
+
+	/* Kernel mappings are shared by all threads and keyed by kernel_pid. */
+	key.pid = effective_cpumode == PERF_RECORD_MISC_KERNEL ?
+		kernel_pid : thread__pid(aslr_thread);
+	key.start_addr = map__start(al.map);
+	if (!hashmap__find(&aslr->remap_addresses, &key, &remap_start)) {
+		pr_debug("Cannot find a remapped entry for address %" PRIx64 " in mapping %" PRIx64 "(%zx) for pid=%d\n",
+			 addr, map__start(al.map), map__size(al.map), key.pid);
+		goto out;
+	}
+	/* Keep the offset of the address within its mapping. */
+	remap_addr = remap_start + (addr - map__start(al.map));
+out:
+	addr_location__exit(&al);
+	return remap_addr;
+}
+
+/*
+ * Assign (or look up) the remapped start address of the mapping
+ * [@start, @start + @len) of @aslr_thread.
+ *
+ * A mapping that was already remapped returns its recorded address. A new
+ * mapping is placed at the page-rounded top address recorded for the pid (or
+ * at user_space_start/kernel_space_start for the first mapping), preceded by
+ * a one page gap unless it directly follows the previous original mapping.
+ * The per-pid top address is advanced before the new entry is recorded, so a
+ * failure can only leave a hole in the remapped layout, never an overlap.
+ *
+ * Returns the remapped start address, or 0 when there is no thread or a
+ * table update fails.
+ */
+static u64 aslr_tool__remap_mapping(struct aslr_tool *aslr,
+				    struct thread *aslr_thread,
+				    u8 cpumode,
+				    u64 start, u64 len)
+{
+	struct addr_location prev_al;
+	struct remap_addresses_key key;
+	struct remap_addresses_key *new_key;
+	u64 remap_addr = 0;
+	/* If mapping is contiguous to the previous process mapping. */
+	bool is_contiguous = false;
+	bool first_mapping = false; // first process mapping.
+
+	if (!aslr_thread)
+		return 0; // No thread.
+
+	addr_location__init(&prev_al);
+	/* start == 0 has no preceding address; start - 1 would wrap around. */
+	if (start > 0 && thread__find_map(aslr_thread, cpumode, start - 1, &prev_al)) {
+		if (map__start(prev_al.map) + map__size(prev_al.map) == start) {
+			is_contiguous = true;
+		} else {
+			pr_debug("Previous mmap [%" PRIx64 ", %" PRIx64 "] overlaps current map [%" PRIx64 ", %" PRIx64 "]\n",
+				 map__start(prev_al.map),
+				 map__start(prev_al.map) + map__size(prev_al.map),
+				 start, start + len);
+		}
+	}
+	addr_location__exit(&prev_al);
+
+	key.pid = cpumode == PERF_RECORD_MISC_KERNEL ? kernel_pid : thread__pid(aslr_thread);
+	key.start_addr = start;
+	if (hashmap__find(&aslr->remap_addresses, &key, &remap_addr))
+		return remap_addr;
+
+	if (!hashmap__find(&aslr->top_addresses, key.pid, &remap_addr)) {
+		/* First mapping in this process. Don't add a page gap. */
+		first_mapping = true;
+		remap_addr = (cpumode == PERF_RECORD_MISC_KERNEL ?
+			      kernel_space_start : user_space_start);
+	}
+
+	remap_addr = round_up_to_page_size(remap_addr);
+	if (!is_contiguous && !first_mapping)
+		remap_addr += page_size;
+
+	/* HASHMAP_SET covers both the first (add) and later (update) mapping. */
+	if (hashmap__insert(&aslr->top_addresses, key.pid, remap_addr + len,
+			    HASHMAP_SET, NULL, NULL) != 0) {
+		pr_debug("Failed to update top address for pid=%d, mapping start=%" PRIx64 "\n",
+			 key.pid, start);
+		return 0;
+	}
+
+	/* The hashmap keeps the key by pointer, so it needs its own allocation. */
+	new_key = malloc(sizeof(*new_key));
+	if (!new_key)
+		return 0;
+	*new_key = key;
+	if (hashmap__add(&aslr->remap_addresses, new_key, remap_addr) != 0) {
+		pr_debug("Failed to add remap_addresses entry for pid=%d, mapping start=%" PRIx64 ", remapped start=%" PRIx64 "\n",
+			 key.pid, start, remap_addr);
+		free(new_key);
+		return 0;
+	}
+
+	return remap_addr;
+}
+
+/*
+ * Assign (or look up) the remapped address of a ksymbol covering
+ * [@addr, @addr + @len), keyed by the pid of @aslr_thread.
+ *
+ * New ksymbols are placed one page above the page-rounded top address
+ * recorded for that pid, starting from kernel_space_start. The top address
+ * is advanced before the new entry is recorded, so a failure can only leave
+ * a hole in the remapped layout.
+ *
+ * Returns the remapped address, or 0 when there is no thread or a table
+ * update fails.
+ */
+static u64 aslr_tool__remap_ksymbol(struct aslr_tool *aslr,
+				    struct thread *aslr_thread,
+				    u64 addr, u32 len)
+{
+	struct remap_addresses_key key;
+	struct remap_addresses_key *new_key;
+	u64 remap_addr = 0;
+
+	if (!aslr_thread)
+		return 0; // No thread.
+
+	key.pid = thread__pid(aslr_thread);
+	key.start_addr = addr;
+	if (hashmap__find(&aslr->remap_addresses, &key, &remap_addr))
+		return remap_addr;
+
+	if (!hashmap__find(&aslr->top_addresses, key.pid, &remap_addr))
+		remap_addr = kernel_space_start;
+	remap_addr = round_up_to_page_size(remap_addr) + page_size;
+
+	/* HASHMAP_SET covers both the first (add) and later (update) entry. */
+	if (hashmap__insert(&aslr->top_addresses, key.pid, remap_addr + len,
+			    HASHMAP_SET, NULL, NULL) != 0) {
+		pr_debug("Failed to update top address for pid=%d, ksymbol=%" PRIx64 "\n",
+			 key.pid, addr);
+		return 0;
+	}
+
+	/* The hashmap keeps the key by pointer, so it needs its own allocation. */
+	new_key = malloc(sizeof(*new_key));
+	if (!new_key)
+		return 0;
+	*new_key = key;
+	if (hashmap__add(&aslr->remap_addresses, new_key, remap_addr) != 0) {
+		pr_debug("Failed to add remap_addresses entry for pid=%d, ksymbol=%" PRIx64 ", remapped address=%" PRIx64 "\n",
+			 key.pid, addr, remap_addr);
+		free(new_key);
+		return 0;
+	}
+
+	return remap_addr;
+}
+
+
+/*
+ * PERF_RECORD_MMAP handler: record the mapping in the private, not remapped,
+ * machine and forward a copy of the event with a remapped start address to
+ * the delegate.
+ *
+ * Returns the error from perf_event__process_mmap() if it fails, otherwise
+ * the delegate's result.
+ */
+static int aslr_tool__process_mmap(const struct perf_tool *tool,
+				   union perf_event *event,
+				   struct perf_sample *sample,
+				   struct machine *machine)
+{
+	struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool);
+	struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool);
+	struct perf_tool *delegate = aslr->tool.delegate;
+	union perf_event *new_event = (union perf_event *)aslr->event_copy;
+	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+	struct thread *thread;
+	int err;
+
+	/* Create the thread, map, etc. in the ASLR before virtual address space. */
+	err = perf_event__process_mmap(tool, event, sample, &aslr->machine);
+	if (err)
+		return err;
+
+	thread = machine__findnew_thread(&aslr->machine, event->mmap.pid, event->mmap.tid);
+	memcpy(&new_event->mmap, &event->mmap, event->mmap.header.size);
+	/* Remaps the mmap.start. */
+	new_event->mmap.start = aslr_tool__remap_mapping(aslr, thread, cpumode,
+							 event->mmap.start, event->mmap.len);
+	/* Drop the reference taken by machine__findnew_thread(). */
+	thread__put(thread);
+	if (aslr->first_kernel_mapping && cpumode == PERF_RECORD_MISC_KERNEL) {
+		/* If this is the first kernel image, we need to adjust the pgoff by a
+		 * similar delta.
+		 */
+		new_event->mmap.pgoff = event->mmap.pgoff - event->mmap.start +
+					new_event->mmap.start;
+		aslr->first_kernel_mapping = false;
+	}
+	return delegate->mmap(delegate, new_event, sample, machine);
+}
+
+/*
+ * PERF_RECORD_MMAP2 handler: record the mapping in the private, not remapped,
+ * machine and forward a copy of the event with a remapped start address to
+ * the delegate.
+ *
+ * Returns the error from perf_event__process_mmap2() if it fails, otherwise
+ * the delegate's result.
+ */
+static int aslr_tool__process_mmap2(const struct perf_tool *tool,
+				   union perf_event *event,
+				   struct perf_sample *sample,
+				   struct machine *machine)
+{
+	struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool);
+	struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool);
+	struct perf_tool *delegate = aslr->tool.delegate;
+	union perf_event *new_event = (union perf_event *)aslr->event_copy;
+	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+	struct thread *thread;
+	int err;
+
+	/* Create the thread, map, etc. in the ASLR before virtual address space. */
+	err = perf_event__process_mmap2(tool, event, sample, &aslr->machine);
+	if (err)
+		return err;
+
+	thread = machine__findnew_thread(&aslr->machine, event->mmap2.pid, event->mmap2.tid);
+	memcpy(&new_event->mmap2, &event->mmap2, event->mmap2.header.size);
+	/* Remaps the mmap2.start. */
+	new_event->mmap2.start = aslr_tool__remap_mapping(aslr, thread, cpumode,
+							  event->mmap2.start, event->mmap2.len);
+	/* Drop the reference taken by machine__findnew_thread(). */
+	thread__put(thread);
+	if (aslr->first_kernel_mapping && cpumode == PERF_RECORD_MISC_KERNEL) {
+		/* If this is the first kernel image, we need to adjust the pgoff by a
+		 * similar delta.
+		 */
+		new_event->mmap2.pgoff = event->mmap2.pgoff - event->mmap2.start +
+					 new_event->mmap2.start;
+		aslr->first_kernel_mapping = false;
+	}
+	return delegate->mmap2(delegate, new_event, sample, machine);
+}
+
+/*
+ * PERF_RECORD_COMM handler: apply the event to the private, not remapped,
+ * machine and then pass it unchanged to the delegate.
+ */
+static int aslr_tool__process_comm(const struct perf_tool *tool,
+				   union perf_event *event,
+				   struct perf_sample *sample,
+				   struct machine *machine)
+{
+	struct delegate_tool *wrapper = container_of(tool, struct delegate_tool, tool);
+	struct aslr_tool *self = container_of(wrapper, struct aslr_tool, tool);
+	struct perf_tool *next = self->tool.delegate;
+	/* Keep the model of the original address space up to date first. */
+	int ret = perf_event__process_comm(tool, event, sample, &self->machine);
+
+	if (ret != 0)
+		return ret;
+	return next->comm(next, event, sample, machine);
+}
+
+/*
+ * PERF_RECORD_FORK handler: apply the event to the private, not remapped,
+ * machine and then pass it unchanged to the delegate.
+ */
+static int aslr_tool__process_fork(const struct perf_tool *tool,
+				   union perf_event *event,
+				   struct perf_sample *sample,
+				   struct machine *machine)
+{
+	struct delegate_tool *wrapper = container_of(tool, struct delegate_tool, tool);
+	struct aslr_tool *self = container_of(wrapper, struct aslr_tool, tool);
+	struct perf_tool *next = self->tool.delegate;
+	/* Keep the model of the original address space up to date first. */
+	int ret = perf_event__process_fork(tool, event, sample, &self->machine);
+
+	if (ret != 0)
+		return ret;
+	return next->fork(next, event, sample, machine);
+}
+
+/*
+ * PERF_RECORD_EXIT handler: apply the event to the private, not remapped,
+ * machine and then pass it unchanged to the delegate.
+ */
+static int aslr_tool__process_exit(const struct perf_tool *tool,
+				   union perf_event *event,
+				   struct perf_sample *sample,
+				   struct machine *machine)
+{
+	struct delegate_tool *wrapper = container_of(tool, struct delegate_tool, tool);
+	struct aslr_tool *self = container_of(wrapper, struct aslr_tool, tool);
+	struct perf_tool *next = self->tool.delegate;
+	/* Keep the model of the original address space up to date first. */
+	int ret = perf_event__process_exit(tool, event, sample, &self->machine);
+
+	if (ret != 0)
+		return ret;
+	return next->exit(next, event, sample, machine);
+}
+
+/*
+ * PERF_RECORD_TEXT_POKE handler: forward a copy of the event whose addr has
+ * been translated into the remapped address space (0 if it cannot be
+ * translated) to the delegate.
+ */
+static int aslr_tool__process_text_poke(const struct perf_tool *tool,
+					union perf_event *event,
+					struct perf_sample *sample,
+					struct machine *machine)
+{
+	struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool);
+	struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool);
+	struct perf_tool *delegate = aslr->tool.delegate;
+	union perf_event *new_event = (union perf_event *)aslr->event_copy;
+	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
+	struct thread *thread;
+
+	thread = machine__findnew_thread(&aslr->machine, sample->pid, sample->tid);
+	memcpy(&new_event->text_poke, &event->text_poke, event->text_poke.header.size);
+	new_event->text_poke.addr = aslr_tool__remap_address(aslr, thread, cpumode,
+							     event->text_poke.addr);
+	/* Drop the reference taken by machine__findnew_thread(). */
+	thread__put(thread);
+
+	return delegate->text_poke(delegate, new_event, sample, machine);
+}
+
+/*
+ * PERF_RECORD_KSYMBOL handler: apply the event to the private, not remapped,
+ * machine and forward a copy with a remapped address to the delegate.
+ *
+ * Returns the error from perf_event__process_ksymbol() if it fails,
+ * otherwise the delegate's result.
+ */
+static int aslr_tool__process_ksymbol(const struct perf_tool *tool,
+				      union perf_event *event,
+				      struct perf_sample *sample,
+				      struct machine *machine)
+{
+	struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool);
+	struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool);
+	struct perf_tool *delegate = aslr->tool.delegate;
+	union perf_event *new_event = (union perf_event *)aslr->event_copy;
+	struct thread *thread;
+	int err;
+
+	/* Create the thread, map, etc. in the ASLR before virtual address space. */
+	err = perf_event__process_ksymbol(tool, event, sample, &aslr->machine);
+	if (err)
+		return err;
+
+	/* ksymbols are keyed by the kernel pseudo thread. */
+	thread = machine__findnew_thread(&aslr->machine, kernel_pid, 0);
+	memcpy(&new_event->ksymbol, &event->ksymbol, event->ksymbol.header.size);
+	/* Remaps the ksymbol.addr */
+	new_event->ksymbol.addr = aslr_tool__remap_ksymbol(aslr, thread,
+							   event->ksymbol.addr, event->ksymbol.len);
+	/* Drop the reference taken by machine__findnew_thread(). */
+	thread__put(thread);
+
+	return delegate->ksymbol(delegate, new_event, sample, machine);
+}
+
+/*
+ * Copy one 64-bit word from in_array[*i] to out_array[*j] and advance both
+ * cursors. @max_i is inclusive: it is the highest input index that may be
+ * read. Returns 0 on success or -EFAULT, leaving the cursors untouched, when
+ * *i is beyond @max_i.
+ */
+static inline int copy_u64(__u64 *in_array, __u64 *out_array,
+			   size_t *i, size_t *j, const __u64 max_i)
+{
+	const size_t src = *i;
+	const size_t dst = *j;
+
+	if (src > max_i)
+		return -EFAULT;
+
+	out_array[dst] = in_array[src];
+	*i = src + 1;
+	*j = dst + 1;
+	return 0;
+}
+
+/*
+ * Rebuild the payload of a PERF_RECORD_SAMPLE in aslr->event_copy, walking the
+ * fields in sample_type order, remapping every virtual address through
+ * @thread and copying the remaining words verbatim. The rebuilt event is
+ * re-parsed and handed to the delegate.
+ *
+ * Samples carrying data that may still reveal the original layout (raw data,
+ * branch counters, user/interrupt registers, user stack, physical addresses,
+ * aux data, off-CPU samples) or a corrupted callchain are dropped.
+ *
+ * Returns 0 when the sample was dropped, -EFAULT when the event is too short
+ * for the fields its sample_type announces, otherwise the result of
+ * evsel__parse_sample() or of the delegate.
+ */
+static int aslr_tool__rewrite_sample(struct aslr_tool *aslr, union perf_event *event,
+				     struct perf_sample *sample, struct evsel *evsel,
+				     struct machine *machine, struct thread *thread)
+{
+	struct perf_tool *delegate = aslr->tool.delegate;
+	int ret;
+	u64 sample_type = evsel->core.attr.sample_type;
+	u64 branch_sample_type = evsel->core.attr.branch_sample_type;
+	union perf_event *new_event = (union perf_event *)aslr->event_copy;
+	struct perf_sample new_sample;
+	__u64 *in_array, *out_array;
+	u8 cpumode = sample->cpumode;
+	/* Number of 64-bit payload words following the event header. */
+	size_t n_words;
+	/* Highest payload index that may be read (inclusive). */
+	__u64 max_i;
+	u64 addr;
+	size_t i = 0, j = 0;
+
+	if (event->header.size < sizeof(event->header))
+		return -EFAULT;
+	n_words = (event->header.size - sizeof(event->header)) / sizeof(__u64);
+	if (n_words == 0 && sample_type != 0)
+		return -EFAULT;
+	max_i = n_words ? n_words - 1 : 0;
+
+	new_event->sample.header = event->sample.header;
+
+	in_array = &event->sample.array[0];
+	out_array = &new_event->sample.array[0];
+
+	if (sample_type & PERF_SAMPLE_IDENTIFIER)
+		if (copy_u64(in_array, out_array, &i, &j, max_i))
+			return -EFAULT; // id
+	if (sample_type & PERF_SAMPLE_IP) {
+		if (i > max_i)
+			return -EFAULT;
+		i++; // ip
+		out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode,
+							  sample->ip);
+	}
+	if (sample_type & PERF_SAMPLE_TID)
+		if (copy_u64(in_array, out_array, &i, &j, max_i))
+			return -EFAULT; // pid, tid
+	if (sample_type & PERF_SAMPLE_TIME)
+		if (copy_u64(in_array, out_array, &i, &j, max_i))
+			return -EFAULT; // time
+	if (sample_type & PERF_SAMPLE_ADDR) {
+		if (i > max_i)
+			return -EFAULT;
+		i++; // addr
+		out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode,
+							  sample->addr);
+	}
+	if (sample_type & PERF_SAMPLE_ID)
+		if (copy_u64(in_array, out_array, &i, &j, max_i))
+			return -EFAULT; // id
+	if (sample_type & PERF_SAMPLE_STREAM_ID)
+		if (copy_u64(in_array, out_array, &i, &j, max_i))
+			return -EFAULT; // stream_id
+	if (sample_type & PERF_SAMPLE_CPU)
+		if (copy_u64(in_array, out_array, &i, &j, max_i))
+			return -EFAULT; // cpu, res
+	if (sample_type & PERF_SAMPLE_PERIOD)
+		if (copy_u64(in_array, out_array, &i, &j, max_i))
+			return -EFAULT; // period
+	if (sample_type & PERF_SAMPLE_READ) {
+		if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) {
+			if (copy_u64(in_array, out_array, &i, &j, max_i))
+				return -EFAULT; // value
+			if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+				if (copy_u64(in_array, out_array, &i, &j, max_i))
+					return -EFAULT; // time_enabled
+			if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+				if (copy_u64(in_array, out_array, &i, &j, max_i))
+					return -EFAULT; // time_running
+			if (evsel->core.attr.read_format & PERF_FORMAT_ID)
+				if (copy_u64(in_array, out_array, &i, &j, max_i))
+					return -EFAULT; // id
+			if (evsel->core.attr.read_format & PERF_FORMAT_LOST)
+				if (copy_u64(in_array, out_array, &i, &j, max_i))
+					return -EFAULT; // lost
+		} else {
+			u64 nr;
+
+			if (i > max_i)
+				return -EFAULT;
+			nr = out_array[j++] = in_array[i++];
+			if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED)
+				if (copy_u64(in_array, out_array, &i, &j, max_i))
+					return -EFAULT; // time_enabled
+			if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING)
+				if (copy_u64(in_array, out_array, &i, &j, max_i))
+					return -EFAULT; // time_running
+			for (u64 cntr = 0; cntr < nr; cntr++) {
+				if (copy_u64(in_array, out_array, &i, &j, max_i))
+					return -EFAULT; // value
+				if (evsel->core.attr.read_format & PERF_FORMAT_ID)
+					if (copy_u64(in_array, out_array, &i, &j, max_i))
+						return -EFAULT; // id
+				if (evsel->core.attr.read_format & PERF_FORMAT_LOST)
+					if (copy_u64(in_array, out_array, &i, &j, max_i))
+						return -EFAULT; // lost
+			}
+		}
+	}
+	if (sample_type & PERF_SAMPLE_CALLCHAIN) {
+		if (copy_u64(in_array, out_array, &i, &j, max_i))
+			return -EFAULT; // nr
+
+		for (u64 cntr = 0; cntr < sample->callchain->nr; cntr++) {
+			if (i > max_i)
+				return -EFAULT;
+			i++;
+			addr = sample->callchain->ips[cntr];
+			if (addr >= PERF_CONTEXT_MAX) {
+				// Copy context values as is.
+				out_array[j++] = addr;
+				/* Context markers select the cpumode of the entries that follow. */
+				switch (addr) {
+				case PERF_CONTEXT_HV:
+					cpumode = PERF_RECORD_MISC_HYPERVISOR;
+					break;
+				case PERF_CONTEXT_KERNEL:
+					cpumode = PERF_RECORD_MISC_KERNEL;
+					break;
+				case PERF_CONTEXT_USER:
+					cpumode = PERF_RECORD_MISC_USER;
+					break;
+				default:
+					pr_debug("invalid callchain context: %"PRIx64"\n", addr);
+					/*
+					 * It seems the callchain is corrupted.
+					 * Discard sample.
+					 */
+					return 0;
+				}
+				continue;
+			}
+			out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode,
+								  sample->callchain->ips[cntr]);
+		}
+	}
+	if (sample_type & PERF_SAMPLE_RAW) {
+		size_t bytes = sizeof(u32) + sample->raw_size;
+
+		/* i never exceeds n_words here, so the subtraction cannot wrap. */
+		if (bytes > (n_words - i) * sizeof(u64))
+			return -EFAULT;
+		memcpy(&out_array[j], &in_array[i], bytes);
+		i += bytes / sizeof(u64);
+		j += bytes / sizeof(u64);
+		// TODO: certain raw samples can be remapped, such as
+		// tracepoints by examining their fields.
+		pr_debug("Dropping raw samples as possible ASLR leak\n");
+		return 0;
+	}
+	if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
+		if (copy_u64(in_array, out_array, &i, &j, max_i))
+			return -EFAULT; // nr
+		/*
+		 * PERF_SAMPLE_BRANCH_* flags live in attr.branch_sample_type,
+		 * not in attr.sample_type.
+		 */
+		if (branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX)
+			if (copy_u64(in_array, out_array, &i, &j, max_i))
+				return -EFAULT; // hw_idx
+		/* Each branch entry is three words: from, to, flags. */
+		if (sample->branch_stack->nr > (n_words - i) / 3)
+			return -EFAULT;
+		for (u64 cntr = 0; cntr < sample->branch_stack->nr; cntr++) {
+			out_array[j++] = aslr_tool__remap_address(aslr, thread, sample->cpumode,
+								  in_array[i++]); // from
+			out_array[j++] = aslr_tool__remap_address(aslr, thread, sample->cpumode,
+								  in_array[i++]); // to
+			if (copy_u64(in_array, out_array, &i, &j, max_i))
+				return -EFAULT; // flags
+		}
+		if (branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) {
+			if (sample->branch_stack->nr > n_words - i)
+				return -EFAULT;
+			memcpy(&out_array[j], &in_array[i], sample->branch_stack->nr * sizeof(u64));
+			i += sample->branch_stack->nr;
+			j += sample->branch_stack->nr;
+			// TODO: confirm branch counters don't leak ASLR information.
+			pr_debug("Dropping sample branch counters as possible ASLR leak\n");
+			return 0;
+		}
+	}
+	if (sample_type & PERF_SAMPLE_REGS_USER) {
+		if (copy_u64(in_array, out_array, &i, &j, max_i))
+			return -EFAULT; // abi
+		if (sample->user_regs->abi != PERF_SAMPLE_REGS_ABI_NONE) {
+			u64 nr = hweight64(evsel->core.attr.sample_regs_user);
+
+			if (nr > n_words - i)
+				return -EFAULT;
+			memcpy(&out_array[j], &in_array[i], nr * sizeof(u64));
+			i += nr;
+			j += nr;
+		}
+		// TODO: can this be less conservative?
+		pr_debug("Dropping regs user sample as possible ASLR leak\n");
+		return 0;
+	}
+	if (sample_type & PERF_SAMPLE_STACK_USER) {
+		u64 size;
+
+		if (i > max_i)
+			return -EFAULT;
+		size = out_array[j++] = in_array[i++];
+		if (size > 0) {
+			/* The size comes from the event; never copy past its payload. */
+			if (size > (n_words - i) * sizeof(u64))
+				return -EFAULT;
+			memcpy(&out_array[j], &in_array[i], size);
+			i += size / sizeof(u64);
+			j += size / sizeof(u64);
+			if (copy_u64(in_array, out_array, &i, &j, max_i))
+				return -EFAULT; // dyn_size
+		}
+		// TODO: can this be less conservative?
+		pr_debug("Dropping stack user sample as possible ASLR leak\n");
+		return 0;
+	}
+	if (sample_type & PERF_SAMPLE_WEIGHT_TYPE)
+		if (copy_u64(in_array, out_array, &i, &j, max_i))
+			return -EFAULT; // perf_sample_weight
+	if (sample_type & PERF_SAMPLE_DATA_SRC)
+		if (copy_u64(in_array, out_array, &i, &j, max_i))
+			return -EFAULT; // data_src
+	if (sample_type & PERF_SAMPLE_TRANSACTION)
+		if (copy_u64(in_array, out_array, &i, &j, max_i))
+			return -EFAULT; // transaction
+	if (sample_type & PERF_SAMPLE_REGS_INTR) {
+		if (copy_u64(in_array, out_array, &i, &j, max_i))
+			return -EFAULT; // abi
+		if (sample->intr_regs->abi != PERF_SAMPLE_REGS_ABI_NONE) {
+			u64 nr = hweight64(evsel->core.attr.sample_regs_intr);
+
+			if (nr > n_words - i)
+				return -EFAULT;
+			memcpy(&out_array[j], &in_array[i], nr * sizeof(u64));
+			i += nr;
+			j += nr;
+		}
+		// TODO: can this be less conservative?
+		pr_debug("Dropping interrupt register sample as possible ASLR leak\n");
+		return 0;
+	}
+	if (sample_type & PERF_SAMPLE_PHYS_ADDR) {
+		if (copy_u64(in_array, out_array, &i, &j, max_i))
+			return -EFAULT; // phys_addr
+		// TODO: can this be less conservative?
+		pr_debug("Dropping physical address sample as possible ASLR leak\n");
+		return 0;
+	}
+	if (sample_type & PERF_SAMPLE_CGROUP)
+		if (copy_u64(in_array, out_array, &i, &j, max_i))
+			return -EFAULT; // cgroup
+	if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE)
+		if (copy_u64(in_array, out_array, &i, &j, max_i))
+			return -EFAULT; // data_page_size
+	if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE)
+		if (copy_u64(in_array, out_array, &i, &j, max_i))
+			return -EFAULT; // code_page_size
+
+	if (sample_type & PERF_SAMPLE_AUX) {
+		u64 size;
+
+		if (i > max_i)
+			return -EFAULT;
+		size = out_array[j++] = in_array[i++];
+		/* The size comes from the event; never copy past its payload. */
+		if (size > (n_words - i) * sizeof(u64))
+			return -EFAULT;
+		memcpy(&out_array[j], &in_array[i], size);
+		i += size / sizeof(u64);
+		j += size / sizeof(u64);
+		// TODO: can this be less conservative?
+		pr_debug("Dropping aux sample as possible ASLR leak\n");
+		return 0;
+	}
+
+	if (evsel__is_offcpu_event(evsel)) {
+		// TODO: can this be less conservative?
+		pr_debug("Dropping off-CPU sample as possible ASLR leak\n");
+		return 0;
+	}
+
+	/* Re-parse the rebuilt event so the delegate sees the remapped values. */
+	perf_sample__init(&new_sample, /*all=*/ true);
+	ret = evsel__parse_sample(evsel, new_event, &new_sample);
+	if (!ret)
+		ret = delegate->sample(delegate, new_event, &new_sample, evsel, machine);
+	/* Release new_sample on the parse failure path too. */
+	perf_sample__exit(&new_sample);
+	return ret;
+}
+
+/*
+ * PERF_RECORD_SAMPLE handler: look up the sample's thread in the private, not
+ * remapped, machine, rewrite the sample through aslr_tool__rewrite_sample()
+ * and release the thread reference again on every path.
+ */
+static int aslr_tool__process_sample(const struct perf_tool *tool, union perf_event *event,
+				     struct perf_sample *sample,
+				     struct evsel *evsel, struct machine *machine)
+{
+	struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool);
+	struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool);
+	struct thread *thread = machine__findnew_thread(&aslr->machine, sample->pid, sample->tid);
+	int ret;
+
+	ret = aslr_tool__rewrite_sample(aslr, event, sample, evsel, machine, thread);
+	/* Drop the reference taken by machine__findnew_thread(). */
+	thread__put(thread);
+	return ret;
+}
+
+/*
+ * Attr event handler: forward a copy of the event with the address-carrying
+ * attr fields cleared.
+ * NOTE(review): bp_addr and kprobe_addr appear to share storage with
+ * config1/config2 in the UAPI attr - confirm that clearing them for every
+ * event type is intended.
+ */
+static int aslr_tool__process_attr(const struct perf_tool *tool,
+				   union perf_event *event,
+				   struct evlist **pevlist)
+{
+	struct delegate_tool *wrapper = container_of(tool, struct delegate_tool, tool);
+	struct aslr_tool *self = container_of(wrapper, struct aslr_tool, tool);
+	struct perf_tool *next = self->tool.delegate;
+	union perf_event *copy = (union perf_event *)self->event_copy;
+	struct perf_event_attr *attr;
+
+	memcpy(&copy->attr, &event->attr, event->attr.header.size);
+	attr = &copy->attr.attr;
+	/* Conservatively remove addresses. */
+	attr->bp_addr = 0;
+	attr->kprobe_addr = 0;
+
+	return next->attr(next, copy, pevlist);
+}
+
+/*
+ * Initialise @aslr as a delegating tool in front of @delegate: set up the
+ * private machine and the remap tables, and override the callbacks of every
+ * event that carries virtual addresses.
+ */
+static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate)
+{
+	struct perf_tool *ops;
+
+	delegate_tool__init(&aslr->tool, delegate);
+	ops = &aslr->tool.tool;
+	ops->ordered_events = true;
+
+	machine__init(&aslr->machine, "", HOST_KERNEL_ID);
+
+	hashmap__init(&aslr->remap_addresses,
+		      remap_addresses__hash, remap_addresses__equal,
+		      /*ctx=*/NULL);
+	hashmap__init(&aslr->top_addresses,
+		      top_addresses__hash, top_addresses__equal,
+		      /*ctx=*/NULL);
+	aslr->first_kernel_mapping = true;
+
+	/*
+	 * Callbacks not overridden below keep the delegate behaviour:
+	 * - read: reads a counter, okay to delegate.
+	 * - namespaces, cgroup, lost, lost_sample, aux, itrace_start,
+	 *   aux_output_hw_id, context_switch, throttle, unthrottle:
+	 *   no virtual addresses.
+	 * - bpf: no virtual address.
+	 * - event_update, tracing_data, finished_round, build_id, id_index,
+	 *   auxtrace_info, auxtrace_error, time_conv, thread_map, cpu_map,
+	 *   stat_config, stat, feature, finished_init, bpf_metadata,
+	 *   compressed, auxtrace: no virtual addresses.
+	 */
+	ops->sample = aslr_tool__process_sample;
+	ops->mmap = aslr_tool__process_mmap;
+	ops->mmap2 = aslr_tool__process_mmap2;
+	ops->comm = aslr_tool__process_comm;
+	ops->fork = aslr_tool__process_fork;
+	ops->exit = aslr_tool__process_exit;
+	ops->ksymbol = aslr_tool__process_ksymbol;
+	ops->text_poke = aslr_tool__process_text_poke;
+	ops->attr = aslr_tool__process_attr;
+}
+
+/*
+ * Allocate and initialise an ASLR remapping tool that forwards to @delegate.
+ * Returns the embedded perf_tool, or NULL if the allocation fails.
+ */
+struct perf_tool *aslr_tool__new(struct perf_tool *delegate)
+{
+	struct aslr_tool *self = malloc(sizeof(*self));
+
+	if (self) {
+		aslr_tool__init(self, delegate);
+		return &self->tool.tool;
+	}
+	return NULL;
+}
+
+/*
+ * Release a tool created by aslr_tool__new(): the heap allocated keys of
+ * remap_addresses, both hashmaps, the private machine and the tool itself.
+ * A NULL @tool (aslr_tool__new() failed) is ignored.
+ */
+void aslr_tool__delete(struct perf_tool *tool)
+{
+	struct delegate_tool *del_tool;
+	struct aslr_tool *aslr;
+	struct hashmap_entry *cur;
+	size_t bkt;
+
+	if (!tool)
+		return;
+
+	del_tool = container_of(tool, struct delegate_tool, tool);
+	aslr = container_of(del_tool, struct aslr_tool, tool);
+
+	/* remap_addresses keys are malloc'ed in the remap helpers. */
+	hashmap__for_each_entry(&aslr->remap_addresses, cur, bkt)
+		zfree(&cur->pkey);
+
+	hashmap__clear(&aslr->remap_addresses);
+	hashmap__clear(&aslr->top_addresses);
+	machine__exit(&aslr->machine);
+	free(aslr);
+}
diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h
new file mode 100644
index 000000000000..ea984d82681f
--- /dev/null
+++ b/tools/perf/util/aslr.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __PERF_ASLR_H
+#define __PERF_ASLR_H
+
+struct perf_tool;
+
+/*
+ * Create a tool that remaps virtual addresses in the events it sees and
+ * forwards them to @delegate. Returns NULL if the allocation fails. Release
+ * the result with aslr_tool__delete().
+ */
+struct perf_tool *aslr_tool__new(struct perf_tool *delegate);
+/* Free a tool returned by aslr_tool__new(), including its remap tables. */
+void aslr_tool__delete(struct perf_tool *aslr);
+
+#endif /* __PERF_ASLR_H */
--
2.54.0.545.g6539524ca2-goog
^ permalink raw reply related [flat|nested] 183+ messages in thread* [PATCH v1 2/2] perf test: Add inject ASLR test 2026-04-24 22:05 [PATCH v1 1/2] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses Ian Rogers @ 2026-04-24 22:05 ` Ian Rogers 2026-04-24 22:47 ` sashiko-bot 2026-04-24 22:36 ` [PATCH v1 1/2] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses sashiko-bot 2026-04-25 2:05 ` [PATCH v2 " Ian Rogers 2 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-04-24 22:05 UTC (permalink / raw) To: Peter Zijlstra, Ingo Molnar, Arnaldo Carvalho de Melo, Namhyung Kim, Jiri Olsa, Ian Rogers, Adrian Hunter, James Clark, Gabriel Marin, linux-kernel, linux-perf-users Add a new shell test `inject_aslr.sh` to verify the `perf inject --aslr` feature. The test covers: - Basic address remapping for user space samples. - Pipe mode coverage for `perf record` piped into `perf inject --aslr`. - Callchain address remapping. - Consistency of `perf report` output before and after injection. - Pipe mode report consistency. - Dropping of samples that leak ASLR info (physical addresses). - Kernel address remapping (skipping gracefully if permissions restrict recording the kernel map). - Kernel report consistency with address normalization. Assisted-by: Gemini:gemini-3.1-pro-preview Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/tests/shell/inject_aslr.sh | 381 ++++++++++++++++++++++++++ 1 file changed, 381 insertions(+) create mode 100755 tools/perf/tests/shell/inject_aslr.sh diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh new file mode 100755 index 000000000000..17544fe9ef6c --- /dev/null +++ b/tools/perf/tests/shell/inject_aslr.sh @@ -0,0 +1,381 @@ +#!/bin/bash +# perf inject --aslr test +# SPDX-License-Identifier: GPL-2.0 + +shelldir=$(dirname "$0") +# shellcheck source=lib/perf_has_symbol.sh +. 
"${shelldir}"/lib/perf_has_symbol.sh + +sym="noploop" + +# Set path to built perf +PERF="/tmp/perf3/perf" + +skip_test_missing_symbol ${sym} + +# Create global temp directory +temp_dir=$(mktemp -d /tmp/perf-test-aslr.XXXXXXXXXX) +data="${temp_dir}/perf.data" +data2="${temp_dir}/perf.data2" + +prog="${PERF} test -w noploop" +[ "$(uname -m)" = "s390x" ] && prog="$prog 3" +err=0 + +set -e + +cleanup() { + # Check if temp_dir is set and looks sane before removing + if [[ "${temp_dir}" =~ ^/tmp/perf-test-aslr\. ]]; then + rm -rf "${temp_dir}" + fi + trap - EXIT TERM INT +} + +trap_cleanup() { + echo "Unexpected signal in ${FUNCNAME[1]}" + cleanup + exit 1 +} +trap trap_cleanup TERM INT + +get_noploop_addr() { + local file=$1 + ${PERF} script -i "$file" | awk '{for(i=1;i<=NF;i++) if($i ~ /noploop\+/) {print $(i-1); exit}}' +} + +test_basic_aslr() { + echo "Test basic ASLR remapping" + local data + data=$(mktemp /tmp/perf.data.basic.XXXXXX) + local data2 + data2=$(mktemp /tmp/perf.data2.basic.XXXXXX) + + ${PERF} record -e task-clock:u -o "${data}" ${prog} + ${PERF} inject -v --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Basic ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Basic ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Basic ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Basic ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Basic ASLR test [Success]" + rm -f "${data}" "${data2}" "${data}.old" "${data2}.old" + fi +} + +test_pipe_aslr() { + echo "Test pipe mode ASLR remapping" + local data + data=$(mktemp /tmp/perf.data.pipe.XXXXXX) + local data2 + data2=$(mktemp /tmp/perf.data2.pipe.XXXXXX) + + # Use tee to save the original pipe data for comparison + ${PERF} record -e task-clock:u -o - ${prog} | 
tee "${data}" | ${PERF} inject --aslr -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Pipe ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Pipe ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Pipe ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Pipe ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Pipe ASLR test [Success]" + rm -f "${data}" "${data2}" "${data}.old" "${data2}.old" + fi +} + +test_callchain_aslr() { + echo "Test Callchain ASLR remapping" + local data + data=$(mktemp /tmp/perf.data.callchain.XXXXXX) + local data2 + data2=$(mktemp /tmp/perf.data2.callchain.XXXXXX) + + ${PERF} record -g -e task-clock:u -o "${data}" ${prog} + ${PERF} inject --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Callchain ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Callchain ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Callchain ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Callchain ASLR test [Failed - addresses are not remapped]" + err=1 + else + # Also check that the full script output differs (to cover callchains) + orig_script=$(${PERF} script -i "${data}" | grep -A 5 noploop | head -n 20) + new_script=$(${PERF} script -i "${data2}" | grep -A 5 noploop | head -n 20) + + if [ "$orig_script" = "$new_script" ]; then + echo "Callchain ASLR test [Failed - callchain output is identical]" + err=1 + else + echo "Callchain ASLR test [Success]" + rm -f "${data}" "${data2}" "${data}.old" "${data2}.old" + fi + fi +} + +test_report_aslr() { + echo "Test perf report consistency" + local data 
+ data=$(mktemp /tmp/perf.data.report.XXXXXX) + local data2 + data2=$(mktemp /tmp/perf.data2.report.XXXXXX) + local data_clean + data_clean=$(mktemp /tmp/perf.data.clean.XXXXXX) + + ${PERF} record -e task-clock:u -o "${data}" ${prog} + # Use -b to inject build-ids and force ordered events processing in both + ${PERF} inject -b -i "${data}" -o "${data_clean}" + ${PERF} inject -v -b --aslr -i "${data}" -o "${data2}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + ${PERF} report -i "${data_clean}" --stdio > "${report1}" + ${PERF} report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" + + diff -u "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ -s "${diff_file}" ]; then + echo "Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Report ASLR test [Success]" + fi + rm -f "${data}" "${data2}" "${data_clean}" "${data}.old" "${data2}.old" "${data_clean}.old" +} + +test_pipe_report_aslr() { + echo "Test pipe mode perf report consistency" + local data + data=$(mktemp /tmp/perf.data.pipe_report.XXXXXX) + local data2 + data2=$(mktemp /tmp/perf.data2.pipe_report.XXXXXX) + local data_clean + data_clean=$(mktemp /tmp/perf.data.clean.XXXXXX) + + # Use tee to save the original pipe data, then process it with inject -b + ${PERF} record -e task-clock:u -o - ${prog} | \ + tee "${data}" | \ + ${PERF} inject -b --aslr -o "${data2}" + ${PERF} inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local 
report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + ${PERF} report -i "${data_clean}" --stdio > "${report1}" + ${PERF} report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" + + diff -u "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ -s "${diff_file}" ]; then + echo "Pipe Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Report ASLR test [Success]" + fi + rm -f "${data}" "${data2}" "${data_clean}" "${data}.old" "${data2}.old" "${data_clean}.old" +} + +test_dropped_samples() { + echo "Test dropped samples (phys-data)" + local data + data=$(mktemp /tmp/perf.data.dropped.XXXXXX) + local data2 + data2=$(mktemp /tmp/perf.data2.dropped.XXXXXX) + + # Check if --phys-data is supported by recording a short run + if ! ${PERF} record -e task-clock:u --phys-data -o "${data}" -- sleep 0.1 > /dev/null 2>&1; then + echo "Skipping dropped samples test as --phys-data is not supported" + return + fi + + ${PERF} record -e task-clock:u --phys-data -o "${data}" ${prog} + ${PERF} inject --aslr -i "${data}" -o "${data2}" + + # Verify that samples are dropped. + samples_count=$(${PERF} script -i "${data2}" | wc -l) + + if [ "$samples_count" -gt 0 ]; then + echo "Dropped samples test [Failed - samples were not dropped]" + err=1 + else + echo "Dropped samples test [Success]" + rm -f "${data}" "${data2}" "${data}.old" "${data2}.old" + fi +} + +test_kernel_aslr() { + echo "Test kernel ASLR remapping" + local kdata + kdata=$(mktemp /tmp/perf.data.kernel.XXXXXX) + local kdata2 + kdata2=$(mktemp /tmp/perf.data2.kernel.XXXXXX) + local log_file + log_file=$(mktemp /tmp/kernel_record.log.XXXXXX) + + # Try to record kernel samples + if ! 
${PERF} record -e task-clock:k -o "${kdata}" ${prog} > "${log_file}" 2>&1; then + echo "Skipping kernel ASLR test as recording failed (maybe no permissions)" + rm -f "${kdata}" "${log_file}" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel ASLR test as kernel map could not be recorded (permissions restricted)" + rm -f "${kdata}" "${log_file}" + return + fi + + ${PERF} inject -v --aslr -i "${kdata}" -o "${kdata2}" + + # Check if kernel addresses are remapped. + orig_addr=$(${PERF} script -i "${kdata}" | awk '{for(i=1;i<=NF;i++) if($i ~ /^ffff/) {print $i; exit}}') + new_addr=$(${PERF} script -i "${kdata2}" | awk '{for(i=1;i<=NF;i++) if($i ~ /^ffff/) {print $i; exit}}') + + echo "Kernel ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Kernel ASLR test [Failed - no kernel samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Kernel ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Kernel ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Kernel ASLR test [Success]" + rm -f "${kdata}" "${kdata2}" "${log_file}" "${kdata}.old" "${kdata2}.old" + fi +} + +test_kernel_report_aslr() { + echo "Test kernel perf report consistency" + local kdata + kdata=$(mktemp /tmp/perf.data.kernel_report.XXXXXX) + local kdata2 + kdata2=$(mktemp /tmp/perf.data2.kernel_report.XXXXXX) + local data_clean + data_clean=$(mktemp /tmp/perf.data.clean.XXXXXX) + local log_file + log_file=$(mktemp /tmp/kernel_report_record.log.XXXXXX) + + # Try to record kernel samples + if ! 
${PERF} record -e task-clock:k -o "${kdata}" ${prog} > "${log_file}" 2>&1; then + echo "Skipping kernel report test as recording failed (maybe no permissions)" + rm -f "${kdata}" "${log_file}" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel report test as kernel map could not be recorded (permissions restricted)" + rm -f "${kdata}" "${log_file}" + return + fi + + # Use -b to inject build-ids and force ordered events processing in both + ${PERF} inject -b -i "${kdata}" -o "${data_clean}" + ${PERF} inject -v -b --aslr -i "${kdata}" -o "${kdata2}" + + local report1="${temp_dir}/report_kernel1" + local report2="${temp_dir}/report_kernel2" + local report1_clean="${temp_dir}/report_kernel1.clean" + local report2_clean="${temp_dir}/report_kernel2.clean" + local diff_file="${temp_dir}/diff_kernel" + + ${PERF} report -i "${data_clean}" --stdio > "${report1}" + ${PERF} report -i "${kdata2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' > "${report1_clean}" + grep '%' "${report2}" | grep -v '^#' > "${report2_clean}" + + # Normalize kernel DSOs and addresses in clean reports + # This allows kernel modules to be either a module or kernel.kallsyms + local report1_norm="${temp_dir}/report_kernel1.norm" + local report2_norm="${temp_dir}/report_kernel2.norm" + awk '{$3 = ($3 ~ /^\[.*\]$/ ? "[kernel]" : $3); \ + for(i=1;i<=NF;i++) \ + if($i ~ /^ffff/ || $i ~ /^0x/ || $i == "0000000000000000") \ + $i = "[addr]"; \ + print}' "${report1_clean}" | sort > "${report1_norm}" + awk '{$3 = ($3 ~ /^\[.*\]$/ ? 
"[kernel]" : $3); \ + for(i=1;i<=NF;i++) \ + if($i ~ /^ffff/ || $i ~ /^0x/ || $i == "0000000000000000") \ + $i = "[addr]"; \ + print}' "${report2_clean}" | sort > "${report2_norm}" + + diff -u "${report1_norm}" "${report2_norm}" > "${diff_file}" || true + + if [ -s "${diff_file}" ]; then + echo "Kernel Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Kernel Report ASLR test [Success]" + fi + rm -f "${kdata}" "${kdata2}" "${data_clean}" "${log_file}" "${kdata}.old" "${kdata2}.old" "${data_clean}.old" +} + +test_basic_aslr +test_pipe_aslr +test_callchain_aslr +test_report_aslr +test_pipe_report_aslr +test_dropped_samples +test_kernel_aslr +test_kernel_report_aslr + +cleanup +exit $err -- 2.54.0.545.g6539524ca2-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v1 2/2] perf test: Add inject ASLR test 2026-04-24 22:05 ` [PATCH v1 2/2] perf test: Add inject ASLR test Ian Rogers @ 2026-04-24 22:47 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-04-24 22:47 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Sashiko AI review found 4 potential issue(s): - [High] Hardcoded and insecure PERF binary path - [Medium] Unhandled grep pipeline aborts script via set -e - [Medium] Architecture assumption in kernel address parsing breaks 32-bit systems - [Medium] Improper cleanup of temporary files and directories -- commit 55c497e7d1e9a304b5ad52e13004ec9955f6ffcb Author: Ian Rogers <irogers@google.com> perf test: Add inject ASLR test Add a new shell test inject_aslr.sh to verify the perf inject --aslr feature. > diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh > new file mode 100755 > index 00000000000000..17544fe9ef6c02 > --- /dev/null > +++ b/tools/perf/tests/shell/inject_aslr.sh > @@ -0,0 +1,381 @@ > +#!/bin/bash > +# perf inject --aslr test > +# SPDX-License-Identifier: GPL-2.0 > + > +shelldir=$(dirname "$0") > +# shellcheck source=lib/perf_has_symbol.sh > +. "${shelldir}"/lib/perf_has_symbol.sh > + > +sym="noploop" > + > +# Set path to built perf > +PERF="/tmp/perf3/perf" Is this hardcoded path safe and portable? If the test is run as root, could a local user place a malicious executable at /tmp/perf3/perf and escalate privileges? Also, won't this break the test in standard CI environments? [ ... ] > +cleanup() { > + # Check if temp_dir is set and looks sane before removing > + if [[ "${temp_dir}" =~ ^/tmp/perf-test-aslr\. ]]; then > + rm -rf "${temp_dir}" > + fi > + trap - EXIT TERM INT > +} > + > +trap_cleanup() { > + echo "Unexpected signal in ${FUNCNAME[1]}" > + cleanup > + exit 1 > +} > +trap trap_cleanup TERM INT Because the script uses set -e, should this also trap EXIT? 
Without an EXIT trap, if a command fails and triggers an exit, temp_dir might be leaked. [ ... ] > +test_basic_aslr() { > + echo "Test basic ASLR remapping" > + local data > + data=$(mktemp /tmp/perf.data.basic.XXXXXX) Should these temporary files be created inside temp_dir instead of directly in /tmp? [ ... ] > + if [ -z "$orig_addr" ]; then > + echo "Basic ASLR test [Failed - no noploop samples in original file]" > + err=1 > + elif [ -z "$new_addr" ]; then > + echo "Basic ASLR test [Failed - could not find remapped address]" > + err=1 > + elif [ "$orig_addr" = "$new_addr" ]; then > + echo "Basic ASLR test [Failed - addresses are not remapped]" > + err=1 > + else > + echo "Basic ASLR test [Success]" > + rm -f "${data}" "${data2}" "${data}.old" "${data2}.old" > + fi > +} If the test fails and err=1 is set, won't these temporary files be permanently leaked? This pattern appears in multiple test functions. Also, in test_dropped_samples, if the test returns early due to unsupported --phys-data, these files are also leaked. [ ... ] > +test_kernel_aslr() { > + echo "Test kernel ASLR remapping" > + local kdata > + kdata=$(mktemp /tmp/perf.data.kernel.XXXXXX) > + local kdata2 > + kdata2=$(mktemp /tmp/perf.data2.kernel.XXXXXX) > + local log_file > + log_file=$(mktemp /tmp/kernel_record.log.XXXXXX) > + > + # Try to record kernel samples > + if ! ${PERF} record -e task-clock:k -o "${kdata}" ${prog} > "${log_file}" 2>&1; then > + echo "Skipping kernel ASLR test as recording failed (maybe no permissions)" > + rm -f "${kdata}" "${log_file}" > + return > + fi > + > + # Check for warning about kernel map restriction > + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then > + echo "Skipping kernel ASLR test as kernel map could not be recorded (permissions restricted)" > + rm -f "${kdata}" "${log_file}" > + return > + fi > + > + ${PERF} inject -v --aslr -i "${kdata}" -o "${kdata2}" > + > + # Check if kernel addresses are remapped. 
> + orig_addr=$(${PERF} script -i "${kdata}" | awk '{for(i=1;i<=NF;i++) if($i ~ /^ffff/) {print $i; exit}}') > + new_addr=$(${PERF} script -i "${kdata2}" | awk '{for(i=1;i<=NF;i++) if($i ~ /^ffff/) {print $i; exit}}') Will this parsing work on 32-bit architectures? Kernel space typically resides at different offsets (such as 0xc0000000 or 0x80000000) on 32-bit platforms, so hardcoding ^ffff might cause the test to spuriously fail there. [ ... ] > +test_kernel_report_aslr() { [ ... ] > + ${PERF} report -i "${data_clean}" --stdio > "${report1}" > + ${PERF} report -i "${kdata2}" --stdio > "${report2}" > + > + # Strip headers and compare lines with percentages > + grep '%' "${report1}" | grep -v '^#' > "${report1_clean}" > + grep '%' "${report2}" | grep -v '^#' > "${report2_clean}" What happens if report1 contains no matching lines? The second grep -v would receive empty input and exit with a status code of 1. Since the script uses set -e and doesn't have || true on this pipeline, won't this cause the script to abort immediately and silently? -- Sashiko AI review · https://sashiko.dev/#/patchset/20260424220519.2743472-1-irogers@google.com?part=2 ^ permalink raw reply [flat|nested] 183+ messages in thread
* Re: [PATCH v1 1/2] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses 2026-04-24 22:05 [PATCH v1 1/2] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses Ian Rogers 2026-04-24 22:05 ` [PATCH v1 2/2] perf test: Add inject ASLR test Ian Rogers @ 2026-04-24 22:36 ` sashiko-bot 2026-04-25 2:05 ` [PATCH v2 " Ian Rogers 2 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-04-24 22:36 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Sashiko AI review found 8 potential issue(s): - [High] Missing `thread__put()` causes severe memory leak of thread objects per event. - [High] Incorrect bitmask check for branch stack flags causes catastrophic sample parsing failure. - [High] Incorrect size calculation for `PERF_SAMPLE_RAW` causes parsing desynchronization and uninitialized memory leak. - [High] Off-by-one error in `copy_u64()` bounds checking allows out-of-bounds memory reads. - [High] Unconditional zeroing of union fields in `perf_event_attr` corrupts PMU configurations. - [High] `hashmap` usage causes 32-bit compilation failures and silent 64-bit address truncation. - [Medium] Unhandled guest callchain contexts cause valid samples to be discarded. - [Medium] Missing integer overflow checks on `branch_stack->nr` allow bounds-check bypass. -- commit cb2629f8e0c5449ea2cd944df7889ca64868a5eb Author: Ian Rogers <irogers@google.com> Subject: perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses This commit introduces an ASLR tool to perf inject. It allows remapping virtual memory addresses to prevent leaking ASLR information when sharing perf.data files. Unhandled events are dropped to remain conservative. diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c --- /dev/null +++ b/tools/perf/util/aslr.c [ ... ] > +static u64 aslr_tool__remap_address(struct aslr_tool *aslr, > + struct thread *aslr_thread, > + u8 cpumode, > + u64 addr) > +{ [ ... 
] > + if (!hashmap__find(&aslr->remap_addresses, &key, &remap_addr)) { Will storing and retrieving 64-bit addresses in this hashmap cause truncation or build failures on 32-bit architectures? The underlying hashmap API stores long values. On 32-bit systems, long is 32 bits wide, which might truncate the upper 32 bits of the remapped address during hashmap__add. Additionally, passing a u64 pointer to hashmap__find triggers hashmap_cast_ptr(), which contains a static assertion that the value size equals sizeof(long). Might this result in a build failure when compiling on 32-bit platforms? [ ... ] > +static int aslr_tool__process_mmap(const struct perf_tool *tool, > + union perf_event *event, > + struct perf_sample *sample, > + struct machine *machine) > +{ [ ... ] > + thread = machine__findnew_thread(&aslr->machine, event->mmap.pid, event->mmap.tid); > + memcpy(&new_event->mmap, &event->mmap, event->mmap.header.size); > + /* Remaps the mmap.start. */ > + new_event->mmap.start = aslr_tool__remap_mapping(aslr, thread, cpumode, > + event->mmap.start, event->mmap.len); [ ... ] > + return delegate->mmap(delegate, new_event, sample, machine); > +} Since machine__findnew_thread() increments the reference count of the returned thread, could returning from this function without calling thread__put() leak the thread reference for every processed event? This pattern appears to be repeated in aslr_tool__process_mmap2, aslr_tool__process_ksymbol, aslr_tool__process_text_poke, and aslr_tool__process_sample. [ ... ] > +static inline int copy_u64(__u64 *in_array, __u64 *out_array, > + size_t *i, size_t *j, const __u64 max_i) > +{ > + if (*i > max_i) > + return -EFAULT; > + out_array[(*j)++] = in_array[(*i)++]; > + return 0; > +} Does this bounds check allow reading past the end of the input event buffer? The max_i value is calculated as event->header.size / sizeof(__u64). 
Since in_array points to event->sample.array[0], which starts after the 8-byte perf_event_header, the array contains max_i - 1 elements. Should the check be if (*i >= max_i - 1) instead? [ ... ] > +static int aslr_tool__process_sample(const struct perf_tool *tool, union perf_event *event, > + struct perf_sample *sample, > + struct evsel *evsel, struct machine *machine) > +{ [ ... ] > + if (sample_type & PERF_SAMPLE_CALLCHAIN) { > + if (copy_u64(in_array, out_array, &i, &j, max_i)) > + return -EFAULT; // nr > + > + for (u64 cntr = 0; cntr < sample->callchain->nr; cntr++) { > + if (i > max_i) > + return -EFAULT; > + i++; > + addr = sample->callchain->ips[cntr]; > + if (addr >= PERF_CONTEXT_MAX) { > + // Copy context values as is. > + out_array[j++] = addr; > + switch (addr) { > + case PERF_CONTEXT_HV: > + cpumode = PERF_RECORD_MISC_HYPERVISOR; > + break; > + case PERF_CONTEXT_KERNEL: > + cpumode = PERF_RECORD_MISC_KERNEL; > + break; > + case PERF_CONTEXT_USER: > + cpumode = PERF_RECORD_MISC_USER; > + break; > + default: > + pr_debug("invalid callchain context: %"PRIx64"\n", addr); > + /* > + * It seems the callchain is corrupted. > + * Discard sample. > + */ > + return 0; > + } Should the switch statement handle other valid context markers like PERF_CONTEXT_GUEST, PERF_CONTEXT_GUEST_KERNEL, PERF_CONTEXT_GUEST_USER, and PERF_CONTEXT_USER_DEFERRED? Falling into the default block here abruptly discards the sample, which might break profiling workflows involving KVM guests. [ ... ] > + if (sample_type & PERF_SAMPLE_RAW) { > + size_t bytes = sizeof(u32) + sample->raw_size; > + > + if ((i + (bytes / sizeof(u64))) > max_i) > + return -EFAULT; > + memcpy(&out_array[j], &in_array[i], bytes); > + i += bytes / sizeof(u64); > + j += bytes / sizeof(u64); Will the integer division truncate the padding bytes if the raw sample size is not perfectly aligned to an 8-byte boundary? 
Because the kernel pads raw sample payloads to an 8-byte boundary, truncation might cause i and j to fail to advance past the padding. Could this lead to the tool misinterpreting the padding bytes as the next fields in the sample, and also leave uninitialized heap memory from aslr->event_copy in the output? [ ... ] > + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { > + if (copy_u64(in_array, out_array, &i, &j, max_i)) > + return -EFAULT; // nr > + if (sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) > + if (copy_u64(in_array, out_array, &i, &j, max_i)) > + return -EFAULT; // hw_idx Is this checking the correct bitmask? The PERF_SAMPLE_BRANCH_HW_INDEX and PERF_SAMPLE_BRANCH_COUNTERS flags belong to the branch_sample_type bitmask, not the sample_type bitmask. Because the sample_type enum uses those bit values for PERF_SAMPLE_TRANSACTION and PERF_SAMPLE_PHYS_ADDR, could a sample with a transaction or physical address field cause the parser to incorrectly assume the presence of branch hardware indices or counters? > + if (i + (sample->branch_stack->nr * 3) > max_i) > + return -EFAULT; Is it possible for sample->branch_stack->nr * 3 to wrap around and bypass this bounds check? If a crafted perf.data file contains an exceptionally large nr value, the multiplication might overflow and result in a small number. Could this allow the subsequent loop to perform out-of-bounds reads from in_array? [ ... ] > +static int aslr_tool__process_attr(const struct perf_tool *tool, > + union perf_event *event, > + struct evlist **pevlist) > +{ [ ... ] > + memcpy(&new_event->attr, &event->attr, event->attr.header.size); > + new_event->attr.attr.bp_addr = 0; // Conservatively remove addresses. > + new_event->attr.attr.kprobe_addr = 0; // Conservatively remove addresses. > + > + return delegate->attr(delegate, new_event, pevlist); > +} Could clearing bp_addr and kprobe_addr unintentionally corrupt PMU configurations? 
In struct perf_event_attr, bp_addr is an alias for the config1 union field, and kprobe_addr is an alias for config2. Since many hardware PMU drivers rely on config1 and config2 to store critical event configuration data, could clearing these fields break configurations for non-probe hardware events? -- Sashiko AI review · https://sashiko.dev/#/patchset/20260424220519.2743472-1-irogers@google.com?part=1 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v2 1/2] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses 2026-04-24 22:05 [PATCH v1 1/2] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses Ian Rogers 2026-04-24 22:05 ` [PATCH v1 2/2] perf test: Add inject ASLR test Ian Rogers 2026-04-24 22:36 ` [PATCH v1 1/2] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses sashiko-bot @ 2026-04-25 2:05 ` Ian Rogers 2026-04-25 2:05 ` [PATCH v2 2/2] perf test: Add inject ASLR test Ian Rogers 2026-05-04 3:51 ` [PATCH v3 0/4] perf tools: Add inject --aslr feature and prerequisite robustness fixes Ian Rogers 2 siblings, 2 replies; 183+ messages in thread From: Ian Rogers @ 2026-04-25 2:05 UTC (permalink / raw) To: irogers Cc: acme, adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, namhyung, peterz If perf.data files are taken from one machine to another they may leak virtual addresses and so weaken ASLR on the machine they are coming from. Add a '--aslr' option for perf inject that remaps all virtual addresses, or drops data/events, so that the virtual address information isn't leaked. When events are not known/handled by the tool they are dropped. This makes the tool conservative and it should never leak ASLR information, but it means virtual address remapping is needed for cases like auxtrace. 
Assisted-by: Gemini:gemini-3.1-pro-preview Signed-off-by: Ian Rogers <irogers@google.com> Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> --- tools/perf/builtin-inject.c | 11 +- tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 816 ++++++++++++++++++++++++++++++++++++ tools/perf/util/aslr.h | 10 + 4 files changed, 837 insertions(+), 1 deletion(-) create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index f174bc69cec4..fa3a71e23f1b 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -8,6 +8,7 @@ */ #include "builtin.h" +#include "util/aslr.h" #include "util/color.h" #include "util/dso.h" #include "util/vdso.h" @@ -123,6 +124,7 @@ struct perf_inject { bool in_place_update_dry_run; bool copy_kcore_dir; bool convert_callchain; + bool aslr; const char *input_name; struct perf_data output; u64 bytes_written; @@ -2564,6 +2566,8 @@ int cmd_inject(int argc, const char **argv) " instance has a subdir"), OPT_BOOLEAN(0, "convert-callchain", &inject.convert_callchain, "Generate callchains using DWARF and drop register/stack data"), + OPT_BOOLEAN(0, "aslr", &inject.aslr, + "Remap virtual memory addresses similar to ASLR"), OPT_END() }; const char * const inject_usage[] = { @@ -2571,6 +2575,7 @@ int cmd_inject(int argc, const char **argv) NULL }; bool ordered_events; + struct perf_tool *tool = &inject.tool; if (!inject.itrace_synth_opts.set) { /* Disable eager loading of kernel symbols that adds overhead to perf inject. 
*/ @@ -2684,7 +2689,9 @@ int cmd_inject(int argc, const char **argv) inject.tool.schedstat_domain = perf_event__repipe_op2_synth; inject.tool.dont_split_sample_group = true; inject.tool.merge_deferred_callchains = false; - inject.session = __perf_session__new(&data, &inject.tool, + if (inject.aslr) + tool = aslr_tool__new(&inject.tool); + inject.session = __perf_session__new(&data, tool, /*trace_event_repipe=*/inject.output.is_pipe, /*host_env=*/NULL); @@ -2789,6 +2796,8 @@ int cmd_inject(int argc, const char **argv) strlist__delete(inject.known_build_ids); zstd_fini(&(inject.session->zstd_data)); perf_session__delete(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); out_close_output: if (!inject.in_place_update) perf_data__close(&inject.output); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 70cc91d00804..65b96f3b87e2 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -6,6 +6,7 @@ perf-util-y += arm64-frame-pointer-unwind-support.o perf-util-y += addr2line.o perf-util-y += addr_location.o perf-util-y += annotate.o +perf-util-y += aslr.o perf-util-y += blake2s.o perf-util-y += block-info.o perf-util-y += block-range.o diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c new file mode 100644 index 000000000000..b21c5e82539d --- /dev/null +++ b/tools/perf/util/aslr.c @@ -0,0 +1,816 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "aslr.h" + +#include "addr_location.h" +#include "debug.h" +#include "event.h" +#include "evsel.h" +#include "machine.h" +#include "map.h" +#include "thread.h" +#include "tool.h" + +#include <internal/lib.h> /* page_size */ +#include <linux/compiler.h> +#include <errno.h> +#include <inttypes.h> + +struct remap_addresses_key { + u64 start_addr; + pid_t pid; +}; + +struct aslr_tool { + /** @tool: The tool implemented here and a pointer to a delegate to process the data. */ + struct delegate_tool tool; + /** @machine: The machine with the input, not remapped, virtual address layout. 
*/ + struct machine machine; + /** @event_copy: Buffer used to create an event to pass to the delegate. */ + char event_copy[PERF_SAMPLE_MAX_SIZE]; + /** @remap_addresses: mapping from remap_addresses_key to remapped address. */ + struct hashmap remap_addresses; + /** @top_addresses: mapping from process to max remapped address. */ + struct hashmap top_addresses; + /** @first_kernel_mapping: flag indicating if we are still to process any kernel mapping. */ + bool first_kernel_mapping; +}; + +static const pid_t kernel_pid = -1; + +/* Start remapping user processes from a small non-zero offset. */ +static const u64 user_space_start = 0x200000; +static const u64 kernel_space_start = 0xffff800010000000; + +static size_t remap_addresses__hash(long _key, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key = (struct remap_addresses_key *)_key; + + return key->start_addr ^ (key->start_addr >> 12) ^ key->pid; +} + +static bool remap_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key1 = (struct remap_addresses_key *)_key1; + struct remap_addresses_key *key2 = (struct remap_addresses_key *)_key2; + + return key1->pid == key2->pid && key1->start_addr == key2->start_addr; +} + +static size_t top_addresses__hash(long key, void *ctx __maybe_unused) +{ + return key; +} + +static bool top_addresses__equal(long key1, long key2, void *ctx __maybe_unused) +{ + return key1 == key2; +} + +static u64 round_up_to_page_size(u64 addr) +{ + return (addr + page_size - 1) & ~((u64)page_size - 1); +} + +static u64 aslr_tool__remap_address(struct aslr_tool *aslr, + struct thread *aslr_thread, + u8 cpumode, + u64 addr) +{ + struct addr_location al; + struct remap_addresses_key key; + u64 remap_addr = 0; + u64 *remap_addr_ptr = NULL; + u8 effective_cpumode = cpumode; + + if (!aslr_thread) + return 0; /* No thread. 
*/ + + addr_location__init(&al); + if (!thread__find_map(aslr_thread, cpumode, addr, &al)) { + /* + * If lookup fails with specified cpumode, try fallback to the other space + * to be robust against bad cpumode in samples. + */ + effective_cpumode = (cpumode == PERF_RECORD_MISC_KERNEL) ? + PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL; + if (!thread__find_map(aslr_thread, effective_cpumode, addr, &al)) { + pr_debug("Cannot find mmap for address %lx in either space, pid=%d\n", + addr, aslr_thread->pid_); + addr_location__exit(&al); + return 0; /* No mmap. */ + } + } + + key.pid = effective_cpumode == PERF_RECORD_MISC_KERNEL ? kernel_pid : aslr_thread->pid_; + key.start_addr = map__start(al.map); + if (!hashmap__find(&aslr->remap_addresses, &key, &remap_addr_ptr)) { + pr_debug("Cannot find a remapped entry for address %lx in mapping %lx(%lx) for pid=%d\n", + addr, map__start(al.map), map__size(al.map), key.pid); + addr_location__exit(&al); + return 0; + } + remap_addr = *remap_addr_ptr + (addr - map__start(al.map)); + addr_location__exit(&al); + return remap_addr; +} + +static u64 aslr_tool__remap_mapping(struct aslr_tool *aslr, + struct thread *aslr_thread, + u8 cpumode, + u64 start, u64 len) +{ + struct addr_location prev_al; + struct remap_addresses_key key; + u64 remap_addr = 0; + u64 *remap_addr_ptr = NULL; + u64 *max_addr_ptr = NULL; + /* If mapping is contiguous to the previous process mapping. */ + bool is_contiguous = false; + bool first_mapping = false; /* first process mapping. */ + + if (!aslr_thread) + return 0; /* No thread. 
*/ + + addr_location__init(&prev_al); + if (thread__find_map(aslr_thread, cpumode, start-1, &prev_al)) { + if (map__start(prev_al.map) + map__size(prev_al.map) == start) { + is_contiguous = true; + } else { + pr_debug("Previous mmap [%lx, %lx] overlaps current map [%lx, %lx]\n", + map__start(prev_al.map), + map__start(prev_al.map) + map__size(prev_al.map), + start, start+len); + } + } + addr_location__exit(&prev_al); + + key.pid = cpumode == PERF_RECORD_MISC_KERNEL ? kernel_pid : aslr_thread->pid_; + key.start_addr = start; + if (hashmap__find(&aslr->remap_addresses, &key, &remap_addr_ptr)) + return *remap_addr_ptr; + + if (!hashmap__find(&aslr->top_addresses, key.pid, &max_addr_ptr)) { + /* First mapping in this process. Don't add a page gap. */ + first_mapping = true; + remap_addr = (cpumode == PERF_RECORD_MISC_KERNEL ? + kernel_space_start : user_space_start); + } else { + remap_addr = *max_addr_ptr; + } + + remap_addr = round_up_to_page_size(remap_addr); + if (!is_contiguous && !first_mapping) + remap_addr += page_size; + + { + struct remap_addresses_key *new_key = malloc(sizeof(*new_key)); + + remap_addr_ptr = malloc(sizeof(u64)); + + if (!new_key || !remap_addr_ptr) { + free(new_key); + free(remap_addr_ptr); + return 0; + } + *new_key = key; + *remap_addr_ptr = remap_addr; + if (hashmap__add(&aslr->remap_addresses, new_key, remap_addr_ptr) != 0) { + pr_debug("Failed to add remap_addresses entry for pid=%d, mapping start=%lx, remapped start=%lx", + key.pid, start, remap_addr); + free(new_key); + free(remap_addr_ptr); + return 0; + } + } + + max_addr_ptr = malloc(sizeof(u64)); + + if (!max_addr_ptr) + return 0; + *max_addr_ptr = remap_addr + len; + hashmap__insert(&aslr->top_addresses, key.pid, max_addr_ptr, + first_mapping ? 
HASHMAP_ADD : HASHMAP_UPDATE, NULL, NULL); + return remap_addr; +} + +/* + * Return the remapped address for the ksymbol starting at addr. On first use + * the symbol is placed one page above the rounded-up top address recorded for + * the thread's pid (kernel_space_start if none is recorded yet) and that top + * address is advanced by len. Returns 0 when there is no thread, an + * allocation fails or the new entry cannot be added. + */ +static u64 aslr_tool__remap_ksymbol(struct aslr_tool *aslr, + struct thread *aslr_thread, + u64 addr, u32 len) +{ + struct remap_addresses_key key; + u64 remap_addr = 0; + u64 *max_addr_ptr = NULL; + u64 *remap_addr_ptr = NULL; + bool first_mapping = false; + + if (!aslr_thread) + return 0; /* No thread. */ + + key.pid = aslr_thread->pid_; + key.start_addr = addr; + /* A ksymbol that was already remapped keeps its assigned address. */ + if (hashmap__find(&aslr->remap_addresses, &key, &remap_addr_ptr)) + return *remap_addr_ptr; + + first_mapping = !hashmap__find(&aslr->top_addresses, key.pid, &max_addr_ptr); + if (first_mapping) + remap_addr = kernel_space_start; + else + remap_addr = *max_addr_ptr; + remap_addr = round_up_to_page_size(remap_addr) + page_size; + + { + struct remap_addresses_key *new_key = malloc(sizeof(*new_key)); + + remap_addr_ptr = malloc(sizeof(u64)); + + if (!new_key || !remap_addr_ptr) { + free(new_key); + free(remap_addr_ptr); + return 0; + } + *new_key = key; + *remap_addr_ptr = remap_addr; + if (hashmap__add(&aslr->remap_addresses, new_key, remap_addr_ptr) < 0) { + pr_debug("Failed to add remap_addresses entry for pid=%d, ksymbol=%"PRIx64", remapped address=%"PRIx64"\n", + key.pid, addr, remap_addr); + free(new_key); + free(remap_addr_ptr); + return 0; + } + } + + if (!first_mapping) { + /* + * This pid already owns a top-address slot: update it in place. + * Inserting a fresh allocation with HASHMAP_UPDATE and no + * old_value pointer would leak the previous one. + */ + *max_addr_ptr = remap_addr + len; + return remap_addr; + } + + max_addr_ptr = malloc(sizeof(u64)); + + if (!max_addr_ptr) + return 0; + *max_addr_ptr = remap_addr + len; + hashmap__insert(&aslr->top_addresses, key.pid, max_addr_ptr, + first_mapping ? 
HASHMAP_ADD : HASHMAP_UPDATE, NULL, NULL); + return remap_addr; +} + + +static int aslr_tool__process_mmap(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct perf_tool *delegate = aslr->tool.delegate; + union perf_event *new_event = (union perf_event *)aslr->event_copy; + u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + struct thread *thread; + int err; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_mmap(tool, event, sample, &aslr->machine); + if (err) + return err; + + thread = machine__findnew_thread(&aslr->machine, event->mmap.pid, event->mmap.tid); + memcpy(&new_event->mmap, &event->mmap, event->mmap.header.size); + /* Remaps the mmap.start. */ + new_event->mmap.start = aslr_tool__remap_mapping(aslr, thread, cpumode, + event->mmap.start, event->mmap.len); + if (aslr->first_kernel_mapping && cpumode == PERF_RECORD_MISC_KERNEL) { + /* + * If this is the first kernel image, we need to adjust the pgoff by a + * similar delta. 
+ */ + new_event->mmap.pgoff = event->mmap.pgoff - event->mmap.start + + new_event->mmap.start; + aslr->first_kernel_mapping = false; + } + err = delegate->mmap(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_mmap2(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct perf_tool *delegate = aslr->tool.delegate; + union perf_event *new_event = (union perf_event *)aslr->event_copy; + u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + struct thread *thread; + int err; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_mmap2(tool, event, sample, &aslr->machine); + if (err) + return err; + + thread = machine__findnew_thread(&aslr->machine, event->mmap2.pid, event->mmap2.tid); + memcpy(&new_event->mmap2, &event->mmap2, event->mmap2.header.size); + /* Remaps the mmap.start. */ + new_event->mmap2.start = aslr_tool__remap_mapping(aslr, thread, cpumode, + event->mmap2.start, event->mmap2.len); + if (aslr->first_kernel_mapping && cpumode == PERF_RECORD_MISC_KERNEL) { + /* + * If this is the first kernel image, we need to adjust the pgoff by a + * similar delta. 
+ */ + new_event->mmap2.pgoff = event->mmap2.pgoff - event->mmap2.start + + new_event->mmap2.start; + aslr->first_kernel_mapping = false; + } + err = delegate->mmap2(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_comm(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct perf_tool *delegate = aslr->tool.delegate; + int err; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_comm(tool, event, sample, &aslr->machine); + if (err) + return err; + + return delegate->comm(delegate, event, sample, machine); +} + +static int aslr_tool__process_fork(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct perf_tool *delegate = aslr->tool.delegate; + int err; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_fork(tool, event, sample, &aslr->machine); + if (err) + return err; + + return delegate->fork(delegate, event, sample, machine); +} + +static int aslr_tool__process_exit(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct perf_tool *delegate = aslr->tool.delegate; + int err; + + /* Create the thread, map, etc. in the ASLR before virtual address space. 
*/ + err = perf_event__process_exit(tool, event, sample, &aslr->machine); + if (err) + return err; + + return delegate->exit(delegate, event, sample, machine); +} + +static int aslr_tool__process_text_poke(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct perf_tool *delegate = aslr->tool.delegate; + union perf_event *new_event = (union perf_event *)aslr->event_copy; + u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + struct thread *thread; + int err; + + thread = machine__findnew_thread(&aslr->machine, sample->pid, sample->tid); + memcpy(&new_event->text_poke, &event->text_poke, event->text_poke.header.size); + new_event->text_poke.addr = aslr_tool__remap_address(aslr, thread, cpumode, + event->text_poke.addr); + + err = delegate->text_poke(delegate, new_event, sample, machine); + + thread__put(thread); + return err; +} + +static int aslr_tool__process_ksymbol(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct perf_tool *delegate = aslr->tool.delegate; + union perf_event *new_event = (union perf_event *)aslr->event_copy; + struct thread *thread; + int err; + + /* Create the thread, map, etc. in the ASLR before virtual address space. 
*/ + err = perf_event__process_ksymbol(tool, event, sample, &aslr->machine); + if (err) + return err; + + thread = machine__findnew_thread(&aslr->machine, kernel_pid, 0); + memcpy(&new_event->ksymbol, &event->ksymbol, event->ksymbol.header.size); + /* Remaps the ksymbol.start */ + new_event->ksymbol.addr = aslr_tool__remap_ksymbol(aslr, thread, + event->ksymbol.addr, event->ksymbol.len); + + err = delegate->ksymbol(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static inline int copy_u64(__u64 *in_array, __u64 *out_array, + size_t *i, size_t *j, const __u64 max_i) +{ + if (*i >= max_i - 1) + return -EFAULT; + out_array[(*j)++] = in_array[(*i)++]; + return 0; +} + +static int aslr_tool__process_sample(const struct perf_tool *tool, union perf_event *event, + struct perf_sample *sample, + struct evsel *evsel, struct machine *machine) +{ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct perf_tool *delegate = aslr->tool.delegate; + int ret; + u64 sample_type = evsel->core.attr.sample_type; + struct thread *thread = machine__findnew_thread(&aslr->machine, sample->pid, sample->tid); + const __u64 max_i = event->header.size / sizeof(__u64); + union perf_event *new_event = (union perf_event *)aslr->event_copy; + struct perf_sample new_sample; + __u64 *in_array, *out_array; + u8 cpumode = sample->cpumode; + u64 addr; + size_t i = 0, j = 0; + + new_event->sample.header = event->sample.header; + + in_array = &event->sample.array[0]; + out_array = &new_event->sample.array[0]; + + + + if (sample_type & PERF_SAMPLE_IDENTIFIER) + if (copy_u64(in_array, out_array, &i, &j, max_i)) + return -EFAULT; /* id */ + if (sample_type & PERF_SAMPLE_IP) { + i++; + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, + sample->ip); + } + if (sample_type & PERF_SAMPLE_TID) + if (copy_u64(in_array, out_array, &i, &j, max_i)) + return 
-EFAULT; /* pid, tid */ + if (sample_type & PERF_SAMPLE_TIME) + if (copy_u64(in_array, out_array, &i, &j, max_i)) + return -EFAULT; /* time */ + if (sample_type & PERF_SAMPLE_ADDR) { + i++; /* addr */ + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, + sample->addr); + } + if (sample_type & PERF_SAMPLE_ID) + if (copy_u64(in_array, out_array, &i, &j, max_i)) + return -EFAULT; /* id */ + if (sample_type & PERF_SAMPLE_STREAM_ID) + if (copy_u64(in_array, out_array, &i, &j, max_i)) + return -EFAULT; /* stream_id */ + if (sample_type & PERF_SAMPLE_CPU) + if (copy_u64(in_array, out_array, &i, &j, max_i)) + return -EFAULT; /* cpu, res */ + if (sample_type & PERF_SAMPLE_PERIOD) + if (copy_u64(in_array, out_array, &i, &j, max_i)) + return -EFAULT; /* period */ + if (sample_type & PERF_SAMPLE_READ) { + if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { + if (copy_u64(in_array, out_array, &i, &j, max_i)) + return -EFAULT; /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + if (copy_u64(in_array, out_array, &i, &j, max_i)) + return -EFAULT; /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + if (copy_u64(in_array, out_array, &i, &j, max_i)) + return -EFAULT; /* time_running */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + if (copy_u64(in_array, out_array, &i, &j, max_i)) + return -EFAULT; /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + if (copy_u64(in_array, out_array, &i, &j, max_i)) + return -EFAULT; /* lost */ + } else { + u64 nr; + + if (i > max_i) + return -EFAULT; + nr = out_array[j++] = in_array[i++]; + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + if (copy_u64(in_array, out_array, &i, &j, max_i)) + return -EFAULT; /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + if (copy_u64(in_array, out_array, &i, &j, max_i)) + return -EFAULT; /* time_running */ + for (u64 cntr = 0; cntr < nr; 
cntr++) { + if (copy_u64(in_array, out_array, &i, &j, max_i)) + return -EFAULT; /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + if (copy_u64(in_array, out_array, &i, &j, max_i)) + return -EFAULT; /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + if (copy_u64(in_array, out_array, &i, &j, max_i)) + return -EFAULT; /* lost */ + } + } + } + if (sample_type & PERF_SAMPLE_CALLCHAIN) { + if (copy_u64(in_array, out_array, &i, &j, max_i)) + return -EFAULT; /* nr */ + + for (u64 cntr = 0; cntr < sample->callchain->nr; cntr++) { + if (i > max_i) + return -EFAULT; + i++; + addr = sample->callchain->ips[cntr]; + if (addr >= PERF_CONTEXT_MAX) { + /* Copy context values as is. */ + out_array[j++] = addr; + switch (addr) { + case PERF_CONTEXT_HV: + cpumode = PERF_RECORD_MISC_HYPERVISOR; + break; + case PERF_CONTEXT_KERNEL: + cpumode = PERF_RECORD_MISC_KERNEL; + break; + case PERF_CONTEXT_USER: + cpumode = PERF_RECORD_MISC_USER; + break; + case PERF_CONTEXT_GUEST: + case PERF_CONTEXT_GUEST_KERNEL: + case PERF_CONTEXT_GUEST_USER: + case PERF_CONTEXT_USER_DEFERRED: + break; + default: + pr_debug("invalid callchain context: %"PRIx64"\n", addr); + /* + * It seems the callchain is corrupted. + * Discard sample. + */ + return 0; + } + continue; + } + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, + sample->callchain->ips[cntr]); + } + } + if (sample_type & PERF_SAMPLE_RAW) { + size_t bytes = sizeof(u32) + sample->raw_size; + + size_t u64_words = (bytes + 7) / 8; + + if ((i + u64_words) > max_i) + return -EFAULT; + memcpy(&out_array[j], &in_array[i], bytes); + i += u64_words; + j += u64_words; + /* + * TODO: certain raw samples can be remapped, such as + * tracepoints by examining their fields. 
+ */ + pr_debug("Dropping raw samples as possible ASLR leak\n"); + return 0; + } + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + if (copy_u64(in_array, out_array, &i, &j, max_i)) + return -EFAULT; /* nr */ + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) + if (copy_u64(in_array, out_array, &i, &j, max_i)) + return -EFAULT; /* hw_idx */ + if (sample->branch_stack->nr > (ULLONG_MAX / 3)) + return -EFAULT; + if (i + (sample->branch_stack->nr * 3) > max_i) + return -EFAULT; + for (u64 cntr = 0; cntr < sample->branch_stack->nr; cntr++) { + out_array[j++] = aslr_tool__remap_address(aslr, thread, sample->cpumode, + in_array[i++]); /* from */ + out_array[j++] = aslr_tool__remap_address(aslr, thread, sample->cpumode, + in_array[i++]); /* to */ + if (copy_u64(in_array, out_array, &i, &j, max_i)) + return -EFAULT; /* flags */ + } + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) { + if (i + sample->branch_stack->nr > max_i) + return -EFAULT; + memcpy(&out_array[j], &in_array[i], sample->branch_stack->nr * sizeof(u64)); + i += sample->branch_stack->nr; + j += sample->branch_stack->nr; + /* TODO: confirm branch counters don't leak ASLR information. */ + pr_debug("Dropping sample branch counters as possible ASLR leak\n"); + return 0; + } + } + if (sample_type & PERF_SAMPLE_REGS_USER) { + if (copy_u64(in_array, out_array, &i, &j, max_i)) + return -EFAULT; /* abi */ + if (sample->user_regs->abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(evsel->core.attr.sample_regs_user); + + if (i + nr > max_i) + return -EFAULT; + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); + i += nr; + j += nr; + } + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping regs user sample as possible ASLR leak\n"); + return 0; + } + if (sample_type & PERF_SAMPLE_STACK_USER) { + u64 size; + + /* Same bound as copy_u64() applies before reading a word. */ + if (i >= max_i - 1) + return -EFAULT; + size = out_array[j++] = in_array[i++]; + if (size > 0) { + /* + * size is read from the event itself, so make sure the stack + * bytes fit in what is left of the event before copying them. + */ + if (size > (max_i - 1 - i) * sizeof(u64)) + return -EFAULT; + memcpy(&out_array[j], &in_array[i], size); + i += size / sizeof(u64); + j += size / sizeof(u64); + if (copy_u64(in_array, out_array, &i, &j, max_i)) + return -EFAULT; /* dyn_size */ + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping stack user sample as possible ASLR leak\n"); + return 0; + } + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + if (copy_u64(in_array, out_array, &i, &j, max_i)) + return -EFAULT; /* perf_sample_weight */ + if (sample_type & PERF_SAMPLE_DATA_SRC) + if (copy_u64(in_array, out_array, &i, &j, max_i)) + return -EFAULT; /* data_src */ + if (sample_type & PERF_SAMPLE_TRANSACTION) + if (copy_u64(in_array, out_array, &i, &j, max_i)) + return -EFAULT; /* transaction */ + if (sample_type & PERF_SAMPLE_REGS_INTR) { + if (copy_u64(in_array, out_array, &i, &j, max_i)) + return -EFAULT; /* abi */ + if (sample->intr_regs->abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(evsel->core.attr.sample_regs_intr); + + /* The registers must fit in the words left, as in copy_u64(). */ + if (nr > max_i - 1 - i) + return -EFAULT; + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); + i += nr; + j += nr; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); + return 0; + } + if (sample_type & PERF_SAMPLE_PHYS_ADDR) { + if (copy_u64(in_array, out_array, &i, &j, max_i)) + return -EFAULT; /* phys_addr */ + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping physical address sample as possible ASLR leak\n"); + return 0; + } + if (sample_type & PERF_SAMPLE_CGROUP) + if (copy_u64(in_array, out_array, &i, &j, max_i)) + return -EFAULT; /* cgroup */ + if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + if (copy_u64(in_array, out_array, &i, &j, max_i)) + return -EFAULT; /* data_page_size */ + if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + if (copy_u64(in_array, out_array, &i, &j, max_i)) + return -EFAULT; /* code_page_size */ + + if (sample_type & PERF_SAMPLE_AUX) { + u64 size; + + if (i > max_i) + return -EFAULT; + size = out_array[j++] = in_array[i++]; + if (i + (size / sizeof(u64)) > max_i) + return -EFAULT; + memcpy(&out_array[j], &in_array[i], size); + i += size / sizeof(u64); + j += size / sizeof(u64); + /* TODO: can this be less conservative? */ + pr_debug("Dropping aux sample as possible ASLR leak\n"); + return 0; + } + + if (evsel__is_offcpu_event(evsel)) { + /* TODO: can this be less conservative? */ + pr_debug("Dropping off-CPU sample as possible ASLR leak\n"); + return 0; + } + + perf_sample__init(&new_sample, /*all=*/ true); + ret = evsel__parse_sample(evsel, new_event, &new_sample); + if (ret) + return ret; + + ret = delegate->sample(delegate, new_event, &new_sample, evsel, machine); + perf_sample__exit(&new_sample); + thread__put(thread); + return ret; +} + +static int aslr_tool__process_attr(const struct perf_tool *tool, + union perf_event *event, + struct evlist **pevlist) +{ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct perf_tool *delegate = aslr->tool.delegate; + union perf_event *new_event = (union perf_event *)aslr->event_copy; + + memcpy(&new_event->attr, &event->attr, event->attr.header.size); + if (new_event->attr.attr.type == PERF_TYPE_BREAKPOINT) + new_event->attr.attr.bp_addr = 0; /* Conservatively remove addresses. 
*/ + if (new_event->attr.attr.kprobe_addr >= 0xffff800000000000) + new_event->attr.attr.kprobe_addr = 0; /* Conservatively remove addresses. */ + + return delegate->attr(delegate, new_event, pevlist); +} + +static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) +{ + delegate_tool__init(&aslr->tool, delegate); + aslr->tool.tool.ordered_events = true; + + machine__init(&aslr->machine, "", HOST_KERNEL_ID); + + hashmap__init(&aslr->remap_addresses, + remap_addresses__hash, remap_addresses__equal, + /*ctx=*/NULL); + hashmap__init(&aslr->top_addresses, + top_addresses__hash, top_addresses__equal, + /*ctx=*/NULL); + aslr->first_kernel_mapping = true; + + aslr->tool.tool.sample = aslr_tool__process_sample; + /* read - reads a counter, okay to delegate. */ + aslr->tool.tool.mmap = aslr_tool__process_mmap; + aslr->tool.tool.mmap2 = aslr_tool__process_mmap2; + aslr->tool.tool.comm = aslr_tool__process_comm; + aslr->tool.tool.fork = aslr_tool__process_fork; + aslr->tool.tool.exit = aslr_tool__process_exit; + /* namespaces, cgroup, lost, lost_sample, aux, */ + /* itrace_start, aux_output_hw_id, context_switch, throttle, unthrottle */ + /* - no virtual addresses. */ + aslr->tool.tool.ksymbol = aslr_tool__process_ksymbol; + /* bpf - no virtual address. */ + aslr->tool.tool.text_poke = aslr_tool__process_text_poke; + aslr->tool.tool.attr = aslr_tool__process_attr; + /* event_update, tracing_data, finished_round, build_id, id_index, */ + /* auxtrace_info, auxtrace_error, time_conv, thread_map, cpu_map, */ + /* stat_config, stat, feature, finished_init, bpf_metadata, compressed, */ + /* auxtrace - no virtual addresses. 
*/ +} + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate) +{ + struct aslr_tool *aslr = malloc(sizeof(*aslr)); + + if (!aslr) + return NULL; + + aslr_tool__init(aslr, delegate); + return &aslr->tool.tool; +} + +void aslr_tool__delete(struct perf_tool *tool) +{ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct hashmap_entry *cur; + size_t bkt; + + hashmap__for_each_entry(&aslr->remap_addresses, cur, bkt) { + zfree(&cur->pkey); + zfree(&cur->pvalue); + } + hashmap__for_each_entry(&aslr->top_addresses, cur, bkt) { + zfree(&cur->pvalue); + } + + hashmap__clear(&aslr->remap_addresses); + hashmap__clear(&aslr->top_addresses); + machine__exit(&aslr->machine); + free(aslr); +} diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h new file mode 100644 index 000000000000..ea984d82681f --- /dev/null +++ b/tools/perf/util/aslr.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_ASLR_H +#define __PERF_ASLR_H + +struct perf_tool; + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate); +void aslr_tool__delete(struct perf_tool *aslr); + +#endif /* __PERF_ASLR_H */ -- 2.54.0.545.g6539524ca2-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* [PATCH v2 2/2] perf test: Add inject ASLR test 2026-04-25 2:05 ` [PATCH v2 " Ian Rogers @ 2026-04-25 2:05 ` Ian Rogers 2026-05-04 3:51 ` [PATCH v3 0/4] perf tools: Add inject --aslr feature and prerequisite robustness fixes Ian Rogers 1 sibling, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-04-25 2:05 UTC (permalink / raw) To: irogers Cc: acme, adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, namhyung, peterz Add a new shell test `inject_aslr.sh` to verify the `perf inject --aslr` feature. The test covers: - Basic address remapping for user space samples. - Pipe mode coverage for `perf record` piped into `perf inject --aslr`. - Callchain address remapping. - Consistency of `perf report` output before and after injection. - Pipe mode report consistency. - Dropping of samples that leak ASLR info (physical addresses). - Kernel address remapping (skipping gracefully if permissions restrict recording the kernel map). - Kernel report consistency with address normalization. Assisted-by: Gemini:gemini-3.1-pro-preview Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/tests/shell/inject_aslr.sh | 386 ++++++++++++++++++++++++++ 1 file changed, 386 insertions(+) create mode 100755 tools/perf/tests/shell/inject_aslr.sh diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh new file mode 100755 index 000000000000..951809eecfd4 --- /dev/null +++ b/tools/perf/tests/shell/inject_aslr.sh @@ -0,0 +1,386 @@ +#!/bin/bash +# perf inject --aslr test +# SPDX-License-Identifier: GPL-2.0 + +shelldir=$(dirname "$0") +# shellcheck source=lib/perf_has_symbol.sh +. 
"${shelldir}"/lib/perf_has_symbol.sh + +sym="noploop" + +skip_test_missing_symbol ${sym} + +# Create global temp directory +temp_dir=$(mktemp -d /tmp/perf-test-aslr.XXXXXXXXXX) +data="${temp_dir}/perf.data" +data2="${temp_dir}/perf.data2" + +prog="perf test -w noploop" +[ "$(uname -m)" = "s390x" ] && prog="$prog 3" +err=0 + +set -e + +cleanup() { + # Check if temp_dir is set and looks sane before removing + if [[ "${temp_dir}" =~ ^/tmp/perf-test-aslr\. ]]; then + rm -rf "${temp_dir}" + fi + trap - EXIT TERM INT +} + +trap_cleanup() { + echo "Unexpected signal in ${FUNCNAME[1]}" + cleanup + exit 1 +} +trap trap_cleanup TERM INT + +get_noploop_addr() { + local file=$1 + perf script -i "$file" | awk '{for(i=1;i<=NF;i++) if($i ~ /noploop\+/) {print $(i-1); exit}}' +} + +test_basic_aslr() { + echo "Test basic ASLR remapping" + local data + data=$(mktemp /tmp/perf.data.basic.XXXXXX) + local data2 + data2=$(mktemp /tmp/perf.data2.basic.XXXXXX) + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -v --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Basic ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Basic ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Basic ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Basic ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Basic ASLR test [Success]" + rm -f "${data}" "${data2}" "${data}.old" "${data2}.old" + fi +} + +test_pipe_aslr() { + echo "Test pipe mode ASLR remapping" + local data + data=$(mktemp /tmp/perf.data.pipe.XXXXXX) + local data2 + data2=$(mktemp /tmp/perf.data2.pipe.XXXXXX) + + # Use tee to save the original pipe data for comparison + perf record -e task-clock:u -o - ${prog} | tee "${data}" | perf inject --aslr -o "${data2}" + + 
orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Pipe ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Pipe ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Pipe ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Pipe ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Pipe ASLR test [Success]" + rm -f "${data}" "${data2}" "${data}.old" "${data2}.old" + fi +} + +test_callchain_aslr() { + echo "Test Callchain ASLR remapping" + local data + data=$(mktemp /tmp/perf.data.callchain.XXXXXX) + local data2 + data2=$(mktemp /tmp/perf.data2.callchain.XXXXXX) + + perf record -g -e task-clock:u -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Callchain ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Callchain ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Callchain ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Callchain ASLR test [Failed - addresses are not remapped]" + err=1 + else + # Also check that the full script output differs (to cover callchains) + orig_script=$(perf script -i "${data}" | grep -A 5 noploop | head -n 20) + new_script=$(perf script -i "${data2}" | grep -A 5 noploop | head -n 20) + + if [ "$orig_script" = "$new_script" ]; then + echo "Callchain ASLR test [Failed - callchain output is identical]" + err=1 + else + echo "Callchain ASLR test [Success]" + rm -f "${data}" "${data2}" "${data}.old" "${data2}.old" + fi + fi +} + +test_report_aslr() { + echo "Test perf report consistency" + local data + data=$(mktemp /tmp/perf.data.report.XXXXXX) + local data2 + 
data2=$(mktemp /tmp/perf.data2.report.XXXXXX) + local data_clean + data_clean=$(mktemp /tmp/perf.data.clean.XXXXXX) + + perf record -e task-clock:u -o "${data}" ${prog} + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i "${data}" -o "${data_clean}" + perf inject -v -b --aslr -i "${data}" -o "${data2}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ -s "${diff_file}" ]; then + echo "Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Report ASLR test [Success]" + fi + rm -f "${data}" "${data2}" "${data_clean}" "${data}.old" "${data2}.old" "${data_clean}.old" +} + +test_pipe_report_aslr() { + echo "Test pipe mode perf report consistency" + local data + data=$(mktemp /tmp/perf.data.pipe_report.XXXXXX) + local data2 + data2=$(mktemp /tmp/perf.data2.pipe_report.XXXXXX) + local data_clean + data_clean=$(mktemp /tmp/perf.data.clean.XXXXXX) + + # Use tee to save the original pipe data, then process it with inject -b + perf record -e task-clock:u -o - ${prog} | \ + tee "${data}" | \ + perf inject -b --aslr -o "${data2}" + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i 
"${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ -s "${diff_file}" ]; then + echo "Pipe Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Report ASLR test [Success]" + fi + rm -f "${data}" "${data2}" "${data_clean}" "${data}.old" "${data2}.old" "${data_clean}.old" +} + +test_dropped_samples() { + echo "Test dropped samples (phys-data)" + local data + data=$(mktemp /tmp/perf.data.dropped.XXXXXX) + local data2 + data2=$(mktemp /tmp/perf.data2.dropped.XXXXXX) + + # Check if --phys-data is supported by recording a short run + if ! perf record -e task-clock:u --phys-data -o "${data}" -- sleep 0.1 > /dev/null 2>&1; then + echo "Skipping dropped samples test as --phys-data is not supported" + return + fi + + perf record -e task-clock:u --phys-data -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + # Verify that samples are dropped. + samples_count=$(perf script -i "${data2}" | wc -l) + + if [ "$samples_count" -gt 0 ]; then + echo "Dropped samples test [Failed - samples were not dropped]" + err=1 + else + echo "Dropped samples test [Success]" + rm -f "${data}" "${data2}" "${data}.old" "${data2}.old" + fi +} + +test_kernel_aslr() { + echo "Test kernel ASLR remapping" + local kdata + kdata=$(mktemp /tmp/perf.data.kernel.XXXXXX) + local kdata2 + kdata2=$(mktemp /tmp/perf.data2.kernel.XXXXXX) + local log_file + log_file=$(mktemp /tmp/kernel_record.log.XXXXXX) + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${prog} > "${log_file}" 2>&1; then + echo "Skipping kernel ASLR test as recording failed (maybe no permissions)" + rm -f "${kdata}" "${log_file}" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel ASLR test as kernel map could not be recorded (permissions restricted)" + rm -f "${kdata}" "${log_file}" + return + fi + + perf inject -v --aslr -i "${kdata}" -o "${kdata2}" + + # Check if kernel addresses are remapped. + orig_addr=$(perf script -i "${kdata}" | \ + awk '{for(i=1;i<=NF;i++) if($i ~ /^ffff/) {print $i; exit}}') + new_addr=$(perf script -i "${kdata2}" | \ + awk '{for(i=1;i<=NF;i++) if($i ~ /^ffff/) {print $i; exit}}') + + echo "Kernel ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Kernel ASLR test [Failed - no kernel samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Kernel ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Kernel ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Kernel ASLR test [Success]" + rm -f "${kdata}" "${kdata2}" "${log_file}" "${kdata}.old" "${kdata2}.old" + fi +} + +test_kernel_report_aslr() { + echo "Test kernel perf report consistency" + local kdata + kdata=$(mktemp /tmp/perf.data.kernel_report.XXXXXX) + local kdata2 + kdata2=$(mktemp /tmp/perf.data2.kernel_report.XXXXXX) + local data_clean + data_clean=$(mktemp /tmp/perf.data.clean.XXXXXX) + local log_file + log_file=$(mktemp /tmp/kernel_report_record.log.XXXXXX) + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${prog} > "${log_file}" 2>&1; then + echo "Skipping kernel report test as recording failed (maybe no permissions)" + rm -f "${kdata}" "${log_file}" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel report test as kernel map could not be recorded (permissions restricted)" + rm -f "${kdata}" "${log_file}" + return + fi + + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i "${kdata}" -o "${data_clean}" + perf inject -v -b --aslr -i "${kdata}" -o "${kdata2}" + + local report1="${temp_dir}/report_kernel1" + local report2="${temp_dir}/report_kernel2" + local report1_clean="${temp_dir}/report_kernel1.clean" + local report2_clean="${temp_dir}/report_kernel2.clean" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${kdata2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' > "${report1_clean}" + grep '%' "${report2}" | grep -v '^#' > "${report2_clean}" + + # Normalize kernel DSOs and addresses in clean reports + # This allows kernel modules to be either a module or kernel.kallsyms + local report1_norm="${temp_dir}/report_kernel1.norm" + local report2_norm="${temp_dir}/report_kernel2.norm" + + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); \ + gsub(/0x[0-9a-f]+/, "[addr]", $0); \ + gsub(/ffff[0-9a-f]+/, "[addr]", $0); \ + gsub(/0000000000000000/, "[addr]", $0); \ + print}' "${report1_clean}" > "${report1_norm}" + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); \ + gsub(/0x[0-9a-f]+/, "[addr]", $0); \ + gsub(/ffff[0-9a-f]+/, "[addr]", $0); \ + gsub(/0000000000000000/, "[addr]", $0); \ + print}' "${report2_clean}" > "${report2_norm}" + + # Calculate sum of percentages for [kernel] samples + sum1=$(awk '/\[kernel\]/ {sum += $1} 
END {print sum}' "${report1_norm}") + sum2=$(awk '/\[kernel\]/ {sum += $1} END {print sum}' "${report2_norm}") + + echo "Kernel report sums: sum1=$sum1, sum2=$sum2" + + # Compare sums with tolerance + if awk -v s1="$sum1" -v s2="$sum2" \ + 'BEGIN {diff = s1 - s2; if (diff < 0) diff = -diff; if (diff < 0.05) exit 0; else exit 1}'; then + echo "Kernel Report ASLR test [Success]" + else + echo "Kernel Report ASLR test [Failed - sums differ too much]" + err=1 + fi + + rm -f "${kdata}" "${kdata2}" "${data_clean}" "${log_file}" \ + "${kdata}.old" "${kdata2}.old" "${data_clean}.old" +} + +test_basic_aslr +test_pipe_aslr +test_callchain_aslr +test_report_aslr +test_pipe_report_aslr +test_dropped_samples +test_kernel_aslr +test_kernel_report_aslr + +cleanup +exit $err -- 2.54.0.545.g6539524ca2-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* [PATCH v3 0/4] perf tools: Add inject --aslr feature and prerequisite robustness fixes 2026-04-25 2:05 ` [PATCH v2 " Ian Rogers 2026-04-25 2:05 ` [PATCH v2 2/2] perf test: Add inject ASLR test Ian Rogers @ 2026-05-04 3:51 ` Ian Rogers 2026-05-04 3:51 ` [PATCH v3 1/4] perf sched: Add missing mmap2 handler in timehist Ian Rogers ` (4 more replies) 1 sibling, 5 replies; 183+ messages in thread From: Ian Rogers @ 2026-05-04 3:51 UTC (permalink / raw) To: acme, gmx, namhyung Cc: adrian.hunter, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz, Ian Rogers This patch series introduces the new 'perf inject --aslr' feature to remap virtual memory addresses or drop physical memory event leaks when profile record data is shared between machines. Bundled with this feature are two independent, critical bug fixes inside core event dispatching tools that harden perf session analysis against dynamic crashes and callchain mapping failures. --- Core Feature: 'perf inject --aslr' (Patches 3 and 4) Transferring perf.data files across environments introduces a potential leak of virtual address footprints, weakening Address Space Layout Randomization (ASLR) on the originating machine. To mitigate this, we introduce the --aslr flag into perf inject. Unknown or unhandled events are dropped conservatively, while handled samples and branch loops undergo systematic virtual memory offset obfuscation. To ensure comprehensive memory and error-path safety, the ASLR tool implements: - Machine namespaces ('struct machines') to safely interleave host mappings and unprivileged guest (KVM) memory regions without boundary leakages. - Multi-map anchor key matching (anchored by DSO, invariant offsets, and PID) resolving overlapping split-map lookups. - Subtraction-based bounds check equations to mathematically secure branch stack loops against integer overflows. 
- Secure u64 dynamic buffer calculations on userspace stack and hardware tracing payloads to prevent wrap-around heap overflows. - Clean, error-checked skip advancement loops (skipn) past dynamic AUX streams in piped records to maintain stream reader sync. - Robust OOM fallback rollbacks of transient dictionary keys to guarantee dictionary hashmap integrity on failures. Verification is reinforced in Patch 4 with a new comprehensive POSIX shell suite ('inject_aslr.sh'), hardened against SIGPIPE signal exits with stream consuming awk loops and robust 'set -o pipefail' assertions. --- Prerequisite Bug Fixes (Patches 1 and 2) During development, two core event delegation issues were identified and resolved to prevent crashes and data-loss during analysis: 1. perf sched: 'timehist' registered standard MMAP, COMM, EXIT, and FORK stubs, but completely omitted registering MMAP2 callbacks. Because modern environments output maps primarily via MMAP2 frames, this caused timehist sessions to silently drop shared library mappings, causing dynamic callchain symbol resolutions to fail. Patch 1 corrects this by properly registering perf_event__process_mmap2. 2. perf tool: Patch 2 fixes missing copies of schedstat callbacks inside delegated wrapper tools (which caused segfaults on NULL stubs) and properly initializes/copies the 'dont_split_sample_group' grouping parameters to prevent stack garbage from triggering silent drops of non-leader events during split sample-group delivery. 
Ian Rogers (4): perf sched: Add missing mmap2 handler in timehist perf tool: Fix missing schedstat delegates and dont_split_sample_group in delegate_tool perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses perf test: Add inject ASLR test tools/perf/builtin-inject.c | 18 +- tools/perf/builtin-sched.c | 1 + tools/perf/tests/shell/inject_aslr.sh | 423 +++++++++ tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 1157 +++++++++++++++++++++++++ tools/perf/util/aslr.h | 10 + tools/perf/util/tool.c | 6 + 7 files changed, 1615 insertions(+), 1 deletion(-) create mode 100755 tools/perf/tests/shell/inject_aslr.sh create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h -- 2.54.0.545.g6539524ca2-goog ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v3 1/4] perf sched: Add missing mmap2 handler in timehist 2026-05-04 3:51 ` [PATCH v3 0/4] perf tools: Add inject --aslr feature and prerequisite robustness fixes Ian Rogers @ 2026-05-04 3:51 ` Ian Rogers 2026-05-04 3:51 ` [PATCH v3 2/4] perf tool: Fix missing schedstat delegates and dont_split_sample_group in delegate_tool Ian Rogers ` (3 subsequent siblings) 4 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-05-04 3:51 UTC (permalink / raw) To: acme, gmx, namhyung Cc: adrian.hunter, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz, Ian Rogers perf_sched__timehist() registers event handlers for options using the sched->tool struct. It registers handlers for MMAP, COMM, EXIT, FORK, etc. but completely omits registering a handler for MMAP2 events. Failing to register both MMAP and MMAP2 handlers causes modern systems (which primarily output MMAP2 records) to silently drop VMA map mappings. This results in uninitialized machine/thread mapping structures, making it impossible to resolve shared library instruction pointers (IPs) to dynamic symbols/DSOs during timehist callchain analysis. Fix this by correctly registering perf_event__process_mmap2 in sched->tool inside perf_sched__timehist(). 
Assisted-by: Gemini-CLI:Google Gemini 3 Fixes: 5bbfec0ad93c ("perf sched: Implement timehist option") Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/builtin-sched.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 555247568e7a..241c2f808f7b 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -3299,6 +3299,7 @@ static int perf_sched__timehist(struct perf_sched *sched) */ sched->tool.sample = perf_timehist__process_sample; sched->tool.mmap = perf_event__process_mmap; + sched->tool.mmap2 = perf_event__process_mmap2; sched->tool.comm = perf_event__process_comm; sched->tool.exit = perf_event__process_exit; sched->tool.fork = perf_event__process_fork; -- 2.54.0.545.g6539524ca2-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* [PATCH v3 2/4] perf tool: Fix missing schedstat delegates and dont_split_sample_group in delegate_tool 2026-05-04 3:51 ` [PATCH v3 0/4] perf tools: Add inject --aslr feature and prerequisite robustness fixes Ian Rogers 2026-05-04 3:51 ` [PATCH v3 1/4] perf sched: Add missing mmap2 handler in timehist Ian Rogers @ 2026-05-04 3:51 ` Ian Rogers 2026-05-04 3:51 ` [PATCH v3 3/4] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses Ian Rogers ` (2 subsequent siblings) 4 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-05-04 3:51 UTC (permalink / raw) To: acme, gmx, namhyung Cc: adrian.hunter, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz, Ian Rogers delegate_tool was missing the delegate overrides for schedstat_cpu and schedstat_domain. As a result, when allocated with zalloc, these callbacks defaulted to NULL, causing a segmentation fault crash if any schedstat events were delivered during event processing. Fix this by adding delegate_schedstat_cpu and delegate_schedstat_domain via the CREATE_DELEGATE_OP2 macro, and ensuring delegate_tool__init correctly registers them. Additionally, delegate_tool__init completely omitted copying the dont_split_sample_group property from the delegate. This would cause wrapper tools to default the flag to false, which corrupts piped event processing (e.g., in perf inject) by triggering duplicate event deliveries on split sample values in deliver_sample_group(). Similarly, perf_tool__init() omitted the initialization of this boolean field. On stack-allocated tools that rely on this initializer (like intel-tpebs or __cmd_evlist), this could result in uninitialized stack garbage evaluating to true—silently dropping non-leader event members in deliver_sample_group(). Fix both issues by properly copying the field in delegate_tool__init and initializing it to false in perf_tool__init. 
Assisted-by: Gemini-CLI:Google Gemini 3 Fixes: 6331b2669359 ("perf tool: Add a delegate_tool that just delegates actions to another tool") Fixes: 79bcd34e0f3d ("perf inject: Fix leader sampling inserting additional samples") Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/util/tool.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/perf/util/tool.c b/tools/perf/util/tool.c index 013c7839e2cf..ff2150517b75 100644 --- a/tools/perf/util/tool.c +++ b/tools/perf/util/tool.c @@ -285,6 +285,7 @@ void perf_tool__init(struct perf_tool *tool, bool ordered_events) tool->no_warn = false; tool->show_feat_hdr = SHOW_FEAT_NO_HEADER; tool->merge_deferred_callchains = true; + tool->dont_split_sample_group = false; tool->sample = process_event_sample_stub; tool->mmap = process_event_stub; @@ -433,6 +434,8 @@ CREATE_DELEGATE_OP2(stat_config); CREATE_DELEGATE_OP2(stat_round); CREATE_DELEGATE_OP2(thread_map); CREATE_DELEGATE_OP2(time_conv); +CREATE_DELEGATE_OP2(schedstat_cpu); +CREATE_DELEGATE_OP2(schedstat_domain); CREATE_DELEGATE_OP2(tracing_data); #define CREATE_DELEGATE_OP3(name) \ @@ -470,6 +473,7 @@ void delegate_tool__init(struct delegate_tool *tool, struct perf_tool *delegate) tool->tool.no_warn = delegate->no_warn; tool->tool.show_feat_hdr = delegate->show_feat_hdr; tool->tool.merge_deferred_callchains = delegate->merge_deferred_callchains; + tool->tool.dont_split_sample_group = delegate->dont_split_sample_group; tool->tool.sample = delegate_sample; tool->tool.read = delegate_read; @@ -516,4 +520,6 @@ void delegate_tool__init(struct delegate_tool *tool, struct perf_tool *delegate) tool->tool.bpf_metadata = delegate_bpf_metadata; tool->tool.compressed = delegate_compressed; tool->tool.auxtrace = delegate_auxtrace; + tool->tool.schedstat_cpu = delegate_schedstat_cpu; + tool->tool.schedstat_domain = delegate_schedstat_domain; } -- 2.54.0.545.g6539524ca2-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* [PATCH v3 3/4] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses 2026-05-04 3:51 ` [PATCH v3 0/4] perf tools: Add inject --aslr feature and prerequisite robustness fixes Ian Rogers 2026-05-04 3:51 ` [PATCH v3 1/4] perf sched: Add missing mmap2 handler in timehist Ian Rogers 2026-05-04 3:51 ` [PATCH v3 2/4] perf tool: Fix missing schedstat delegates and dont_split_sample_group in delegate_tool Ian Rogers @ 2026-05-04 3:51 ` Ian Rogers 2026-05-04 4:51 ` sashiko-bot 2026-05-04 3:51 ` [PATCH v3 4/4] perf test: Add inject ASLR test Ian Rogers 2026-05-04 7:29 ` [PATCH v4 0/4] perf tools: Add inject --aslr feature and prerequisite robustness fixes Ian Rogers 4 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-05-04 3:51 UTC (permalink / raw) To: acme, gmx, namhyung Cc: adrian.hunter, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz, Ian Rogers If perf.data files are taken from one machine to another they may leak virtual addresses and so weaken ASLR on the machine they are coming from. Add a '--aslr' option for perf inject that remaps all virtual addresses, or drops data/events, so that the virtual address information isn't leaked. When events are not known/handled by the tool they are dropped. This makes the tool conservative and it should never leak ASLR information, but it means virtual address remapping is needed for cases like auxtrace. To ensure comprehensive robustness and security, this tool: - Employs guest namespace isolation by utilizing 'struct machines' to safely interleave host and unprivileged KVM guest virtual address mappings. - Resolves VMA split map failures (caused by maps__fixup_overlap_and_insert) consistently by anchoring mappings on DSO and memory invariants. - Guards against integer overflows in branch stack loops via subtraction-based bounds arithmetic. - Prevents heap buffer overflows by computing safe word limits on userspace stacks and dynamic hardware tracing (AUX) sizes. 
- Prevents key collisions/ABA lookups by correctly managing DSO reference counts (dso__get/put). - Cleans up error paths to avoid inconsistent hashmap mappings on OOM failures. - Optimizes performance by removing redundant hot-path memory allocations. - Cleanly advances session readers past dropped auxtrace streams. Assisted-by: Gemini-CLI:Google Gemini 3 Signed-off-by: Ian Rogers <irogers@google.com> Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> --- v3: Combine split-map fixes, guest namespaces, bounds checks, OOM rollbacks, hot path optimization, safe dso references, and I/O stream error handling from v3/v4 development. Drop raw auxtrace events. Fix thread reference leaks in event handlers. Fix 32-bit truncation bug in hashmaps using u64* values. Prevent leaking uninitialized heap memory by zeroing copy buffer. Correct bitmask checks for branch stack flags. Avoid PMU configuration corruption. v2: First review feedback adjustments. --- tools/perf/builtin-inject.c | 18 +- tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 1157 +++++++++++++++++++++++++++++++++++ tools/perf/util/aslr.h | 10 + 4 files changed, 1185 insertions(+), 1 deletion(-) create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index f174bc69cec4..6e6bf6b67956 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -8,6 +8,7 @@ */ #include "builtin.h" +#include "util/aslr.h" #include "util/color.h" #include "util/dso.h" #include "util/vdso.h" @@ -123,6 +124,7 @@ struct perf_inject { bool in_place_update_dry_run; bool copy_kcore_dir; bool convert_callchain; + bool aslr; const char *input_name; struct perf_data output; u64 bytes_written; @@ -2564,6 +2566,8 @@ int cmd_inject(int argc, const char **argv) " instance has a subdir"), OPT_BOOLEAN(0, "convert-callchain", &inject.convert_callchain, "Generate callchains using DWARF and 
drop register/stack data"), + OPT_BOOLEAN(0, "aslr", &inject.aslr, + "Remap virtual memory addresses similar to ASLR"), OPT_END() }; const char * const inject_usage[] = { @@ -2571,6 +2575,7 @@ int cmd_inject(int argc, const char **argv) NULL }; bool ordered_events; + struct perf_tool *tool = &inject.tool; if (!inject.itrace_synth_opts.set) { /* Disable eager loading of kernel symbols that adds overhead to perf inject. */ @@ -2684,12 +2689,21 @@ int cmd_inject(int argc, const char **argv) inject.tool.schedstat_domain = perf_event__repipe_op2_synth; inject.tool.dont_split_sample_group = true; inject.tool.merge_deferred_callchains = false; - inject.session = __perf_session__new(&data, &inject.tool, + if (inject.aslr) { + tool = aslr_tool__new(&inject.tool); + if (!tool) { + ret = -ENOMEM; + goto out_close_output; + } + } + inject.session = __perf_session__new(&data, tool, /*trace_event_repipe=*/inject.output.is_pipe, /*host_env=*/NULL); if (IS_ERR(inject.session)) { ret = PTR_ERR(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); goto out_close_output; } @@ -2789,6 +2803,8 @@ int cmd_inject(int argc, const char **argv) strlist__delete(inject.known_build_ids); zstd_fini(&(inject.session->zstd_data)); perf_session__delete(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); out_close_output: if (!inject.in_place_update) perf_data__close(&inject.output); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 70cc91d00804..65b96f3b87e2 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -6,6 +6,7 @@ perf-util-y += arm64-frame-pointer-unwind-support.o perf-util-y += addr2line.o perf-util-y += addr_location.o perf-util-y += annotate.o +perf-util-y += aslr.o perf-util-y += blake2s.o perf-util-y += block-info.o perf-util-y += block-range.o diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c new file mode 100644 index 000000000000..32548352e1e5 --- /dev/null +++ b/tools/perf/util/aslr.c @@ -0,0 +1,1157 @@ +// 
SPDX-License-Identifier: GPL-2.0 +#include "aslr.h" + +#include "addr_location.h" +#include "debug.h" +#include "event.h" +#include "evsel.h" +#include "machine.h" +#include "map.h" +#include "thread.h" +#include "tool.h" +#include "session.h" +#include "data.h" +#include "dso.h" + +#include <internal/lib.h> /* page_size */ +#include <linux/compiler.h> +#include <linux/zalloc.h> +#include <errno.h> +#include <inttypes.h> +#include <unistd.h> + +static int skipn(int fd, u64 n) +{ + char buf[4096]; + ssize_t ret; + + while (n > 0) { + ret = read(fd, buf, (n < (u64)sizeof(buf) ? n : (u64)sizeof(buf))); + if (ret <= 0) + return ret; + n -= ret; + } + + return 0; +} + +/** + * struct remap_addresses_key - Key for mapping original addresses to remapped ones. + * @dso: Pointer to the DSO (Dynamic Shared Object) associated with the mapping. + * @invariant: Unique offset invariant within the VMA (Virtual Memory Area). + * Calculated as `start - pgoff`. This value remains constant when + * perf's internal `maps__fixup_overlap_and_insert` splits a map into + * fragmented VMA pieces due to overlapping events, allowing us to + * resolve split maps consistently back to the original VMA. + * @pid: Process ID associated with the mapping. + */ +struct remap_addresses_key { + struct dso *dso; + u64 invariant; + pid_t pid; +}; + +struct aslr_mapping { + struct list_head node; + u64 orig_start; + u64 len; + u64 remap_start; +}; + +struct aslr_tool { + /** @tool: The tool implemented here and a pointer to a delegate to process the data. */ + struct delegate_tool tool; + /** @machines: The machines with the input, not remapped, virtual address layout. */ + struct machines machines; + /** @event_copy: Buffer used to create an event to pass to the delegate. */ + char event_copy[PERF_SAMPLE_MAX_SIZE]; + /** @remap_addresses: mapping from remap_addresses_key to remapped address. */ + struct hashmap remap_addresses; + /** @top_addresses: mapping from process to max remapped address. 
*/ + struct hashmap top_addresses; + /** @first_kernel_mapping: flag indicating if we are still to process any kernel mapping. */ + bool first_kernel_mapping; +}; + +static const pid_t kernel_pid = -1; + +/* Start remapping user processes from a small non-zero offset. */ +static const u64 user_space_start = 0x200000; +static const u64 kernel_space_start = 0xffff800010000000; + +static size_t remap_addresses__hash(long _key, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key = (struct remap_addresses_key *)_key; + + return (size_t)key->dso ^ key->invariant ^ key->pid; +} + +static bool remap_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key1 = (struct remap_addresses_key *)_key1; + struct remap_addresses_key *key2 = (struct remap_addresses_key *)_key2; + + return key1->dso == key2->dso && + key1->invariant == key2->invariant && + key1->pid == key2->pid; +} + +static size_t top_addresses__hash(long key, void *ctx __maybe_unused) +{ + return key; +} + +static bool top_addresses__equal(long key1, long key2, void *ctx __maybe_unused) +{ + return key1 == key2; +} + +static u64 round_up_to_page_size(u64 addr) +{ + return (addr + page_size - 1) & ~((u64)page_size - 1); +} + +static u64 aslr_tool__remap_address(struct aslr_tool *aslr, + struct thread *aslr_thread, + u8 cpumode, + u64 addr) +{ + struct addr_location al; + struct remap_addresses_key key; + u64 *remapped_invariant_ptr = NULL; + u64 remap_addr = 0; + u8 effective_cpumode = cpumode; + + if (!aslr_thread) + return 0; /* No thread. */ + + addr_location__init(&al); + if (!thread__find_map(aslr_thread, cpumode, addr, &al)) { + /* + * If lookup fails with specified cpumode, try fallback to the other space + * to be robust against bad cpumode in samples. + */ + effective_cpumode = (cpumode == PERF_RECORD_MISC_KERNEL) ? 
+ PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL; + if (!thread__find_map(aslr_thread, effective_cpumode, addr, &al)) { + addr_location__exit(&al); + return 0; /* No mmap. */ + } + } + + key.dso = map__dso(al.map); + key.invariant = map__start(al.map) - map__pgoff(al.map); + key.pid = effective_cpumode == PERF_RECORD_MISC_KERNEL ? kernel_pid : aslr_thread->pid_; + + if (hashmap__find(&aslr->remap_addresses, &key, &remapped_invariant_ptr)) { + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + + (addr - map__start(al.map)); + } else { + pr_debug("Cannot find a remapped entry for address %lx in mapping %lx(%lx) for pid=%d\n", + addr, map__start(al.map), map__size(al.map), key.pid); + } + + addr_location__exit(&al); + return remap_addr; +} + +static u64 aslr_tool__remap_mapping(struct aslr_tool *aslr, + struct thread *aslr_thread, + u8 cpumode, + u64 start, u64 len, u64 pgoff) +{ + struct addr_location al; + struct remap_addresses_key key; + u64 remap_addr = 0; + u64 *remapped_invariant_ptr = NULL; + u64 *max_addr_ptr = NULL; + bool is_contiguous = false; + bool first_mapping = false; + bool key_found = false; + + if (!aslr_thread) + return 0; /* No thread. */ + + addr_location__init(&al); + if (thread__find_map(aslr_thread, cpumode, start, &al)) + key.dso = map__dso(al.map); + else + key.dso = NULL; + + key.invariant = start - pgoff; + key.pid = cpumode == PERF_RECORD_MISC_KERNEL ? 
kernel_pid : aslr_thread->pid_; + + if (hashmap__find(&aslr->remap_addresses, &key, &remapped_invariant_ptr)) { + remap_addr = *remapped_invariant_ptr + pgoff; + key_found = true; + } else { + struct addr_location prev_al; + + addr_location__init(&prev_al); + if (thread__find_map(aslr_thread, cpumode, start - 1, &prev_al)) { + if (map__start(prev_al.map) + map__size(prev_al.map) == start) { + is_contiguous = true; + } else { + pr_debug("Previous mmap [%lx, %lx] overlaps current map [%lx, %lx]\n", + map__start(prev_al.map), + map__start(prev_al.map) + map__size(prev_al.map), + start, start+len); + } + } + addr_location__exit(&prev_al); + + if (!hashmap__find(&aslr->top_addresses, key.pid, &max_addr_ptr)) { + first_mapping = true; + remap_addr = (cpumode == PERF_RECORD_MISC_KERNEL ? + kernel_space_start : user_space_start); + } else { + remap_addr = *max_addr_ptr; + } + + remap_addr = round_up_to_page_size(remap_addr); + if (!is_contiguous && !first_mapping) + remap_addr += page_size; + + { + struct remap_addresses_key *new_key = malloc(sizeof(*new_key)); + u64 *new_val = malloc(sizeof(u64)); + + if (!new_key || !new_val) { + free(new_key); + free(new_val); + addr_location__exit(&al); + return 0; + } + *new_key = key; + new_key->dso = dso__get(key.dso); + *new_val = remap_addr - pgoff; + + if (hashmap__add(&aslr->remap_addresses, new_key, new_val) != 0) { + dso__put(new_key->dso); + free(new_key); + free(new_val); + addr_location__exit(&al); + return 0; + } + } + } + + /* Update top_addresses */ + { + u64 *new_max = malloc(sizeof(u64)); + u64 *old_val = NULL; + int err; + + if (!new_max) { + struct remap_addresses_key *old_key = NULL; + u64 *old_val_remap = NULL; + + if (!key_found) { + hashmap__delete(&aslr->remap_addresses, &key, + &old_key, &old_val_remap); + if (old_key) + dso__put(old_key->dso); + free(old_key); + free(old_val_remap); + } + addr_location__exit(&al); + return 0; + } + *new_max = remap_addr + len; + + if (hashmap__find(&aslr->top_addresses, 
key.pid, &max_addr_ptr)) { + if (*max_addr_ptr > *new_max) + *new_max = *max_addr_ptr; + } + + err = hashmap__insert(&aslr->top_addresses, key.pid, new_max, + (first_mapping && !key_found) ? + HASHMAP_ADD : HASHMAP_UPDATE, + NULL, &old_val); + if (err) { + struct remap_addresses_key *old_key = NULL; + u64 *old_val_remap = NULL; + + free(new_max); + if (!key_found) { + hashmap__delete(&aslr->remap_addresses, &key, + &old_key, &old_val_remap); + if (old_key) + dso__put(old_key->dso); + free(old_key); + free(old_val_remap); + } + addr_location__exit(&al); + return 0; + } + free(old_val); + } + + addr_location__exit(&al); + return remap_addr; +} + +static u64 aslr_tool__remap_ksymbol(struct aslr_tool *aslr, + struct thread *aslr_thread, + u64 addr, u32 len) +{ + struct addr_location al; + struct remap_addresses_key key; + u64 remap_addr = 0; + u64 *remapped_invariant_ptr = NULL; + u64 *max_addr_ptr = NULL; + bool first_mapping = false; + + if (!aslr_thread) + return 0; /* No thread. */ + + addr_location__init(&al); + if (thread__find_map(aslr_thread, PERF_RECORD_MISC_KERNEL, addr, &al)) + key.dso = map__dso(al.map); + else + key.dso = NULL; + + key.invariant = addr; /* pgoff is 0 for ksymbols */ + key.pid = aslr_thread->pid_; + + if (hashmap__find(&aslr->remap_addresses, &key, &remapped_invariant_ptr)) { + remap_addr = *remapped_invariant_ptr; + addr_location__exit(&al); + return remap_addr; + } + + if (!hashmap__find(&aslr->top_addresses, key.pid, &max_addr_ptr)) { + first_mapping = true; + remap_addr = kernel_space_start; + } else { + remap_addr = *max_addr_ptr; + } + + remap_addr = round_up_to_page_size(remap_addr) + page_size; + + { + struct remap_addresses_key *new_key = malloc(sizeof(*new_key)); + u64 *new_val = malloc(sizeof(u64)); + + if (!new_key || !new_val) { + free(new_key); + free(new_val); + addr_location__exit(&al); + return 0; + } + *new_key = key; + new_key->dso = dso__get(key.dso); + *new_val = remap_addr; + + if (hashmap__add(&aslr->remap_addresses, 
new_key, new_val) < 0) { + dso__put(new_key->dso); + free(new_key); + free(new_val); + addr_location__exit(&al); + return 0; + } + } + + { + u64 *new_max = malloc(sizeof(u64)); + u64 *old_val = NULL; + int err; + + if (!new_max) { + struct remap_addresses_key *old_key = NULL; + u64 *old_val_remap = NULL; + + hashmap__delete(&aslr->remap_addresses, &key, &old_key, &old_val_remap); + if (old_key) + dso__put(old_key->dso); + free(old_key); + free(old_val_remap); + addr_location__exit(&al); + return 0; + } + *new_max = remap_addr + len; + + if (hashmap__find(&aslr->top_addresses, key.pid, &max_addr_ptr)) { + if (*max_addr_ptr > *new_max) + *new_max = *max_addr_ptr; + } + + err = hashmap__insert(&aslr->top_addresses, key.pid, new_max, + first_mapping ? + HASHMAP_ADD : HASHMAP_UPDATE, + NULL, &old_val); + if (err) { + struct remap_addresses_key *old_key = NULL; + u64 *old_val_remap = NULL; + + free(new_max); + hashmap__delete(&aslr->remap_addresses, &key, &old_key, &old_val_remap); + if (old_key) + dso__put(old_key->dso); + free(old_key); + free(old_val_remap); + addr_location__exit(&al); + return 0; + } + free(old_val); + } + + addr_location__exit(&al); + return remap_addr; +} + + +static int aslr_tool__process_mmap(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + /* Create the thread, map, etc. 
in the ASLR before virtual address space. */ + err = perf_event__process_mmap(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap.pid, event->mmap.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap, &event->mmap, event->mmap.header.size); + /* Remaps the mmap.start. */ + new_event->mmap.start = aslr_tool__remap_mapping(aslr, thread, cpumode, + event->mmap.start, + event->mmap.len, + event->mmap.pgoff); + if (aslr->first_kernel_mapping && cpumode == PERF_RECORD_MISC_KERNEL) { + /* + * If this is the first kernel image, we need to adjust the pgoff by a + * similar delta. + */ + new_event->mmap.pgoff = event->mmap.pgoff - event->mmap.start + + new_event->mmap.start; + aslr->first_kernel_mapping = false; + } + err = delegate->mmap(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_mmap2(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. 
*/ + err = perf_event__process_mmap2(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap2.pid, event->mmap2.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap2, &event->mmap2, event->mmap2.header.size); + /* Remaps the mmap.start. */ + new_event->mmap2.start = aslr_tool__remap_mapping(aslr, thread, cpumode, + event->mmap2.start, + event->mmap2.len, + event->mmap2.pgoff); + if (aslr->first_kernel_mapping && cpumode == PERF_RECORD_MISC_KERNEL) { + /* + * If this is the first kernel image, we need to adjust the pgoff by a + * similar delta. + */ + new_event->mmap2.pgoff = event->mmap2.pgoff - event->mmap2.start + + new_event->mmap2.start; + aslr->first_kernel_mapping = false; + } + err = delegate->mmap2(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_comm(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. 
*/ + err = perf_event__process_comm(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->comm(delegate, event, sample, machine); +} + +static int aslr_tool__process_fork(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_fork(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->fork(delegate, event, sample, machine); +} + +static int aslr_tool__process_exit(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. 
*/ + err = perf_event__process_exit(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->exit(delegate, event, sample, machine); +} + +static int aslr_tool__process_text_poke(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + thread = machine__findnew_thread(aslr_machine, sample->pid, sample->tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->text_poke, &event->text_poke, event->text_poke.header.size); + new_event->text_poke.addr = aslr_tool__remap_address(aslr, thread, cpumode, + event->text_poke.addr); + + err = delegate->text_poke(delegate, new_event, sample, machine); + + thread__put(thread); + return err; +} + +static int aslr_tool__process_ksymbol(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + /* Create the thread, 
map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_ksymbol(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, kernel_pid, 0); + if (!thread) + return -ENOMEM; + memcpy(&new_event->ksymbol, &event->ksymbol, event->ksymbol.header.size); + /* Remaps the ksymbol.start */ + new_event->ksymbol.addr = aslr_tool__remap_ksymbol(aslr, thread, + event->ksymbol.addr, event->ksymbol.len); + + err = delegate->ksymbol(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_sample(const struct perf_tool *tool, union perf_event *event, + struct perf_sample *sample, + struct evsel *evsel, struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + int ret; + u64 sample_type; + struct thread *thread; + struct machine *aslr_machine; + __u64 max_i; + __u64 max_j; + union perf_event *new_event; + struct perf_sample new_sample; + __u64 *in_array, *out_array; + u8 cpumode; + u64 addr; + size_t i; + size_t j; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + ret = -EFAULT; + sample_type = evsel->core.attr.sample_type; + max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); + max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); + new_event = (union perf_event *)aslr->event_copy; + cpumode = sample->cpumode; + i = 0; + j = 0; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + thread = machine__findnew_thread(aslr_machine, sample->pid, sample->tid); + + if (!thread) + return -ENOMEM; + + if (max_i > PERF_SAMPLE_MAX_SIZE / sizeof(u64)) + goto out_put; + + + + new_event->sample.header = event->sample.header; + + in_array = &event->sample.array[0]; + out_array = 
&new_event->sample.array[0]; + +#define CHECK_BOUNDS(required_i, required_j) \ + do { \ + if (i + (required_i) > max_i || j + (required_j) > max_j) { \ + ret = -EFAULT; \ + goto out_put; \ + } \ + } while (0) + +#define COPY_U64() \ + do { \ + CHECK_BOUNDS(1, 1); \ + out_array[j++] = in_array[i++]; \ + } while (0) + +#define REMAP_U64(addr_field) \ + do { \ + CHECK_BOUNDS(1, 1); \ + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr_field); \ + i++; \ + } while (0) + + if (sample_type & PERF_SAMPLE_IDENTIFIER) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_IP) + REMAP_U64(sample->ip); + if (sample_type & PERF_SAMPLE_TID) + COPY_U64(); /* pid, tid */ + if (sample_type & PERF_SAMPLE_TIME) + COPY_U64(); /* time */ + if (sample_type & PERF_SAMPLE_ADDR) + REMAP_U64(sample->addr); + if (sample_type & PERF_SAMPLE_ID) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_STREAM_ID) + COPY_U64(); /* stream_id */ + if (sample_type & PERF_SAMPLE_CPU) + COPY_U64(); /* cpu, res */ + if (sample_type & PERF_SAMPLE_PERIOD) + COPY_U64(); /* period */ + if (sample_type & PERF_SAMPLE_READ) { + if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } else { + u64 nr; + + CHECK_BOUNDS(1, 1); + nr = out_array[j++] = in_array[i++]; + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + for (u64 cntr = 0; cntr < nr; cntr++) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id 
*/ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } + } + } + if (sample_type & PERF_SAMPLE_CALLCHAIN) { + u64 nr; + + CHECK_BOUNDS(1, 1); + nr = out_array[j++] = in_array[i++]; + + for (u64 cntr = 0; cntr < nr; cntr++) { + CHECK_BOUNDS(1, 1); + addr = in_array[i++]; + if (addr >= PERF_CONTEXT_MAX) { + out_array[j++] = addr; + switch (addr) { + case PERF_CONTEXT_HV: + cpumode = PERF_RECORD_MISC_HYPERVISOR; + break; + case PERF_CONTEXT_KERNEL: + cpumode = PERF_RECORD_MISC_KERNEL; + break; + case PERF_CONTEXT_USER: + cpumode = PERF_RECORD_MISC_USER; + break; + case PERF_CONTEXT_GUEST: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_KERNEL: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_USER: + cpumode = PERF_RECORD_MISC_GUEST_USER; + break; + case PERF_CONTEXT_USER_DEFERRED: + /* + * Immediately followed by a 64-bit + * stitching cookie. Skip/Copy it! + */ + CHECK_BOUNDS(1, 1); + out_array[j++] = in_array[i++]; + cntr++; + break; + default: + pr_debug("invalid callchain context: %"PRIx64"\n", addr); + ret = 0; + goto out_put; + } + continue; + } + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr); + } + } + if (sample_type & PERF_SAMPLE_RAW) { + size_t bytes = sizeof(u32) + sample->raw_size; + size_t u64_words = (bytes + 7) / 8; + + if (i + u64_words > max_i || j + u64_words > max_j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], bytes); + i += u64_words; + j += u64_words; + /* + * TODO: certain raw samples can be remapped, such as + * tracepoints by examining their fields. 
+ */ + pr_debug("Dropping raw samples as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + u64 nr; + + CHECK_BOUNDS(1, 1); + nr = out_array[j++] = in_array[i++]; + + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) + COPY_U64(); /* hw_idx */ + + if (nr > (ULLONG_MAX / 3)) { + ret = -EFAULT; + goto out_put; + } + if (nr * 3 > max_i - i || nr * 3 > max_j - j) { + ret = -EFAULT; + goto out_put; + } + for (u64 cntr = 0; cntr < nr; cntr++) { + out_array[j++] = aslr_tool__remap_address(aslr, thread, + sample->cpumode, + in_array[i++]); /* from */ + out_array[j++] = aslr_tool__remap_address(aslr, thread, + sample->cpumode, + in_array[i++]); /* to */ + out_array[j++] = in_array[i++]; /* flags */ + } + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) { + if (nr > max_i - i || nr > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); + i += nr; + j += nr; + /* TODO: confirm branch counters don't leak ASLR information. */ + pr_debug("Dropping sample branch counters as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + } + if (sample_type & PERF_SAMPLE_REGS_USER) { + u64 abi; + + COPY_U64(); /* abi */ + abi = out_array[j-1]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(evsel->core.attr.sample_regs_user); + + if (nr > max_i - i || nr > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); + i += nr; + j += nr; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping regs user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_STACK_USER) { + u64 size; + + CHECK_BOUNDS(1, 1); + size = out_array[j++] = in_array[i++]; + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 
1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + i += u64_words; + j += u64_words; + + COPY_U64(); /* dyn_size */ + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping stack user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + COPY_U64(); /* perf_sample_weight */ + if (sample_type & PERF_SAMPLE_DATA_SRC) + COPY_U64(); /* data_src */ + if (sample_type & PERF_SAMPLE_TRANSACTION) + COPY_U64(); /* transaction */ + if (sample_type & PERF_SAMPLE_REGS_INTR) { + u64 abi; + + COPY_U64(); /* abi */ + abi = out_array[j-1]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(evsel->core.attr.sample_regs_intr); + + if (nr > max_i - i || nr > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); + i += nr; + j += nr; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_PHYS_ADDR) { + COPY_U64(); /* phys_addr */ + /* TODO: can this be less conservative? */ + pr_debug("Dropping physical address sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_CGROUP) + COPY_U64(); /* cgroup */ + if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + COPY_U64(); /* data_page_size */ + if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + COPY_U64(); /* code_page_size */ + + if (sample_type & PERF_SAMPLE_AUX) { + u64 size; + + CHECK_BOUNDS(1, 1); + size = out_array[j++] = in_array[i++]; + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + i += u64_words; + j += u64_words; + } + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping aux sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + if (evsel__is_offcpu_event(evsel)) { + /* TODO: can this be less conservative? */ + pr_debug("Dropping off-CPU sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); + + perf_sample__init(&new_sample, /*all=*/ true); + ret = evsel__parse_sample(evsel, new_event, &new_sample); + if (ret) { + perf_sample__exit(&new_sample); + goto out_put; + } + + ret = delegate->sample(delegate, new_event, &new_sample, evsel, machine); + perf_sample__exit(&new_sample); + +out_put: + thread__put(thread); + return ret; +} + +#undef CHECK_BOUNDS +#undef COPY_U64 +#undef REMAP_U64 + + +static int aslr_tool__process_attr(const struct perf_tool *tool, + union perf_event *event, + struct evlist **pevlist) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + + memcpy(&new_event->attr, &event->attr, event->attr.header.size); + if (new_event->attr.attr.type == PERF_TYPE_BREAKPOINT) + new_event->attr.attr.bp_addr = 0; /* Conservatively remove addresses. 
*/ + + return delegate->attr(delegate, new_event, pevlist); +} + +static s64 aslr_tool__process_auxtrace(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, + union perf_event *event) +{ + if (perf_data__is_pipe(session->data)) { + int err = skipn(perf_data__fd(session->data), event->auxtrace.size); + + if (err < 0) + return err; + } + return event->auxtrace.size; +} + +static int aslr_tool__process_auxtrace_info(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + +static int aslr_tool__process_auxtrace_error(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + +static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) +{ + delegate_tool__init(&aslr->tool, delegate); + aslr->tool.tool.ordered_events = true; + + machines__init(&aslr->machines); + + hashmap__init(&aslr->remap_addresses, + remap_addresses__hash, remap_addresses__equal, + /*ctx=*/NULL); + hashmap__init(&aslr->top_addresses, + top_addresses__hash, top_addresses__equal, + /*ctx=*/NULL); + aslr->first_kernel_mapping = true; + + aslr->tool.tool.sample = aslr_tool__process_sample; + /* read - reads a counter, okay to delegate. */ + aslr->tool.tool.mmap = aslr_tool__process_mmap; + aslr->tool.tool.mmap2 = aslr_tool__process_mmap2; + aslr->tool.tool.comm = aslr_tool__process_comm; + aslr->tool.tool.fork = aslr_tool__process_fork; + aslr->tool.tool.exit = aslr_tool__process_exit; + /* namesspaces, cgroup, lost, lost_sample, aux, */ + /* itrace_start, aux_output_hw_id, context_switch, throttle, unthrottle */ + /* - no virtual addresses. */ + aslr->tool.tool.ksymbol = aslr_tool__process_ksymbol; + /* bpf - no virtual address. 
*/ + aslr->tool.tool.text_poke = aslr_tool__process_text_poke; + aslr->tool.tool.attr = aslr_tool__process_attr; + /* event_update, tracing_data, finished_round, build_id, id_index, */ + /* event_update, tracing_data, finished_round, build_id, id_index, */ + /* auxtrace_info, auxtrace_error, time_conv, thread_map, cpu_map, */ + /* stat_config, stat, feature, finished_init, bpf_metadata, compressed, */ + /* auxtrace - no virtual addresses. */ + aslr->tool.tool.auxtrace = aslr_tool__process_auxtrace; + aslr->tool.tool.auxtrace_info = aslr_tool__process_auxtrace_info; + aslr->tool.tool.auxtrace_error = aslr_tool__process_auxtrace_error; +} + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate) +{ + struct aslr_tool *aslr = zalloc(sizeof(*aslr)); + + if (!aslr) + return NULL; + + aslr_tool__init(aslr, delegate); + return &aslr->tool.tool; +} + +void aslr_tool__delete(struct perf_tool *tool) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct hashmap_entry *cur; + size_t bkt; + + if (!tool) + return; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + + hashmap__for_each_entry(&aslr->remap_addresses, cur, bkt) { + struct remap_addresses_key *key = (struct remap_addresses_key *)cur->pkey; + + if (key) + dso__put(key->dso); + zfree(&cur->pkey); + zfree(&cur->pvalue); + } + hashmap__for_each_entry(&aslr->top_addresses, cur, bkt) { + zfree(&cur->pvalue); + } + + hashmap__clear(&aslr->remap_addresses); + hashmap__clear(&aslr->top_addresses); + machines__exit(&aslr->machines); + free(aslr); +} diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h new file mode 100644 index 000000000000..ea984d82681f --- /dev/null +++ b/tools/perf/util/aslr.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_ASLR_H +#define __PERF_ASLR_H + +struct perf_tool; + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate); +void aslr_tool__delete(struct perf_tool 
*aslr); + +#endif /* __PERF_ASLR_H */ -- 2.54.0.545.g6539524ca2-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v3 3/4] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses 2026-05-04 3:51 ` [PATCH v3 3/4] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses Ian Rogers @ 2026-05-04 4:51 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-05-04 4:51 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 3 potential issue(s) to consider: - [High] Breakpoint addresses (bp_addr) are not scrubbed when processing regular perf.data files or synthesizing for a pipe, defeating the ASLR protection. - [High] The --aslr option defeats kernel ASLR obfuscation by deterministically encoding the original kernel start address into the emitted mmap.pgoff field. - [Medium] The `skipn` function does not handle `EINTR` when reading from a pipe, which can cause spurious tool aborts. -- commit e61cd92f35f2e374d97add5939b1983c327eae98 Author: Ian Rogers <irogers@google.com> perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses This commit adds an --aslr option to perf inject to remap virtual addresses in perf.data files. This prevents virtual addresses from leaking when perf data is transferred between machines, which would otherwise weaken ASLR. Unhandled events are dropped to conservatively avoid leaking ASLR information. > diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c > index f174bc69cec4..6e6bf6b67956 100644 > --- a/tools/perf/builtin-inject.c > +++ b/tools/perf/builtin-inject.c [ ... ] > @@ -2763,12 +2777,12 @@ int cmd_inject(int argc, const char **argv) > /* > * If the input is already a pipe then the features and > * attributes don't need synthesizing, they will be present in > * the input. > */ > if (!data.is_pipe) { > ret = perf_event__synthesize_for_pipe(&inject.tool, > inject.session, > &inject.output, > perf_event__repipe); Does this bypass the ASLR tool wrapper for ATTR events and leave breakpoint addresses unscrubbed? 
The aslr_tool__process_attr() function zeros out bp_addr, but this callback only intercepts ATTR events in the main event stream. If the output is a pipe, perf_event__synthesize_for_pipe() uses perf_event__repipe directly, bypassing the scrubbing and exposing the original bp_addr in the output stream. When processing regular input files, ATTR configurations are parsed from the file header and stored in session->evlist. Does perf_session__inject_header() write this un-scrubbed evlist directly into the output file's header, also leaking the original bp_addr? [ ... ] > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > new file mode 100644 > index 000000000000..32548352e1e5 > --- /dev/null > +++ b/tools/perf/util/aslr.c [ ... ] > +static int skipn(int fd, u64 n) > +{ > + char buf[4096]; > + ssize_t ret; > + > + while (n > 0) { > + ret = read(fd, buf, (n < (u64)sizeof(buf) ? n : (u64)sizeof(buf))); > + if (ret <= 0) > + return ret; > + n -= ret; > + } > + > + return 0; > +} Can this cause spurious tool aborts if read() is interrupted by a signal (like SIGWINCH)? This function explicitly checks if ret <= 0 and returns it without checking for EINTR, which allows the error to propagate back to the event processing loop and prematurely abort the perf inject session. Should this utilize readn() or preadn() from the perf library which correctly handle EINTR? [ ... ] > +static int aslr_tool__process_mmap(const struct perf_tool *tool, > + union perf_event *event, > + struct perf_sample *sample, > + struct machine *machine) > +{ [ ... ] > + /* Remaps the mmap.start. */ > + new_event->mmap.start = aslr_tool__remap_mapping(aslr, thread, cpumode, > + event->mmap.start, > + event->mmap.len, > + event->mmap.pgoff); > + if (aslr->first_kernel_mapping && cpumode == PERF_RECORD_MISC_KERNEL) { > + /* > + * If this is the first kernel image, we need to adjust the pgoff by a > + * similar delta. 
> + */ > + new_event->mmap.pgoff = event->mmap.pgoff - event->mmap.start + > + new_event->mmap.start; Does this adjustment of pgoff allow recovering the original kernel ASLR base? Since event->mmap.pgoff is typically a known public constant for the first kernel image mapping (e.g., 0 or a fixed ELF offset), and the output trace exposes both new_event->mmap.start and new_event->mmap.pgoff, could an attacker trivially reverse the equation: old_start = new_start - new_pgoff + old_pgoff Does this mathematical leak nullify the ASLR obfuscation intended for the kernel image? -- Sashiko AI review · https://sashiko.dev/#/patchset/20260504035125.1851720-1-irogers@google.com?part=3 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v3 4/4] perf test: Add inject ASLR test 2026-05-04 3:51 ` [PATCH v3 0/4] perf tools: Add inject --aslr feature and prerequisite robustness fixes Ian Rogers ` (2 preceding siblings ...) 2026-05-04 3:51 ` [PATCH v3 3/4] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses Ian Rogers @ 2026-05-04 3:51 ` Ian Rogers 2026-05-04 5:02 ` sashiko-bot 2026-05-04 7:29 ` [PATCH v4 0/4] perf tools: Add inject --aslr feature and prerequisite robustness fixes Ian Rogers 4 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-05-04 3:51 UTC (permalink / raw) To: acme, gmx, namhyung Cc: adrian.hunter, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz, Ian Rogers Add a new shell test `inject_aslr.sh` to verify the `perf inject --aslr` feature. The test covers: - Basic address remapping for user space samples. - Pipe mode coverage for `perf record` piped into `perf inject --aslr`. - Callchain address remapping. - Consistency of `perf report` output before and after injection. - Pipe mode report consistency. - Dropping of samples that leak ASLR info (physical addresses). - Kernel address remapping (skipping gracefully if permissions restrict recording the kernel map). - Kernel report consistency with address normalization. The test suite is hardened with: - Global 'set -o pipefail' pipeline checks to catch failures in perf script. - Safe awk processing loop closures that consume whole streams to avoid SIGPIPE signal aborts. - False success assertions to verify callchain data isn't dynamically dropped. - Graceful error paths on empty sample records. - Multi-arch support (32-bit and 64-bit address normalizations). Assisted-by: Gemini-CLI:Google Gemini 3 Signed-off-by: Ian Rogers <irogers@google.com> --- v3: Harden script with pipefail, SIGPIPE awk pipeline fixes, callchain empty data asserts, baseline sample verification, and grep report abort protections. Ensure grep report filters have || true suffixes. 
Reorder set -e/pipefail to prevent stack leaks in mktemp failures. v2: Add sum comparison for kernel overhead and 32-bit math corrections. Add awk with gsub for trailing dots and brackets normalizations. Trap EXIT, prevent race conditions and avoid hardcoded perf binary. --- tools/perf/tests/shell/inject_aslr.sh | 423 ++++++++++++++++++++++++++ 1 file changed, 423 insertions(+) create mode 100755 tools/perf/tests/shell/inject_aslr.sh diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh new file mode 100755 index 000000000000..fa7dd3b4c411 --- /dev/null +++ b/tools/perf/tests/shell/inject_aslr.sh @@ -0,0 +1,423 @@ +#!/bin/bash +# perf inject --aslr test +# SPDX-License-Identifier: GPL-2.0 + +set -e +set -o pipefail + +shelldir=$(dirname "$0") +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + +sym="noploop" + +skip_test_missing_symbol ${sym} + +# Create global temp directory +temp_dir=$(mktemp -d /tmp/perf-test-aslr.XXXXXXXXXX) +data="${temp_dir}/perf.data" +data2="${temp_dir}/perf.data2" + +prog="perf test -w noploop" +[ "$(uname -m)" = "s390x" ] && prog="$prog 3" +err=0 + + + +cleanup() { + # Check if temp_dir is set and looks sane before removing + if [[ "${temp_dir}" =~ ^/tmp/perf-test-aslr\. 
]]; then + rm -rf "${temp_dir}" + fi +} + +trap_cleanup() { + cleanup + exit 1 +} + +trap cleanup EXIT +trap trap_cleanup TERM INT + +get_noploop_addr() { + local file=$1 + perf script -i "$file" | awk ' + BEGIN { found=0 } + { + for (i=1; i<=NF; i++) { + if ($i ~ /noploop\+/) { + if (!found) { + print $(i-1) + found=1 + } + } + } + }' +} + +test_basic_aslr() { + echo "Test basic ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.basic.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.basic.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -v --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Basic ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Basic ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Basic ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Basic ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Basic ASLR test [Success]" + fi +} + +test_pipe_aslr() { + echo "Test pipe mode ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe.XXXXXX") + + # Use tee to save the original pipe data for comparison + perf record -e task-clock:u -o - ${prog} | tee "${data}" | perf inject --aslr -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Pipe ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Pipe ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Pipe ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Pipe ASLR test [Failed - addresses are not remapped]" 
+ err=1 + else + echo "Pipe ASLR test [Success]" + fi +} + +test_callchain_aslr() { + echo "Test Callchain ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.callchain.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.callchain.XXXXXX") + + perf record -g -e task-clock:u -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Callchain ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Callchain ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Callchain ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Callchain ASLR test [Failed - addresses are not remapped]" + err=1 + else + # Extract callchain addresses (indented lines starting with hex addresses) + orig_callchain=$(perf script -i "${data}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + new_callchain=$(perf script -i "${data2}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + + if [ -z "$orig_callchain" ]; then + echo "Callchain ASLR test [Failed - no callchain samples in original file]" + err=1 + elif [ -z "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain data was dropped]" + err=1 + elif [ "$orig_callchain" = "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain addresses were not remapped]" + err=1 + else + echo "Callchain ASLR test [Success]" + fi + fi +} + +test_report_aslr() { + echo "Test perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i 
"${data}" -o "${data_clean}" + perf inject -v -b --aslr -i "${data}" -o "${data2}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Report ASLR test [Success]" + fi +} + +test_pipe_report_aslr() { + echo "Test pipe mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + # Use tee to save the original pipe data, then process it with inject -b + perf record -e task-clock:u -o - ${prog} | \ + tee "${data}" | \ + perf inject -b --aslr -o "${data2}" + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || 
true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Pipe Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Report ASLR test [Success]" + fi +} + +test_dropped_samples() { + echo "Test dropped samples (phys-data)" + local data + data=$(mktemp "${temp_dir}/perf.data.dropped.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.dropped.XXXXXX") + + # Check if --phys-data is supported by recording a short run + if ! perf record -e task-clock:u --phys-data -o "${data}" -- sleep 0.1 > /dev/null 2>&1; then + echo "Skipping dropped samples test as --phys-data is not supported" + return + fi + + perf record -e task-clock:u --phys-data -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + # Verify that the original file actually contained samples! + orig_samples=$(perf script -i "${data}" | wc -l) + if [ "$orig_samples" -eq 0 ]; then + echo "Dropped samples test [Failed - no samples in original file]" + err=1 + else + # Verify that samples are dropped. + samples_count=$(perf script -i "${data2}" | wc -l) + + if [ "$samples_count" -gt 0 ]; then + echo "Dropped samples test [Failed - samples were not dropped]" + err=1 + else + echo "Dropped samples test [Success]" + fi + fi +} + +test_kernel_aslr() { + echo "Test kernel ASLR remapping" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${prog} > "${log_file}" 2>&1; then + echo "Skipping kernel ASLR test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel ASLR test as kernel map could not be recorded (permissions restricted)" + return + fi + + perf inject -v --aslr -i "${kdata}" -o "${kdata2}" + + # Check if kernel addresses are remapped. + # Find the field that ends with :k: (the event name) and take the next field! + orig_addr=$(perf script -i "${kdata}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + new_addr=$(perf script -i "${kdata2}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + + echo "Kernel ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Kernel ASLR test [Failed - no kernel samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Kernel ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Kernel ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Kernel ASLR test [Success]" + fi +} + +test_kernel_report_aslr() { + echo "Test kernel perf report consistency" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel_report.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_report_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${prog} > "${log_file}" 2>&1; then + echo "Skipping kernel report test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel report test as kernel map could not be recorded (permissions restricted)" + return + fi + + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i "${kdata}" -o "${data_clean}" + perf inject -v -b --aslr -i "${kdata}" -o "${kdata2}" + + local report1="${temp_dir}/report_kernel1" + local report2="${temp_dir}/report_kernel2" + local report1_clean="${temp_dir}/report_kernel1.clean" + local report2_clean="${temp_dir}/report_kernel2.clean" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${kdata2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' > "${report2_clean}" || true + + # Normalize kernel DSOs and addresses in clean reports + # This allows kernel modules to be either a module or kernel.kallsyms + local report1_norm="${temp_dir}/report_kernel1.norm" + local report2_norm="${temp_dir}/report_kernel2.norm" + + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); \ + print}' "${report1_clean}" > "${report1_norm}" + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); \ + print}' "${report2_clean}" > "${report2_norm}" + + # Calculate sum of percentages for [kernel] samples + sum1=$(awk '/\[kernel\]/ {sum += $1} END {print sum}' "${report1_norm}") + sum2=$(awk '/\[kernel\]/ {sum += $1} END {print sum}' "${report2_norm}") + + echo "Kernel report sums: sum1=$sum1, sum2=$sum2" + + # Compare sums with tolerance + if [ -z "$sum1" ] || [ -z "$sum2" ]; then + echo "Kernel Report ASLR test [Failed - no kernel 
data]" + err=1 + elif awk -v s1="$sum1" -v s2="$sum2" \ + 'BEGIN {diff = s1 - s2; if (diff < 0) diff = -diff; if (diff < 0.05) exit 0; else exit 1}'; then + echo "Kernel Report ASLR test [Success]" + else + echo "Kernel Report ASLR test [Failed - sums differ too much]" + err=1 + fi +} + +test_basic_aslr +test_pipe_aslr +test_callchain_aslr +test_report_aslr +test_pipe_report_aslr +test_dropped_samples +test_kernel_aslr +test_kernel_report_aslr + +cleanup +exit $err -- 2.54.0.545.g6539524ca2-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v3 4/4] perf test: Add inject ASLR test 2026-05-04 3:51 ` [PATCH v3 4/4] perf test: Add inject ASLR test Ian Rogers @ 2026-05-04 5:02 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-05-04 5:02 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider: - [Medium] The claimed multi-arch address normalization feature is not implemented. -- commit 7330768f7ec5b4d1676ce71056c223b55aff876e Author: Ian Rogers <irogers@google.com> perf test: Add inject ASLR test This commit adds a new shell test inject_aslr.sh to verify the perf inject --aslr feature. It covers various remapping scenarios, pipe modes, callchains, and report consistency checks. > The test suite is hardened with: > - Global 'set -o pipefail' pipeline checks to catch failures in perf script. > - Safe awk processing loop closures that consume whole streams to avoid SIGPIPE > signal aborts. > - False success assertions to verify callchain data isn't dynamically dropped. > - Graceful error paths on empty sample records. > - Multi-arch support (32-bit and 64-bit address normalizations). The commit message mentions multi-arch support and 32-bit and 64-bit address normalizations, but it appears the script only normalizes the DSO names. [ ... ] > diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh > new file mode 100755 > index 000000000000..fa7dd3b4c411 > --- /dev/null > +++ b/tools/perf/tests/shell/inject_aslr.sh [ ... ] > +test_kernel_report_aslr() { [ ... 
] > + # Strip headers and compare lines with percentages > + grep '%' "${report1}" | grep -v '^#' > "${report1_clean}" || true > + grep '%' "${report2}" | grep -v '^#' > "${report2_clean}" || true > + > + # Normalize kernel DSOs and addresses in clean reports > + # This allows kernel modules to be either a module or kernel.kallsyms > + local report1_norm="${temp_dir}/report_kernel1.norm" > + local report2_norm="${temp_dir}/report_kernel2.norm" > + > + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); \ > + print}' "${report1_clean}" > "${report1_norm}" > + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); \ > + print}' "${report2_clean}" > "${report2_norm}" Does this awk script normalize memory addresses, or does it only normalize the DSO names? The code seems to avoid address-related diff failures by summing the percentage column later, rather than normalizing the addresses themselves. Could the commit message and the inline comment be updated to accurately describe what the script actually normalizes? -- Sashiko AI review · https://sashiko.dev/#/patchset/20260504035125.1851720-1-irogers@google.com?part=4 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v4 0/4] perf tools: Add inject --aslr feature and prerequisite robustness fixes 2026-05-04 3:51 ` [PATCH v3 0/4] perf tools: Add inject --aslr feature and prerequisite robustness fixes Ian Rogers ` (3 preceding siblings ...) 2026-05-04 3:51 ` [PATCH v3 4/4] perf test: Add inject ASLR test Ian Rogers @ 2026-05-04 7:29 ` Ian Rogers 2026-05-04 7:29 ` [PATCH v4 1/4] perf sched: Add missing mmap2 handler in timehist Ian Rogers ` (5 more replies) 4 siblings, 6 replies; 183+ messages in thread From: Ian Rogers @ 2026-05-04 7:29 UTC (permalink / raw) To: acme, gmx, namhyung Cc: adrian.hunter, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz, Ian Rogers This patch series introduces the new 'perf inject --aslr' feature to remap virtual memory addresses or drop physical memory event leaks when profile record data is shared between machines. Bundled with this feature are two independent, critical bug fixes inside core event dispatching tools that harden perf session analysis against dynamic crashes and callchain mapping failures. Core Feature: 'perf inject --aslr' (Patches 3 and 4) Transferring perf.data files across environments introduces a potential leak of virtual address footprints, weakening Address Space Layout Randomization (ASLR) on the originating machine. To mitigate this, we introduce the --aslr flag into perf inject. Unknown or unhandled events are dropped conservatively, while handled samples and branch loops undergo systematic virtual memory offset obfuscation. To ensure comprehensive memory and error-path safety, the ASLR tool implements: - Machine namespaces ('struct machines') to safely interleave host mappings and unprivileged KVM guest virtual address mappings. - Resolves VMA split map failures (caused by overlap fixups during map insertions) consistently by anchoring mappings on DSO and memory invariants. - Guards against integer overflows in branch stack loops via subtraction-based bounds arithmetic. 
- Prevents heap buffer overflows by computing safe word limits on userspace stacks and dynamic hardware tracing (AUX) sizes. - Prevents key collisions/ABA lookups by correctly managing DSO reference counts (dso__get/put). - Cleans up error paths to avoid inconsistent hashmap mappings on OOM failures. - Optimizes performance by removing redundant hot-path memory allocations. - Cleanly advances session readers past dropped auxtrace streams using pipe-stream I/O skip helpers. - Scrubs breakpoint addresses (bp_addr) from output event headers and dynamically synthesized events for pipes via a custom pipe repipe wrapper to prevent unscrubbed address leakage. - Remaps kernel memory maps linearly to maintain secure base obfuscation bounds. - Hardens guest cpumode lookups against corrupting host/guest user and kernel mapping boundaries during sample fallback searches. - Synchronizes ksymbol map tracking invariants using precise VMA offset math rather than raw addresses to prevent unique base leaks on every function symbol. - Blocks trailing heap padding byte data leakage vectors in userspace stacks and AUX tracking frames via targeted tail-word clearing. Verification is reinforced in Patch 4 with a new comprehensive POSIX shell suite ('inject_aslr.sh'), hardened against SIGPIPE signal exits with stream consuming awk loops and robust 'set -o pipefail' assertions. The suite includes a new dedicated scenario validating pipe stdout injection attribute stability. Prerequisite Bug Fixes (Patches 1 and 2) During development, two core event delegation issues were identified and resolved to prevent crashes and data-loss during analysis: 1. perf sched: 'timehist' registers standard MMAP, COMM, EXIT, and FORK stubs, but completely omitted registering MMAP2 callbacks. Because modern environments output maps primarily via MMAP2 frames, this caused timehist sessions to silently drop shared library mappings, causing dynamic callchain symbol resolutions to fail. 
Patch 1 corrects this by properly registering perf_event__process_mmap2. 2. perf tool: Patch 2 fixes missing copies of schedstat callbacks inside delegated wrapper tools (which caused segfaults on NULL stubs) and properly initializes/copies the 'dont_split_sample_group' grouping parameters to prevent stack garbage from triggering silent non-leader event drops during split deliver streams. Changes since v3: - Feature integration: Pass a dedicated 'perf_event__aslr_repipe' callback to perf_event__synthesize_for_pipe() to scrub synthesized breakpoint attributes. - Feature core: Loop through and scrub event evlist breakpoint attributes right before writing file headers in __cmd_inject(). - Feature core: Linearize kernel map base obfuscation and remove redundant pgoff delta adjustments that leaked kernel layout calculations. - Feature core: Fix host/guest cpumode mappings in sample fallback lookups. - Feature core: Sync ksymbol tracking keys onto VMA offset invariants. - Feature core: Zero out trailing padding word bytes in user stacks and AUX blocks. - Validation suite: Add 'test_pipe_out_report_aslr' validation case. - Validation suite: Upgrade kernel report checks to strict sorted line-by-line diffs. - Style: Wrap all commit description lines to under 75 columns and fix code formatting. 
Ian Rogers (4): perf sched: Add missing mmap2 handler in timehist perf tool: Fix missing schedstat delegates and dont_split_sample_group in delegate_tool perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses perf test: Add inject ASLR test tools/perf/builtin-inject.c | 52 +- tools/perf/builtin-sched.c | 1 + tools/perf/tests/shell/inject_aslr.sh | 459 ++++++++++ tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 1161 +++++++++++++++++++++++++ tools/perf/util/aslr.h | 10 + tools/perf/util/tool.c | 6 + 7 files changed, 1689 insertions(+), 1 deletion(-) create mode 100755 tools/perf/tests/shell/inject_aslr.sh create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h -- 2.54.0.545.g6539524ca2-goog ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v4 1/4] perf sched: Add missing mmap2 handler in timehist 2026-05-04 7:29 ` [PATCH v4 0/4] perf tools: Add inject --aslr feature and prerequisite robustness fixes Ian Rogers @ 2026-05-04 7:29 ` Ian Rogers 2026-05-04 7:29 ` [PATCH v4 2/4] perf tool: Fix missing schedstat delegates and dont_split_sample_group in delegate_tool Ian Rogers ` (4 subsequent siblings) 5 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-05-04 7:29 UTC (permalink / raw) To: acme, gmx, namhyung Cc: adrian.hunter, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz, Ian Rogers perf_sched__timehist() registers event handlers for options using the sched->tool struct. It registers handlers for MMAP, COMM, EXIT, FORK, etc. but completely omits registering a handler for MMAP2 events. Failing to register both MMAP and MMAP2 handlers causes modern systems (which primarily output MMAP2 records) to silently drop VMA map mappings. This results in uninitialized machine/thread mapping structures, making it impossible to resolve shared library instruction pointers (IPs) to dynamic symbols/DSOs during timehist callchain analysis. Fix this by correctly registering perf_event__process_mmap2 in sched->tool inside perf_sched__timehist(). 
Assisted-by: Gemini-CLI:Google Gemini 3 Fixes: 5bbfec0ad93c ("perf sched: Implement timehist option") Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/builtin-sched.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 555247568e7a..241c2f808f7b 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -3299,6 +3299,7 @@ static int perf_sched__timehist(struct perf_sched *sched) */ sched->tool.sample = perf_timehist__process_sample; sched->tool.mmap = perf_event__process_mmap; + sched->tool.mmap2 = perf_event__process_mmap2; sched->tool.comm = perf_event__process_comm; sched->tool.exit = perf_event__process_exit; sched->tool.fork = perf_event__process_fork; -- 2.54.0.545.g6539524ca2-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* [PATCH v4 2/4] perf tool: Fix missing schedstat delegates and dont_split_sample_group in delegate_tool 2026-05-04 7:29 ` [PATCH v4 0/4] perf tools: Add inject --aslr feature and prerequisite robustness fixes Ian Rogers 2026-05-04 7:29 ` [PATCH v4 1/4] perf sched: Add missing mmap2 handler in timehist Ian Rogers @ 2026-05-04 7:29 ` Ian Rogers 2026-05-04 7:29 ` [PATCH v4 3/4] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses Ian Rogers ` (3 subsequent siblings) 5 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-05-04 7:29 UTC (permalink / raw) To: acme, gmx, namhyung Cc: adrian.hunter, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz, Ian Rogers delegate_tool was missing the delegate overrides for schedstat_cpu and schedstat_domain. As a result, when allocated with zalloc, these callbacks defaulted to NULL, causing a segmentation fault crash if any schedstat events were delivered during event processing. Fix this by adding delegate_schedstat_cpu and delegate_schedstat_domain via the CREATE_DELEGATE_OP2 macro, and ensuring delegate_tool__init correctly registers them. Additionally, delegate_tool__init completely omitted copying the dont_split_sample_group property from the delegate. This would cause wrapper tools to default the flag to false, which corrupts piped event processing (e.g., in perf inject) by triggering duplicate event deliveries on split sample values in deliver_sample_group(). Similarly, perf_tool__init() omitted the initialization of this boolean field. On stack-allocated tools that rely on this initializer (like intel-tpebs or __cmd_evlist), this could result in uninitialized stack garbage evaluating to true—silently dropping non-leader event members in deliver_sample_group(). Fix both issues by properly copying the field in delegate_tool__init and initializing it to false in perf_tool__init. 
Assisted-by: Gemini-CLI:Google Gemini 3 Fixes: 6331b2669359 ("perf tool: Add a delegate_tool that just delegates actions to another tool") Fixes: 79bcd34e0f3d ("perf inject: Fix leader sampling inserting additional samples") Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/util/tool.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/perf/util/tool.c b/tools/perf/util/tool.c index 013c7839e2cf..ff2150517b75 100644 --- a/tools/perf/util/tool.c +++ b/tools/perf/util/tool.c @@ -285,6 +285,7 @@ void perf_tool__init(struct perf_tool *tool, bool ordered_events) tool->no_warn = false; tool->show_feat_hdr = SHOW_FEAT_NO_HEADER; tool->merge_deferred_callchains = true; + tool->dont_split_sample_group = false; tool->sample = process_event_sample_stub; tool->mmap = process_event_stub; @@ -433,6 +434,8 @@ CREATE_DELEGATE_OP2(stat_config); CREATE_DELEGATE_OP2(stat_round); CREATE_DELEGATE_OP2(thread_map); CREATE_DELEGATE_OP2(time_conv); +CREATE_DELEGATE_OP2(schedstat_cpu); +CREATE_DELEGATE_OP2(schedstat_domain); CREATE_DELEGATE_OP2(tracing_data); #define CREATE_DELEGATE_OP3(name) \ @@ -470,6 +473,7 @@ void delegate_tool__init(struct delegate_tool *tool, struct perf_tool *delegate) tool->tool.no_warn = delegate->no_warn; tool->tool.show_feat_hdr = delegate->show_feat_hdr; tool->tool.merge_deferred_callchains = delegate->merge_deferred_callchains; + tool->tool.dont_split_sample_group = delegate->dont_split_sample_group; tool->tool.sample = delegate_sample; tool->tool.read = delegate_read; @@ -516,4 +520,6 @@ void delegate_tool__init(struct delegate_tool *tool, struct perf_tool *delegate) tool->tool.bpf_metadata = delegate_bpf_metadata; tool->tool.compressed = delegate_compressed; tool->tool.auxtrace = delegate_auxtrace; + tool->tool.schedstat_cpu = delegate_schedstat_cpu; + tool->tool.schedstat_domain = delegate_schedstat_domain; } -- 2.54.0.545.g6539524ca2-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* [PATCH v4 3/4] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses 2026-05-04 7:29 ` [PATCH v4 0/4] perf tools: Add inject --aslr feature and prerequisite robustness fixes Ian Rogers 2026-05-04 7:29 ` [PATCH v4 1/4] perf sched: Add missing mmap2 handler in timehist Ian Rogers 2026-05-04 7:29 ` [PATCH v4 2/4] perf tool: Fix missing schedstat delegates and dont_split_sample_group in delegate_tool Ian Rogers @ 2026-05-04 7:29 ` Ian Rogers 2026-05-04 8:39 ` sashiko-bot 2026-05-04 7:29 ` [PATCH v4 4/4] perf test: Add inject ASLR test Ian Rogers ` (2 subsequent siblings) 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-05-04 7:29 UTC (permalink / raw) To: acme, gmx, namhyung Cc: adrian.hunter, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz, Ian Rogers If perf.data files are taken from one machine to another they may leak virtual addresses and so weaken ASLR on the machine they are coming from. Add a '--aslr' option for perf inject that remaps all virtual addresses, or drops data/events, so that the virtual address information isn't leaked. When events are not known/handled by the tool they are dropped. This makes the tool conservative and it should never leak ASLR information, but it means virtual address remapping is needed for cases like auxtrace. To ensure comprehensive robustness and security, this tool: - Employs guest namespace isolation by utilizing 'struct machines' to safely interleave host and unprivileged KVM guest virtual address mappings. - Resolves VMA split map failures (caused by overlap fixups during map insertions) consistently by anchoring mappings on DSO and memory invariants. - Guards against integer overflows in branch stack loops via subtraction-based bounds arithmetic. - Prevents heap buffer overflows by computing safe word limits on userspace stacks and dynamic hardware tracing (AUX) sizes. - Prevents key collisions/ABA lookups by correctly managing DSO reference counts (dso__get/put). 
- Cleans up error paths to avoid inconsistent hashmap mappings on OOM failures. - Optimizes performance by removing redundant hot-path memory allocations. - Cleanly advances session readers past dropped auxtrace streams using pipe-stream I/O skip helpers. - Scrubs breakpoint addresses (bp_addr) from output event headers and dynamically synthesized events for pipes via a custom pipe repipe wrapper to prevent unscrubbed address leakage. - Remaps kernel memory maps linearly to maintain secure base obfuscation bounds. - Hardens guest cpumode lookups against corrupting host/guest user and kernel mapping boundaries during sample fallback searches. - Synchronizes ksymbol map tracking invariants using precise VMA offset math rather than raw addresses to prevent unique base leaks on every function symbol. - Blocks trailing heap padding byte data leakage vectors in userspace stacks and AUX tracking frames via targeted tail-word clearing. Assisted-by: Gemini-CLI:Google Gemini 3 Signed-off-by: Ian Rogers <irogers@google.com> Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> --- v4: Scrub bp_addr from headers/pipe synthesis attributes. Remove kernel mmap pgoff mathematical delta adjustment leaks to maintain secure base obfuscation bounds. Harden guest space contexts mapping loops, correct ksymbol map base invariants tracking, and plug tail-word padding heap leakage vectors in user stacks and AUX payloads. v3: Combine split-map fixes, guest namespaces, bounds checks, OOM rollbacks, hot path optimization, safe dso references, and I/O stream error handling from v3/v4 development. Drop raw auxtrace events. Fix thread reference leaks in event handlers. Fix 32-bit truncation bug in hashmaps using u64* values. Prevent leaking uninitialized heap memory by zeroing copy buffer. Correct bitmask checks for branch stack flags. Avoid PMU configuration corruption. v2: First review feedback adjustments. 
--- tools/perf/builtin-inject.c | 52 +- tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 1161 +++++++++++++++++++++++++++++++++++ tools/perf/util/aslr.h | 10 + 4 files changed, 1223 insertions(+), 1 deletion(-) create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index f174bc69cec4..d2167a94d4be 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -8,6 +8,7 @@ */ #include "builtin.h" +#include "util/aslr.h" #include "util/color.h" #include "util/dso.h" #include "util/vdso.h" @@ -123,6 +124,7 @@ struct perf_inject { bool in_place_update_dry_run; bool copy_kcore_dir; bool convert_callchain; + bool aslr; const char *input_name; struct perf_data output; u64 bytes_written; @@ -304,6 +306,29 @@ static int perf_event__repipe(const struct perf_tool *tool, return perf_event__repipe_synth(tool, event); } +/** + * perf_event__aslr_repipe - Wrapper to scrub synthesized pipe attributes. + * @tool: The original &inject.tool pointer. + * @event: The synthesized perf_event record. + * + * Synthesized attributes for pipes bypass the main event stream tool hooks. + * This wrapper intercepts them during pipe header generation to clear unprivileged + * breakpoint addresses (bp_addr). It forwards execution using the original tool + * context pointer to ensure container_of(&inject.tool) evaluation inside the + * downstream repipe stubs remains valid and does not cause structure corruptions. 
+ */ +static int perf_event__aslr_repipe(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + if (event->header.type == PERF_RECORD_HEADER_ATTR && + event->attr.attr.type == PERF_TYPE_BREAKPOINT) { + event->attr.attr.bp_addr = 0; + } + return perf_event__repipe(tool, event, sample, machine); +} + static int perf_event__drop(const struct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, struct perf_sample *sample __maybe_unused, @@ -2458,6 +2483,15 @@ static int __cmd_inject(struct perf_inject *inject) } } + if (inject->aslr) { + struct evsel *evsel; + + evlist__for_each_entry(session->evlist, evsel) { + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) + evsel->core.attr.bp_addr = 0; + } + } + session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc, @@ -2564,6 +2598,8 @@ int cmd_inject(int argc, const char **argv) " instance has a subdir"), OPT_BOOLEAN(0, "convert-callchain", &inject.convert_callchain, "Generate callchains using DWARF and drop register/stack data"), + OPT_BOOLEAN(0, "aslr", &inject.aslr, + "Remap virtual memory addresses similar to ASLR"), OPT_END() }; const char * const inject_usage[] = { @@ -2571,6 +2607,7 @@ int cmd_inject(int argc, const char **argv) NULL }; bool ordered_events; + struct perf_tool *tool = &inject.tool; if (!inject.itrace_synth_opts.set) { /* Disable eager loading of kernel symbols that adds overhead to perf inject. 
*/ @@ -2684,12 +2721,21 @@ int cmd_inject(int argc, const char **argv) inject.tool.schedstat_domain = perf_event__repipe_op2_synth; inject.tool.dont_split_sample_group = true; inject.tool.merge_deferred_callchains = false; - inject.session = __perf_session__new(&data, &inject.tool, + if (inject.aslr) { + tool = aslr_tool__new(&inject.tool); + if (!tool) { + ret = -ENOMEM; + goto out_close_output; + } + } + inject.session = __perf_session__new(&data, tool, /*trace_event_repipe=*/inject.output.is_pipe, /*host_env=*/NULL); if (IS_ERR(inject.session)) { ret = PTR_ERR(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); goto out_close_output; } @@ -2717,6 +2763,8 @@ int cmd_inject(int argc, const char **argv) ret = perf_event__synthesize_for_pipe(&inject.tool, inject.session, &inject.output, + inject.aslr ? + perf_event__aslr_repipe : perf_event__repipe); if (ret < 0) goto out_delete; @@ -2789,6 +2837,8 @@ int cmd_inject(int argc, const char **argv) strlist__delete(inject.known_build_ids); zstd_fini(&(inject.session->zstd_data)); perf_session__delete(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); out_close_output: if (!inject.in_place_update) perf_data__close(&inject.output); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 70cc91d00804..65b96f3b87e2 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -6,6 +6,7 @@ perf-util-y += arm64-frame-pointer-unwind-support.o perf-util-y += addr2line.o perf-util-y += addr_location.o perf-util-y += annotate.o +perf-util-y += aslr.o perf-util-y += blake2s.o perf-util-y += block-info.o perf-util-y += block-range.o diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c new file mode 100644 index 000000000000..27b642c91eb9 --- /dev/null +++ b/tools/perf/util/aslr.c @@ -0,0 +1,1161 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "aslr.h" + +#include "addr_location.h" +#include "debug.h" +#include "event.h" +#include "evsel.h" +#include "machine.h" +#include "map.h" 
+#include "thread.h" +#include "tool.h" +#include "session.h" +#include "data.h" +#include "dso.h" + +#include <internal/lib.h> /* page_size */ +#include <linux/compiler.h> +#include <linux/zalloc.h> +#include <errno.h> +#include <inttypes.h> +#include <unistd.h> + +static int skipn(int fd, u64 n) +{ + char buf[4096]; + ssize_t ret; + + while (n > 0) { + ret = read(fd, buf, (n < (u64)sizeof(buf) ? n : (u64)sizeof(buf))); + if (ret <= 0) + return ret; + n -= ret; + } + + return 0; +} + +/** + * struct remap_addresses_key - Key for mapping original addresses to remapped ones. + * @dso: Pointer to the DSO (Dynamic Shared Object) associated with the mapping. + * @invariant: Unique offset invariant within the VMA (Virtual Memory Area). + * Calculated as `start - pgoff`. This value remains constant when + * perf's internal `maps__fixup_overlap_and_insert` splits a map into + * fragmented VMA pieces due to overlapping events, allowing us to + * resolve split maps consistently back to the original VMA. + * @pid: Process ID associated with the mapping. + */ +struct remap_addresses_key { + struct dso *dso; + u64 invariant; + pid_t pid; +}; + +struct aslr_mapping { + struct list_head node; + u64 orig_start; + u64 len; + u64 remap_start; +}; + +struct aslr_tool { + /** @tool: The tool implemented here and a pointer to a delegate to process the data. */ + struct delegate_tool tool; + /** @machines: The machines with the input, not remapped, virtual address layout. */ + struct machines machines; + /** @event_copy: Buffer used to create an event to pass to the delegate. */ + char event_copy[PERF_SAMPLE_MAX_SIZE]; + /** @remap_addresses: mapping from remap_addresses_key to remapped address. */ + struct hashmap remap_addresses; + /** @top_addresses: mapping from process to max remapped address. */ + struct hashmap top_addresses; +}; + +static const pid_t kernel_pid = -1; + +/* Start remapping user processes from a small non-zero offset. 
*/ +static const u64 user_space_start = 0x200000; +static const u64 kernel_space_start = 0xffff800010000000; + +static size_t remap_addresses__hash(long _key, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key = (struct remap_addresses_key *)_key; + + return (size_t)key->dso ^ key->invariant ^ key->pid; +} + +static bool remap_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key1 = (struct remap_addresses_key *)_key1; + struct remap_addresses_key *key2 = (struct remap_addresses_key *)_key2; + + return key1->dso == key2->dso && + key1->invariant == key2->invariant && + key1->pid == key2->pid; +} + +static size_t top_addresses__hash(long key, void *ctx __maybe_unused) +{ + return key; +} + +static bool top_addresses__equal(long key1, long key2, void *ctx __maybe_unused) +{ + return key1 == key2; +} + +static u64 round_up_to_page_size(u64 addr) +{ + return (addr + page_size - 1) & ~((u64)page_size - 1); +} + +static u64 aslr_tool__remap_address(struct aslr_tool *aslr, + struct thread *aslr_thread, + u8 cpumode, + u64 addr) +{ + struct addr_location al; + struct remap_addresses_key key; + u64 *remapped_invariant_ptr = NULL; + u64 remap_addr = 0; + u8 effective_cpumode = cpumode; + + if (!aslr_thread) + return 0; /* No thread. */ + + addr_location__init(&al); + if (!thread__find_map(aslr_thread, cpumode, addr, &al)) { + /* + * If lookup fails with specified cpumode, try fallback to the other space + * to be robust against bad cpumode in samples. 
+ */ + if (cpumode == PERF_RECORD_MISC_KERNEL) + effective_cpumode = PERF_RECORD_MISC_USER; + else if (cpumode == PERF_RECORD_MISC_USER) + effective_cpumode = PERF_RECORD_MISC_KERNEL; + else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + effective_cpumode = PERF_RECORD_MISC_GUEST_USER; + else if (cpumode == PERF_RECORD_MISC_GUEST_USER) + effective_cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + + if (!thread__find_map(aslr_thread, effective_cpumode, addr, &al)) { + addr_location__exit(&al); + return 0; /* No mmap. */ + } + } + + key.dso = map__dso(al.map); + key.invariant = map__start(al.map) - map__pgoff(al.map); + key.pid = effective_cpumode == PERF_RECORD_MISC_KERNEL ? kernel_pid : aslr_thread->pid_; + + if (hashmap__find(&aslr->remap_addresses, &key, &remapped_invariant_ptr)) { + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + + (addr - map__start(al.map)); + } else { + pr_debug("Cannot find a remapped entry for address %lx in mapping %lx(%lx) for pid=%d\n", + addr, map__start(al.map), map__size(al.map), key.pid); + } + + addr_location__exit(&al); + return remap_addr; +} + +static u64 aslr_tool__remap_mapping(struct aslr_tool *aslr, + struct thread *aslr_thread, + u8 cpumode, + u64 start, u64 len, u64 pgoff) +{ + struct addr_location al; + struct remap_addresses_key key; + u64 remap_addr = 0; + u64 *remapped_invariant_ptr = NULL; + u64 *max_addr_ptr = NULL; + bool is_contiguous = false; + bool first_mapping = false; + bool key_found = false; + + if (!aslr_thread) + return 0; /* No thread. */ + + addr_location__init(&al); + if (thread__find_map(aslr_thread, cpumode, start, &al)) + key.dso = map__dso(al.map); + else + key.dso = NULL; + + key.invariant = start - pgoff; + key.pid = cpumode == PERF_RECORD_MISC_KERNEL ? 
kernel_pid : aslr_thread->pid_; + + if (hashmap__find(&aslr->remap_addresses, &key, &remapped_invariant_ptr)) { + remap_addr = *remapped_invariant_ptr + pgoff; + key_found = true; + } else { + struct addr_location prev_al; + + addr_location__init(&prev_al); + if (thread__find_map(aslr_thread, cpumode, start - 1, &prev_al)) { + if (map__start(prev_al.map) + map__size(prev_al.map) == start) { + is_contiguous = true; + } else { + pr_debug("Previous mmap [%lx, %lx] overlaps current map [%lx, %lx]\n", + map__start(prev_al.map), + map__start(prev_al.map) + map__size(prev_al.map), + start, start+len); + } + } + addr_location__exit(&prev_al); + + if (!hashmap__find(&aslr->top_addresses, key.pid, &max_addr_ptr)) { + first_mapping = true; + remap_addr = (cpumode == PERF_RECORD_MISC_KERNEL ? + kernel_space_start : user_space_start); + } else { + remap_addr = *max_addr_ptr; + } + + remap_addr = round_up_to_page_size(remap_addr); + if (!is_contiguous && !first_mapping) + remap_addr += page_size; + + { + struct remap_addresses_key *new_key = malloc(sizeof(*new_key)); + u64 *new_val = malloc(sizeof(u64)); + + if (!new_key || !new_val) { + free(new_key); + free(new_val); + addr_location__exit(&al); + return 0; + } + *new_key = key; + new_key->dso = dso__get(key.dso); + *new_val = remap_addr - pgoff; + + if (hashmap__add(&aslr->remap_addresses, new_key, new_val) != 0) { + dso__put(new_key->dso); + free(new_key); + free(new_val); + addr_location__exit(&al); + return 0; + } + } + } + + /* Update top_addresses */ + { + u64 *new_max = malloc(sizeof(u64)); + u64 *old_val = NULL; + int err; + + if (!new_max) { + struct remap_addresses_key *old_key = NULL; + u64 *old_val_remap = NULL; + + if (!key_found) { + hashmap__delete(&aslr->remap_addresses, &key, + &old_key, &old_val_remap); + if (old_key) + dso__put(old_key->dso); + free(old_key); + free(old_val_remap); + } + addr_location__exit(&al); + return 0; + } + *new_max = remap_addr + len; + + if (hashmap__find(&aslr->top_addresses, 
key.pid, &max_addr_ptr)) { + if (*max_addr_ptr > *new_max) + *new_max = *max_addr_ptr; + } + + err = hashmap__insert(&aslr->top_addresses, key.pid, new_max, + (first_mapping && !key_found) ? + HASHMAP_ADD : HASHMAP_UPDATE, + NULL, &old_val); + if (err) { + struct remap_addresses_key *old_key = NULL; + u64 *old_val_remap = NULL; + + free(new_max); + if (!key_found) { + hashmap__delete(&aslr->remap_addresses, &key, + &old_key, &old_val_remap); + if (old_key) + dso__put(old_key->dso); + free(old_key); + free(old_val_remap); + } + addr_location__exit(&al); + return 0; + } + free(old_val); + } + + addr_location__exit(&al); + return remap_addr; +} + +static u64 aslr_tool__remap_ksymbol(struct aslr_tool *aslr, + struct thread *aslr_thread, + u64 addr, u32 len) +{ + struct addr_location al; + struct remap_addresses_key key; + u64 remap_addr = 0; + u64 *remapped_invariant_ptr = NULL; + u64 *max_addr_ptr = NULL; + bool first_mapping = false; + + if (!aslr_thread) + return 0; /* No thread. */ + + addr_location__init(&al); + if (thread__find_map(aslr_thread, PERF_RECORD_MISC_KERNEL, addr, &al)) { + key.dso = map__dso(al.map); + key.invariant = map__start(al.map) - map__pgoff(al.map); + } else { + key.dso = NULL; + key.invariant = addr; /* pgoff is 0 for ksymbols */ + } + key.pid = aslr_thread->pid_; + + if (hashmap__find(&aslr->remap_addresses, &key, &remapped_invariant_ptr)) { + if (al.map) + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + + (addr - map__start(al.map)); + else + remap_addr = *remapped_invariant_ptr; + addr_location__exit(&al); + return remap_addr; + } + + if (!hashmap__find(&aslr->top_addresses, key.pid, &max_addr_ptr)) { + first_mapping = true; + remap_addr = kernel_space_start; + } else { + remap_addr = *max_addr_ptr; + } + + remap_addr = round_up_to_page_size(remap_addr) + page_size; + + { + struct remap_addresses_key *new_key = malloc(sizeof(*new_key)); + u64 *new_val = malloc(sizeof(u64)); + + if (!new_key || !new_val) { + free(new_key); + 
free(new_val); + addr_location__exit(&al); + return 0; + } + *new_key = key; + new_key->dso = dso__get(key.dso); + if (al.map) + *new_val = remap_addr - (addr - map__start(al.map)) - map__pgoff(al.map); + else + *new_val = remap_addr; + + if (hashmap__add(&aslr->remap_addresses, new_key, new_val) < 0) { + dso__put(new_key->dso); + free(new_key); + free(new_val); + addr_location__exit(&al); + return 0; + } + } + + { + u64 *new_max = malloc(sizeof(u64)); + u64 *old_val = NULL; + int err; + + if (!new_max) { + struct remap_addresses_key *old_key = NULL; + u64 *old_val_remap = NULL; + + hashmap__delete(&aslr->remap_addresses, &key, &old_key, &old_val_remap); + if (old_key) + dso__put(old_key->dso); + free(old_key); + free(old_val_remap); + addr_location__exit(&al); + return 0; + } + *new_max = remap_addr + len; + + if (hashmap__find(&aslr->top_addresses, key.pid, &max_addr_ptr)) { + if (*max_addr_ptr > *new_max) + *new_max = *max_addr_ptr; + } + + err = hashmap__insert(&aslr->top_addresses, key.pid, new_max, + first_mapping ? 
+ HASHMAP_ADD : HASHMAP_UPDATE, + NULL, &old_val); + if (err) { + struct remap_addresses_key *old_key = NULL; + u64 *old_val_remap = NULL; + + free(new_max); + hashmap__delete(&aslr->remap_addresses, &key, &old_key, &old_val_remap); + if (old_key) + dso__put(old_key->dso); + free(old_key); + free(old_val_remap); + addr_location__exit(&al); + return 0; + } + free(old_val); + } + + addr_location__exit(&al); + return remap_addr; +} + + +static int aslr_tool__process_mmap(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_mmap(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap.pid, event->mmap.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap, &event->mmap, event->mmap.header.size); + /* Remaps the mmap.start. 
*/ + new_event->mmap.start = aslr_tool__remap_mapping(aslr, thread, cpumode, + event->mmap.start, + event->mmap.len, + event->mmap.pgoff); + err = delegate->mmap(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_mmap2(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_mmap2(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap2.pid, event->mmap2.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap2, &event->mmap2, event->mmap2.header.size); + /* Remaps the mmap.start. 
*/ + new_event->mmap2.start = aslr_tool__remap_mapping(aslr, thread, cpumode, + event->mmap2.start, + event->mmap2.len, + event->mmap2.pgoff); + err = delegate->mmap2(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_comm(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_comm(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->comm(delegate, event, sample, machine); +} + +static int aslr_tool__process_fork(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. 
*/ + err = perf_event__process_fork(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->fork(delegate, event, sample, machine); +} + +static int aslr_tool__process_exit(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_exit(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->exit(delegate, event, sample, machine); +} + +static int aslr_tool__process_text_poke(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + thread = machine__findnew_thread(aslr_machine, sample->pid, sample->tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->text_poke, &event->text_poke, event->text_poke.header.size); + new_event->text_poke.addr = aslr_tool__remap_address(aslr, thread, cpumode, + event->text_poke.addr); + + err = 
delegate->text_poke(delegate, new_event, sample, machine); + + thread__put(thread); + return err; +} + +static int aslr_tool__process_ksymbol(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_ksymbol(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, kernel_pid, 0); + if (!thread) + return -ENOMEM; + memcpy(&new_event->ksymbol, &event->ksymbol, event->ksymbol.header.size); + /* Remaps the ksymbol.start */ + new_event->ksymbol.addr = aslr_tool__remap_ksymbol(aslr, thread, + event->ksymbol.addr, event->ksymbol.len); + + err = delegate->ksymbol(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_sample(const struct perf_tool *tool, union perf_event *event, + struct perf_sample *sample, + struct evsel *evsel, struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + int ret; + u64 sample_type; + struct thread *thread; + struct machine *aslr_machine; + __u64 max_i; + __u64 max_j; + union perf_event *new_event; + struct perf_sample new_sample; + __u64 *in_array, *out_array; + u8 cpumode; + u64 addr; + size_t i; + size_t j; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct 
aslr_tool, tool); + delegate = aslr->tool.delegate; + ret = -EFAULT; + sample_type = evsel->core.attr.sample_type; + max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); + max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); + new_event = (union perf_event *)aslr->event_copy; + cpumode = sample->cpumode; + i = 0; + j = 0; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + thread = machine__findnew_thread(aslr_machine, sample->pid, sample->tid); + + if (!thread) + return -ENOMEM; + + if (max_i > PERF_SAMPLE_MAX_SIZE / sizeof(u64)) + goto out_put; + + + + new_event->sample.header = event->sample.header; + + in_array = &event->sample.array[0]; + out_array = &new_event->sample.array[0]; + +#define CHECK_BOUNDS(required_i, required_j) \ + do { \ + if (i + (required_i) > max_i || j + (required_j) > max_j) { \ + ret = -EFAULT; \ + goto out_put; \ + } \ + } while (0) + +#define COPY_U64() \ + do { \ + CHECK_BOUNDS(1, 1); \ + out_array[j++] = in_array[i++]; \ + } while (0) + +#define REMAP_U64(addr_field) \ + do { \ + CHECK_BOUNDS(1, 1); \ + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr_field); \ + i++; \ + } while (0) + + if (sample_type & PERF_SAMPLE_IDENTIFIER) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_IP) + REMAP_U64(sample->ip); + if (sample_type & PERF_SAMPLE_TID) + COPY_U64(); /* pid, tid */ + if (sample_type & PERF_SAMPLE_TIME) + COPY_U64(); /* time */ + if (sample_type & PERF_SAMPLE_ADDR) + REMAP_U64(sample->addr); + if (sample_type & PERF_SAMPLE_ID) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_STREAM_ID) + COPY_U64(); /* stream_id */ + if (sample_type & PERF_SAMPLE_CPU) + COPY_U64(); /* cpu, res */ + if (sample_type & PERF_SAMPLE_PERIOD) + COPY_U64(); /* period */ + if (sample_type & PERF_SAMPLE_READ) { + if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { + COPY_U64(); /* value */ + if 
(evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } else { + u64 nr; + + CHECK_BOUNDS(1, 1); + nr = out_array[j++] = in_array[i++]; + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + for (u64 cntr = 0; cntr < nr; cntr++) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } + } + } + if (sample_type & PERF_SAMPLE_CALLCHAIN) { + u64 nr; + + CHECK_BOUNDS(1, 1); + nr = out_array[j++] = in_array[i++]; + + for (u64 cntr = 0; cntr < nr; cntr++) { + CHECK_BOUNDS(1, 1); + addr = in_array[i++]; + if (addr >= PERF_CONTEXT_MAX) { + out_array[j++] = addr; + switch (addr) { + case PERF_CONTEXT_HV: + cpumode = PERF_RECORD_MISC_HYPERVISOR; + break; + case PERF_CONTEXT_KERNEL: + cpumode = PERF_RECORD_MISC_KERNEL; + break; + case PERF_CONTEXT_USER: + cpumode = PERF_RECORD_MISC_USER; + break; + case PERF_CONTEXT_GUEST: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_KERNEL: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_USER: + cpumode = PERF_RECORD_MISC_GUEST_USER; + break; + case PERF_CONTEXT_USER_DEFERRED: + /* + * Immediately followed by a 64-bit + * stitching cookie. Skip/Copy it! 
+ */ + CHECK_BOUNDS(1, 1); + out_array[j++] = in_array[i++]; + cntr++; + break; + default: + pr_debug("invalid callchain context: %"PRIx64"\n", addr); + ret = 0; + goto out_put; + } + continue; + } + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr); + } + } + if (sample_type & PERF_SAMPLE_RAW) { + size_t bytes = sizeof(u32) + sample->raw_size; + size_t u64_words = (bytes + 7) / 8; + + if (i + u64_words > max_i || j + u64_words > max_j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], bytes); + i += u64_words; + j += u64_words; + /* + * TODO: certain raw samples can be remapped, such as + * tracepoints by examining their fields. + */ + pr_debug("Dropping raw samples as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + u64 nr; + + CHECK_BOUNDS(1, 1); + nr = out_array[j++] = in_array[i++]; + + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) + COPY_U64(); /* hw_idx */ + + if (nr > (ULLONG_MAX / 3)) { + ret = -EFAULT; + goto out_put; + } + if (nr * 3 > max_i - i || nr * 3 > max_j - j) { + ret = -EFAULT; + goto out_put; + } + for (u64 cntr = 0; cntr < nr; cntr++) { + out_array[j++] = aslr_tool__remap_address(aslr, thread, + sample->cpumode, + in_array[i++]); /* from */ + out_array[j++] = aslr_tool__remap_address(aslr, thread, + sample->cpumode, + in_array[i++]); /* to */ + out_array[j++] = in_array[i++]; /* flags */ + } + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) { + if (nr > max_i - i || nr > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); + i += nr; + j += nr; + /* TODO: confirm branch counters don't leak ASLR information. 
*/ + pr_debug("Dropping sample branch counters as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + } + if (sample_type & PERF_SAMPLE_REGS_USER) { + u64 abi; + + COPY_U64(); /* abi */ + abi = out_array[j-1]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(evsel->core.attr.sample_regs_user); + + if (nr > max_i - i || nr > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); + i += nr; + j += nr; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping regs user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_STACK_USER) { + u64 size; + + CHECK_BOUNDS(1, 1); + size = out_array[j++] = in_array[i++]; + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + if (size % 8) { + size_t pad = 8 - (size % 8); + + memset(((char *)&out_array[j]) + size, 0, pad); + } + i += u64_words; + j += u64_words; + + COPY_U64(); /* dyn_size */ + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping stack user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + COPY_U64(); /* perf_sample_weight */ + if (sample_type & PERF_SAMPLE_DATA_SRC) + COPY_U64(); /* data_src */ + if (sample_type & PERF_SAMPLE_TRANSACTION) + COPY_U64(); /* transaction */ + if (sample_type & PERF_SAMPLE_REGS_INTR) { + u64 abi; + + COPY_U64(); /* abi */ + abi = out_array[j-1]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(evsel->core.attr.sample_regs_intr); + + if (nr > max_i - i || nr > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); + i += nr; + j += nr; + } + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_PHYS_ADDR) { + COPY_U64(); /* phys_addr */ + /* TODO: can this be less conservative? */ + pr_debug("Dropping physical address sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_CGROUP) + COPY_U64(); /* cgroup */ + if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + COPY_U64(); /* data_page_size */ + if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + COPY_U64(); /* code_page_size */ + + if (sample_type & PERF_SAMPLE_AUX) { + u64 size; + + CHECK_BOUNDS(1, 1); + size = out_array[j++] = in_array[i++]; + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + if (size % 8) { + size_t pad = 8 - (size % 8); + + memset(((char *)&out_array[j]) + size, 0, pad); + } + i += u64_words; + j += u64_words; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping aux sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + if (evsel__is_offcpu_event(evsel)) { + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping off-CPU sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); + + perf_sample__init(&new_sample, /*all=*/ true); + ret = evsel__parse_sample(evsel, new_event, &new_sample); + if (ret) { + perf_sample__exit(&new_sample); + goto out_put; + } + + ret = delegate->sample(delegate, new_event, &new_sample, evsel, machine); + perf_sample__exit(&new_sample); + +out_put: + thread__put(thread); + return ret; +} + +#undef CHECK_BOUNDS +#undef COPY_U64 +#undef REMAP_U64 + + +static int aslr_tool__process_attr(const struct perf_tool *tool, + union perf_event *event, + struct evlist **pevlist) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + + memcpy(&new_event->attr, &event->attr, event->attr.header.size); + if (new_event->attr.attr.type == PERF_TYPE_BREAKPOINT) + new_event->attr.attr.bp_addr = 0; /* Conservatively remove addresses. 
*/ + + return delegate->attr(delegate, new_event, pevlist); +} + +static s64 aslr_tool__process_auxtrace(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, + union perf_event *event) +{ + if (perf_data__is_pipe(session->data)) { + int err = skipn(perf_data__fd(session->data), event->auxtrace.size); + + if (err < 0) + return err; + } + return event->auxtrace.size; +} + +static int aslr_tool__process_auxtrace_info(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + +static int aslr_tool__process_auxtrace_error(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + +static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) +{ + delegate_tool__init(&aslr->tool, delegate); + aslr->tool.tool.ordered_events = true; + + machines__init(&aslr->machines); + + hashmap__init(&aslr->remap_addresses, + remap_addresses__hash, remap_addresses__equal, + /*ctx=*/NULL); + hashmap__init(&aslr->top_addresses, + top_addresses__hash, top_addresses__equal, + /*ctx=*/NULL); + + aslr->tool.tool.sample = aslr_tool__process_sample; + /* read - reads a counter, okay to delegate. */ + aslr->tool.tool.mmap = aslr_tool__process_mmap; + aslr->tool.tool.mmap2 = aslr_tool__process_mmap2; + aslr->tool.tool.comm = aslr_tool__process_comm; + aslr->tool.tool.fork = aslr_tool__process_fork; + aslr->tool.tool.exit = aslr_tool__process_exit; + /* namespaces, cgroup, lost, lost_sample, aux, */ + /* itrace_start, aux_output_hw_id, context_switch, throttle, unthrottle */ + /* - no virtual addresses. */ + aslr->tool.tool.ksymbol = aslr_tool__process_ksymbol; + /* bpf - no virtual address. 
*/ + aslr->tool.tool.text_poke = aslr_tool__process_text_poke; + aslr->tool.tool.attr = aslr_tool__process_attr; + /* event_update, tracing_data, finished_round, build_id, id_index, */ + /* event_update, tracing_data, finished_round, build_id, id_index, */ + /* auxtrace_info, auxtrace_error, time_conv, thread_map, cpu_map, */ + /* stat_config, stat, feature, finished_init, bpf_metadata, compressed, */ + /* auxtrace - no virtual addresses. */ + aslr->tool.tool.auxtrace = aslr_tool__process_auxtrace; + aslr->tool.tool.auxtrace_info = aslr_tool__process_auxtrace_info; + aslr->tool.tool.auxtrace_error = aslr_tool__process_auxtrace_error; +} + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate) +{ + struct aslr_tool *aslr = zalloc(sizeof(*aslr)); + + if (!aslr) + return NULL; + + aslr_tool__init(aslr, delegate); + return &aslr->tool.tool; +} + +void aslr_tool__delete(struct perf_tool *tool) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct hashmap_entry *cur; + size_t bkt; + + if (!tool) + return; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + + hashmap__for_each_entry(&aslr->remap_addresses, cur, bkt) { + struct remap_addresses_key *key = (struct remap_addresses_key *)cur->pkey; + + if (key) + dso__put(key->dso); + zfree(&cur->pkey); + zfree(&cur->pvalue); + } + hashmap__for_each_entry(&aslr->top_addresses, cur, bkt) { + zfree(&cur->pvalue); + } + + hashmap__clear(&aslr->remap_addresses); + hashmap__clear(&aslr->top_addresses); + machines__exit(&aslr->machines); + free(aslr); +} diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h new file mode 100644 index 000000000000..ea984d82681f --- /dev/null +++ b/tools/perf/util/aslr.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_ASLR_H +#define __PERF_ASLR_H + +struct perf_tool; + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate); +void aslr_tool__delete(struct perf_tool 
*aslr); + +#endif /* __PERF_ASLR_H */ -- 2.54.0.545.g6539524ca2-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v4 3/4] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses 2026-05-04 7:29 ` [PATCH v4 3/4] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses Ian Rogers @ 2026-05-04 8:39 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-05-04 8:39 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 3 potential issue(s) to consider: - [High] Kernel ASLR base is leaked through unmodified mmap `pgoff` fields. - [Medium] The 'targeted tail-word clearing' logic for STACK_USER and AUX frames is dead code because the entire sample is unconditionally dropped immediately after. - [Medium] The `skipn()` pipe helper lacks retry logic for `-EINTR`, risking spurious aborts. -- commit 0794ff7942de3e0dd56f299d7bfb0e61ba3fc97a Author: Ian Rogers <irogers@google.com> perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses This commit introduces an '--aslr' option for perf inject to remap virtual addresses in perf.data files. This prevents leaking virtual addresses and weakening ASLR on the source machine. Events with unknown or unhandled addresses are dropped to remain conservative. > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > new file mode 100644 > index 000000000000..27b642c91eb9 > --- /dev/null > +++ b/tools/perf/util/aslr.c > @@ -0,0 +1,1161 @@ [ ... ] > +static int skipn(int fd, u64 n) > +{ > + char buf[4096]; > + ssize_t ret; > + > + while (n > 0) { > + ret = read(fd, buf, (n < (u64)sizeof(buf) ? n : (u64)sizeof(buf))); > + if (ret <= 0) > + return ret; Will this cause spurious aborts if read() is interrupted by a signal? Since this is reading from a pipe, if read() returns -1 with errno == EINTR, skipn() will return the error immediately rather than retrying. > + n -= ret; > + } > + > + return 0; > +} [ ... 
] > +static int aslr_tool__process_mmap(const struct perf_tool *tool, > + union perf_event *event, > + struct perf_sample *sample, > + struct machine *machine) > +{ [ ... ] > + memcpy(&new_event->mmap, &event->mmap, event->mmap.header.size); > + /* Remaps the mmap.start. */ > + new_event->mmap.start = aslr_tool__remap_mapping(aslr, thread, cpumode, > + event->mmap.start, > + event->mmap.len, > + event->mmap.pgoff); Does this leak the kernel ASLR base? For the main kernel mapping, the perf tool synthesizes mmap events where the mmap.pgoff field is set to the original kernel start address. Since pgoff is left unmodified here, the unobfuscated kernel base address will be copied directly into the output stream. > + err = delegate->mmap(delegate, new_event, sample, machine); > + thread__put(thread); > + return err; > +} [ ... ] > +static int aslr_tool__process_mmap2(const struct perf_tool *tool, > + union perf_event *event, > + struct perf_sample *sample, > + struct machine *machine) > +{ [ ... ] > + memcpy(&new_event->mmap2, &event->mmap2, event->mmap2.header.size); > + /* Remaps the mmap.start. */ > + new_event->mmap2.start = aslr_tool__remap_mapping(aslr, thread, cpumode, > + event->mmap2.start, > + event->mmap2.len, > + event->mmap2.pgoff); Similarly, does this leave the unobfuscated kernel base address in pgoff for mmap2 events? > + err = delegate->mmap2(delegate, new_event, sample, machine); > + thread__put(thread); > + return err; > +} [ ... ] > +static int aslr_tool__process_sample(const struct perf_tool *tool, union perf_event *event, > + struct perf_sample *sample, > + struct evsel *evsel, struct machine *machine) > +{ [ ... ] > + if (sample_type & PERF_SAMPLE_STACK_USER) { > + u64 size; > + > + CHECK_BOUNDS(1, 1); > + size = out_array[j++] = in_array[i++]; > + if (size > 0) { > + size_t u64_words = size / 8 + (size % 8 ? 
1 : 0); > + > + if (u64_words > max_i - i || u64_words > max_j - j) { > + ret = -EFAULT; > + goto out_put; > + } > + memcpy(&out_array[j], &in_array[i], size); > + if (size % 8) { > + size_t pad = 8 - (size % 8); > + > + memset(((char *)&out_array[j]) + size, 0, pad); > + } Is this tail-word clearing dead code? > + i += u64_words; > + j += u64_words; > + > + COPY_U64(); /* dyn_size */ > + } > + /* TODO: can this be less conservative? */ > + pr_debug("Dropping stack user sample as possible ASLR leak\n"); > + ret = 0; > + goto out_put; Immediately after clearing the trailing heap padding in the output array, the code unconditionally jumps to out_put with ret = 0, skipping the sample delegate call. This drops the entire sample from the output stream. > + } [ ... ] > + if (sample_type & PERF_SAMPLE_AUX) { > + u64 size; > + > + CHECK_BOUNDS(1, 1); > + size = out_array[j++] = in_array[i++]; > + if (size > 0) { > + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); > + > + if (u64_words > max_i - i || u64_words > max_j - j) { > + ret = -EFAULT; > + goto out_put; > + } > + memcpy(&out_array[j], &in_array[i], size); > + if (size % 8) { > + size_t pad = 8 - (size % 8); > + > + memset(((char *)&out_array[j]) + size, 0, pad); > + } > + i += u64_words; > + j += u64_words; > + } > + /* TODO: can this be less conservative? */ > + pr_debug("Dropping aux sample as possible ASLR leak\n"); > + ret = 0; > + goto out_put; > + } Does this code also unnecessarily calculate and clear the tail padding for AUX frames, given that the sample is unconditionally dropped? -- Sashiko AI review · https://sashiko.dev/#/patchset/20260504072937.2103453-1-irogers@google.com?part=3 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v4 4/4] perf test: Add inject ASLR test 2026-05-04 7:29 ` [PATCH v4 0/4] perf tools: Add inject --aslr feature and prerequisite robustness fixes Ian Rogers ` (2 preceding siblings ...) 2026-05-04 7:29 ` [PATCH v4 3/4] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses Ian Rogers @ 2026-05-04 7:29 ` Ian Rogers 2026-05-04 8:48 ` sashiko-bot 2026-05-04 8:23 ` [PATCH v4 0/4] perf tools: Add inject --aslr feature and prerequisite robustness fixes Ian Rogers 2026-05-06 0:45 ` [PATCH v5 0/5] " Ian Rogers 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-05-04 7:29 UTC (permalink / raw) To: acme, gmx, namhyung Cc: adrian.hunter, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz, Ian Rogers Add a new shell test `inject_aslr.sh` to verify the `perf inject --aslr` feature. The test covers: - Basic address remapping for user space samples. - Pipe mode coverage for `perf record` piped into `perf inject --aslr`. - Callchain address remapping. - Consistency of `perf report` output before and after injection. - Pipe mode report consistency. - Dropping of samples that leak ASLR info (physical addresses). - Kernel address remapping (skipping gracefully if permissions restrict recording the kernel map). - Kernel report consistency with address normalization. The test suite is hardened with: - Global 'set -o pipefail' pipeline checks to catch failures in perf script. - Safe awk processing loop closures that consume whole streams to avoid SIGPIPE signal aborts. - False success assertions to verify callchain data isn't dynamically dropped. - Graceful error paths on empty sample records. - Multi-arch support (32-bit and 64-bit address normalizations). - Adds a new 'test_pipe_out_report_aslr' pipeline validation scenario testing raw 'perf inject --aslr -o -' pipe stdout generation to verify attribute repipe wrapper stability. 
Assisted-by: Gemini-CLI:Google Gemini 3 Signed-off-by: Ian Rogers <irogers@google.com> --- v4: Reorder set -e/pipefail to prevent temp file leakage in root directory on unprivileged record failures when run as root. Ensure grep report filters have || true suffixes to avoid aborts under pipefail. Add comprehensive pipe stdout injection attributes validation case. v3: Harden script with pipefail, SIGPIPE awk pipeline fixes, callchain empty data asserts, baseline sample verification, and grep report abort protections. Reorder set -e/pipefail to prevent stack leaks in mktemp failures. v2: Add sum comparison for kernel overhead and 32-bit math corrections. Add awk with gsub for trailing dots and brackets normalizations. Trap EXIT, prevent race conditions and avoid hardcoded perf binary. --- tools/perf/tests/shell/inject_aslr.sh | 459 ++++++++++++++++++++++++++ 1 file changed, 459 insertions(+) create mode 100755 tools/perf/tests/shell/inject_aslr.sh diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh new file mode 100755 index 000000000000..80ec9bf3daf8 --- /dev/null +++ b/tools/perf/tests/shell/inject_aslr.sh @@ -0,0 +1,459 @@ +#!/bin/bash +# perf inject --aslr test +# SPDX-License-Identifier: GPL-2.0 + +set -e +set -o pipefail + +shelldir=$(dirname "$0") +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + +sym="noploop" + +skip_test_missing_symbol ${sym} + +# Create global temp directory +temp_dir=$(mktemp -d /tmp/perf-test-aslr.XXXXXXXXXX) +data="${temp_dir}/perf.data" +data2="${temp_dir}/perf.data2" + +prog="perf test -w noploop" +[ "$(uname -m)" = "s390x" ] && prog="$prog 3" +err=0 + + + +cleanup() { + # Check if temp_dir is set and looks sane before removing + if [[ "${temp_dir}" =~ ^/tmp/perf-test-aslr\. 
]]; then + rm -rf "${temp_dir}" + fi +} + +trap_cleanup() { + cleanup + exit 1 +} + +trap cleanup EXIT +trap trap_cleanup TERM INT + +get_noploop_addr() { + local file=$1 + perf script -i "$file" | awk ' + BEGIN { found=0 } + { + for (i=1; i<=NF; i++) { + if ($i ~ /noploop\+/) { + if (!found) { + print $(i-1) + found=1 + } + } + } + }' +} + +test_basic_aslr() { + echo "Test basic ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.basic.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.basic.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -v --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Basic ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Basic ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Basic ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Basic ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Basic ASLR test [Success]" + fi +} + +test_pipe_aslr() { + echo "Test pipe mode ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe.XXXXXX") + + # Use tee to save the original pipe data for comparison + perf record -e task-clock:u -o - ${prog} | tee "${data}" | perf inject --aslr -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Pipe ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Pipe ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Pipe ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Pipe ASLR test [Failed - addresses are not remapped]" 
+ err=1 + else + echo "Pipe ASLR test [Success]" + fi +} + +test_callchain_aslr() { + echo "Test Callchain ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.callchain.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.callchain.XXXXXX") + + perf record -g -e task-clock:u -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Callchain ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Callchain ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Callchain ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Callchain ASLR test [Failed - addresses are not remapped]" + err=1 + else + # Extract callchain addresses (indented lines starting with hex addresses) + orig_callchain=$(perf script -i "${data}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + new_callchain=$(perf script -i "${data2}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + + if [ -z "$orig_callchain" ]; then + echo "Callchain ASLR test [Failed - no callchain samples in original file]" + err=1 + elif [ -z "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain data was dropped]" + err=1 + elif [ "$orig_callchain" = "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain addresses were not remapped]" + err=1 + else + echo "Callchain ASLR test [Success]" + fi + fi +} + +test_report_aslr() { + echo "Test perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i 
"${data}" -o "${data_clean}" + perf inject -v -b --aslr -i "${data}" -o "${data2}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Report ASLR test [Success]" + fi +} + +test_pipe_report_aslr() { + echo "Test pipe mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + # Use tee to save the original pipe data, then process it with inject -b + perf record -e task-clock:u -o - ${prog} | \ + tee "${data}" | \ + perf inject -b --aslr -o "${data2}" + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || 
true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Pipe Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Report ASLR test [Success]" + fi +} + +test_pipe_out_report_aslr() { + echo "Test pipe output mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_out_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf inject -b --aslr -i "${data}" -o - | perf report -i - --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! 
-s "${report1_clean}" ]; then + echo "Pipe Output Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Output Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Output Report ASLR test [Success]" + fi +} + +test_dropped_samples() { + echo "Test dropped samples (phys-data)" + local data + data=$(mktemp "${temp_dir}/perf.data.dropped.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.dropped.XXXXXX") + + # Check if --phys-data is supported by recording a short run + if ! perf record -e task-clock:u --phys-data -o "${data}" -- sleep 0.1 > /dev/null 2>&1; then + echo "Skipping dropped samples test as --phys-data is not supported" + return + fi + + perf record -e task-clock:u --phys-data -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + # Verify that the original file actually contained samples! + orig_samples=$(perf script -i "${data}" | wc -l) + if [ "$orig_samples" -eq 0 ]; then + echo "Dropped samples test [Failed - no samples in original file]" + err=1 + else + # Verify that samples are dropped. + samples_count=$(perf script -i "${data2}" | wc -l) + + if [ "$samples_count" -gt 0 ]; then + echo "Dropped samples test [Failed - samples were not dropped]" + err=1 + else + echo "Dropped samples test [Success]" + fi + fi +} + +test_kernel_aslr() { + echo "Test kernel ASLR remapping" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${prog} > "${log_file}" 2>&1; then + echo "Skipping kernel ASLR test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel ASLR test as kernel map could not be recorded (permissions restricted)" + return + fi + + perf inject -v --aslr -i "${kdata}" -o "${kdata2}" + + # Check if kernel addresses are remapped. + # Find the field that ends with :k: (the event name) and take the next field! + orig_addr=$(perf script -i "${kdata}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + new_addr=$(perf script -i "${kdata2}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + + echo "Kernel ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Kernel ASLR test [Failed - no kernel samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Kernel ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Kernel ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Kernel ASLR test [Success]" + fi +} + +test_kernel_report_aslr() { + echo "Test kernel perf report consistency" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel_report.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_report_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${prog} > "${log_file}" 2>&1; then + echo "Skipping kernel report test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel report test as kernel map could not be recorded (permissions restricted)" + return + fi + + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i "${kdata}" -o "${data_clean}" + perf inject -v -b --aslr -i "${kdata}" -o "${kdata2}" + + local report1="${temp_dir}/report_kernel1" + local report2="${temp_dir}/report_kernel2" + local report1_clean="${temp_dir}/report_kernel1.clean" + local report2_clean="${temp_dir}/report_kernel2.clean" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${kdata2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' > "${report2_clean}" || true + + # Normalize kernel DSOs and addresses in clean reports + # This allows kernel modules to be either a module or kernel.kallsyms + local report1_norm="${temp_dir}/report_kernel1.norm" + local report2_norm="${temp_dir}/report_kernel2.norm" + local diff_file="${temp_dir}/diff_kernel" + + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); \ + print}' "${report1_clean}" | sort > "${report1_norm}" + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); \ + print}' "${report2_clean}" | sort > "${report2_norm}" + + diff -u -w "${report1_norm}" "${report2_norm}" > "${diff_file}" || true + + if [ ! 
-s "${report1_norm}" ]; then + echo "Kernel Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Kernel Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Kernel Report ASLR test [Success]" + fi +} + +test_basic_aslr +test_pipe_aslr +test_callchain_aslr +test_report_aslr +test_pipe_report_aslr +test_pipe_out_report_aslr +test_dropped_samples +test_kernel_aslr +test_kernel_report_aslr + +cleanup +exit $err -- 2.54.0.545.g6539524ca2-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v4 4/4] perf test: Add inject ASLR test 2026-05-04 7:29 ` [PATCH v4 4/4] perf test: Add inject ASLR test Ian Rogers @ 2026-05-04 8:48 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-05-04 8:48 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider: - [Medium] The test script claims to perform address normalization for kernel reports but fails to implement it in the awk substitution. -- commit 35a72f67733a9ae0ed75cf3ba7c2a7ec0e4419cd Author: Ian Rogers <irogers@google.com> perf test: Add inject ASLR test This commit adds a new shell test, inject_aslr.sh, to verify the perf inject --aslr feature. It covers various scenarios including basic address remapping, pipe modes, callchains, and kernel address remapping consistency. > diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh > new file mode 100755 > index 000000000000..80ec9bf3daf8 > --- /dev/null > +++ b/tools/perf/tests/shell/inject_aslr.sh [ ... ] > +test_kernel_report_aslr() { [ ... ] > + # Normalize kernel DSOs and addresses in clean reports > + # This allows kernel modules to be either a module or kernel.kallsyms > + local report1_norm="${temp_dir}/report_kernel1.norm" > + local report2_norm="${temp_dir}/report_kernel2.norm" > + local diff_file="${temp_dir}/diff_kernel" > + > + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); \ > + print}' "${report1_clean}" | sort > "${report1_norm}" > + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); \ > + print}' "${report2_clean}" | sort > "${report2_norm}" The commit message mentions "32-bit and 64-bit address normalizations", and the inline comment here specifically states it will normalize addresses. However, the awk substitution appears to only match and replace the DSO module names. 
If unresolved kernel symbols are present in the trace, perf report will output their absolute hexadecimal addresses. Since --aslr shifts these addresses, wouldn't the unnormalized hex strings differ between the original and injected reports? Could this cause the diff check to fail on environments lacking full kernel symbol resolution, leading to flaky test failures? > + > + diff -u -w "${report1_norm}" "${report2_norm}" > "${diff_file}" || true -- Sashiko AI review · https://sashiko.dev/#/patchset/20260504072937.2103453-1-irogers@google.com?part=4 ^ permalink raw reply [flat|nested] 183+ messages in thread
* Re: [PATCH v4 0/4] perf tools: Add inject --aslr feature and prerequisite robustness fixes 2026-05-04 7:29 ` [PATCH v4 0/4] perf tools: Add inject --aslr feature and prerequisite robustness fixes Ian Rogers ` (3 preceding siblings ...) 2026-05-04 7:29 ` [PATCH v4 4/4] perf test: Add inject ASLR test Ian Rogers @ 2026-05-04 8:23 ` Ian Rogers 2026-05-06 0:45 ` [PATCH v5 0/5] " Ian Rogers 5 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-05-04 8:23 UTC (permalink / raw) To: acme, gmx, namhyung Cc: adrian.hunter, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz I found a regression when testing as root, so please ignore. Thanks, Ian On Mon, May 4, 2026 at 12:29 AM Ian Rogers <irogers@google.com> wrote: > > This patch series introduces the new 'perf inject --aslr' feature to remap > virtual memory addresses or drop physical memory event leaks when profile > record data is shared between machines. Bundled with this feature are two > independent, critical bug fixes inside core event dispatching tools that > harden perf session analysis against dynamic crashes and callchain mapping > failures. > > Core Feature: 'perf inject --aslr' (Patches 3 and 4) > > Transferring perf.data files across environments introduces a potential leak > of virtual address footprints, weakening Address Space Layout Randomization > (ASLR) on the originating machine. To mitigate this, we introduce the --aslr > flag into perf inject. Unknown or unhandled events are dropped conservatively, > while handled samples and branch loops undergo systematic virtual memory offset > obfuscation. > > To ensure comprehensive memory and error-path safety, the ASLR tool implements: > - Machine namespaces ('struct machines') to safely interleave host mappings and > unprivileged KVM guest virtual address mappings. > - Resolves VMA split map failures (caused by overlap fixups during map > insertions) consistently by anchoring mappings on DSO and memory > invariants. 
> - Guards against integer overflows in branch stack loops via > subtraction-based bounds arithmetic. > - Prevents heap buffer overflows by computing safe word limits on > userspace stacks and dynamic hardware tracing (AUX) sizes. > - Prevents key collisions/ABA lookups by correctly managing DSO > reference counts (dso__get/put). > - Cleans up error paths to avoid inconsistent hashmap mappings on > OOM failures. > - Optimizes performance by removing redundant hot-path memory > allocations. > - Cleanly advances session readers past dropped auxtrace streams > using pipe-stream I/O skip helpers. > - Scrubs breakpoint addresses (bp_addr) from output event headers > and dynamically synthesized events for pipes via a custom pipe > repipe wrapper to prevent unscrubbed address leakage. > - Remaps kernel memory maps linearly to maintain secure base > obfuscation bounds. > - Hardens guest cpumode lookups against corrupting host/guest user and > kernel mapping boundaries during sample fallback searches. > - Synchronizes ksymbol map tracking invariants using precise VMA > offset math rather than raw addresses to prevent unique base leaks > on every function symbol. > - Blocks trailing heap padding byte data leakage vectors in userspace > stacks and AUX tracking frames via targeted tail-word clearing. > > Verification is reinforced in Patch 4 with a new comprehensive POSIX shell > suite ('inject_aslr.sh'), hardened against SIGPIPE signal exits with stream > consuming awk loops and robust 'set -o pipefail' assertions. The suite includes > a new dedicated scenario validating pipe stdout injection attribute stability. > > Prerequisite Bug Fixes (Patches 1 and 2) > > During development, two core event delegation issues were identified and > resolved to prevent crashes and data-loss during analysis: > > 1. perf sched: 'timehist' registers standard MMAP, COMM, EXIT, and FORK stubs, > but completely omitted registering MMAP2 callbacks. 
Because modern environments > output maps primarily via MMAP2 frames, this caused timehist sessions to silently > drop shared library mappings, causing dynamic callchain symbol resolutions to > fail. Patch 1 corrects this by properly registering perf_event__process_mmap2. > > 2. perf tool: Patch 2 fixes missing copies of schedstat callbacks inside delegated > wrapper tools (which caused segfaults on NULL stubs) and properly initializes/copies > the 'dont_split_sample_group' grouping parameters to prevent stack garbage from > triggering silent non-leader events drops during split deliver streams. > > Changes since v3: > - Feature integration: Pass a dedicated 'perf_event__aslr_repipe' callback to > perf_event__synthesize_for_pipe() to scrub synthesized breakpoint attributes. > - Feature core: Loop through and scrub event evlist breakpoint attributes right > before writing file headers in __cmd_inject(). > - Feature core: Linearize kernel map base obfuscation and remove redundant pgoff > delta adjustments that leaked kernel layout calculations. > - Feature core: Fix host/guest cpumode mappings in sample fallback lookups. > - Feature core: Sync ksymbol tracking keys onto VMA offset invariants. > - Feature core: Zero out trailing padding word bytes in user stacks and AUX blocks. > - Validation suite: Add 'test_pipe_out_report_aslr' validation case. > - Validation suite: Upgrade kernel report checks to strict sorted line-by-line diffs. > - Style: Wrap all commit description lines to under 75 columns and fix code formatting. 
> > Ian Rogers (4): > perf sched: Add missing mmap2 handler in timehist > perf tool: Fix missing schedstat delegates and dont_split_sample_group > in delegate_tool > perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses > perf test: Add inject ASLR test > > tools/perf/builtin-inject.c | 52 +- > tools/perf/builtin-sched.c | 1 + > tools/perf/tests/shell/inject_aslr.sh | 459 ++++++++++ > tools/perf/util/Build | 1 + > tools/perf/util/aslr.c | 1161 +++++++++++++++++++++++++ > tools/perf/util/aslr.h | 10 + > tools/perf/util/tool.c | 6 + > 7 files changed, 1689 insertions(+), 1 deletion(-) > create mode 100755 tools/perf/tests/shell/inject_aslr.sh > create mode 100644 tools/perf/util/aslr.c > create mode 100644 tools/perf/util/aslr.h > > -- > 2.54.0.545.g6539524ca2-goog > ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v5 0/5] perf tools: Add inject --aslr feature and prerequisite robustness fixes 2026-05-04 7:29 ` [PATCH v4 0/4] perf tools: Add inject --aslr feature and prerequisite robustness fixes Ian Rogers ` (4 preceding siblings ...) 2026-05-04 8:23 ` [PATCH v4 0/4] perf tools: Add inject --aslr feature and prerequisite robustness fixes Ian Rogers @ 2026-05-06 0:45 ` Ian Rogers 2026-05-06 0:45 ` [PATCH v5 1/5] perf sched: Add missing mmap2 handler in timehist Ian Rogers ` (5 more replies) 5 siblings, 6 replies; 183+ messages in thread From: Ian Rogers @ 2026-05-06 0:45 UTC (permalink / raw) To: irogers, acme, gmx, namhyung Cc: adrian.hunter, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz This patch series introduces the new 'perf inject --aslr' feature to remap virtual memory addresses or drop physical memory event leaks when profile record data is shared between machines. Bundled with this feature are three independent, critical bug fixes inside core event dispatching and map tracking tools that harden perf session analysis against dynamic crashes and callchain mapping failures. Core Feature: 'perf inject --aslr' (Patches 4 and 5) Transferring perf.data files across environments introduces a potential leak of virtual address footprints, weakening Address Space Layout Randomization (ASLR) on the originating machine. To mitigate this, we introduce the --aslr flag into perf inject. Unknown or unhandled events are dropped conservatively, while handled samples and branch loops undergo systematic virtual memory offset obfuscation. The ASLR tracking tool virtualizes process and machine namespaces using 'struct machines' to safely isolate host mappings from unprivileged KVM guest address spaces. Memory space layouts are tracked globally per process context to ensure linear, continuous space allocations across successive mapping runs. 
To remain strictly conservative and guarantee security, the tool scrubs breakpoint addresses (bp_addr) from all synthesized stream headers, and drops unsupported complex payloads (such as user register stacks, raw tracepoints, and hardware AUX tracing frames) to completely eliminate accidental address leakage vectors. Verification is reinforced in Patch 5 with a comprehensive POSIX shell suite ('inject_aslr.sh'), hardened against SIGPIPE signal exits with stream-consuming awk loops and robust 'set -o pipefail' assertions. The suite includes a dedicated scenario validating raw 'perf inject -o -' pipe stdout generation attribute stability. Prerequisite Bug Fixes (Patches 1, 2, and 3) During development, three core event delegation and map indexing issues were identified and resolved to prevent crashes and data loss during analysis: 1. perf sched: 'timehist' registered standard MMAP, COMM, EXIT, and FORK stubs, but completely omitted registering MMAP2 callbacks. Because modern environments output maps primarily via MMAP2 frames, this caused timehist sessions to silently drop shared library mappings, causing dynamic callchain symbol resolutions to fail. Patch 1 corrects this by properly registering perf_event__process_mmap2. 2. perf tool: Patch 2 fixes missing copies of schedstat callbacks inside delegated wrapper tools (which caused segfaults on NULL stubs) and properly initializes/copies the 'dont_split_sample_group' grouping parameters to prevent stack garbage from triggering silent drops of non-leader events during split deliver streams. 3. perf symbols: Patch 3 resolves a deep structural map tracking desynchronization bug inside symbol-elf.c by re-engineering the map removal sequence order to run strictly BEFORE in-place virtual address mutations, preventing absolute binary searches (bsearch) from failing on misaligned cache array slots.
Changes since v4: - Core Bug Fix: Introduce a new prerequisite standalone fix patch (Patch 3) that re-engineers map tracking removal sequence order inside symbol-elf.c to prevent corrupting binary search index arrays during in-place address mutations. - Feature Core: Refactor aslr_tool__delete to cleanly clear host/guest maps and structures via machines__destroy_kernel_maps() to fix memory leaks in the destructor. - Feature Core: Integrate the 'first_kernel_mapping' state guard to protect kernel module file offsets (pgoff) from corruption, preventing symbol resolution failures. - Feature Integration: Move breakpoint address (bp_addr) scrubbing to session initialization, natively securing both files and pipes while completely stripping away redundant runtime wrapper layers. - Validation Suite: Harden 'grep -v' filters with '|| true' to protect pipelines from failing under 'set -o pipefail' on empty inputs. - Style: Trim commit log text into concise, high-level summaries. Ian Rogers (5): perf sched: Add missing mmap2 handler in timehist perf tool: Fix missing schedstat delegates and dont_split_sample_group in delegate_tool perf symbols: Fix map removal sequence inside dso__process_kernel_symbol() perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses perf test: Add inject ASLR test tools/perf/builtin-inject.c | 31 +- tools/perf/builtin-sched.c | 1 + tools/perf/tests/shell/inject_aslr.sh | 459 ++++++++++ tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 1220 +++++++++++++++++++++++++ tools/perf/util/aslr.h | 10 + tools/perf/util/symbol-elf.c | 21 +- tools/perf/util/tool.c | 6 + 8 files changed, 1743 insertions(+), 6 deletions(-) create mode 100755 tools/perf/tests/shell/inject_aslr.sh create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h -- 2.54.0.545.g6539524ca2-goog ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v5 1/5] perf sched: Add missing mmap2 handler in timehist 2026-05-06 0:45 ` [PATCH v5 0/5] " Ian Rogers @ 2026-05-06 0:45 ` Ian Rogers 2026-05-06 13:22 ` Arnaldo Carvalho de Melo 2026-05-06 0:45 ` [PATCH v5 2/5] perf tool: Fix missing schedstat delegates and dont_split_sample_group in delegate_tool Ian Rogers ` (4 subsequent siblings) 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-05-06 0:45 UTC (permalink / raw) To: irogers, acme, gmx, namhyung Cc: adrian.hunter, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz perf_sched__timehist() registers event handlers for options using the sched->tool struct. It registers handlers for MMAP, COMM, EXIT, FORK, etc. but completely omits registering a handler for MMAP2 events. Failing to register both MMAP and MMAP2 handlers causes modern systems (which primarily output MMAP2 records) to silently drop VMA map mappings. This results in uninitialized machine/thread mapping structures, making it impossible to resolve shared library instruction pointers (IPs) to dynamic symbols/DSOs during timehist callchain analysis. Fix this by correctly registering perf_event__process_mmap2 in sched->tool inside perf_sched__timehist(). 
Assisted-by: Gemini-CLI:Google Gemini 3 Fixes: 5bbfec0ad93c ("perf sched: Implement timehist option") Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/builtin-sched.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 555247568e7a..241c2f808f7b 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -3299,6 +3299,7 @@ static int perf_sched__timehist(struct perf_sched *sched) */ sched->tool.sample = perf_timehist__process_sample; sched->tool.mmap = perf_event__process_mmap; + sched->tool.mmap2 = perf_event__process_mmap2; sched->tool.comm = perf_event__process_comm; sched->tool.exit = perf_event__process_exit; sched->tool.fork = perf_event__process_fork; -- 2.54.0.545.g6539524ca2-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v5 1/5] perf sched: Add missing mmap2 handler in timehist 2026-05-06 0:45 ` [PATCH v5 1/5] perf sched: Add missing mmap2 handler in timehist Ian Rogers @ 2026-05-06 13:22 ` Arnaldo Carvalho de Melo 2026-05-06 16:16 ` Ian Rogers 0 siblings, 1 reply; 183+ messages in thread From: Arnaldo Carvalho de Melo @ 2026-05-06 13:22 UTC (permalink / raw) To: Ian Rogers Cc: gmx, namhyung, adrian.hunter, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz On Tue, May 05, 2026 at 05:45:42PM -0700, Ian Rogers wrote: > perf_sched__timehist() registers event handlers for options using the > sched->tool struct. It registers handlers for MMAP, COMM, EXIT, FORK, etc. > but completely omits registering a handler for MMAP2 events. > > Failing to register both MMAP and MMAP2 handlers causes modern systems > (which primarily output MMAP2 records) to silently drop VMA map mappings. > This results in uninitialized machine/thread mapping structures, making it > impossible to resolve shared library instruction pointers (IPs) to dynamic > symbols/DSOs during timehist callchain analysis. > > Fix this by correctly registering perf_event__process_mmap2 in > sched->tool inside perf_sched__timehist(). > > Assisted-by: Gemini-CLI:Google Gemini 3 > Fixes: 5bbfec0ad93c ("perf sched: Implement timehist option") I'm picking patches 1 and 2 as they're simple and passed sashiko review, but: ⬢ [acme@toolbx perf-tools-next2]$ git show 5bbfec0ad93c fatal: ambiguous argument '5bbfec0ad93c': unknown revision or path not in the working tree. Use '--' to separate paths from revisions, like this: 'git <command> [<revision>...] -- [<file>...]' ⬢ [acme@toolbx perf-tools-next2]$ The right one is: Fixes: 49394a2a24c78ce0 ("perf sched timehist: Introduce timehist command") I checked and at the time, mmap2 was already present and it missed adding this handler. 
I fixed it, in case I'm missing something, lemme know, - Arnaldo > Signed-off-by: Ian Rogers <irogers@google.com> > --- > tools/perf/builtin-sched.c | 1 + > 1 file changed, 1 insertion(+) > > diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c > index 555247568e7a..241c2f808f7b 100644 > --- a/tools/perf/builtin-sched.c > +++ b/tools/perf/builtin-sched.c > @@ -3299,6 +3299,7 @@ static int perf_sched__timehist(struct perf_sched *sched) > */ > sched->tool.sample = perf_timehist__process_sample; > sched->tool.mmap = perf_event__process_mmap; > + sched->tool.mmap2 = perf_event__process_mmap2; > sched->tool.comm = perf_event__process_comm; > sched->tool.exit = perf_event__process_exit; > sched->tool.fork = perf_event__process_fork; > -- > 2.54.0.545.g6539524ca2-goog ^ permalink raw reply [flat|nested] 183+ messages in thread
* Re: [PATCH v5 1/5] perf sched: Add missing mmap2 handler in timehist 2026-05-06 13:22 ` Arnaldo Carvalho de Melo @ 2026-05-06 16:16 ` Ian Rogers 0 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-05-06 16:16 UTC (permalink / raw) To: Arnaldo Carvalho de Melo Cc: gmx, namhyung, adrian.hunter, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz On Wed, May 6, 2026 at 6:22 AM Arnaldo Carvalho de Melo <acme@kernel.org> wrote: > > On Tue, May 05, 2026 at 05:45:42PM -0700, Ian Rogers wrote: > > perf_sched__timehist() registers event handlers for options using the > > sched->tool struct. It registers handlers for MMAP, COMM, EXIT, FORK, etc. > > but completely omits registering a handler for MMAP2 events. > > > > Failing to register both MMAP and MMAP2 handlers causes modern systems > > (which primarily output MMAP2 records) to silently drop VMA map mappings. > > This results in uninitialized machine/thread mapping structures, making it > > impossible to resolve shared library instruction pointers (IPs) to dynamic > > symbols/DSOs during timehist callchain analysis. > > > > Fix this by correctly registering perf_event__process_mmap2 in > > sched->tool inside perf_sched__timehist(). > > > > Assisted-by: Gemini-CLI:Google Gemini 3 > > Fixes: 5bbfec0ad93c ("perf sched: Implement timehist option") > > I'm picking patches 1 and 2 as they're simple and passed sashiko review, > but: > > ⬢ [acme@toolbx perf-tools-next2]$ git show 5bbfec0ad93c > fatal: ambiguous argument '5bbfec0ad93c': unknown revision or path not in the working tree. > Use '--' to separate paths from revisions, like this: > 'git <command> [<revision>...] -- [<file>...]' > ⬢ [acme@toolbx perf-tools-next2]$ > > The right one is: > > Fixes: 49394a2a24c78ce0 ("perf sched timehist: Introduce timehist command") > > I checked and at the time, mmap2 was already present and it missed > adding this handler. > > I fixed it, in case I'm missing something, lemme know, Sounds good. 
I reworked patch3 because the remove/insert pattern was potentially racy. Now, with the write lock held it mutates the map entry in place and then marks the backing maps as being unsorted. Sashiko reviews made me aware of about four instances of this potential bug. I've also tweaked the ASLR code to make it more robust against colliding host/guest VM PIDs. I'm running sashiko locally and will post the next version when satisfied. For some reason the external Sashiko seems to be catching different issues :-/ Thanks, Ian > - Arnaldo > > > Signed-off-by: Ian Rogers <irogers@google.com> > > --- > > tools/perf/builtin-sched.c | 1 + > > 1 file changed, 1 insertion(+) > > > > diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c > > index 555247568e7a..241c2f808f7b 100644 > > --- a/tools/perf/builtin-sched.c > > +++ b/tools/perf/builtin-sched.c > > @@ -3299,6 +3299,7 @@ static int perf_sched__timehist(struct perf_sched *sched) > > */ > > sched->tool.sample = perf_timehist__process_sample; > > sched->tool.mmap = perf_event__process_mmap; > > + sched->tool.mmap2 = perf_event__process_mmap2; > > sched->tool.comm = perf_event__process_comm; > > sched->tool.exit = perf_event__process_exit; > > sched->tool.fork = perf_event__process_fork; > > -- > > 2.54.0.545.g6539524ca2-goog ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v5 2/5] perf tool: Fix missing schedstat delegates and dont_split_sample_group in delegate_tool 2026-05-06 0:45 ` [PATCH v5 0/5] " Ian Rogers 2026-05-06 0:45 ` [PATCH v5 1/5] perf sched: Add missing mmap2 handler in timehist Ian Rogers @ 2026-05-06 0:45 ` Ian Rogers 2026-05-06 0:45 ` [PATCH v5 3/5] perf symbols: Fix map removal sequence inside dso__process_kernel_symbol() Ian Rogers ` (3 subsequent siblings) 5 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-05-06 0:45 UTC (permalink / raw) To: irogers, acme, gmx, namhyung Cc: adrian.hunter, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz delegate_tool was missing the delegate overrides for schedstat_cpu and schedstat_domain. As a result, when allocated with zalloc, these callbacks defaulted to NULL, causing a segmentation fault crash if any schedstat events were delivered during event processing. Fix this by adding delegate_schedstat_cpu and delegate_schedstat_domain via the CREATE_DELEGATE_OP2 macro, and ensuring delegate_tool__init correctly registers them. Additionally, delegate_tool__init completely omitted copying the dont_split_sample_group property from the delegate. This would cause wrapper tools to default the flag to false, which corrupts piped event processing (e.g., in perf inject) by triggering duplicate event deliveries on split sample values in deliver_sample_group(). Similarly, perf_tool__init() omitted the initialization of this boolean field. On stack-allocated tools that rely on this initializer (like intel-tpebs or __cmd_evlist), this could result in uninitialized stack garbage evaluating to true—silently dropping non-leader event members in deliver_sample_group(). Fix both issues by properly copying the field in delegate_tool__init and initializing it to false in perf_tool__init. 
Assisted-by: Gemini-CLI:Google Gemini 3 Fixes: 6331b2669359 ("perf tool: Add a delegate_tool that just delegates actions to another tool") Fixes: 79bcd34e0f3d ("perf inject: Fix leader sampling inserting additional samples") Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/util/tool.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/perf/util/tool.c b/tools/perf/util/tool.c index 013c7839e2cf..ff2150517b75 100644 --- a/tools/perf/util/tool.c +++ b/tools/perf/util/tool.c @@ -285,6 +285,7 @@ void perf_tool__init(struct perf_tool *tool, bool ordered_events) tool->no_warn = false; tool->show_feat_hdr = SHOW_FEAT_NO_HEADER; tool->merge_deferred_callchains = true; + tool->dont_split_sample_group = false; tool->sample = process_event_sample_stub; tool->mmap = process_event_stub; @@ -433,6 +434,8 @@ CREATE_DELEGATE_OP2(stat_config); CREATE_DELEGATE_OP2(stat_round); CREATE_DELEGATE_OP2(thread_map); CREATE_DELEGATE_OP2(time_conv); +CREATE_DELEGATE_OP2(schedstat_cpu); +CREATE_DELEGATE_OP2(schedstat_domain); CREATE_DELEGATE_OP2(tracing_data); #define CREATE_DELEGATE_OP3(name) \ @@ -470,6 +473,7 @@ void delegate_tool__init(struct delegate_tool *tool, struct perf_tool *delegate) tool->tool.no_warn = delegate->no_warn; tool->tool.show_feat_hdr = delegate->show_feat_hdr; tool->tool.merge_deferred_callchains = delegate->merge_deferred_callchains; + tool->tool.dont_split_sample_group = delegate->dont_split_sample_group; tool->tool.sample = delegate_sample; tool->tool.read = delegate_read; @@ -516,4 +520,6 @@ void delegate_tool__init(struct delegate_tool *tool, struct perf_tool *delegate) tool->tool.bpf_metadata = delegate_bpf_metadata; tool->tool.compressed = delegate_compressed; tool->tool.auxtrace = delegate_auxtrace; + tool->tool.schedstat_cpu = delegate_schedstat_cpu; + tool->tool.schedstat_domain = delegate_schedstat_domain; } -- 2.54.0.545.g6539524ca2-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* [PATCH v5 3/5] perf symbols: Fix map removal sequence inside dso__process_kernel_symbol() 2026-05-06 0:45 ` [PATCH v5 0/5] " Ian Rogers 2026-05-06 0:45 ` [PATCH v5 1/5] perf sched: Add missing mmap2 handler in timehist Ian Rogers 2026-05-06 0:45 ` [PATCH v5 2/5] perf tool: Fix missing schedstat delegates and dont_split_sample_group in delegate_tool Ian Rogers @ 2026-05-06 0:45 ` Ian Rogers 2026-05-06 1:45 ` sashiko-bot 2026-05-06 0:45 ` [PATCH v5 4/5] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses Ian Rogers ` (2 subsequent siblings) 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-05-06 0:45 UTC (permalink / raw) To: irogers, acme, gmx, namhyung Cc: adrian.hunter, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz When parsing vmlinux ELF binary symbols, dso__process_kernel_symbol() mutates the map's start address key fields in place before executing maps__remove(). This forces maps__by_address_index() to look up the new mutated address range via strict binary search inside an array interval that was ordered using the old unmutated boundaries, leading to a bsearch() mismatch failure and leaking maps index errors. Fix this natively by executing maps__remove() before mutating the map fields in place, ensuring binary search maps queries always locate and extract target elements flawlessly. 
Fixes: 39b12f781271 ("perf tools: Make it possible to read object code from vmlinux") Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/util/symbol-elf.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 7afa8a117139..f31d481a8627 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1372,20 +1372,31 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, */ if (*remap_kernel && dso__kernel(dso) && !kmodule) { *remap_kernel = false; - map__set_start(map, shdr->sh_addr + ref_reloc(kmap)); - map__set_end(map, map__start(map) + shdr->sh_size); - map__set_pgoff(map, shdr->sh_offset); - map__set_mapping_type(map, MAPPING_TYPE__DSO); - /* Ensure maps are correctly ordered */ + /* + * If the map is tracking inside the kmaps cache list array, we + * MUST remove it before mutating its virtual address key fields + * in place. Otherwise, downstream binary search lookups (bsearch) + * will search for mutated keys inside an array sorted under old + * invariants, causing indexing desynchronization faults. + */ if (kmaps) { int err; struct map *tmp = map__get(map); maps__remove(kmaps, map); + map__set_start(map, shdr->sh_addr + ref_reloc(kmap)); + map__set_end(map, map__start(map) + shdr->sh_size); + map__set_pgoff(map, shdr->sh_offset); + map__set_mapping_type(map, MAPPING_TYPE__DSO); err = maps__insert(kmaps, map); map__put(tmp); if (err) return err; + } else { + map__set_start(map, shdr->sh_addr + ref_reloc(kmap)); + map__set_end(map, map__start(map) + shdr->sh_size); + map__set_pgoff(map, shdr->sh_offset); + map__set_mapping_type(map, MAPPING_TYPE__DSO); } } -- 2.54.0.545.g6539524ca2-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v5 3/5] perf symbols: Fix map removal sequence inside dso__process_kernel_symbol() 2026-05-06 0:45 ` [PATCH v5 3/5] perf symbols: Fix map removal sequence inside dso__process_kernel_symbol() Ian Rogers @ 2026-05-06 1:45 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-05-06 1:45 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 3 potential issue(s) to consider: - [Medium] Incomplete implementation missing identical map mutation anti-patterns in the kallsyms loading path. - [Medium] Out-of-bounds array access and crash on session teardown if `maps__insert()` fails. - [Medium] Lockless window exposes missing kernel map to concurrent worker threads, causing dropped samples. -- commit bf2ae4502d20517901689001acc83f0ec6ca94db Author: Ian Rogers <irogers@google.com> perf symbols: Fix map removal sequence inside dso__process_kernel_symbol() This commit addresses a binary search indexing issue by ensuring a map is removed from its cache list before mutating its address key fields. After the in-place mutations are complete, the map is re-inserted to preserve the sorting invariants of the underlying array. 
> diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c > index 7afa8a1171396..f31d481a8627a 100644 > --- a/tools/perf/util/symbol-elf.c > +++ b/tools/perf/util/symbol-elf.c > @@ -1372,20 +1372,31 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, > */ > if (*remap_kernel && dso__kernel(dso) && !kmodule) { > *remap_kernel = false; > - map__set_start(map, shdr->sh_addr + ref_reloc(kmap)); > - map__set_end(map, map__start(map) + shdr->sh_size); > - map__set_pgoff(map, shdr->sh_offset); > - map__set_mapping_type(map, MAPPING_TYPE__DSO); > - /* Ensure maps are correctly ordered */ > + /* > + * If the map is tracking inside the kmaps cache list array, we > + * MUST remove it before mutating its virtual address key fields > + * in place. Otherwise, downstream binary search lookups (bsearch) > + * will search for mutated keys inside an array sorted under old > + * invariants, causing indexing desynchronization faults. > + */ > if (kmaps) { > int err; > struct map *tmp = map__get(map); > > maps__remove(kmaps, map); Does this introduce a window where the kernel map is missing from kmaps? maps__remove() and maps__insert() each independently acquire and release the kmaps write lock. During the gap between these operations, the kernel map is missing from the collection. For KVM guests, kernel symbols are loaded lazily when the first guest sample is received. If concurrent perf top worker threads process samples during this window, will their maps__find() queries fail and cause samples to be dropped or misattributed as unknown? > + map__set_start(map, shdr->sh_addr + ref_reloc(kmap)); > + map__set_end(map, map__start(map) + shdr->sh_size); > + map__set_pgoff(map, shdr->sh_offset); > + map__set_mapping_type(map, MAPPING_TYPE__DSO); > err = maps__insert(kmaps, map); If maps__insert() fails here (for example, due to an ENOMEM when reallocating the array), is the kernel map left permanently detached from kmaps? 
If it remains detached but is still referenced by machine->vmlinux_map, what happens during session teardown? It looks like machine__destroy_kernel_maps() unconditionally calls maps__remove(). Inside __maps__remove(), if maps__by_address_index() fails to find the map and returns -1 (UINT_MAX), it seems the code will use this index to call map__put(maps_by_address[UINT_MAX]) and perform a memmove(). Could this trigger an out-of-bounds memory access and crash? > map__put(tmp); > if (err) > return err; > + } else { > + map__set_start(map, shdr->sh_addr + ref_reloc(kmap)); > + map__set_end(map, map__start(map) + shdr->sh_size); > + map__set_pgoff(map, shdr->sh_offset); > + map__set_mapping_type(map, MAPPING_TYPE__DSO); > } > } Also, this commit fixes the in-place map boundary mutations for ELF symbols, but does it miss similar patterns in the kallsyms loading path? In dso__load_kernel_sym() and dso__load_guest_kernel_sym(), it appears map__fixup_start(map) directly mutates map->start in place after the map is already inserted into the kmaps array. Will this lockless mutation break the sorting invariants of the maps_by_address array in the same way, causing bsearch lookups to fail? -- Sashiko AI review · https://sashiko.dev/#/patchset/20260506004546.3140141-1-irogers@google.com?part=3 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v5 4/5] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses 2026-05-06 0:45 ` [PATCH v5 0/5] " Ian Rogers ` (2 preceding siblings ...) 2026-05-06 0:45 ` [PATCH v5 3/5] perf symbols: Fix map removal sequence inside dso__process_kernel_symbol() Ian Rogers @ 2026-05-06 0:45 ` Ian Rogers 2026-05-06 2:40 ` sashiko-bot 2026-05-06 18:52 ` Namhyung Kim 2026-05-06 0:45 ` [PATCH v5 5/5] perf test: Add inject ASLR test Ian Rogers 2026-05-08 8:27 ` [PATCH v6 0/6] perf tools: Add inject --aslr feature and prerequisite robustness fixes Ian Rogers 5 siblings, 2 replies; 183+ messages in thread From: Ian Rogers @ 2026-05-06 0:45 UTC (permalink / raw) To: irogers, acme, gmx, namhyung Cc: adrian.hunter, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz If perf.data files are taken from one machine to another they may leak virtual addresses and so weaken ASLR on the machine they are coming from. Add an aslr option for perf inject that remaps all virtual addresses, or drops data/events, so that the virtual address information isn't leaked. When events are not known/handled by the tool they are dropped. This makes the tool conservative and it should never leak ASLR information, but it means virtual address remapping is needed for cases like auxtrace. The ASLR tracking tool virtualizes process and machine namespaces using 'struct machines' to safely isolate host mappings from unprivileged KVM guest address spaces. Memory layouts are tracked globally per process context to ensure linear, continuous space allocations across successive mapping runs. To remain strictly conservative and guarantee security, the tool scrubs breakpoint addresses (bp_addr) from all synthesized stream headers, and drops unsupported complex payloads (such as user register stacks, raw tracepoints, and hardware AUX tracing frames) to completely eliminate accidental address leakage vectors. 
Assisted-by: Gemini-CLI:Google Gemini 3 Signed-off-by: Ian Rogers <irogers@google.com> Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> --- v5: Fix memory leaks inside aslr_tool__delete destructor by calling standard machines__destroy_kernel_maps() to cleanly free host/guest maps and guest machine structures. Introduce the precise 'first_kernel_mapping' tracking guard inside aslr.c to rewrite the core kernel pgoff virtual address while safely protecting module file offsets from corruption. Harden skipn() pipe I/O stream reader loops against EINTR interruption errors. Clean up breakpoint address (bp_addr) memory scrubbing by executing the scrubbing loop directly at core session initialization startup level, natively securing both file headers and streaming pipe channels while removing redundant runtime tool wrapper interception hooks layers. v4: Scrub bp_addr from headers/pipe synthesis attributes. Remove kernel mmap pgoff mathematical delta adjustment leaks to maintain secure base obfuscation bounds. Harden guest space contexts mapping loops, correct ksymbol map base invariants tracking, and plug tail-word padding heap leakage vectors in user stacks and AUX payloads. v3: Combine split-map fixes, guest namespaces, bounds checks, OOM rollbacks, hot path optimization, safe dso references, and I/O stream error handling from v3/v4 development. Drop raw auxtrace events. Fix thread reference leaks in event handlers. Fix 32-bit truncation bug in hashmaps using u64* values. Prevent leaking uninitialized heap memory by zeroing copy buffer. Correct bitmask checks for branch stack flags. Avoid PMU configuration corruption. v2: First review feedback adjustments. 
--- tools/perf/builtin-inject.c | 31 +- tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 1220 +++++++++++++++++++++++++++++++++++ tools/perf/util/aslr.h | 10 + 4 files changed, 1261 insertions(+), 1 deletion(-) create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index f174bc69cec4..8fe479cb4152 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -8,6 +8,7 @@ */ #include "builtin.h" +#include "util/aslr.h" #include "util/color.h" #include "util/dso.h" #include "util/vdso.h" @@ -123,6 +124,7 @@ struct perf_inject { bool in_place_update_dry_run; bool copy_kcore_dir; bool convert_callchain; + bool aslr; const char *input_name; struct perf_data output; u64 bytes_written; @@ -304,6 +306,8 @@ static int perf_event__repipe(const struct perf_tool *tool, return perf_event__repipe_synth(tool, event); } + + static int perf_event__drop(const struct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, struct perf_sample *sample __maybe_unused, @@ -2458,6 +2462,8 @@ static int __cmd_inject(struct perf_inject *inject) } } + + session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc, @@ -2564,6 +2570,8 @@ int cmd_inject(int argc, const char **argv) " instance has a subdir"), OPT_BOOLEAN(0, "convert-callchain", &inject.convert_callchain, "Generate callchains using DWARF and drop register/stack data"), + OPT_BOOLEAN(0, "aslr", &inject.aslr, + "Remap virtual memory addresses similar to ASLR"), OPT_END() }; const char * const inject_usage[] = { @@ -2571,6 +2579,7 @@ int cmd_inject(int argc, const char **argv) NULL }; bool ordered_events; + struct perf_tool *tool = &inject.tool; if (!inject.itrace_synth_opts.set) { /* Disable eager loading of kernel symbols that adds overhead to perf inject. 
*/ @@ -2684,18 +2693,36 @@ int cmd_inject(int argc, const char **argv) inject.tool.schedstat_domain = perf_event__repipe_op2_synth; inject.tool.dont_split_sample_group = true; inject.tool.merge_deferred_callchains = false; - inject.session = __perf_session__new(&data, &inject.tool, + if (inject.aslr) { + tool = aslr_tool__new(&inject.tool); + if (!tool) { + ret = -ENOMEM; + goto out_close_output; + } + } + inject.session = __perf_session__new(&data, tool, /*trace_event_repipe=*/inject.output.is_pipe, /*host_env=*/NULL); if (IS_ERR(inject.session)) { ret = PTR_ERR(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); goto out_close_output; } if (zstd_init(&(inject.session->zstd_data), 0) < 0) pr_warning("Decompression initialization failed.\n"); + if (inject.aslr) { + struct evsel *evsel; + + evlist__for_each_entry(inject.session->evlist, evsel) { + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) + evsel->core.attr.bp_addr = 0; + } + } + /* Save original section info before feature bits change */ ret = save_section_info(&inject); if (ret) @@ -2789,6 +2816,8 @@ int cmd_inject(int argc, const char **argv) strlist__delete(inject.known_build_ids); zstd_fini(&(inject.session->zstd_data)); perf_session__delete(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); out_close_output: if (!inject.in_place_update) perf_data__close(&inject.output); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 70cc91d00804..65b96f3b87e2 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -6,6 +6,7 @@ perf-util-y += arm64-frame-pointer-unwind-support.o perf-util-y += addr2line.o perf-util-y += addr_location.o perf-util-y += annotate.o +perf-util-y += aslr.o perf-util-y += blake2s.o perf-util-y += block-info.o perf-util-y += block-range.o diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c new file mode 100644 index 000000000000..effdcbec0db0 --- /dev/null +++ b/tools/perf/util/aslr.c @@ -0,0 +1,1220 @@ +// SPDX-License-Identifier: 
GPL-2.0 +#include "aslr.h" + +#include "addr_location.h" +#include "debug.h" +#include "event.h" +#include "evsel.h" +#include "machine.h" +#include "map.h" +#include "thread.h" +#include "tool.h" +#include "session.h" +#include "data.h" +#include "dso.h" + +#include <internal/lib.h> /* page_size */ +#include <linux/compiler.h> +#include <linux/zalloc.h> +#include <errno.h> +#include <inttypes.h> +#include <unistd.h> + +static int skipn(int fd, u64 n) +{ + char buf[4096]; + ssize_t ret; + + while (n > 0) { + ret = read(fd, buf, (n < (u64)sizeof(buf) ? n : (u64)sizeof(buf))); + if (ret < 0) { + if (errno == EINTR) + continue; + return ret; + } + if (ret == 0) + return 0; + n -= ret; + } + + return 0; +} + +/** + * struct remap_addresses_key - Key for mapping original addresses to remapped ones. + * @dso: Pointer to the DSO (Dynamic Shared Object) associated with the mapping. + * @invariant: Unique offset invariant within the VMA (Virtual Memory Area). + * Calculated as `start - pgoff`. This value remains constant when + * perf's internal `maps__fixup_overlap_and_insert` splits a map into + * fragmented VMA pieces due to overlapping events, allowing us to + * resolve split maps consistently back to the original VMA. + * @pid: Process ID associated with the mapping. + */ +struct remap_addresses_key { + struct dso *dso; + u64 invariant; + pid_t pid; +}; + +struct aslr_mapping { + struct list_head node; + u64 orig_start; + u64 len; + u64 remap_start; +}; + +struct aslr_tool { + /** @tool: The tool implemented here and a pointer to a delegate to process the data. */ + struct delegate_tool tool; + /** @machines: The machines with the input, not remapped, virtual address layout. */ + struct machines machines; + /** @event_copy: Buffer used to create an event to pass to the delegate. */ + char event_copy[PERF_SAMPLE_MAX_SIZE]; + /** @remap_addresses: mapping from remap_addresses_key to remapped address. 
*/ + struct hashmap remap_addresses; + /** @top_addresses: mapping from process to max remapped address. */ + struct hashmap top_addresses; + /** @first_kernel_mapping: flag indicating if we are still to process any kernel mapping. */ + bool first_kernel_mapping; +}; + +static const pid_t kernel_pid = -1; + +/* Start remapping user processes from a small non-zero offset. */ +static const u64 user_space_start = 0x200000; +static const u64 kernel_space_start = 0xffff800010000000; + +static size_t remap_addresses__hash(long _key, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key = (struct remap_addresses_key *)_key; + + return (size_t)key->dso ^ key->invariant ^ key->pid; +} + +static bool remap_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key1 = (struct remap_addresses_key *)_key1; + struct remap_addresses_key *key2 = (struct remap_addresses_key *)_key2; + + return RC_CHK_EQUAL(key1->dso, key2->dso) && + key1->invariant == key2->invariant && + key1->pid == key2->pid; +} + +static size_t top_addresses__hash(long key, void *ctx __maybe_unused) +{ + return key; +} + +static bool top_addresses__equal(long key1, long key2, void *ctx __maybe_unused) +{ + return key1 == key2; +} + +static u64 round_up_to_page_size(u64 addr) +{ + return (addr + page_size - 1) & ~((u64)page_size - 1); +} + +static u64 aslr_tool__remap_address(struct aslr_tool *aslr, + struct thread *aslr_thread, + u8 cpumode, + u64 addr) +{ + struct addr_location al; + struct remap_addresses_key key; + u64 *remapped_invariant_ptr = NULL; + u64 remap_addr = 0; + u8 effective_cpumode = cpumode; + + if (!aslr_thread) + return 0; /* No thread. */ + + addr_location__init(&al); + if (!thread__find_map(aslr_thread, cpumode, addr, &al)) { + /* + * If lookup fails with specified cpumode, try fallback to the other space + * to be robust against bad cpumode in samples. 
+ */ + if (cpumode == PERF_RECORD_MISC_KERNEL) + effective_cpumode = PERF_RECORD_MISC_USER; + else if (cpumode == PERF_RECORD_MISC_USER) + effective_cpumode = PERF_RECORD_MISC_KERNEL; + else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + effective_cpumode = PERF_RECORD_MISC_GUEST_USER; + else if (cpumode == PERF_RECORD_MISC_GUEST_USER) + effective_cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + + if (!thread__find_map(aslr_thread, effective_cpumode, addr, &al)) { + addr_location__exit(&al); + return 0; /* No mmap. */ + } + } + + key.dso = map__dso(al.map); + key.invariant = map__start(al.map) - map__pgoff(al.map); + key.pid = effective_cpumode == PERF_RECORD_MISC_KERNEL ? kernel_pid : aslr_thread->pid_; + + if (hashmap__find(&aslr->remap_addresses, &key, &remapped_invariant_ptr)) { + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + + (addr - map__start(al.map)); + } else { + if (effective_cpumode == PERF_RECORD_MISC_KERNEL) { + struct hashmap_entry *cur; + size_t bkt; + + hashmap__for_each_entry(&aslr->remap_addresses, cur, bkt) { + struct remap_addresses_key *k; + u64 *v; + + k = (struct remap_addresses_key *)cur->pkey; + if (k->pid == kernel_pid && + k->invariant == key.invariant) { + v = (u64 *)cur->pvalue; + remap_addr = *v + map__pgoff(al.map) + + (addr - map__start(al.map)); + break; + } + } + } + if (remap_addr == 0) { + pr_debug("Cannot find a remapped entry for address %lx in mapping %lx(%lx) for pid=%d\n", + addr, map__start(al.map), map__size(al.map), key.pid); + } + } + + addr_location__exit(&al); + return remap_addr; +} + +static u64 aslr_tool__remap_mapping(struct aslr_tool *aslr, + struct thread *aslr_thread, + u8 cpumode, + u64 start, u64 len, u64 pgoff) +{ + struct addr_location al; + struct addr_location prev_al; + struct remap_addresses_key key; + struct remap_addresses_key *new_key = NULL; + struct remap_addresses_key *old_key = NULL; + u64 remap_addr = 0; + u64 *remapped_invariant_ptr = NULL; + u64 *max_addr_ptr = NULL; + u64 *new_val = 
NULL; + u64 *new_max = NULL; + u64 *old_val = NULL; + u64 *old_val_remap = NULL; + bool is_contiguous = false; + bool first_mapping = false; + bool key_found = false; + int err; + + if (!aslr_thread) + return 0; /* No thread. */ + + addr_location__init(&al); + if (thread__find_map(aslr_thread, cpumode, start, &al)) { + key.dso = map__dso(al.map); + key.invariant = map__start(al.map) - map__pgoff(al.map); + } else { + key.dso = NULL; + key.invariant = start - pgoff; + } + key.pid = cpumode == PERF_RECORD_MISC_KERNEL ? kernel_pid : aslr_thread->pid_; + + if (hashmap__find(&aslr->remap_addresses, &key, &remapped_invariant_ptr)) { + remap_addr = *remapped_invariant_ptr + (al.map ? map__pgoff(al.map) : pgoff); + key_found = true; + } else { + addr_location__init(&prev_al); + if (thread__find_map(aslr_thread, cpumode, start - 1, &prev_al)) { + if (map__start(prev_al.map) + map__size(prev_al.map) == start) { + is_contiguous = true; + } else { + pr_debug("Previous mmap [%lx, %lx] overlaps current map [%lx, %lx]\n", + map__start(prev_al.map), + map__start(prev_al.map) + map__size(prev_al.map), + start, start+len); + } + } + addr_location__exit(&prev_al); + + if (!hashmap__find(&aslr->top_addresses, key.pid, &max_addr_ptr)) { + first_mapping = true; + remap_addr = (cpumode == PERF_RECORD_MISC_KERNEL ? + kernel_space_start : user_space_start); + } else { + remap_addr = *max_addr_ptr; + } + + remap_addr = round_up_to_page_size(remap_addr); + if (!is_contiguous && !first_mapping) + remap_addr += page_size; + + new_key = malloc(sizeof(*new_key)); + new_val = malloc(sizeof(u64)); + + if (!new_key || !new_val) { + free(new_key); + free(new_val); + addr_location__exit(&al); + return 0; + } + *new_key = key; + new_key->dso = dso__get(key.dso); + *new_val = remap_addr - (al.map ? 
map__pgoff(al.map) : pgoff); + + if (hashmap__add(&aslr->remap_addresses, new_key, new_val) != 0) { + dso__put(new_key->dso); + free(new_key); + free(new_val); + addr_location__exit(&al); + return 0; + } + } + + /* Update top_addresses */ + new_max = malloc(sizeof(u64)); + old_val = NULL; + + if (!new_max) { + old_key = NULL; + old_val_remap = NULL; + + if (!key_found) { + hashmap__delete(&aslr->remap_addresses, &key, + &old_key, &old_val_remap); + if (old_key) + dso__put(old_key->dso); + free(old_key); + free(old_val_remap); + } + addr_location__exit(&al); + return 0; + } + *new_max = remap_addr + len; + + if (hashmap__find(&aslr->top_addresses, key.pid, &max_addr_ptr)) { + if (*max_addr_ptr > *new_max) + *new_max = *max_addr_ptr; + } + + err = hashmap__insert(&aslr->top_addresses, key.pid, new_max, + (first_mapping && !key_found) ? + HASHMAP_ADD : HASHMAP_UPDATE, + NULL, &old_val); + if (err) { + old_key = NULL; + old_val_remap = NULL; + + free(new_max); + if (!key_found) { + hashmap__delete(&aslr->remap_addresses, &key, + &old_key, &old_val_remap); + if (old_key) + dso__put(old_key->dso); + free(old_key); + free(old_val_remap); + } + addr_location__exit(&al); + return 0; + } + free(old_val); + + addr_location__exit(&al); + return remap_addr; +} + +static u64 aslr_tool__remap_ksymbol(struct aslr_tool *aslr, + struct thread *aslr_thread, + u64 addr, u32 len) +{ + struct addr_location al; + struct remap_addresses_key key; + struct hashmap_entry *cur; + struct remap_addresses_key *new_key = NULL; + struct remap_addresses_key *old_key = NULL; + struct remap_addresses_key *k; + size_t bkt; + u64 remap_addr = 0; + u64 *remapped_invariant_ptr = NULL; + u64 *max_addr_ptr = NULL; + u64 *new_val = NULL; + u64 *new_max = NULL; + u64 *old_val = NULL; + u64 *old_val_remap = NULL; + u64 *v; + bool first_mapping = false; + int err; + + if (!aslr_thread) + return 0; /* No thread. 
*/ + + addr_location__init(&al); + if (thread__find_map(aslr_thread, PERF_RECORD_MISC_KERNEL, addr, &al)) { + key.dso = map__dso(al.map); + key.invariant = map__start(al.map) - map__pgoff(al.map); + } else { + key.dso = NULL; + key.invariant = addr; /* pgoff is 0 for ksymbols */ + } + key.pid = aslr_thread->pid_; + + if (hashmap__find(&aslr->remap_addresses, &key, &remapped_invariant_ptr)) { + if (al.map) + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + + (addr - map__start(al.map)); + else + remap_addr = *remapped_invariant_ptr; + addr_location__exit(&al); + return remap_addr; + } + + hashmap__for_each_entry(&aslr->remap_addresses, cur, bkt) { + k = (struct remap_addresses_key *)cur->pkey; + if (k->pid == kernel_pid && k->invariant == key.invariant) { + v = (u64 *)cur->pvalue; + + if (al.map) + remap_addr = *v + map__pgoff(al.map) + + (addr - map__start(al.map)); + else + remap_addr = *v; + addr_location__exit(&al); + return remap_addr; + } + } + + if (!hashmap__find(&aslr->top_addresses, key.pid, &max_addr_ptr)) { + first_mapping = true; + remap_addr = kernel_space_start; + } else { + remap_addr = *max_addr_ptr; + } + + remap_addr = round_up_to_page_size(remap_addr) + page_size; + + new_key = malloc(sizeof(*new_key)); + new_val = malloc(sizeof(u64)); + + if (!new_key || !new_val) { + free(new_key); + free(new_val); + addr_location__exit(&al); + return 0; + } + *new_key = key; + new_key->dso = dso__get(key.dso); + if (al.map) + *new_val = remap_addr - (addr - map__start(al.map)) - map__pgoff(al.map); + else + *new_val = remap_addr; + + if (hashmap__add(&aslr->remap_addresses, new_key, new_val) < 0) { + dso__put(new_key->dso); + free(new_key); + free(new_val); + addr_location__exit(&al); + return 0; + } + + new_max = malloc(sizeof(u64)); + old_val = NULL; + + if (!new_max) { + old_key = NULL; + old_val_remap = NULL; + + hashmap__delete(&aslr->remap_addresses, &key, &old_key, &old_val_remap); + if (old_key) + dso__put(old_key->dso); + free(old_key); + 
free(old_val_remap); + addr_location__exit(&al); + return 0; + } + *new_max = remap_addr + len; + + if (hashmap__find(&aslr->top_addresses, key.pid, &max_addr_ptr)) { + if (*max_addr_ptr > *new_max) + *new_max = *max_addr_ptr; + } + + err = hashmap__insert(&aslr->top_addresses, key.pid, new_max, + first_mapping ? + HASHMAP_ADD : HASHMAP_UPDATE, + NULL, &old_val); + if (err) { + old_key = NULL; + old_val_remap = NULL; + + free(new_max); + hashmap__delete(&aslr->remap_addresses, &key, &old_key, &old_val_remap); + if (old_key) + dso__put(old_key->dso); + free(old_key); + free(old_val_remap); + addr_location__exit(&al); + return 0; + } + free(old_val); + + addr_location__exit(&al); + return remap_addr; +} + + +static int aslr_tool__process_mmap(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_mmap(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap.pid, event->mmap.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap, &event->mmap, event->mmap.header.size); + /* Remaps the mmap.start. 
*/ + new_event->mmap.start = aslr_tool__remap_mapping(aslr, thread, cpumode, + event->mmap.start, + event->mmap.len, + event->mmap.pgoff); + if (aslr->first_kernel_mapping && cpumode == PERF_RECORD_MISC_KERNEL) { + new_event->mmap.pgoff = new_event->mmap.start; + aslr->first_kernel_mapping = false; + } + err = delegate->mmap(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_mmap2(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_mmap2(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap2.pid, event->mmap2.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap2, &event->mmap2, event->mmap2.header.size); + /* Remaps the mmap.start. 
*/ + new_event->mmap2.start = aslr_tool__remap_mapping(aslr, thread, cpumode, + event->mmap2.start, + event->mmap2.len, + event->mmap2.pgoff); + if (aslr->first_kernel_mapping && cpumode == PERF_RECORD_MISC_KERNEL) { + new_event->mmap2.pgoff = new_event->mmap2.start; + aslr->first_kernel_mapping = false; + } + err = delegate->mmap2(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_comm(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_comm(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->comm(delegate, event, sample, machine); +} + +static int aslr_tool__process_fork(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. 
*/ + err = perf_event__process_fork(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->fork(delegate, event, sample, machine); +} + +static int aslr_tool__process_exit(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_exit(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->exit(delegate, event, sample, machine); +} + +static int aslr_tool__process_text_poke(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + thread = machine__findnew_thread(aslr_machine, sample->pid, sample->tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->text_poke, &event->text_poke, event->text_poke.header.size); + new_event->text_poke.addr = aslr_tool__remap_address(aslr, thread, cpumode, + event->text_poke.addr); + + err = 
delegate->text_poke(delegate, new_event, sample, machine); + + thread__put(thread); + return err; +} + +static int aslr_tool__process_ksymbol(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + err = perf_event__process_ksymbol(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, kernel_pid, 0); + if (!thread) + return -ENOMEM; + memcpy(&new_event->ksymbol, &event->ksymbol, event->ksymbol.header.size); + /* Remaps the ksymbol.addr */ + new_event->ksymbol.addr = aslr_tool__remap_ksymbol(aslr, thread, + event->ksymbol.addr, event->ksymbol.len); + + err = delegate->ksymbol(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_sample(const struct perf_tool *tool, union perf_event *event, + struct perf_sample *sample, + struct evsel *evsel, struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + int ret; + u64 sample_type; + struct thread *thread; + struct machine *aslr_machine; + __u64 max_i; + __u64 max_j; + union perf_event *new_event; + struct perf_sample new_sample; + __u64 *in_array, *out_array; + u8 cpumode; + u64 addr; + size_t i; + size_t j; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + ret = -EFAULT; + 
sample_type = evsel->core.attr.sample_type; + max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); + max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); + new_event = (union perf_event *)aslr->event_copy; + cpumode = sample->cpumode; + i = 0; + j = 0; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + thread = machine__findnew_thread(aslr_machine, sample->pid, sample->tid); + + if (!thread) + return -ENOMEM; + + if (max_i > PERF_SAMPLE_MAX_SIZE / sizeof(u64)) + goto out_put; + + + + new_event->sample.header = event->sample.header; + + in_array = &event->sample.array[0]; + out_array = &new_event->sample.array[0]; + +#define CHECK_BOUNDS(required_i, required_j) \ + do { \ + if (i + (required_i) > max_i || j + (required_j) > max_j) { \ + ret = -EFAULT; \ + goto out_put; \ + } \ + } while (0) + +#define COPY_U64() \ + do { \ + CHECK_BOUNDS(1, 1); \ + out_array[j++] = in_array[i++]; \ + } while (0) + +#define REMAP_U64(addr_field) \ + do { \ + CHECK_BOUNDS(1, 1); \ + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr_field); \ + i++; \ + } while (0) + + if (sample_type & PERF_SAMPLE_IDENTIFIER) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_IP) + REMAP_U64(sample->ip); + if (sample_type & PERF_SAMPLE_TID) + COPY_U64(); /* pid, tid */ + if (sample_type & PERF_SAMPLE_TIME) + COPY_U64(); /* time */ + if (sample_type & PERF_SAMPLE_ADDR) + REMAP_U64(sample->addr); + if (sample_type & PERF_SAMPLE_ID) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_STREAM_ID) + COPY_U64(); /* stream_id */ + if (sample_type & PERF_SAMPLE_CPU) + COPY_U64(); /* cpu, res */ + if (sample_type & PERF_SAMPLE_PERIOD) + COPY_U64(); /* period */ + if (sample_type & PERF_SAMPLE_READ) { + if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + 
COPY_U64(); /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } else { + u64 nr; + + CHECK_BOUNDS(1, 1); + nr = out_array[j++] = in_array[i++]; + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + for (u64 cntr = 0; cntr < nr; cntr++) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } + } + } + if (sample_type & PERF_SAMPLE_CALLCHAIN) { + u64 nr; + + CHECK_BOUNDS(1, 1); + nr = out_array[j++] = in_array[i++]; + + for (u64 cntr = 0; cntr < nr; cntr++) { + CHECK_BOUNDS(1, 1); + addr = in_array[i++]; + if (addr >= PERF_CONTEXT_MAX) { + out_array[j++] = addr; + switch (addr) { + case PERF_CONTEXT_HV: + cpumode = PERF_RECORD_MISC_HYPERVISOR; + break; + case PERF_CONTEXT_KERNEL: + cpumode = PERF_RECORD_MISC_KERNEL; + break; + case PERF_CONTEXT_USER: + cpumode = PERF_RECORD_MISC_USER; + break; + case PERF_CONTEXT_GUEST: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_KERNEL: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_USER: + cpumode = PERF_RECORD_MISC_GUEST_USER; + break; + case PERF_CONTEXT_USER_DEFERRED: + /* + * Immediately followed by a 64-bit + * stitching cookie. Skip/Copy it! 
+ */ + CHECK_BOUNDS(1, 1); + out_array[j++] = in_array[i++]; + cntr++; + break; + default: + pr_debug("invalid callchain context: %"PRIx64"\n", addr); + ret = 0; + goto out_put; + } + continue; + } + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr); + } + } + if (sample_type & PERF_SAMPLE_RAW) { + size_t bytes = sizeof(u32) + sample->raw_size; + size_t u64_words = (bytes + 7) / 8; + + if (i + u64_words > max_i || j + u64_words > max_j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], bytes); + i += u64_words; + j += u64_words; + /* + * TODO: certain raw samples can be remapped, such as + * tracepoints by examining their fields. + */ + pr_debug("Dropping raw samples as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + u64 nr; + + CHECK_BOUNDS(1, 1); + nr = out_array[j++] = in_array[i++]; + + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) + COPY_U64(); /* hw_idx */ + + if (nr > (ULLONG_MAX / 3)) { + ret = -EFAULT; + goto out_put; + } + if (nr * 3 > max_i - i || nr * 3 > max_j - j) { + ret = -EFAULT; + goto out_put; + } + for (u64 cntr = 0; cntr < nr; cntr++) { + out_array[j++] = aslr_tool__remap_address(aslr, thread, + sample->cpumode, + in_array[i++]); /* from */ + out_array[j++] = aslr_tool__remap_address(aslr, thread, + sample->cpumode, + in_array[i++]); /* to */ + out_array[j++] = in_array[i++]; /* flags */ + } + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) { + if (nr > max_i - i || nr > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); + i += nr; + j += nr; + /* TODO: confirm branch counters don't leak ASLR information. 
*/ + pr_debug("Dropping sample branch counters as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + } + if (sample_type & PERF_SAMPLE_REGS_USER) { + u64 abi; + + COPY_U64(); /* abi */ + abi = out_array[j-1]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(evsel->core.attr.sample_regs_user); + + if (nr > max_i - i || nr > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); + i += nr; + j += nr; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping regs user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_STACK_USER) { + u64 size; + + CHECK_BOUNDS(1, 1); + size = out_array[j++] = in_array[i++]; + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + if (size % 8) { + size_t pad = 8 - (size % 8); + + memset(((char *)&out_array[j]) + size, 0, pad); + } + i += u64_words; + j += u64_words; + + COPY_U64(); /* dyn_size */ + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping stack user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + COPY_U64(); /* perf_sample_weight */ + if (sample_type & PERF_SAMPLE_DATA_SRC) + COPY_U64(); /* data_src */ + if (sample_type & PERF_SAMPLE_TRANSACTION) + COPY_U64(); /* transaction */ + if (sample_type & PERF_SAMPLE_REGS_INTR) { + u64 abi; + + COPY_U64(); /* abi */ + abi = out_array[j-1]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(evsel->core.attr.sample_regs_intr); + + if (nr > max_i - i || nr > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); + i += nr; + j += nr; + } + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_PHYS_ADDR) { + COPY_U64(); /* phys_addr */ + /* TODO: can this be less conservative? */ + pr_debug("Dropping physical address sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_CGROUP) + COPY_U64(); /* cgroup */ + if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + COPY_U64(); /* data_page_size */ + if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + COPY_U64(); /* code_page_size */ + + if (sample_type & PERF_SAMPLE_AUX) { + u64 size; + + CHECK_BOUNDS(1, 1); + size = out_array[j++] = in_array[i++]; + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + if (size % 8) { + size_t pad = 8 - (size % 8); + + memset(((char *)&out_array[j]) + size, 0, pad); + } + i += u64_words; + j += u64_words; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping aux sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + if (evsel__is_offcpu_event(evsel)) { + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping off-CPU sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); + + perf_sample__init(&new_sample, /*all=*/ true); + ret = evsel__parse_sample(evsel, new_event, &new_sample); + if (ret) { + perf_sample__exit(&new_sample); + goto out_put; + } + + ret = delegate->sample(delegate, new_event, &new_sample, evsel, machine); + perf_sample__exit(&new_sample); + +out_put: + thread__put(thread); + return ret; +} + +#undef CHECK_BOUNDS +#undef COPY_U64 +#undef REMAP_U64 + + +static int aslr_tool__process_attr(const struct perf_tool *tool, + union perf_event *event, + struct evlist **pevlist) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + + memcpy(&new_event->attr, &event->attr, event->attr.header.size); + if (new_event->attr.attr.type == PERF_TYPE_BREAKPOINT) + new_event->attr.attr.bp_addr = 0; /* Conservatively remove addresses. 
*/ + + return delegate->attr(delegate, new_event, pevlist); +} + +static s64 aslr_tool__process_auxtrace(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, + union perf_event *event) +{ + if (perf_data__is_pipe(session->data)) { + int err = skipn(perf_data__fd(session->data), event->auxtrace.size); + + if (err < 0) + return err; + } + return event->auxtrace.size; +} + +static int aslr_tool__process_auxtrace_info(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + +static int aslr_tool__process_auxtrace_error(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + +static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) +{ + delegate_tool__init(&aslr->tool, delegate); + aslr->tool.tool.ordered_events = true; + + machines__init(&aslr->machines); + + hashmap__init(&aslr->remap_addresses, + remap_addresses__hash, remap_addresses__equal, + /*ctx=*/NULL); + hashmap__init(&aslr->top_addresses, + top_addresses__hash, top_addresses__equal, + /*ctx=*/NULL); + aslr->first_kernel_mapping = true; + + aslr->tool.tool.sample = aslr_tool__process_sample; + /* read - reads a counter, okay to delegate. */ + aslr->tool.tool.mmap = aslr_tool__process_mmap; + aslr->tool.tool.mmap2 = aslr_tool__process_mmap2; + aslr->tool.tool.comm = aslr_tool__process_comm; + aslr->tool.tool.fork = aslr_tool__process_fork; + aslr->tool.tool.exit = aslr_tool__process_exit; + /* namespaces, cgroup, lost, lost_sample, aux, */ + /* itrace_start, aux_output_hw_id, context_switch, throttle, unthrottle */ + /* - no virtual addresses. */ + aslr->tool.tool.ksymbol = aslr_tool__process_ksymbol; + /* bpf - no virtual address. 
*/ + aslr->tool.tool.text_poke = aslr_tool__process_text_poke; + aslr->tool.tool.attr = aslr_tool__process_attr; + /* event_update, tracing_data, finished_round, build_id, id_index, */ + /* event_update, tracing_data, finished_round, build_id, id_index, */ + /* auxtrace_info, auxtrace_error, time_conv, thread_map, cpu_map, */ + /* stat_config, stat, feature, finished_init, bpf_metadata, compressed, */ + /* auxtrace - no virtual addresses. */ + aslr->tool.tool.auxtrace = aslr_tool__process_auxtrace; + aslr->tool.tool.auxtrace_info = aslr_tool__process_auxtrace_info; + aslr->tool.tool.auxtrace_error = aslr_tool__process_auxtrace_error; +} + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate) +{ + struct aslr_tool *aslr = zalloc(sizeof(*aslr)); + + if (!aslr) + return NULL; + + aslr_tool__init(aslr, delegate); + return &aslr->tool.tool; +} + +void aslr_tool__delete(struct perf_tool *tool) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct hashmap_entry *cur; + size_t bkt; + + if (!tool) + return; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + + hashmap__for_each_entry(&aslr->remap_addresses, cur, bkt) { + struct remap_addresses_key *key = (struct remap_addresses_key *)cur->pkey; + + if (key) + dso__put(key->dso); + zfree(&cur->pkey); + zfree(&cur->pvalue); + } + hashmap__for_each_entry(&aslr->top_addresses, cur, bkt) { + zfree(&cur->pvalue); + } + + hashmap__clear(&aslr->remap_addresses); + hashmap__clear(&aslr->top_addresses); + machines__destroy_kernel_maps(&aslr->machines); + machines__exit(&aslr->machines); + free(aslr); +} diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h new file mode 100644 index 000000000000..ea984d82681f --- /dev/null +++ b/tools/perf/util/aslr.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_ASLR_H +#define __PERF_ASLR_H + +struct perf_tool; + +struct perf_tool *aslr_tool__new(struct perf_tool 
*delegate); +void aslr_tool__delete(struct perf_tool *aslr); + +#endif /* __PERF_ASLR_H */ -- 2.54.0.545.g6539524ca2-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v5 4/5] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses 2026-05-06 0:45 ` [PATCH v5 4/5] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses Ian Rogers @ 2026-05-06 2:40 ` sashiko-bot 2026-05-06 18:52 ` Namhyung Kim 1 sibling, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-05-06 2:40 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 3 potential issue(s) to consider: - [High] Cross-VM Isolation Failure in ASLR Remapping - [Medium] Callchain Parsing Omission for `PERF_CONTEXT_USER_DEFERRED` - [Low] Fragile Alignment of `event_copy` Buffer -- commit 1b2fdd3058f351673614b5537a0bdcbb471a964a Author: Ian Rogers <irogers@google.com> perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses This patch adds an aslr option for perf inject that remaps all virtual addresses to prevent leaking host ASLR information. It uses a struct machines instance to isolate host mappings from KVM guest address spaces. > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > new file mode 100644 > index 0000000000000..effdcbec0db0d > --- /dev/null > +++ b/tools/perf/util/aslr.c [ ... ] > +struct remap_addresses_key { > + struct dso *dso; > + u64 invariant; > + pid_t pid; > +}; Is there a missing machine identifier in this struct? If host processes and guest processes share the same PID and map anonymous memory at the same offset, it appears their keys will collide, causing the guest to reuse the host's remapped address. [ ... ] > +struct aslr_tool { > + /** @tool: The tool implemented here and a pointer to a delegate to process the data. */ > + struct delegate_tool tool; > + /** @machines: The machines with the input, not remapped, virtual address layout. */ > + struct machines machines; > + /** @event_copy: Buffer used to create an event to pass to the delegate. 
*/ > + char event_copy[PERF_SAMPLE_MAX_SIZE]; This isn't a bug, but since event_copy is declared as a char array, it only guarantees a 1-byte alignment. While it currently happens to be 8-byte aligned because it follows struct machines, could future additions to struct aslr_tool break this alignment and cause crashes on strict-alignment architectures when it is cast to union perf_event *? > + /** @remap_addresses: mapping from remap_addresses_key to remapped address. */ > + struct hashmap remap_addresses; > + /** @top_addresses: mapping from process to max remapped address. */ > + struct hashmap top_addresses; Since top_addresses tracks allocations by pid alone, will host and guest processes with the same PID share the same allocation pool and intertwine their memory layouts? [ ... ] > +static u64 aslr_tool__remap_ksymbol(struct aslr_tool *aslr, > + struct thread *aslr_thread, > + u64 addr, u32 len) > +{ [ ... ] > + hashmap__for_each_entry(&aslr->remap_addresses, cur, bkt) { > + k = (struct remap_addresses_key *)cur->pkey; > + if (k->pid == kernel_pid && k->invariant == key.invariant) { > + v = (u64 *)cur->pvalue; > + > + if (al.map) > + remap_addr = *v + map__pgoff(al.map) + > + (addr - map__start(al.map)); > + else > + remap_addr = *v; > + addr_location__exit(&al); > + return remap_addr; > + } > + } Because both host and guest use kernel_pid (-1), will this loop match the first entry regardless of dso identity and cause guest kernel symbols to collide with host kernel symbols, leaking host kernel layouts to the guest? [ ... ] > +static int aslr_tool__process_sample(const struct perf_tool *tool, union perf_event *event, > + struct perf_sample *sample, > + struct evsel *evsel, struct machine *machine) > +{ [ ... ] > + case PERF_CONTEXT_GUEST_USER: > + cpumode = PERF_RECORD_MISC_GUEST_USER; > + break; > + case PERF_CONTEXT_USER_DEFERRED: > + /* > + * Immediately followed by a 64-bit > + * stitching cookie. Skip/Copy it! 
> + */ > + CHECK_BOUNDS(1, 1); > + out_array[j++] = in_array[i++]; > + cntr++; > + break; Should this case also update cpumode to PERF_RECORD_MISC_USER? Without it, subsequent user-space addresses will be processed using the preceding CPU mode (usually kernel). If a user address overlaps with a valid kernel map, it looks like it could be erroneously remapped as a kernel address. -- Sashiko AI review · https://sashiko.dev/#/patchset/20260506004546.3140141-1-irogers@google.com?part=4 ^ permalink raw reply [flat|nested] 183+ messages in thread
* Re: [PATCH v5 4/5] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses 2026-05-06 0:45 ` [PATCH v5 4/5] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses Ian Rogers 2026-05-06 2:40 ` sashiko-bot @ 2026-05-06 18:52 ` Namhyung Kim 2026-05-06 20:01 ` Ian Rogers 1 sibling, 1 reply; 183+ messages in thread From: Namhyung Kim @ 2026-05-06 18:52 UTC (permalink / raw) To: Ian Rogers Cc: acme, gmx, adrian.hunter, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz On Tue, May 05, 2026 at 05:45:45PM -0700, Ian Rogers wrote: > If perf.data files are taken from one machine to another they may > leak virtual addresses and so weaken ASLR on the machine they are > coming from. Add an aslr option for perf inject that remaps all > virtual addresses, or drops data/events, so that the virtual address > information isn't leaked. > > When events are not known/handled by the tool they are dropped. This > makes the tool conservative and it should never leak ASLR information, > but it means virtual address remapping is needed for cases like > auxtrace. > > The ASLR tracking tool virtualizes process and machine namespaces using > 'struct machines' to safely isolate host mappings from unprivileged KVM guest > address spaces. Memory layouts are tracked globally per process context to > ensure linear, continuous space allocations across successive mapping runs. > > To remain strictly conservative and guarantee security, the tool scrubs > breakpoint addresses (bp_addr) from all synthesized stream headers, and drops > unsupported complex payloads (such as user register stacks, raw tracepoints, > and hardware AUX tracing frames) to completely eliminate accidental address > leakage vectors. 
> > Assisted-by: Gemini-CLI:Google Gemini 3 > Signed-off-by: Ian Rogers <irogers@google.com> > Co-developed-by: Gabriel Marin <gmx@google.com> > Signed-off-by: Gabriel Marin <gmx@google.com> > --- > v5: Fix memory leaks inside aslr_tool__delete destructor by calling standard > machines__destroy_kernel_maps() to cleanly free host/guest maps and guest > machine structures. Introduce the precise 'first_kernel_mapping' tracking > guard inside aslr.c to rewrite the core kernel pgoff virtual address while > safely protecting module file offsets from corruption. Harden skipn() > pipe I/O stream reader loops against EINTR interruption errors. Clean up > breakpoint address (bp_addr) memory scrubbing by executing the scrubbing loop > directly at core session initialization startup level, natively securing both > file headers and streaming pipe channels while removing redundant runtime > tool wrapper interception hooks layers. > v4: Scrub bp_addr from headers/pipe synthesis attributes. Remove kernel > mmap pgoff mathematical delta adjustment leaks to maintain secure > base obfuscation bounds. Harden guest space contexts mapping loops, > correct ksymbol map base invariants tracking, and plug tail-word > padding heap leakage vectors in user stacks and AUX payloads. > v3: Combine split-map fixes, guest namespaces, bounds checks, OOM rollbacks, > hot path optimization, safe dso references, and I/O stream error handling > from v3/v4 development. Drop raw auxtrace events. Fix thread reference leaks > in event handlers. Fix 32-bit truncation bug in hashmaps using u64* values. > Prevent leaking uninitialized heap memory by zeroing copy buffer. Correct > bitmask checks for branch stack flags. Avoid PMU configuration corruption. > v2: First review feedback adjustments. 
> --- > tools/perf/builtin-inject.c | 31 +- > tools/perf/util/Build | 1 + > tools/perf/util/aslr.c | 1220 +++++++++++++++++++++++++++++++++++ > tools/perf/util/aslr.h | 10 + > 4 files changed, 1261 insertions(+), 1 deletion(-) > create mode 100644 tools/perf/util/aslr.c > create mode 100644 tools/perf/util/aslr.h > > diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c > index f174bc69cec4..8fe479cb4152 100644 > --- a/tools/perf/builtin-inject.c > +++ b/tools/perf/builtin-inject.c > @@ -8,6 +8,7 @@ > */ > #include "builtin.h" > > +#include "util/aslr.h" > #include "util/color.h" > #include "util/dso.h" > #include "util/vdso.h" > @@ -123,6 +124,7 @@ struct perf_inject { > bool in_place_update_dry_run; > bool copy_kcore_dir; > bool convert_callchain; > + bool aslr; > const char *input_name; > struct perf_data output; > u64 bytes_written; > @@ -304,6 +306,8 @@ static int perf_event__repipe(const struct perf_tool *tool, > return perf_event__repipe_synth(tool, event); > } > > + > + > static int perf_event__drop(const struct perf_tool *tool __maybe_unused, > union perf_event *event __maybe_unused, > struct perf_sample *sample __maybe_unused, > @@ -2458,6 +2462,8 @@ static int __cmd_inject(struct perf_inject *inject) > } > } > > + > + Unnecessary whitespace changes here and above.
> session->header.data_offset = output_data_offset; > session->header.data_size = inject->bytes_written; > perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc, > @@ -2564,6 +2570,8 @@ int cmd_inject(int argc, const char **argv) > " instance has a subdir"), > OPT_BOOLEAN(0, "convert-callchain", &inject.convert_callchain, > "Generate callchains using DWARF and drop register/stack data"), > + OPT_BOOLEAN(0, "aslr", &inject.aslr, > + "Remap virtual memory addresses similar to ASLR"), > OPT_END() > }; > const char * const inject_usage[] = { > @@ -2571,6 +2579,7 @@ int cmd_inject(int argc, const char **argv) > NULL > }; > bool ordered_events; > + struct perf_tool *tool = &inject.tool; > > if (!inject.itrace_synth_opts.set) { > /* Disable eager loading of kernel symbols that adds overhead to perf inject. */ > @@ -2684,18 +2693,36 @@ int cmd_inject(int argc, const char **argv) > inject.tool.schedstat_domain = perf_event__repipe_op2_synth; > inject.tool.dont_split_sample_group = true; > inject.tool.merge_deferred_callchains = false; > - inject.session = __perf_session__new(&data, &inject.tool, > + if (inject.aslr) { > + tool = aslr_tool__new(&inject.tool); > + if (!tool) { > + ret = -ENOMEM; > + goto out_close_output; > + } > + } > + inject.session = __perf_session__new(&data, tool, > /*trace_event_repipe=*/inject.output.is_pipe, > /*host_env=*/NULL); > > if (IS_ERR(inject.session)) { > ret = PTR_ERR(inject.session); > + if (inject.aslr) > + aslr_tool__delete(tool); > goto out_close_output; > } > > if (zstd_init(&(inject.session->zstd_data), 0) < 0) > pr_warning("Decompression initialization failed.\n"); > > + if (inject.aslr) { > + struct evsel *evsel; > + > + evlist__for_each_entry(inject.session->evlist, evsel) { > + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) > + evsel->core.attr.bp_addr = 0; > + } > + } > + > /* Save original section info before feature bits change */ > ret = save_section_info(&inject); > if (ret) > @@ -2789,6 +2816,8 @@ int 
cmd_inject(int argc, const char **argv) > strlist__delete(inject.known_build_ids); > zstd_fini(&(inject.session->zstd_data)); > perf_session__delete(inject.session); > + if (inject.aslr) > + aslr_tool__delete(tool); > out_close_output: > if (!inject.in_place_update) > perf_data__close(&inject.output); > diff --git a/tools/perf/util/Build b/tools/perf/util/Build > index 70cc91d00804..65b96f3b87e2 100644 > --- a/tools/perf/util/Build > +++ b/tools/perf/util/Build > @@ -6,6 +6,7 @@ perf-util-y += arm64-frame-pointer-unwind-support.o > perf-util-y += addr2line.o > perf-util-y += addr_location.o > perf-util-y += annotate.o > +perf-util-y += aslr.o > perf-util-y += blake2s.o > perf-util-y += block-info.o > perf-util-y += block-range.o > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > new file mode 100644 > index 000000000000..effdcbec0db0 > --- /dev/null > +++ b/tools/perf/util/aslr.c > @@ -0,0 +1,1220 @@ > +// SPDX-License-Identifier: GPL-2.0 > +#include "aslr.h" > + > +#include "addr_location.h" > +#include "debug.h" > +#include "event.h" > +#include "evsel.h" > +#include "machine.h" > +#include "map.h" > +#include "thread.h" > +#include "tool.h" > +#include "session.h" > +#include "data.h" > +#include "dso.h" > + > +#include <internal/lib.h> /* page_size */ > +#include <linux/compiler.h> > +#include <linux/zalloc.h> > +#include <errno.h> > +#include <inttypes.h> > +#include <unistd.h> > + > +static int skipn(int fd, u64 n) > +{ > + char buf[4096]; > + ssize_t ret; > + > + while (n > 0) { > + ret = read(fd, buf, (n < (u64)sizeof(buf) ? n : (u64)sizeof(buf))); > + if (ret < 0) { > + if (errno == EINTR) > + continue; > + return ret; > + } > + if (ret == 0) > + return 0; > + n -= ret; > + } > + > + return 0; > +} > + > +/** > + * struct remap_addresses_key - Key for mapping original addresses to remapped ones. > + * @dso: Pointer to the DSO (Dynamic Shared Object) associated with the mapping. 
> + * @invariant: Unique offset invariant within the VMA (Virtual Memory Area). > + * Calculated as `start - pgoff`. This value remains constant when > + * perf's internal `maps__fixup_overlap_and_insert` splits a map into > + * fragmented VMA pieces due to overlapping events, allowing us to > + * resolve split maps consistently back to the original VMA. I'm curious if it's guaranteed to be unique within a process. > + * @pid: Process ID associated with the mapping. > + */ > +struct remap_addresses_key { > + struct dso *dso; > + u64 invariant; > + pid_t pid; > +}; > + > +struct aslr_mapping { > + struct list_head node; > + u64 orig_start; > + u64 len; > + u64 remap_start; > +}; > + > +struct aslr_tool { > + /** @tool: The tool implemented here and a pointer to a delegate to process the data. */ > + struct delegate_tool tool; > + /** @machines: The machines with the input, not remapped, virtual address layout. */ > + struct machines machines; > + /** @event_copy: Buffer used to create an event to pass to the delegate. */ > + char event_copy[PERF_SAMPLE_MAX_SIZE]; > + /** @remap_addresses: mapping from remap_addresses_key to remapped address. */ > + struct hashmap remap_addresses; > + /** @top_addresses: mapping from process to max remapped address. */ > + struct hashmap top_addresses; > + /** @first_kernel_mapping: flag indicating if we are still to process any kernel mapping. */ > + bool first_kernel_mapping; > +}; > + > +static const pid_t kernel_pid = -1; > + > +/* Start remapping user processes from a small non-zero offset. 
*/ > +static const u64 user_space_start = 0x200000; > +static const u64 kernel_space_start = 0xffff800010000000; > + > +static size_t remap_addresses__hash(long _key, void *ctx __maybe_unused) > +{ > + struct remap_addresses_key *key = (struct remap_addresses_key *)_key; > + > + return (size_t)key->dso ^ key->invariant ^ key->pid; > +} > + > +static bool remap_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) > +{ > + struct remap_addresses_key *key1 = (struct remap_addresses_key *)_key1; > + struct remap_addresses_key *key2 = (struct remap_addresses_key *)_key2; > + > + return RC_CHK_EQUAL(key1->dso, key2->dso) && > + key1->invariant == key2->invariant && > + key1->pid == key2->pid; > +} > + > +static size_t top_addresses__hash(long key, void *ctx __maybe_unused) > +{ > + return key; > +} > + > +static bool top_addresses__equal(long key1, long key2, void *ctx __maybe_unused) > +{ > + return key1 == key2; > +} > + > +static u64 round_up_to_page_size(u64 addr) > +{ > + return (addr + page_size - 1) & ~((u64)page_size - 1); > +} > + > +static u64 aslr_tool__remap_address(struct aslr_tool *aslr, > + struct thread *aslr_thread, > + u8 cpumode, > + u64 addr) > +{ > + struct addr_location al; > + struct remap_addresses_key key; > + u64 *remapped_invariant_ptr = NULL; > + u64 remap_addr = 0; > + u8 effective_cpumode = cpumode; > + > + if (!aslr_thread) > + return 0; /* No thread. */ > + > + addr_location__init(&al); > + if (!thread__find_map(aslr_thread, cpumode, addr, &al)) { > + /* > + * If lookup fails with specified cpumode, try fallback to the other space > + * to be robust against bad cpumode in samples. 
> + */ > + if (cpumode == PERF_RECORD_MISC_KERNEL) > + effective_cpumode = PERF_RECORD_MISC_USER; > + else if (cpumode == PERF_RECORD_MISC_USER) > + effective_cpumode = PERF_RECORD_MISC_KERNEL; > + else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL) > + effective_cpumode = PERF_RECORD_MISC_GUEST_USER; > + else if (cpumode == PERF_RECORD_MISC_GUEST_USER) > + effective_cpumode = PERF_RECORD_MISC_GUEST_KERNEL; > + > + if (!thread__find_map(aslr_thread, effective_cpumode, addr, &al)) { > + addr_location__exit(&al); > + return 0; /* No mmap. */ > + } > + } > + > + key.dso = map__dso(al.map); > + key.invariant = map__start(al.map) - map__pgoff(al.map); > + key.pid = effective_cpumode == PERF_RECORD_MISC_KERNEL ? kernel_pid : aslr_thread->pid_; > + > + if (hashmap__find(&aslr->remap_addresses, &key, &remapped_invariant_ptr)) { > + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + > + (addr - map__start(al.map)); > + } else { > + if (effective_cpumode == PERF_RECORD_MISC_KERNEL) { > + struct hashmap_entry *cur; > + size_t bkt; > + > + hashmap__for_each_entry(&aslr->remap_addresses, cur, bkt) { > + struct remap_addresses_key *k; > + u64 *v; > + > + k = (struct remap_addresses_key *)cur->pkey; > + if (k->pid == kernel_pid && > + k->invariant == key.invariant) { > + v = (u64 *)cur->pvalue; > + remap_addr = *v + map__pgoff(al.map) + > + (addr - map__start(al.map)); > + break; > + } > + } > + } > + if (remap_addr == 0) { > + pr_debug("Cannot find a remapped entry for address %lx in mapping %lx(%lx) for pid=%d\n", > + addr, map__start(al.map), map__size(al.map), key.pid); > + } > + } > + > + addr_location__exit(&al); > + return remap_addr; > +} > + > +static u64 aslr_tool__remap_mapping(struct aslr_tool *aslr, > + struct thread *aslr_thread, > + u8 cpumode, > + u64 start, u64 len, u64 pgoff) > +{ > + struct addr_location al; > + struct addr_location prev_al; > + struct remap_addresses_key key; > + struct remap_addresses_key *new_key = NULL; > + struct 
remap_addresses_key *old_key = NULL; > + u64 remap_addr = 0; > + u64 *remapped_invariant_ptr = NULL; > + u64 *max_addr_ptr = NULL; > + u64 *new_val = NULL; > + u64 *new_max = NULL; > + u64 *old_val = NULL; > + u64 *old_val_remap = NULL; > + bool is_contiguous = false; > + bool first_mapping = false; > + bool key_found = false; > + int err; > + > + if (!aslr_thread) > + return 0; /* No thread. */ > + > + addr_location__init(&al); > + if (thread__find_map(aslr_thread, cpumode, start, &al)) { > + key.dso = map__dso(al.map); > + key.invariant = map__start(al.map) - map__pgoff(al.map); > + } else { > + key.dso = NULL; > + key.invariant = start - pgoff; > + } > + key.pid = cpumode == PERF_RECORD_MISC_KERNEL ? kernel_pid : aslr_thread->pid_; > + > + if (hashmap__find(&aslr->remap_addresses, &key, &remapped_invariant_ptr)) { > + remap_addr = *remapped_invariant_ptr + (al.map ? map__pgoff(al.map) : pgoff); > + key_found = true; > + } else { > + addr_location__init(&prev_al); > + if (thread__find_map(aslr_thread, cpumode, start - 1, &prev_al)) { > + if (map__start(prev_al.map) + map__size(prev_al.map) == start) { > + is_contiguous = true; > + } else { > + pr_debug("Previous mmap [%lx, %lx] overlaps current map [%lx, %lx]\n", > + map__start(prev_al.map), > + map__start(prev_al.map) + map__size(prev_al.map), > + start, start+len); > + } > + } > + addr_location__exit(&prev_al); > + > + if (!hashmap__find(&aslr->top_addresses, key.pid, &max_addr_ptr)) { > + first_mapping = true; > + remap_addr = (cpumode == PERF_RECORD_MISC_KERNEL ? > + kernel_space_start : user_space_start); > + } else { > + remap_addr = *max_addr_ptr; > + } > + > + remap_addr = round_up_to_page_size(remap_addr); > + if (!is_contiguous && !first_mapping) > + remap_addr += page_size; > + > + new_key = malloc(sizeof(*new_key)); > + new_val = malloc(sizeof(u64)); I think the value of hashmap can be passed as value if it's u64.. well on 64-bit systems. 
> + > + if (!new_key || !new_val) { > + free(new_key); > + free(new_val); > + addr_location__exit(&al); > + return 0; > + } > + *new_key = key; > + new_key->dso = dso__get(key.dso); > + *new_val = remap_addr - (al.map ? map__pgoff(al.map) : pgoff); > + > + if (hashmap__add(&aslr->remap_addresses, new_key, new_val) != 0) { > + dso__put(new_key->dso); > + free(new_key); > + free(new_val); > + addr_location__exit(&al); > + return 0; > + } > + } > + > + /* Update top_addresses */ > + new_max = malloc(sizeof(u64)); > + old_val = NULL; > + > + if (!new_max) { > + old_key = NULL; > + old_val_remap = NULL; > + > + if (!key_found) { > + hashmap__delete(&aslr->remap_addresses, &key, > + &old_key, &old_val_remap); > + if (old_key) > + dso__put(old_key->dso); > + free(old_key); > + free(old_val_remap); > + } > + addr_location__exit(&al); > + return 0; > + } > + *new_max = remap_addr + len; > + > + if (hashmap__find(&aslr->top_addresses, key.pid, &max_addr_ptr)) { > + if (*max_addr_ptr > *new_max) > + *new_max = *max_addr_ptr; > + } > + > + err = hashmap__insert(&aslr->top_addresses, key.pid, new_max, > + (first_mapping && !key_found) ? > + HASHMAP_ADD : HASHMAP_UPDATE, > + NULL, &old_val); > + if (err) { > + old_key = NULL; > + old_val_remap = NULL; > + > + free(new_max); > + if (!key_found) { > + hashmap__delete(&aslr->remap_addresses, &key, > + &old_key, &old_val_remap); > + if (old_key) > + dso__put(old_key->dso); > + free(old_key); > + free(old_val_remap); > + } > + addr_location__exit(&al); > + return 0; > + } > + free(old_val); > + > + addr_location__exit(&al); > + return remap_addr; > +} > + > +static u64 aslr_tool__remap_ksymbol(struct aslr_tool *aslr, > + struct thread *aslr_thread, > + u64 addr, u32 len) Any chance you can share the code with the above function? 
> +{ > + struct addr_location al; > + struct remap_addresses_key key; > + struct hashmap_entry *cur; > + struct remap_addresses_key *new_key = NULL; > + struct remap_addresses_key *old_key = NULL; > + struct remap_addresses_key *k; > + size_t bkt; > + u64 remap_addr = 0; > + u64 *remapped_invariant_ptr = NULL; > + u64 *max_addr_ptr = NULL; > + u64 *new_val = NULL; > + u64 *new_max = NULL; > + u64 *old_val = NULL; > + u64 *old_val_remap = NULL; > + u64 *v; > + bool first_mapping = false; > + int err; > + > + if (!aslr_thread) > + return 0; /* No thread. */ > + > + addr_location__init(&al); > + if (thread__find_map(aslr_thread, PERF_RECORD_MISC_KERNEL, addr, &al)) { > + key.dso = map__dso(al.map); > + key.invariant = map__start(al.map) - map__pgoff(al.map); > + } else { > + key.dso = NULL; > + key.invariant = addr; /* pgoff is 0 for ksymbols */ > + } > + key.pid = aslr_thread->pid_; > + > + if (hashmap__find(&aslr->remap_addresses, &key, &remapped_invariant_ptr)) { > + if (al.map) > + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + > + (addr - map__start(al.map)); > + else > + remap_addr = *remapped_invariant_ptr; > + addr_location__exit(&al); > + return remap_addr; > + } > + > + hashmap__for_each_entry(&aslr->remap_addresses, cur, bkt) { > + k = (struct remap_addresses_key *)cur->pkey; > + if (k->pid == kernel_pid && k->invariant == key.invariant) { > + v = (u64 *)cur->pvalue; > + > + if (al.map) > + remap_addr = *v + map__pgoff(al.map) + > + (addr - map__start(al.map)); > + else > + remap_addr = *v; > + addr_location__exit(&al); > + return remap_addr; > + } > + } > + > + if (!hashmap__find(&aslr->top_addresses, key.pid, &max_addr_ptr)) { > + first_mapping = true; > + remap_addr = kernel_space_start; > + } else { > + remap_addr = *max_addr_ptr; > + } > + > + remap_addr = round_up_to_page_size(remap_addr) + page_size; > + > + new_key = malloc(sizeof(*new_key)); > + new_val = malloc(sizeof(u64)); > + > + if (!new_key || !new_val) { > + free(new_key); > + 
free(new_val); > + addr_location__exit(&al); > + return 0; > + } > + *new_key = key; > + new_key->dso = dso__get(key.dso); > + if (al.map) > + *new_val = remap_addr - (addr - map__start(al.map)) - map__pgoff(al.map); > + else > + *new_val = remap_addr; > + > + if (hashmap__add(&aslr->remap_addresses, new_key, new_val) < 0) { > + dso__put(new_key->dso); > + free(new_key); > + free(new_val); > + addr_location__exit(&al); > + return 0; > + } > + > + new_max = malloc(sizeof(u64)); > + old_val = NULL; > + > + if (!new_max) { > + old_key = NULL; > + old_val_remap = NULL; > + > + hashmap__delete(&aslr->remap_addresses, &key, &old_key, &old_val_remap); > + if (old_key) > + dso__put(old_key->dso); > + free(old_key); > + free(old_val_remap); > + addr_location__exit(&al); > + return 0; > + } > + *new_max = remap_addr + len; > + > + if (hashmap__find(&aslr->top_addresses, key.pid, &max_addr_ptr)) { > + if (*max_addr_ptr > *new_max) > + *new_max = *max_addr_ptr; > + } > + > + err = hashmap__insert(&aslr->top_addresses, key.pid, new_max, > + first_mapping ? 
> + HASHMAP_ADD : HASHMAP_UPDATE, > + NULL, &old_val); > + if (err) { > + old_key = NULL; > + old_val_remap = NULL; > + > + free(new_max); > + hashmap__delete(&aslr->remap_addresses, &key, &old_key, &old_val_remap); > + if (old_key) > + dso__put(old_key->dso); > + free(old_key); > + free(old_val_remap); > + addr_location__exit(&al); > + return 0; > + } > + free(old_val); > + > + addr_location__exit(&al); > + return remap_addr; > +} > + > + > +static int aslr_tool__process_mmap(const struct perf_tool *tool, > + union perf_event *event, > + struct perf_sample *sample, > + struct machine *machine) > +{ > + struct delegate_tool *del_tool; > + struct aslr_tool *aslr; > + struct perf_tool *delegate; > + union perf_event *new_event; > + u8 cpumode; > + struct thread *thread; > + struct machine *aslr_machine; > + int err; > + > + del_tool = container_of(tool, struct delegate_tool, tool); > + aslr = container_of(del_tool, struct aslr_tool, tool); > + delegate = aslr->tool.delegate; > + new_event = (union perf_event *)aslr->event_copy; > + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; > + > + aslr_machine = machines__findnew(&aslr->machines, machine->pid); > + if (!aslr_machine) > + return -ENOMEM; > + > + /* Create the thread, map, etc. in the ASLR before virtual address space. */ > + err = perf_event__process_mmap(tool, event, sample, aslr_machine); > + if (err) > + return err; > + > + thread = machine__findnew_thread(aslr_machine, event->mmap.pid, event->mmap.tid); > + if (!thread) > + return -ENOMEM; > + memcpy(&new_event->mmap, &event->mmap, event->mmap.header.size); > + /* Remaps the mmap.start. 
*/ > + new_event->mmap.start = aslr_tool__remap_mapping(aslr, thread, cpumode, > + event->mmap.start, > + event->mmap.len, > + event->mmap.pgoff); > + if (aslr->first_kernel_mapping && cpumode == PERF_RECORD_MISC_KERNEL) { > + new_event->mmap.pgoff = new_event->mmap.start; > + aslr->first_kernel_mapping = false; > + } > + err = delegate->mmap(delegate, new_event, sample, machine); > + thread__put(thread); > + return err; > +} > + > +static int aslr_tool__process_mmap2(const struct perf_tool *tool, > + union perf_event *event, > + struct perf_sample *sample, > + struct machine *machine) > +{ > + struct delegate_tool *del_tool; > + struct aslr_tool *aslr; > + struct perf_tool *delegate; > + union perf_event *new_event; > + u8 cpumode; > + struct thread *thread; > + struct machine *aslr_machine; > + int err; > + > + del_tool = container_of(tool, struct delegate_tool, tool); > + aslr = container_of(del_tool, struct aslr_tool, tool); > + delegate = aslr->tool.delegate; > + new_event = (union perf_event *)aslr->event_copy; > + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; > + > + aslr_machine = machines__findnew(&aslr->machines, machine->pid); > + if (!aslr_machine) > + return -ENOMEM; > + > + /* Create the thread, map, etc. in the ASLR before virtual address space. */ > + err = perf_event__process_mmap2(tool, event, sample, aslr_machine); > + if (err) > + return err; > + > + thread = machine__findnew_thread(aslr_machine, event->mmap2.pid, event->mmap2.tid); > + if (!thread) > + return -ENOMEM; > + memcpy(&new_event->mmap2, &event->mmap2, event->mmap2.header.size); > + /* Remaps the mmap.start. 
*/ > + new_event->mmap2.start = aslr_tool__remap_mapping(aslr, thread, cpumode, > + event->mmap2.start, > + event->mmap2.len, > + event->mmap2.pgoff); > + if (aslr->first_kernel_mapping && cpumode == PERF_RECORD_MISC_KERNEL) { > + new_event->mmap2.pgoff = new_event->mmap2.start; > + aslr->first_kernel_mapping = false; > + } > + err = delegate->mmap2(delegate, new_event, sample, machine); > + thread__put(thread); > + return err; > +} > + > +static int aslr_tool__process_comm(const struct perf_tool *tool, > + union perf_event *event, > + struct perf_sample *sample, > + struct machine *machine) > +{ > + struct delegate_tool *del_tool; > + struct aslr_tool *aslr; > + struct perf_tool *delegate; > + struct machine *aslr_machine; > + int err; > + > + del_tool = container_of(tool, struct delegate_tool, tool); > + aslr = container_of(del_tool, struct aslr_tool, tool); > + delegate = aslr->tool.delegate; > + > + aslr_machine = machines__findnew(&aslr->machines, machine->pid); > + if (!aslr_machine) > + return -ENOMEM; > + > + /* Create the thread, map, etc. in the ASLR before virtual address space. */ > + err = perf_event__process_comm(tool, event, sample, aslr_machine); > + if (err) > + return err; > + > + return delegate->comm(delegate, event, sample, machine); > +} > + > +static int aslr_tool__process_fork(const struct perf_tool *tool, > + union perf_event *event, > + struct perf_sample *sample, > + struct machine *machine) > +{ > + struct delegate_tool *del_tool; > + struct aslr_tool *aslr; > + struct perf_tool *delegate; > + struct machine *aslr_machine; > + int err; > + > + del_tool = container_of(tool, struct delegate_tool, tool); > + aslr = container_of(del_tool, struct aslr_tool, tool); > + delegate = aslr->tool.delegate; > + > + aslr_machine = machines__findnew(&aslr->machines, machine->pid); > + if (!aslr_machine) > + return -ENOMEM; > + > + /* Create the thread, map, etc. in the ASLR before virtual address space. 
*/ > + err = perf_event__process_fork(tool, event, sample, aslr_machine); > + if (err) > + return err; > + > + return delegate->fork(delegate, event, sample, machine); > +} > + > +static int aslr_tool__process_exit(const struct perf_tool *tool, > + union perf_event *event, > + struct perf_sample *sample, > + struct machine *machine) > +{ > + struct delegate_tool *del_tool; > + struct aslr_tool *aslr; > + struct perf_tool *delegate; > + struct machine *aslr_machine; > + int err; > + > + del_tool = container_of(tool, struct delegate_tool, tool); > + aslr = container_of(del_tool, struct aslr_tool, tool); > + delegate = aslr->tool.delegate; > + > + aslr_machine = machines__findnew(&aslr->machines, machine->pid); > + if (!aslr_machine) > + return -ENOMEM; > + > + /* Create the thread, map, etc. in the ASLR before virtual address space. */ > + err = perf_event__process_exit(tool, event, sample, aslr_machine); > + if (err) > + return err; > + > + return delegate->exit(delegate, event, sample, machine); > +} > + > +static int aslr_tool__process_text_poke(const struct perf_tool *tool, > + union perf_event *event, > + struct perf_sample *sample, > + struct machine *machine) > +{ > + struct delegate_tool *del_tool; > + struct aslr_tool *aslr; > + struct perf_tool *delegate; > + union perf_event *new_event; > + u8 cpumode; > + struct thread *thread; > + struct machine *aslr_machine; > + int err; > + > + del_tool = container_of(tool, struct delegate_tool, tool); > + aslr = container_of(del_tool, struct aslr_tool, tool); > + delegate = aslr->tool.delegate; > + new_event = (union perf_event *)aslr->event_copy; > + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; > + > + aslr_machine = machines__findnew(&aslr->machines, machine->pid); > + if (!aslr_machine) > + return -ENOMEM; > + > + thread = machine__findnew_thread(aslr_machine, sample->pid, sample->tid); > + if (!thread) > + return -ENOMEM; > + memcpy(&new_event->text_poke, &event->text_poke, 
event->text_poke.header.size); > + new_event->text_poke.addr = aslr_tool__remap_address(aslr, thread, cpumode, > + event->text_poke.addr); > + > + err = delegate->text_poke(delegate, new_event, sample, machine); > + > + thread__put(thread); > + return err; > +} > + > +static int aslr_tool__process_ksymbol(const struct perf_tool *tool, > + union perf_event *event, > + struct perf_sample *sample, > + struct machine *machine) > +{ > + struct delegate_tool *del_tool; > + struct aslr_tool *aslr; > + struct perf_tool *delegate; > + union perf_event *new_event; > + struct thread *thread; > + struct machine *aslr_machine; > + int err; > + > + del_tool = container_of(tool, struct delegate_tool, tool); > + aslr = container_of(del_tool, struct aslr_tool, tool); > + delegate = aslr->tool.delegate; > + new_event = (union perf_event *)aslr->event_copy; > + > + aslr_machine = machines__findnew(&aslr->machines, machine->pid); > + if (!aslr_machine) > + return -ENOMEM; > + > + err = perf_event__process_ksymbol(tool, event, sample, aslr_machine); > + if (err) > + return err; > + > + thread = machine__findnew_thread(aslr_machine, kernel_pid, 0); > + if (!thread) > + return -ENOMEM; > + memcpy(&new_event->ksymbol, &event->ksymbol, event->ksymbol.header.size); > + /* Remaps the ksymbol.start */ > + new_event->ksymbol.addr = aslr_tool__remap_ksymbol(aslr, thread, > + event->ksymbol.addr, event->ksymbol.len); > + > + err = delegate->ksymbol(delegate, new_event, sample, machine); > + thread__put(thread); > + return err; > +} > + > +static int aslr_tool__process_sample(const struct perf_tool *tool, union perf_event *event, > + struct perf_sample *sample, > + struct evsel *evsel, struct machine *machine) > +{ > + struct delegate_tool *del_tool; > + struct aslr_tool *aslr; > + struct perf_tool *delegate; > + int ret; > + u64 sample_type; > + struct thread *thread; > + struct machine *aslr_machine; > + __u64 max_i; > + __u64 max_j; > + union perf_event *new_event; > + struct perf_sample 
new_sample; > + __u64 *in_array, *out_array; > + u8 cpumode; > + u64 addr; > + size_t i; > + size_t j; > + > + del_tool = container_of(tool, struct delegate_tool, tool); > + aslr = container_of(del_tool, struct aslr_tool, tool); > + delegate = aslr->tool.delegate; > + ret = -EFAULT; > + sample_type = evsel->core.attr.sample_type; > + max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); > + max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); > + new_event = (union perf_event *)aslr->event_copy; > + cpumode = sample->cpumode; > + i = 0; > + j = 0; > + > + aslr_machine = machines__findnew(&aslr->machines, machine->pid); > + if (!aslr_machine) > + return -ENOMEM; > + > + thread = machine__findnew_thread(aslr_machine, sample->pid, sample->tid); > + > + if (!thread) > + return -ENOMEM; > + > + if (max_i > PERF_SAMPLE_MAX_SIZE / sizeof(u64)) > + goto out_put; > + > + > + Excessive blank lines. > + new_event->sample.header = event->sample.header; > + > + in_array = &event->sample.array[0]; > + out_array = &new_event->sample.array[0]; > + > +#define CHECK_BOUNDS(required_i, required_j) \ > + do { \ > + if (i + (required_i) > max_i || j + (required_j) > max_j) { \ > + ret = -EFAULT; \ > + goto out_put; \ > + } \ > + } while (0) > + > +#define COPY_U64() \ > + do { \ > + CHECK_BOUNDS(1, 1); \ > + out_array[j++] = in_array[i++]; \ > + } while (0) > + > +#define REMAP_U64(addr_field) \ > + do { \ > + CHECK_BOUNDS(1, 1); \ > + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr_field); \ > + i++; \ > + } while (0) > + > + if (sample_type & PERF_SAMPLE_IDENTIFIER) > + COPY_U64(); /* id */ > + if (sample_type & PERF_SAMPLE_IP) > + REMAP_U64(sample->ip); > + if (sample_type & PERF_SAMPLE_TID) > + COPY_U64(); /* pid, tid */ > + if (sample_type & PERF_SAMPLE_TIME) > + COPY_U64(); /* time */ > + if (sample_type & PERF_SAMPLE_ADDR) > + REMAP_U64(sample->addr); > + if (sample_type & PERF_SAMPLE_ID) > + 
COPY_U64(); /* id */ > + if (sample_type & PERF_SAMPLE_STREAM_ID) > + COPY_U64(); /* stream_id */ > + if (sample_type & PERF_SAMPLE_CPU) > + COPY_U64(); /* cpu, res */ > + if (sample_type & PERF_SAMPLE_PERIOD) > + COPY_U64(); /* period */ > + if (sample_type & PERF_SAMPLE_READ) { > + if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { > + COPY_U64(); /* value */ > + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) > + COPY_U64(); /* time_enabled */ > + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) > + COPY_U64(); /* time_running */ > + if (evsel->core.attr.read_format & PERF_FORMAT_ID) > + COPY_U64(); /* id */ > + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) > + COPY_U64(); /* lost */ > + } else { > + u64 nr; > + > + CHECK_BOUNDS(1, 1); > + nr = out_array[j++] = in_array[i++]; > + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) > + COPY_U64(); /* time_enabled */ > + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) > + COPY_U64(); /* time_running */ > + for (u64 cntr = 0; cntr < nr; cntr++) { > + COPY_U64(); /* value */ > + if (evsel->core.attr.read_format & PERF_FORMAT_ID) > + COPY_U64(); /* id */ > + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) > + COPY_U64(); /* lost */ > + } > + } > + } > + if (sample_type & PERF_SAMPLE_CALLCHAIN) { > + u64 nr; > + > + CHECK_BOUNDS(1, 1); > + nr = out_array[j++] = in_array[i++]; > + > + for (u64 cntr = 0; cntr < nr; cntr++) { > + CHECK_BOUNDS(1, 1); > + addr = in_array[i++]; > + if (addr >= PERF_CONTEXT_MAX) { > + out_array[j++] = addr; > + switch (addr) { > + case PERF_CONTEXT_HV: > + cpumode = PERF_RECORD_MISC_HYPERVISOR; > + break; > + case PERF_CONTEXT_KERNEL: > + cpumode = PERF_RECORD_MISC_KERNEL; > + break; > + case PERF_CONTEXT_USER: > + cpumode = PERF_RECORD_MISC_USER; > + break; > + case PERF_CONTEXT_GUEST: > + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; > + break; > + case PERF_CONTEXT_GUEST_KERNEL: > + cpumode = 
PERF_RECORD_MISC_GUEST_KERNEL; > + break; > + case PERF_CONTEXT_GUEST_USER: > + cpumode = PERF_RECORD_MISC_GUEST_USER; > + break; > + case PERF_CONTEXT_USER_DEFERRED: > + /* > + * Immediately followed by a 64-bit > + * stitching cookie. Skip/Copy it! > + */ > + CHECK_BOUNDS(1, 1); > + out_array[j++] = in_array[i++]; > + cntr++; > + break; > + default: > + pr_debug("invalid callchain context: %"PRIx64"\n", addr); > + ret = 0; > + goto out_put; > + } > + continue; > + } > + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr); > + } > + } > + if (sample_type & PERF_SAMPLE_RAW) { > + size_t bytes = sizeof(u32) + sample->raw_size; > + size_t u64_words = (bytes + 7) / 8; > + > + if (i + u64_words > max_i || j + u64_words > max_j) { > + ret = -EFAULT; > + goto out_put; > + } > + memcpy(&out_array[j], &in_array[i], bytes); > + i += u64_words; > + j += u64_words; > + /* > + * TODO: certain raw samples can be remapped, such as > + * tracepoints by examining their fields. > + */ > + pr_debug("Dropping raw samples as possible ASLR leak\n"); > + ret = 0; > + goto out_put; > + } > + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { > + u64 nr; > + > + CHECK_BOUNDS(1, 1); > + nr = out_array[j++] = in_array[i++]; > + > + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) > + COPY_U64(); /* hw_idx */ > + > + if (nr > (ULLONG_MAX / 3)) { > + ret = -EFAULT; > + goto out_put; > + } > + if (nr * 3 > max_i - i || nr * 3 > max_j - j) { > + ret = -EFAULT; > + goto out_put; > + } > + for (u64 cntr = 0; cntr < nr; cntr++) { > + out_array[j++] = aslr_tool__remap_address(aslr, thread, > + sample->cpumode, > + in_array[i++]); /* from */ > + out_array[j++] = aslr_tool__remap_address(aslr, thread, > + sample->cpumode, > + in_array[i++]); /* to */ > + out_array[j++] = in_array[i++]; /* flags */ > + } > + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) { > + if (nr > max_i - i || nr > max_j - j) { > + ret = -EFAULT; > + goto out_put; > + } 
> + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); > + i += nr; > + j += nr; > + /* TODO: confirm branch counters don't leak ASLR information. */ > + pr_debug("Dropping sample branch counters as possible ASLR leak\n"); > + ret = 0; > + goto out_put; > + } > + } > + if (sample_type & PERF_SAMPLE_REGS_USER) { > + u64 abi; > + > + COPY_U64(); /* abi */ > + abi = out_array[j-1]; > + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { > + u64 nr = hweight64(evsel->core.attr.sample_regs_user); > + > + if (nr > max_i - i || nr > max_j - j) { > + ret = -EFAULT; > + goto out_put; > + } > + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); > + i += nr; > + j += nr; > + } > + /* TODO: can this be less conservative? */ > + pr_debug("Dropping regs user sample as possible ASLR leak\n"); > + ret = 0; > + goto out_put; Does this mean you drop samples if they contain registers? > + } > + if (sample_type & PERF_SAMPLE_STACK_USER) { > + u64 size; > + > + CHECK_BOUNDS(1, 1); > + size = out_array[j++] = in_array[i++]; > + if (size > 0) { > + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); > + > + if (u64_words > max_i - i || u64_words > max_j - j) { > + ret = -EFAULT; > + goto out_put; > + } > + memcpy(&out_array[j], &in_array[i], size); > + if (size % 8) { > + size_t pad = 8 - (size % 8); > + > + memset(((char *)&out_array[j]) + size, 0, pad); > + } > + i += u64_words; > + j += u64_words; > + > + COPY_U64(); /* dyn_size */ > + } > + /* TODO: can this be less conservative?
*/ > + pr_debug("Dropping stack user sample as possible ASLR leak\n"); > + ret = 0; > + goto out_put; > + } > + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) > + COPY_U64(); /* perf_sample_weight */ > + if (sample_type & PERF_SAMPLE_DATA_SRC) > + COPY_U64(); /* data_src */ > + if (sample_type & PERF_SAMPLE_TRANSACTION) > + COPY_U64(); /* transaction */ > + if (sample_type & PERF_SAMPLE_REGS_INTR) { > + u64 abi; > + > + COPY_U64(); /* abi */ > + abi = out_array[j-1]; > + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { > + u64 nr = hweight64(evsel->core.attr.sample_regs_intr); > + > + if (nr > max_i - i || nr > max_j - j) { > + ret = -EFAULT; > + goto out_put; > + } > + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); > + i += nr; > + j += nr; > + } > + /* TODO: can this be less conservative? */ > + pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); > + ret = 0; > + goto out_put; > + } > + if (sample_type & PERF_SAMPLE_PHYS_ADDR) { > + COPY_U64(); /* phys_addr */ > + /* TODO: can this be less conservative? */ > + pr_debug("Dropping physical address sample as possible ASLR leak\n"); > + ret = 0; > + goto out_put; > + } > + if (sample_type & PERF_SAMPLE_CGROUP) > + COPY_U64(); /* cgroup */ > + if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) > + COPY_U64(); /* data_page_size */ > + if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) > + COPY_U64(); /* code_page_size */ > + > + if (sample_type & PERF_SAMPLE_AUX) { > + u64 size; > + > + CHECK_BOUNDS(1, 1); > + size = out_array[j++] = in_array[i++]; > + if (size > 0) { > + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); > + > + if (u64_words > max_i - i || u64_words > max_j - j) { > + ret = -EFAULT; > + goto out_put; > + } > + memcpy(&out_array[j], &in_array[i], size); > + if (size % 8) { > + size_t pad = 8 - (size % 8); > + > + memset(((char *)&out_array[j]) + size, 0, pad); > + } > + i += u64_words; > + j += u64_words; > + } > + /* TODO: can this be less conservative? 
*/ > + pr_debug("Dropping aux sample as possible ASLR leak\n"); > + ret = 0; > + goto out_put; > + } Can you use perf_event__synthesize_sample()? > + > + if (evsel__is_offcpu_event(evsel)) { > + /* TODO: can this be less conservative? */ > + pr_debug("Dropping off-CPU sample as possible ASLR leak\n"); Why not remap the address? Thanks, Namhyung > + ret = 0; > + goto out_put; > + } > + > + new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); > + > + perf_sample__init(&new_sample, /*all=*/ true); > + ret = evsel__parse_sample(evsel, new_event, &new_sample); > + if (ret) { > + perf_sample__exit(&new_sample); > + goto out_put; > + } > + > + ret = delegate->sample(delegate, new_event, &new_sample, evsel, machine); > + perf_sample__exit(&new_sample); > + > +out_put: > + thread__put(thread); > + return ret; > +} > + > +#undef CHECK_BOUNDS > +#undef COPY_U64 > +#undef REMAP_U64 > + > + > +static int aslr_tool__process_attr(const struct perf_tool *tool, > + union perf_event *event, > + struct evlist **pevlist) > +{ > + struct delegate_tool *del_tool; > + struct aslr_tool *aslr; > + struct perf_tool *delegate; > + union perf_event *new_event; > + > + del_tool = container_of(tool, struct delegate_tool, tool); > + aslr = container_of(del_tool, struct aslr_tool, tool); > + delegate = aslr->tool.delegate; > + new_event = (union perf_event *)aslr->event_copy; > + > + memcpy(&new_event->attr, &event->attr, event->attr.header.size); > + if (new_event->attr.attr.type == PERF_TYPE_BREAKPOINT) > + new_event->attr.attr.bp_addr = 0; /* Conservatively remove addresses. 
*/ > + > + return delegate->attr(delegate, new_event, pevlist); > +} > + > +static s64 aslr_tool__process_auxtrace(const struct perf_tool *tool __maybe_unused, > + struct perf_session *session, > + union perf_event *event) > +{ > + if (perf_data__is_pipe(session->data)) { > + int err = skipn(perf_data__fd(session->data), event->auxtrace.size); > + > + if (err < 0) > + return err; > + } > + return event->auxtrace.size; > +} > + > +static int aslr_tool__process_auxtrace_info(const struct perf_tool *tool __maybe_unused, > + struct perf_session *session __maybe_unused, > + union perf_event *event __maybe_unused) > +{ > + return 0; > +} > + > +static int aslr_tool__process_auxtrace_error(const struct perf_tool *tool __maybe_unused, > + struct perf_session *session __maybe_unused, > + union perf_event *event __maybe_unused) > +{ > + return 0; > +} > + > +static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) > +{ > + delegate_tool__init(&aslr->tool, delegate); > + aslr->tool.tool.ordered_events = true; > + > + machines__init(&aslr->machines); > + > + hashmap__init(&aslr->remap_addresses, > + remap_addresses__hash, remap_addresses__equal, > + /*ctx=*/NULL); > + hashmap__init(&aslr->top_addresses, > + top_addresses__hash, top_addresses__equal, > + /*ctx=*/NULL); > + aslr->first_kernel_mapping = true; > + > + aslr->tool.tool.sample = aslr_tool__process_sample; > + /* read - reads a counter, okay to delegate. */ > + aslr->tool.tool.mmap = aslr_tool__process_mmap; > + aslr->tool.tool.mmap2 = aslr_tool__process_mmap2; > + aslr->tool.tool.comm = aslr_tool__process_comm; > + aslr->tool.tool.fork = aslr_tool__process_fork; > + aslr->tool.tool.exit = aslr_tool__process_exit; > + /* namesspaces, cgroup, lost, lost_sample, aux, */ > + /* itrace_start, aux_output_hw_id, context_switch, throttle, unthrottle */ > + /* - no virtual addresses. */ > + aslr->tool.tool.ksymbol = aslr_tool__process_ksymbol; > + /* bpf - no virtual address. 
*/ > + aslr->tool.tool.text_poke = aslr_tool__process_text_poke; > + aslr->tool.tool.attr = aslr_tool__process_attr; > + /* event_update, tracing_data, finished_round, build_id, id_index, */ > + /* event_update, tracing_data, finished_round, build_id, id_index, */ > + /* auxtrace_info, auxtrace_error, time_conv, thread_map, cpu_map, */ > + /* stat_config, stat, feature, finished_init, bpf_metadata, compressed, */ > + /* auxtrace - no virtual addresses. */ > + aslr->tool.tool.auxtrace = aslr_tool__process_auxtrace; > + aslr->tool.tool.auxtrace_info = aslr_tool__process_auxtrace_info; > + aslr->tool.tool.auxtrace_error = aslr_tool__process_auxtrace_error; > +} > + > +struct perf_tool *aslr_tool__new(struct perf_tool *delegate) > +{ > + struct aslr_tool *aslr = zalloc(sizeof(*aslr)); > + > + if (!aslr) > + return NULL; > + > + aslr_tool__init(aslr, delegate); > + return &aslr->tool.tool; > +} > + > +void aslr_tool__delete(struct perf_tool *tool) > +{ > + struct delegate_tool *del_tool; > + struct aslr_tool *aslr; > + struct hashmap_entry *cur; > + size_t bkt; > + > + if (!tool) > + return; > + > + del_tool = container_of(tool, struct delegate_tool, tool); > + aslr = container_of(del_tool, struct aslr_tool, tool); > + > + hashmap__for_each_entry(&aslr->remap_addresses, cur, bkt) { > + struct remap_addresses_key *key = (struct remap_addresses_key *)cur->pkey; > + > + if (key) > + dso__put(key->dso); > + zfree(&cur->pkey); > + zfree(&cur->pvalue); > + } > + hashmap__for_each_entry(&aslr->top_addresses, cur, bkt) { > + zfree(&cur->pvalue); > + } > + > + hashmap__clear(&aslr->remap_addresses); > + hashmap__clear(&aslr->top_addresses); > + machines__destroy_kernel_maps(&aslr->machines); > + machines__exit(&aslr->machines); > + free(aslr); > +} > diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h > new file mode 100644 > index 000000000000..ea984d82681f > --- /dev/null > +++ b/tools/perf/util/aslr.h > @@ -0,0 +1,10 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > 
+#ifndef __PERF_ASLR_H > +#define __PERF_ASLR_H > + > +struct perf_tool; > + > +struct perf_tool *aslr_tool__new(struct perf_tool *delegate); > +void aslr_tool__delete(struct perf_tool *aslr); > + > +#endif /* __PERF_ASLR_H */ > -- > 2.54.0.545.g6539524ca2-goog > ^ permalink raw reply [flat|nested] 183+ messages in thread
* Re: [PATCH v5 4/5] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses 2026-05-06 18:52 ` Namhyung Kim @ 2026-05-06 20:01 ` Ian Rogers 0 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-05-06 20:01 UTC (permalink / raw) To: Namhyung Kim Cc: acme, gmx, adrian.hunter, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz On Wed, May 6, 2026 at 11:52 AM Namhyung Kim <namhyung@kernel.org> wrote: > > On Tue, May 05, 2026 at 05:45:45PM -0700, Ian Rogers wrote: > > If perf.data files are taken from one machine to another they may > > leak virtual addresses and so weaken ASLR on the machine they are > > coming from. Add an aslr option for perf inject that remaps all > > virtual addresses, or drops data/events, so that the virtual address > > information isn't leaked. > > > > When events are not known/handled by the tool they are dropped. This > > makes the tool conservative and it should never leak ASLR information, > > but it means virtual address remapping is needed for cases like > > auxtrace. > > > > The ASLR tracking tool virtualizes process and machine namespaces using > > 'struct machines' to safely isolate host mappings from unprivileged KVM guest > > address spaces. Memory layouts are tracked globally per process context to > > ensure linear, continuous space allocations across successive mapping runs. > > > > To remain strictly conservative and guarantee security, the tool scrubs > > breakpoint addresses (bp_addr) from all synthesized stream headers, and drops > > unsupported complex payloads (such as user register stacks, raw tracepoints, > > and hardware AUX tracing frames) to completely eliminate accidental address > > leakage vectors. 
> > > > Assisted-by: Gemini-CLI:Google Gemini 3 > > Signed-off-by: Ian Rogers <irogers@google.com> > > Co-developed-by: Gabriel Marin <gmx@google.com> > > Signed-off-by: Gabriel Marin <gmx@google.com> > > --- > > v5: Fix memory leaks inside aslr_tool__delete destructor by calling standard > > machines__destroy_kernel_maps() to cleanly free host/guest maps and guest > > machine structures. Introduce the precise 'first_kernel_mapping' tracking > > guard inside aslr.c to rewrite the core kernel pgoff virtual address while > > safely protecting module file offsets from corruption. Harden skipn() > > pipe I/O stream reader loops against EINTR interruption errors. Clean up > > breakpoint address (bp_addr) memory scrubbing by executing the scrubbing loop > > directly at core session initialization startup level, natively securing both > > file headers and streaming pipe channels while removing redundant runtime > > tool wrapper interception hooks layers. > > v4: Scrub bp_addr from headers/pipe synthesis attributes. Remove kernel > > mmap pgoff mathematical delta adjustment leaks to maintain secure > > base obfuscation bounds. Harden guest space contexts mapping loops, > > correct ksymbol map base invariants tracking, and plug tail-word > > padding heap leakage vectors in user stacks and AUX payloads. > > v3: Combine split-map fixes, guest namespaces, bounds checks, OOM rollbacks, > > hot path optimization, safe dso references, and I/O stream error handling > > from v3/v4 development. Drop raw auxtrace events. Fix thread reference leaks > > in event handlers. Fix 32-bit truncation bug in hashmaps using u64* values. > > Prevent leaking uninitialized heap memory by zeroing copy buffer. Correct > > bitmask checks for branch stack flags. Avoid PMU configuration corruption. > > v2: First review feedback adjustments. 
> > --- > > tools/perf/builtin-inject.c | 31 +- > > tools/perf/util/Build | 1 + > > tools/perf/util/aslr.c | 1220 +++++++++++++++++++++++++++++++++++ > > tools/perf/util/aslr.h | 10 + > > 4 files changed, 1261 insertions(+), 1 deletion(-) > > create mode 100644 tools/perf/util/aslr.c > > create mode 100644 tools/perf/util/aslr.h > > > > diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c > > index f174bc69cec4..8fe479cb4152 100644 > > --- a/tools/perf/builtin-inject.c > > +++ b/tools/perf/builtin-inject.c > > @@ -8,6 +8,7 @@ > > */ > > #include "builtin.h" > > > > +#include "util/aslr.h" > > #include "util/color.h" > > #include "util/dso.h" > > #include "util/vdso.h" > > @@ -123,6 +124,7 @@ struct perf_inject { > > bool in_place_update_dry_run; > > bool copy_kcore_dir; > > bool convert_callchain; > > + bool aslr; > > const char *input_name; > > struct perf_data output; > > u64 bytes_written; > > @@ -304,6 +306,8 @@ static int perf_event__repipe(const struct perf_tool *tool, > > return perf_event__repipe_synth(tool, event); > > } > > > > + > > + > > static int perf_event__drop(const struct perf_tool *tool __maybe_unused, > > union perf_event *event __maybe_unused, > > struct perf_sample *sample __maybe_unused, > > @@ -2458,6 +2462,8 @@ static int __cmd_inject(struct perf_inject *inject) > > } > > } > > > > + > > + > > Unnecessary whitespace changes here and the above. Ack.
> > session->header.data_offset = output_data_offset; > > session->header.data_size = inject->bytes_written; > > perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc, > > @@ -2564,6 +2570,8 @@ int cmd_inject(int argc, const char **argv) > > " instance has a subdir"), > > OPT_BOOLEAN(0, "convert-callchain", &inject.convert_callchain, > > "Generate callchains using DWARF and drop register/stack data"), > > + OPT_BOOLEAN(0, "aslr", &inject.aslr, > > + "Remap virtual memory addresses similar to ASLR"), > > OPT_END() > > }; > > const char * const inject_usage[] = { > > @@ -2571,6 +2579,7 @@ int cmd_inject(int argc, const char **argv) > > NULL > > }; > > bool ordered_events; > > + struct perf_tool *tool = &inject.tool; > > > > if (!inject.itrace_synth_opts.set) { > > /* Disable eager loading of kernel symbols that adds overhead to perf inject. */ > > @@ -2684,18 +2693,36 @@ int cmd_inject(int argc, const char **argv) > > inject.tool.schedstat_domain = perf_event__repipe_op2_synth; > > inject.tool.dont_split_sample_group = true; > > inject.tool.merge_deferred_callchains = false; > > - inject.session = __perf_session__new(&data, &inject.tool, > > + if (inject.aslr) { > > + tool = aslr_tool__new(&inject.tool); > > + if (!tool) { > > + ret = -ENOMEM; > > + goto out_close_output; > > + } > > + } > > + inject.session = __perf_session__new(&data, tool, > > /*trace_event_repipe=*/inject.output.is_pipe, > > /*host_env=*/NULL); > > > > if (IS_ERR(inject.session)) { > > ret = PTR_ERR(inject.session); > > + if (inject.aslr) > > + aslr_tool__delete(tool); > > goto out_close_output; > > } > > > > if (zstd_init(&(inject.session->zstd_data), 0) < 0) > > pr_warning("Decompression initialization failed.\n"); > > > > + if (inject.aslr) { > > + struct evsel *evsel; > > + > > + evlist__for_each_entry(inject.session->evlist, evsel) { > > + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) > > + evsel->core.attr.bp_addr = 0; > > + } > > + } > > + > > /* Save original section 
info before feature bits change */ > > ret = save_section_info(&inject); > > if (ret) > > @@ -2789,6 +2816,8 @@ int cmd_inject(int argc, const char **argv) > > strlist__delete(inject.known_build_ids); > > zstd_fini(&(inject.session->zstd_data)); > > perf_session__delete(inject.session); > > + if (inject.aslr) > > + aslr_tool__delete(tool); > > out_close_output: > > if (!inject.in_place_update) > > perf_data__close(&inject.output); > > diff --git a/tools/perf/util/Build b/tools/perf/util/Build > > index 70cc91d00804..65b96f3b87e2 100644 > > --- a/tools/perf/util/Build > > +++ b/tools/perf/util/Build > > @@ -6,6 +6,7 @@ perf-util-y += arm64-frame-pointer-unwind-support.o > > perf-util-y += addr2line.o > > perf-util-y += addr_location.o > > perf-util-y += annotate.o > > +perf-util-y += aslr.o > > perf-util-y += blake2s.o > > perf-util-y += block-info.o > > perf-util-y += block-range.o > > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > > new file mode 100644 > > index 000000000000..effdcbec0db0 > > --- /dev/null > > +++ b/tools/perf/util/aslr.c > > @@ -0,0 +1,1220 @@ > > +// SPDX-License-Identifier: GPL-2.0 > > +#include "aslr.h" > > + > > +#include "addr_location.h" > > +#include "debug.h" > > +#include "event.h" > > +#include "evsel.h" > > +#include "machine.h" > > +#include "map.h" > > +#include "thread.h" > > +#include "tool.h" > > +#include "session.h" > > +#include "data.h" > > +#include "dso.h" > > + > > +#include <internal/lib.h> /* page_size */ > > +#include <linux/compiler.h> > > +#include <linux/zalloc.h> > > +#include <errno.h> > > +#include <inttypes.h> > > +#include <unistd.h> > > + > > +static int skipn(int fd, u64 n) > > +{ > > + char buf[4096]; > > + ssize_t ret; > > + > > + while (n > 0) { > > + ret = read(fd, buf, (n < (u64)sizeof(buf) ? 
n : (u64)sizeof(buf))); > > + if (ret < 0) { > > + if (errno == EINTR) > > + continue; > > + return ret; > > + } > > + if (ret == 0) > > + return 0; > > + n -= ret; > > + } > > + > > + return 0; > > +} > > + > > +/** > > + * struct remap_addresses_key - Key for mapping original addresses to remapped ones. > > + * @dso: Pointer to the DSO (Dynamic Shared Object) associated with the mapping. > > + * @invariant: Unique offset invariant within the VMA (Virtual Memory Area). > > + * Calculated as `start - pgoff`. This value remains constant when > > + * perf's internal `maps__fixup_overlap_and_insert` splits a map into > > + * fragmented VMA pieces due to overlapping events, allowing us to > > + * resolve split maps consistently back to the original VMA. > > I'm curious if it's guaranteed to be unique within a process. You could have two DSOs at different addresses, and when the `address - pgoff` is computed, they might yield the same invariant value. However, in that case the DSOs differ, so the keys don't match. If it were the same DSO, either the address or the pgoff would differ making it impossible to have the same invariant value. > > > + * @pid: Process ID associated with the mapping. > > + */ > > +struct remap_addresses_key { > > + struct dso *dso; > > + u64 invariant; > > + pid_t pid; > > +}; > > + > > +struct aslr_mapping { > > + struct list_head node; > > + u64 orig_start; > > + u64 len; > > + u64 remap_start; > > +}; > > + > > +struct aslr_tool { > > + /** @tool: The tool implemented here and a pointer to a delegate to process the data. */ > > + struct delegate_tool tool; > > + /** @machines: The machines with the input, not remapped, virtual address layout. */ > > + struct machines machines; > > + /** @event_copy: Buffer used to create an event to pass to the delegate. */ > > + char event_copy[PERF_SAMPLE_MAX_SIZE]; > > + /** @remap_addresses: mapping from remap_addresses_key to remapped address. 
*/ > > + struct hashmap remap_addresses; > > + /** @top_addresses: mapping from process to max remapped address. */ > > + struct hashmap top_addresses; > > + /** @first_kernel_mapping: flag indicating if we are still to process any kernel mapping. */ > > + bool first_kernel_mapping; > > +}; > > + > > +static const pid_t kernel_pid = -1; > > + > > +/* Start remapping user processes from a small non-zero offset. */ > > +static const u64 user_space_start = 0x200000; > > +static const u64 kernel_space_start = 0xffff800010000000; > > + > > +static size_t remap_addresses__hash(long _key, void *ctx __maybe_unused) > > +{ > > + struct remap_addresses_key *key = (struct remap_addresses_key *)_key; > > + > > + return (size_t)key->dso ^ key->invariant ^ key->pid; > > +} > > + > > +static bool remap_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) > > +{ > > + struct remap_addresses_key *key1 = (struct remap_addresses_key *)_key1; > > + struct remap_addresses_key *key2 = (struct remap_addresses_key *)_key2; > > + > > + return RC_CHK_EQUAL(key1->dso, key2->dso) && > > + key1->invariant == key2->invariant && > > + key1->pid == key2->pid; > > +} > > + > > +static size_t top_addresses__hash(long key, void *ctx __maybe_unused) > > +{ > > + return key; > > +} > > + > > +static bool top_addresses__equal(long key1, long key2, void *ctx __maybe_unused) > > +{ > > + return key1 == key2; > > +} > > + > > +static u64 round_up_to_page_size(u64 addr) > > +{ > > + return (addr + page_size - 1) & ~((u64)page_size - 1); > > +} > > + > > +static u64 aslr_tool__remap_address(struct aslr_tool *aslr, > > + struct thread *aslr_thread, > > + u8 cpumode, > > + u64 addr) > > +{ > > + struct addr_location al; > > + struct remap_addresses_key key; > > + u64 *remapped_invariant_ptr = NULL; > > + u64 remap_addr = 0; > > + u8 effective_cpumode = cpumode; > > + > > + if (!aslr_thread) > > + return 0; /* No thread. 
*/ > > + > > + addr_location__init(&al); > > + if (!thread__find_map(aslr_thread, cpumode, addr, &al)) { > > + /* > > + * If lookup fails with specified cpumode, try fallback to the other space > > + * to be robust against bad cpumode in samples. > > + */ > > + if (cpumode == PERF_RECORD_MISC_KERNEL) > > + effective_cpumode = PERF_RECORD_MISC_USER; > > + else if (cpumode == PERF_RECORD_MISC_USER) > > + effective_cpumode = PERF_RECORD_MISC_KERNEL; > > + else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL) > > + effective_cpumode = PERF_RECORD_MISC_GUEST_USER; > > + else if (cpumode == PERF_RECORD_MISC_GUEST_USER) > > + effective_cpumode = PERF_RECORD_MISC_GUEST_KERNEL; > > + > > + if (!thread__find_map(aslr_thread, effective_cpumode, addr, &al)) { > > + addr_location__exit(&al); > > + return 0; /* No mmap. */ > > + } > > + } > > + > > + key.dso = map__dso(al.map); > > + key.invariant = map__start(al.map) - map__pgoff(al.map); > > + key.pid = effective_cpumode == PERF_RECORD_MISC_KERNEL ? kernel_pid : aslr_thread->pid_; > > + > > + if (hashmap__find(&aslr->remap_addresses, &key, &remapped_invariant_ptr)) { > > + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + > > + (addr - map__start(al.map)); > > + } else { > > + if (effective_cpumode == PERF_RECORD_MISC_KERNEL) { > > + struct hashmap_entry *cur; > > + size_t bkt; > > + > > + hashmap__for_each_entry(&aslr->remap_addresses, cur, bkt) { > > + struct remap_addresses_key *k; > > + u64 *v; > > + > > + k = (struct remap_addresses_key *)cur->pkey; > > + if (k->pid == kernel_pid && > > + k->invariant == key.invariant) { > > + v = (u64 *)cur->pvalue; > > + remap_addr = *v + map__pgoff(al.map) + > > + (addr - map__start(al.map)); > > + break; > > + } > > + } > > + } > > + if (remap_addr == 0) { > > + pr_debug("Cannot find a remapped entry for address %lx in mapping %lx(%lx) for pid=%d\n", > > + addr, map__start(al.map), map__size(al.map), key.pid); > > + } > > + } > > + > > + addr_location__exit(&al); > > + return 
remap_addr; > > +} > > + > > +static u64 aslr_tool__remap_mapping(struct aslr_tool *aslr, > > + struct thread *aslr_thread, > > + u8 cpumode, > > + u64 start, u64 len, u64 pgoff) > > +{ > > + struct addr_location al; > > + struct addr_location prev_al; > > + struct remap_addresses_key key; > > + struct remap_addresses_key *new_key = NULL; > > + struct remap_addresses_key *old_key = NULL; > > + u64 remap_addr = 0; > > + u64 *remapped_invariant_ptr = NULL; > > + u64 *max_addr_ptr = NULL; > > + u64 *new_val = NULL; > > + u64 *new_max = NULL; > > + u64 *old_val = NULL; > > + u64 *old_val_remap = NULL; > > + bool is_contiguous = false; > > + bool first_mapping = false; > > + bool key_found = false; > > + int err; > > + > > + if (!aslr_thread) > > + return 0; /* No thread. */ > > + > > + addr_location__init(&al); > > + if (thread__find_map(aslr_thread, cpumode, start, &al)) { > > + key.dso = map__dso(al.map); > > + key.invariant = map__start(al.map) - map__pgoff(al.map); > > + } else { > > + key.dso = NULL; > > + key.invariant = start - pgoff; > > + } > > + key.pid = cpumode == PERF_RECORD_MISC_KERNEL ? kernel_pid : aslr_thread->pid_; > > + > > + if (hashmap__find(&aslr->remap_addresses, &key, &remapped_invariant_ptr)) { > > + remap_addr = *remapped_invariant_ptr + (al.map ? map__pgoff(al.map) : pgoff); > > + key_found = true; > > + } else { > > + addr_location__init(&prev_al); > > + if (thread__find_map(aslr_thread, cpumode, start - 1, &prev_al)) { > > + if (map__start(prev_al.map) + map__size(prev_al.map) == start) { > > + is_contiguous = true; > > + } else { > > + pr_debug("Previous mmap [%lx, %lx] overlaps current map [%lx, %lx]\n", > > + map__start(prev_al.map), > > + map__start(prev_al.map) + map__size(prev_al.map), > > + start, start+len); > > + } > > + } > > + addr_location__exit(&prev_al); > > + > > + if (!hashmap__find(&aslr->top_addresses, key.pid, &max_addr_ptr)) { > > + first_mapping = true; > > + remap_addr = (cpumode == PERF_RECORD_MISC_KERNEL ? 
> > kernel_space_start : user_space_start); > > + } else { > > + remap_addr = *max_addr_ptr; > > + } > > + > > + remap_addr = round_up_to_page_size(remap_addr); > > + if (!is_contiguous && !first_mapping) > > + remap_addr += page_size; > > + > > + new_key = malloc(sizeof(*new_key)); > > + new_val = malloc(sizeof(u64)); > > I think the value of hashmap can be passed as value if it's u64.. well > on 64-bit systems. Yeah, sashiko was complaining about 32-bit builds. I think we should diverge our hashmap implementation as using long for the key rather than s64 just causes this kind of silliness for us. I think we can also add generic support to avoid errptrs, etc. I don't have hope of BPF people being sensible as in this series: https://lore.kernel.org/linux-perf-users/20260322005823.981079-1-irogers@google.com/ > > > + > > + if (!new_key || !new_val) { > > + free(new_key); > > + free(new_val); > > + addr_location__exit(&al); > > + return 0; > > + } > > + *new_key = key; > > + new_key->dso = dso__get(key.dso); > > + *new_val = remap_addr - (al.map ?
map__pgoff(al.map) : pgoff); > > + > > + if (hashmap__add(&aslr->remap_addresses, new_key, new_val) != 0) { > > + dso__put(new_key->dso); > > + free(new_key); > > + free(new_val); > > + addr_location__exit(&al); > > + return 0; > > + } > > + } > > + > > + /* Update top_addresses */ > > + new_max = malloc(sizeof(u64)); > > + old_val = NULL; > > + > > + if (!new_max) { > > + old_key = NULL; > > + old_val_remap = NULL; > > + > > + if (!key_found) { > > + hashmap__delete(&aslr->remap_addresses, &key, > > + &old_key, &old_val_remap); > > + if (old_key) > > + dso__put(old_key->dso); > > + free(old_key); > > + free(old_val_remap); > > + } > > + addr_location__exit(&al); > > + return 0; > > + } > > + *new_max = remap_addr + len; > > + > > + if (hashmap__find(&aslr->top_addresses, key.pid, &max_addr_ptr)) { > > + if (*max_addr_ptr > *new_max) > > + *new_max = *max_addr_ptr; > > + } > > + > > + err = hashmap__insert(&aslr->top_addresses, key.pid, new_max, > > + (first_mapping && !key_found) ? > > + HASHMAP_ADD : HASHMAP_UPDATE, > > + NULL, &old_val); > > + if (err) { > > + old_key = NULL; > > + old_val_remap = NULL; > > + > > + free(new_max); > > + if (!key_found) { > > + hashmap__delete(&aslr->remap_addresses, &key, > > + &old_key, &old_val_remap); > > + if (old_key) > > + dso__put(old_key->dso); > > + free(old_key); > > + free(old_val_remap); > > + } > > + addr_location__exit(&al); > > + return 0; > > + } > > + free(old_val); > > + > > + addr_location__exit(&al); > > + return remap_addr; > > +} > > + > > +static u64 aslr_tool__remap_ksymbol(struct aslr_tool *aslr, > > + struct thread *aslr_thread, > > + u64 addr, u32 len) > > Any chance you can share the code with the above function? I'll check. 
> > +{ > > + struct addr_location al; > > + struct remap_addresses_key key; > > + struct hashmap_entry *cur; > > + struct remap_addresses_key *new_key = NULL; > > + struct remap_addresses_key *old_key = NULL; > > + struct remap_addresses_key *k; > > + size_t bkt; > > + u64 remap_addr = 0; > > + u64 *remapped_invariant_ptr = NULL; > > + u64 *max_addr_ptr = NULL; > > + u64 *new_val = NULL; > > + u64 *new_max = NULL; > > + u64 *old_val = NULL; > > + u64 *old_val_remap = NULL; > > + u64 *v; > > + bool first_mapping = false; > > + int err; > > + > > + if (!aslr_thread) > > + return 0; /* No thread. */ > > + > > + addr_location__init(&al); > > + if (thread__find_map(aslr_thread, PERF_RECORD_MISC_KERNEL, addr, &al)) { > > + key.dso = map__dso(al.map); > > + key.invariant = map__start(al.map) - map__pgoff(al.map); > > + } else { > > + key.dso = NULL; > > + key.invariant = addr; /* pgoff is 0 for ksymbols */ > > + } > > + key.pid = aslr_thread->pid_; > > + > > + if (hashmap__find(&aslr->remap_addresses, &key, &remapped_invariant_ptr)) { > > + if (al.map) > > + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + > > + (addr - map__start(al.map)); > > + else > > + remap_addr = *remapped_invariant_ptr; > > + addr_location__exit(&al); > > + return remap_addr; > > + } > > + > > + hashmap__for_each_entry(&aslr->remap_addresses, cur, bkt) { > > + k = (struct remap_addresses_key *)cur->pkey; > > + if (k->pid == kernel_pid && k->invariant == key.invariant) { > > + v = (u64 *)cur->pvalue; > > + > > + if (al.map) > > + remap_addr = *v + map__pgoff(al.map) + > > + (addr - map__start(al.map)); > > + else > > + remap_addr = *v; > > + addr_location__exit(&al); > > + return remap_addr; > > + } > > + } > > + > > + if (!hashmap__find(&aslr->top_addresses, key.pid, &max_addr_ptr)) { > > + first_mapping = true; > > + remap_addr = kernel_space_start; > > + } else { > > + remap_addr = *max_addr_ptr; > > + } > > + > > + remap_addr = round_up_to_page_size(remap_addr) + page_size; > > + > > 
+ new_key = malloc(sizeof(*new_key)); > > + new_val = malloc(sizeof(u64)); > > + > > + if (!new_key || !new_val) { > > + free(new_key); > > + free(new_val); > > + addr_location__exit(&al); > > + return 0; > > + } > > + *new_key = key; > > + new_key->dso = dso__get(key.dso); > > + if (al.map) > > + *new_val = remap_addr - (addr - map__start(al.map)) - map__pgoff(al.map); > > + else > > + *new_val = remap_addr; > > + > > + if (hashmap__add(&aslr->remap_addresses, new_key, new_val) < 0) { > > + dso__put(new_key->dso); > > + free(new_key); > > + free(new_val); > > + addr_location__exit(&al); > > + return 0; > > + } > > + > > + new_max = malloc(sizeof(u64)); > > + old_val = NULL; > > + > > + if (!new_max) { > > + old_key = NULL; > > + old_val_remap = NULL; > > + > > + hashmap__delete(&aslr->remap_addresses, &key, &old_key, &old_val_remap); > > + if (old_key) > > + dso__put(old_key->dso); > > + free(old_key); > > + free(old_val_remap); > > + addr_location__exit(&al); > > + return 0; > > + } > > + *new_max = remap_addr + len; > > + > > + if (hashmap__find(&aslr->top_addresses, key.pid, &max_addr_ptr)) { > > + if (*max_addr_ptr > *new_max) > > + *new_max = *max_addr_ptr; > > + } > > + > > + err = hashmap__insert(&aslr->top_addresses, key.pid, new_max, > > + first_mapping ? 
> > + HASHMAP_ADD : HASHMAP_UPDATE, > > + NULL, &old_val); > > + if (err) { > > + old_key = NULL; > > + old_val_remap = NULL; > > + > > + free(new_max); > > + hashmap__delete(&aslr->remap_addresses, &key, &old_key, &old_val_remap); > > + if (old_key) > > + dso__put(old_key->dso); > > + free(old_key); > > + free(old_val_remap); > > + addr_location__exit(&al); > > + return 0; > > + } > > + free(old_val); > > + > > + addr_location__exit(&al); > > + return remap_addr; > > +} > > + > > + > > +static int aslr_tool__process_mmap(const struct perf_tool *tool, > > + union perf_event *event, > > + struct perf_sample *sample, > > + struct machine *machine) > > +{ > > + struct delegate_tool *del_tool; > > + struct aslr_tool *aslr; > > + struct perf_tool *delegate; > > + union perf_event *new_event; > > + u8 cpumode; > > + struct thread *thread; > > + struct machine *aslr_machine; > > + int err; > > + > > + del_tool = container_of(tool, struct delegate_tool, tool); > > + aslr = container_of(del_tool, struct aslr_tool, tool); > > + delegate = aslr->tool.delegate; > > + new_event = (union perf_event *)aslr->event_copy; > > + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; > > + > > + aslr_machine = machines__findnew(&aslr->machines, machine->pid); > > + if (!aslr_machine) > > + return -ENOMEM; > > + > > + /* Create the thread, map, etc. in the ASLR before virtual address space. */ > > + err = perf_event__process_mmap(tool, event, sample, aslr_machine); > > + if (err) > > + return err; > > + > > + thread = machine__findnew_thread(aslr_machine, event->mmap.pid, event->mmap.tid); > > + if (!thread) > > + return -ENOMEM; > > + memcpy(&new_event->mmap, &event->mmap, event->mmap.header.size); > > + /* Remaps the mmap.start. 
*/ > > + new_event->mmap.start = aslr_tool__remap_mapping(aslr, thread, cpumode, > > + event->mmap.start, > > + event->mmap.len, > > + event->mmap.pgoff); > > + if (aslr->first_kernel_mapping && cpumode == PERF_RECORD_MISC_KERNEL) { > > + new_event->mmap.pgoff = new_event->mmap.start; > > + aslr->first_kernel_mapping = false; > > + } > > + err = delegate->mmap(delegate, new_event, sample, machine); > > + thread__put(thread); > > + return err; > > +} > > + > > +static int aslr_tool__process_mmap2(const struct perf_tool *tool, > > + union perf_event *event, > > + struct perf_sample *sample, > > + struct machine *machine) > > +{ > > + struct delegate_tool *del_tool; > > + struct aslr_tool *aslr; > > + struct perf_tool *delegate; > > + union perf_event *new_event; > > + u8 cpumode; > > + struct thread *thread; > > + struct machine *aslr_machine; > > + int err; > > + > > + del_tool = container_of(tool, struct delegate_tool, tool); > > + aslr = container_of(del_tool, struct aslr_tool, tool); > > + delegate = aslr->tool.delegate; > > + new_event = (union perf_event *)aslr->event_copy; > > + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; > > + > > + aslr_machine = machines__findnew(&aslr->machines, machine->pid); > > + if (!aslr_machine) > > + return -ENOMEM; > > + > > + /* Create the thread, map, etc. in the ASLR before virtual address space. */ > > + err = perf_event__process_mmap2(tool, event, sample, aslr_machine); > > + if (err) > > + return err; > > + > > + thread = machine__findnew_thread(aslr_machine, event->mmap2.pid, event->mmap2.tid); > > + if (!thread) > > + return -ENOMEM; > > + memcpy(&new_event->mmap2, &event->mmap2, event->mmap2.header.size); > > + /* Remaps the mmap.start. 
*/ > > + new_event->mmap2.start = aslr_tool__remap_mapping(aslr, thread, cpumode, > > + event->mmap2.start, > > + event->mmap2.len, > > + event->mmap2.pgoff); > > + if (aslr->first_kernel_mapping && cpumode == PERF_RECORD_MISC_KERNEL) { > > + new_event->mmap2.pgoff = new_event->mmap2.start; > > + aslr->first_kernel_mapping = false; > > + } > > + err = delegate->mmap2(delegate, new_event, sample, machine); > > + thread__put(thread); > > + return err; > > +} > > + > > +static int aslr_tool__process_comm(const struct perf_tool *tool, > > + union perf_event *event, > > + struct perf_sample *sample, > > + struct machine *machine) > > +{ > > + struct delegate_tool *del_tool; > > + struct aslr_tool *aslr; > > + struct perf_tool *delegate; > > + struct machine *aslr_machine; > > + int err; > > + > > + del_tool = container_of(tool, struct delegate_tool, tool); > > + aslr = container_of(del_tool, struct aslr_tool, tool); > > + delegate = aslr->tool.delegate; > > + > > + aslr_machine = machines__findnew(&aslr->machines, machine->pid); > > + if (!aslr_machine) > > + return -ENOMEM; > > + > > + /* Create the thread, map, etc. in the ASLR before virtual address space. 
*/ > > + err = perf_event__process_comm(tool, event, sample, aslr_machine); > > + if (err) > > + return err; > > + > > + return delegate->comm(delegate, event, sample, machine); > > +} > > + > > +static int aslr_tool__process_fork(const struct perf_tool *tool, > > + union perf_event *event, > > + struct perf_sample *sample, > > + struct machine *machine) > > +{ > > + struct delegate_tool *del_tool; > > + struct aslr_tool *aslr; > > + struct perf_tool *delegate; > > + struct machine *aslr_machine; > > + int err; > > + > > + del_tool = container_of(tool, struct delegate_tool, tool); > > + aslr = container_of(del_tool, struct aslr_tool, tool); > > + delegate = aslr->tool.delegate; > > + > > + aslr_machine = machines__findnew(&aslr->machines, machine->pid); > > + if (!aslr_machine) > > + return -ENOMEM; > > + > > + /* Create the thread, map, etc. in the ASLR before virtual address space. */ > > + err = perf_event__process_fork(tool, event, sample, aslr_machine); > > + if (err) > > + return err; > > + > > + return delegate->fork(delegate, event, sample, machine); > > +} > > + > > +static int aslr_tool__process_exit(const struct perf_tool *tool, > > + union perf_event *event, > > + struct perf_sample *sample, > > + struct machine *machine) > > +{ > > + struct delegate_tool *del_tool; > > + struct aslr_tool *aslr; > > + struct perf_tool *delegate; > > + struct machine *aslr_machine; > > + int err; > > + > > + del_tool = container_of(tool, struct delegate_tool, tool); > > + aslr = container_of(del_tool, struct aslr_tool, tool); > > + delegate = aslr->tool.delegate; > > + > > + aslr_machine = machines__findnew(&aslr->machines, machine->pid); > > + if (!aslr_machine) > > + return -ENOMEM; > > + > > + /* Create the thread, map, etc. in the ASLR before virtual address space. 
*/ > > + err = perf_event__process_exit(tool, event, sample, aslr_machine); > > + if (err) > > + return err; > > + > > + return delegate->exit(delegate, event, sample, machine); > > +} > > + > > +static int aslr_tool__process_text_poke(const struct perf_tool *tool, > > + union perf_event *event, > > + struct perf_sample *sample, > > + struct machine *machine) > > +{ > > + struct delegate_tool *del_tool; > > + struct aslr_tool *aslr; > > + struct perf_tool *delegate; > > + union perf_event *new_event; > > + u8 cpumode; > > + struct thread *thread; > > + struct machine *aslr_machine; > > + int err; > > + > > + del_tool = container_of(tool, struct delegate_tool, tool); > > + aslr = container_of(del_tool, struct aslr_tool, tool); > > + delegate = aslr->tool.delegate; > > + new_event = (union perf_event *)aslr->event_copy; > > + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; > > + > > + aslr_machine = machines__findnew(&aslr->machines, machine->pid); > > + if (!aslr_machine) > > + return -ENOMEM; > > + > > + thread = machine__findnew_thread(aslr_machine, sample->pid, sample->tid); > > + if (!thread) > > + return -ENOMEM; > > + memcpy(&new_event->text_poke, &event->text_poke, event->text_poke.header.size); > > + new_event->text_poke.addr = aslr_tool__remap_address(aslr, thread, cpumode, > > + event->text_poke.addr); > > + > > + err = delegate->text_poke(delegate, new_event, sample, machine); > > + > > + thread__put(thread); > > + return err; > > +} > > + > > +static int aslr_tool__process_ksymbol(const struct perf_tool *tool, > > + union perf_event *event, > > + struct perf_sample *sample, > > + struct machine *machine) > > +{ > > + struct delegate_tool *del_tool; > > + struct aslr_tool *aslr; > > + struct perf_tool *delegate; > > + union perf_event *new_event; > > + struct thread *thread; > > + struct machine *aslr_machine; > > + int err; > > + > > + del_tool = container_of(tool, struct delegate_tool, tool); > > + aslr = container_of(del_tool, struct 
aslr_tool, tool); > > + delegate = aslr->tool.delegate; > > + new_event = (union perf_event *)aslr->event_copy; > > + > > + aslr_machine = machines__findnew(&aslr->machines, machine->pid); > > + if (!aslr_machine) > > + return -ENOMEM; > > + > > + err = perf_event__process_ksymbol(tool, event, sample, aslr_machine); > > + if (err) > > + return err; > > + > > + thread = machine__findnew_thread(aslr_machine, kernel_pid, 0); > > + if (!thread) > > + return -ENOMEM; > > + memcpy(&new_event->ksymbol, &event->ksymbol, event->ksymbol.header.size); > > + /* Remaps the ksymbol.start */ > > + new_event->ksymbol.addr = aslr_tool__remap_ksymbol(aslr, thread, > > + event->ksymbol.addr, event->ksymbol.len); > > + > > + err = delegate->ksymbol(delegate, new_event, sample, machine); > > + thread__put(thread); > > + return err; > > +} > > + > > +static int aslr_tool__process_sample(const struct perf_tool *tool, union perf_event *event, > > + struct perf_sample *sample, > > + struct evsel *evsel, struct machine *machine) > > +{ > > + struct delegate_tool *del_tool; > > + struct aslr_tool *aslr; > > + struct perf_tool *delegate; > > + int ret; > > + u64 sample_type; > > + struct thread *thread; > > + struct machine *aslr_machine; > > + __u64 max_i; > > + __u64 max_j; > > + union perf_event *new_event; > > + struct perf_sample new_sample; > > + __u64 *in_array, *out_array; > > + u8 cpumode; > > + u64 addr; > > + size_t i; > > + size_t j; > > + > > + del_tool = container_of(tool, struct delegate_tool, tool); > > + aslr = container_of(del_tool, struct aslr_tool, tool); > > + delegate = aslr->tool.delegate; > > + ret = -EFAULT; > > + sample_type = evsel->core.attr.sample_type; > > + max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); > > + max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); > > + new_event = (union perf_event *)aslr->event_copy; > > + cpumode = sample->cpumode; > > + i = 0; > > + j = 0; > > + > > + aslr_machine 
= machines__findnew(&aslr->machines, machine->pid); > > + if (!aslr_machine) > > + return -ENOMEM; > > + > > + thread = machine__findnew_thread(aslr_machine, sample->pid, sample->tid); > > + > > + if (!thread) > > + return -ENOMEM; > > + > > + if (max_i > PERF_SAMPLE_MAX_SIZE / sizeof(u64)) > > + goto out_put; > > + > > + > > + > > Excessive blank lines. Ack. > > + new_event->sample.header = event->sample.header; > > + > > + in_array = &event->sample.array[0]; > > + out_array = &new_event->sample.array[0]; > > + > > +#define CHECK_BOUNDS(required_i, required_j) \ > > + do { \ > > + if (i + (required_i) > max_i || j + (required_j) > max_j) { \ > > + ret = -EFAULT; \ > > + goto out_put; \ > > + } \ > > + } while (0) > > + > > +#define COPY_U64() \ > > + do { \ > > + CHECK_BOUNDS(1, 1); \ > > + out_array[j++] = in_array[i++]; \ > > + } while (0) > > + > > +#define REMAP_U64(addr_field) \ > > + do { \ > > + CHECK_BOUNDS(1, 1); \ > > + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr_field); \ > > + i++; \ > > + } while (0) > > + > > + if (sample_type & PERF_SAMPLE_IDENTIFIER) > > + COPY_U64(); /* id */ > > + if (sample_type & PERF_SAMPLE_IP) > > + REMAP_U64(sample->ip); > > + if (sample_type & PERF_SAMPLE_TID) > > + COPY_U64(); /* pid, tid */ > > + if (sample_type & PERF_SAMPLE_TIME) > > + COPY_U64(); /* time */ > > + if (sample_type & PERF_SAMPLE_ADDR) > > + REMAP_U64(sample->addr); > > + if (sample_type & PERF_SAMPLE_ID) > > + COPY_U64(); /* id */ > > + if (sample_type & PERF_SAMPLE_STREAM_ID) > > + COPY_U64(); /* stream_id */ > > + if (sample_type & PERF_SAMPLE_CPU) > > + COPY_U64(); /* cpu, res */ > > + if (sample_type & PERF_SAMPLE_PERIOD) > > + COPY_U64(); /* period */ > > + if (sample_type & PERF_SAMPLE_READ) { > > + if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { > > + COPY_U64(); /* value */ > > + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) > > + COPY_U64(); /* time_enabled */ > > + if 
(evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) > > + COPY_U64(); /* time_running */ > > + if (evsel->core.attr.read_format & PERF_FORMAT_ID) > > + COPY_U64(); /* id */ > > + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) > > + COPY_U64(); /* lost */ > > + } else { > > + u64 nr; > > + > > + CHECK_BOUNDS(1, 1); > > + nr = out_array[j++] = in_array[i++]; > > + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) > > + COPY_U64(); /* time_enabled */ > > + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) > > + COPY_U64(); /* time_running */ > > + for (u64 cntr = 0; cntr < nr; cntr++) { > > + COPY_U64(); /* value */ > > + if (evsel->core.attr.read_format & PERF_FORMAT_ID) > > + COPY_U64(); /* id */ > > + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) > > + COPY_U64(); /* lost */ > > + } > > + } > > + } > > + if (sample_type & PERF_SAMPLE_CALLCHAIN) { > > + u64 nr; > > + > > + CHECK_BOUNDS(1, 1); > > + nr = out_array[j++] = in_array[i++]; > > + > > + for (u64 cntr = 0; cntr < nr; cntr++) { > > + CHECK_BOUNDS(1, 1); > > + addr = in_array[i++]; > > + if (addr >= PERF_CONTEXT_MAX) { > > + out_array[j++] = addr; > > + switch (addr) { > > + case PERF_CONTEXT_HV: > > + cpumode = PERF_RECORD_MISC_HYPERVISOR; > > + break; > > + case PERF_CONTEXT_KERNEL: > > + cpumode = PERF_RECORD_MISC_KERNEL; > > + break; > > + case PERF_CONTEXT_USER: > > + cpumode = PERF_RECORD_MISC_USER; > > + break; > > + case PERF_CONTEXT_GUEST: > > + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; > > + break; > > + case PERF_CONTEXT_GUEST_KERNEL: > > + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; > > + break; > > + case PERF_CONTEXT_GUEST_USER: > > + cpumode = PERF_RECORD_MISC_GUEST_USER; > > + break; > > + case PERF_CONTEXT_USER_DEFERRED: > > + /* > > + * Immediately followed by a 64-bit > > + * stitching cookie. Skip/Copy it! 
> > + */ > > + CHECK_BOUNDS(1, 1); > > + out_array[j++] = in_array[i++]; > > + cntr++; > > + break; > > + default: > > + pr_debug("invalid callchain context: %"PRIx64"\n", addr); > > + ret = 0; > > + goto out_put; > > + } > > + continue; > > + } > > + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr); > > + } > > + } > > + if (sample_type & PERF_SAMPLE_RAW) { > > + size_t bytes = sizeof(u32) + sample->raw_size; > > + size_t u64_words = (bytes + 7) / 8; > > + > > + if (i + u64_words > max_i || j + u64_words > max_j) { > > + ret = -EFAULT; > > + goto out_put; > > + } > > + memcpy(&out_array[j], &in_array[i], bytes); > > + i += u64_words; > > + j += u64_words; > > + /* > > + * TODO: certain raw samples can be remapped, such as > > + * tracepoints by examining their fields. > > + */ > > + pr_debug("Dropping raw samples as possible ASLR leak\n"); > > + ret = 0; > > + goto out_put; > > + } > > + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { > > + u64 nr; > > + > > + CHECK_BOUNDS(1, 1); > > + nr = out_array[j++] = in_array[i++]; > > + > > + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) > > + COPY_U64(); /* hw_idx */ > > + > > + if (nr > (ULLONG_MAX / 3)) { > > + ret = -EFAULT; > > + goto out_put; > > + } > > + if (nr * 3 > max_i - i || nr * 3 > max_j - j) { > > + ret = -EFAULT; > > + goto out_put; > > + } > > + for (u64 cntr = 0; cntr < nr; cntr++) { > > + out_array[j++] = aslr_tool__remap_address(aslr, thread, > > + sample->cpumode, > > + in_array[i++]); /* from */ > > + out_array[j++] = aslr_tool__remap_address(aslr, thread, > > + sample->cpumode, > > + in_array[i++]); /* to */ > > + out_array[j++] = in_array[i++]; /* flags */ > > + } > > + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) { > > + if (nr > max_i - i || nr > max_j - j) { > > + ret = -EFAULT; > > + goto out_put; > > + } > > + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); > > + i += nr; > > + j += nr; > > + /* TODO: confirm branch 
counters don't leak ASLR information. */ > > + pr_debug("Dropping sample branch counters as possible ASLR leak\n"); > > + ret = 0; > > + goto out_put; > > + } > > + } > > + if (sample_type & PERF_SAMPLE_REGS_USER) { > > + u64 abi; > > + > > + COPY_U64(); /* abi */ > > + abi = out_array[j-1]; > > + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { > > + u64 nr = hweight64(evsel->core.attr.sample_regs_user); > > + > > + if (nr > max_i - i || nr > max_j - j) { > > + ret = -EFAULT; > > + goto out_put; > > + } > > + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); > > + i += nr; > > + j += nr; > > + } > > + /* TODO: can this be less conservative? */ > > + pr_debug("Dropping regs user sample as possible ASLR leak\n"); > > + ret = 0; > > + goto out_put; > > Does this mean you drop samples if they contain registers? Yep, as noted by the TODO. > > > + } > > + if (sample_type & PERF_SAMPLE_STACK_USER) { > > + u64 size; > > + > > + CHECK_BOUNDS(1, 1); > > + size = out_array[j++] = in_array[i++]; > > + if (size > 0) { > > + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); > > + > > + if (u64_words > max_i - i || u64_words > max_j - j) { > > + ret = -EFAULT; > > + goto out_put; > > + } > > + memcpy(&out_array[j], &in_array[i], size); > > + if (size % 8) { > > + size_t pad = 8 - (size % 8); > > + > > + memset(((char *)&out_array[j]) + size, 0, pad); > > + } > > + i += u64_words; > > + j += u64_words; > > + > > + COPY_U64(); /* dyn_size */ > > + } > > + /* TODO: can this be less conservative? 
*/ > > + pr_debug("Dropping stack user sample as possible ASLR leak\n"); > > + ret = 0; > > + goto out_put; > > + } > > + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) > > + COPY_U64(); /* perf_sample_weight */ > > + if (sample_type & PERF_SAMPLE_DATA_SRC) > > + COPY_U64(); /* data_src */ > > + if (sample_type & PERF_SAMPLE_TRANSACTION) > > + COPY_U64(); /* transaction */ > > + if (sample_type & PERF_SAMPLE_REGS_INTR) { > > + u64 abi; > > + > > + COPY_U64(); /* abi */ > > + abi = out_array[j-1]; > > + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { > > + u64 nr = hweight64(evsel->core.attr.sample_regs_intr); > > + > > + if (nr > max_i - i || nr > max_j - j) { > > + ret = -EFAULT; > > + goto out_put; > > + } > > + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); > > + i += nr; > > + j += nr; > > + } > > + /* TODO: can this be less conservative? */ > > + pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); > > + ret = 0; > > + goto out_put; > > + } > > + if (sample_type & PERF_SAMPLE_PHYS_ADDR) { > > + COPY_U64(); /* phys_addr */ > > + /* TODO: can this be less conservative? */ > > + pr_debug("Dropping physical address sample as possible ASLR leak\n"); > > + ret = 0; > > + goto out_put; > > + } > > + if (sample_type & PERF_SAMPLE_CGROUP) > > + COPY_U64(); /* cgroup */ > > + if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) > > + COPY_U64(); /* data_page_size */ > > + if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) > > + COPY_U64(); /* code_page_size */ > > + > > + if (sample_type & PERF_SAMPLE_AUX) { > > + u64 size; > > + > > + CHECK_BOUNDS(1, 1); > > + size = out_array[j++] = in_array[i++]; > > + if (size > 0) { > > + size_t u64_words = size / 8 + (size % 8 ? 
1 : 0); > > + > > + if (u64_words > max_i - i || u64_words > max_j - j) { > > + ret = -EFAULT; > > + goto out_put; > > + } > > + memcpy(&out_array[j], &in_array[i], size); > > + if (size % 8) { > > + size_t pad = 8 - (size % 8); > > + > > + memset(((char *)&out_array[j]) + size, 0, pad); > > + } > > + i += u64_words; > > + j += u64_words; > > + } > > + /* TODO: can this be less conservative? */ > > + pr_debug("Dropping aux sample as possible ASLR leak\n"); > > + ret = 0; > > + goto out_put; > > + } > > Can you use perf_event__synthesize_sample()? I'd prefer not to as I think working field by field is worthwhile in convincing ourselves of a lack of ASLR leaks. Punting this into perf_sample and synthesis opens up gaps for bugs to be introduced. > > + > > + if (evsel__is_offcpu_event(evsel)) { > > + /* TODO: can this be less conservative? */ > > + pr_debug("Dropping off-CPU sample as possible ASLR leak\n"); > > Why not remap the address? No reason, but support wasn't a first priority, hence the TODO. 
Thanks, Ian > Thanks, > Namhyung > > > + ret = 0; > > + goto out_put; > > + } > > + > > + new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); > > + > > + perf_sample__init(&new_sample, /*all=*/ true); > > + ret = evsel__parse_sample(evsel, new_event, &new_sample); > > + if (ret) { > > + perf_sample__exit(&new_sample); > > + goto out_put; > > + } > > + > > + ret = delegate->sample(delegate, new_event, &new_sample, evsel, machine); > > + perf_sample__exit(&new_sample); > > + > > +out_put: > > + thread__put(thread); > > + return ret; > > +} > > + > > +#undef CHECK_BOUNDS > > +#undef COPY_U64 > > +#undef REMAP_U64 > > + > > + > > +static int aslr_tool__process_attr(const struct perf_tool *tool, > > + union perf_event *event, > > + struct evlist **pevlist) > > +{ > > + struct delegate_tool *del_tool; > > + struct aslr_tool *aslr; > > + struct perf_tool *delegate; > > + union perf_event *new_event; > > + > > + del_tool = container_of(tool, struct delegate_tool, tool); > > + aslr = container_of(del_tool, struct aslr_tool, tool); > > + delegate = aslr->tool.delegate; > > + new_event = (union perf_event *)aslr->event_copy; > > + > > + memcpy(&new_event->attr, &event->attr, event->attr.header.size); > > + if (new_event->attr.attr.type == PERF_TYPE_BREAKPOINT) > > + new_event->attr.attr.bp_addr = 0; /* Conservatively remove addresses. 
*/ > > + > > + return delegate->attr(delegate, new_event, pevlist); > > +} > > + > > +static s64 aslr_tool__process_auxtrace(const struct perf_tool *tool __maybe_unused, > > + struct perf_session *session, > > + union perf_event *event) > > +{ > > + if (perf_data__is_pipe(session->data)) { > > + int err = skipn(perf_data__fd(session->data), event->auxtrace.size); > > + > > + if (err < 0) > > + return err; > > + } > > + return event->auxtrace.size; > > +} > > + > > +static int aslr_tool__process_auxtrace_info(const struct perf_tool *tool __maybe_unused, > > + struct perf_session *session __maybe_unused, > > + union perf_event *event __maybe_unused) > > +{ > > + return 0; > > +} > > + > > +static int aslr_tool__process_auxtrace_error(const struct perf_tool *tool __maybe_unused, > > + struct perf_session *session __maybe_unused, > > + union perf_event *event __maybe_unused) > > +{ > > + return 0; > > +} > > + > > +static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) > > +{ > > + delegate_tool__init(&aslr->tool, delegate); > > + aslr->tool.tool.ordered_events = true; > > + > > + machines__init(&aslr->machines); > > + > > + hashmap__init(&aslr->remap_addresses, > > + remap_addresses__hash, remap_addresses__equal, > > + /*ctx=*/NULL); > > + hashmap__init(&aslr->top_addresses, > > + top_addresses__hash, top_addresses__equal, > > + /*ctx=*/NULL); > > + aslr->first_kernel_mapping = true; > > + > > + aslr->tool.tool.sample = aslr_tool__process_sample; > > + /* read - reads a counter, okay to delegate. */ > > + aslr->tool.tool.mmap = aslr_tool__process_mmap; > > + aslr->tool.tool.mmap2 = aslr_tool__process_mmap2; > > + aslr->tool.tool.comm = aslr_tool__process_comm; > > + aslr->tool.tool.fork = aslr_tool__process_fork; > > + aslr->tool.tool.exit = aslr_tool__process_exit; > > + /* namesspaces, cgroup, lost, lost_sample, aux, */ > > + /* itrace_start, aux_output_hw_id, context_switch, throttle, unthrottle */ > > + /* - no virtual addresses. 
*/ > > + aslr->tool.tool.ksymbol = aslr_tool__process_ksymbol; > > + /* bpf - no virtual address. */ > > + aslr->tool.tool.text_poke = aslr_tool__process_text_poke; > > + aslr->tool.tool.attr = aslr_tool__process_attr; > > + /* event_update, tracing_data, finished_round, build_id, id_index, */ > > + /* event_update, tracing_data, finished_round, build_id, id_index, */ > > + /* auxtrace_info, auxtrace_error, time_conv, thread_map, cpu_map, */ > > + /* stat_config, stat, feature, finished_init, bpf_metadata, compressed, */ > > + /* auxtrace - no virtual addresses. */ > > + aslr->tool.tool.auxtrace = aslr_tool__process_auxtrace; > > + aslr->tool.tool.auxtrace_info = aslr_tool__process_auxtrace_info; > > + aslr->tool.tool.auxtrace_error = aslr_tool__process_auxtrace_error; > > +} > > + > > +struct perf_tool *aslr_tool__new(struct perf_tool *delegate) > > +{ > > + struct aslr_tool *aslr = zalloc(sizeof(*aslr)); > > + > > + if (!aslr) > > + return NULL; > > + > > + aslr_tool__init(aslr, delegate); > > + return &aslr->tool.tool; > > +} > > + > > +void aslr_tool__delete(struct perf_tool *tool) > > +{ > > + struct delegate_tool *del_tool; > > + struct aslr_tool *aslr; > > + struct hashmap_entry *cur; > > + size_t bkt; > > + > > + if (!tool) > > + return; > > + > > + del_tool = container_of(tool, struct delegate_tool, tool); > > + aslr = container_of(del_tool, struct aslr_tool, tool); > > + > > + hashmap__for_each_entry(&aslr->remap_addresses, cur, bkt) { > > + struct remap_addresses_key *key = (struct remap_addresses_key *)cur->pkey; > > + > > + if (key) > > + dso__put(key->dso); > > + zfree(&cur->pkey); > > + zfree(&cur->pvalue); > > + } > > + hashmap__for_each_entry(&aslr->top_addresses, cur, bkt) { > > + zfree(&cur->pvalue); > > + } > > + > > + hashmap__clear(&aslr->remap_addresses); > > + hashmap__clear(&aslr->top_addresses); > > + machines__destroy_kernel_maps(&aslr->machines); > > + machines__exit(&aslr->machines); > > + free(aslr); > > +} > > diff --git 
a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h > > new file mode 100644 > > index 000000000000..ea984d82681f > > --- /dev/null > > +++ b/tools/perf/util/aslr.h > > @@ -0,0 +1,10 @@ > > +/* SPDX-License-Identifier: GPL-2.0 */ > > +#ifndef __PERF_ASLR_H > > +#define __PERF_ASLR_H > > + > > +struct perf_tool; > > + > > +struct perf_tool *aslr_tool__new(struct perf_tool *delegate); > > +void aslr_tool__delete(struct perf_tool *aslr); > > + > > +#endif /* __PERF_ASLR_H */ > > -- > > 2.54.0.545.g6539524ca2-goog > > ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v5 5/5] perf test: Add inject ASLR test 2026-05-06 0:45 ` [PATCH v5 0/5] " Ian Rogers ` (3 preceding siblings ...) 2026-05-06 0:45 ` [PATCH v5 4/5] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses Ian Rogers @ 2026-05-06 0:45 ` Ian Rogers 2026-05-07 15:58 ` James Clark 2026-05-08 8:27 ` [PATCH v6 0/6] perf tools: Add inject --aslr feature and prerequisite robustness fixes Ian Rogers 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-05-06 0:45 UTC (permalink / raw) To: irogers, acme, gmx, namhyung Cc: adrian.hunter, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz Add a new shell test `inject_aslr.sh` to verify the `perf inject --aslr` feature. The test covers: - Basic address remapping for user space samples. - Pipe mode coverage for `perf record` piped into `perf inject --aslr`. - Callchain address remapping. - Consistency of `perf report` output before and after injection. - Pipe mode report consistency. - Dropping of samples that leak ASLR info (physical addresses). - Kernel address remapping (skipping gracefully if permissions restrict recording the kernel map). - Kernel report consistency with address normalization. The test suite is hardened with global 'set -o pipefail' assertions to catch pipeline failures, stream-consuming awk processors to handle SIGPIPE signals, and a dedicated pipe output scenario validating raw 'perf inject -o -' stdout streams. Assisted-by: Gemini-CLI:Google Gemini 3 Signed-off-by: Ian Rogers <irogers@google.com> --- v5: Harden test suite verification pipelines by upgrading report checks to strict sorted line-by-line diff comparisons to accommodate remapped pointer shifts. Append || true fallback operators to grep-v filtering pipelines to prevent the shell test from spuriously aborting under set -o pipefail on empty inputs, ensuring graceful failure checks trigger correctly. 
v4: Reorder set -e/pipefail to prevent temp file leakage in root directory on unprivileged record failures when run as root. Ensure grep report filters have || true suffixes to avoid aborts under pipefail. Add comprehensive pipe stdout injection attributes validation case. v3: Harden script with pipefail, SIGPIPE awk pipeline fixes, callchain empty data asserts, baseline sample verification, and grep report abort protections. Reorder set -e/pipefail to prevent stack leaks in mktemp failures. v2: Add sum comparison for kernel overhead and 32-bit math corrections. Add awk with gsub for trailing dots and brackets normalizations. Trap EXIT, prevent race conditions and avoid hardcoded perf binary. --- tools/perf/tests/shell/inject_aslr.sh | 459 ++++++++++++++++++++++++++ 1 file changed, 459 insertions(+) create mode 100755 tools/perf/tests/shell/inject_aslr.sh diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh new file mode 100755 index 000000000000..cdc3aa94de63 --- /dev/null +++ b/tools/perf/tests/shell/inject_aslr.sh @@ -0,0 +1,459 @@ +#!/bin/bash +# perf inject --aslr test +# SPDX-License-Identifier: GPL-2.0 + +set -e +set -o pipefail + +shelldir=$(dirname "$0") +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + +sym="noploop" + +skip_test_missing_symbol ${sym} + +# Create global temp directory +temp_dir=$(mktemp -d /tmp/perf-test-aslr.XXXXXXXXXX) +data="${temp_dir}/perf.data" +data2="${temp_dir}/perf.data2" + +prog="perf test -w noploop" +[ "$(uname -m)" = "s390x" ] && prog="$prog 3" +err=0 + + + +cleanup() { + # Check if temp_dir is set and looks sane before removing + if [[ "${temp_dir}" =~ ^/tmp/perf-test-aslr\. 
]]; then + rm -rf "${temp_dir}" + fi +} + +trap_cleanup() { + cleanup + exit 1 +} + +trap cleanup EXIT +trap trap_cleanup TERM INT + +get_noploop_addr() { + local file=$1 + perf script -i "$file" | awk ' + BEGIN { found=0 } + { + for (i=1; i<=NF; i++) { + if ($i ~ /noploop\+/) { + if (!found) { + print $(i-1) + found=1 + } + } + } + }' +} + +test_basic_aslr() { + echo "Test basic ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.basic.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.basic.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -v --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Basic ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Basic ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Basic ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Basic ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Basic ASLR test [Success]" + fi +} + +test_pipe_aslr() { + echo "Test pipe mode ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe.XXXXXX") + + # Use tee to save the original pipe data for comparison + perf record -e task-clock:u -o - ${prog} | tee "${data}" | perf inject --aslr -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Pipe ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Pipe ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Pipe ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Pipe ASLR test [Failed - addresses are not remapped]" 
+ err=1 + else + echo "Pipe ASLR test [Success]" + fi +} + +test_callchain_aslr() { + echo "Test Callchain ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.callchain.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.callchain.XXXXXX") + + perf record -g -e task-clock:u -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Callchain ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Callchain ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Callchain ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Callchain ASLR test [Failed - addresses are not remapped]" + err=1 + else + # Extract callchain addresses (indented lines starting with hex addresses) + orig_callchain=$(perf script -i "${data}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + new_callchain=$(perf script -i "${data2}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + + if [ -z "$orig_callchain" ]; then + echo "Callchain ASLR test [Failed - no callchain samples in original file]" + err=1 + elif [ -z "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain data was dropped]" + err=1 + elif [ "$orig_callchain" = "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain addresses were not remapped]" + err=1 + else + echo "Callchain ASLR test [Success]" + fi + fi +} + +test_report_aslr() { + echo "Test perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i 
"${data}" -o "${data_clean}" + perf inject -v -b --aslr -i "${data}" -o "${data2}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Report ASLR test [Success]" + fi +} + +test_pipe_report_aslr() { + echo "Test pipe mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + # Use tee to save the original pipe data, then process it with inject -b + perf record -e task-clock:u -o - ${prog} | \ + tee "${data}" | \ + perf inject -b --aslr -o "${data2}" + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || 
true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Pipe Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Report ASLR test [Success]" + fi +} + +test_pipe_out_report_aslr() { + echo "Test pipe output mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_out_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf inject -b --aslr -i "${data}" -o - | perf report -i - --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! 
-s "${report1_clean}" ]; then + echo "Pipe Output Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Output Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Output Report ASLR test [Success]" + fi +} + +test_dropped_samples() { + echo "Test dropped samples (phys-data)" + local data + data=$(mktemp "${temp_dir}/perf.data.dropped.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.dropped.XXXXXX") + + # Check if --phys-data is supported by recording a short run + if ! perf record -e task-clock:u --phys-data -o "${data}" -- sleep 0.1 > /dev/null 2>&1; then + echo "Skipping dropped samples test as --phys-data is not supported" + return + fi + + perf record -e task-clock:u --phys-data -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + # Verify that the original file actually contained samples! + orig_samples=$(perf script -i "${data}" | wc -l) + if [ "$orig_samples" -eq 0 ]; then + echo "Dropped samples test [Failed - no samples in original file]" + err=1 + else + # Verify that samples are dropped. + samples_count=$(perf script -i "${data2}" | wc -l) + + if [ "$samples_count" -gt 0 ]; then + echo "Dropped samples test [Failed - samples were not dropped]" + err=1 + else + echo "Dropped samples test [Success]" + fi + fi +} + +test_kernel_aslr() { + echo "Test kernel ASLR remapping" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${prog} > "${log_file}" 2>&1; then + echo "Skipping kernel ASLR test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel ASLR test as kernel map could not be recorded (permissions restricted)" + return + fi + + perf inject -v --aslr -i "${kdata}" -o "${kdata2}" + + # Check if kernel addresses are remapped. + # Find the field that ends with :k: (the event name) and take the next field! + orig_addr=$(perf script -i "${kdata}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + new_addr=$(perf script -i "${kdata2}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + + echo "Kernel ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Kernel ASLR test [Failed - no kernel samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Kernel ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Kernel ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Kernel ASLR test [Success]" + fi +} + +test_kernel_report_aslr() { + echo "Test kernel perf report consistency" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel_report.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_report_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${prog} > "${log_file}" 2>&1; then + echo "Skipping kernel report test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel report test as kernel map could not be recorded (permissions restricted)" + return + fi + + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i "${kdata}" -o "${data_clean}" + perf inject -v -b --aslr -i "${kdata}" -o "${kdata2}" + + local report1="${temp_dir}/report_kernel1" + local report2="${temp_dir}/report_kernel2" + local report1_clean="${temp_dir}/report_kernel1.clean" + local report2_clean="${temp_dir}/report_kernel2.clean" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${kdata2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' > "${report2_clean}" || true + + # Normalize kernel DSOs and addresses in clean reports + # This allows kernel modules to be either a module or kernel.kallsyms + local report1_norm="${temp_dir}/report_kernel1.norm" + local report2_norm="${temp_dir}/report_kernel2.norm" + local diff_file="${temp_dir}/diff_kernel" + + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report1_clean}" | \ + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | sort > "${report1_norm}" || true + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report2_clean}" | \ + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | sort > "${report2_norm}" || true + + diff -u -w "${report1_norm}" "${report2_norm}" > "${diff_file}" || true + + if [ ! 
-s "${report1_norm}" ]; then + echo "Kernel Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Kernel Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Kernel Report ASLR test [Success]" + fi +} + +test_basic_aslr +test_pipe_aslr +test_callchain_aslr +test_report_aslr +test_pipe_report_aslr +test_pipe_out_report_aslr +test_dropped_samples +test_kernel_aslr +test_kernel_report_aslr + +cleanup +exit $err -- 2.54.0.545.g6539524ca2-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v5 5/5] perf test: Add inject ASLR test 2026-05-06 0:45 ` [PATCH v5 5/5] perf test: Add inject ASLR test Ian Rogers @ 2026-05-07 15:58 ` James Clark 2026-05-07 16:17 ` Ian Rogers 0 siblings, 1 reply; 183+ messages in thread From: James Clark @ 2026-05-07 15:58 UTC (permalink / raw) To: Ian Rogers Cc: adrian.hunter, jolsa, linux-kernel, linux-perf-users, mingo, peterz, acme, gmx, namhyung On 06/05/2026 1:45 am, Ian Rogers wrote: > Add a new shell test `inject_aslr.sh` to verify the `perf inject --aslr` > feature. The test covers: > - Basic address remapping for user space samples. > - Pipe mode coverage for `perf record` piped into `perf inject --aslr`. > - Callchain address remapping. > - Consistency of `perf report` output before and after injection. > - Pipe mode report consistency. > - Dropping of samples that leak ASLR info (physical addresses). > - Kernel address remapping (skipping gracefully if permissions restrict > recording the kernel map). > - Kernel report consistency with address normalization. > > The test suite is hardened with global 'set -o pipefail' assertions to catch > pipeline failures, stream-consuming awk processors to handle SIGPIPE signals, > and a dedicated pipe output scenario validating raw 'perf inject -o -' stdout > streams. > > Assisted-by: Gemini-CLI:Google Gemini 3 > Signed-off-by: Ian Rogers <irogers@google.com> > --- > v5: Harden test suite verification pipelines by upgrading report checks to > strict sorted line-by-line diff comparisons to accommodate remapped pointer > shifts. Append || true fallback operators to grep-v filtering pipelines to > prevent the shell test from spuriously aborting under set -o pipefail on > empty inputs, ensuring graceful failure checks trigger correctly. > v4: Reorder set -e/pipefail to prevent temp file leakage in root directory on > unprivileged record failures when run as root. Ensure grep report filters > have || true suffixes to avoid aborts under pipefail. 
Add comprehensive > pipe stdout injection attributes validation case. > v3: Harden script with pipefail, SIGPIPE awk pipeline fixes, callchain empty > data asserts, baseline sample verification, and grep report abort > protections. Reorder set -e/pipefail to prevent stack leaks in mktemp > failures. > v2: Add sum comparison for kernel overhead and 32-bit math corrections. Add > awk with gsub for trailing dots and brackets normalizations. Trap EXIT, > prevent race conditions and avoid hardcoded perf binary. > --- > tools/perf/tests/shell/inject_aslr.sh | 459 ++++++++++++++++++++++++++ > 1 file changed, 459 insertions(+) > create mode 100755 tools/perf/tests/shell/inject_aslr.sh > > diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh > new file mode 100755 > index 000000000000..cdc3aa94de63 > --- /dev/null > +++ b/tools/perf/tests/shell/inject_aslr.sh > @@ -0,0 +1,459 @@ > +#!/bin/bash > +# perf inject --aslr test > +# SPDX-License-Identifier: GPL-2.0 > + > +set -e > +set -o pipefail > + > +shelldir=$(dirname "$0") > +# shellcheck source=lib/perf_has_symbol.sh > +. "${shelldir}"/lib/perf_has_symbol.sh > + > +sym="noploop" > + > +skip_test_missing_symbol ${sym} > + > +# Create global temp directory > +temp_dir=$(mktemp -d /tmp/perf-test-aslr.XXXXXXXXXX) > +data="${temp_dir}/perf.data" > +data2="${temp_dir}/perf.data2" > + > +prog="perf test -w noploop" > +[ "$(uname -m)" = "s390x" ] && prog="$prog 3" > +err=0 > + > + > + > +cleanup() { > + # Check if temp_dir is set and looks sane before removing > + if [[ "${temp_dir}" =~ ^/tmp/perf-test-aslr\. 
]]; then > + rm -rf "${temp_dir}" > + fi > +} > + > +trap_cleanup() { > + cleanup > + exit 1 > +} > + > +trap cleanup EXIT > +trap trap_cleanup TERM INT > + > +get_noploop_addr() { > + local file=$1 > + perf script -i "$file" | awk ' > + BEGIN { found=0 } > + { > + for (i=1; i<=NF; i++) { > + if ($i ~ /noploop\+/) { > + if (!found) { > + print $(i-1) > + found=1 > + } > + } > + } > + }' > +} > + > +test_basic_aslr() { > + echo "Test basic ASLR remapping" > + local data > + data=$(mktemp "${temp_dir}/perf.data.basic.XXXXXX") > + local data2 > + data2=$(mktemp "${temp_dir}/perf.data2.basic.XXXXXX") > + > + perf record -e task-clock:u -o "${data}" ${prog} > + perf inject -v --aslr -i "${data}" -o "${data2}" > + > + orig_addr=$(get_noploop_addr "${data}") > + new_addr=$(get_noploop_addr "${data2}") > + > + echo "Basic ASLR: orig_addr=$orig_addr, new_addr=$new_addr" > + > + if [ -z "$orig_addr" ]; then > + echo "Basic ASLR test [Failed - no noploop samples in original file]" > + err=1 > + elif [ -z "$new_addr" ]; then > + echo "Basic ASLR test [Failed - could not find remapped address]" > + err=1 > + elif [ "$orig_addr" = "$new_addr" ]; then > + echo "Basic ASLR test [Failed - addresses are not remapped]" > + err=1 > + else > + echo "Basic ASLR test [Success]" > + fi > +} > + > +test_pipe_aslr() { > + echo "Test pipe mode ASLR remapping" > + local data > + data=$(mktemp "${temp_dir}/perf.data.pipe.XXXXXX") > + local data2 > + data2=$(mktemp "${temp_dir}/perf.data2.pipe.XXXXXX") > + > + # Use tee to save the original pipe data for comparison > + perf record -e task-clock:u -o - ${prog} | tee "${data}" | perf inject --aslr -o "${data2}" > + > + orig_addr=$(get_noploop_addr "${data}") > + new_addr=$(get_noploop_addr "${data2}") > + > + echo "Pipe ASLR: orig_addr=$orig_addr, new_addr=$new_addr" > + > + if [ -z "$orig_addr" ]; then > + echo "Pipe ASLR test [Failed - no noploop samples in original file]" > + err=1 > + elif [ -z "$new_addr" ]; then > + echo "Pipe ASLR test 
[Failed - could not find remapped address]" > + err=1 > + elif [ "$orig_addr" = "$new_addr" ]; then > + echo "Pipe ASLR test [Failed - addresses are not remapped]" > + err=1 > + else > + echo "Pipe ASLR test [Success]" > + fi > +} > + > +test_callchain_aslr() { > + echo "Test Callchain ASLR remapping" > + local data > + data=$(mktemp "${temp_dir}/perf.data.callchain.XXXXXX") > + local data2 > + data2=$(mktemp "${temp_dir}/perf.data2.callchain.XXXXXX") > + > + perf record -g -e task-clock:u -o "${data}" ${prog} > + perf inject --aslr -i "${data}" -o "${data2}" > + > + orig_addr=$(get_noploop_addr "${data}") > + new_addr=$(get_noploop_addr "${data2}") > + > + echo "Callchain ASLR: orig_addr=$orig_addr, new_addr=$new_addr" > + > + if [ -z "$orig_addr" ]; then > + echo "Callchain ASLR test [Failed - no noploop samples in original file]" > + err=1 > + elif [ -z "$new_addr" ]; then > + echo "Callchain ASLR test [Failed - could not find remapped address]" Hi Ian, This test fails on Arm. I believe it's because on Arm we request the link register to be sampled with frame pointer unwinds. Then the aslr tool drops all the samples because it sees that user regs were sampled: /* TODO: can this be less conservative? */ pr_debug("Dropping regs user sample as possible ASLR leak\n"); ret = 0; goto out_put; I think maybe that comment is onto something. Perhaps the user regs can be zeroed instead of dropping the sample. Then the frame pointer unwind will still work on Arm and the aslr test will pass. We just won't be able to use the link register to add the leaf frame caller, but that's not a big deal. 
James > + err=1 > + elif [ "$orig_addr" = "$new_addr" ]; then > + echo "Callchain ASLR test [Failed - addresses are not remapped]" > + err=1 > + else > + # Extract callchain addresses (indented lines starting with hex addresses) > + orig_callchain=$(perf script -i "${data}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') > + new_callchain=$(perf script -i "${data2}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') > + > + if [ -z "$orig_callchain" ]; then > + echo "Callchain ASLR test [Failed - no callchain samples in original file]" > + err=1 > + elif [ -z "$new_callchain" ]; then > + echo "Callchain ASLR test [Failed - callchain data was dropped]" > + err=1 > + elif [ "$orig_callchain" = "$new_callchain" ]; then > + echo "Callchain ASLR test [Failed - callchain addresses were not remapped]" > + err=1 > + else > + echo "Callchain ASLR test [Success]" > + fi > + fi > +} > + > +test_report_aslr() { > + echo "Test perf report consistency" > + local data > + data=$(mktemp "${temp_dir}/perf.data.report.XXXXXX") > + local data2 > + data2=$(mktemp "${temp_dir}/perf.data2.report.XXXXXX") > + local data_clean > + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") > + > + perf record -e task-clock:u -o "${data}" ${prog} > + # Use -b to inject build-ids and force ordered events processing in both > + perf inject -b -i "${data}" -o "${data_clean}" > + perf inject -v -b --aslr -i "${data}" -o "${data2}" > + > + local report1="${temp_dir}/report1" > + local report2="${temp_dir}/report2" > + local report1_clean="${temp_dir}/report1.clean" > + local report2_clean="${temp_dir}/report2.clean" > + local diff_file="${temp_dir}/diff" > + > + perf report -i "${data_clean}" --stdio > "${report1}" > + perf report -i "${data2}" --stdio > "${report2}" > + > + # Strip headers and compare lines with percentages > + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true > + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true > + > + diff -u -w 
"${report1_clean}" "${report2_clean}" > "${diff_file}" || true > + > + if [ ! -s "${report1_clean}" ]; then > + echo "Report ASLR test [Failed - no samples captured]" > + err=1 > + elif [ -s "${diff_file}" ]; then > + echo "Report ASLR test [Failed - reports differ]" > + echo "Showing first 20 lines of diff:" > + head -n 20 "${diff_file}" > + err=1 > + else > + echo "Report ASLR test [Success]" > + fi > +} > + > +test_pipe_report_aslr() { > + echo "Test pipe mode perf report consistency" > + local data > + data=$(mktemp "${temp_dir}/perf.data.pipe_report.XXXXXX") > + local data2 > + data2=$(mktemp "${temp_dir}/perf.data2.pipe_report.XXXXXX") > + local data_clean > + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") > + > + # Use tee to save the original pipe data, then process it with inject -b > + perf record -e task-clock:u -o - ${prog} | \ > + tee "${data}" | \ > + perf inject -b --aslr -o "${data2}" > + perf inject -b -i "${data}" -o "${data_clean}" > + > + local report1="${temp_dir}/report1" > + local report2="${temp_dir}/report2" > + local report1_clean="${temp_dir}/report1.clean" > + local report2_clean="${temp_dir}/report2.clean" > + local diff_file="${temp_dir}/diff" > + > + perf report -i "${data_clean}" --stdio > "${report1}" > + perf report -i "${data2}" --stdio > "${report2}" > + > + # Strip headers and compare lines with percentages > + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true > + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true > + > + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true > + > + if [ ! 
-s "${report1_clean}" ]; then > + echo "Pipe Report ASLR test [Failed - no samples captured]" > + err=1 > + elif [ -s "${diff_file}" ]; then > + echo "Pipe Report ASLR test [Failed - reports differ]" > + echo "Showing first 20 lines of diff:" > + head -n 20 "${diff_file}" > + err=1 > + else > + echo "Pipe Report ASLR test [Success]" > + fi > +} > + > +test_pipe_out_report_aslr() { > + echo "Test pipe output mode perf report consistency" > + local data > + data=$(mktemp "${temp_dir}/perf.data.pipe_out_report.XXXXXX") > + local data_clean > + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") > + > + perf record -e task-clock:u -o "${data}" ${prog} > + perf inject -b -i "${data}" -o "${data_clean}" > + > + local report1="${temp_dir}/report1" > + local report2="${temp_dir}/report2" > + local report1_clean="${temp_dir}/report1.clean" > + local report2_clean="${temp_dir}/report2.clean" > + local diff_file="${temp_dir}/diff" > + > + perf report -i "${data_clean}" --stdio > "${report1}" > + perf inject -b --aslr -i "${data}" -o - | perf report -i - --stdio > "${report2}" > + > + # Strip headers and compare lines with percentages > + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true > + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true > + > + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true > + > + if [ ! 
-s "${report1_clean}" ]; then > + echo "Pipe Output Report ASLR test [Failed - no samples captured]" > + err=1 > + elif [ -s "${diff_file}" ]; then > + echo "Pipe Output Report ASLR test [Failed - reports differ]" > + echo "Showing first 20 lines of diff:" > + head -n 20 "${diff_file}" > + err=1 > + else > + echo "Pipe Output Report ASLR test [Success]" > + fi > +} > + > +test_dropped_samples() { > + echo "Test dropped samples (phys-data)" > + local data > + data=$(mktemp "${temp_dir}/perf.data.dropped.XXXXXX") > + local data2 > + data2=$(mktemp "${temp_dir}/perf.data2.dropped.XXXXXX") > + > + # Check if --phys-data is supported by recording a short run > + if ! perf record -e task-clock:u --phys-data -o "${data}" -- sleep 0.1 > /dev/null 2>&1; then > + echo "Skipping dropped samples test as --phys-data is not supported" > + return > + fi > + > + perf record -e task-clock:u --phys-data -o "${data}" ${prog} > + perf inject --aslr -i "${data}" -o "${data2}" > + > + # Verify that the original file actually contained samples! > + orig_samples=$(perf script -i "${data}" | wc -l) > + if [ "$orig_samples" -eq 0 ]; then > + echo "Dropped samples test [Failed - no samples in original file]" > + err=1 > + else > + # Verify that samples are dropped. > + samples_count=$(perf script -i "${data2}" | wc -l) > + > + if [ "$samples_count" -gt 0 ]; then > + echo "Dropped samples test [Failed - samples were not dropped]" > + err=1 > + else > + echo "Dropped samples test [Success]" > + fi > + fi > +} > + > +test_kernel_aslr() { > + echo "Test kernel ASLR remapping" > + local kdata > + kdata=$(mktemp "${temp_dir}/perf.data.kernel.XXXXXX") > + local kdata2 > + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel.XXXXXX") > + local log_file > + log_file=$(mktemp "${temp_dir}/kernel_record.log.XXXXXX") > + > + # Try to record kernel samples > + if ! 
perf record -e task-clock:k -o "${kdata}" ${prog} > "${log_file}" 2>&1; then > + echo "Skipping kernel ASLR test as recording failed (maybe no permissions)" > + return > + fi > + > + # Check for warning about kernel map restriction > + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then > + echo "Skipping kernel ASLR test as kernel map could not be recorded (permissions restricted)" > + return > + fi > + > + perf inject -v --aslr -i "${kdata}" -o "${kdata2}" > + > + # Check if kernel addresses are remapped. > + # Find the field that ends with :k: (the event name) and take the next field! > + orig_addr=$(perf script -i "${kdata}" | awk ' > + BEGIN { found=0 } > + { > + for (i=1; i<NF; i++) { > + if ($i ~ /:[k]+:?$/) { > + if (!found) { > + print $(i+1) > + found=1 > + } > + } > + } > + }') > + new_addr=$(perf script -i "${kdata2}" | awk ' > + BEGIN { found=0 } > + { > + for (i=1; i<NF; i++) { > + if ($i ~ /:[k]+:?$/) { > + if (!found) { > + print $(i+1) > + found=1 > + } > + } > + } > + }') > + > + echo "Kernel ASLR: orig_addr=$orig_addr, new_addr=$new_addr" > + > + if [ -z "$orig_addr" ]; then > + echo "Kernel ASLR test [Failed - no kernel samples in original file]" > + err=1 > + elif [ -z "$new_addr" ]; then > + echo "Kernel ASLR test [Failed - could not find remapped address]" > + err=1 > + elif [ "$orig_addr" = "$new_addr" ]; then > + echo "Kernel ASLR test [Failed - addresses are not remapped]" > + err=1 > + else > + echo "Kernel ASLR test [Success]" > + fi > +} > + > +test_kernel_report_aslr() { > + echo "Test kernel perf report consistency" > + local kdata > + kdata=$(mktemp "${temp_dir}/perf.data.kernel_report.XXXXXX") > + local kdata2 > + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel_report.XXXXXX") > + local data_clean > + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") > + local log_file > + log_file=$(mktemp "${temp_dir}/kernel_report_record.log.XXXXXX") > + > + # Try to record kernel samples > + if ! 
perf record -e task-clock:k -o "${kdata}" ${prog} > "${log_file}" 2>&1; then > + echo "Skipping kernel report test as recording failed (maybe no permissions)" > + return > + fi > + > + # Check for warning about kernel map restriction > + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then > + echo "Skipping kernel report test as kernel map could not be recorded (permissions restricted)" > + return > + fi > + > + # Use -b to inject build-ids and force ordered events processing in both > + perf inject -b -i "${kdata}" -o "${data_clean}" > + perf inject -v -b --aslr -i "${kdata}" -o "${kdata2}" > + > + local report1="${temp_dir}/report_kernel1" > + local report2="${temp_dir}/report_kernel2" > + local report1_clean="${temp_dir}/report_kernel1.clean" > + local report2_clean="${temp_dir}/report_kernel2.clean" > + > + perf report -i "${data_clean}" --stdio > "${report1}" > + perf report -i "${kdata2}" --stdio > "${report2}" > + > + # Strip headers and compare lines with percentages > + grep '%' "${report1}" | grep -v '^#' > "${report1_clean}" || true > + grep '%' "${report2}" | grep -v '^#' > "${report2_clean}" || true > + > + # Normalize kernel DSOs and addresses in clean reports > + # This allows kernel modules to be either a module or kernel.kallsyms > + local report1_norm="${temp_dir}/report_kernel1.norm" > + local report2_norm="${temp_dir}/report_kernel2.norm" > + local diff_file="${temp_dir}/diff_kernel" > + > + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report1_clean}" | \ > + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | sort > "${report1_norm}" || true > + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report2_clean}" | \ > + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | sort > "${report2_norm}" || true > + > + diff -u -w "${report1_norm}" "${report2_norm}" > "${diff_file}" || true > + > + if [ ! 
-s "${report1_norm}" ]; then > + echo "Kernel Report ASLR test [Failed - no samples captured]" > + err=1 > + elif [ -s "${diff_file}" ]; then > + echo "Kernel Report ASLR test [Failed - reports differ]" > + echo "Showing first 20 lines of diff:" > + head -n 20 "${diff_file}" > + err=1 > + else > + echo "Kernel Report ASLR test [Success]" > + fi > +} > + > +test_basic_aslr > +test_pipe_aslr > +test_callchain_aslr > +test_report_aslr > +test_pipe_report_aslr > +test_pipe_out_report_aslr > +test_dropped_samples > +test_kernel_aslr > +test_kernel_report_aslr > + > +cleanup > +exit $err ^ permalink raw reply [flat|nested] 183+ messages in thread
* Re: [PATCH v5 5/5] perf test: Add inject ASLR test 2026-05-07 15:58 ` James Clark @ 2026-05-07 16:17 ` Ian Rogers 2026-05-08 10:42 ` James Clark 0 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-05-07 16:17 UTC (permalink / raw) To: James Clark Cc: adrian.hunter, jolsa, linux-kernel, linux-perf-users, mingo, peterz, acme, gmx, namhyung On Thu, May 7, 2026 at 8:58 AM James Clark <james.clark@linaro.org> wrote: > > > > On 06/05/2026 1:45 am, Ian Rogers wrote: > > Add a new shell test `inject_aslr.sh` to verify the `perf inject --aslr` > > feature. The test covers: > > - Basic address remapping for user space samples. > > - Pipe mode coverage for `perf record` piped into `perf inject --aslr`. > > - Callchain address remapping. > > - Consistency of `perf report` output before and after injection. > > - Pipe mode report consistency. > > - Dropping of samples that leak ASLR info (physical addresses). > > - Kernel address remapping (skipping gracefully if permissions restrict > > recording the kernel map). > > - Kernel report consistency with address normalization. > > > > The test suite is hardened with global 'set -o pipefail' assertions to catch > > pipeline failures, stream-consuming awk processors to handle SIGPIPE signals, > > and a dedicated pipe output scenario validating raw 'perf inject -o -' stdout > > streams. > > > > Assisted-by: Gemini-CLI:Google Gemini 3 > > Signed-off-by: Ian Rogers <irogers@google.com> > > --- > > v5: Harden test suite verification pipelines by upgrading report checks to > > strict sorted line-by-line diff comparisons to accommodate remapped pointer > > shifts. Append || true fallback operators to grep-v filtering pipelines to > > prevent the shell test from spuriously aborting under set -o pipefail on > > empty inputs, ensuring graceful failure checks trigger correctly. > > v4: Reorder set -e/pipefail to prevent temp file leakage in root directory on > > unprivileged record failures when run as root. 
Ensure grep report filters > > have || true suffixes to avoid aborts under pipefail. Add comprehensive > > pipe stdout injection attributes validation case. > > v3: Harden script with pipefail, SIGPIPE awk pipeline fixes, callchain empty > > data asserts, baseline sample verification, and grep report abort > > protections. Reorder set -e/pipefail to prevent stack leaks in mktemp > > failures. > > v2: Add sum comparison for kernel overhead and 32-bit math corrections. Add > > awk with gsub for trailing dots and brackets normalizations. Trap EXIT, > > prevent race conditions and avoid hardcoded perf binary. > > --- > > tools/perf/tests/shell/inject_aslr.sh | 459 ++++++++++++++++++++++++++ > > 1 file changed, 459 insertions(+) > > create mode 100755 tools/perf/tests/shell/inject_aslr.sh > > > > diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh > > new file mode 100755 > > index 000000000000..cdc3aa94de63 > > --- /dev/null > > +++ b/tools/perf/tests/shell/inject_aslr.sh > > @@ -0,0 +1,459 @@ > > +#!/bin/bash > > +# perf inject --aslr test > > +# SPDX-License-Identifier: GPL-2.0 > > + > > +set -e > > +set -o pipefail > > + > > +shelldir=$(dirname "$0") > > +# shellcheck source=lib/perf_has_symbol.sh > > +. "${shelldir}"/lib/perf_has_symbol.sh > > + > > +sym="noploop" > > + > > +skip_test_missing_symbol ${sym} > > + > > +# Create global temp directory > > +temp_dir=$(mktemp -d /tmp/perf-test-aslr.XXXXXXXXXX) > > +data="${temp_dir}/perf.data" > > +data2="${temp_dir}/perf.data2" > > + > > +prog="perf test -w noploop" > > +[ "$(uname -m)" = "s390x" ] && prog="$prog 3" > > +err=0 > > + > > + > > + > > +cleanup() { > > + # Check if temp_dir is set and looks sane before removing > > + if [[ "${temp_dir}" =~ ^/tmp/perf-test-aslr\. 
]]; then > > + rm -rf "${temp_dir}" > > + fi > > +} > > + > > +trap_cleanup() { > > + cleanup > > + exit 1 > > +} > > + > > +trap cleanup EXIT > > +trap trap_cleanup TERM INT > > + > > +get_noploop_addr() { > > + local file=$1 > > + perf script -i "$file" | awk ' > > + BEGIN { found=0 } > > + { > > + for (i=1; i<=NF; i++) { > > + if ($i ~ /noploop\+/) { > > + if (!found) { > > + print $(i-1) > > + found=1 > > + } > > + } > > + } > > + }' > > +} > > + > > +test_basic_aslr() { > > + echo "Test basic ASLR remapping" > > + local data > > + data=$(mktemp "${temp_dir}/perf.data.basic.XXXXXX") > > + local data2 > > + data2=$(mktemp "${temp_dir}/perf.data2.basic.XXXXXX") > > + > > + perf record -e task-clock:u -o "${data}" ${prog} > > + perf inject -v --aslr -i "${data}" -o "${data2}" > > + > > + orig_addr=$(get_noploop_addr "${data}") > > + new_addr=$(get_noploop_addr "${data2}") > > + > > + echo "Basic ASLR: orig_addr=$orig_addr, new_addr=$new_addr" > > + > > + if [ -z "$orig_addr" ]; then > > + echo "Basic ASLR test [Failed - no noploop samples in original file]" > > + err=1 > > + elif [ -z "$new_addr" ]; then > > + echo "Basic ASLR test [Failed - could not find remapped address]" > > + err=1 > > + elif [ "$orig_addr" = "$new_addr" ]; then > > + echo "Basic ASLR test [Failed - addresses are not remapped]" > > + err=1 > > + else > > + echo "Basic ASLR test [Success]" > > + fi > > +} > > + > > +test_pipe_aslr() { > > + echo "Test pipe mode ASLR remapping" > > + local data > > + data=$(mktemp "${temp_dir}/perf.data.pipe.XXXXXX") > > + local data2 > > + data2=$(mktemp "${temp_dir}/perf.data2.pipe.XXXXXX") > > + > > + # Use tee to save the original pipe data for comparison > > + perf record -e task-clock:u -o - ${prog} | tee "${data}" | perf inject --aslr -o "${data2}" > > + > > + orig_addr=$(get_noploop_addr "${data}") > > + new_addr=$(get_noploop_addr "${data2}") > > + > > + echo "Pipe ASLR: orig_addr=$orig_addr, new_addr=$new_addr" > > + > > + if [ -z "$orig_addr" ]; then 
> > + echo "Pipe ASLR test [Failed - no noploop samples in original file]" > > + err=1 > > + elif [ -z "$new_addr" ]; then > > + echo "Pipe ASLR test [Failed - could not find remapped address]" > > + err=1 > > + elif [ "$orig_addr" = "$new_addr" ]; then > > + echo "Pipe ASLR test [Failed - addresses are not remapped]" > > + err=1 > > + else > > + echo "Pipe ASLR test [Success]" > > + fi > > +} > > + > > +test_callchain_aslr() { > > + echo "Test Callchain ASLR remapping" > > + local data > > + data=$(mktemp "${temp_dir}/perf.data.callchain.XXXXXX") > > + local data2 > > + data2=$(mktemp "${temp_dir}/perf.data2.callchain.XXXXXX") > > + > > + perf record -g -e task-clock:u -o "${data}" ${prog} > > + perf inject --aslr -i "${data}" -o "${data2}" > > + > > + orig_addr=$(get_noploop_addr "${data}") > > + new_addr=$(get_noploop_addr "${data2}") > > + > > + echo "Callchain ASLR: orig_addr=$orig_addr, new_addr=$new_addr" > > + > > + if [ -z "$orig_addr" ]; then > > + echo "Callchain ASLR test [Failed - no noploop samples in original file]" > > + err=1 > > + elif [ -z "$new_addr" ]; then > > + echo "Callchain ASLR test [Failed - could not find remapped address]" > > Hi Ian, > > This test fails on Arm. I believe it's because on Arm we request the > link register to be sampled with frame pointer unwinds. Then the aslr > tool drops all the samples because it sees that user regs were sampled: > > /* TODO: can this be less conservative? */ > pr_debug("Dropping regs user sample as possible ASLR leak\n"); > ret = 0; > goto out_put; > > I think maybe that comment is onto something. Perhaps the user regs can > be zeroed instead of dropping the sample. Then the frame pointer unwind > will still work on Arm and the aslr test will pass. We just won't be > able to use the link register to add the leaf frame caller, but that's > not a big deal. Thanks James. I'm working on a new version of the patches, but I'm having delays getting the AI to approve the changes. ARM does what? 
Ah, I knew this and also it didn't really register. I'm wondering now if we can put the machinery behind "EM_HOST == EM_AARCH64": https://lore.kernel.org/all/20211217154521.80603-2-german.gomez@arm.com/ as it seems a mechanism that would benefit other architectures such as ARM32 :-) And I have my mission to make tools/perf/arch disappear as much as is humanly possible. I also imagine the problem the link register solves for perf happens for BPF, so perhaps this ability shouldn't be encouraged. I think rather than zeroing the register values it would be better to just remove them from the output events. I'll try to add that support as having this test break on ARM isn't desirable. Thanks, Ian > James > > > + err=1 > > + elif [ "$orig_addr" = "$new_addr" ]; then > > + echo "Callchain ASLR test [Failed - addresses are not remapped]" > > + err=1 > > + else > > + # Extract callchain addresses (indented lines starting with hex addresses) > > + orig_callchain=$(perf script -i "${data}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') > > + new_callchain=$(perf script -i "${data2}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') > > + > > + if [ -z "$orig_callchain" ]; then > > + echo "Callchain ASLR test [Failed - no callchain samples in original file]" > > + err=1 > > + elif [ -z "$new_callchain" ]; then > > + echo "Callchain ASLR test [Failed - callchain data was dropped]" > > + err=1 > > + elif [ "$orig_callchain" = "$new_callchain" ]; then > > + echo "Callchain ASLR test [Failed - callchain addresses were not remapped]" > > + err=1 > > + else > > + echo "Callchain ASLR test [Success]" > > + fi > > + fi > > +} > > + > > +test_report_aslr() { > > + echo "Test perf report consistency" > > + local data > > + data=$(mktemp "${temp_dir}/perf.data.report.XXXXXX") > > + local data2 > > + data2=$(mktemp "${temp_dir}/perf.data2.report.XXXXXX") > > + local data_clean > > + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") > > + > > + perf record -e task-clock:u -o "${data}" 
${prog} > > + # Use -b to inject build-ids and force ordered events processing in both > > + perf inject -b -i "${data}" -o "${data_clean}" > > + perf inject -v -b --aslr -i "${data}" -o "${data2}" > > + > > + local report1="${temp_dir}/report1" > > + local report2="${temp_dir}/report2" > > + local report1_clean="${temp_dir}/report1.clean" > > + local report2_clean="${temp_dir}/report2.clean" > > + local diff_file="${temp_dir}/diff" > > + > > + perf report -i "${data_clean}" --stdio > "${report1}" > > + perf report -i "${data2}" --stdio > "${report2}" > > + > > + # Strip headers and compare lines with percentages > > + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true > > + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true > > + > > + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true > > + > > + if [ ! -s "${report1_clean}" ]; then > > + echo "Report ASLR test [Failed - no samples captured]" > > + err=1 > > + elif [ -s "${diff_file}" ]; then > > + echo "Report ASLR test [Failed - reports differ]" > > + echo "Showing first 20 lines of diff:" > > + head -n 20 "${diff_file}" > > + err=1 > > + else > > + echo "Report ASLR test [Success]" > > + fi > > +} > > + > > +test_pipe_report_aslr() { > > + echo "Test pipe mode perf report consistency" > > + local data > > + data=$(mktemp "${temp_dir}/perf.data.pipe_report.XXXXXX") > > + local data2 > > + data2=$(mktemp "${temp_dir}/perf.data2.pipe_report.XXXXXX") > > + local data_clean > > + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") > > + > > + # Use tee to save the original pipe data, then process it with inject -b > > + perf record -e task-clock:u -o - ${prog} | \ > > + tee "${data}" | \ > > + perf inject -b --aslr -o "${data2}" > > + perf inject -b -i "${data}" -o "${data_clean}" > > + > > + local report1="${temp_dir}/report1" > > + local report2="${temp_dir}/report2" > > + local report1_clean="${temp_dir}/report1.clean" > > + local 
report2_clean="${temp_dir}/report2.clean" > > + local diff_file="${temp_dir}/diff" > > + > > + perf report -i "${data_clean}" --stdio > "${report1}" > > + perf report -i "${data2}" --stdio > "${report2}" > > + > > + # Strip headers and compare lines with percentages > > + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true > > + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true > > + > > + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true > > + > > + if [ ! -s "${report1_clean}" ]; then > > + echo "Pipe Report ASLR test [Failed - no samples captured]" > > + err=1 > > + elif [ -s "${diff_file}" ]; then > > + echo "Pipe Report ASLR test [Failed - reports differ]" > > + echo "Showing first 20 lines of diff:" > > + head -n 20 "${diff_file}" > > + err=1 > > + else > > + echo "Pipe Report ASLR test [Success]" > > + fi > > +} > > + > > +test_pipe_out_report_aslr() { > > + echo "Test pipe output mode perf report consistency" > > + local data > > + data=$(mktemp "${temp_dir}/perf.data.pipe_out_report.XXXXXX") > > + local data_clean > > + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") > > + > > + perf record -e task-clock:u -o "${data}" ${prog} > > + perf inject -b -i "${data}" -o "${data_clean}" > > + > > + local report1="${temp_dir}/report1" > > + local report2="${temp_dir}/report2" > > + local report1_clean="${temp_dir}/report1.clean" > > + local report2_clean="${temp_dir}/report2.clean" > > + local diff_file="${temp_dir}/diff" > > + > > + perf report -i "${data_clean}" --stdio > "${report1}" > > + perf inject -b --aslr -i "${data}" -o - | perf report -i - --stdio > "${report2}" > > + > > + # Strip headers and compare lines with percentages > > + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true > > + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true > > + > > + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true > > + > > + if 
[ ! -s "${report1_clean}" ]; then > > + echo "Pipe Output Report ASLR test [Failed - no samples captured]" > > + err=1 > > + elif [ -s "${diff_file}" ]; then > > + echo "Pipe Output Report ASLR test [Failed - reports differ]" > > + echo "Showing first 20 lines of diff:" > > + head -n 20 "${diff_file}" > > + err=1 > > + else > > + echo "Pipe Output Report ASLR test [Success]" > > + fi > > +} > > + > > +test_dropped_samples() { > > + echo "Test dropped samples (phys-data)" > > + local data > > + data=$(mktemp "${temp_dir}/perf.data.dropped.XXXXXX") > > + local data2 > > + data2=$(mktemp "${temp_dir}/perf.data2.dropped.XXXXXX") > > + > > + # Check if --phys-data is supported by recording a short run > > + if ! perf record -e task-clock:u --phys-data -o "${data}" -- sleep 0.1 > /dev/null 2>&1; then > > + echo "Skipping dropped samples test as --phys-data is not supported" > > + return > > + fi > > + > > + perf record -e task-clock:u --phys-data -o "${data}" ${prog} > > + perf inject --aslr -i "${data}" -o "${data2}" > > + > > + # Verify that the original file actually contained samples! > > + orig_samples=$(perf script -i "${data}" | wc -l) > > + if [ "$orig_samples" -eq 0 ]; then > > + echo "Dropped samples test [Failed - no samples in original file]" > > + err=1 > > + else > > + # Verify that samples are dropped. > > + samples_count=$(perf script -i "${data2}" | wc -l) > > + > > + if [ "$samples_count" -gt 0 ]; then > > + echo "Dropped samples test [Failed - samples were not dropped]" > > + err=1 > > + else > > + echo "Dropped samples test [Success]" > > + fi > > + fi > > +} > > + > > +test_kernel_aslr() { > > + echo "Test kernel ASLR remapping" > > + local kdata > > + kdata=$(mktemp "${temp_dir}/perf.data.kernel.XXXXXX") > > + local kdata2 > > + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel.XXXXXX") > > + local log_file > > + log_file=$(mktemp "${temp_dir}/kernel_record.log.XXXXXX") > > + > > + # Try to record kernel samples > > + if ! 
perf record -e task-clock:k -o "${kdata}" ${prog} > "${log_file}" 2>&1; then > > + echo "Skipping kernel ASLR test as recording failed (maybe no permissions)" > > + return > > + fi > > + > > + # Check for warning about kernel map restriction > > + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then > > + echo "Skipping kernel ASLR test as kernel map could not be recorded (permissions restricted)" > > + return > > + fi > > + > > + perf inject -v --aslr -i "${kdata}" -o "${kdata2}" > > + > > + # Check if kernel addresses are remapped. > > + # Find the field that ends with :k: (the event name) and take the next field! > > + orig_addr=$(perf script -i "${kdata}" | awk ' > > + BEGIN { found=0 } > > + { > > + for (i=1; i<NF; i++) { > > + if ($i ~ /:[k]+:?$/) { > > + if (!found) { > > + print $(i+1) > > + found=1 > > + } > > + } > > + } > > + }') > > + new_addr=$(perf script -i "${kdata2}" | awk ' > > + BEGIN { found=0 } > > + { > > + for (i=1; i<NF; i++) { > > + if ($i ~ /:[k]+:?$/) { > > + if (!found) { > > + print $(i+1) > > + found=1 > > + } > > + } > > + } > > + }') > > + > > + echo "Kernel ASLR: orig_addr=$orig_addr, new_addr=$new_addr" > > + > > + if [ -z "$orig_addr" ]; then > > + echo "Kernel ASLR test [Failed - no kernel samples in original file]" > > + err=1 > > + elif [ -z "$new_addr" ]; then > > + echo "Kernel ASLR test [Failed - could not find remapped address]" > > + err=1 > > + elif [ "$orig_addr" = "$new_addr" ]; then > > + echo "Kernel ASLR test [Failed - addresses are not remapped]" > > + err=1 > > + else > > + echo "Kernel ASLR test [Success]" > > + fi > > +} > > + > > +test_kernel_report_aslr() { > > + echo "Test kernel perf report consistency" > > + local kdata > > + kdata=$(mktemp "${temp_dir}/perf.data.kernel_report.XXXXXX") > > + local kdata2 > > + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel_report.XXXXXX") > > + local data_clean > > + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") > > + local log_file 
> > + log_file=$(mktemp "${temp_dir}/kernel_report_record.log.XXXXXX") > > + > > + # Try to record kernel samples > > + if ! perf record -e task-clock:k -o "${kdata}" ${prog} > "${log_file}" 2>&1; then > > + echo "Skipping kernel report test as recording failed (maybe no permissions)" > > + return > > + fi > > + > > + # Check for warning about kernel map restriction > > + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then > > + echo "Skipping kernel report test as kernel map could not be recorded (permissions restricted)" > > + return > > + fi > > + > > + # Use -b to inject build-ids and force ordered events processing in both > > + perf inject -b -i "${kdata}" -o "${data_clean}" > > + perf inject -v -b --aslr -i "${kdata}" -o "${kdata2}" > > + > > + local report1="${temp_dir}/report_kernel1" > > + local report2="${temp_dir}/report_kernel2" > > + local report1_clean="${temp_dir}/report_kernel1.clean" > > + local report2_clean="${temp_dir}/report_kernel2.clean" > > + > > + perf report -i "${data_clean}" --stdio > "${report1}" > > + perf report -i "${kdata2}" --stdio > "${report2}" > > + > > + # Strip headers and compare lines with percentages > > + grep '%' "${report1}" | grep -v '^#' > "${report1_clean}" || true > > + grep '%' "${report2}" | grep -v '^#' > "${report2_clean}" || true > > + > > + # Normalize kernel DSOs and addresses in clean reports > > + # This allows kernel modules to be either a module or kernel.kallsyms > > + local report1_norm="${temp_dir}/report_kernel1.norm" > > + local report2_norm="${temp_dir}/report_kernel2.norm" > > + local diff_file="${temp_dir}/diff_kernel" > > + > > + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report1_clean}" | \ > > + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | sort > "${report1_norm}" || true > > + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report2_clean}" | \ > > + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); 
print}' | sort > "${report2_norm}" || true > > + > > + diff -u -w "${report1_norm}" "${report2_norm}" > "${diff_file}" || true > > + > > + if [ ! -s "${report1_norm}" ]; then > > + echo "Kernel Report ASLR test [Failed - no samples captured]" > > + err=1 > > + elif [ -s "${diff_file}" ]; then > > + echo "Kernel Report ASLR test [Failed - reports differ]" > > + echo "Showing first 20 lines of diff:" > > + head -n 20 "${diff_file}" > > + err=1 > > + else > > + echo "Kernel Report ASLR test [Success]" > > + fi > > +} > > + > > +test_basic_aslr > > +test_pipe_aslr > > +test_callchain_aslr > > +test_report_aslr > > +test_pipe_report_aslr > > +test_pipe_out_report_aslr > > +test_dropped_samples > > +test_kernel_aslr > > +test_kernel_report_aslr > > + > > +cleanup > > +exit $err > ^ permalink raw reply [flat|nested] 183+ messages in thread
* Re: [PATCH v5 5/5] perf test: Add inject ASLR test 2026-05-07 16:17 ` Ian Rogers @ 2026-05-08 10:42 ` James Clark 2026-05-08 10:49 ` James Clark 0 siblings, 1 reply; 183+ messages in thread From: James Clark @ 2026-05-08 10:42 UTC (permalink / raw) To: Ian Rogers Cc: adrian.hunter, jolsa, linux-kernel, linux-perf-users, mingo, peterz, acme, gmx, namhyung On 07/05/2026 5:17 pm, Ian Rogers wrote: > On Thu, May 7, 2026 at 8:58 AM James Clark <james.clark@linaro.org> wrote: >> >> >> >> On 06/05/2026 1:45 am, Ian Rogers wrote: >>> Add a new shell test `inject_aslr.sh` to verify the `perf inject --aslr` >>> feature. The test covers: >>> - Basic address remapping for user space samples. >>> - Pipe mode coverage for `perf record` piped into `perf inject --aslr`. >>> - Callchain address remapping. >>> - Consistency of `perf report` output before and after injection. >>> - Pipe mode report consistency. >>> - Dropping of samples that leak ASLR info (physical addresses). >>> - Kernel address remapping (skipping gracefully if permissions restrict >>> recording the kernel map). >>> - Kernel report consistency with address normalization. >>> >>> The test suite is hardened with global 'set -o pipefail' assertions to catch >>> pipeline failures, stream-consuming awk processors to handle SIGPIPE signals, >>> and a dedicated pipe output scenario validating raw 'perf inject -o -' stdout >>> streams. >>> >>> Assisted-by: Gemini-CLI:Google Gemini 3 >>> Signed-off-by: Ian Rogers <irogers@google.com> >>> --- >>> v5: Harden test suite verification pipelines by upgrading report checks to >>> strict sorted line-by-line diff comparisons to accommodate remapped pointer >>> shifts. Append || true fallback operators to grep-v filtering pipelines to >>> prevent the shell test from spuriously aborting under set -o pipefail on >>> empty inputs, ensuring graceful failure checks trigger correctly. 
>>> v4: Reorder set -e/pipefail to prevent temp file leakage in root directory on >>> unprivileged record failures when run as root. Ensure grep report filters >>> have || true suffixes to avoid aborts under pipefail. Add comprehensive >>> pipe stdout injection attributes validation case. >>> v3: Harden script with pipefail, SIGPIPE awk pipeline fixes, callchain empty >>> data asserts, baseline sample verification, and grep report abort >>> protections. Reorder set -e/pipefail to prevent stack leaks in mktemp >>> failures. >>> v2: Add sum comparison for kernel overhead and 32-bit math corrections. Add >>> awk with gsub for trailing dots and brackets normalizations. Trap EXIT, >>> prevent race conditions and avoid hardcoded perf binary. >>> --- >>> tools/perf/tests/shell/inject_aslr.sh | 459 ++++++++++++++++++++++++++ >>> 1 file changed, 459 insertions(+) >>> create mode 100755 tools/perf/tests/shell/inject_aslr.sh >>> >>> diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh >>> new file mode 100755 >>> index 000000000000..cdc3aa94de63 >>> --- /dev/null >>> +++ b/tools/perf/tests/shell/inject_aslr.sh >>> @@ -0,0 +1,459 @@ >>> +#!/bin/bash >>> +# perf inject --aslr test >>> +# SPDX-License-Identifier: GPL-2.0 >>> + >>> +set -e >>> +set -o pipefail >>> + >>> +shelldir=$(dirname "$0") >>> +# shellcheck source=lib/perf_has_symbol.sh >>> +. "${shelldir}"/lib/perf_has_symbol.sh >>> + >>> +sym="noploop" >>> + >>> +skip_test_missing_symbol ${sym} >>> + >>> +# Create global temp directory >>> +temp_dir=$(mktemp -d /tmp/perf-test-aslr.XXXXXXXXXX) >>> +data="${temp_dir}/perf.data" >>> +data2="${temp_dir}/perf.data2" >>> + >>> +prog="perf test -w noploop" >>> +[ "$(uname -m)" = "s390x" ] && prog="$prog 3" >>> +err=0 >>> + >>> + >>> + >>> +cleanup() { >>> + # Check if temp_dir is set and looks sane before removing >>> + if [[ "${temp_dir}" =~ ^/tmp/perf-test-aslr\. 
]]; then >>> + rm -rf "${temp_dir}" >>> + fi >>> +} >>> + >>> +trap_cleanup() { >>> + cleanup >>> + exit 1 >>> +} >>> + >>> +trap cleanup EXIT >>> +trap trap_cleanup TERM INT >>> + >>> +get_noploop_addr() { >>> + local file=$1 >>> + perf script -i "$file" | awk ' >>> + BEGIN { found=0 } >>> + { >>> + for (i=1; i<=NF; i++) { >>> + if ($i ~ /noploop\+/) { >>> + if (!found) { >>> + print $(i-1) >>> + found=1 >>> + } >>> + } >>> + } >>> + }' >>> +} >>> + >>> +test_basic_aslr() { >>> + echo "Test basic ASLR remapping" >>> + local data >>> + data=$(mktemp "${temp_dir}/perf.data.basic.XXXXXX") >>> + local data2 >>> + data2=$(mktemp "${temp_dir}/perf.data2.basic.XXXXXX") >>> + >>> + perf record -e task-clock:u -o "${data}" ${prog} >>> + perf inject -v --aslr -i "${data}" -o "${data2}" >>> + >>> + orig_addr=$(get_noploop_addr "${data}") >>> + new_addr=$(get_noploop_addr "${data2}") >>> + >>> + echo "Basic ASLR: orig_addr=$orig_addr, new_addr=$new_addr" >>> + >>> + if [ -z "$orig_addr" ]; then >>> + echo "Basic ASLR test [Failed - no noploop samples in original file]" >>> + err=1 >>> + elif [ -z "$new_addr" ]; then >>> + echo "Basic ASLR test [Failed - could not find remapped address]" >>> + err=1 >>> + elif [ "$orig_addr" = "$new_addr" ]; then >>> + echo "Basic ASLR test [Failed - addresses are not remapped]" >>> + err=1 >>> + else >>> + echo "Basic ASLR test [Success]" >>> + fi >>> +} >>> + >>> +test_pipe_aslr() { >>> + echo "Test pipe mode ASLR remapping" >>> + local data >>> + data=$(mktemp "${temp_dir}/perf.data.pipe.XXXXXX") >>> + local data2 >>> + data2=$(mktemp "${temp_dir}/perf.data2.pipe.XXXXXX") >>> + >>> + # Use tee to save the original pipe data for comparison >>> + perf record -e task-clock:u -o - ${prog} | tee "${data}" | perf inject --aslr -o "${data2}" >>> + >>> + orig_addr=$(get_noploop_addr "${data}") >>> + new_addr=$(get_noploop_addr "${data2}") >>> + >>> + echo "Pipe ASLR: orig_addr=$orig_addr, new_addr=$new_addr" >>> + >>> + if [ -z "$orig_addr" ]; then 
>>> + echo "Pipe ASLR test [Failed - no noploop samples in original file]" >>> + err=1 >>> + elif [ -z "$new_addr" ]; then >>> + echo "Pipe ASLR test [Failed - could not find remapped address]" >>> + err=1 >>> + elif [ "$orig_addr" = "$new_addr" ]; then >>> + echo "Pipe ASLR test [Failed - addresses are not remapped]" >>> + err=1 >>> + else >>> + echo "Pipe ASLR test [Success]" >>> + fi >>> +} >>> + >>> +test_callchain_aslr() { >>> + echo "Test Callchain ASLR remapping" >>> + local data >>> + data=$(mktemp "${temp_dir}/perf.data.callchain.XXXXXX") >>> + local data2 >>> + data2=$(mktemp "${temp_dir}/perf.data2.callchain.XXXXXX") >>> + >>> + perf record -g -e task-clock:u -o "${data}" ${prog} >>> + perf inject --aslr -i "${data}" -o "${data2}" >>> + >>> + orig_addr=$(get_noploop_addr "${data}") >>> + new_addr=$(get_noploop_addr "${data2}") >>> + >>> + echo "Callchain ASLR: orig_addr=$orig_addr, new_addr=$new_addr" >>> + >>> + if [ -z "$orig_addr" ]; then >>> + echo "Callchain ASLR test [Failed - no noploop samples in original file]" >>> + err=1 >>> + elif [ -z "$new_addr" ]; then >>> + echo "Callchain ASLR test [Failed - could not find remapped address]" >> >> Hi Ian, >> >> This test fails on Arm. I believe it's because on Arm we request the >> link register to be sampled with frame pointer unwinds. Then the aslr >> tool drops all the samples because it sees that user regs were sampled: >> >> /* TODO: can this be less conservative? */ >> pr_debug("Dropping regs user sample as possible ASLR leak\n"); >> ret = 0; >> goto out_put; >> >> I think maybe that comment is onto something. Perhaps the user regs can >> be zeroed instead of dropping the sample. Then the frame pointer unwind >> will still work on Arm and the aslr test will pass. We just won't be >> able to use the link register to add the leaf frame caller, but that's >> not a big deal. > > Thanks James. I'm working on a new version of the patches, but I'm > having delays getting the AI to approve the changes. 
> > ARM does what? Ah, I knew this and also it didn't really register. I'm > wondering now if we can put the machinery behind "EM_HOST == > EM_AARCH64": > https://lore.kernel.org/all/20211217154521.80603-2-german.gomez@arm.com/ > as it seems a mechanism that would benefit other architectures such as > ARM32 :-) And I have my mission to make tools/perf/arch disappear as > much as is humanly possible. Yeah that makes sense, the change you sent looks good. > I also imagine the problem the link register solves for perf happens > for BPF, so perhaps this ability shouldn't be encouraged. Not sure what you mean by this, do you mean adding the link register shouldn't be encouraged, or the compiler dropping the stack frame? Or just the weak function style? > > I think rather than zeroing the register values it would be better to > just remove them from the output events. I'll try to add that support > as having this test break on ARM isn't desirable. > Makes sense too. I suppose data being there but zeroed could be slightly more confusing than just dropping the sample. I don't know if modifying the sample type to remove PERF_SAMPLE_REGS_USER and emitting the rest could be an option? It might be more robust to cases when things are auto added to the sample by Perf. For example all the aux stuff has custom setup functions that add who knows what options to the events. 
> Thanks, > Ian > >> James >> >>> + err=1 >>> + elif [ "$orig_addr" = "$new_addr" ]; then >>> + echo "Callchain ASLR test [Failed - addresses are not remapped]" >>> + err=1 >>> + else >>> + # Extract callchain addresses (indented lines starting with hex addresses) >>> + orig_callchain=$(perf script -i "${data}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') >>> + new_callchain=$(perf script -i "${data2}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') >>> + >>> + if [ -z "$orig_callchain" ]; then >>> + echo "Callchain ASLR test [Failed - no callchain samples in original file]" >>> + err=1 >>> + elif [ -z "$new_callchain" ]; then >>> + echo "Callchain ASLR test [Failed - callchain data was dropped]" >>> + err=1 >>> + elif [ "$orig_callchain" = "$new_callchain" ]; then >>> + echo "Callchain ASLR test [Failed - callchain addresses were not remapped]" >>> + err=1 >>> + else >>> + echo "Callchain ASLR test [Success]" >>> + fi >>> + fi >>> +} >>> + >>> +test_report_aslr() { >>> + echo "Test perf report consistency" >>> + local data >>> + data=$(mktemp "${temp_dir}/perf.data.report.XXXXXX") >>> + local data2 >>> + data2=$(mktemp "${temp_dir}/perf.data2.report.XXXXXX") >>> + local data_clean >>> + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") >>> + >>> + perf record -e task-clock:u -o "${data}" ${prog} >>> + # Use -b to inject build-ids and force ordered events processing in both >>> + perf inject -b -i "${data}" -o "${data_clean}" >>> + perf inject -v -b --aslr -i "${data}" -o "${data2}" >>> + >>> + local report1="${temp_dir}/report1" >>> + local report2="${temp_dir}/report2" >>> + local report1_clean="${temp_dir}/report1.clean" >>> + local report2_clean="${temp_dir}/report2.clean" >>> + local diff_file="${temp_dir}/diff" >>> + >>> + perf report -i "${data_clean}" --stdio > "${report1}" >>> + perf report -i "${data2}" --stdio > "${report2}" >>> + >>> + # Strip headers and compare lines with percentages >>> + grep '%' "${report1}" | grep -v '^#' | sort > 
"${report1_clean}" || true >>> + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true >>> + >>> + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true >>> + >>> + if [ ! -s "${report1_clean}" ]; then >>> + echo "Report ASLR test [Failed - no samples captured]" >>> + err=1 >>> + elif [ -s "${diff_file}" ]; then >>> + echo "Report ASLR test [Failed - reports differ]" >>> + echo "Showing first 20 lines of diff:" >>> + head -n 20 "${diff_file}" >>> + err=1 >>> + else >>> + echo "Report ASLR test [Success]" >>> + fi >>> +} >>> + >>> +test_pipe_report_aslr() { >>> + echo "Test pipe mode perf report consistency" >>> + local data >>> + data=$(mktemp "${temp_dir}/perf.data.pipe_report.XXXXXX") >>> + local data2 >>> + data2=$(mktemp "${temp_dir}/perf.data2.pipe_report.XXXXXX") >>> + local data_clean >>> + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") >>> + >>> + # Use tee to save the original pipe data, then process it with inject -b >>> + perf record -e task-clock:u -o - ${prog} | \ >>> + tee "${data}" | \ >>> + perf inject -b --aslr -o "${data2}" >>> + perf inject -b -i "${data}" -o "${data_clean}" >>> + >>> + local report1="${temp_dir}/report1" >>> + local report2="${temp_dir}/report2" >>> + local report1_clean="${temp_dir}/report1.clean" >>> + local report2_clean="${temp_dir}/report2.clean" >>> + local diff_file="${temp_dir}/diff" >>> + >>> + perf report -i "${data_clean}" --stdio > "${report1}" >>> + perf report -i "${data2}" --stdio > "${report2}" >>> + >>> + # Strip headers and compare lines with percentages >>> + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true >>> + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true >>> + >>> + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true >>> + >>> + if [ ! 
-s "${report1_clean}" ]; then >>> + echo "Pipe Report ASLR test [Failed - no samples captured]" >>> + err=1 >>> + elif [ -s "${diff_file}" ]; then >>> + echo "Pipe Report ASLR test [Failed - reports differ]" >>> + echo "Showing first 20 lines of diff:" >>> + head -n 20 "${diff_file}" >>> + err=1 >>> + else >>> + echo "Pipe Report ASLR test [Success]" >>> + fi >>> +} >>> + >>> +test_pipe_out_report_aslr() { >>> + echo "Test pipe output mode perf report consistency" >>> + local data >>> + data=$(mktemp "${temp_dir}/perf.data.pipe_out_report.XXXXXX") >>> + local data_clean >>> + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") >>> + >>> + perf record -e task-clock:u -o "${data}" ${prog} >>> + perf inject -b -i "${data}" -o "${data_clean}" >>> + >>> + local report1="${temp_dir}/report1" >>> + local report2="${temp_dir}/report2" >>> + local report1_clean="${temp_dir}/report1.clean" >>> + local report2_clean="${temp_dir}/report2.clean" >>> + local diff_file="${temp_dir}/diff" >>> + >>> + perf report -i "${data_clean}" --stdio > "${report1}" >>> + perf inject -b --aslr -i "${data}" -o - | perf report -i - --stdio > "${report2}" >>> + >>> + # Strip headers and compare lines with percentages >>> + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true >>> + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true >>> + >>> + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true >>> + >>> + if [ ! 
-s "${report1_clean}" ]; then >>> + echo "Pipe Output Report ASLR test [Failed - no samples captured]" >>> + err=1 >>> + elif [ -s "${diff_file}" ]; then >>> + echo "Pipe Output Report ASLR test [Failed - reports differ]" >>> + echo "Showing first 20 lines of diff:" >>> + head -n 20 "${diff_file}" >>> + err=1 >>> + else >>> + echo "Pipe Output Report ASLR test [Success]" >>> + fi >>> +} >>> + >>> +test_dropped_samples() { >>> + echo "Test dropped samples (phys-data)" >>> + local data >>> + data=$(mktemp "${temp_dir}/perf.data.dropped.XXXXXX") >>> + local data2 >>> + data2=$(mktemp "${temp_dir}/perf.data2.dropped.XXXXXX") >>> + >>> + # Check if --phys-data is supported by recording a short run >>> + if ! perf record -e task-clock:u --phys-data -o "${data}" -- sleep 0.1 > /dev/null 2>&1; then >>> + echo "Skipping dropped samples test as --phys-data is not supported" >>> + return >>> + fi >>> + >>> + perf record -e task-clock:u --phys-data -o "${data}" ${prog} >>> + perf inject --aslr -i "${data}" -o "${data2}" >>> + >>> + # Verify that the original file actually contained samples! >>> + orig_samples=$(perf script -i "${data}" | wc -l) >>> + if [ "$orig_samples" -eq 0 ]; then >>> + echo "Dropped samples test [Failed - no samples in original file]" >>> + err=1 >>> + else >>> + # Verify that samples are dropped. >>> + samples_count=$(perf script -i "${data2}" | wc -l) >>> + >>> + if [ "$samples_count" -gt 0 ]; then >>> + echo "Dropped samples test [Failed - samples were not dropped]" >>> + err=1 >>> + else >>> + echo "Dropped samples test [Success]" >>> + fi >>> + fi >>> +} >>> + >>> +test_kernel_aslr() { >>> + echo "Test kernel ASLR remapping" >>> + local kdata >>> + kdata=$(mktemp "${temp_dir}/perf.data.kernel.XXXXXX") >>> + local kdata2 >>> + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel.XXXXXX") >>> + local log_file >>> + log_file=$(mktemp "${temp_dir}/kernel_record.log.XXXXXX") >>> + >>> + # Try to record kernel samples >>> + if ! 
perf record -e task-clock:k -o "${kdata}" ${prog} > "${log_file}" 2>&1; then >>> + echo "Skipping kernel ASLR test as recording failed (maybe no permissions)" >>> + return >>> + fi >>> + >>> + # Check for warning about kernel map restriction >>> + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then >>> + echo "Skipping kernel ASLR test as kernel map could not be recorded (permissions restricted)" >>> + return >>> + fi >>> + >>> + perf inject -v --aslr -i "${kdata}" -o "${kdata2}" >>> + >>> + # Check if kernel addresses are remapped. >>> + # Find the field that ends with :k: (the event name) and take the next field! >>> + orig_addr=$(perf script -i "${kdata}" | awk ' >>> + BEGIN { found=0 } >>> + { >>> + for (i=1; i<NF; i++) { >>> + if ($i ~ /:[k]+:?$/) { >>> + if (!found) { >>> + print $(i+1) >>> + found=1 >>> + } >>> + } >>> + } >>> + }') >>> + new_addr=$(perf script -i "${kdata2}" | awk ' >>> + BEGIN { found=0 } >>> + { >>> + for (i=1; i<NF; i++) { >>> + if ($i ~ /:[k]+:?$/) { >>> + if (!found) { >>> + print $(i+1) >>> + found=1 >>> + } >>> + } >>> + } >>> + }') >>> + >>> + echo "Kernel ASLR: orig_addr=$orig_addr, new_addr=$new_addr" >>> + >>> + if [ -z "$orig_addr" ]; then >>> + echo "Kernel ASLR test [Failed - no kernel samples in original file]" >>> + err=1 >>> + elif [ -z "$new_addr" ]; then >>> + echo "Kernel ASLR test [Failed - could not find remapped address]" >>> + err=1 >>> + elif [ "$orig_addr" = "$new_addr" ]; then >>> + echo "Kernel ASLR test [Failed - addresses are not remapped]" >>> + err=1 >>> + else >>> + echo "Kernel ASLR test [Success]" >>> + fi >>> +} >>> + >>> +test_kernel_report_aslr() { >>> + echo "Test kernel perf report consistency" >>> + local kdata >>> + kdata=$(mktemp "${temp_dir}/perf.data.kernel_report.XXXXXX") >>> + local kdata2 >>> + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel_report.XXXXXX") >>> + local data_clean >>> + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") >>> + local log_file 
>>> + log_file=$(mktemp "${temp_dir}/kernel_report_record.log.XXXXXX") >>> + >>> + # Try to record kernel samples >>> + if ! perf record -e task-clock:k -o "${kdata}" ${prog} > "${log_file}" 2>&1; then >>> + echo "Skipping kernel report test as recording failed (maybe no permissions)" >>> + return >>> + fi >>> + >>> + # Check for warning about kernel map restriction >>> + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then >>> + echo "Skipping kernel report test as kernel map could not be recorded (permissions restricted)" >>> + return >>> + fi >>> + >>> + # Use -b to inject build-ids and force ordered events processing in both >>> + perf inject -b -i "${kdata}" -o "${data_clean}" >>> + perf inject -v -b --aslr -i "${kdata}" -o "${kdata2}" >>> + >>> + local report1="${temp_dir}/report_kernel1" >>> + local report2="${temp_dir}/report_kernel2" >>> + local report1_clean="${temp_dir}/report_kernel1.clean" >>> + local report2_clean="${temp_dir}/report_kernel2.clean" >>> + >>> + perf report -i "${data_clean}" --stdio > "${report1}" >>> + perf report -i "${kdata2}" --stdio > "${report2}" >>> + >>> + # Strip headers and compare lines with percentages >>> + grep '%' "${report1}" | grep -v '^#' > "${report1_clean}" || true >>> + grep '%' "${report2}" | grep -v '^#' > "${report2_clean}" || true >>> + >>> + # Normalize kernel DSOs and addresses in clean reports >>> + # This allows kernel modules to be either a module or kernel.kallsyms >>> + local report1_norm="${temp_dir}/report_kernel1.norm" >>> + local report2_norm="${temp_dir}/report_kernel2.norm" >>> + local diff_file="${temp_dir}/diff_kernel" >>> + >>> + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report1_clean}" | \ >>> + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | sort > "${report1_norm}" || true >>> + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report2_clean}" | \ >>> + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); 
print}' | sort > "${report2_norm}" || true >>> + >>> + diff -u -w "${report1_norm}" "${report2_norm}" > "${diff_file}" || true >>> + >>> + if [ ! -s "${report1_norm}" ]; then >>> + echo "Kernel Report ASLR test [Failed - no samples captured]" >>> + err=1 >>> + elif [ -s "${diff_file}" ]; then >>> + echo "Kernel Report ASLR test [Failed - reports differ]" >>> + echo "Showing first 20 lines of diff:" >>> + head -n 20 "${diff_file}" >>> + err=1 >>> + else >>> + echo "Kernel Report ASLR test [Success]" >>> + fi >>> +} >>> + >>> +test_basic_aslr >>> +test_pipe_aslr >>> +test_callchain_aslr >>> +test_report_aslr >>> +test_pipe_report_aslr >>> +test_pipe_out_report_aslr >>> +test_dropped_samples >>> +test_kernel_aslr >>> +test_kernel_report_aslr >>> + >>> +cleanup >>> +exit $err >> ^ permalink raw reply [flat|nested] 183+ messages in thread
* Re: [PATCH v5 5/5] perf test: Add inject ASLR test 2026-05-08 10:42 ` James Clark @ 2026-05-08 10:49 ` James Clark 0 siblings, 0 replies; 183+ messages in thread From: James Clark @ 2026-05-08 10:49 UTC (permalink / raw) To: Ian Rogers Cc: adrian.hunter, jolsa, linux-kernel, linux-perf-users, mingo, peterz, acme, gmx, namhyung On 08/05/2026 11:42 am, James Clark wrote: > > > On 07/05/2026 5:17 pm, Ian Rogers wrote: >> On Thu, May 7, 2026 at 8:58 AM James Clark <james.clark@linaro.org> >> wrote: >>> >>> >>> >>> On 06/05/2026 1:45 am, Ian Rogers wrote: >>>> Add a new shell test `inject_aslr.sh` to verify the `perf inject -- >>>> aslr` >>>> feature. The test covers: >>>> - Basic address remapping for user space samples. >>>> - Pipe mode coverage for `perf record` piped into `perf inject --aslr`. >>>> - Callchain address remapping. >>>> - Consistency of `perf report` output before and after injection. >>>> - Pipe mode report consistency. >>>> - Dropping of samples that leak ASLR info (physical addresses). >>>> - Kernel address remapping (skipping gracefully if permissions restrict >>>> recording the kernel map). >>>> - Kernel report consistency with address normalization. >>>> >>>> The test suite is hardened with global 'set -o pipefail' assertions >>>> to catch >>>> pipeline failures, stream-consuming awk processors to handle SIGPIPE >>>> signals, >>>> and a dedicated pipe output scenario validating raw 'perf inject -o >>>> -' stdout >>>> streams. >>>> >>>> Assisted-by: Gemini-CLI:Google Gemini 3 >>>> Signed-off-by: Ian Rogers <irogers@google.com> >>>> --- >>>> v5: Harden test suite verification pipelines by upgrading report >>>> checks to >>>> strict sorted line-by-line diff comparisons to accommodate >>>> remapped pointer >>>> shifts. Append || true fallback operators to grep-v filtering >>>> pipelines to >>>> prevent the shell test from spuriously aborting under set -o >>>> pipefail on >>>> empty inputs, ensuring graceful failure checks trigger correctly. 
>>>> v4: Reorder set -e/pipefail to prevent temp file leakage in root >>>> directory on >>>> unprivileged record failures when run as root. Ensure grep >>>> report filters >>>> have || true suffixes to avoid aborts under pipefail. Add >>>> comprehensive >>>> pipe stdout injection attributes validation case. >>>> v3: Harden script with pipefail, SIGPIPE awk pipeline fixes, >>>> callchain empty >>>> data asserts, baseline sample verification, and grep report abort >>>> protections. Reorder set -e/pipefail to prevent stack leaks in >>>> mktemp >>>> failures. >>>> v2: Add sum comparison for kernel overhead and 32-bit math >>>> corrections. Add >>>> awk with gsub for trailing dots and brackets normalizations. >>>> Trap EXIT, >>>> prevent race conditions and avoid hardcoded perf binary. >>>> --- >>>> tools/perf/tests/shell/inject_aslr.sh | 459 +++++++++++++++++++++ >>>> +++++ >>>> 1 file changed, 459 insertions(+) >>>> create mode 100755 tools/perf/tests/shell/inject_aslr.sh >>>> >>>> diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/ >>>> tests/shell/inject_aslr.sh >>>> new file mode 100755 >>>> index 000000000000..cdc3aa94de63 >>>> --- /dev/null >>>> +++ b/tools/perf/tests/shell/inject_aslr.sh >>>> @@ -0,0 +1,459 @@ >>>> +#!/bin/bash >>>> +# perf inject --aslr test >>>> +# SPDX-License-Identifier: GPL-2.0 >>>> + >>>> +set -e >>>> +set -o pipefail >>>> + >>>> +shelldir=$(dirname "$0") >>>> +# shellcheck source=lib/perf_has_symbol.sh >>>> +. 
"${shelldir}"/lib/perf_has_symbol.sh >>>> + >>>> +sym="noploop" >>>> + >>>> +skip_test_missing_symbol ${sym} >>>> + >>>> +# Create global temp directory >>>> +temp_dir=$(mktemp -d /tmp/perf-test-aslr.XXXXXXXXXX) >>>> +data="${temp_dir}/perf.data" >>>> +data2="${temp_dir}/perf.data2" >>>> + >>>> +prog="perf test -w noploop" >>>> +[ "$(uname -m)" = "s390x" ] && prog="$prog 3" >>>> +err=0 >>>> + >>>> + >>>> + >>>> +cleanup() { >>>> + # Check if temp_dir is set and looks sane before removing >>>> + if [[ "${temp_dir}" =~ ^/tmp/perf-test-aslr\. ]]; then >>>> + rm -rf "${temp_dir}" >>>> + fi >>>> +} >>>> + >>>> +trap_cleanup() { >>>> + cleanup >>>> + exit 1 >>>> +} >>>> + >>>> +trap cleanup EXIT >>>> +trap trap_cleanup TERM INT >>>> + >>>> +get_noploop_addr() { >>>> + local file=$1 >>>> + perf script -i "$file" | awk ' >>>> + BEGIN { found=0 } >>>> + { >>>> + for (i=1; i<=NF; i++) { >>>> + if ($i ~ /noploop\+/) { >>>> + if (!found) { >>>> + print $(i-1) >>>> + found=1 >>>> + } >>>> + } >>>> + } >>>> + }' >>>> +} >>>> + >>>> +test_basic_aslr() { >>>> + echo "Test basic ASLR remapping" >>>> + local data >>>> + data=$(mktemp "${temp_dir}/perf.data.basic.XXXXXX") >>>> + local data2 >>>> + data2=$(mktemp "${temp_dir}/perf.data2.basic.XXXXXX") >>>> + >>>> + perf record -e task-clock:u -o "${data}" ${prog} >>>> + perf inject -v --aslr -i "${data}" -o "${data2}" >>>> + >>>> + orig_addr=$(get_noploop_addr "${data}") >>>> + new_addr=$(get_noploop_addr "${data2}") >>>> + >>>> + echo "Basic ASLR: orig_addr=$orig_addr, new_addr=$new_addr" >>>> + >>>> + if [ -z "$orig_addr" ]; then >>>> + echo "Basic ASLR test [Failed - no noploop samples in original >>>> file]" >>>> + err=1 >>>> + elif [ -z "$new_addr" ]; then >>>> + echo "Basic ASLR test [Failed - could not find remapped address]" >>>> + err=1 >>>> + elif [ "$orig_addr" = "$new_addr" ]; then >>>> + echo "Basic ASLR test [Failed - addresses are not remapped]" >>>> + err=1 >>>> + else >>>> + echo "Basic ASLR test [Success]" >>>> + fi 
>>>> +} >>>> + >>>> +test_pipe_aslr() { >>>> + echo "Test pipe mode ASLR remapping" >>>> + local data >>>> + data=$(mktemp "${temp_dir}/perf.data.pipe.XXXXXX") >>>> + local data2 >>>> + data2=$(mktemp "${temp_dir}/perf.data2.pipe.XXXXXX") >>>> + >>>> + # Use tee to save the original pipe data for comparison >>>> + perf record -e task-clock:u -o - ${prog} | tee "${data}" | perf >>>> inject --aslr -o "${data2}" >>>> + >>>> + orig_addr=$(get_noploop_addr "${data}") >>>> + new_addr=$(get_noploop_addr "${data2}") >>>> + >>>> + echo "Pipe ASLR: orig_addr=$orig_addr, new_addr=$new_addr" >>>> + >>>> + if [ -z "$orig_addr" ]; then >>>> + echo "Pipe ASLR test [Failed - no noploop samples in original >>>> file]" >>>> + err=1 >>>> + elif [ -z "$new_addr" ]; then >>>> + echo "Pipe ASLR test [Failed - could not find remapped address]" >>>> + err=1 >>>> + elif [ "$orig_addr" = "$new_addr" ]; then >>>> + echo "Pipe ASLR test [Failed - addresses are not remapped]" >>>> + err=1 >>>> + else >>>> + echo "Pipe ASLR test [Success]" >>>> + fi >>>> +} >>>> + >>>> +test_callchain_aslr() { >>>> + echo "Test Callchain ASLR remapping" >>>> + local data >>>> + data=$(mktemp "${temp_dir}/perf.data.callchain.XXXXXX") >>>> + local data2 >>>> + data2=$(mktemp "${temp_dir}/perf.data2.callchain.XXXXXX") >>>> + >>>> + perf record -g -e task-clock:u -o "${data}" ${prog} >>>> + perf inject --aslr -i "${data}" -o "${data2}" >>>> + >>>> + orig_addr=$(get_noploop_addr "${data}") >>>> + new_addr=$(get_noploop_addr "${data2}") >>>> + >>>> + echo "Callchain ASLR: orig_addr=$orig_addr, new_addr=$new_addr" >>>> + >>>> + if [ -z "$orig_addr" ]; then >>>> + echo "Callchain ASLR test [Failed - no noploop samples in >>>> original file]" >>>> + err=1 >>>> + elif [ -z "$new_addr" ]; then >>>> + echo "Callchain ASLR test [Failed - could not find remapped >>>> address]" >>> >>> Hi Ian, >>> >>> This test fails on Arm. 
I believe it's because on Arm we request the >>> link register to be sampled with frame pointer unwinds. Then the aslr >>> tool drops all the samples because it sees that user regs were sampled: >>> >>> /* TODO: can this be less conservative? */ >>> pr_debug("Dropping regs user sample as possible ASLR leak\n"); >>> ret = 0; >>> goto out_put; >>> >>> I think maybe that comment is onto something. Perhaps the user regs can >>> be zeroed instead of dropping the sample. Then the frame pointer unwind >>> will still work on Arm and the aslr test will pass. We just won't be >>> able to use the link register to add the leaf frame caller, but that's >>> not a big deal. >> >> Thanks James. I'm working on a new version of the patches, but I'm >> having delays getting the AI to approve the changes. >> >> ARM does what? Ah, I knew this and also it didn't really register. I'm >> wondering now if we can put the machinery behind "EM_HOST == >> EM_AARCH64": >> https://lore.kernel.org/all/20211217154521.80603-2-german.gomez@arm.com/ >> as it seems a mechanism that would benefit other architectures such as >> ARM32 :-) And I have my mission to make tools/perf/arch disappear as >> much as is humanly possible. > > Yeah that makes sense, the change you sent looks good. > >> I also imagine the problem the link register solves for perf happens >> for BPF, so perhaps this ability shouldn't be encouraged. > > Not sure what you mean by this, do you mean adding the link register > shouldn't be encouraged, or the compiler dropping the stack frame? Or > just the weak function style? > >> >> I think rather than zeroing the register values it would be better to >> just remove them from the output events. I'll try to add that support >> as having this test break on ARM isn't desirable. >> > > Makes sense too. I suppose data being there but zeroed could be slightly > more confusing than just dropping the sample. 
> > I don't know if modifying the sample type to remove > PERF_SAMPLE_REGS_USER and emitting the rest could be an option? It might > be more robust to cases when things are auto added to the sample by > Perf. For example all the aux stuff has custom setup functions that add > who knows what options to the events. Nevermind, I see this is what is done on V6 > > >> Thanks, >> Ian >> >>> James >>> >>>> + err=1 >>>> + elif [ "$orig_addr" = "$new_addr" ]; then >>>> + echo "Callchain ASLR test [Failed - addresses are not remapped]" >>>> + err=1 >>>> + else >>>> + # Extract callchain addresses (indented lines starting with hex >>>> addresses) >>>> + orig_callchain=$(perf script -i "${data}" | awk '/ >>>> ^[[:space:]]+[0-9a-f]+/ {print $1}') >>>> + new_callchain=$(perf script -i "${data2}" | awk '/ >>>> ^[[:space:]]+[0-9a-f]+/ {print $1}') >>>> + >>>> + if [ -z "$orig_callchain" ]; then >>>> + echo "Callchain ASLR test [Failed - no callchain samples in >>>> original file]" >>>> + err=1 >>>> + elif [ -z "$new_callchain" ]; then >>>> + echo "Callchain ASLR test [Failed - callchain data was dropped]" >>>> + err=1 >>>> + elif [ "$orig_callchain" = "$new_callchain" ]; then >>>> + echo "Callchain ASLR test [Failed - callchain addresses were >>>> not remapped]" >>>> + err=1 >>>> + else >>>> + echo "Callchain ASLR test [Success]" >>>> + fi >>>> + fi >>>> +} >>>> + >>>> +test_report_aslr() { >>>> + echo "Test perf report consistency" >>>> + local data >>>> + data=$(mktemp "${temp_dir}/perf.data.report.XXXXXX") >>>> + local data2 >>>> + data2=$(mktemp "${temp_dir}/perf.data2.report.XXXXXX") >>>> + local data_clean >>>> + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") >>>> + >>>> + perf record -e task-clock:u -o "${data}" ${prog} >>>> + # Use -b to inject build-ids and force ordered events processing >>>> in both >>>> + perf inject -b -i "${data}" -o "${data_clean}" >>>> + perf inject -v -b --aslr -i "${data}" -o "${data2}" >>>> + >>>> + local report1="${temp_dir}/report1" 
>>>> + local report2="${temp_dir}/report2" >>>> + local report1_clean="${temp_dir}/report1.clean" >>>> + local report2_clean="${temp_dir}/report2.clean" >>>> + local diff_file="${temp_dir}/diff" >>>> + >>>> + perf report -i "${data_clean}" --stdio > "${report1}" >>>> + perf report -i "${data2}" --stdio > "${report2}" >>>> + >>>> + # Strip headers and compare lines with percentages >>>> + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" >>>> || true >>>> + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" >>>> || true >>>> + >>>> + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" >>>> || true >>>> + >>>> + if [ ! -s "${report1_clean}" ]; then >>>> + echo "Report ASLR test [Failed - no samples captured]" >>>> + err=1 >>>> + elif [ -s "${diff_file}" ]; then >>>> + echo "Report ASLR test [Failed - reports differ]" >>>> + echo "Showing first 20 lines of diff:" >>>> + head -n 20 "${diff_file}" >>>> + err=1 >>>> + else >>>> + echo "Report ASLR test [Success]" >>>> + fi >>>> +} >>>> + >>>> +test_pipe_report_aslr() { >>>> + echo "Test pipe mode perf report consistency" >>>> + local data >>>> + data=$(mktemp "${temp_dir}/perf.data.pipe_report.XXXXXX") >>>> + local data2 >>>> + data2=$(mktemp "${temp_dir}/perf.data2.pipe_report.XXXXXX") >>>> + local data_clean >>>> + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") >>>> + >>>> + # Use tee to save the original pipe data, then process it with >>>> inject -b >>>> + perf record -e task-clock:u -o - ${prog} | \ >>>> + tee "${data}" | \ >>>> + perf inject -b --aslr -o "${data2}" >>>> + perf inject -b -i "${data}" -o "${data_clean}" >>>> + >>>> + local report1="${temp_dir}/report1" >>>> + local report2="${temp_dir}/report2" >>>> + local report1_clean="${temp_dir}/report1.clean" >>>> + local report2_clean="${temp_dir}/report2.clean" >>>> + local diff_file="${temp_dir}/diff" >>>> + >>>> + perf report -i "${data_clean}" --stdio > "${report1}" >>>> + perf report -i "${data2}" 
--stdio > "${report2}" >>>> + >>>> + # Strip headers and compare lines with percentages >>>> + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" >>>> || true >>>> + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" >>>> || true >>>> + >>>> + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" >>>> || true >>>> + >>>> + if [ ! -s "${report1_clean}" ]; then >>>> + echo "Pipe Report ASLR test [Failed - no samples captured]" >>>> + err=1 >>>> + elif [ -s "${diff_file}" ]; then >>>> + echo "Pipe Report ASLR test [Failed - reports differ]" >>>> + echo "Showing first 20 lines of diff:" >>>> + head -n 20 "${diff_file}" >>>> + err=1 >>>> + else >>>> + echo "Pipe Report ASLR test [Success]" >>>> + fi >>>> +} >>>> + >>>> +test_pipe_out_report_aslr() { >>>> + echo "Test pipe output mode perf report consistency" >>>> + local data >>>> + data=$(mktemp "${temp_dir}/perf.data.pipe_out_report.XXXXXX") >>>> + local data_clean >>>> + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") >>>> + >>>> + perf record -e task-clock:u -o "${data}" ${prog} >>>> + perf inject -b -i "${data}" -o "${data_clean}" >>>> + >>>> + local report1="${temp_dir}/report1" >>>> + local report2="${temp_dir}/report2" >>>> + local report1_clean="${temp_dir}/report1.clean" >>>> + local report2_clean="${temp_dir}/report2.clean" >>>> + local diff_file="${temp_dir}/diff" >>>> + >>>> + perf report -i "${data_clean}" --stdio > "${report1}" >>>> + perf inject -b --aslr -i "${data}" -o - | perf report -i - -- >>>> stdio > "${report2}" >>>> + >>>> + # Strip headers and compare lines with percentages >>>> + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" >>>> || true >>>> + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" >>>> || true >>>> + >>>> + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" >>>> || true >>>> + >>>> + if [ ! 
-s "${report1_clean}" ]; then >>>> + echo "Pipe Output Report ASLR test [Failed - no samples captured]" >>>> + err=1 >>>> + elif [ -s "${diff_file}" ]; then >>>> + echo "Pipe Output Report ASLR test [Failed - reports differ]" >>>> + echo "Showing first 20 lines of diff:" >>>> + head -n 20 "${diff_file}" >>>> + err=1 >>>> + else >>>> + echo "Pipe Output Report ASLR test [Success]" >>>> + fi >>>> +} >>>> + >>>> +test_dropped_samples() { >>>> + echo "Test dropped samples (phys-data)" >>>> + local data >>>> + data=$(mktemp "${temp_dir}/perf.data.dropped.XXXXXX") >>>> + local data2 >>>> + data2=$(mktemp "${temp_dir}/perf.data2.dropped.XXXXXX") >>>> + >>>> + # Check if --phys-data is supported by recording a short run >>>> + if ! perf record -e task-clock:u --phys-data -o "${data}" -- >>>> sleep 0.1 > /dev/null 2>&1; then >>>> + echo "Skipping dropped samples test as --phys-data is not >>>> supported" >>>> + return >>>> + fi >>>> + >>>> + perf record -e task-clock:u --phys-data -o "${data}" ${prog} >>>> + perf inject --aslr -i "${data}" -o "${data2}" >>>> + >>>> + # Verify that the original file actually contained samples! >>>> + orig_samples=$(perf script -i "${data}" | wc -l) >>>> + if [ "$orig_samples" -eq 0 ]; then >>>> + echo "Dropped samples test [Failed - no samples in original file]" >>>> + err=1 >>>> + else >>>> + # Verify that samples are dropped. 
>>>> + samples_count=$(perf script -i "${data2}" | wc -l) >>>> + >>>> + if [ "$samples_count" -gt 0 ]; then >>>> + echo "Dropped samples test [Failed - samples were not dropped]" >>>> + err=1 >>>> + else >>>> + echo "Dropped samples test [Success]" >>>> + fi >>>> + fi >>>> +} >>>> + >>>> +test_kernel_aslr() { >>>> + echo "Test kernel ASLR remapping" >>>> + local kdata >>>> + kdata=$(mktemp "${temp_dir}/perf.data.kernel.XXXXXX") >>>> + local kdata2 >>>> + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel.XXXXXX") >>>> + local log_file >>>> + log_file=$(mktemp "${temp_dir}/kernel_record.log.XXXXXX") >>>> + >>>> + # Try to record kernel samples >>>> + if ! perf record -e task-clock:k -o "${kdata}" ${prog} > >>>> "${log_file}" 2>&1; then >>>> + echo "Skipping kernel ASLR test as recording failed (maybe no >>>> permissions)" >>>> + return >>>> + fi >>>> + >>>> + # Check for warning about kernel map restriction >>>> + if grep -q "Couldn't record kernel reference relocation symbol" >>>> "${log_file}"; then >>>> + echo "Skipping kernel ASLR test as kernel map could not be >>>> recorded (permissions restricted)" >>>> + return >>>> + fi >>>> + >>>> + perf inject -v --aslr -i "${kdata}" -o "${kdata2}" >>>> + >>>> + # Check if kernel addresses are remapped. >>>> + # Find the field that ends with :k: (the event name) and take the >>>> next field! 
>>>> + orig_addr=$(perf script -i "${kdata}" | awk ' >>>> + BEGIN { found=0 } >>>> + { >>>> + for (i=1; i<NF; i++) { >>>> + if ($i ~ /:[k]+:?$/) { >>>> + if (!found) { >>>> + print $(i+1) >>>> + found=1 >>>> + } >>>> + } >>>> + } >>>> + }') >>>> + new_addr=$(perf script -i "${kdata2}" | awk ' >>>> + BEGIN { found=0 } >>>> + { >>>> + for (i=1; i<NF; i++) { >>>> + if ($i ~ /:[k]+:?$/) { >>>> + if (!found) { >>>> + print $(i+1) >>>> + found=1 >>>> + } >>>> + } >>>> + } >>>> + }') >>>> + >>>> + echo "Kernel ASLR: orig_addr=$orig_addr, new_addr=$new_addr" >>>> + >>>> + if [ -z "$orig_addr" ]; then >>>> + echo "Kernel ASLR test [Failed - no kernel samples in original >>>> file]" >>>> + err=1 >>>> + elif [ -z "$new_addr" ]; then >>>> + echo "Kernel ASLR test [Failed - could not find remapped address]" >>>> + err=1 >>>> + elif [ "$orig_addr" = "$new_addr" ]; then >>>> + echo "Kernel ASLR test [Failed - addresses are not remapped]" >>>> + err=1 >>>> + else >>>> + echo "Kernel ASLR test [Success]" >>>> + fi >>>> +} >>>> + >>>> +test_kernel_report_aslr() { >>>> + echo "Test kernel perf report consistency" >>>> + local kdata >>>> + kdata=$(mktemp "${temp_dir}/perf.data.kernel_report.XXXXXX") >>>> + local kdata2 >>>> + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel_report.XXXXXX") >>>> + local data_clean >>>> + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") >>>> + local log_file >>>> + log_file=$(mktemp "${temp_dir}/kernel_report_record.log.XXXXXX") >>>> + >>>> + # Try to record kernel samples >>>> + if ! 
perf record -e task-clock:k -o "${kdata}" ${prog} > >>>> "${log_file}" 2>&1; then >>>> + echo "Skipping kernel report test as recording failed (maybe no >>>> permissions)" >>>> + return >>>> + fi >>>> + >>>> + # Check for warning about kernel map restriction >>>> + if grep -q "Couldn't record kernel reference relocation symbol" >>>> "${log_file}"; then >>>> + echo "Skipping kernel report test as kernel map could not be >>>> recorded (permissions restricted)" >>>> + return >>>> + fi >>>> + >>>> + # Use -b to inject build-ids and force ordered events processing >>>> in both >>>> + perf inject -b -i "${kdata}" -o "${data_clean}" >>>> + perf inject -v -b --aslr -i "${kdata}" -o "${kdata2}" >>>> + >>>> + local report1="${temp_dir}/report_kernel1" >>>> + local report2="${temp_dir}/report_kernel2" >>>> + local report1_clean="${temp_dir}/report_kernel1.clean" >>>> + local report2_clean="${temp_dir}/report_kernel2.clean" >>>> + >>>> + perf report -i "${data_clean}" --stdio > "${report1}" >>>> + perf report -i "${kdata2}" --stdio > "${report2}" >>>> + >>>> + # Strip headers and compare lines with percentages >>>> + grep '%' "${report1}" | grep -v '^#' > "${report1_clean}" || true >>>> + grep '%' "${report2}" | grep -v '^#' > "${report2_clean}" || true >>>> + >>>> + # Normalize kernel DSOs and addresses in clean reports >>>> + # This allows kernel modules to be either a module or >>>> kernel.kallsyms >>>> + local report1_norm="${temp_dir}/report_kernel1.norm" >>>> + local report2_norm="${temp_dir}/report_kernel2.norm" >>>> + local diff_file="${temp_dir}/diff_kernel" >>>> + >>>> + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report1_clean}" | \ >>>> + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, >>>> "[kernel]", $0); print}' | sort > "${report1_norm}" || true >>>> + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report2_clean}" | \ >>>> + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, >>>> "[kernel]", $0); print}' | sort > "${report2_norm}" || true >>>> + 
>>>> + diff -u -w "${report1_norm}" "${report2_norm}" > "${diff_file}" >>>> || true >>>> + >>>> + if [ ! -s "${report1_norm}" ]; then >>>> + echo "Kernel Report ASLR test [Failed - no samples captured]" >>>> + err=1 >>>> + elif [ -s "${diff_file}" ]; then >>>> + echo "Kernel Report ASLR test [Failed - reports differ]" >>>> + echo "Showing first 20 lines of diff:" >>>> + head -n 20 "${diff_file}" >>>> + err=1 >>>> + else >>>> + echo "Kernel Report ASLR test [Success]" >>>> + fi >>>> +} >>>> + >>>> +test_basic_aslr >>>> +test_pipe_aslr >>>> +test_callchain_aslr >>>> +test_report_aslr >>>> +test_pipe_report_aslr >>>> +test_pipe_out_report_aslr >>>> +test_dropped_samples >>>> +test_kernel_aslr >>>> +test_kernel_report_aslr >>>> + >>>> +cleanup >>>> +exit $err >>> > ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v6 0/6] perf tools: Add inject --aslr feature and prerequisite robustness fixes 2026-05-06 0:45 ` [PATCH v5 0/5] " Ian Rogers ` (4 preceding siblings ...) 2026-05-06 0:45 ` [PATCH v5 5/5] perf test: Add inject ASLR test Ian Rogers @ 2026-05-08 8:27 ` Ian Rogers 2026-05-08 8:27 ` [PATCH v6 1/6] perf sched: Add missing mmap2 handler in timehist Ian Rogers ` (6 more replies) 5 siblings, 7 replies; 183+ messages in thread From: Ian Rogers @ 2026-05-08 8:27 UTC (permalink / raw) To: irogers, acme, gmx, james.clark, namhyung Cc: adrian.hunter, jolsa, linux-kernel, linux-perf-users, mingo, peterz This patch series introduces the new 'perf inject --aslr' feature to remap virtual memory addresses or drop physical memory event leaks when profile record data is shared between machines. Bundled with this feature are three independent, critical bug fixes inside core event dispatching and map tracking tools that harden perf session analysis against dynamic crashes, concurrent lookup data races, and callchain mapping failures. Core Feature: 'perf inject --aslr' (Patches 4, 5, and 6) Transferring perf.data files across environments introduces a potential leak of virtual address footprints, weakening Address Space Layout Randomization (ASLR) on the originating machine. To mitigate this, we introduce the --aslr flag into perf inject. Unknown or unhandled events are dropped conservatively, while handled samples and branch loops undergo systematic virtual memory offset obfuscation. Events carrying virtual memory layouts are conservatively remap-processed or dropped, while zero-address-risk lifecycle metadata records (such as namespaces, cgroups, and BPF program info) are intentionally delegated to preserve comprehensive downstream trace tool analysis compatibility. The ASLR tracking tool virtualizes process and machine namespaces using 'struct machines' to safely isolate host mappings from unprivileged KVM guest address spaces. 
Memory space layouts are tracked globally per process context to ensure linear, continuous space allocations across successive mapping runs. The topological invariant coordinate dso + invariant (start - pgoff) is tracked to uniquely index binary section frameworks, providing complete collision safety against separate overlapping shared-invariant libraries while remaining perfectly immune to boundary shifts or split fragmentations. To remain strictly conservative and guarantee security, the tool scrubs breakpoint addresses (bp_addr) from all synthesized stream headers, completely drops PERF_RECORD_TEXT_POKE events to prevent absolute immediate pointer operands leaks, and drops unsupported complex payloads (such as user register stacks, raw tracepoints, and hardware AUX tracing frames). Verification is reinforced in Patch 5 with a comprehensive POSIX shell suite ('inject_aslr.sh'), hardened against SIGPIPE signal exits with stream consuming awk loops and robust 'set -o pipefail' assertions. The suite utilizes a highly dense, system-call intensive VFS byte block loop workload (dd count=500) to guarantee deterministic hardware timer interrupts sampling streams inside kernel privilege states. Prerequisite Bug Fixes (Patches 1, 2, and 3) During development, three core event delegation and map indexing issues were identified and resolved to prevent crashes, live-locks, and data-loss during analysis: 1. perf sched: 'timehist' registers standard MMAP, COMM, EXIT, and FORK stubs, but completely omitted registering MMAP2 callbacks. Because modern environments output maps primarily via MMAP2 frames, this caused timehist sessions to silently drop shared library mappings, causing dynamic callchain symbol resolutions to fail. Patch 1 corrects this by properly registering perf_event__process_mmap2. 2. 
perf tool: Patch 2 fixes missing copies of schedstat callbacks inside delegated wrapper tools (which caused segfaults on NULL stubs) and properly initializes/copies the 'dont_split_sample_group' grouping parameters to prevent stack garbage from triggering silent non-leader events drops during split deliver streams. 3. perf symbols: Patch 3 replaces old remove-reinsert map boundary update cycles with a high-performance, thread-safe transactional framework maps__mutate_mapping() that enforces write semaphore lock closures around all in-place virtual address mutations and sorting invalidations, completely closing concurrent lookup race condition windows. It explicitly executes DWARF address space cache invalidation (libdw__invalidate_dwfl()) to keep debugger unwinding frames perfectly synchronized. Changes since v5: - Core Concurrency Fix (Patch 3): Refactor map address boundary mutations across ELF loaders, proc kallsyms parsers, and dynamic module managers to utilize a thread-safe, synchronized transactional framework maps__mutate_mapping() that encapsulates mutations and sorting invalidations under write lock closures, eliminating concurrent lookup race condition windows. Cites intention-revealing callbacks names (remap_kernel_cb). - Feature Exclusivity (Patch 4): Inject strict command-line validation checks enforcing mutual exclusivity between --aslr and --convert-callchain to prevent silent trace unwind failures since ASLR stack dropping conflicts directly with DWARF parsing needs. - KASLR Hardening (Patch 4): Secure mmap.pgoff unconditionally for all host and guest kernel text mapping regions to prevent unredacted active KASLR base deltas leakage. - TEXT_POKE Drops (Patch 4): Conservatively drop PERF_RECORD_TEXT_POKE events completely via a local static drop stub to prevent unredacted absolute 64-bit kernel virtual pointer immediate operands leakage. 
- Parsing Invariants (Patch 4): Inject explicit array-end bounds validation check blocks before consuming trailing PERF_CONTEXT_USER_DEFERRED callchain cookies to completely eliminate out-of-bounds reads and parser desynchronization faults. - Commit Records Alignment (Patch 4): Precisely clarify commit descriptions to reflect that zero-address metadata events are intentionally delegated to protect downstream trace tool processing backward compatibility. - Telemetry Stabilization (Patch 5): Upgrade kernel space tracking workloads to utilize a dedicated system-call intensive VFS byte block loop workload (dd count=500) instead of purely userspace-bound tight loops, guaranteeing high-density kernel privilege state sampling streams and eliminating intermittent execution flakiness dropouts. - Profile Retention Optimizer (Patch 6): Refactor sample processor to dynamically strip out ONLY register dump words out of sample payloads while shrinking output header sizes, overwriting ABI words to NONE, and scrubbing attributes up front. This completely rescues trace profiles from complete sample drop starvation, which happened by default on ARM64. 
Ian Rogers (6): perf sched: Add missing mmap2 handler in timehist perf tool: Missing delegate_tool schedstat delegates and dont_split_sample_group perf maps: Add maps__mutate_mapping perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses perf test: Add inject ASLR test perf aslr: Strip sample registers tools/perf/builtin-inject.c | 47 +- tools/perf/builtin-sched.c | 1 + tools/perf/tests/shell/inject_aslr.sh | 511 ++++++++++++ tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 1035 +++++++++++++++++++++++++ tools/perf/util/aslr.h | 10 + tools/perf/util/machine.c | 32 +- tools/perf/util/maps.c | 26 + tools/perf/util/maps.h | 2 + tools/perf/util/symbol-elf.c | 41 +- tools/perf/util/symbol.c | 17 +- tools/perf/util/tool.c | 6 + 12 files changed, 1697 insertions(+), 32 deletions(-) create mode 100755 tools/perf/tests/shell/inject_aslr.sh create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h -- 2.54.0.563.g4f69b47b94-goog ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v6 1/6] perf sched: Add missing mmap2 handler in timehist 2026-05-08 8:27 ` [PATCH v6 0/6] perf tools: Add inject --aslr feature and prerequisite robustness fixes Ian Rogers @ 2026-05-08 8:27 ` Ian Rogers 2026-05-08 8:27 ` [PATCH v6 2/6] perf tool: Missing delegate_tool schedstat delegates and dont_split_sample_group Ian Rogers ` (5 subsequent siblings) 6 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-05-08 8:27 UTC (permalink / raw) To: irogers, acme, gmx, james.clark, namhyung Cc: adrian.hunter, jolsa, linux-kernel, linux-perf-users, mingo, peterz perf_sched__timehist() registers event handlers for options using the sched->tool struct. It registers handlers for MMAP, COMM, EXIT, FORK, etc., but completely omits registering a handler for MMAP2 events. Registering only the MMAP handler causes VMA mappings to be silently dropped on modern systems (which primarily output MMAP2 records). This results in uninitialized machine/thread mapping structures, making it impossible to resolve shared library instruction pointers (IPs) to dynamic symbols/DSOs during timehist callchain analysis. Fix this by correctly registering perf_event__process_mmap2 in sched->tool inside perf_sched__timehist(). 
Assisted-by: Gemini-CLI:Google Gemini 3 Fixes: 49394a2a24c78ce0 ("perf sched timehist: Introduce timehist command") Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/builtin-sched.c | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 555247568e7a..241c2f808f7b 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -3299,6 +3299,7 @@ static int perf_sched__timehist(struct perf_sched *sched) */ sched->tool.sample = perf_timehist__process_sample; sched->tool.mmap = perf_event__process_mmap; + sched->tool.mmap2 = perf_event__process_mmap2; sched->tool.comm = perf_event__process_comm; sched->tool.exit = perf_event__process_exit; sched->tool.fork = perf_event__process_fork; -- 2.54.0.563.g4f69b47b94-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* [PATCH v6 2/6] perf tool: Missing delegate_tool schedstat delegates and dont_split_sample_group 2026-05-08 8:27 ` [PATCH v6 0/6] perf tools: Add inject --aslr feature and prerequisite robustness fixes Ian Rogers 2026-05-08 8:27 ` [PATCH v6 1/6] perf sched: Add missing mmap2 handler in timehist Ian Rogers @ 2026-05-08 8:27 ` Ian Rogers 2026-05-08 8:27 ` [PATCH v6 3/6] perf maps: Add maps__mutate_mapping Ian Rogers ` (4 subsequent siblings) 6 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-05-08 8:27 UTC (permalink / raw) To: irogers, acme, gmx, james.clark, namhyung Cc: adrian.hunter, jolsa, linux-kernel, linux-perf-users, mingo, peterz delegate_tool was missing the delegate overrides for schedstat_cpu and schedstat_domain. As a result, when allocated with zalloc, these callbacks defaulted to NULL, causing a segmentation fault if any schedstat events were delivered during event processing. Fix this by adding delegate_schedstat_cpu and delegate_schedstat_domain via the CREATE_DELEGATE_OP2 macro, and ensuring delegate_tool__init correctly registers them. Additionally, delegate_tool__init completely omitted copying the dont_split_sample_group property from the delegate. This would cause wrapper tools to default the flag to false, which corrupts piped event processing (e.g., in perf inject) by triggering duplicate event deliveries on split sample values in deliver_sample_group(). Similarly, perf_tool__init() omitted the initialization of this boolean field. On stack-allocated tools that rely on this initializer (like intel-tpebs or __cmd_evlist), this could result in uninitialized stack garbage evaluating to true—silently dropping non-leader event members in deliver_sample_group(). Fix both issues by properly copying the field in delegate_tool__init and initializing it to false in perf_tool__init. 
Assisted-by: Gemini-CLI:Google Gemini 3 Fixes: 6331b2669359 ("perf tool: Add a delegate_tool that just delegates actions to another tool") Fixes: 79bcd34e0f3d ("perf inject: Fix leader sampling inserting additional samples") Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/util/tool.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/tools/perf/util/tool.c b/tools/perf/util/tool.c index 013c7839e2cf..ff2150517b75 100644 --- a/tools/perf/util/tool.c +++ b/tools/perf/util/tool.c @@ -285,6 +285,7 @@ void perf_tool__init(struct perf_tool *tool, bool ordered_events) tool->no_warn = false; tool->show_feat_hdr = SHOW_FEAT_NO_HEADER; tool->merge_deferred_callchains = true; + tool->dont_split_sample_group = false; tool->sample = process_event_sample_stub; tool->mmap = process_event_stub; @@ -433,6 +434,8 @@ CREATE_DELEGATE_OP2(stat_config); CREATE_DELEGATE_OP2(stat_round); CREATE_DELEGATE_OP2(thread_map); CREATE_DELEGATE_OP2(time_conv); +CREATE_DELEGATE_OP2(schedstat_cpu); +CREATE_DELEGATE_OP2(schedstat_domain); CREATE_DELEGATE_OP2(tracing_data); #define CREATE_DELEGATE_OP3(name) \ @@ -470,6 +473,7 @@ void delegate_tool__init(struct delegate_tool *tool, struct perf_tool *delegate) tool->tool.no_warn = delegate->no_warn; tool->tool.show_feat_hdr = delegate->show_feat_hdr; tool->tool.merge_deferred_callchains = delegate->merge_deferred_callchains; + tool->tool.dont_split_sample_group = delegate->dont_split_sample_group; tool->tool.sample = delegate_sample; tool->tool.read = delegate_read; @@ -516,4 +520,6 @@ void delegate_tool__init(struct delegate_tool *tool, struct perf_tool *delegate) tool->tool.bpf_metadata = delegate_bpf_metadata; tool->tool.compressed = delegate_compressed; tool->tool.auxtrace = delegate_auxtrace; + tool->tool.schedstat_cpu = delegate_schedstat_cpu; + tool->tool.schedstat_domain = delegate_schedstat_domain; } -- 2.54.0.563.g4f69b47b94-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* [PATCH v6 3/6] perf maps: Add maps__mutate_mapping 2026-05-08 8:27 ` [PATCH v6 0/6] perf tools: Add inject --aslr feature and prerequisite robustness fixes Ian Rogers 2026-05-08 8:27 ` [PATCH v6 1/6] perf sched: Add missing mmap2 handler in timehist Ian Rogers 2026-05-08 8:27 ` [PATCH v6 2/6] perf tool: Missing delegate_tool schedstat delegates and dont_split_sample_group Ian Rogers @ 2026-05-08 8:27 ` Ian Rogers 2026-05-08 10:57 ` James Clark ` (2 more replies) 2026-05-08 8:27 ` [PATCH v6 4/6] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses Ian Rogers ` (3 subsequent siblings) 6 siblings, 3 replies; 183+ messages in thread From: Ian Rogers @ 2026-05-08 8:27 UTC (permalink / raw) To: irogers, acme, gmx, james.clark, namhyung Cc: adrian.hunter, jolsa, linux-kernel, linux-perf-users, mingo, peterz During kernel ELF symbol parsing (dso__process_kernel_symbol), proc kallsyms image loading (dso__load_kernel_sym, dso__load_guest_kernel_sym), and dynamic kernel memory map alignment updates (machine__update_kernel_mmap), the loader directly modifies live virtual address boundary key fields on map objects. If these boundaries are mutated while the map pointer actively resides inside the parent maps cache array list (kmaps) outside of any lock closure, an unsafe concurrent window is exposed where parallel worker lookup threads (e.g., inside perf top) can mistakenly assume the cache remains sorted based on stale parameters, executing binary search queries (bsearch) across an unsorted range and causing lookups to fail. Fix this by introducing a thread-safe, atomic transactional framework routine maps__mutate_mapping() that explicitly acquires the parent maps write semaphore lock, executes an incoming mutation callback block to perform the field updates under full lock protection, and invalidates the sorted tracking flags prior to releasing the write lock. 
This guarantees absolute atomic synchronization invariants, completely closing the concurrent lookup race window. The adjacent module alignment pass inside machine__create_kernel_maps() is safely preserved as a high-performance lockless pass, as its invocation lifecycle bounds remain strictly single-threaded by contract during session initialization construction. There is a potential for self deadlock if maps__mutate_mapping is called with the lock held, such as with maps__for_each_map but this problem also existed with the previous remove and insert approaches. Fixes: 39b12f781271 ("perf tools: Make it possible to read object code from vmlinux") Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/util/machine.c | 32 +++++++++++++++++----------- tools/perf/util/maps.c | 26 +++++++++++++++++++++++ tools/perf/util/maps.h | 2 ++ tools/perf/util/symbol-elf.c | 41 +++++++++++++++++++++++------------- tools/perf/util/symbol.c | 17 +++++++++++---- 5 files changed, 87 insertions(+), 31 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index e76f8c86e62a..8d4452c70cb5 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1522,22 +1522,30 @@ static void machine__set_kernel_mmap(struct machine *machine, map__set_end(machine->vmlinux_map, ~0ULL); } -static int machine__update_kernel_mmap(struct machine *machine, - u64 start, u64 end) +struct kernel_mmap_mutation_ctx { + u64 start; + u64 end; +}; + +static int kernel_mmap_mutate_cb(struct map *map, void *data) { - struct map *orig, *updated; - int err; + struct kernel_mmap_mutation_ctx *ctx = data; - orig = machine->vmlinux_map; - updated = map__get(orig); + map__set_start(map, ctx->start); + map__set_end(map, ctx->end); + if (ctx->start == 0 && ctx->end == 0) + map__set_end(map, ~0ULL); + return 0; +} - machine->vmlinux_map = updated; - maps__remove(machine__kernel_maps(machine), orig); - machine__set_kernel_mmap(machine, start, end); - err = 
maps__insert(machine__kernel_maps(machine), updated); - map__put(orig); +static int machine__update_kernel_mmap(struct machine *machine, + u64 start, u64 end) +{ + struct kernel_mmap_mutation_ctx ctx = { .start = start, .end = end }; - return err; + return maps__mutate_mapping(machine__kernel_maps(machine), + machine->vmlinux_map, + kernel_mmap_mutate_cb, &ctx); } int machine__create_kernel_maps(struct machine *machine) diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index 81a97ac34077..91345a773aa2 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -575,6 +575,32 @@ void maps__remove(struct maps *maps, struct map *map) #endif } +int maps__mutate_mapping(struct maps *maps, struct map *map, + int (*mutate_cb)(struct map *map, void *data), void *data) +{ + int err = 0; + + if (maps) + down_write(maps__lock(maps)); + + err = mutate_cb(map, data); + + if (maps) { + RC_CHK_ACCESS(maps)->maps_by_address_sorted = false; + RC_CHK_ACCESS(maps)->maps_by_name_sorted = false; + } + + if (maps) + up_write(maps__lock(maps)); + +#ifdef HAVE_LIBDW_SUPPORT + if (maps) + libdw__invalidate_dwfl(maps, maps__libdw_addr_space_dwfl(maps)); +#endif + + return err; +} + bool maps__empty(struct maps *maps) { bool res; diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h index 20c52084ba9e..de74ccbb8a12 100644 --- a/tools/perf/util/maps.h +++ b/tools/perf/util/maps.h @@ -61,6 +61,8 @@ size_t maps__fprintf(struct maps *maps, FILE *fp); int maps__insert(struct maps *maps, struct map *map); void maps__remove(struct maps *maps, struct map *map); +int maps__mutate_mapping(struct maps *maps, struct map *map, + int (*mutate_cb)(struct map *map, void *data), void *data); struct map *maps__find(struct maps *maps, u64 addr); struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 7afa8a117139..dc4ab58857b3 100644 --- a/tools/perf/util/symbol-elf.c +++ 
b/tools/perf/util/symbol-elf.c @@ -1341,6 +1341,24 @@ static u64 ref_reloc(struct kmap *kmap) void __weak arch__sym_update(struct symbol *s __maybe_unused, GElf_Sym *sym __maybe_unused) { } +struct remap_kernel_ctx { + u64 sh_addr; + u64 sh_size; + u64 sh_offset; + struct kmap *kmap; +}; + +static int remap_kernel_cb(struct map *map, void *data) +{ + struct remap_kernel_ctx *ctx = data; + + map__set_start(map, ctx->sh_addr + ref_reloc(ctx->kmap)); + map__set_end(map, map__start(map) + ctx->sh_size); + map__set_pgoff(map, ctx->sh_offset); + map__set_mapping_type(map, MAPPING_TYPE__DSO); + return 0; +} + static int dso__process_kernel_symbol(struct dso *dso, struct map *map, GElf_Sym *sym, GElf_Shdr *shdr, struct maps *kmaps, struct kmap *kmap, @@ -1371,22 +1389,15 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, * map to the kernel dso. */ if (*remap_kernel && dso__kernel(dso) && !kmodule) { + struct remap_kernel_ctx ctx = { + .sh_addr = shdr->sh_addr, + .sh_size = shdr->sh_size, + .sh_offset = shdr->sh_offset, + .kmap = kmap + }; + *remap_kernel = false; - map__set_start(map, shdr->sh_addr + ref_reloc(kmap)); - map__set_end(map, map__start(map) + shdr->sh_size); - map__set_pgoff(map, shdr->sh_offset); - map__set_mapping_type(map, MAPPING_TYPE__DSO); - /* Ensure maps are correctly ordered */ - if (kmaps) { - int err; - struct map *tmp = map__get(map); - - maps__remove(kmaps, map); - err = maps__insert(kmaps, map); - map__put(tmp); - if (err) - return err; - } + maps__mutate_mapping(kmaps, map, remap_kernel_cb, &ctx); } /* diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index fcaeeddbbb6b..09b93e844887 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -48,6 +48,13 @@ #include <symbol/kallsyms.h> #include <sys/utsname.h> +static int map_fixup_cb(struct map *map, void *data __maybe_unused) +{ + map__fixup_start(map); + map__fixup_end(map); + return 0; +} + static int dso__load_kernel_sym(struct dso *dso, 
struct map *map); static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map); static bool symbol__is_idle(const char *name); @@ -2121,10 +2128,11 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map) free(kallsyms_allocated_filename); if (err > 0 && !dso__is_kcore(dso)) { + struct maps *kmaps = map__kmaps(map); + dso__set_binary_type(dso, DSO_BINARY_TYPE__KALLSYMS); dso__set_long_name(dso, DSO__NAME_KALLSYMS, false); - map__fixup_start(map); - map__fixup_end(map); + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); } return err; @@ -2164,10 +2172,11 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map) if (err > 0) pr_debug("Using %s for symbols\n", kallsyms_filename); if (err > 0 && !dso__is_kcore(dso)) { + struct maps *kmaps = map__kmaps(map); + dso__set_binary_type(dso, DSO_BINARY_TYPE__GUEST_KALLSYMS); dso__set_long_name(dso, machine->mmap_name, false); - map__fixup_start(map); - map__fixup_end(map); + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); } return err; -- 2.54.0.563.g4f69b47b94-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v6 3/6] perf maps: Add maps__mutate_mapping 2026-05-08 8:27 ` [PATCH v6 3/6] perf maps: Add maps__mutate_mapping Ian Rogers @ 2026-05-08 10:57 ` James Clark 2026-05-08 20:37 ` sashiko-bot 2026-05-11 7:07 ` Namhyung Kim 2 siblings, 0 replies; 183+ messages in thread From: James Clark @ 2026-05-08 10:57 UTC (permalink / raw) To: Ian Rogers Cc: adrian.hunter, jolsa, linux-kernel, linux-perf-users, mingo, peterz, acme, gmx, namhyung On 08/05/2026 9:27 am, Ian Rogers wrote: > During kernel ELF symbol parsing (dso__process_kernel_symbol), proc > kallsyms image loading (dso__load_kernel_sym, > dso__load_guest_kernel_sym), and dynamic kernel memory map alignment > updates (machine__update_kernel_mmap), the loader directly modifies > live virtual address boundary keys fields on map objects. If these > boundaries are mutated while the map pointer actively resides inside > the parent maps cache array list (kmaps) outside of any lock closure, > an unsafe concurrent window is exposed where parallel worker lookup > threads (e.g., inside perf top) can mistakenly assume the cache > remains sorted based on stale parameters, executing binary search > queries (bsearch) across an unsorted range and triggering lookups > failure. > > Fix this by introducing a thread-safe, atomic transactional framework > routine maps__mutate_mapping() that explicitly acquires the parent > maps write semaphore lock, executes an incoming mutation callback > block to perform the field updates under full lock protection, and > invalidates the sorted tracking flags prior to releasing the write > lock. This guarantees absolute atomic synchronization invariants, > completely closing the concurrent lookup race window. The adjacent > module alignment pass inside machine__create_kernel_maps() is safely > preserved as a high-performance lockless pass, as its invocation > lifecycle bounds remain strictly single-threaded by contract during > session initialization construction. 
There is a potential for self > deadlock if maps__mutate_mapping is called with the lock held, such as > with maps__for_each_map but this problem also existed with the > previous remove and insert approaches. > > Fixes: 39b12f781271 ("perf tools: Make it possible to read object code from vmlinux") > Signed-off-by: Ian Rogers <irogers@google.com> > --- > tools/perf/util/machine.c | 32 +++++++++++++++++----------- > tools/perf/util/maps.c | 26 +++++++++++++++++++++++ > tools/perf/util/maps.h | 2 ++ > tools/perf/util/symbol-elf.c | 41 +++++++++++++++++++++++------------- > tools/perf/util/symbol.c | 17 +++++++++++---- > 5 files changed, 87 insertions(+), 31 deletions(-) > > diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c > index e76f8c86e62a..8d4452c70cb5 100644 > --- a/tools/perf/util/machine.c > +++ b/tools/perf/util/machine.c > @@ -1522,22 +1522,30 @@ static void machine__set_kernel_mmap(struct machine *machine, > map__set_end(machine->vmlinux_map, ~0ULL); > } > > -static int machine__update_kernel_mmap(struct machine *machine, > - u64 start, u64 end) > +struct kernel_mmap_mutation_ctx { > + u64 start; > + u64 end; > +}; > + > +static int kernel_mmap_mutate_cb(struct map *map, void *data) > { > - struct map *orig, *updated; > - int err; > + struct kernel_mmap_mutation_ctx *ctx = data; > > - orig = machine->vmlinux_map; > - updated = map__get(orig); > + map__set_start(map, ctx->start); > + map__set_end(map, ctx->end); > + if (ctx->start == 0 && ctx->end == 0) > + map__set_end(map, ~0ULL); > + return 0; > +} > > - machine->vmlinux_map = updated; > - maps__remove(machine__kernel_maps(machine), orig); > - machine__set_kernel_mmap(machine, start, end); > - err = maps__insert(machine__kernel_maps(machine), updated); > - map__put(orig); > +static int machine__update_kernel_mmap(struct machine *machine, > + u64 start, u64 end) > +{ > + struct kernel_mmap_mutation_ctx ctx = { .start = start, .end = end }; > > - return err; > + return 
maps__mutate_mapping(machine__kernel_maps(machine), > + machine->vmlinux_map, > + kernel_mmap_mutate_cb, &ctx); > } > > int machine__create_kernel_maps(struct machine *machine) > diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c > index 81a97ac34077..91345a773aa2 100644 > --- a/tools/perf/util/maps.c > +++ b/tools/perf/util/maps.c > @@ -575,6 +575,32 @@ void maps__remove(struct maps *maps, struct map *map) > #endif > } > > +int maps__mutate_mapping(struct maps *maps, struct map *map, > + int (*mutate_cb)(struct map *map, void *data), void *data) > +{ > + int err = 0; > + > + if (maps) > + down_write(maps__lock(maps)); > + > + err = mutate_cb(map, data); Hi Ian, I get this error when building with LLVM=1 on Ubuntu clang version 18.1.8 (11~20.04.2): util/maps.c:586:8: error: mutex 'maps__lock(maps)' is not held on every path through here [-Werror,-Wthread-safety-analysis] 586 | err = mutate_cb(map, data); | ^ util/maps.c:584:3: note: mutex acquired here 584 | down_write(maps__lock(maps)); | ^ util/maps.c:594:3: error: releasing mutex 'maps__lock(maps)' that was not held [-Werror,-Wthread-safety-analysis] 594 | up_write(maps__lock(maps)); | ^ 2 errors generated. 
> + > + if (maps) { > + RC_CHK_ACCESS(maps)->maps_by_address_sorted = false; > + RC_CHK_ACCESS(maps)->maps_by_name_sorted = false; > + } > + > + if (maps) > + up_write(maps__lock(maps)); > + > +#ifdef HAVE_LIBDW_SUPPORT > + if (maps) > + libdw__invalidate_dwfl(maps, maps__libdw_addr_space_dwfl(maps)); > +#endif > + > + return err; > +} > + > bool maps__empty(struct maps *maps) > { > bool res; > diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h > index 20c52084ba9e..de74ccbb8a12 100644 > --- a/tools/perf/util/maps.h > +++ b/tools/perf/util/maps.h > @@ -61,6 +61,8 @@ size_t maps__fprintf(struct maps *maps, FILE *fp); > > int maps__insert(struct maps *maps, struct map *map); > void maps__remove(struct maps *maps, struct map *map); > +int maps__mutate_mapping(struct maps *maps, struct map *map, > + int (*mutate_cb)(struct map *map, void *data), void *data); > > struct map *maps__find(struct maps *maps, u64 addr); > struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp); > diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c > index 7afa8a117139..dc4ab58857b3 100644 > --- a/tools/perf/util/symbol-elf.c > +++ b/tools/perf/util/symbol-elf.c > @@ -1341,6 +1341,24 @@ static u64 ref_reloc(struct kmap *kmap) > void __weak arch__sym_update(struct symbol *s __maybe_unused, > GElf_Sym *sym __maybe_unused) { } > > +struct remap_kernel_ctx { > + u64 sh_addr; > + u64 sh_size; > + u64 sh_offset; > + struct kmap *kmap; > +}; > + > +static int remap_kernel_cb(struct map *map, void *data) > +{ > + struct remap_kernel_ctx *ctx = data; > + > + map__set_start(map, ctx->sh_addr + ref_reloc(ctx->kmap)); > + map__set_end(map, map__start(map) + ctx->sh_size); > + map__set_pgoff(map, ctx->sh_offset); > + map__set_mapping_type(map, MAPPING_TYPE__DSO); > + return 0; > +} > + > static int dso__process_kernel_symbol(struct dso *dso, struct map *map, > GElf_Sym *sym, GElf_Shdr *shdr, > struct maps *kmaps, struct kmap *kmap, > @@ -1371,22 
+1389,15 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, > * map to the kernel dso. > */ > if (*remap_kernel && dso__kernel(dso) && !kmodule) { > + struct remap_kernel_ctx ctx = { > + .sh_addr = shdr->sh_addr, > + .sh_size = shdr->sh_size, > + .sh_offset = shdr->sh_offset, > + .kmap = kmap > + }; > + > *remap_kernel = false; > - map__set_start(map, shdr->sh_addr + ref_reloc(kmap)); > - map__set_end(map, map__start(map) + shdr->sh_size); > - map__set_pgoff(map, shdr->sh_offset); > - map__set_mapping_type(map, MAPPING_TYPE__DSO); > - /* Ensure maps are correctly ordered */ > - if (kmaps) { > - int err; > - struct map *tmp = map__get(map); > - > - maps__remove(kmaps, map); > - err = maps__insert(kmaps, map); > - map__put(tmp); > - if (err) > - return err; > - } > + maps__mutate_mapping(kmaps, map, remap_kernel_cb, &ctx); > } > > /* > diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c > index fcaeeddbbb6b..09b93e844887 100644 > --- a/tools/perf/util/symbol.c > +++ b/tools/perf/util/symbol.c > @@ -48,6 +48,13 @@ > #include <symbol/kallsyms.h> > #include <sys/utsname.h> > > +static int map_fixup_cb(struct map *map, void *data __maybe_unused) > +{ > + map__fixup_start(map); > + map__fixup_end(map); > + return 0; > +} > + > static int dso__load_kernel_sym(struct dso *dso, struct map *map); > static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map); > static bool symbol__is_idle(const char *name); > @@ -2121,10 +2128,11 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map) > free(kallsyms_allocated_filename); > > if (err > 0 && !dso__is_kcore(dso)) { > + struct maps *kmaps = map__kmaps(map); > + > dso__set_binary_type(dso, DSO_BINARY_TYPE__KALLSYMS); > dso__set_long_name(dso, DSO__NAME_KALLSYMS, false); > - map__fixup_start(map); > - map__fixup_end(map); > + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); > } > > return err; > @@ -2164,10 +2172,11 @@ static int dso__load_guest_kernel_sym(struct dso 
*dso, struct map *map) > if (err > 0) > pr_debug("Using %s for symbols\n", kallsyms_filename); > if (err > 0 && !dso__is_kcore(dso)) { > + struct maps *kmaps = map__kmaps(map); > + > dso__set_binary_type(dso, DSO_BINARY_TYPE__GUEST_KALLSYMS); > dso__set_long_name(dso, machine->mmap_name, false); > - map__fixup_start(map); > - map__fixup_end(map); > + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); > } > > return err; ^ permalink raw reply [flat|nested] 183+ messages in thread
* Re: [PATCH v6 3/6] perf maps: Add maps__mutate_mapping 2026-05-08 8:27 ` [PATCH v6 3/6] perf maps: Add maps__mutate_mapping Ian Rogers 2026-05-08 10:57 ` James Clark @ 2026-05-08 20:37 ` sashiko-bot 2026-05-11 7:07 ` Namhyung Kim 2 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-05-08 20:37 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider: - [Medium] Self-deadlock in the kallsyms symbol loading path. -- commit 1340dc1c4b3b633e8a032536f0c7948f87b17ce3 Author: Ian Rogers <irogers@google.com> perf maps: Add maps__mutate_mapping This commit introduces maps__mutate_mapping to safely update map boundaries by acquiring a write lock on the maps collection. It applies this new synchronization to kernel ELF parsing, kallsyms image loading, and dynamic memory map updates to close concurrent lookup races. > There is a potential for self deadlock if maps__mutate_mapping is called > with the lock held, such as with maps__for_each_map but this problem also > existed with the previous remove and insert approaches. Is this commit message accurate for all modified paths? Prior to this patch, dso__load_kernel_sym and dso__load_guest_kernel_sym used map__fixup_start and map__fixup_end directly without locking. > diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c > index fcaeeddbbb6b9..09b93e844887d 100644 > --- a/tools/perf/util/symbol.c > +++ b/tools/perf/util/symbol.c [ ... 
] > @@ -2121,10 +2128,11 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map) > free(kallsyms_allocated_filename); > > if (err > 0 && !dso__is_kcore(dso)) { > + struct maps *kmaps = map__kmaps(map); > + > dso__set_binary_type(dso, DSO_BINARY_TYPE__KALLSYMS); > dso__set_long_name(dso, DSO__NAME_KALLSYMS, false); > - map__fixup_start(map); > - map__fixup_end(map); > + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); Can this cause a self-deadlock when resolving kernel symbols by name? If a tool resolves a symbol upon first load, the call chain appears to hold a read lock while attempting to acquire a write lock on the exact same maps collection: maps__find_symbol_by_name() maps__for_each_map() <-- acquires read lock maps__find_symbol_by_name_cb() map__load() dso__load_kernel_sym() maps__mutate_mapping() <-- attempts to acquire write lock Since rw_semaphore cannot be upgraded from read to write without releasing the read lock first, won't this immediately deadlock on the kmaps lock? > } > > return err; [ ... ] > @@ -2164,10 +2172,11 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map) > if (err > 0) > pr_debug("Using %s for symbols\n", kallsyms_filename); > if (err > 0 && !dso__is_kcore(dso)) { > + struct maps *kmaps = map__kmaps(map); > + > dso__set_binary_type(dso, DSO_BINARY_TYPE__GUEST_KALLSYMS); > dso__set_long_name(dso, machine->mmap_name, false); > - map__fixup_start(map); > - map__fixup_end(map); > + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); Does this guest kernel symbol loading path suffer from the exact same self-deadlock sequence? > } > > return err; -- Sashiko AI review · https://sashiko.dev/#/patchset/20260508082726.2795191-1-irogers@google.com?part=3 ^ permalink raw reply [flat|nested] 183+ messages in thread
* Re: [PATCH v6 3/6] perf maps: Add maps__mutate_mapping 2026-05-08 8:27 ` [PATCH v6 3/6] perf maps: Add maps__mutate_mapping Ian Rogers 2026-05-08 10:57 ` James Clark 2026-05-08 20:37 ` sashiko-bot @ 2026-05-11 7:07 ` Namhyung Kim 2026-06-10 10:40 ` James Clark 2 siblings, 1 reply; 183+ messages in thread From: Namhyung Kim @ 2026-05-11 7:07 UTC (permalink / raw) To: Ian Rogers Cc: acme, gmx, james.clark, adrian.hunter, jolsa, linux-kernel, linux-perf-users, mingo, peterz On Fri, May 08, 2026 at 01:27:23AM -0700, Ian Rogers wrote: > During kernel ELF symbol parsing (dso__process_kernel_symbol), proc > kallsyms image loading (dso__load_kernel_sym, > dso__load_guest_kernel_sym), and dynamic kernel memory map alignment > updates (machine__update_kernel_mmap), the loader directly modifies > live virtual address boundary keys fields on map objects. If these > boundaries are mutated while the map pointer actively resides inside > the parent maps cache array list (kmaps) outside of any lock closure, > an unsafe concurrent window is exposed where parallel worker lookup > threads (e.g., inside perf top) can mistakenly assume the cache > remains sorted based on stale parameters, executing binary search > queries (bsearch) across an unsorted range and triggering lookups > failure. > > Fix this by introducing a thread-safe, atomic transactional framework > routine maps__mutate_mapping() that explicitly acquires the parent > maps write semaphore lock, executes an incoming mutation callback > block to perform the field updates under full lock protection, and > invalidates the sorted tracking flags prior to releasing the write > lock. This guarantees absolute atomic synchronization invariants, > completely closing the concurrent lookup race window. 
The adjacent > module alignment pass inside machine__create_kernel_maps() is safely > preserved as a high-performance lockless pass, as its invocation > lifecycle bounds remain strictly single-threaded by contract during > session initialization construction. There is a potential for self > deadlock if maps__mutate_mapping is called with the lock held, such as > with maps__for_each_map but this problem also existed with the > previous remove and insert approaches. > > Fixes: 39b12f781271 ("perf tools: Make it possible to read object code from vmlinux") > Signed-off-by: Ian Rogers <irogers@google.com> > --- > tools/perf/util/machine.c | 32 +++++++++++++++++----------- > tools/perf/util/maps.c | 26 +++++++++++++++++++++++ > tools/perf/util/maps.h | 2 ++ > tools/perf/util/symbol-elf.c | 41 +++++++++++++++++++++++------------- > tools/perf/util/symbol.c | 17 +++++++++++---- > 5 files changed, 87 insertions(+), 31 deletions(-) > > diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c > index e76f8c86e62a..8d4452c70cb5 100644 > --- a/tools/perf/util/machine.c > +++ b/tools/perf/util/machine.c > @@ -1522,22 +1522,30 @@ static void machine__set_kernel_mmap(struct machine *machine, > map__set_end(machine->vmlinux_map, ~0ULL); > } > > -static int machine__update_kernel_mmap(struct machine *machine, > - u64 start, u64 end) > +struct kernel_mmap_mutation_ctx { > + u64 start; > + u64 end; > +}; > + > +static int kernel_mmap_mutate_cb(struct map *map, void *data) > { > - struct map *orig, *updated; > - int err; > + struct kernel_mmap_mutation_ctx *ctx = data; > > - orig = machine->vmlinux_map; > - updated = map__get(orig); > + map__set_start(map, ctx->start); > + map__set_end(map, ctx->end); > + if (ctx->start == 0 && ctx->end == 0) > + map__set_end(map, ~0ULL); > + return 0; > +} > > - machine->vmlinux_map = updated; > - maps__remove(machine__kernel_maps(machine), orig); > - machine__set_kernel_mmap(machine, start, end); > - err = 
maps__insert(machine__kernel_maps(machine), updated); > - map__put(orig); > +static int machine__update_kernel_mmap(struct machine *machine, > + u64 start, u64 end) > +{ > + struct kernel_mmap_mutation_ctx ctx = { .start = start, .end = end }; > > - return err; > + return maps__mutate_mapping(machine__kernel_maps(machine), > + machine->vmlinux_map, > + kernel_mmap_mutate_cb, &ctx); > } > > int machine__create_kernel_maps(struct machine *machine) > diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c > index 81a97ac34077..91345a773aa2 100644 > --- a/tools/perf/util/maps.c > +++ b/tools/perf/util/maps.c > @@ -575,6 +575,32 @@ void maps__remove(struct maps *maps, struct map *map) > #endif > } > > +int maps__mutate_mapping(struct maps *maps, struct map *map, > + int (*mutate_cb)(struct map *map, void *data), void *data) > +{ > + int err = 0; > + > + if (maps) > + down_write(maps__lock(maps)); > + > + err = mutate_cb(map, data); > + > + if (maps) { > + RC_CHK_ACCESS(maps)->maps_by_address_sorted = false; > + RC_CHK_ACCESS(maps)->maps_by_name_sorted = false; > + } > + > + if (maps) > + up_write(maps__lock(maps)); > + > +#ifdef HAVE_LIBDW_SUPPORT > + if (maps) > + libdw__invalidate_dwfl(maps, maps__libdw_addr_space_dwfl(maps)); > +#endif > + > + return err; > +} Could be simplified by checking 'maps' once. But I'm not sure if there's a case it doesn't have the maps. 
Thanks, Namhyung > + > bool maps__empty(struct maps *maps) > { > bool res; > diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h > index 20c52084ba9e..de74ccbb8a12 100644 > --- a/tools/perf/util/maps.h > +++ b/tools/perf/util/maps.h > @@ -61,6 +61,8 @@ size_t maps__fprintf(struct maps *maps, FILE *fp); > > int maps__insert(struct maps *maps, struct map *map); > void maps__remove(struct maps *maps, struct map *map); > +int maps__mutate_mapping(struct maps *maps, struct map *map, > + int (*mutate_cb)(struct map *map, void *data), void *data); > > struct map *maps__find(struct maps *maps, u64 addr); > struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp); > diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c > index 7afa8a117139..dc4ab58857b3 100644 > --- a/tools/perf/util/symbol-elf.c > +++ b/tools/perf/util/symbol-elf.c > @@ -1341,6 +1341,24 @@ static u64 ref_reloc(struct kmap *kmap) > void __weak arch__sym_update(struct symbol *s __maybe_unused, > GElf_Sym *sym __maybe_unused) { } > > +struct remap_kernel_ctx { > + u64 sh_addr; > + u64 sh_size; > + u64 sh_offset; > + struct kmap *kmap; > +}; > + > +static int remap_kernel_cb(struct map *map, void *data) > +{ > + struct remap_kernel_ctx *ctx = data; > + > + map__set_start(map, ctx->sh_addr + ref_reloc(ctx->kmap)); > + map__set_end(map, map__start(map) + ctx->sh_size); > + map__set_pgoff(map, ctx->sh_offset); > + map__set_mapping_type(map, MAPPING_TYPE__DSO); > + return 0; > +} > + > static int dso__process_kernel_symbol(struct dso *dso, struct map *map, > GElf_Sym *sym, GElf_Shdr *shdr, > struct maps *kmaps, struct kmap *kmap, > @@ -1371,22 +1389,15 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, > * map to the kernel dso. 
> */ > if (*remap_kernel && dso__kernel(dso) && !kmodule) { > + struct remap_kernel_ctx ctx = { > + .sh_addr = shdr->sh_addr, > + .sh_size = shdr->sh_size, > + .sh_offset = shdr->sh_offset, > + .kmap = kmap > + }; > + > *remap_kernel = false; > - map__set_start(map, shdr->sh_addr + ref_reloc(kmap)); > - map__set_end(map, map__start(map) + shdr->sh_size); > - map__set_pgoff(map, shdr->sh_offset); > - map__set_mapping_type(map, MAPPING_TYPE__DSO); > - /* Ensure maps are correctly ordered */ > - if (kmaps) { > - int err; > - struct map *tmp = map__get(map); > - > - maps__remove(kmaps, map); > - err = maps__insert(kmaps, map); > - map__put(tmp); > - if (err) > - return err; > - } > + maps__mutate_mapping(kmaps, map, remap_kernel_cb, &ctx); > } > > /* > diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c > index fcaeeddbbb6b..09b93e844887 100644 > --- a/tools/perf/util/symbol.c > +++ b/tools/perf/util/symbol.c > @@ -48,6 +48,13 @@ > #include <symbol/kallsyms.h> > #include <sys/utsname.h> > > +static int map_fixup_cb(struct map *map, void *data __maybe_unused) > +{ > + map__fixup_start(map); > + map__fixup_end(map); > + return 0; > +} > + > static int dso__load_kernel_sym(struct dso *dso, struct map *map); > static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map); > static bool symbol__is_idle(const char *name); > @@ -2121,10 +2128,11 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map) > free(kallsyms_allocated_filename); > > if (err > 0 && !dso__is_kcore(dso)) { > + struct maps *kmaps = map__kmaps(map); > + > dso__set_binary_type(dso, DSO_BINARY_TYPE__KALLSYMS); > dso__set_long_name(dso, DSO__NAME_KALLSYMS, false); > - map__fixup_start(map); > - map__fixup_end(map); > + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); > } > > return err; > @@ -2164,10 +2172,11 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map) > if (err > 0) > pr_debug("Using %s for symbols\n", kallsyms_filename); > if (err > 0 
&& !dso__is_kcore(dso)) { > + struct maps *kmaps = map__kmaps(map); > + > dso__set_binary_type(dso, DSO_BINARY_TYPE__GUEST_KALLSYMS); > dso__set_long_name(dso, machine->mmap_name, false); > - map__fixup_start(map); > - map__fixup_end(map); > + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); > } > > return err; > -- > 2.54.0.563.g4f69b47b94-goog > ^ permalink raw reply [flat|nested] 183+ messages in thread
* Re: [PATCH v6 3/6] perf maps: Add maps__mutate_mapping 2026-05-11 7:07 ` Namhyung Kim @ 2026-06-10 10:40 ` James Clark 0 siblings, 0 replies; 183+ messages in thread From: James Clark @ 2026-06-10 10:40 UTC (permalink / raw) To: Namhyung Kim, Ian Rogers Cc: acme, gmx, adrian.hunter, jolsa, linux-kernel, linux-perf-users, mingo, peterz On 11/05/2026 8:07 am, Namhyung Kim wrote: > On Fri, May 08, 2026 at 01:27:23AM -0700, Ian Rogers wrote: >> During kernel ELF symbol parsing (dso__process_kernel_symbol), proc >> kallsyms image loading (dso__load_kernel_sym, >> dso__load_guest_kernel_sym), and dynamic kernel memory map alignment >> updates (machine__update_kernel_mmap), the loader directly modifies >> live virtual address boundary keys fields on map objects. If these >> boundaries are mutated while the map pointer actively resides inside >> the parent maps cache array list (kmaps) outside of any lock closure, >> an unsafe concurrent window is exposed where parallel worker lookup >> threads (e.g., inside perf top) can mistakenly assume the cache >> remains sorted based on stale parameters, executing binary search >> queries (bsearch) across an unsorted range and triggering lookups >> failure. >> >> Fix this by introducing a thread-safe, atomic transactional framework >> routine maps__mutate_mapping() that explicitly acquires the parent >> maps write semaphore lock, executes an incoming mutation callback >> block to perform the field updates under full lock protection, and >> invalidates the sorted tracking flags prior to releasing the write >> lock. This guarantees absolute atomic synchronization invariants, >> completely closing the concurrent lookup race window. The adjacent >> module alignment pass inside machine__create_kernel_maps() is safely >> preserved as a high-performance lockless pass, as its invocation >> lifecycle bounds remain strictly single-threaded by contract during >> session initialization construction. 
There is a potential for self >> deadlock if maps__mutate_mapping is called with the lock held, such as >> with maps__for_each_map but this problem also existed with the >> previous remove and insert approaches. >> >> Fixes: 39b12f781271 ("perf tools: Make it possible to read object code from vmlinux") >> Signed-off-by: Ian Rogers <irogers@google.com> >> --- >> tools/perf/util/machine.c | 32 +++++++++++++++++----------- >> tools/perf/util/maps.c | 26 +++++++++++++++++++++++ >> tools/perf/util/maps.h | 2 ++ >> tools/perf/util/symbol-elf.c | 41 +++++++++++++++++++++++------------- >> tools/perf/util/symbol.c | 17 +++++++++++---- >> 5 files changed, 87 insertions(+), 31 deletions(-) >> >> diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c >> index e76f8c86e62a..8d4452c70cb5 100644 >> --- a/tools/perf/util/machine.c >> +++ b/tools/perf/util/machine.c >> @@ -1522,22 +1522,30 @@ static void machine__set_kernel_mmap(struct machine *machine, >> map__set_end(machine->vmlinux_map, ~0ULL); >> } >> >> -static int machine__update_kernel_mmap(struct machine *machine, >> - u64 start, u64 end) >> +struct kernel_mmap_mutation_ctx { >> + u64 start; >> + u64 end; >> +}; >> + >> +static int kernel_mmap_mutate_cb(struct map *map, void *data) >> { >> - struct map *orig, *updated; >> - int err; >> + struct kernel_mmap_mutation_ctx *ctx = data; >> >> - orig = machine->vmlinux_map; >> - updated = map__get(orig); >> + map__set_start(map, ctx->start); >> + map__set_end(map, ctx->end); >> + if (ctx->start == 0 && ctx->end == 0) >> + map__set_end(map, ~0ULL); >> + return 0; >> +} >> >> - machine->vmlinux_map = updated; >> - maps__remove(machine__kernel_maps(machine), orig); >> - machine__set_kernel_mmap(machine, start, end); >> - err = maps__insert(machine__kernel_maps(machine), updated); >> - map__put(orig); >> +static int machine__update_kernel_mmap(struct machine *machine, >> + u64 start, u64 end) >> +{ >> + struct kernel_mmap_mutation_ctx ctx = { .start = start, .end = end 
}; >> >> - return err; >> + return maps__mutate_mapping(machine__kernel_maps(machine), >> + machine->vmlinux_map, >> + kernel_mmap_mutate_cb, &ctx); >> } >> >> int machine__create_kernel_maps(struct machine *machine) >> diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c >> index 81a97ac34077..91345a773aa2 100644 >> --- a/tools/perf/util/maps.c >> +++ b/tools/perf/util/maps.c >> @@ -575,6 +575,32 @@ void maps__remove(struct maps *maps, struct map *map) >> #endif >> } >> >> +int maps__mutate_mapping(struct maps *maps, struct map *map, >> + int (*mutate_cb)(struct map *map, void *data), void *data) >> +{ >> + int err = 0; >> + >> + if (maps) >> + down_write(maps__lock(maps)); >> + >> + err = mutate_cb(map, data); >> + >> + if (maps) { >> + RC_CHK_ACCESS(maps)->maps_by_address_sorted = false; >> + RC_CHK_ACCESS(maps)->maps_by_name_sorted = false; >> + } >> + >> + if (maps) >> + up_write(maps__lock(maps)); >> + >> +#ifdef HAVE_LIBDW_SUPPORT >> + if (maps) >> + libdw__invalidate_dwfl(maps, maps__libdw_addr_space_dwfl(maps)); >> +#endif >> + >> + return err; >> +} > > Could be simplified by checking 'maps' once. But I'm not sure if > there's a case it doesn't have the maps. > > Thanks, > Namhyung > Hi Ian, The multiple maps checks after mutate_cb() still seem to be in V19. I noticed because I still get the -Wthread-safety-analysis error on x86 clang 15. Not sure why the compiler isn't able to see that maps doesn't change, and no amount of re-arranging made it go away. 
The only way to fix it is to have the lock and unlock in the same block, but I don't think it looks bad: if (maps) { down_write(maps__lock(maps)); err = mutate_cb(map, data); RC_CHK_ACCESS(maps)->maps_by_address_sorted = false; RC_CHK_ACCESS(maps)->maps_by_name_sorted = false; up_write(maps__lock(maps)); } else { err = mutate_cb(map, data); } Thanks James >> + >> bool maps__empty(struct maps *maps) >> { >> bool res; >> diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h >> index 20c52084ba9e..de74ccbb8a12 100644 >> --- a/tools/perf/util/maps.h >> +++ b/tools/perf/util/maps.h >> @@ -61,6 +61,8 @@ size_t maps__fprintf(struct maps *maps, FILE *fp); >> >> int maps__insert(struct maps *maps, struct map *map); >> void maps__remove(struct maps *maps, struct map *map); >> +int maps__mutate_mapping(struct maps *maps, struct map *map, >> + int (*mutate_cb)(struct map *map, void *data), void *data); >> >> struct map *maps__find(struct maps *maps, u64 addr); >> struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp); >> diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c >> index 7afa8a117139..dc4ab58857b3 100644 >> --- a/tools/perf/util/symbol-elf.c >> +++ b/tools/perf/util/symbol-elf.c >> @@ -1341,6 +1341,24 @@ static u64 ref_reloc(struct kmap *kmap) >> void __weak arch__sym_update(struct symbol *s __maybe_unused, >> GElf_Sym *sym __maybe_unused) { } >> >> +struct remap_kernel_ctx { >> + u64 sh_addr; >> + u64 sh_size; >> + u64 sh_offset; >> + struct kmap *kmap; >> +}; >> + >> +static int remap_kernel_cb(struct map *map, void *data) >> +{ >> + struct remap_kernel_ctx *ctx = data; >> + >> + map__set_start(map, ctx->sh_addr + ref_reloc(ctx->kmap)); >> + map__set_end(map, map__start(map) + ctx->sh_size); >> + map__set_pgoff(map, ctx->sh_offset); >> + map__set_mapping_type(map, MAPPING_TYPE__DSO); >> + return 0; >> +} >> + >> static int dso__process_kernel_symbol(struct dso *dso, struct map *map, >> GElf_Sym *sym, GElf_Shdr 
*shdr, >> struct maps *kmaps, struct kmap *kmap, >> @@ -1371,22 +1389,15 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, >> * map to the kernel dso. >> */ >> if (*remap_kernel && dso__kernel(dso) && !kmodule) { >> + struct remap_kernel_ctx ctx = { >> + .sh_addr = shdr->sh_addr, >> + .sh_size = shdr->sh_size, >> + .sh_offset = shdr->sh_offset, >> + .kmap = kmap >> + }; >> + >> *remap_kernel = false; >> - map__set_start(map, shdr->sh_addr + ref_reloc(kmap)); >> - map__set_end(map, map__start(map) + shdr->sh_size); >> - map__set_pgoff(map, shdr->sh_offset); >> - map__set_mapping_type(map, MAPPING_TYPE__DSO); >> - /* Ensure maps are correctly ordered */ >> - if (kmaps) { >> - int err; >> - struct map *tmp = map__get(map); >> - >> - maps__remove(kmaps, map); >> - err = maps__insert(kmaps, map); >> - map__put(tmp); >> - if (err) >> - return err; >> - } >> + maps__mutate_mapping(kmaps, map, remap_kernel_cb, &ctx); >> } >> >> /* >> diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c >> index fcaeeddbbb6b..09b93e844887 100644 >> --- a/tools/perf/util/symbol.c >> +++ b/tools/perf/util/symbol.c >> @@ -48,6 +48,13 @@ >> #include <symbol/kallsyms.h> >> #include <sys/utsname.h> >> >> +static int map_fixup_cb(struct map *map, void *data __maybe_unused) >> +{ >> + map__fixup_start(map); >> + map__fixup_end(map); >> + return 0; >> +} >> + >> static int dso__load_kernel_sym(struct dso *dso, struct map *map); >> static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map); >> static bool symbol__is_idle(const char *name); >> @@ -2121,10 +2128,11 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map) >> free(kallsyms_allocated_filename); >> >> if (err > 0 && !dso__is_kcore(dso)) { >> + struct maps *kmaps = map__kmaps(map); >> + >> dso__set_binary_type(dso, DSO_BINARY_TYPE__KALLSYMS); >> dso__set_long_name(dso, DSO__NAME_KALLSYMS, false); >> - map__fixup_start(map); >> - map__fixup_end(map); >> + 
maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); >> } >> >> return err; >> @@ -2164,10 +2172,11 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map) >> if (err > 0) >> pr_debug("Using %s for symbols\n", kallsyms_filename); >> if (err > 0 && !dso__is_kcore(dso)) { >> + struct maps *kmaps = map__kmaps(map); >> + >> dso__set_binary_type(dso, DSO_BINARY_TYPE__GUEST_KALLSYMS); >> dso__set_long_name(dso, machine->mmap_name, false); >> - map__fixup_start(map); >> - map__fixup_end(map); >> + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); >> } >> >> return err; >> -- >> 2.54.0.563.g4f69b47b94-goog >> ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v6 4/6] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses 2026-05-08 8:27 ` [PATCH v6 0/6] perf tools: Add inject --aslr feature and prerequisite robustness fixes Ian Rogers ` (2 preceding siblings ...) 2026-05-08 8:27 ` [PATCH v6 3/6] perf maps: Add maps__mutate_mapping Ian Rogers @ 2026-05-08 8:27 ` Ian Rogers 2026-05-08 21:22 ` sashiko-bot 2026-05-11 7:32 ` Namhyung Kim 2026-05-08 8:27 ` [PATCH v6 5/6] perf test: Add inject ASLR test Ian Rogers ` (2 subsequent siblings) 6 siblings, 2 replies; 183+ messages in thread From: Ian Rogers @ 2026-05-08 8:27 UTC (permalink / raw) To: irogers, acme, gmx, james.clark, namhyung Cc: adrian.hunter, jolsa, linux-kernel, linux-perf-users, mingo, peterz If perf.data files are taken from one machine to another they may leak virtual addresses and so weaken ASLR on the machine they are coming from. Add an aslr option for perf inject that remaps all virtual addresses, or drops data/events, so that the virtual address information isn't leaked. Events carrying virtual memory layouts are conservatively remap-processed or dropped, while zero-address-risk lifecycle metadata records (such as namespaces, cgroups, and BPF program info) are intentionally delegated to preserve comprehensive downstream trace tool analysis compatibility. The ASLR tracking tool virtualizes process and machine namespaces using 'struct machines' to safely isolate host mappings from unprivileged KVM guest address spaces. Memory space layouts are tracked globally per process context to ensure linear, continuous space allocations across successive mapping runs. To remain strictly conservative and guarantee security, the tool scrubs breakpoint addresses (bp_addr) from all synthesized stream headers, completely drops PERF_RECORD_TEXT_POKE events to prevent absolute immediate pointer operands leaks, and drops unsupported complex payloads (such as user register stacks, raw tracepoints, and hardware AUX tracing frames). 
Assisted-by: Gemini-CLI:Google Gemini 3 Signed-off-by: Ian Rogers <irogers@google.com> Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> --- v6: Enforce strict command-line validation mutual exclusivity between --aslr and --convert-callchain to prevent silent unwind failures. Secure mmap.pgoff unconditionally for all host and guest kernel text mapping regions to completely prevent active KASLR load deltas leakage. Conservatively drop PERF_RECORD_TEXT_POKE events completely via a local static drop stub to prevent absolute 64-bit kernel virtual pointer immediate operands leaks. Inject explicit array-end bounds validation check blocks before consuming trailing PERF_CONTEXT_USER_DEFERRED callchain cookies to eliminate out-of-bounds reads and parser desynchronization faults. Simplify ASLR mapping remap logic. Ensure that encountering a PERF_CONTEXT_USER_DEFERRED context marker explicitly updates cpumode. v5: Add machine to remap addresses key so that it is guest/host safe. Add 'first_kernel_mapping' tracking guard inside aslr.c to rewrite the core kernel pgoff virtual address while safely protecting module file offsets from corruption. Clean up breakpoint address (bp_addr) memory scrubbing by executing the scrubbing loop directly at core session initialization startup level, natively securing both file headers and streaming pipe channels while removing redundant runtime tool wrapper interception hooks layers. v4: Scrub bp_addr from headers/pipe synthesis attributes. Remove kernel mmap pgoff mathematical delta adjustment leaks to maintain secure base obfuscation bounds. Harden guest space contexts mapping loops, correct ksymbol map base invariants tracking, and plug tail-word padding heap leakage vectors in user stacks and AUX payloads. v3: Combine split-map fixes, guest namespaces, bounds checks, OOM rollbacks, hot path optimization, safe dso references, and I/O stream error handling from v3/v4 development.
Drop raw auxtrace events. Fix thread reference leaks in event handlers. Fix 32-bit truncation bug in hashmaps using u64* values. Prevent leaking uninitialized heap memory by zeroing copy buffer. Correct bitmask checks for branch stack flags. Avoid PMU configuration corruption. v2: First review feedback adjustments. --- tools/perf/builtin-inject.c | 36 +- tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 1036 +++++++++++++++++++++++++++++++++++ tools/perf/util/aslr.h | 10 + 4 files changed, 1082 insertions(+), 1 deletion(-) create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 6ab20df358c4..51dcf248b653 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -8,6 +8,7 @@ */ #include "builtin.h" +#include "util/aslr.h" #include "util/color.h" #include "util/dso.h" #include "util/vdso.h" @@ -123,6 +124,7 @@ struct perf_inject { bool in_place_update_dry_run; bool copy_kcore_dir; bool convert_callchain; + bool aslr; const char *input_name; struct perf_data output; u64 bytes_written; @@ -304,6 +306,8 @@ static int perf_event__repipe(const struct perf_tool *tool, return perf_event__repipe_synth(tool, event); } + + static int perf_event__drop(const struct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, struct perf_sample *sample __maybe_unused, @@ -2459,6 +2463,8 @@ static int __cmd_inject(struct perf_inject *inject) } } + + session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc, @@ -2565,6 +2571,8 @@ int cmd_inject(int argc, const char **argv) " instance has a subdir"), OPT_BOOLEAN(0, "convert-callchain", &inject.convert_callchain, "Generate callchains using DWARF and drop register/stack data"), + OPT_BOOLEAN(0, "aslr", &inject.aslr, + "Remap virtual memory addresses similar to ASLR"), OPT_END() }; const 
char * const inject_usage[] = { @@ -2572,6 +2580,7 @@ int cmd_inject(int argc, const char **argv) NULL }; bool ordered_events; + struct perf_tool *tool = &inject.tool; if (!inject.itrace_synth_opts.set) { /* Disable eager loading of kernel symbols that adds overhead to perf inject. */ @@ -2592,6 +2601,11 @@ int cmd_inject(int argc, const char **argv) if (argc) usage_with_options(inject_usage, options); + if (inject.aslr && inject.convert_callchain) { + pr_err("Error: --aslr and --convert-callchain are mutually exclusive features.\n"); + return -EINVAL; + } + if (inject.strip && !inject.itrace_synth_opts.set) { pr_err("--strip option requires --itrace option\n"); return -1; @@ -2685,18 +2699,36 @@ int cmd_inject(int argc, const char **argv) inject.tool.schedstat_domain = perf_event__repipe_op2_synth; inject.tool.dont_split_sample_group = true; inject.tool.merge_deferred_callchains = false; - inject.session = __perf_session__new(&data, &inject.tool, + if (inject.aslr) { + tool = aslr_tool__new(&inject.tool); + if (!tool) { + ret = -ENOMEM; + goto out_close_output; + } + } + inject.session = __perf_session__new(&data, tool, /*trace_event_repipe=*/inject.output.is_pipe, /*host_env=*/NULL); if (IS_ERR(inject.session)) { ret = PTR_ERR(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); goto out_close_output; } if (zstd_init(&(inject.session->zstd_data), 0) < 0) pr_warning("Decompression initialization failed.\n"); + if (inject.aslr) { + struct evsel *evsel; + + evlist__for_each_entry(inject.session->evlist, evsel) { + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) + evsel->core.attr.bp_addr = 0; + } + } + /* Save original section info before feature bits change */ ret = save_section_info(&inject); if (ret) @@ -2790,6 +2822,8 @@ int cmd_inject(int argc, const char **argv) strlist__delete(inject.known_build_ids); zstd_fini(&(inject.session->zstd_data)); perf_session__delete(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); out_close_output: if 
(!inject.in_place_update) perf_data__close(&inject.output); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 70cc91d00804..65b96f3b87e2 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -6,6 +6,7 @@ perf-util-y += arm64-frame-pointer-unwind-support.o perf-util-y += addr2line.o perf-util-y += addr_location.o perf-util-y += annotate.o +perf-util-y += aslr.o perf-util-y += blake2s.o perf-util-y += block-info.o perf-util-y += block-range.o diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c new file mode 100644 index 000000000000..09b7f2f8fb85 --- /dev/null +++ b/tools/perf/util/aslr.c @@ -0,0 +1,1036 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "aslr.h" + +#include "addr_location.h" +#include "debug.h" +#include "event.h" +#include "evsel.h" +#include "machine.h" +#include "map.h" +#include "thread.h" +#include "tool.h" +#include "session.h" +#include "data.h" +#include "dso.h" + +#include <internal/lib.h> /* page_size */ +#include <linux/compiler.h> +#include <linux/zalloc.h> +#include <errno.h> +#include <inttypes.h> +#include <unistd.h> + +/** + * struct remap_addresses_key - Key for mapping original addresses to remapped ones. + * @dso: Pointer to the DSO (Dynamic Shared Object) associated with the mapping. + * @invariant: Unique offset invariant within the VMA (Virtual Memory Area). + * Calculated as `start - pgoff`. This value remains constant when + * perf's internal `maps__fixup_overlap_and_insert` splits a map into + * fragmented VMA pieces due to overlapping events, allowing us to + * resolve split maps consistently back to the original VMA. + * @pid: Process ID associated with the mapping. + */ +struct remap_addresses_key { + struct machine *machine; + struct dso *dso; + u64 invariant; + pid_t pid; +}; + +struct aslr_mapping { + struct list_head node; + u64 orig_start; + u64 len; + u64 remap_start; +}; + +struct aslr_tool { + /** @tool: The tool implemented here and a pointer to a delegate to process the data. 
*/ + struct delegate_tool tool; + /** @machines: The machines with the input, not remapped, virtual address layout. */ + struct machines machines; + /** @event_copy: Buffer used to create an event to pass to the delegate. */ + char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); + /** @remap_addresses: mapping from remap_addresses_key to remapped address. */ + struct hashmap remap_addresses; + /** @top_addresses: mapping from process to max remapped address. */ + struct hashmap top_addresses; +}; + +static const pid_t kernel_pid = -1; + +/* Start remapping user processes from a small non-zero offset. */ +static const u64 user_space_start = 0x200000; +static const u64 kernel_space_start = 0xffff800010000000; + +static size_t remap_addresses__hash(long _key, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key = (struct remap_addresses_key *)_key; + + return (size_t)key->machine ^ (size_t)key->dso ^ key->invariant ^ key->pid; +} + +static bool remap_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key1 = (struct remap_addresses_key *)_key1; + struct remap_addresses_key *key2 = (struct remap_addresses_key *)_key2; + + return key1->machine == key2->machine && + RC_CHK_EQUAL(key1->dso, key2->dso) && + key1->invariant == key2->invariant && + key1->pid == key2->pid; +} + +struct top_addresses_key { + struct machine *machine; + pid_t pid; +}; + +static size_t top_addresses__hash(long _key, void *ctx __maybe_unused) +{ + struct top_addresses_key *key = (struct top_addresses_key *)_key; + + return (size_t)key->machine ^ key->pid; +} + +static bool top_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) +{ + struct top_addresses_key *key1 = (struct top_addresses_key *)_key1; + struct top_addresses_key *key2 = (struct top_addresses_key *)_key2; + + return key1->machine == key2->machine && key1->pid == key2->pid; +} + +static u64 round_up_to_page_size(u64 addr) +{ + return (addr + page_size - 1) & 
~((u64)page_size - 1); +} + +static u64 aslr_tool__remap_address(struct aslr_tool *aslr, + struct thread *aslr_thread, + u8 cpumode, + u64 addr) +{ + struct addr_location al; + struct remap_addresses_key key; + u64 *remapped_invariant_ptr = NULL; + u64 remap_addr = 0; + u8 effective_cpumode = cpumode; + + if (!aslr_thread) + return 0; /* No thread. */ + + addr_location__init(&al); + if (!thread__find_map(aslr_thread, cpumode, addr, &al)) { + /* + * If lookup fails with specified cpumode, try fallback to the other space + * to be robust against bad cpumode in samples. + */ + if (cpumode == PERF_RECORD_MISC_KERNEL) + effective_cpumode = PERF_RECORD_MISC_USER; + else if (cpumode == PERF_RECORD_MISC_USER) + effective_cpumode = PERF_RECORD_MISC_KERNEL; + else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + effective_cpumode = PERF_RECORD_MISC_GUEST_USER; + else if (cpumode == PERF_RECORD_MISC_GUEST_USER) + effective_cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + + if (!thread__find_map(aslr_thread, effective_cpumode, addr, &al)) { + addr_location__exit(&al); + return 0; /* No mmap. */ + } + } + + key.machine = maps__machine(aslr_thread->maps); + key.dso = map__dso(al.map); + key.invariant = map__start(al.map) - map__pgoff(al.map); + key.pid = effective_cpumode == PERF_RECORD_MISC_KERNEL ? kernel_pid : aslr_thread->pid_; + + if (hashmap__find(&aslr->remap_addresses, &key, &remapped_invariant_ptr)) { + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + + (addr - map__start(al.map)); + } else { + pr_debug("Cannot find a remapped entry for address %lx " + "in mapping %lx(%lx) for pid=%d\n", + addr, map__start(al.map), map__size(al.map), key.pid); + } + + addr_location__exit(&al); + return remap_addr; +} + +static u64 aslr_tool__findnew_mapping(struct aslr_tool *aslr, + struct thread *aslr_thread, + u8 cpumode, u64 start, + u64 len, u64 pgoff) +{ + /* Address location for dso lookup. */ + struct addr_location al; + /* Original ASLR address based key for the remap table. 
*/ + struct remap_addresses_key remap_key; + /* The address in the ASLR sanitized address space less pg_off. */ + u64 *remapped_invariant_ptr; + /* Key for the maximum address in a process. */ + struct top_addresses_key top_addr_key; + /* Value in top address table. */ + u64 *pmax = NULL; + /* Address in ASLR sanitized address space. */ + u64 remap_addr; + /* Potentially allocated remap table key. */ + struct remap_addresses_key *new_remap_key = NULL; + /* + * Potentially allocated remap table key. + * TODO: Avoid allocation necessary for perf 32-bit binary support. + */ + u64 *new_remap_val = NULL; + int err; + + if (!aslr_thread) + return 0; + + /* The key to look up an incoming address to the outgoing value. */ + addr_location__init(&al); + remap_key.machine = maps__machine(aslr_thread->maps); + remap_key.pid = (cpumode == PERF_RECORD_MISC_KERNEL) ? kernel_pid : aslr_thread->pid_; + if (thread__find_map(aslr_thread, cpumode, start, &al)) { + remap_key.dso = map__dso(al.map); + remap_key.invariant = map__start(al.map) - map__pgoff(al.map); + } else { + remap_key.dso = NULL; + remap_key.invariant = start - pgoff; + } + + /* The key to look up top allocated address. */ + top_addr_key.machine = remap_key.machine; + top_addr_key.pid = remap_key.pid; + + if (hashmap__find(&aslr->remap_addresses, &remap_key, &remapped_invariant_ptr)) { + /* Mmap already exists. */ + u64 calculated_max; + + remap_addr = *remapped_invariant_ptr + (al.map ? map__pgoff(al.map) : pgoff); + calculated_max = remap_addr + len; + + /* See if top mapping was expanded. */ + if (hashmap__find(&aslr->top_addresses, &top_addr_key, &pmax)) { + if (calculated_max > *pmax) + *pmax = calculated_max; + } + addr_location__exit(&al); + return remap_addr; + } + /* No mmap, create an entry from the top address. */ + if (hashmap__find(&aslr->top_addresses, &top_addr_key, &pmax)) { + /* Current max allocated mmap address within the process. */ + remap_addr = *pmax; + + /* Give 1 page gap from current max page. 
*/ + remap_addr = round_up_to_page_size(remap_addr); + remap_addr += page_size; + if (remap_addr + len > *pmax) + *pmax = remap_addr + len; + } else { + /* First address of the process, allocate key and first top address. */ + struct top_addresses_key *tk; + + remap_addr = (cpumode == PERF_RECORD_MISC_KERNEL) ? + kernel_space_start : user_space_start; + remap_addr = round_up_to_page_size(remap_addr); + + tk = malloc(sizeof(*tk)); + pmax = malloc(sizeof(u64)); + if (!tk || !pmax) { + err = -ENOMEM; + } else { + *tk = top_addr_key; + *pmax = remap_addr + len; + err = hashmap__insert(&aslr->top_addresses, tk, pmax, HASHMAP_ADD, NULL, NULL); + } + if (err) { + errno = -err; + pr_err("Failure to add ASLR process top address %m\n"); + free(tk); + free(pmax); + addr_location__exit(&al); + return 0; + } + } + /* Create rmeapping entry. */ + new_remap_key = malloc(sizeof(*new_remap_key)); + new_remap_val = malloc(sizeof(u64)); + if (!new_remap_key || !new_remap_val) { + err = -ENOMEM; + } else { + *new_remap_key = remap_key; + new_remap_key->dso = dso__get(remap_key.dso); + if (cpumode == PERF_RECORD_MISC_KERNEL) { + if (al.map) + *new_remap_val = remap_addr - (start - map__start(al.map)) - map__pgoff(al.map); + else + *new_remap_val = remap_addr; + } else { + *new_remap_val = remap_addr - (al.map ? 
map__pgoff(al.map) : pgoff); + } + err = hashmap__add(&aslr->remap_addresses, new_remap_key, new_remap_val); + if (err) + dso__put(new_remap_key->dso); + } + if (err) { + errno = -err; + pr_err("Failure to add ASLR remapping %m\n"); + free(new_remap_key); + free(new_remap_val); + addr_location__exit(&al); + return 0; + } + addr_location__exit(&al); + return remap_addr; +} + +static int aslr_tool__process_mmap(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_mmap(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap.pid, event->mmap.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap, &event->mmap, event->mmap.header.size); + /* Remaps the mmap.start. 
*/ + new_event->mmap.start = aslr_tool__findnew_mapping(aslr, thread, cpumode, + event->mmap.start, + event->mmap.len, + event->mmap.pgoff); + if (cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + new_event->mmap.pgoff = new_event->mmap.start; + err = delegate->mmap(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_mmap2(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_mmap2(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap2.pid, event->mmap2.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap2, &event->mmap2, event->mmap2.header.size); + /* Remaps the mmap.start. 
*/ + new_event->mmap2.start = aslr_tool__findnew_mapping(aslr, thread, cpumode, + event->mmap2.start, + event->mmap2.len, + event->mmap2.pgoff); + if (cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + new_event->mmap2.pgoff = new_event->mmap2.start; + err = delegate->mmap2(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_comm(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_comm(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->comm(delegate, event, sample, machine); +} + +static int aslr_tool__process_fork(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. 
*/ + err = perf_event__process_fork(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->fork(delegate, event, sample, machine); +} + +static int aslr_tool__process_exit(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_exit(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->exit(delegate, event, sample, machine); +} + +static int aslr_tool__process_text_poke(const struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + /* Drop in case the instruction encodes an ASLR revealing address. 
*/ + return 0; +} + +static int aslr_tool__process_ksymbol(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + err = perf_event__process_ksymbol(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, kernel_pid, 0); + if (!thread) + return -ENOMEM; + memcpy(&new_event->ksymbol, &event->ksymbol, event->ksymbol.header.size); + /* Remaps the ksymbol.start */ + new_event->ksymbol.addr = aslr_tool__findnew_mapping(aslr, thread, + PERF_RECORD_MISC_KERNEL, + event->ksymbol.addr, + event->ksymbol.len, + /*pgoff=*/0); + + err = delegate->ksymbol(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_sample(const struct perf_tool *tool, union perf_event *event, + struct perf_sample *sample, + struct evsel *evsel, struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + int ret; + u64 sample_type; + struct thread *thread; + struct machine *aslr_machine; + __u64 max_i; + __u64 max_j; + union perf_event *new_event; + struct perf_sample new_sample; + __u64 *in_array, *out_array; + u8 cpumode; + u64 addr; + size_t i; + size_t j; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + ret = -EFAULT; + sample_type = evsel->core.attr.sample_type; + 
max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); + max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); + new_event = (union perf_event *)aslr->event_copy; + cpumode = sample->cpumode; + i = 0; + j = 0; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + thread = machine__findnew_thread(aslr_machine, sample->pid, sample->tid); + + if (!thread) + return -ENOMEM; + + if (max_i > PERF_SAMPLE_MAX_SIZE / sizeof(u64)) + goto out_put; + + + + new_event->sample.header = event->sample.header; + + in_array = &event->sample.array[0]; + out_array = &new_event->sample.array[0]; + +#define CHECK_BOUNDS(required_i, required_j) \ + do { \ + if (i + (required_i) > max_i || j + (required_j) > max_j) { \ + ret = -EFAULT; \ + goto out_put; \ + } \ + } while (0) + +#define COPY_U64() \ + do { \ + CHECK_BOUNDS(1, 1); \ + out_array[j++] = in_array[i++]; \ + } while (0) + +#define REMAP_U64(addr_field) \ + do { \ + CHECK_BOUNDS(1, 1); \ + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr_field); \ + i++; \ + } while (0) + + if (sample_type & PERF_SAMPLE_IDENTIFIER) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_IP) + REMAP_U64(sample->ip); + if (sample_type & PERF_SAMPLE_TID) + COPY_U64(); /* pid, tid */ + if (sample_type & PERF_SAMPLE_TIME) + COPY_U64(); /* time */ + if (sample_type & PERF_SAMPLE_ADDR) + REMAP_U64(sample->addr); + if (sample_type & PERF_SAMPLE_ID) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_STREAM_ID) + COPY_U64(); /* stream_id */ + if (sample_type & PERF_SAMPLE_CPU) + COPY_U64(); /* cpu, res */ + if (sample_type & PERF_SAMPLE_PERIOD) + COPY_U64(); /* period */ + if (sample_type & PERF_SAMPLE_READ) { + if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ + if 
(evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } else { + u64 nr; + + CHECK_BOUNDS(1, 1); + nr = out_array[j++] = in_array[i++]; + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + for (u64 cntr = 0; cntr < nr; cntr++) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } + } + } + if (sample_type & PERF_SAMPLE_CALLCHAIN) { + u64 nr; + + CHECK_BOUNDS(1, 1); + nr = out_array[j++] = in_array[i++]; + + for (u64 cntr = 0; cntr < nr; cntr++) { + CHECK_BOUNDS(1, 1); + addr = in_array[i++]; + if (addr >= PERF_CONTEXT_MAX) { + out_array[j++] = addr; + switch (addr) { + case PERF_CONTEXT_HV: + cpumode = PERF_RECORD_MISC_HYPERVISOR; + break; + case PERF_CONTEXT_KERNEL: + cpumode = PERF_RECORD_MISC_KERNEL; + break; + case PERF_CONTEXT_USER: + cpumode = PERF_RECORD_MISC_USER; + break; + case PERF_CONTEXT_GUEST: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_KERNEL: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_USER: + cpumode = PERF_RECORD_MISC_GUEST_USER; + break; + case PERF_CONTEXT_USER_DEFERRED: + if (cntr + 1 >= nr) { + pr_debug("Truncated callchain deferred cookie context\n"); + ret = 0; + goto out_put; + } + /* + * Immediately followed by a 64-bit + * stitching cookie. Skip/Copy it! 
+ */ + CHECK_BOUNDS(1, 1); + out_array[j++] = in_array[i++]; + cntr++; + cpumode = PERF_RECORD_MISC_USER; + break; + default: + pr_debug("invalid callchain context: %"PRIx64"\n", addr); + ret = 0; + goto out_put; + } + continue; + } + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr); + } + } + if (sample_type & PERF_SAMPLE_RAW) { + size_t bytes = sizeof(u32) + sample->raw_size; + size_t u64_words = (bytes + 7) / 8; + + if (i + u64_words > max_i || j + u64_words > max_j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], bytes); + i += u64_words; + j += u64_words; + /* + * TODO: certain raw samples can be remapped, such as + * tracepoints by examining their fields. + */ + pr_debug("Dropping raw samples as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + u64 nr; + + CHECK_BOUNDS(1, 1); + nr = out_array[j++] = in_array[i++]; + + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) + COPY_U64(); /* hw_idx */ + + if (nr > (ULLONG_MAX / 3)) { + ret = -EFAULT; + goto out_put; + } + if (nr * 3 > max_i - i || nr * 3 > max_j - j) { + ret = -EFAULT; + goto out_put; + } + for (u64 cntr = 0; cntr < nr; cntr++) { + out_array[j++] = aslr_tool__remap_address(aslr, thread, + sample->cpumode, + in_array[i++]); /* from */ + out_array[j++] = aslr_tool__remap_address(aslr, thread, + sample->cpumode, + in_array[i++]); /* to */ + out_array[j++] = in_array[i++]; /* flags */ + } + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) { + if (nr > max_i - i || nr > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); + i += nr; + j += nr; + /* TODO: confirm branch counters don't leak ASLR information. 
*/ + pr_debug("Dropping sample branch counters as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + } + if (sample_type & PERF_SAMPLE_REGS_USER) { + u64 abi; + + COPY_U64(); /* abi */ + abi = out_array[j-1]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(evsel->core.attr.sample_regs_user); + + if (nr > max_i - i || nr > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); + i += nr; + j += nr; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping regs user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_STACK_USER) { + u64 size; + + CHECK_BOUNDS(1, 1); + size = out_array[j++] = in_array[i++]; + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + if (size % 8) { + size_t pad = 8 - (size % 8); + + memset(((char *)&out_array[j]) + size, 0, pad); + } + i += u64_words; + j += u64_words; + + COPY_U64(); /* dyn_size */ + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping stack user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + COPY_U64(); /* perf_sample_weight */ + if (sample_type & PERF_SAMPLE_DATA_SRC) + COPY_U64(); /* data_src */ + if (sample_type & PERF_SAMPLE_TRANSACTION) + COPY_U64(); /* transaction */ + if (sample_type & PERF_SAMPLE_REGS_INTR) { + u64 abi; + + COPY_U64(); /* abi */ + abi = out_array[j-1]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(evsel->core.attr.sample_regs_intr); + + if (nr > max_i - i || nr > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); + i += nr; + j += nr; + } + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_PHYS_ADDR) { + COPY_U64(); /* phys_addr */ + /* TODO: can this be less conservative? */ + pr_debug("Dropping physical address sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_CGROUP) + COPY_U64(); /* cgroup */ + if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + COPY_U64(); /* data_page_size */ + if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + COPY_U64(); /* code_page_size */ + + if (sample_type & PERF_SAMPLE_AUX) { + u64 size; + + CHECK_BOUNDS(1, 1); + size = out_array[j++] = in_array[i++]; + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + if (size % 8) { + size_t pad = 8 - (size % 8); + + memset(((char *)&out_array[j]) + size, 0, pad); + } + i += u64_words; + j += u64_words; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping aux sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + if (evsel__is_offcpu_event(evsel)) { + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping off-CPU sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); + + perf_sample__init(&new_sample, /*all=*/ true); + ret = evsel__parse_sample(evsel, new_event, &new_sample); + if (ret) { + perf_sample__exit(&new_sample); + goto out_put; + } + + ret = delegate->sample(delegate, new_event, &new_sample, evsel, machine); + perf_sample__exit(&new_sample); + +out_put: + thread__put(thread); + return ret; +} + +#undef CHECK_BOUNDS +#undef COPY_U64 +#undef REMAP_U64 + + +static int aslr_tool__process_attr(const struct perf_tool *tool, + union perf_event *event, + struct evlist **pevlist) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + + memcpy(&new_event->attr, &event->attr, event->attr.header.size); + if (new_event->attr.attr.type == PERF_TYPE_BREAKPOINT) + new_event->attr.attr.bp_addr = 0; /* Conservatively remove addresses. */ + + return delegate->attr(delegate, new_event, pevlist); +} + +static int skipn(int fd, off_t n) +{ + char buf[4096]; + ssize_t ret; + + while (n > 0) { + ret = read(fd, buf, min(n, (off_t)sizeof(buf))); + if (ret <= 0) + return ret; + n -= ret; + } + + return 0; +} + +static s64 aslr_tool__process_auxtrace(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, + union perf_event *event) +{ + if (perf_data__is_pipe(session->data)) { + /* Copy behavior of the stub by reading all pipe data. 
*/ + int err = skipn(perf_data__fd(session->data), event->auxtrace.size); + + if (err < 0) + return err; + } + return event->auxtrace.size; +} + +static int aslr_tool__process_auxtrace_info(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + +static int aslr_tool__process_auxtrace_error(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + +static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) +{ + delegate_tool__init(&aslr->tool, delegate); + aslr->tool.tool.ordered_events = true; + + machines__init(&aslr->machines); + + hashmap__init(&aslr->remap_addresses, + remap_addresses__hash, remap_addresses__equal, + /*ctx=*/NULL); + hashmap__init(&aslr->top_addresses, + top_addresses__hash, top_addresses__equal, + /*ctx=*/NULL); + + aslr->tool.tool.sample = aslr_tool__process_sample; + /* read - reads a counter, okay to delegate. */ + aslr->tool.tool.mmap = aslr_tool__process_mmap; + aslr->tool.tool.mmap2 = aslr_tool__process_mmap2; + aslr->tool.tool.comm = aslr_tool__process_comm; + aslr->tool.tool.fork = aslr_tool__process_fork; + aslr->tool.tool.exit = aslr_tool__process_exit; + /* namesspaces, cgroup, lost, lost_sample, aux, */ + /* itrace_start, aux_output_hw_id, context_switch, throttle, unthrottle */ + /* - no virtual addresses. */ + aslr->tool.tool.ksymbol = aslr_tool__process_ksymbol; + /* bpf - no virtual address. 
*/ + aslr->tool.tool.text_poke = aslr_tool__process_text_poke; + aslr->tool.tool.attr = aslr_tool__process_attr; + /* event_update, tracing_data, finished_round, build_id, id_index, */ + /* event_update, tracing_data, finished_round, build_id, id_index, */ + /* auxtrace_info, auxtrace_error, time_conv, thread_map, cpu_map, */ + /* stat_config, stat, feature, finished_init, bpf_metadata, compressed, */ + /* auxtrace - no virtual addresses. */ + aslr->tool.tool.auxtrace = aslr_tool__process_auxtrace; + aslr->tool.tool.auxtrace_info = aslr_tool__process_auxtrace_info; + aslr->tool.tool.auxtrace_error = aslr_tool__process_auxtrace_error; +} + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate) +{ + struct aslr_tool *aslr = zalloc(sizeof(*aslr)); + + if (!aslr) + return NULL; + + aslr_tool__init(aslr, delegate); + return &aslr->tool.tool; +} + +void aslr_tool__delete(struct perf_tool *tool) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct hashmap_entry *cur; + size_t bkt; + + if (!tool) + return; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + + hashmap__for_each_entry(&aslr->remap_addresses, cur, bkt) { + struct remap_addresses_key *key = (struct remap_addresses_key *)cur->pkey; + + if (key) + dso__put(key->dso); + zfree(&cur->pkey); + zfree(&cur->pvalue); + } + hashmap__for_each_entry(&aslr->top_addresses, cur, bkt) { + zfree(&cur->pkey); + zfree(&cur->pvalue); + } + + hashmap__clear(&aslr->remap_addresses); + hashmap__clear(&aslr->top_addresses); + machines__destroy_kernel_maps(&aslr->machines); + machines__exit(&aslr->machines); + free(aslr); +} diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h new file mode 100644 index 000000000000..ea984d82681f --- /dev/null +++ b/tools/perf/util/aslr.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_ASLR_H +#define __PERF_ASLR_H + +struct perf_tool; + +struct perf_tool 
*aslr_tool__new(struct perf_tool *delegate); +void aslr_tool__delete(struct perf_tool *aslr); + +#endif /* __PERF_ASLR_H */ -- 2.54.0.563.g4f69b47b94-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v6 4/6] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses 2026-05-08 8:27 ` [PATCH v6 4/6] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses Ian Rogers @ 2026-05-08 21:22 ` sashiko-bot 2026-05-11 7:32 ` Namhyung Kim 1 sibling, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-05-08 21:22 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 2 potential issue(s) to consider: - [High] The sample repacking logic silently drops unknown fields without clearing their corresponding UAPI presence flags, corrupting downstream parsing. - [High] Missing PERF_RECORD_MISC_GUEST_KERNEL checks in mapping and remapping logic break ASLR obfuscation for guest kernel samples. -- commit e5353f445ca316e60de67b84bb7594a8aa6daa49 Author: Ian Rogers <irogers@google.com> perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses This commit introduces a new ASLR obfuscation tool to perf inject that remaps or drops virtual addresses to prevent ASLR leaks when sharing perf.data files. It uses a delegate tool architecture to intercept and sanitize events and samples. > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > new file mode 100644 > index 0000000000000..09b7f2f8fb850 > --- /dev/null > +++ b/tools/perf/util/aslr.c [ ... ] > +static u64 aslr_tool__remap_address(struct aslr_tool *aslr, > + struct thread *aslr_thread, > + u8 cpumode, > + u64 addr) > +{ [ ... ] > + key.machine = maps__machine(aslr_thread->maps); > + key.dso = map__dso(al.map); > + key.invariant = map__start(al.map) - map__pgoff(al.map); > + key.pid = effective_cpumode == PERF_RECORD_MISC_KERNEL ? kernel_pid : aslr_thread->pid_; Does this also need to check for PERF_RECORD_MISC_GUEST_KERNEL? If a guest kernel sample interrupts a user process, missing this check might cause key.pid to be assigned aslr_thread->pid_ instead of kernel_pid (-1). 
Could this mismatch prevent hashmap__find() from resolving the mapping? [ ... ] > +static u64 aslr_tool__findnew_mapping(struct aslr_tool *aslr, > + struct thread *aslr_thread, > + u8 cpumode, u64 start, > + u64 len, u64 pgoff) > +{ [ ... ] > + /* The key to look up an incoming address to the outgoing value. */ > + addr_location__init(&al); > + remap_key.machine = maps__machine(aslr_thread->maps); > + remap_key.pid = (cpumode == PERF_RECORD_MISC_KERNEL) ? kernel_pid : aslr_thread->pid_; Similarly here, if cpumode is PERF_RECORD_MISC_GUEST_KERNEL, shouldn't remap_key.pid also be set to kernel_pid? [ ... ] > + /* No mmap, create an entry from the top address. */ > + if (hashmap__find(&aslr->top_addresses, &top_addr_key, &pmax)) { [ ... ] > + } else { > + /* First address of the process, allocate key and first top address. */ > + struct top_addresses_key *tk; > + > + remap_addr = (cpumode == PERF_RECORD_MISC_KERNEL) ? > + kernel_space_start : user_space_start; Would guest kernel maps erroneously be allocated in the user_space_start range here instead of kernel_space_start due to the missing check for PERF_RECORD_MISC_GUEST_KERNEL? [ ... ] > +static int aslr_tool__process_sample(const struct perf_tool *tool, union perf_event *event, > + struct perf_sample *sample, > + struct evsel *evsel, struct machine *machine) > +{ [ ... ] > + if (sample_type & PERF_SAMPLE_AUX) { > + u64 size; > + > + CHECK_BOUNDS(1, 1); > + size = out_array[j++] = in_array[i++]; > + if (size > 0) { > + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); > + > + if (u64_words > max_i - i || u64_words > max_j - j) { > + ret = -EFAULT; > + goto out_put; > + } > + memcpy(&out_array[j], &in_array[i], size); > + if (size % 8) { > + size_t pad = 8 - (size % 8); > + > + memset(((char *)&out_array[j]) + size, 0, pad); > + } > + i += u64_words; > + j += u64_words; > + } > + /* TODO: can this be less conservative? 
*/ > + pr_debug("Dropping aux sample as possible ASLR leak\n"); > + ret = 0; > + goto out_put; > + } > + > + if (evsel__is_offcpu_event(evsel)) { > + /* TODO: can this be less conservative? */ > + pr_debug("Dropping off-CPU sample as possible ASLR leak\n"); > + ret = 0; > + goto out_put; > + } > + > + new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); If the sample contains unknown UAPI fields (for instance, when processing a perf.data file generated by a newer kernel), they appear to be silently dropped from the payload during this repacking loop. However, since unrecognized bits aren't cleared from evsel->core.attr, will downstream parsers encounter missing fields while expecting them to be present? Could this lead to reading misaligned data or triggering -EFAULT parsing failures when analyzing the resulting trace? -- Sashiko AI review · https://sashiko.dev/#/patchset/20260508082726.2795191-1-irogers@google.com?part=4 ^ permalink raw reply [flat|nested] 183+ messages in thread
* Re: [PATCH v6 4/6] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses 2026-05-08 8:27 ` [PATCH v6 4/6] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses Ian Rogers 2026-05-08 21:22 ` sashiko-bot @ 2026-05-11 7:32 ` Namhyung Kim 1 sibling, 0 replies; 183+ messages in thread From: Namhyung Kim @ 2026-05-11 7:32 UTC (permalink / raw) To: Ian Rogers Cc: acme, gmx, james.clark, adrian.hunter, jolsa, linux-kernel, linux-perf-users, mingo, peterz On Fri, May 08, 2026 at 01:27:24AM -0700, Ian Rogers wrote: > If perf.data files are taken from one machine to another they may > leak virtual addresses and so weaken ASLR on the machine they are > coming from. Add an aslr option for perf inject that remaps all > virtual addresses, or drops data/events, so that the virtual address > information isn't leaked. > > Events carrying virtual memory layouts are conservatively remap-processed > or dropped, while zero-address-risk lifecycle metadata records (such as > namespaces, cgroups, and BPF program info) are intentionally delegated > to preserve comprehensive downstream trace tool analysis compatibility. > > The ASLR tracking tool virtualizes process and machine namespaces using > 'struct machines' to safely isolate host mappings from unprivileged KVM guest > address spaces. Memory space layouts are tracked globally per process context to > ensure linear, continuous space allocations across successive mapping runs. > > To remain strictly conservative and guarantee security, the tool scrubs > breakpoint addresses (bp_addr) from all synthesized stream headers, completely > drops PERF_RECORD_TEXT_POKE events to prevent absolute immediate pointer > operands leaks, and drops unsupported complex payloads (such as user register > stacks, raw tracepoints, and hardware AUX tracing frames). 
> > Assisted-by: Gemini-CLI:Google Gemini 3 > Signed-off-by: Ian Rogers <irogers@google.com> > Co-developed-by: Gabriel Marin <gmx@google.com> > Signed-off-by: Gabriel Marin <gmx@google.com> > --- > v6: Enforce strict command-line validation mutual exclusivity between > --aslr and --convert-callchain to prevent silent unwind failures. > Secure mmap.pgoff unconditionally for all host and guest kernel text > mapping regions to completely prevent active KASLR load deltas leakage. > Conservatively drop PERF_RECORD_TEXT_POKE events completely via a local > static drop stub to prevent absolute 64-bit kernel virtual pointer immediate > operands leaks. Inject explicit array-end bounds validation check blocks > before consuming trailing PERF_CONTEXT_USER_DEFERRED callchain cookies > to eliminate out-of-bounds reads and parser desynchronization faults. > Simplify ASLR mapping remap logic. Ensure that encountering a > PERF_CONTEXT_USER_DEFERRED context marker explicitly updates cpumode. > > v5: Add machine to remap addresses key so that it is guest/host > safe. Add 'first_kernel_mapping' tracking guard inside aslr.c to > rewrite the core kernel pgoff virtual address while safely > protecting module file offsets from corruption. Clean up > breakpoint address (bp_addr) memory scrubbing by executing the > scrubbing loop directly at core session initialization startup > level, natively securing both file headers and streaming pipe > channels while removing redundant runtime tool wrapper > interception hooks layers. > > v4: Scrub bp_addr from headers/pipe synthesis attributes. Remove > kernel mmap pgoff mathematical delta adjustment leaks to maintain > secure base obfuscation bounds. Harden guest space contexts > mapping loops, correct ksymbol map base invariants tracking, and > plug tail-word padding heap leakage vectors in user stacks and AUX > payloads. 
> > v3: Combine split-map fixes, guest namespaces, bounds checks, OOM > rollbacks, hot path optimization, safe dso references, and I/O > stream error handling from v3/v4 development. Drop raw auxtrace > events. Fix thread reference leaks in event handlers. Fix 32-bit > truncation bug in hashmaps using u64* values. Prevent leaking > uninitialized heap memory by zeroing copy buffer. Correct bitmask > checks for branch stack flags. Avoid PMU configuration corruption. > > v2: First review feedback adjustments. > --- > tools/perf/builtin-inject.c | 36 +- > tools/perf/util/Build | 1 + > tools/perf/util/aslr.c | 1036 +++++++++++++++++++++++++++++++++++ > tools/perf/util/aslr.h | 10 + > 4 files changed, 1082 insertions(+), 1 deletion(-) > create mode 100644 tools/perf/util/aslr.c > create mode 100644 tools/perf/util/aslr.h > > diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c > index 6ab20df358c4..51dcf248b653 100644 > --- a/tools/perf/builtin-inject.c > +++ b/tools/perf/builtin-inject.c > @@ -8,6 +8,7 @@ > */ > #include "builtin.h" > > +#include "util/aslr.h" > #include "util/color.h" > #include "util/dso.h" > #include "util/vdso.h" > @@ -123,6 +124,7 @@ struct perf_inject { > bool in_place_update_dry_run; > bool copy_kcore_dir; > bool convert_callchain; > + bool aslr; > const char *input_name; > struct perf_data output; > u64 bytes_written; > @@ -304,6 +306,8 @@ static int perf_event__repipe(const struct perf_tool *tool, > return perf_event__repipe_synth(tool, event); > } > > + > + Still have unnecessary blank lines. > static int perf_event__drop(const struct perf_tool *tool __maybe_unused, > union perf_event *event __maybe_unused, > struct perf_sample *sample __maybe_unused, > @@ -2459,6 +2463,8 @@ static int __cmd_inject(struct perf_inject *inject) > } > } > > + > + Ditto. 
> session->header.data_offset = output_data_offset; > session->header.data_size = inject->bytes_written; > perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc, > @@ -2565,6 +2571,8 @@ int cmd_inject(int argc, const char **argv) > " instance has a subdir"), > OPT_BOOLEAN(0, "convert-callchain", &inject.convert_callchain, > "Generate callchains using DWARF and drop register/stack data"), > + OPT_BOOLEAN(0, "aslr", &inject.aslr, > + "Remap virtual memory addresses similar to ASLR"), > OPT_END() > }; > const char * const inject_usage[] = { > @@ -2572,6 +2580,7 @@ int cmd_inject(int argc, const char **argv) > NULL > }; > bool ordered_events; > + struct perf_tool *tool = &inject.tool; > > if (!inject.itrace_synth_opts.set) { > /* Disable eager loading of kernel symbols that adds overhead to perf inject. */ > @@ -2592,6 +2601,11 @@ int cmd_inject(int argc, const char **argv) > if (argc) > usage_with_options(inject_usage, options); > > + if (inject.aslr && inject.convert_callchain) { > + pr_err("Error: --aslr and --convert-callchain are mutually exclusive features.\n"); > + return -EINVAL; > + } > + > if (inject.strip && !inject.itrace_synth_opts.set) { > pr_err("--strip option requires --itrace option\n"); > return -1; > @@ -2685,18 +2699,36 @@ int cmd_inject(int argc, const char **argv) > inject.tool.schedstat_domain = perf_event__repipe_op2_synth; > inject.tool.dont_split_sample_group = true; > inject.tool.merge_deferred_callchains = false; > - inject.session = __perf_session__new(&data, &inject.tool, > + if (inject.aslr) { > + tool = aslr_tool__new(&inject.tool); > + if (!tool) { > + ret = -ENOMEM; > + goto out_close_output; > + } > + } > + inject.session = __perf_session__new(&data, tool, > /*trace_event_repipe=*/inject.output.is_pipe, > /*host_env=*/NULL); > > if (IS_ERR(inject.session)) { > ret = PTR_ERR(inject.session); > + if (inject.aslr) > + aslr_tool__delete(tool); > goto out_close_output; > } > > if (zstd_init(&(inject.session->zstd_data), 0) 
< 0) > pr_warning("Decompression initialization failed.\n"); > > + if (inject.aslr) { > + struct evsel *evsel; > + > + evlist__for_each_entry(inject.session->evlist, evsel) { > + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) > + evsel->core.attr.bp_addr = 0; > + } > + } > + > /* Save original section info before feature bits change */ > ret = save_section_info(&inject); > if (ret) > @@ -2790,6 +2822,8 @@ int cmd_inject(int argc, const char **argv) > strlist__delete(inject.known_build_ids); > zstd_fini(&(inject.session->zstd_data)); > perf_session__delete(inject.session); > + if (inject.aslr) > + aslr_tool__delete(tool); > out_close_output: > if (!inject.in_place_update) > perf_data__close(&inject.output); > diff --git a/tools/perf/util/Build b/tools/perf/util/Build > index 70cc91d00804..65b96f3b87e2 100644 > --- a/tools/perf/util/Build > +++ b/tools/perf/util/Build > @@ -6,6 +6,7 @@ perf-util-y += arm64-frame-pointer-unwind-support.o > perf-util-y += addr2line.o > perf-util-y += addr_location.o > perf-util-y += annotate.o > +perf-util-y += aslr.o > perf-util-y += blake2s.o > perf-util-y += block-info.o > perf-util-y += block-range.o > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > new file mode 100644 > index 000000000000..09b7f2f8fb85 > --- /dev/null > +++ b/tools/perf/util/aslr.c > @@ -0,0 +1,1036 @@ > +// SPDX-License-Identifier: GPL-2.0 > +#include "aslr.h" > + > +#include "addr_location.h" > +#include "debug.h" > +#include "event.h" > +#include "evsel.h" > +#include "machine.h" > +#include "map.h" > +#include "thread.h" > +#include "tool.h" > +#include "session.h" > +#include "data.h" > +#include "dso.h" > + > +#include <internal/lib.h> /* page_size */ > +#include <linux/compiler.h> > +#include <linux/zalloc.h> > +#include <errno.h> > +#include <inttypes.h> > +#include <unistd.h> > + > +/** > + * struct remap_addresses_key - Key for mapping original addresses to remapped ones. 
> + * @dso: Pointer to the DSO (Dynamic Shared Object) associated with the mapping. > + * @invariant: Unique offset invariant within the VMA (Virtual Memory Area). > + * Calculated as `start - pgoff`. This value remains constant when > + * perf's internal `maps__fixup_overlap_and_insert` splits a map into > + * fragmented VMA pieces due to overlapping events, allowing us to > + * resolve split maps consistently back to the original VMA. > + * @pid: Process ID associated with the mapping. > + */ > +struct remap_addresses_key { > + struct machine *machine; > + struct dso *dso; > + u64 invariant; > + pid_t pid; > +}; > + > +struct aslr_mapping { > + struct list_head node; > + u64 orig_start; > + u64 len; > + u64 remap_start; > +}; > + > +struct aslr_tool { > + /** @tool: The tool implemented here and a pointer to a delegate to process the data. */ > + struct delegate_tool tool; > + /** @machines: The machines with the input, not remapped, virtual address layout. */ > + struct machines machines; > + /** @event_copy: Buffer used to create an event to pass to the delegate. */ > + char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); > + /** @remap_addresses: mapping from remap_addresses_key to remapped address. */ > + struct hashmap remap_addresses; > + /** @top_addresses: mapping from process to max remapped address. */ > + struct hashmap top_addresses; > +}; > + > +static const pid_t kernel_pid = -1; > + > +/* Start remapping user processes from a small non-zero offset. 
*/ > +static const u64 user_space_start = 0x200000; > +static const u64 kernel_space_start = 0xffff800010000000; > + > +static size_t remap_addresses__hash(long _key, void *ctx __maybe_unused) > +{ > + struct remap_addresses_key *key = (struct remap_addresses_key *)_key; > + > + return (size_t)key->machine ^ (size_t)key->dso ^ key->invariant ^ key->pid; > +} > + > +static bool remap_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) > +{ > + struct remap_addresses_key *key1 = (struct remap_addresses_key *)_key1; > + struct remap_addresses_key *key2 = (struct remap_addresses_key *)_key2; > + > + return key1->machine == key2->machine && > + RC_CHK_EQUAL(key1->dso, key2->dso) && > + key1->invariant == key2->invariant && > + key1->pid == key2->pid; > +} > + > +struct top_addresses_key { > + struct machine *machine; > + pid_t pid; > +}; > + > +static size_t top_addresses__hash(long _key, void *ctx __maybe_unused) > +{ > + struct top_addresses_key *key = (struct top_addresses_key *)_key; > + > + return (size_t)key->machine ^ key->pid; > +} > + > +static bool top_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) > +{ > + struct top_addresses_key *key1 = (struct top_addresses_key *)_key1; > + struct top_addresses_key *key2 = (struct top_addresses_key *)_key2; > + > + return key1->machine == key2->machine && key1->pid == key2->pid; > +} > + > +static u64 round_up_to_page_size(u64 addr) > +{ > + return (addr + page_size - 1) & ~((u64)page_size - 1); > +} > + > +static u64 aslr_tool__remap_address(struct aslr_tool *aslr, > + struct thread *aslr_thread, > + u8 cpumode, > + u64 addr) > +{ > + struct addr_location al; > + struct remap_addresses_key key; > + u64 *remapped_invariant_ptr = NULL; > + u64 remap_addr = 0; > + u8 effective_cpumode = cpumode; > + > + if (!aslr_thread) > + return 0; /* No thread. 
*/ > + > + addr_location__init(&al); > + if (!thread__find_map(aslr_thread, cpumode, addr, &al)) { > + /* > + * If lookup fails with specified cpumode, try fallback to the other space > + * to be robust against bad cpumode in samples. > + */ > + if (cpumode == PERF_RECORD_MISC_KERNEL) > + effective_cpumode = PERF_RECORD_MISC_USER; > + else if (cpumode == PERF_RECORD_MISC_USER) > + effective_cpumode = PERF_RECORD_MISC_KERNEL; > + else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL) > + effective_cpumode = PERF_RECORD_MISC_GUEST_USER; > + else if (cpumode == PERF_RECORD_MISC_GUEST_USER) > + effective_cpumode = PERF_RECORD_MISC_GUEST_KERNEL; > + > + if (!thread__find_map(aslr_thread, effective_cpumode, addr, &al)) { > + addr_location__exit(&al); > + return 0; /* No mmap. */ > + } > + } > + > + key.machine = maps__machine(aslr_thread->maps); > + key.dso = map__dso(al.map); > + key.invariant = map__start(al.map) - map__pgoff(al.map); > + key.pid = effective_cpumode == PERF_RECORD_MISC_KERNEL ? kernel_pid : aslr_thread->pid_; > + > + if (hashmap__find(&aslr->remap_addresses, &key, &remapped_invariant_ptr)) { > + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + > + (addr - map__start(al.map)); > + } else { > + pr_debug("Cannot find a remapped entry for address %lx " > + "in mapping %lx(%lx) for pid=%d\n", > + addr, map__start(al.map), map__size(al.map), key.pid); > + } > + > + addr_location__exit(&al); > + return remap_addr; > +} > + > +static u64 aslr_tool__findnew_mapping(struct aslr_tool *aslr, > + struct thread *aslr_thread, > + u8 cpumode, u64 start, > + u64 len, u64 pgoff) > +{ > + /* Address location for dso lookup. */ > + struct addr_location al; > + /* Original ASLR address based key for the remap table. */ > + struct remap_addresses_key remap_key; > + /* The address in the ASLR sanitized address space less pg_off. */ > + u64 *remapped_invariant_ptr; > + /* Key for the maximum address in a process. 
*/ > + struct top_addresses_key top_addr_key; > + /* Value in top address table. */ > + u64 *pmax = NULL; > + /* Address in ASLR sanitized address space. */ > + u64 remap_addr; > + /* Potentially allocated remap table key. */ > + struct remap_addresses_key *new_remap_key = NULL; > + /* > + * Potentially allocated remap table value. > + * TODO: Avoid allocation necessary for perf 32-bit binary support. > + */ > + u64 *new_remap_val = NULL; > + int err; > + > + if (!aslr_thread) > + return 0; > + > + /* The key to look up an incoming address to the outgoing value. */ > + addr_location__init(&al); > + remap_key.machine = maps__machine(aslr_thread->maps); > + remap_key.pid = (cpumode == PERF_RECORD_MISC_KERNEL) ? kernel_pid : aslr_thread->pid_; > + if (thread__find_map(aslr_thread, cpumode, start, &al)) { > + remap_key.dso = map__dso(al.map); > + remap_key.invariant = map__start(al.map) - map__pgoff(al.map); > + } else { > + remap_key.dso = NULL; > + remap_key.invariant = start - pgoff; > + } > + > + /* The key to look up top allocated address. */ > + top_addr_key.machine = remap_key.machine; > + top_addr_key.pid = remap_key.pid; > + > + if (hashmap__find(&aslr->remap_addresses, &remap_key, &remapped_invariant_ptr)) { > + /* Mmap already exists. */ > + u64 calculated_max; > + > + remap_addr = *remapped_invariant_ptr + (al.map ? map__pgoff(al.map) : pgoff); > + calculated_max = remap_addr + len; > + > + /* See if top mapping was expanded. */ > + if (hashmap__find(&aslr->top_addresses, &top_addr_key, &pmax)) { > + if (calculated_max > *pmax) > + *pmax = calculated_max; > + } > + addr_location__exit(&al); > + return remap_addr; > + } > + /* No mmap, create an entry from the top address. */ > + if (hashmap__find(&aslr->top_addresses, &top_addr_key, &pmax)) { > + /* Current max allocated mmap address within the process. */ > + remap_addr = *pmax; > + > + /* Give 1 page gap from current max page. 
*/ > + remap_addr = round_up_to_page_size(remap_addr); > + remap_addr += page_size; > + if (remap_addr + len > *pmax) > + *pmax = remap_addr + len; > + } else { > + /* First address of the process, allocate key and first top address. */ > + struct top_addresses_key *tk; > + > + remap_addr = (cpumode == PERF_RECORD_MISC_KERNEL) ? > + kernel_space_start : user_space_start; > + remap_addr = round_up_to_page_size(remap_addr); > + > + tk = malloc(sizeof(*tk)); > + pmax = malloc(sizeof(u64)); > + if (!tk || !pmax) { > + err = -ENOMEM; > + } else { > + *tk = top_addr_key; > + *pmax = remap_addr + len; > + err = hashmap__insert(&aslr->top_addresses, tk, pmax, HASHMAP_ADD, NULL, NULL); > + } > + if (err) { > + errno = -err; > + pr_err("Failure to add ASLR process top address %m\n"); > + free(tk); > + free(pmax); > + addr_location__exit(&al); > + return 0; > + } > + } > + /* Create remapping entry. */ > + new_remap_key = malloc(sizeof(*new_remap_key)); > + new_remap_val = malloc(sizeof(u64)); > + if (!new_remap_key || !new_remap_val) { > + err = -ENOMEM; > + } else { > + *new_remap_key = remap_key; > + new_remap_key->dso = dso__get(remap_key.dso); > + if (cpumode == PERF_RECORD_MISC_KERNEL) { > + if (al.map) > + *new_remap_val = remap_addr - (start - map__start(al.map)) - map__pgoff(al.map); A too long line. > + else > + *new_remap_val = remap_addr; > + } else { > + *new_remap_val = remap_addr - (al.map ? 
map__pgoff(al.map) : pgoff); > + } > + err = hashmap__add(&aslr->remap_addresses, new_remap_key, new_remap_val); > + if (err) > + dso__put(new_remap_key->dso); > + } > + if (err) { > + errno = -err; > + pr_err("Failure to add ASLR remapping %m\n"); > + free(new_remap_key); > + free(new_remap_val); > + addr_location__exit(&al); > + return 0; > + } > + addr_location__exit(&al); > + return remap_addr; > +} > + > +static int aslr_tool__process_mmap(const struct perf_tool *tool, > + union perf_event *event, > + struct perf_sample *sample, > + struct machine *machine) > +{ > + struct delegate_tool *del_tool; > + struct aslr_tool *aslr; > + struct perf_tool *delegate; > + union perf_event *new_event; > + u8 cpumode; > + struct thread *thread; > + struct machine *aslr_machine; > + int err; > + > + del_tool = container_of(tool, struct delegate_tool, tool); > + aslr = container_of(del_tool, struct aslr_tool, tool); > + delegate = aslr->tool.delegate; > + new_event = (union perf_event *)aslr->event_copy; > + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; > + > + aslr_machine = machines__findnew(&aslr->machines, machine->pid); > + if (!aslr_machine) > + return -ENOMEM; > + > + /* Create the thread, map, etc. in the ASLR before virtual address space. */ > + err = perf_event__process_mmap(tool, event, sample, aslr_machine); > + if (err) > + return err; > + > + thread = machine__findnew_thread(aslr_machine, event->mmap.pid, event->mmap.tid); > + if (!thread) > + return -ENOMEM; > + memcpy(&new_event->mmap, &event->mmap, event->mmap.header.size); > + /* Remaps the mmap.start. 
*/ > + new_event->mmap.start = aslr_tool__findnew_mapping(aslr, thread, cpumode, > + event->mmap.start, > + event->mmap.len, > + event->mmap.pgoff); > + if (cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL) > + new_event->mmap.pgoff = new_event->mmap.start; > + err = delegate->mmap(delegate, new_event, sample, machine); > + thread__put(thread); > + return err; > +} > + > +static int aslr_tool__process_mmap2(const struct perf_tool *tool, > + union perf_event *event, > + struct perf_sample *sample, > + struct machine *machine) > +{ > + struct delegate_tool *del_tool; > + struct aslr_tool *aslr; > + struct perf_tool *delegate; > + union perf_event *new_event; > + u8 cpumode; > + struct thread *thread; > + struct machine *aslr_machine; > + int err; > + > + del_tool = container_of(tool, struct delegate_tool, tool); > + aslr = container_of(del_tool, struct aslr_tool, tool); > + delegate = aslr->tool.delegate; > + new_event = (union perf_event *)aslr->event_copy; > + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; > + > + aslr_machine = machines__findnew(&aslr->machines, machine->pid); > + if (!aslr_machine) > + return -ENOMEM; > + > + /* Create the thread, map, etc. in the ASLR before virtual address space. */ > + err = perf_event__process_mmap2(tool, event, sample, aslr_machine); > + if (err) > + return err; > + > + thread = machine__findnew_thread(aslr_machine, event->mmap2.pid, event->mmap2.tid); > + if (!thread) > + return -ENOMEM; > + memcpy(&new_event->mmap2, &event->mmap2, event->mmap2.header.size); > + /* Remaps the mmap.start. 
*/ > + new_event->mmap2.start = aslr_tool__findnew_mapping(aslr, thread, cpumode, > + event->mmap2.start, > + event->mmap2.len, > + event->mmap2.pgoff); > + if (cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL) > + new_event->mmap2.pgoff = new_event->mmap2.start; > + err = delegate->mmap2(delegate, new_event, sample, machine); > + thread__put(thread); > + return err; > +} > + > +static int aslr_tool__process_comm(const struct perf_tool *tool, > + union perf_event *event, > + struct perf_sample *sample, > + struct machine *machine) > +{ > + struct delegate_tool *del_tool; > + struct aslr_tool *aslr; > + struct perf_tool *delegate; > + struct machine *aslr_machine; > + int err; > + > + del_tool = container_of(tool, struct delegate_tool, tool); > + aslr = container_of(del_tool, struct aslr_tool, tool); > + delegate = aslr->tool.delegate; > + > + aslr_machine = machines__findnew(&aslr->machines, machine->pid); > + if (!aslr_machine) > + return -ENOMEM; > + > + /* Create the thread, map, etc. in the ASLR before virtual address space. */ > + err = perf_event__process_comm(tool, event, sample, aslr_machine); > + if (err) > + return err; > + > + return delegate->comm(delegate, event, sample, machine); > +} > + > +static int aslr_tool__process_fork(const struct perf_tool *tool, > + union perf_event *event, > + struct perf_sample *sample, > + struct machine *machine) > +{ > + struct delegate_tool *del_tool; > + struct aslr_tool *aslr; > + struct perf_tool *delegate; > + struct machine *aslr_machine; > + int err; > + > + del_tool = container_of(tool, struct delegate_tool, tool); > + aslr = container_of(del_tool, struct aslr_tool, tool); > + delegate = aslr->tool.delegate; > + > + aslr_machine = machines__findnew(&aslr->machines, machine->pid); > + if (!aslr_machine) > + return -ENOMEM; > + > + /* Create the thread, map, etc. in the ASLR before virtual address space. 
*/ > + err = perf_event__process_fork(tool, event, sample, aslr_machine); > + if (err) > + return err; > + > + return delegate->fork(delegate, event, sample, machine); > +} > + > +static int aslr_tool__process_exit(const struct perf_tool *tool, > + union perf_event *event, > + struct perf_sample *sample, > + struct machine *machine) > +{ > + struct delegate_tool *del_tool; > + struct aslr_tool *aslr; > + struct perf_tool *delegate; > + struct machine *aslr_machine; > + int err; > + > + del_tool = container_of(tool, struct delegate_tool, tool); > + aslr = container_of(del_tool, struct aslr_tool, tool); > + delegate = aslr->tool.delegate; > + > + aslr_machine = machines__findnew(&aslr->machines, machine->pid); > + if (!aslr_machine) > + return -ENOMEM; > + > + /* Create the thread, map, etc. in the ASLR before virtual address space. */ > + err = perf_event__process_exit(tool, event, sample, aslr_machine); > + if (err) > + return err; > + > + return delegate->exit(delegate, event, sample, machine); > +} > + > +static int aslr_tool__process_text_poke(const struct perf_tool *tool __maybe_unused, > + union perf_event *event __maybe_unused, > + struct perf_sample *sample __maybe_unused, > + struct machine *machine __maybe_unused) > +{ > + /* Drop in case the instruction encodes an ASLR revealing address. 
*/ > + return 0; > +} > + > +static int aslr_tool__process_ksymbol(const struct perf_tool *tool, > + union perf_event *event, > + struct perf_sample *sample, > + struct machine *machine) > +{ > + struct delegate_tool *del_tool; > + struct aslr_tool *aslr; > + struct perf_tool *delegate; > + union perf_event *new_event; > + struct thread *thread; > + struct machine *aslr_machine; > + int err; > + > + del_tool = container_of(tool, struct delegate_tool, tool); > + aslr = container_of(del_tool, struct aslr_tool, tool); > + delegate = aslr->tool.delegate; > + new_event = (union perf_event *)aslr->event_copy; > + > + aslr_machine = machines__findnew(&aslr->machines, machine->pid); > + if (!aslr_machine) > + return -ENOMEM; > + > + err = perf_event__process_ksymbol(tool, event, sample, aslr_machine); > + if (err) > + return err; > + > + thread = machine__findnew_thread(aslr_machine, kernel_pid, 0); > + if (!thread) > + return -ENOMEM; > + memcpy(&new_event->ksymbol, &event->ksymbol, event->ksymbol.header.size); > + /* Remaps the ksymbol.start */ > + new_event->ksymbol.addr = aslr_tool__findnew_mapping(aslr, thread, > + PERF_RECORD_MISC_KERNEL, > + event->ksymbol.addr, > + event->ksymbol.len, > + /*pgoff=*/0); > + > + err = delegate->ksymbol(delegate, new_event, sample, machine); > + thread__put(thread); > + return err; > +} > + > +static int aslr_tool__process_sample(const struct perf_tool *tool, union perf_event *event, > + struct perf_sample *sample, > + struct evsel *evsel, struct machine *machine) > +{ > + struct delegate_tool *del_tool; > + struct aslr_tool *aslr; > + struct perf_tool *delegate; > + int ret; > + u64 sample_type; > + struct thread *thread; > + struct machine *aslr_machine; > + __u64 max_i; > + __u64 max_j; > + union perf_event *new_event; > + struct perf_sample new_sample; > + __u64 *in_array, *out_array; > + u8 cpumode; > + u64 addr; > + size_t i; > + size_t j; > + > + del_tool = container_of(tool, struct delegate_tool, tool); > + aslr = 
container_of(del_tool, struct aslr_tool, tool); > + delegate = aslr->tool.delegate; > + ret = -EFAULT; > + sample_type = evsel->core.attr.sample_type; > + max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); > + max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); > + new_event = (union perf_event *)aslr->event_copy; > + cpumode = sample->cpumode; > + i = 0; > + j = 0; > + > + aslr_machine = machines__findnew(&aslr->machines, machine->pid); > + if (!aslr_machine) > + return -ENOMEM; > + > + thread = machine__findnew_thread(aslr_machine, sample->pid, sample->tid); > + > + if (!thread) > + return -ENOMEM; > + > + if (max_i > PERF_SAMPLE_MAX_SIZE / sizeof(u64)) > + goto out_put; > + > + > + Here as well. > + new_event->sample.header = event->sample.header; > + > + in_array = &event->sample.array[0]; > + out_array = &new_event->sample.array[0]; > + > +#define CHECK_BOUNDS(required_i, required_j) \ > + do { \ > + if (i + (required_i) > max_i || j + (required_j) > max_j) { \ > + ret = -EFAULT; \ > + goto out_put; \ > + } \ > + } while (0) > + > +#define COPY_U64() \ > + do { \ > + CHECK_BOUNDS(1, 1); \ > + out_array[j++] = in_array[i++]; \ > + } while (0) > + > +#define REMAP_U64(addr_field) \ > + do { \ > + CHECK_BOUNDS(1, 1); \ > + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr_field); \ > + i++; \ > + } while (0) > + I'm still not sure if it's a good idea to expose all the details of the sample layout here. It needs to be in sync with evsel__parse_sample() for any future changes. 
> + if (sample_type & PERF_SAMPLE_IDENTIFIER) > + COPY_U64(); /* id */ > + if (sample_type & PERF_SAMPLE_IP) > + REMAP_U64(sample->ip); > + if (sample_type & PERF_SAMPLE_TID) > + COPY_U64(); /* pid, tid */ > + if (sample_type & PERF_SAMPLE_TIME) > + COPY_U64(); /* time */ > + if (sample_type & PERF_SAMPLE_ADDR) > + REMAP_U64(sample->addr); > + if (sample_type & PERF_SAMPLE_ID) > + COPY_U64(); /* id */ > + if (sample_type & PERF_SAMPLE_STREAM_ID) > + COPY_U64(); /* stream_id */ > + if (sample_type & PERF_SAMPLE_CPU) > + COPY_U64(); /* cpu, res */ > + if (sample_type & PERF_SAMPLE_PERIOD) > + COPY_U64(); /* period */ > + if (sample_type & PERF_SAMPLE_READ) { > + if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { > + COPY_U64(); /* value */ > + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) > + COPY_U64(); /* time_enabled */ > + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) > + COPY_U64(); /* time_running */ > + if (evsel->core.attr.read_format & PERF_FORMAT_ID) > + COPY_U64(); /* id */ > + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) > + COPY_U64(); /* lost */ > + } else { > + u64 nr; > + > + CHECK_BOUNDS(1, 1); > + nr = out_array[j++] = in_array[i++]; > + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) > + COPY_U64(); /* time_enabled */ > + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) > + COPY_U64(); /* time_running */ > + for (u64 cntr = 0; cntr < nr; cntr++) { > + COPY_U64(); /* value */ > + if (evsel->core.attr.read_format & PERF_FORMAT_ID) > + COPY_U64(); /* id */ > + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) > + COPY_U64(); /* lost */ > + } > + } > + } > + if (sample_type & PERF_SAMPLE_CALLCHAIN) { > + u64 nr; > + > + CHECK_BOUNDS(1, 1); > + nr = out_array[j++] = in_array[i++]; > + > + for (u64 cntr = 0; cntr < nr; cntr++) { > + CHECK_BOUNDS(1, 1); > + addr = in_array[i++]; > + if (addr >= PERF_CONTEXT_MAX) { > + out_array[j++] = addr; > + switch 
(addr) { > + case PERF_CONTEXT_HV: > + cpumode = PERF_RECORD_MISC_HYPERVISOR; > + break; > + case PERF_CONTEXT_KERNEL: > + cpumode = PERF_RECORD_MISC_KERNEL; > + break; > + case PERF_CONTEXT_USER: > + cpumode = PERF_RECORD_MISC_USER; > + break; > + case PERF_CONTEXT_GUEST: > + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; > + break; > + case PERF_CONTEXT_GUEST_KERNEL: > + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; > + break; > + case PERF_CONTEXT_GUEST_USER: > + cpumode = PERF_RECORD_MISC_GUEST_USER; > + break; > + case PERF_CONTEXT_USER_DEFERRED: > + if (cntr + 1 >= nr) { > + pr_debug("Truncated callchain deferred cookie context\n"); > + ret = 0; > + goto out_put; > + } > + /* > + * Immediately followed by a 64-bit > + * stitching cookie. Skip/Copy it! > + */ > + CHECK_BOUNDS(1, 1); > + out_array[j++] = in_array[i++]; > + cntr++; > + cpumode = PERF_RECORD_MISC_USER; > + break; > + default: > + pr_debug("invalid callchain context: %"PRIx64"\n", addr); > + ret = 0; > + goto out_put; > + } > + continue; > + } > + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr); > + } > + } > + if (sample_type & PERF_SAMPLE_RAW) { > + size_t bytes = sizeof(u32) + sample->raw_size; > + size_t u64_words = (bytes + 7) / 8; > + > + if (i + u64_words > max_i || j + u64_words > max_j) { > + ret = -EFAULT; > + goto out_put; > + } > + memcpy(&out_array[j], &in_array[i], bytes); > + i += u64_words; > + j += u64_words; > + /* > + * TODO: certain raw samples can be remapped, such as > + * tracepoints by examining their fields. > + */ > + pr_debug("Dropping raw samples as possible ASLR leak\n"); > + ret = 0; > + goto out_put; There's not much point to do it when we drop all samples as the sample type flags will be the same for an evsel. Maybe better to check if it has unsupported flags earlier. 
> + } > + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { > + u64 nr; > + > + CHECK_BOUNDS(1, 1); > + nr = out_array[j++] = in_array[i++]; > + > + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) > + COPY_U64(); /* hw_idx */ > + > + if (nr > (ULLONG_MAX / 3)) { > + ret = -EFAULT; > + goto out_put; > + } > + if (nr * 3 > max_i - i || nr * 3 > max_j - j) { > + ret = -EFAULT; > + goto out_put; > + } > + for (u64 cntr = 0; cntr < nr; cntr++) { > + out_array[j++] = aslr_tool__remap_address(aslr, thread, > + sample->cpumode, > + in_array[i++]); /* from */ > + out_array[j++] = aslr_tool__remap_address(aslr, thread, > + sample->cpumode, > + in_array[i++]); /* to */ > + out_array[j++] = in_array[i++]; /* flags */ > + } > + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) { > + if (nr > max_i - i || nr > max_j - j) { > + ret = -EFAULT; > + goto out_put; > + } > + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); > + i += nr; > + j += nr; > + /* TODO: confirm branch counters don't leak ASLR information. */ > + pr_debug("Dropping sample branch counters as possible ASLR leak\n"); > + ret = 0; > + goto out_put; > + } > + } > + if (sample_type & PERF_SAMPLE_REGS_USER) { > + u64 abi; > + > + COPY_U64(); /* abi */ > + abi = out_array[j-1]; > + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { > + u64 nr = hweight64(evsel->core.attr.sample_regs_user); > + > + if (nr > max_i - i || nr > max_j - j) { > + ret = -EFAULT; > + goto out_put; > + } > + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); > + i += nr; > + j += nr; > + } > + /* TODO: can this be less conservative? */ > + pr_debug("Dropping regs user sample as possible ASLR leak\n"); > + ret = 0; > + goto out_put; > + } > + if (sample_type & PERF_SAMPLE_STACK_USER) { > + u64 size; > + > + CHECK_BOUNDS(1, 1); > + size = out_array[j++] = in_array[i++]; > + if (size > 0) { > + size_t u64_words = size / 8 + (size % 8 ? 
1 : 0); > + > + if (u64_words > max_i - i || u64_words > max_j - j) { > + ret = -EFAULT; > + goto out_put; > + } > + memcpy(&out_array[j], &in_array[i], size); > + if (size % 8) { > + size_t pad = 8 - (size % 8); > + > + memset(((char *)&out_array[j]) + size, 0, pad); > + } > + i += u64_words; > + j += u64_words; > + > + COPY_U64(); /* dyn_size */ > + } > + /* TODO: can this be less conservative? */ > + pr_debug("Dropping stack user sample as possible ASLR leak\n"); > + ret = 0; > + goto out_put; > + } > + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) > + COPY_U64(); /* perf_sample_weight */ > + if (sample_type & PERF_SAMPLE_DATA_SRC) > + COPY_U64(); /* data_src */ > + if (sample_type & PERF_SAMPLE_TRANSACTION) > + COPY_U64(); /* transaction */ > + if (sample_type & PERF_SAMPLE_REGS_INTR) { > + u64 abi; > + > + COPY_U64(); /* abi */ > + abi = out_array[j-1]; > + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { > + u64 nr = hweight64(evsel->core.attr.sample_regs_intr); > + > + if (nr > max_i - i || nr > max_j - j) { > + ret = -EFAULT; > + goto out_put; > + } > + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); > + i += nr; > + j += nr; > + } > + /* TODO: can this be less conservative? */ > + pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); > + ret = 0; > + goto out_put; > + } > + if (sample_type & PERF_SAMPLE_PHYS_ADDR) { > + COPY_U64(); /* phys_addr */ > + /* TODO: can this be less conservative? */ > + pr_debug("Dropping physical address sample as possible ASLR leak\n"); > + ret = 0; > + goto out_put; > + } > + if (sample_type & PERF_SAMPLE_CGROUP) > + COPY_U64(); /* cgroup */ > + if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) > + COPY_U64(); /* data_page_size */ > + if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) > + COPY_U64(); /* code_page_size */ > + > + if (sample_type & PERF_SAMPLE_AUX) { > + u64 size; > + > + CHECK_BOUNDS(1, 1); > + size = out_array[j++] = in_array[i++]; > + if (size > 0) { > + size_t u64_words = size / 8 + (size % 8 ? 
1 : 0); > + > + if (u64_words > max_i - i || u64_words > max_j - j) { > + ret = -EFAULT; > + goto out_put; > + } > + memcpy(&out_array[j], &in_array[i], size); > + if (size % 8) { > + size_t pad = 8 - (size % 8); > + > + memset(((char *)&out_array[j]) + size, 0, pad); > + } > + i += u64_words; > + j += u64_words; > + } > + /* TODO: can this be less conservative? */ > + pr_debug("Dropping aux sample as possible ASLR leak\n"); > + ret = 0; > + goto out_put; > + } > + > + if (evsel__is_offcpu_event(evsel)) { > + /* TODO: can this be less conservative? */ > + pr_debug("Dropping off-CPU sample as possible ASLR leak\n"); > + ret = 0; > + goto out_put; > + } > + > + new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); > + > + perf_sample__init(&new_sample, /*all=*/ true); > + ret = evsel__parse_sample(evsel, new_event, &new_sample); > + if (ret) { > + perf_sample__exit(&new_sample); > + goto out_put; > + } > + > + ret = delegate->sample(delegate, new_event, &new_sample, evsel, machine); > + perf_sample__exit(&new_sample); > + > +out_put: > + thread__put(thread); > + return ret; > +} > + > +#undef CHECK_BOUNDS > +#undef COPY_U64 > +#undef REMAP_U64 > + > + > +static int aslr_tool__process_attr(const struct perf_tool *tool, > + union perf_event *event, > + struct evlist **pevlist) > +{ > + struct delegate_tool *del_tool; > + struct aslr_tool *aslr; > + struct perf_tool *delegate; > + union perf_event *new_event; > + > + del_tool = container_of(tool, struct delegate_tool, tool); > + aslr = container_of(del_tool, struct aslr_tool, tool); > + delegate = aslr->tool.delegate; > + new_event = (union perf_event *)aslr->event_copy; > + > + memcpy(&new_event->attr, &event->attr, event->attr.header.size); > + if (new_event->attr.attr.type == PERF_TYPE_BREAKPOINT) > + new_event->attr.attr.bp_addr = 0; /* Conservatively remove addresses. 
*/ > + > + return delegate->attr(delegate, new_event, pevlist); > +} > + > +static int skipn(int fd, off_t n) > +{ > + char buf[4096]; > + ssize_t ret; > + > + while (n > 0) { > + ret = read(fd, buf, min(n, (off_t)sizeof(buf))); > + if (ret <= 0) > + return ret; > + n -= ret; > + } > + > + return 0; > +} > + > +static s64 aslr_tool__process_auxtrace(const struct perf_tool *tool __maybe_unused, > + struct perf_session *session, > + union perf_event *event) > +{ > + if (perf_data__is_pipe(session->data)) { > + /* Copy behavior of the stub by reading all pipe data. */ > + int err = skipn(perf_data__fd(session->data), event->auxtrace.size); > + > + if (err < 0) > + return err; > + } > + return event->auxtrace.size; > +} > + > +static int aslr_tool__process_auxtrace_info(const struct perf_tool *tool __maybe_unused, > + struct perf_session *session __maybe_unused, > + union perf_event *event __maybe_unused) > +{ > + return 0; > +} > + > +static int aslr_tool__process_auxtrace_error(const struct perf_tool *tool __maybe_unused, > + struct perf_session *session __maybe_unused, > + union perf_event *event __maybe_unused) > +{ > + return 0; > +} > + > +static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) > +{ > + delegate_tool__init(&aslr->tool, delegate); > + aslr->tool.tool.ordered_events = true; > + > + machines__init(&aslr->machines); > + > + hashmap__init(&aslr->remap_addresses, > + remap_addresses__hash, remap_addresses__equal, > + /*ctx=*/NULL); > + hashmap__init(&aslr->top_addresses, > + top_addresses__hash, top_addresses__equal, > + /*ctx=*/NULL); > + > + aslr->tool.tool.sample = aslr_tool__process_sample; > + /* read - reads a counter, okay to delegate. 
*/ > + aslr->tool.tool.mmap = aslr_tool__process_mmap; > + aslr->tool.tool.mmap2 = aslr_tool__process_mmap2; > + aslr->tool.tool.comm = aslr_tool__process_comm; > + aslr->tool.tool.fork = aslr_tool__process_fork; > + aslr->tool.tool.exit = aslr_tool__process_exit; > + /* namesspaces, cgroup, lost, lost_sample, aux, */ > + /* itrace_start, aux_output_hw_id, context_switch, throttle, unthrottle */ > + /* - no virtual addresses. */ > + aslr->tool.tool.ksymbol = aslr_tool__process_ksymbol; > + /* bpf - no virtual address. */ > + aslr->tool.tool.text_poke = aslr_tool__process_text_poke; > + aslr->tool.tool.attr = aslr_tool__process_attr; > + /* event_update, tracing_data, finished_round, build_id, id_index, */ > + /* event_update, tracing_data, finished_round, build_id, id_index, */ The same line appears twice. > + /* auxtrace_info, auxtrace_error, time_conv, thread_map, cpu_map, */ > + /* stat_config, stat, feature, finished_init, bpf_metadata, compressed, */ > + /* auxtrace - no virtual addresses. */ The auxtrace related ones are listed but handled differently? 
Thanks, Namhyung > + aslr->tool.tool.auxtrace = aslr_tool__process_auxtrace; > + aslr->tool.tool.auxtrace_info = aslr_tool__process_auxtrace_info; > + aslr->tool.tool.auxtrace_error = aslr_tool__process_auxtrace_error; > +} > + > +struct perf_tool *aslr_tool__new(struct perf_tool *delegate) > +{ > + struct aslr_tool *aslr = zalloc(sizeof(*aslr)); > + > + if (!aslr) > + return NULL; > + > + aslr_tool__init(aslr, delegate); > + return &aslr->tool.tool; > +} > + > +void aslr_tool__delete(struct perf_tool *tool) > +{ > + struct delegate_tool *del_tool; > + struct aslr_tool *aslr; > + struct hashmap_entry *cur; > + size_t bkt; > + > + if (!tool) > + return; > + > + del_tool = container_of(tool, struct delegate_tool, tool); > + aslr = container_of(del_tool, struct aslr_tool, tool); > + > + hashmap__for_each_entry(&aslr->remap_addresses, cur, bkt) { > + struct remap_addresses_key *key = (struct remap_addresses_key *)cur->pkey; > + > + if (key) > + dso__put(key->dso); > + zfree(&cur->pkey); > + zfree(&cur->pvalue); > + } > + hashmap__for_each_entry(&aslr->top_addresses, cur, bkt) { > + zfree(&cur->pkey); > + zfree(&cur->pvalue); > + } > + > + hashmap__clear(&aslr->remap_addresses); > + hashmap__clear(&aslr->top_addresses); > + machines__destroy_kernel_maps(&aslr->machines); > + machines__exit(&aslr->machines); > + free(aslr); > +} > diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h > new file mode 100644 > index 000000000000..ea984d82681f > --- /dev/null > +++ b/tools/perf/util/aslr.h > @@ -0,0 +1,10 @@ > +/* SPDX-License-Identifier: GPL-2.0 */ > +#ifndef __PERF_ASLR_H > +#define __PERF_ASLR_H > + > +struct perf_tool; > + > +struct perf_tool *aslr_tool__new(struct perf_tool *delegate); > +void aslr_tool__delete(struct perf_tool *aslr); > + > +#endif /* __PERF_ASLR_H */ > -- > 2.54.0.563.g4f69b47b94-goog > ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v6 5/6] perf test: Add inject ASLR test 2026-05-08 8:27 ` [PATCH v6 0/6] perf tools: Add inject --aslr feature and prerequisite robustness fixes Ian Rogers ` (3 preceding siblings ...) 2026-05-08 8:27 ` [PATCH v6 4/6] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses Ian Rogers @ 2026-05-08 8:27 ` Ian Rogers 2026-05-08 13:29 ` James Clark 2026-05-11 7:34 ` Namhyung Kim 2026-05-08 8:27 ` [PATCH v6 6/6] perf aslr: Strip sample registers Ian Rogers 2026-05-19 8:08 ` [PATCH v7 0/4] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 6 siblings, 2 replies; 183+ messages in thread From: Ian Rogers @ 2026-05-08 8:27 UTC (permalink / raw) To: irogers, acme, gmx, james.clark, namhyung Cc: adrian.hunter, jolsa, linux-kernel, linux-perf-users, mingo, peterz Add a new shell test `inject_aslr.sh` to verify the `perf inject --aslr` feature. The test covers: - Basic address remapping for user space samples. - Pipe mode coverage for `perf record` piped into `perf inject --aslr`. - Callchain address remapping. - Consistency of `perf report` output before and after injection. - Pipe mode report consistency. - Dropping of samples that leak ASLR info (physical addresses). - Kernel address remapping (utilizing a dedicated kernel-intensive VFS dd workload to guarantee continuous timer interrupts sampling flow inside kernel privilege states). - Kernel report consistency with address normalization. The test suite is hardened with global 'set -o pipefail' assertions to catch pipeline failures, stream-consuming awk processors to handle SIGPIPE signals, and a dedicated pipe output scenario validating raw 'perf inject -o -' stdout streams. 
Assisted-by: Gemini-CLI:Google Gemini 3 Signed-off-by: Ian Rogers <irogers@google.com> --- v6: Refactor kernel-space sampling test cases to utilize a dedicated system-call intensive VFS dd workload (kprog) instead of purely userspace-bound tight loops, guaranteeing high-density kernel privilege state sampling streams and eliminating intermittent execution flakiness dropouts. v5: Harden test suite verification pipelines by upgrading report checks to strict sorted line-by-line diff comparisons to accommodate remapped pointer shifts. Append || true fallback operators to grep -v filtering pipelines to prevent the shell test from spuriously aborting under set -o pipefail on empty inputs, ensuring graceful failure checks trigger correctly. v4: Reorder set -e/pipefail to prevent temp file leakage in root directory on unprivileged record failures when run as root. Ensure grep report filters have || true suffixes to avoid aborts under pipefail. Add comprehensive pipe stdout injection attributes validation case. v3: Harden script with pipefail, SIGPIPE awk pipeline fixes, callchain empty data asserts, baseline sample verification, and grep report abort protections. Reorder set -e/pipefail to prevent stack leaks in mktemp failures. v2: Add sum comparison for kernel overhead and 32-bit math corrections. Add awk with gsub to normalize trailing dots and brackets. Trap EXIT, prevent race conditions and avoid hardcoded perf binary.
--- tools/perf/tests/shell/inject_aslr.sh | 460 ++++++++++++++++++++++++++ 1 file changed, 460 insertions(+) create mode 100755 tools/perf/tests/shell/inject_aslr.sh diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh new file mode 100755 index 000000000000..6363a0f69d2b --- /dev/null +++ b/tools/perf/tests/shell/inject_aslr.sh @@ -0,0 +1,460 @@ +#!/bin/bash +# perf inject --aslr test +# SPDX-License-Identifier: GPL-2.0 + +set -e +set -o pipefail + +shelldir=$(dirname "$0") +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + +sym="noploop" + +skip_test_missing_symbol ${sym} + +# Create global temp directory +temp_dir=$(mktemp -d /tmp/perf-test-aslr.XXXXXXXXXX) +data="${temp_dir}/perf.data" +data2="${temp_dir}/perf.data2" + +prog="perf test -w noploop" +[ "$(uname -m)" = "s390x" ] && prog="$prog 3" +err=0 +kprog="dd if=/dev/zero of=/dev/null bs=1M count=500" + +cleanup() { + # Check if temp_dir is set and looks sane before removing + if [[ "${temp_dir}" =~ ^/tmp/perf-test-aslr\. 
]]; then + rm -rf "${temp_dir}" + fi +} + +trap_cleanup() { + cleanup + exit 1 +} + +trap cleanup EXIT +trap trap_cleanup TERM INT + +get_noploop_addr() { + local file=$1 + perf script -i "$file" | awk ' + BEGIN { found=0 } + { + for (i=1; i<=NF; i++) { + if ($i ~ /noploop\+/) { + if (!found) { + print $(i-1) + found=1 + } + } + } + }' +} + +test_basic_aslr() { + echo "Test basic ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.basic.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.basic.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -v --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Basic ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Basic ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Basic ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Basic ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Basic ASLR test [Success]" + fi +} + +test_pipe_aslr() { + echo "Test pipe mode ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe.XXXXXX") + + # Use tee to save the original pipe data for comparison + perf record -e task-clock:u -o - ${prog} | tee "${data}" | perf inject --aslr -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Pipe ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Pipe ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Pipe ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Pipe ASLR test [Failed - addresses are not remapped]" 
+ err=1 + else + echo "Pipe ASLR test [Success]" + fi +} + +test_callchain_aslr() { + echo "Test Callchain ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.callchain.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.callchain.XXXXXX") + + perf record -g -e task-clock:u -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Callchain ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Callchain ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Callchain ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Callchain ASLR test [Failed - addresses are not remapped]" + err=1 + else + # Extract callchain addresses (indented lines starting with hex addresses) + orig_callchain=$(perf script -i "${data}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + new_callchain=$(perf script -i "${data2}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + + if [ -z "$orig_callchain" ]; then + echo "Callchain ASLR test [Failed - no callchain samples in original file]" + err=1 + elif [ -z "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain data was dropped]" + err=1 + elif [ "$orig_callchain" = "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain addresses were not remapped]" + err=1 + else + echo "Callchain ASLR test [Success]" + fi + fi +} + +test_report_aslr() { + echo "Test perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i 
"${data}" -o "${data_clean}" + perf inject -v -b --aslr -i "${data}" -o "${data2}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Report ASLR test [Success]" + fi +} + +test_pipe_report_aslr() { + echo "Test pipe mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + # Use tee to save the original pipe data, then process it with inject -b + perf record -e task-clock:u -o - ${prog} | \ + tee "${data}" | \ + perf inject -b --aslr -o "${data2}" + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || 
true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Pipe Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Report ASLR test [Success]" + fi +} + +test_pipe_out_report_aslr() { + echo "Test pipe output mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_out_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf inject -b --aslr -i "${data}" -o - | perf report -i - --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! 
-s "${report1_clean}" ]; then + echo "Pipe Output Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Output Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Output Report ASLR test [Success]" + fi +} + +test_dropped_samples() { + echo "Test dropped samples (phys-data)" + local data + data=$(mktemp "${temp_dir}/perf.data.dropped.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.dropped.XXXXXX") + + # Check if --phys-data is supported by recording a short run + if ! perf record -e task-clock:u --phys-data -o "${data}" -- sleep 0.1 > /dev/null 2>&1; then + echo "Skipping dropped samples test as --phys-data is not supported" + return + fi + + perf record -e task-clock:u --phys-data -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + # Verify that the original file actually contained samples! + orig_samples=$(perf script -i "${data}" | wc -l) + if [ "$orig_samples" -eq 0 ]; then + echo "Dropped samples test [Failed - no samples in original file]" + err=1 + else + # Verify that samples are dropped. + samples_count=$(perf script -i "${data2}" | wc -l) + + if [ "$samples_count" -gt 0 ]; then + echo "Dropped samples test [Failed - samples were not dropped]" + err=1 + else + echo "Dropped samples test [Success]" + fi + fi +} + +test_kernel_aslr() { + echo "Test kernel ASLR remapping" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then + echo "Skipping kernel ASLR test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel ASLR test as kernel map could not be recorded (permissions restricted)" + return + fi + + perf inject -v --aslr -i "${kdata}" -o "${kdata2}" + + # Check if kernel addresses are remapped. + # Find the field that ends with :k: (the event name) and take the next field! + orig_addr=$(perf script -i "${kdata}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + new_addr=$(perf script -i "${kdata2}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + + echo "Kernel ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Kernel ASLR test [Failed - no kernel samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Kernel ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Kernel ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Kernel ASLR test [Success]" + fi +} + +test_kernel_report_aslr() { + echo "Test kernel perf report consistency" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel_report.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_report_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then + echo "Skipping kernel report test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel report test as kernel map could not be recorded (permissions restricted)" + return + fi + + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i "${kdata}" -o "${data_clean}" + perf inject -v -b --aslr -i "${kdata}" -o "${kdata2}" + + local report1="${temp_dir}/report_kernel1" + local report2="${temp_dir}/report_kernel2" + local report1_clean="${temp_dir}/report_kernel1.clean" + local report2_clean="${temp_dir}/report_kernel2.clean" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${kdata2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' > "${report2_clean}" || true + + # Normalize kernel DSOs and addresses in clean reports + # This allows kernel modules to be either a module or kernel.kallsyms + local report1_norm="${temp_dir}/report_kernel1.norm" + local report2_norm="${temp_dir}/report_kernel2.norm" + local diff_file="${temp_dir}/diff_kernel" + + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report1_clean}" | \ + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ + sort > "${report1_norm}" || true + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report2_clean}" | \ + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ + sort > "${report2_norm}" || true + + diff -u -w "${report1_norm}" "${report2_norm}" > "${diff_file}" || true + + if [ ! 
-s "${report1_norm}" ]; then + echo "Kernel Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Kernel Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Kernel Report ASLR test [Success]" + fi +} + +test_basic_aslr +test_pipe_aslr +test_callchain_aslr +test_report_aslr +test_pipe_report_aslr +test_pipe_out_report_aslr +test_dropped_samples +test_kernel_aslr +test_kernel_report_aslr + +cleanup +exit $err -- 2.54.0.563.g4f69b47b94-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v6 5/6] perf test: Add inject ASLR test 2026-05-08 8:27 ` [PATCH v6 5/6] perf test: Add inject ASLR test Ian Rogers @ 2026-05-08 13:29 ` James Clark 2026-05-08 14:29 ` James Clark 2026-05-11 7:34 ` Namhyung Kim 1 sibling, 1 reply; 183+ messages in thread From: James Clark @ 2026-05-08 13:29 UTC (permalink / raw) To: Ian Rogers Cc: adrian.hunter, jolsa, linux-kernel, linux-perf-users, mingo, peterz, acme, gmx, namhyung On 08/05/2026 9:27 am, Ian Rogers wrote: > Add a new shell test `inject_aslr.sh` to verify the `perf inject --aslr` > feature. The test covers: > - Basic address remapping for user space samples. > - Pipe mode coverage for `perf record` piped into `perf inject --aslr`. > - Callchain address remapping. > - Consistency of `perf report` output before and after injection. > - Pipe mode report consistency. > - Dropping of samples that leak ASLR info (physical addresses). > - Kernel address remapping (utilizing a dedicated kernel-intensive VFS dd workload > to guarantee continuous timer interrupts sampling flow inside kernel privilege states). > - Kernel report consistency with address normalization. > > The test suite is hardened with global 'set -o pipefail' assertions to catch > pipeline failures, stream-consuming awk processors to handle SIGPIPE signals, > and a dedicated pipe output scenario validating raw 'perf inject -o -' stdout > streams. > > Assisted-by: Gemini-CLI:Google Gemini 3 > Signed-off-by: Ian Rogers <irogers@google.com> > --- > v6: Refactor kernel-space sampling test cases to utilize a dedicated > system-call intensive VFS dd workload (kprog) instead of purely > userspace-bound tight loops, guaranteeing high-density kernel > privilege state sampling streams and eliminating intermittent > execution flakiness dropouts. 
> Hi Ian, V5 passed on X86, but now I get this test failing about 50% of the time with output like: Test user register stripping User registers stripping test [Failed - report parsing differs] Showing first 20 lines of diff: --- /tmp/perf-test-aslr.ssH9urcfri/report_regs1.clean 2026-05-08 14:14:02.127298207 +0100 +++ /tmp/perf-test-aslr.ssH9urcfri/report_regs2.clean 2026-05-08 14:14:02.129298219 +0100 @@ -30,8 +30,8 @@ 0.02% perf ld-linux-x86-64.so.2 [.] mmap64 0.02% perf-noploop [kernel.kallsyms] [k] kmem_cache_free 0.02% perf-noploop [kernel.kallsyms] [k] nohz_balancer_kick - 0.02% perf-noploop [kernel.kallsyms] [k] pvclock_gtod_notify 0.02% perf-noploop [kernel.kallsyms] [k] try_to_wake_up + 0.02% perf-noploop [kvm] [k] pvclock_gtod_notify 0.02% perf-noploop libc.so.6 [.] __cxa_finalize 0.04% perf ld-linux-x86-64.so.2 [.] strcmp 0.05% perf libLLVM-15.so.1 [.] llvm::StringMapImpl::LookupBucketFor(llvm::StringRef) ---- end ---- or: Test user register stripping User registers stripping test [Failed - report parsing differs] Showing first 20 lines of diff: --- /tmp/perf-test-aslr.NoDUUXtHyh/report_regs1.clean 2026-05-08 14:05:31.109246491 +0100 +++ /tmp/perf-test-aslr.NoDUUXtHyh/report_regs2.clean 2026-05-08 14:05:31.111246503 +0100 @@ -2,8 +2,8 @@ 0.01% perf [kernel.kallsyms] [k] find_mergeable_anon_vma 0.01% perf [kernel.kallsyms] [k] finish_fault 0.01% perf [kernel.kallsyms] [k] pte_offset_map_rw_nolock + 0.02% perf [amdgpu] [k] amdgpu_device_rreg 0.02% perf [kernel.kallsyms] [k] __alloc_frozen_pages_noprof - 0.02% perf [kernel.kallsyms] [k] amdgpu_device_rreg 0.02% perf [kernel.kallsyms] [k] __build_id_parse.isra.0 0.02% perf [kernel.kallsyms] [k] filemap_get_entry 0.02% perf [kernel.kallsyms] [k] filemap_map_pages ---- end ---- And on Arm I get a hang/infinite loop every time in "Test kernel ASLR remapping". 
Looks like it could be related to the changes in V6 as I didn't see it on V5: #0 __read_once_size (size=4, res=0xffffe56c64a0, p=0xaaaaeaedbab8) at linux/tools/include/linux/compiler.h:180 #1 atomic_read (v=0xaaaaeaedbab8) at linux/tools/include/asm-generic/atomic-gcc.h:26 #2 0x0000aaaaaf65cd6c in refcount_read (r=0xaaaaeaedbab8) at linux/tools/include/linux/refcount.h:70 #3 0x0000aaaaaf65d9dc in check_invariants (maps=0xaaaae7e3b480) at util/maps.c:114 #4 0x0000aaaaaf65eef8 in maps__insert (maps=0xaaaae7e3b480, map=0xaaaaec2ccf10) at util/maps.c:536 #5 0x0000aaaaaf62a028 in maps__split_kallsyms (kmaps=0xaaaae7e3b480, dso=0xaaaae7e3f910, delta=1879048192, initial_map=0xaaaae7e3fab0) at util/symbol.c:986 #6 0x0000aaaaaf62b550 in __dso__load_kallsyms (dso=0xaaaae7e3f910, filename=0xaaaae7e55200 "/proc/kallsyms", map=0xaaaae7e3fab0, no_kcore=false) at util/symbol.c:1530 #7 0x0000aaaaaf62b5bc in dso__load_kallsyms (dso=0xaaaae7e3f910, filename=0xaaaae7e55200 "/proc/kallsyms", map=0xaaaae7e3fab0) at util/symbol.c:1536 #8 0x0000aaaaaf62cbc0 in dso__load_kernel_sym (dso=0xaaaae7e3f910, map=0xaaaae7e3fab0) at util/symbol.c:2125 #9 0x0000aaaaaf62bc5c in dso__load (dso=0xaaaae7e3f910, map=0xaaaae7e3fab0) at util/symbol.c:1721 #10 0x0000aaaaaf65b98c in map__load (map=0xaaaae7e3fab0) at util/map.c:351 #11 0x0000aaaaaf5e43cc in thread__find_map (thread=0xaaaae7e443b0, cpumode=1 '\001', addr=18446603336494207932, al=0xffffe56c8c28) at util/event.c:744 #12 0x0000aaaaaf5e4810 in machine__resolve (machine=0xaaaae7e3bee0, al=0xffffe56c8c28, sample=0xffffe56c8df0) at util/event.c:818 #13 0x0000aaaaaf41d850 in process_sample_event (tool=0xffffe56c93d0, event=0xffffb1091ec8, sample=0xffffe56c8df0, evsel=0xaaaae7e3b580, machine=0xaaaae7e3bee0) at builtin-script.c:2686 #14 0x0000aaaaaf6668f4 in evlist__deliver_sample (evlist=0xaaaae7e3c550, tool=0xffffe56c93d0, event=0xffffb1091ec8, sample=0xffffe56c8df0, evsel=0xaaaae7e3b580, machine=0xaaaae7e3bee0) at util/session.c:1335 #15 
0x0000aaaaaf667000 in machines__deliver_event (machines=0xaaaae7e3bee0, evlist=0xaaaae7e3c550, event=0xffffb1091ec8, sample=0xffffe56c8df0, tool=0xffffe56c93d0, file_offset=3784, file_path=0xaaaae7e3b540 "/tmp/perf-test-aslr.J1XB8pvpFy/perf.data2.kernel.FA0Uvd") at util/session.c:1502 #16 0x0000aaaaaf667538 in perf_session__deliver_event (session=0xaaaae7e3bca0, event=0xffffb1091ec8, tool=0xffffe56c93d0, file_offset=3784, file_path=0xaaaae7e3b540 "/tmp/perf-test-aslr.J1XB8pvpFy/perf.data2.kernel.FA0Uvd") at util/session.c:1593 #17 0x0000aaaaaf662bbc in ordered_events__deliver_event (oe=0xaaaae7e3c460, event=0xaaaae7e44740) at util/session.c:134 #18 0x0000aaaaaf672c98 in do_flush (oe=0xaaaae7e3c460, show_progress=true) at util/ordered-events.c:245 #19 0x0000aaaaaf673048 in __ordered_events__flush (oe=0xaaaae7e3c460, how=OE_FLUSH__FINAL, timestamp=0) at util/ordered-events.c:324 #20 0x0000aaaaaf673154 in ordered_events__flush (oe=0xaaaae7e3c460, how=OE_FLUSH__FINAL) at util/ordered-events.c:342 #21 0x0000aaaaaf669e54 in __perf_session__process_events (session=0xaaaae7e3bca0) at util/session.c:2508 #22 0x0000aaaaaf66a790 in perf_session__process_events (session=0xaaaae7e3bca0) at util/session.c:2675 #23 0x0000aaaaaf41f59c in __cmd_script (script=0xffffe56c93d0) at builtin-script.c:3241 #24 0x0000aaaaaf4242b0 in cmd_script (argc=0, argv=0xffffe56cb370) at builtin-script.c:4586 #25 0x0000aaaaaf4a03f8 in run_builtin (p=0xaaaaafa14e60 <commands+480>, argc=3, argv=0xffffe56cb370) at perf.c:348 #26 0x0000aaaaaf4a066c in handle_internal_command (argc=3, argv=0xffffe56cb370) at perf.c:398 #27 0x0000aaaaaf4a0824 in run_argv (argcp=0xffffe56cb1ac, argv=0xffffe56cb1a0) at perf.c:442 #28 0x0000aaaaaf4a0b4c in main (argc=3, argv=0xffffe56cb370) at perf.c:549 ^ permalink raw reply [flat|nested] 183+ messages in thread
* Re: [PATCH v6 5/6] perf test: Add inject ASLR test 2026-05-08 13:29 ` James Clark @ 2026-05-08 14:29 ` James Clark 0 siblings, 0 replies; 183+ messages in thread From: James Clark @ 2026-05-08 14:29 UTC (permalink / raw) To: Ian Rogers Cc: adrian.hunter, jolsa, linux-kernel, linux-perf-users, mingo, peterz, acme, gmx, namhyung On 08/05/2026 2:29 pm, James Clark wrote: > > > On 08/05/2026 9:27 am, Ian Rogers wrote: >> Add a new shell test `inject_aslr.sh` to verify the `perf inject --aslr` >> feature. The test covers: >> - Basic address remapping for user space samples. >> - Pipe mode coverage for `perf record` piped into `perf inject --aslr`. >> - Callchain address remapping. >> - Consistency of `perf report` output before and after injection. >> - Pipe mode report consistency. >> - Dropping of samples that leak ASLR info (physical addresses). >> - Kernel address remapping (utilizing a dedicated kernel-intensive VFS >> dd workload >> to guarantee continuous timer interrupts sampling flow inside >> kernel privilege states). >> - Kernel report consistency with address normalization. >> >> The test suite is hardened with global 'set -o pipefail' assertions to >> catch >> pipeline failures, stream-consuming awk processors to handle SIGPIPE >> signals, >> and a dedicated pipe output scenario validating raw 'perf inject -o -' >> stdout >> streams. >> >> Assisted-by: Gemini-CLI:Google Gemini 3 >> Signed-off-by: Ian Rogers <irogers@google.com> >> --- >> v6: Refactor kernel-space sampling test cases to utilize a dedicated >> system-call intensive VFS dd workload (kprog) instead of purely >> userspace-bound tight loops, guaranteeing high-density kernel >> privilege state sampling streams and eliminating intermittent >> execution flakiness dropouts. 
>> > > > Hi Ian, > > V5 passed on X86, but now I get this test failing about 50% of the time > with output like: > > > Test user register stripping > User registers stripping test [Failed - report parsing differs] > Showing first 20 lines of diff: > --- /tmp/perf-test-aslr.ssH9urcfri/report_regs1.clean 2026-05-08 > 14:14:02.127298207 +0100 > +++ /tmp/perf-test-aslr.ssH9urcfri/report_regs2.clean 2026-05-08 > 14:14:02.129298219 +0100 > @@ -30,8 +30,8 @@ > 0.02% perf ld-linux-x86-64.so.2 [.] mmap64 > 0.02% perf-noploop [kernel.kallsyms] [k] kmem_cache_free > 0.02% perf-noploop [kernel.kallsyms] [k] nohz_balancer_kick > - 0.02% perf-noploop [kernel.kallsyms] [k] pvclock_gtod_notify > 0.02% perf-noploop [kernel.kallsyms] [k] try_to_wake_up > + 0.02% perf-noploop [kvm] [k] pvclock_gtod_notify > 0.02% perf-noploop libc.so.6 [.] __cxa_finalize > 0.04% perf ld-linux-x86-64.so.2 [.] strcmp > 0.05% perf libLLVM-15.so.1 [.] > llvm::StringMapImpl::LookupBucketFor(llvm::StringRef) > ---- end ---- > > or: > > Test user register stripping > User registers stripping test [Failed - report parsing differs] > Showing first 20 lines of diff: > --- /tmp/perf-test-aslr.NoDUUXtHyh/report_regs1.clean 2026-05-08 > 14:05:31.109246491 +0100 > +++ /tmp/perf-test-aslr.NoDUUXtHyh/report_regs2.clean 2026-05-08 > 14:05:31.111246503 +0100 > @@ -2,8 +2,8 @@ > 0.01% perf [kernel.kallsyms] [k] > find_mergeable_anon_vma > 0.01% perf [kernel.kallsyms] [k] finish_fault > 0.01% perf [kernel.kallsyms] [k] > pte_offset_map_rw_nolock > + 0.02% perf [amdgpu] [k] amdgpu_device_rreg > 0.02% perf [kernel.kallsyms] [k] > __alloc_frozen_pages_noprof > - 0.02% perf [kernel.kallsyms] [k] amdgpu_device_rreg > 0.02% perf [kernel.kallsyms] [k] > __build_id_parse.isra.0 > 0.02% perf [kernel.kallsyms] [k] filemap_get_entry > 0.02% perf [kernel.kallsyms] [k] filemap_map_pages > ---- end ---- > > > And on Arm I get a hang/infinite loop every time in "Test kernel ASLR > remapping". 
Looks like it could be related to the changes in V6 as I > didn't see it on V5: After around an hour it ended up passing successfully, so not an infinite loop, just very slow. Then after that, "User registers stripping test" failed the same way as on x86. > > #0 __read_once_size (size=4, res=0xffffe56c64a0, p=0xaaaaeaedbab8) > at linux/tools/include/linux/compiler.h:180 > #1 atomic_read (v=0xaaaaeaedbab8) at linux/tools/include/asm- > generic/atomic-gcc.h:26 > #2 0x0000aaaaaf65cd6c in refcount_read (r=0xaaaaeaedbab8) > at linux/tools/include/linux/refcount.h:70 > #3 0x0000aaaaaf65d9dc in check_invariants (maps=0xaaaae7e3b480) at > util/maps.c:114 > #4 0x0000aaaaaf65eef8 in maps__insert (maps=0xaaaae7e3b480, > map=0xaaaaec2ccf10) at util/maps.c:536 > #5 0x0000aaaaaf62a028 in maps__split_kallsyms (kmaps=0xaaaae7e3b480, > dso=0xaaaae7e3f910, delta=1879048192, > initial_map=0xaaaae7e3fab0) at util/symbol.c:986 > #6 0x0000aaaaaf62b550 in __dso__load_kallsyms (dso=0xaaaae7e3f910, > filename=0xaaaae7e55200 "/proc/kallsyms", > map=0xaaaae7e3fab0, no_kcore=false) at util/symbol.c:1530 > #7 0x0000aaaaaf62b5bc in dso__load_kallsyms (dso=0xaaaae7e3f910, > filename=0xaaaae7e55200 "/proc/kallsyms", > map=0xaaaae7e3fab0) at util/symbol.c:1536 > #8 0x0000aaaaaf62cbc0 in dso__load_kernel_sym (dso=0xaaaae7e3f910, > map=0xaaaae7e3fab0) at util/symbol.c:2125 > #9 0x0000aaaaaf62bc5c in dso__load (dso=0xaaaae7e3f910, > map=0xaaaae7e3fab0) at util/symbol.c:1721 > #10 0x0000aaaaaf65b98c in map__load (map=0xaaaae7e3fab0) at util/ > map.c:351 > #11 0x0000aaaaaf5e43cc in thread__find_map (thread=0xaaaae7e443b0, > cpumode=1 '\001', addr=18446603336494207932, > al=0xffffe56c8c28) at util/event.c:744 > #12 0x0000aaaaaf5e4810 in machine__resolve (machine=0xaaaae7e3bee0, > al=0xffffe56c8c28, sample=0xffffe56c8df0) > at util/event.c:818 > #13 0x0000aaaaaf41d850 in process_sample_event (tool=0xffffe56c93d0, > event=0xffffb1091ec8, sample=0xffffe56c8df0, > evsel=0xaaaae7e3b580, 
machine=0xaaaae7e3bee0) at builtin- > script.c:2686 > #14 0x0000aaaaaf6668f4 in evlist__deliver_sample > (evlist=0xaaaae7e3c550, tool=0xffffe56c93d0, event=0xffffb1091ec8, > sample=0xffffe56c8df0, evsel=0xaaaae7e3b580, > machine=0xaaaae7e3bee0) at util/session.c:1335 > #15 0x0000aaaaaf667000 in machines__deliver_event > (machines=0xaaaae7e3bee0, evlist=0xaaaae7e3c550, event=0xffffb1091ec8, > sample=0xffffe56c8df0, tool=0xffffe56c93d0, file_offset=3784, > file_path=0xaaaae7e3b540 "/tmp/perf-test-aslr.J1XB8pvpFy/ > perf.data2.kernel.FA0Uvd") at util/session.c:1502 > #16 0x0000aaaaaf667538 in perf_session__deliver_event > (session=0xaaaae7e3bca0, event=0xffffb1091ec8, > tool=0xffffe56c93d0, file_offset=3784, > file_path=0xaaaae7e3b540 "/tmp/perf-test-aslr.J1XB8pvpFy/ > perf.data2.kernel.FA0Uvd") at util/session.c:1593 > #17 0x0000aaaaaf662bbc in ordered_events__deliver_event > (oe=0xaaaae7e3c460, event=0xaaaae7e44740) at util/session.c:134 > #18 0x0000aaaaaf672c98 in do_flush (oe=0xaaaae7e3c460, > show_progress=true) at util/ordered-events.c:245 > #19 0x0000aaaaaf673048 in __ordered_events__flush (oe=0xaaaae7e3c460, > how=OE_FLUSH__FINAL, timestamp=0) > at util/ordered-events.c:324 > #20 0x0000aaaaaf673154 in ordered_events__flush (oe=0xaaaae7e3c460, > how=OE_FLUSH__FINAL) at util/ordered-events.c:342 > #21 0x0000aaaaaf669e54 in __perf_session__process_events > (session=0xaaaae7e3bca0) at util/session.c:2508 > #22 0x0000aaaaaf66a790 in perf_session__process_events > (session=0xaaaae7e3bca0) at util/session.c:2675 > #23 0x0000aaaaaf41f59c in __cmd_script (script=0xffffe56c93d0) at > builtin-script.c:3241 > #24 0x0000aaaaaf4242b0 in cmd_script (argc=0, argv=0xffffe56cb370) at > builtin-script.c:4586 > #25 0x0000aaaaaf4a03f8 in run_builtin (p=0xaaaaafa14e60 > <commands+480>, argc=3, argv=0xffffe56cb370) at perf.c:348 > #26 0x0000aaaaaf4a066c in handle_internal_command (argc=3, > argv=0xffffe56cb370) at perf.c:398 > #27 0x0000aaaaaf4a0824 in run_argv 
(argcp=0xffffe56cb1ac, > argv=0xffffe56cb1a0) at perf.c:442 > #28 0x0000aaaaaf4a0b4c in main (argc=3, argv=0xffffe56cb370) at > perf.c:549 > ^ permalink raw reply [flat|nested] 183+ messages in thread
* Re: [PATCH v6 5/6] perf test: Add inject ASLR test 2026-05-08 8:27 ` [PATCH v6 5/6] perf test: Add inject ASLR test Ian Rogers 2026-05-08 13:29 ` James Clark @ 2026-05-11 7:34 ` Namhyung Kim 1 sibling, 0 replies; 183+ messages in thread From: Namhyung Kim @ 2026-05-11 7:34 UTC (permalink / raw) To: Ian Rogers Cc: acme, gmx, james.clark, adrian.hunter, jolsa, linux-kernel, linux-perf-users, mingo, peterz On Fri, May 08, 2026 at 01:27:25AM -0700, Ian Rogers wrote: > Add a new shell test `inject_aslr.sh` to verify the `perf inject --aslr` > feature. The test covers: > - Basic address remapping for user space samples. > - Pipe mode coverage for `perf record` piped into `perf inject --aslr`. > - Callchain address remapping. > - Consistency of `perf report` output before and after injection. > - Pipe mode report consistency. > - Dropping of samples that leak ASLR info (physical addresses). > - Kernel address remapping (utilizing a dedicated kernel-intensive VFS dd workload > to guarantee continuous timer interrupts sampling flow inside kernel privilege states). > - Kernel report consistency with address normalization. > > The test suite is hardened with global 'set -o pipefail' assertions to catch > pipeline failures, stream-consuming awk processors to handle SIGPIPE signals, > and a dedicated pipe output scenario validating raw 'perf inject -o -' stdout > streams. > > Assisted-by: Gemini-CLI:Google Gemini 3 > Signed-off-by: Ian Rogers <irogers@google.com> > --- > v6: Refactor kernel-space sampling test cases to utilize a dedicated > system-call intensive VFS dd workload (kprog) instead of purely > userspace-bound tight loops, guaranteeing high-density kernel > privilege state sampling streams and eliminating intermittent > execution flakiness dropouts. > > v5: Harden test suite verification pipelines by upgrading report > checks to strict sorted line-by-line diff comparisons to > accommodate remapped pointer shifts. 
Append || true fallback > operators to grep-v filtering pipelines to prevent the shell test > from spuriously aborting under set -o pipefail on empty inputs, > ensuring graceful failure checks trigger correctly. > > v4: Reorder set -e/pipefail to prevent temp file leakage in root > directory on unprivileged record failures when run as root. Ensure > grep report filters have || true suffixes to avoid aborts under > pipefail. Add comprehensive pipe stdout injection attributes > validation case. > > v3: Harden script with pipefail, SIGPIPE awk pipeline fixes, callchain > empty data asserts, baseline sample verification, and grep report > abort protections. Reorder set -e/pipefail to prevent stack leaks > in mktemp failures. > > v2: Add sum comparison for kernel overhead and 32-bit math corrections. Add > awk with gsub for trailing dots and brackets normalizations. Trap EXIT, > prevent race conditions and avoid hardcoded perf binary. > --- > tools/perf/tests/shell/inject_aslr.sh | 460 ++++++++++++++++++++++++++ > 1 file changed, 460 insertions(+) > create mode 100755 tools/perf/tests/shell/inject_aslr.sh > > diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh > new file mode 100755 > index 000000000000..6363a0f69d2b > --- /dev/null > +++ b/tools/perf/tests/shell/inject_aslr.sh > @@ -0,0 +1,460 @@ > +#!/bin/bash > +# perf inject --aslr test > +# SPDX-License-Identifier: GPL-2.0 > + > +set -e > +set -o pipefail > + > +shelldir=$(dirname "$0") > +# shellcheck source=lib/perf_has_symbol.sh > +. 
"${shelldir}"/lib/perf_has_symbol.sh > + > +sym="noploop" > + > +skip_test_missing_symbol ${sym} > + > +# Create global temp directory > +temp_dir=$(mktemp -d /tmp/perf-test-aslr.XXXXXXXXXX) > +data="${temp_dir}/perf.data" > +data2="${temp_dir}/perf.data2" > + > +prog="perf test -w noploop" > +[ "$(uname -m)" = "s390x" ] && prog="$prog 3" > +err=0 > +kprog="dd if=/dev/zero of=/dev/null bs=1M count=500" > + > +cleanup() { > + # Check if temp_dir is set and looks sane before removing > + if [[ "${temp_dir}" =~ ^/tmp/perf-test-aslr\. ]]; then > + rm -rf "${temp_dir}" > + fi > +} > + > +trap_cleanup() { > + cleanup > + exit 1 > +} > + > +trap cleanup EXIT > +trap trap_cleanup TERM INT > + > +get_noploop_addr() { > + local file=$1 > + perf script -i "$file" | awk ' > + BEGIN { found=0 } > + { > + for (i=1; i<=NF; i++) { > + if ($i ~ /noploop\+/) { > + if (!found) { > + print $(i-1) > + found=1 > + } > + } > + } > + }' > +} > + > +test_basic_aslr() { > + echo "Test basic ASLR remapping" > + local data > + data=$(mktemp "${temp_dir}/perf.data.basic.XXXXXX") > + local data2 > + data2=$(mktemp "${temp_dir}/perf.data2.basic.XXXXXX") Why not use the globally defined data and data2 here and below? 
Thanks, Namhyung > + > + perf record -e task-clock:u -o "${data}" ${prog} > + perf inject -v --aslr -i "${data}" -o "${data2}" > + > + orig_addr=$(get_noploop_addr "${data}") > + new_addr=$(get_noploop_addr "${data2}") > + > + echo "Basic ASLR: orig_addr=$orig_addr, new_addr=$new_addr" > + > + if [ -z "$orig_addr" ]; then > + echo "Basic ASLR test [Failed - no noploop samples in original file]" > + err=1 > + elif [ -z "$new_addr" ]; then > + echo "Basic ASLR test [Failed - could not find remapped address]" > + err=1 > + elif [ "$orig_addr" = "$new_addr" ]; then > + echo "Basic ASLR test [Failed - addresses are not remapped]" > + err=1 > + else > + echo "Basic ASLR test [Success]" > + fi > +} > + > +test_pipe_aslr() { > + echo "Test pipe mode ASLR remapping" > + local data > + data=$(mktemp "${temp_dir}/perf.data.pipe.XXXXXX") > + local data2 > + data2=$(mktemp "${temp_dir}/perf.data2.pipe.XXXXXX") > + > + # Use tee to save the original pipe data for comparison > + perf record -e task-clock:u -o - ${prog} | tee "${data}" | perf inject --aslr -o "${data2}" > + > + orig_addr=$(get_noploop_addr "${data}") > + new_addr=$(get_noploop_addr "${data2}") > + > + echo "Pipe ASLR: orig_addr=$orig_addr, new_addr=$new_addr" > + > + if [ -z "$orig_addr" ]; then > + echo "Pipe ASLR test [Failed - no noploop samples in original file]" > + err=1 > + elif [ -z "$new_addr" ]; then > + echo "Pipe ASLR test [Failed - could not find remapped address]" > + err=1 > + elif [ "$orig_addr" = "$new_addr" ]; then > + echo "Pipe ASLR test [Failed - addresses are not remapped]" > + err=1 > + else > + echo "Pipe ASLR test [Success]" > + fi > +} > + > +test_callchain_aslr() { > + echo "Test Callchain ASLR remapping" > + local data > + data=$(mktemp "${temp_dir}/perf.data.callchain.XXXXXX") > + local data2 > + data2=$(mktemp "${temp_dir}/perf.data2.callchain.XXXXXX") > + > + perf record -g -e task-clock:u -o "${data}" ${prog} > + perf inject --aslr -i "${data}" -o "${data2}" > + > + 
orig_addr=$(get_noploop_addr "${data}") > + new_addr=$(get_noploop_addr "${data2}") > + > + echo "Callchain ASLR: orig_addr=$orig_addr, new_addr=$new_addr" > + > + if [ -z "$orig_addr" ]; then > + echo "Callchain ASLR test [Failed - no noploop samples in original file]" > + err=1 > + elif [ -z "$new_addr" ]; then > + echo "Callchain ASLR test [Failed - could not find remapped address]" > + err=1 > + elif [ "$orig_addr" = "$new_addr" ]; then > + echo "Callchain ASLR test [Failed - addresses are not remapped]" > + err=1 > + else > + # Extract callchain addresses (indented lines starting with hex addresses) > + orig_callchain=$(perf script -i "${data}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') > + new_callchain=$(perf script -i "${data2}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') > + > + if [ -z "$orig_callchain" ]; then > + echo "Callchain ASLR test [Failed - no callchain samples in original file]" > + err=1 > + elif [ -z "$new_callchain" ]; then > + echo "Callchain ASLR test [Failed - callchain data was dropped]" > + err=1 > + elif [ "$orig_callchain" = "$new_callchain" ]; then > + echo "Callchain ASLR test [Failed - callchain addresses were not remapped]" > + err=1 > + else > + echo "Callchain ASLR test [Success]" > + fi > + fi > +} > + > +test_report_aslr() { > + echo "Test perf report consistency" > + local data > + data=$(mktemp "${temp_dir}/perf.data.report.XXXXXX") > + local data2 > + data2=$(mktemp "${temp_dir}/perf.data2.report.XXXXXX") > + local data_clean > + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") > + > + perf record -e task-clock:u -o "${data}" ${prog} > + # Use -b to inject build-ids and force ordered events processing in both > + perf inject -b -i "${data}" -o "${data_clean}" > + perf inject -v -b --aslr -i "${data}" -o "${data2}" > + > + local report1="${temp_dir}/report1" > + local report2="${temp_dir}/report2" > + local report1_clean="${temp_dir}/report1.clean" > + local report2_clean="${temp_dir}/report2.clean" > + local 
diff_file="${temp_dir}/diff" > + > + perf report -i "${data_clean}" --stdio > "${report1}" > + perf report -i "${data2}" --stdio > "${report2}" > + > + # Strip headers and compare lines with percentages > + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true > + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true > + > + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true > + > + if [ ! -s "${report1_clean}" ]; then > + echo "Report ASLR test [Failed - no samples captured]" > + err=1 > + elif [ -s "${diff_file}" ]; then > + echo "Report ASLR test [Failed - reports differ]" > + echo "Showing first 20 lines of diff:" > + head -n 20 "${diff_file}" > + err=1 > + else > + echo "Report ASLR test [Success]" > + fi > +} > + > +test_pipe_report_aslr() { > + echo "Test pipe mode perf report consistency" > + local data > + data=$(mktemp "${temp_dir}/perf.data.pipe_report.XXXXXX") > + local data2 > + data2=$(mktemp "${temp_dir}/perf.data2.pipe_report.XXXXXX") > + local data_clean > + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") > + > + # Use tee to save the original pipe data, then process it with inject -b > + perf record -e task-clock:u -o - ${prog} | \ > + tee "${data}" | \ > + perf inject -b --aslr -o "${data2}" > + perf inject -b -i "${data}" -o "${data_clean}" > + > + local report1="${temp_dir}/report1" > + local report2="${temp_dir}/report2" > + local report1_clean="${temp_dir}/report1.clean" > + local report2_clean="${temp_dir}/report2.clean" > + local diff_file="${temp_dir}/diff" > + > + perf report -i "${data_clean}" --stdio > "${report1}" > + perf report -i "${data2}" --stdio > "${report2}" > + > + # Strip headers and compare lines with percentages > + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true > + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true > + > + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true > + > + if 
[ ! -s "${report1_clean}" ]; then > + echo "Pipe Report ASLR test [Failed - no samples captured]" > + err=1 > + elif [ -s "${diff_file}" ]; then > + echo "Pipe Report ASLR test [Failed - reports differ]" > + echo "Showing first 20 lines of diff:" > + head -n 20 "${diff_file}" > + err=1 > + else > + echo "Pipe Report ASLR test [Success]" > + fi > +} > + > +test_pipe_out_report_aslr() { > + echo "Test pipe output mode perf report consistency" > + local data > + data=$(mktemp "${temp_dir}/perf.data.pipe_out_report.XXXXXX") > + local data_clean > + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") > + > + perf record -e task-clock:u -o "${data}" ${prog} > + perf inject -b -i "${data}" -o "${data_clean}" > + > + local report1="${temp_dir}/report1" > + local report2="${temp_dir}/report2" > + local report1_clean="${temp_dir}/report1.clean" > + local report2_clean="${temp_dir}/report2.clean" > + local diff_file="${temp_dir}/diff" > + > + perf report -i "${data_clean}" --stdio > "${report1}" > + perf inject -b --aslr -i "${data}" -o - | perf report -i - --stdio > "${report2}" > + > + # Strip headers and compare lines with percentages > + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true > + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true > + > + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true > + > + if [ ! 
-s "${report1_clean}" ]; then > + echo "Pipe Output Report ASLR test [Failed - no samples captured]" > + err=1 > + elif [ -s "${diff_file}" ]; then > + echo "Pipe Output Report ASLR test [Failed - reports differ]" > + echo "Showing first 20 lines of diff:" > + head -n 20 "${diff_file}" > + err=1 > + else > + echo "Pipe Output Report ASLR test [Success]" > + fi > +} > + > +test_dropped_samples() { > + echo "Test dropped samples (phys-data)" > + local data > + data=$(mktemp "${temp_dir}/perf.data.dropped.XXXXXX") > + local data2 > + data2=$(mktemp "${temp_dir}/perf.data2.dropped.XXXXXX") > + > + # Check if --phys-data is supported by recording a short run > + if ! perf record -e task-clock:u --phys-data -o "${data}" -- sleep 0.1 > /dev/null 2>&1; then > + echo "Skipping dropped samples test as --phys-data is not supported" > + return > + fi > + > + perf record -e task-clock:u --phys-data -o "${data}" ${prog} > + perf inject --aslr -i "${data}" -o "${data2}" > + > + # Verify that the original file actually contained samples! > + orig_samples=$(perf script -i "${data}" | wc -l) > + if [ "$orig_samples" -eq 0 ]; then > + echo "Dropped samples test [Failed - no samples in original file]" > + err=1 > + else > + # Verify that samples are dropped. > + samples_count=$(perf script -i "${data2}" | wc -l) > + > + if [ "$samples_count" -gt 0 ]; then > + echo "Dropped samples test [Failed - samples were not dropped]" > + err=1 > + else > + echo "Dropped samples test [Success]" > + fi > + fi > +} > + > +test_kernel_aslr() { > + echo "Test kernel ASLR remapping" > + local kdata > + kdata=$(mktemp "${temp_dir}/perf.data.kernel.XXXXXX") > + local kdata2 > + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel.XXXXXX") > + local log_file > + log_file=$(mktemp "${temp_dir}/kernel_record.log.XXXXXX") > + > + # Try to record kernel samples > + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then > + echo "Skipping kernel ASLR test as recording failed (maybe no permissions)" > + return > + fi > + > + # Check for warning about kernel map restriction > + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then > + echo "Skipping kernel ASLR test as kernel map could not be recorded (permissions restricted)" > + return > + fi > + > + perf inject -v --aslr -i "${kdata}" -o "${kdata2}" > + > + # Check if kernel addresses are remapped. > + # Find the field that ends with :k: (the event name) and take the next field! > + orig_addr=$(perf script -i "${kdata}" | awk ' > + BEGIN { found=0 } > + { > + for (i=1; i<NF; i++) { > + if ($i ~ /:[k]+:?$/) { > + if (!found) { > + print $(i+1) > + found=1 > + } > + } > + } > + }') > + new_addr=$(perf script -i "${kdata2}" | awk ' > + BEGIN { found=0 } > + { > + for (i=1; i<NF; i++) { > + if ($i ~ /:[k]+:?$/) { > + if (!found) { > + print $(i+1) > + found=1 > + } > + } > + } > + }') > + > + echo "Kernel ASLR: orig_addr=$orig_addr, new_addr=$new_addr" > + > + if [ -z "$orig_addr" ]; then > + echo "Kernel ASLR test [Failed - no kernel samples in original file]" > + err=1 > + elif [ -z "$new_addr" ]; then > + echo "Kernel ASLR test [Failed - could not find remapped address]" > + err=1 > + elif [ "$orig_addr" = "$new_addr" ]; then > + echo "Kernel ASLR test [Failed - addresses are not remapped]" > + err=1 > + else > + echo "Kernel ASLR test [Success]" > + fi > +} > + > +test_kernel_report_aslr() { > + echo "Test kernel perf report consistency" > + local kdata > + kdata=$(mktemp "${temp_dir}/perf.data.kernel_report.XXXXXX") > + local kdata2 > + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel_report.XXXXXX") > + local data_clean > + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") > + local log_file > + log_file=$(mktemp "${temp_dir}/kernel_report_record.log.XXXXXX") > + > + # Try to record kernel samples > + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then > + echo "Skipping kernel report test as recording failed (maybe no permissions)" > + return > + fi > + > + # Check for warning about kernel map restriction > + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then > + echo "Skipping kernel report test as kernel map could not be recorded (permissions restricted)" > + return > + fi > + > + # Use -b to inject build-ids and force ordered events processing in both > + perf inject -b -i "${kdata}" -o "${data_clean}" > + perf inject -v -b --aslr -i "${kdata}" -o "${kdata2}" > + > + local report1="${temp_dir}/report_kernel1" > + local report2="${temp_dir}/report_kernel2" > + local report1_clean="${temp_dir}/report_kernel1.clean" > + local report2_clean="${temp_dir}/report_kernel2.clean" > + > + perf report -i "${data_clean}" --stdio > "${report1}" > + perf report -i "${kdata2}" --stdio > "${report2}" > + > + # Strip headers and compare lines with percentages > + grep '%' "${report1}" | grep -v '^#' > "${report1_clean}" || true > + grep '%' "${report2}" | grep -v '^#' > "${report2_clean}" || true > + > + # Normalize kernel DSOs and addresses in clean reports > + # This allows kernel modules to be either a module or kernel.kallsyms > + local report1_norm="${temp_dir}/report_kernel1.norm" > + local report2_norm="${temp_dir}/report_kernel2.norm" > + local diff_file="${temp_dir}/diff_kernel" > + > + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report1_clean}" | \ > + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ > + sort > "${report1_norm}" || true > + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report2_clean}" | \ > + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ > + sort > "${report2_norm}" || true > + > + diff -u -w "${report1_norm}" "${report2_norm}" > "${diff_file}" || true > + > + if [ ! 
-s "${report1_norm}" ]; then > + echo "Kernel Report ASLR test [Failed - no samples captured]" > + err=1 > + elif [ -s "${diff_file}" ]; then > + echo "Kernel Report ASLR test [Failed - reports differ]" > + echo "Showing first 20 lines of diff:" > + head -n 20 "${diff_file}" > + err=1 > + else > + echo "Kernel Report ASLR test [Success]" > + fi > +} > + > +test_basic_aslr > +test_pipe_aslr > +test_callchain_aslr > +test_report_aslr > +test_pipe_report_aslr > +test_pipe_out_report_aslr > +test_dropped_samples > +test_kernel_aslr > +test_kernel_report_aslr > + > +cleanup > +exit $err > -- > 2.54.0.563.g4f69b47b94-goog > ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v6 6/6] perf aslr: Strip sample registers 2026-05-08 8:27 ` [PATCH v6 0/6] perf tools: Add inject --aslr feature and prerequisite robustness fixes Ian Rogers ` (4 preceding siblings ...) 2026-05-08 8:27 ` [PATCH v6 5/6] perf test: Add inject ASLR test Ian Rogers @ 2026-05-08 8:27 ` Ian Rogers 2026-05-08 21:49 ` sashiko-bot 2026-05-19 8:08 ` [PATCH v7 0/4] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 6 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-05-08 8:27 UTC (permalink / raw) To: irogers, acme, gmx, james.clark, namhyung Cc: adrian.hunter, jolsa, linux-kernel, linux-perf-users, mingo, peterz When the ASLR tracking tool encounters sample events containing user or interrupt register dumps (PERF_SAMPLE_REGS_USER / PERF_SAMPLE_REGS_INTR), it previously dropped the entire sample event conservatively to prevent leakage of absolute virtual memory pointers embedded inside raw register frames. If a trace session was recorded with register collection flags enabled, this resulted in 100% sample drop rates, and this happened by default for ARM64. Refactor the ASLR tool to strip out only the register dump payload words from PERF_RECORD_SAMPLE event streams, automatically shrinking the output sample header size. Incoming PERF_RECORD_ATTR events are scrubbed up front to clear the register dump bit selection flags and masks, and output sample ABI words are safely overwritten to PERF_SAMPLE_REGS_ABI_NONE. This keeps downstream evsel parsers perfectly synchronized while retaining full, comprehensive sample profiles completely clear of secret register data frames. Verification parity is established inside inject_aslr.sh via a dedicated sorted report diff comparison validation case proving zero starvation and absolute secrecy.
Assisted-by: Gemini-CLI:Google Gemini 3 Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/builtin-inject.c | 11 ++++++ tools/perf/tests/shell/inject_aslr.sh | 51 +++++++++++++++++++++++++++ tools/perf/util/aslr.c | 27 +++++++------- 3 files changed, 75 insertions(+), 14 deletions(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 51dcf248b653..7a17ce019657 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -2463,6 +2463,17 @@ static int __cmd_inject(struct perf_inject *inject) } } + if (inject->aslr) { + struct evsel *evsel; + + evlist__for_each_entry(session->evlist, evsel) { + evsel__reset_sample_bit(evsel, REGS_USER); + evsel__reset_sample_bit(evsel, REGS_INTR); + evsel->core.attr.sample_regs_user = 0; + evsel->core.attr.sample_regs_intr = 0; + } + } + session->header.data_offset = output_data_offset; diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh index 6363a0f69d2b..323782c3802d 100755 --- a/tools/perf/tests/shell/inject_aslr.sh +++ b/tools/perf/tests/shell/inject_aslr.sh @@ -446,6 +446,56 @@ test_kernel_report_aslr() { fi } +test_regs_stripping() { + echo "Test user register stripping" + local rdata="${temp_dir}/perf.data.regs" + local rdata2="${temp_dir}/perf.data.regs.injected" + local rdata_clean="${temp_dir}/perf.data.regs.clean" + + if ! 
perf record --user-regs -o "${rdata}" ${prog} > /dev/null 2>&1; then + echo "Skipping user registers test as recording failed (unsupported flag/platform)" + return + fi + + perf inject -b -i "${rdata}" -o "${rdata_clean}" + perf inject -v -b --aslr -i "${rdata}" -o "${rdata2}" + + local report1="${temp_dir}/report_regs1" + local report2="${temp_dir}/report_regs2" + local report1_clean="${temp_dir}/report_regs1.clean" + local report2_clean="${temp_dir}/report_regs2.clean" + local diff_file="${temp_dir}/diff_regs" + + perf report -i "${rdata_clean}" --stdio > "${report1}" 2>/dev/null || true + perf report -i "${rdata2}" --stdio > "${report2}" 2>/dev/null || true + + grep '%' "${report1}" | grep -v '^#' | grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! 
-s "${report1_clean}" ]; then + echo "User registers stripping test [Failed - profile trace starved/empty]" + err=1 + return + elif [ -s "${diff_file}" ]; then + echo "User registers stripping test [Failed - report parsing differs]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + return + fi + + local script_dump="${temp_dir}/script_regs_dump" + perf script -D -i "${rdata2}" > "${script_dump}" 2>/dev/null || true + if grep -q "PERF_SAMPLE_REGS_USER" "${script_dump}"; then + echo "User registers stripping test [Failed - register dumps still present]" + err=1 + else + echo "User registers stripping test [Success]" + fi +} + test_basic_aslr test_pipe_aslr test_callchain_aslr @@ -455,6 +505,7 @@ test_pipe_out_report_aslr test_dropped_samples test_kernel_aslr test_kernel_report_aslr +test_regs_stripping cleanup exit $err diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c index 09b7f2f8fb85..e5369589a733 100644 --- a/tools/perf/util/aslr.c +++ b/tools/perf/util/aslr.c @@ -751,18 +751,13 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, union perf_ev if (abi != PERF_SAMPLE_REGS_ABI_NONE) { u64 nr = hweight64(evsel->core.attr.sample_regs_user); - if (nr > max_i - i || nr > max_j - j) { + if (nr > max_i - i) { ret = -EFAULT; goto out_put; } - memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); i += nr; - j += nr; + out_array[j-1] = PERF_SAMPLE_REGS_ABI_NONE; } - /* TODO: can this be less conservative? 
*/ - pr_debug("Dropping regs user sample as possible ASLR leak\n"); - ret = 0; - goto out_put; } if (sample_type & PERF_SAMPLE_STACK_USER) { u64 size; @@ -806,18 +801,13 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, union perf_ev if (abi != PERF_SAMPLE_REGS_ABI_NONE) { u64 nr = hweight64(evsel->core.attr.sample_regs_intr); - if (nr > max_i - i || nr > max_j - j) { + if (nr > max_i - i) { ret = -EFAULT; goto out_put; } - memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); i += nr; - j += nr; + out_array[j-1] = PERF_SAMPLE_REGS_ABI_NONE; } - /* TODO: can this be less conservative? */ - pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); - ret = 0; - goto out_put; } if (sample_type & PERF_SAMPLE_PHYS_ADDR) { COPY_U64(); /* phys_addr */ @@ -907,6 +897,15 @@ static int aslr_tool__process_attr(const struct perf_tool *tool, if (new_event->attr.attr.type == PERF_TYPE_BREAKPOINT) new_event->attr.attr.bp_addr = 0; /* Conservatively remove addresses. */ + if (new_event->attr.attr.sample_type & PERF_SAMPLE_REGS_USER) { + new_event->attr.attr.sample_type &= ~PERF_SAMPLE_REGS_USER; + new_event->attr.attr.sample_regs_user = 0; + } + if (new_event->attr.attr.sample_type & PERF_SAMPLE_REGS_INTR) { + new_event->attr.attr.sample_type &= ~PERF_SAMPLE_REGS_INTR; + new_event->attr.attr.sample_regs_intr = 0; + } + return delegate->attr(delegate, new_event, pevlist); } -- 2.54.0.563.g4f69b47b94-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v6 6/6] perf aslr: Strip sample registers 2026-05-08 8:27 ` [PATCH v6 6/6] perf aslr: Strip sample registers Ian Rogers @ 2026-05-08 21:49 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-05-08 21:49 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 2 potential issue(s) to consider: - [High] Stripping register feature bits from event attributes while retaining the ABI word in the payload permanently misaligns downstream parsers. - [High] In pipe mode, dynamically clearing register bits from ATTR events causes `perf inject` to misinterpret incoming raw SAMPLE events, leading to out-of-bounds reads and silent sample drops. -- commit 2f6701e115da3f7f7833ab4467a9c51f40012620 Author: Ian Rogers <irogers@google.com> perf aslr: Strip sample registers This commit modifies the perf ASLR tool to strip register dump payload words from sample events instead of dropping the entire sample. It also strips the corresponding sample bit flags from the event attributes to keep downstream parsers synchronized. > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > index 09b7f2f8fb850..e5369589a7338 100644 > --- a/tools/perf/util/aslr.c > +++ b/tools/perf/util/aslr.c > @@ -751,18 +751,13 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, union perf_ev > if (abi != PERF_SAMPLE_REGS_ABI_NONE) { > u64 nr = hweight64(evsel->core.attr.sample_regs_user); > > - if (nr > max_i - i || nr > max_j - j) { > + if (nr > max_i - i) { > ret = -EFAULT; > goto out_put; > } > - memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); > i += nr; > - j += nr; > + out_array[j-1] = PERF_SAMPLE_REGS_ABI_NONE; Since the PERF_SAMPLE_REGS_USER bit is completely cleared from the event's sample_type bitmask, downstream parsers will not expect an ABI word and will not advance their read pointer. 
Does leaving the 8-byte ABI word in out_array permanently misalign downstream parsers? It seems they might misinterpret this leftover word as the data for the next field present in the bitmask sequence, shifting all subsequent sample fields. > } [ ... ] > @@ -907,6 +897,15 @@ static int aslr_tool__process_attr(const struct perf_tool *tool, > if (new_event->attr.attr.type == PERF_TYPE_BREAKPOINT) > new_event->attr.attr.bp_addr = 0; /* Conservatively remove addresses. */ > > + if (new_event->attr.attr.sample_type & PERF_SAMPLE_REGS_USER) { > + new_event->attr.attr.sample_type &= ~PERF_SAMPLE_REGS_USER; > + new_event->attr.attr.sample_regs_user = 0; > + } > + if (new_event->attr.attr.sample_type & PERF_SAMPLE_REGS_INTR) { > + new_event->attr.attr.sample_type &= ~PERF_SAMPLE_REGS_INTR; > + new_event->attr.attr.sample_regs_intr = 0; > + } > + In pipe mode, ATTR events arrive dynamically before SAMPLE events. If we intercept these ATTR events and strip the register bits from the session's internal evsel, will aslr_tool__process_sample() still be able to parse incoming raw SAMPLE events correctly? Because the bits would be missing from evsel->core.attr.sample_type, it looks like the parser will skip the register parsing logic and fail to advance its read pointer over the ABI word and register dump that are actually present in the raw input payload. This could cause it to read subsequent fields from the middle of the register dump. > return delegate->attr(delegate, new_event, pevlist); > } -- Sashiko AI review · https://sashiko.dev/#/patchset/20260508082726.2795191-1-irogers@google.com?part=6 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v7 0/4] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes 2026-05-08 8:27 ` [PATCH v6 0/6] perf tools: Add inject --aslr feature and prerequisite robustness fixes Ian Rogers ` (5 preceding siblings ...) 2026-05-08 8:27 ` [PATCH v6 6/6] perf aslr: Strip sample registers Ian Rogers @ 2026-05-19 8:08 ` Ian Rogers 2026-05-19 8:08 ` [PATCH v7 1/4] perf maps: Add maps__mutate_mapping Ian Rogers ` (4 more replies) 6 siblings, 5 replies; 183+ messages in thread From: Ian Rogers @ 2026-05-19 8:08 UTC (permalink / raw) To: irogers, acme, james.clark, namhyung Cc: adrian.hunter, gmx, jolsa, linux-kernel, linux-perf-users, mingo, peterz This patch series introduces the new 'perf inject --aslr' feature to remap virtual memory addresses or drop physical memory event leaks when profile record data is shared between machines. Bundled with this feature is a critical bug fix inside the core map tracking tool that hardens perf session analysis against concurrent lookup data races. Core Feature: 'perf inject --aslr' (Patches 2, 3, and 4) Transferring perf.data files across environments introduces a potential leak of virtual address footprints, weakening Address Space Layout Randomization (ASLR) on the originating machine. To mitigate this, we introduce the --aslr flag into perf inject. Unknown or unhandled events are dropped conservatively, while handled samples and branch loops undergo systematic virtual memory offset obfuscation. Events carrying virtual memory layouts are conservatively remap-processed or dropped, while zero-address-risk lifecycle metadata records (such as namespaces, cgroups, and BPF program info) are intentionally delegated to preserve comprehensive downstream trace tool analysis compatibility. The ASLR tracking tool virtualizes process and machine namespaces using 'struct machines' to safely isolate host mappings from unprivileged KVM guest address spaces. 
Memory space layouts are tracked globally per process context to ensure linear, continuous space allocations across successive mapping runs. The topological invariant coordinate dso + invariant (start - pgoff) is tracked to uniquely index binary section frameworks, providing complete collision safety against separate overlapping shared-invariant libraries while remaining perfectly immune to boundary shifts or split fragmentations. To remain strictly conservative and guarantee security, the tool scrubs breakpoint addresses (bp_addr) from all synthesized stream headers, completely drops PERF_RECORD_TEXT_POKE events to prevent absolute immediate pointer operands leaks, and drops unsupported complex payloads (such as user register stacks, raw tracepoints, and hardware AUX tracing frames). Verification is reinforced in Patch 3 with a comprehensive POSIX shell suite ('inject_aslr.sh'), hardened against SIGPIPE signal exits with stream consuming awk loops and robust 'set -o pipefail' assertions. The suite utilizes a highly dense, system-call intensive VFS byte block loop workload (dd count=500) to guarantee deterministic hardware timer interrupts sampling streams inside kernel privilege states. Prerequisite Bug Fix (Patch 1) During development, a core map indexing issue was identified and resolved to prevent concurrent lookup data races during session analysis: 1. perf symbols: Patch 1 replaces old remove-reinsert map boundary update cycles with a high-performance, thread-safe transactional framework maps__mutate_mapping() that enforces write semaphore lock closures around all in-place virtual address mutations and sorting invalidations, completely closing concurrent lookup race condition windows. It explicitly executes DWARF address space cache invalidation (libdw__invalidate_dwfl()) to keep debugger unwinding frames perfectly synchronized. Changes since v6: - Concurrency & OOM Safety (Patch 2): Avoid deadlock by early kernel map loading. 
Cache original evsel configuration before modifying it. - Test Spacing & Pipeline wrapping (Patch 3): Wrap long shell pipelines inside inject_aslr.sh and remove unused global data variables. --- Verification Status: The entire suite compiles 100% successfully and evaluates to 100% pure, flawless green operational success across all validation check scenarios: Test basic ASLR remapping: [Success] Test pipe mode ASLR remapping: [Success] Test Callchain ASLR remapping: [Success] Test perf report consistency: [Success] Test pipe mode perf report consistency: [Success] Test pipe output mode perf report consistency: [Success] Test user register stripping: [Success] Ian Rogers (4): perf maps: Add maps__mutate_mapping perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses perf test: Add inject ASLR test perf aslr: Strip sample registers tools/perf/builtin-inject.c | 115 ++- tools/perf/tests/shell/inject_aslr.sh | 513 +++++++++++ tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 1155 +++++++++++++++++++++++++ tools/perf/util/aslr.h | 38 + tools/perf/util/machine.c | 32 +- tools/perf/util/maps.c | 55 ++ tools/perf/util/maps.h | 3 + tools/perf/util/symbol-elf.c | 41 +- tools/perf/util/symbol.c | 17 +- 10 files changed, 1938 insertions(+), 32 deletions(-) create mode 100755 tools/perf/tests/shell/inject_aslr.sh create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h -- 2.54.0.631.ge1b05301d1-goog ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v7 1/4] perf maps: Add maps__mutate_mapping 2026-05-19 8:08 ` [PATCH v7 0/4] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers @ 2026-05-19 8:08 ` Ian Rogers 2026-05-19 8:38 ` sashiko-bot 2026-05-19 8:08 ` [PATCH v7 2/4] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses Ian Rogers ` (3 subsequent siblings) 4 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-05-19 8:08 UTC (permalink / raw) To: irogers, acme, james.clark, namhyung Cc: adrian.hunter, gmx, jolsa, linux-kernel, linux-perf-users, mingo, peterz During kernel ELF symbol parsing (dso__process_kernel_symbol), proc kallsyms image loading (dso__load_kernel_sym, dso__load_guest_kernel_sym), and dynamic kernel memory map alignment updates (machine__update_kernel_mmap), the loader directly modifies live virtual address boundary key fields on map objects. If these boundaries are mutated while the map pointer actively resides inside the parent maps cache array list (kmaps) outside of any lock closure, an unsafe concurrent window is exposed where parallel worker lookup threads (e.g., inside perf top) can mistakenly assume the cache remains sorted based on stale parameters, executing binary search queries (bsearch) across an unsorted range and causing lookup failures. Fix this by introducing a thread-safe, atomic transactional framework routine maps__mutate_mapping() that explicitly acquires the parent maps write semaphore lock, executes an incoming mutation callback block to perform the field updates under full lock protection, and invalidates the sorted tracking flags prior to releasing the write lock. This guarantees absolute atomic synchronization invariants, completely closing the concurrent lookup race window.
The adjacent module alignment pass inside machine__create_kernel_maps() is safely preserved as a high-performance lockless pass, as its invocation lifecycle bounds remain strictly single-threaded by contract during session initialization construction. To safely support this unconditional down_write write lock mutator without recursive read-to-write self-deadlock upgrades during lazy symbol loading, we introduce a public maps__load_maps() API. It copies map pointers under a brief read lock and force-loads all modules locklessly outside the lock. Callers (such as perf inject) must pre-load all kernel symbol maps up front at startup using maps__load_maps(), completely bypassing dynamic runtime mutations. Fixes: 39b12f781271 ("perf tools: Make it possible to read object code from vmlinux") Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/util/machine.c | 32 +++++++++++++-------- tools/perf/util/maps.c | 55 ++++++++++++++++++++++++++++++++++++ tools/perf/util/maps.h | 3 ++ tools/perf/util/symbol-elf.c | 41 +++++++++++++++++---------- tools/perf/util/symbol.c | 17 ++++++++--- 5 files changed, 117 insertions(+), 31 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index e76f8c86e62a..ea918f75e3ad 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1522,22 +1522,30 @@ static void machine__set_kernel_mmap(struct machine *machine, map__set_end(machine->vmlinux_map, ~0ULL); } -static int machine__update_kernel_mmap(struct machine *machine, - u64 start, u64 end) +struct kernel_mmap_mutation_ctx { + u64 start; + u64 end; +}; + +static int kernel_mmap_mutate_cb(struct map *map, void *data) { - struct map *orig, *updated; - int err; + struct kernel_mmap_mutation_ctx *ctx = data; - orig = machine->vmlinux_map; - updated = map__get(orig); + map__set_start(map, ctx->start); + map__set_end(map, ctx->end); + if (ctx->start == 0 && ctx->end == 0) + map__set_end(map, ~0ULL); + return 0; +} - machine->vmlinux_map = 
updated; - maps__remove(machine__kernel_maps(machine), orig); - machine__set_kernel_mmap(machine, start, end); - err = maps__insert(machine__kernel_maps(machine), updated); - map__put(orig); +static int machine__update_kernel_mmap(struct machine *machine, + u64 start, u64 end) +{ + struct kernel_mmap_mutation_ctx ctx = { .start = start, .end = end }; - return err; + return maps__mutate_mapping(machine__kernel_maps(machine), + machine->vmlinux_map, + kernel_mmap_mutate_cb, &ctx); } int machine__create_kernel_maps(struct machine *machine) diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index 923935ee21b6..f9d5dc7f673f 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -576,6 +576,32 @@ void maps__remove(struct maps *maps, struct map *map) #endif } +int maps__mutate_mapping(struct maps *maps, struct map *map, + int (*mutate_cb)(struct map *map, void *data), void *data) +{ + int err = 0; + + if (maps) + down_write(maps__lock(maps)); + + err = mutate_cb(map, data); + + if (maps) { + RC_CHK_ACCESS(maps)->maps_by_address_sorted = false; + RC_CHK_ACCESS(maps)->maps_by_name_sorted = false; + } + + if (maps) + up_write(maps__lock(maps)); + +#ifdef HAVE_LIBDW_SUPPORT + if (maps) + libdw__invalidate_dwfl(maps, maps__libdw_addr_space_dwfl(maps)); +#endif + + return err; +} + bool maps__empty(struct maps *maps) { bool res; @@ -626,6 +652,35 @@ int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data) return ret; } +int maps__load_maps(struct maps *maps) +{ + struct map **maps_copy; + unsigned int nr_maps; + int err = 0; + + if (!maps) + return 0; + + down_read(maps__lock(maps)); + nr_maps = maps__nr_maps(maps); + maps_copy = calloc(nr_maps, sizeof(*maps_copy)); + if (!maps_copy) { + up_read(maps__lock(maps)); + return -ENOMEM; + } + for (unsigned int i = 0; i < nr_maps; i++) + maps_copy[i] = map__get(maps__maps_by_address(maps)[i]); + up_read(maps__lock(maps)); + + for (unsigned int i = 0; i < nr_maps; i++) { + if 
(map__load(maps_copy[i]) < 0) + err = -1; + map__put(maps_copy[i]); + } + free(maps_copy); + return err; +} + void maps__remove_maps(struct maps *maps, bool (*cb)(struct map *map, void *data), void *data) { struct map **maps_by_address; diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h index 5b80b199685e..4ec9b7453a3b 100644 --- a/tools/perf/util/maps.h +++ b/tools/perf/util/maps.h @@ -59,8 +59,11 @@ void maps__set_libdw_addr_space_dwfl(struct maps *maps, void *dwfl); size_t maps__fprintf(struct maps *maps, FILE *fp); +int maps__load_maps(struct maps *maps); int maps__insert(struct maps *maps, struct map *map); void maps__remove(struct maps *maps, struct map *map); +int maps__mutate_mapping(struct maps *maps, struct map *map, + int (*mutate_cb)(struct map *map, void *data), void *data); struct map *maps__find(struct maps *maps, u64 addr); struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 7afa8a117139..dc4ab58857b3 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1341,6 +1341,24 @@ static u64 ref_reloc(struct kmap *kmap) void __weak arch__sym_update(struct symbol *s __maybe_unused, GElf_Sym *sym __maybe_unused) { } +struct remap_kernel_ctx { + u64 sh_addr; + u64 sh_size; + u64 sh_offset; + struct kmap *kmap; +}; + +static int remap_kernel_cb(struct map *map, void *data) +{ + struct remap_kernel_ctx *ctx = data; + + map__set_start(map, ctx->sh_addr + ref_reloc(ctx->kmap)); + map__set_end(map, map__start(map) + ctx->sh_size); + map__set_pgoff(map, ctx->sh_offset); + map__set_mapping_type(map, MAPPING_TYPE__DSO); + return 0; +} + static int dso__process_kernel_symbol(struct dso *dso, struct map *map, GElf_Sym *sym, GElf_Shdr *shdr, struct maps *kmaps, struct kmap *kmap, @@ -1371,22 +1389,15 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, * map to the kernel dso. 
*/ if (*remap_kernel && dso__kernel(dso) && !kmodule) { + struct remap_kernel_ctx ctx = { + .sh_addr = shdr->sh_addr, + .sh_size = shdr->sh_size, + .sh_offset = shdr->sh_offset, + .kmap = kmap + }; + *remap_kernel = false; - map__set_start(map, shdr->sh_addr + ref_reloc(kmap)); - map__set_end(map, map__start(map) + shdr->sh_size); - map__set_pgoff(map, shdr->sh_offset); - map__set_mapping_type(map, MAPPING_TYPE__DSO); - /* Ensure maps are correctly ordered */ - if (kmaps) { - int err; - struct map *tmp = map__get(map); - - maps__remove(kmaps, map); - err = maps__insert(kmaps, map); - map__put(tmp); - if (err) - return err; - } + maps__mutate_mapping(kmaps, map, remap_kernel_cb, &ctx); } /* diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index fcaeeddbbb6b..09b93e844887 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -48,6 +48,13 @@ #include <symbol/kallsyms.h> #include <sys/utsname.h> +static int map_fixup_cb(struct map *map, void *data __maybe_unused) +{ + map__fixup_start(map); + map__fixup_end(map); + return 0; +} + static int dso__load_kernel_sym(struct dso *dso, struct map *map); static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map); static bool symbol__is_idle(const char *name); @@ -2121,10 +2128,11 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map) free(kallsyms_allocated_filename); if (err > 0 && !dso__is_kcore(dso)) { + struct maps *kmaps = map__kmaps(map); + dso__set_binary_type(dso, DSO_BINARY_TYPE__KALLSYMS); dso__set_long_name(dso, DSO__NAME_KALLSYMS, false); - map__fixup_start(map); - map__fixup_end(map); + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); } return err; @@ -2164,10 +2172,11 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map) if (err > 0) pr_debug("Using %s for symbols\n", kallsyms_filename); if (err > 0 && !dso__is_kcore(dso)) { + struct maps *kmaps = map__kmaps(map); + dso__set_binary_type(dso, DSO_BINARY_TYPE__GUEST_KALLSYMS); 
dso__set_long_name(dso, machine->mmap_name, false); - map__fixup_start(map); - map__fixup_end(map); + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); } return err; -- 2.54.0.631.ge1b05301d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v7 1/4] perf maps: Add maps__mutate_mapping 2026-05-19 8:08 ` [PATCH v7 1/4] perf maps: Add maps__mutate_mapping Ian Rogers @ 2026-05-19 8:38 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-05-19 8:38 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 2 potential issue(s) to consider: - [High] Recursive read-to-write self-deadlock on maps rw_semaphore during lazy symbol loading - [Medium] Spurious -ENOMEM failure in maps__load_maps() for empty map collections -- commit d7722461d022cc4c35c94806250c76c0a6c00722 Author: Ian Rogers <irogers@google.com> perf maps: Add maps__mutate_mapping This commit introduces maps__mutate_mapping() to provide thread-safe modifications to map boundaries using a write lock. It also adds maps__load_maps() to allow pre-loading of kernel maps without triggering runtime mutations. Fixes: 39b12f781271 ("perf tools: Make it possible to read object code from vmlinux") > diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c > --- a/tools/perf/util/maps.c > +++ b/tools/perf/util/maps.c [ ... ] > @@ -576,6 +576,32 @@ void maps__remove(struct maps *maps, struct map *map) > #endif > } > > +int maps__mutate_mapping(struct maps *maps, struct map *map, > + int (*mutate_cb)(struct map *map, void *data), void *data) > +{ > + int err = 0; > + > + if (maps) > + down_write(maps__lock(maps)); > + > + err = mutate_cb(map, data); Does this unconditional down_write() create a recursive read-to-write self-deadlock during lazy symbol loading? Looking at the existing codebase, map__load() is often called while holding the read lock. For example, in maps__for_each_map() or maps__load_first(), a read lock is taken before iterating: maps__for_each_map() { ... down_read(maps__lock(maps)); ... cb(map, data); ... } The callback can trigger map__load() which calls dso__load_kernel_sym(). 
Since dso__load_kernel_sym() and dso__process_kernel_symbol() are updated in this patch to call maps__mutate_mapping(), which acquires the write lock on the same rw_semaphore, won't this cause a guaranteed deadlock for existing paths that haven't been updated to use the new maps__load_maps() pre-loading API? [ ... ] > @@ -626,6 +652,35 @@ int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data) > return ret; > } > > +int maps__load_maps(struct maps *maps) > +{ > + struct map **maps_copy; > + unsigned int nr_maps; > + int err = 0; > + > + if (!maps) > + return 0; > + > + down_read(maps__lock(maps)); > + nr_maps = maps__nr_maps(maps); > + maps_copy = calloc(nr_maps, sizeof(*maps_copy)); > + if (!maps_copy) { > + up_read(maps__lock(maps)); > + return -ENOMEM; > + } Can this return a spurious -ENOMEM if the maps list is empty? If nr_maps is 0, calloc(0, size) is implementation-defined and permitted to return NULL. If it does, this error path will be taken instead of safely returning 0 for an empty list. Should this code explicitly check for nr_maps == 0 before attempting the allocation? -- Sashiko AI review · https://sashiko.dev/#/patchset/20260519080824.3329601-1-irogers@google.com?part=1 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v7 2/4] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses 2026-05-19 8:08 ` [PATCH v7 0/4] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-05-19 8:08 ` [PATCH v7 1/4] perf maps: Add maps__mutate_mapping Ian Rogers @ 2026-05-19 8:08 ` Ian Rogers 2026-05-19 9:14 ` sashiko-bot 2026-05-19 8:08 ` [PATCH v7 3/4] perf test: Add inject ASLR test Ian Rogers ` (2 subsequent siblings) 4 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-05-19 8:08 UTC (permalink / raw) To: irogers, acme, james.clark, namhyung Cc: adrian.hunter, gmx, jolsa, linux-kernel, linux-perf-users, mingo, peterz If perf.data files are taken from one machine to another they may leak virtual addresses and so weaken ASLR on the machine they are coming from. Add an aslr option for perf inject that remaps all virtual addresses, or drops data/events, so that the virtual address information isn't leaked. Events carrying virtual memory layouts are conservatively remap-processed or dropped, while zero-address-risk lifecycle metadata records (such as namespaces, cgroups, and BPF program info) are intentionally delegated to preserve comprehensive downstream trace tool analysis compatibility. The ASLR tracking tool virtualizes process and machine namespaces using 'struct machines' to safely isolate host mappings from unprivileged KVM guest address spaces. Memory space layouts are tracked globally per process context to ensure linear, continuous space allocations across successive mapping runs. To remain strictly conservative and guarantee security, the tool scrubs breakpoint addresses (bp_addr) from all synthesized stream headers, completely drops PERF_RECORD_TEXT_POKE events to prevent absolute immediate pointer operands leaks, and drops unsupported complex payloads (such as user register stacks, raw tracepoints, and hardware AUX tracing frames). 
To permanently eliminate lazy-loading deadlock risks during runtime event processing, the inject tool force-loads all host and guest kernel and module maps up front at session startup under a clean single-threaded context using the new maps__load_maps() API. Guest kernel namespace isolation is secured by tracking guest kernels under kernel_pid (-1) and allocating guest kernel mappings inside kernel_space_start ranges. Unsupported or unrecognized UAPI sample flags are cleanly stripped from attributes up front using the ASLR_SUPPORTED_SAMPLE_TYPE bitmask to prevent downstream parser misalignment crashes. Assisted-by: Gemini-CLI:Google Gemini 3 Signed-off-by: Ian Rogers <irogers@google.com> Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> --- tools/perf/builtin-inject.c | 93 ++- tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 1106 +++++++++++++++++++++++++++++++++++ tools/perf/util/aslr.h | 37 ++ 4 files changed, 1236 insertions(+), 1 deletion(-) create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index a2493f1097df..8fe924e730a1 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -8,6 +8,7 @@ */ #include "builtin.h" +#include "util/aslr.h" #include "util/color.h" #include "util/dso.h" #include "util/vdso.h" @@ -124,6 +125,7 @@ struct perf_inject { bool in_place_update_dry_run; bool copy_kcore_dir; bool convert_callchain; + bool aslr; const char *input_name; struct perf_data output; u64 bytes_written; @@ -2267,6 +2269,56 @@ static int output_fd(struct perf_inject *inject) return inject->in_place_update ? -1 : perf_data__fd(&inject->output); } +/* + * To prevent self-deadlocks from recursive read-to-write lock upgrades and + * data corruption for concurrent readers, we must completely avoid mutating map + * boundaries during runtime event processing. 
Since maps__mutate_mapping() requires the + * write lock to safely protect concurrent searches in other threads, we force-load + * all host and guest kernel and module maps up-front at session startup under + * a clean single-threaded context, permanently bypassing lazy dynamic loading. + */ +static int machine__load_kernel_maps(struct machine *machine) +{ + struct maps *kmaps = machine__kernel_maps(machine); + int err; + + if (!kmaps) + return 0; + + err = maps__load_maps(kmaps); + if (!err) { + pr_debug("ASLR: Loaded %u kernel/module maps up front for machine pid %d:\n", + maps__nr_maps(kmaps), machine->pid); + if (verbose > 0) + maps__fprintf(kmaps, stderr); + } + return err; +} + +/* + * Scans and force-loads all registered host and guest machine kernel and + * module maps up front before event processing starts. + */ +static int perf_inject__load_kernel_maps(struct perf_inject *inject) +{ + struct machine *machine; + struct rb_node *nd; + int err = 0; + + /* Load host kernel maps up front */ + if (machine__load_kernel_maps(&inject->session->machines.host) < 0) + err = -1; + + /* Load all guest machines kernel maps up front */ + for (nd = rb_first_cached(&inject->session->machines.guests); nd; nd = rb_next(nd)) { + machine = rb_entry(nd, struct machine, rb_node); + if (machine__load_kernel_maps(machine) < 0) + err = -1; + } + + return err; +} + static int __cmd_inject(struct perf_inject *inject) { int ret = -EINVAL; @@ -2399,6 +2451,13 @@ static int __cmd_inject(struct perf_inject *inject) if (!inject->output.is_pipe && !inject->in_place_update) lseek(fd, output_data_offset, SEEK_SET); + if (inject->aslr) { + if (perf_inject__load_kernel_maps(inject) < 0) { + pr_err("Failed to load host or guest kernel maps up front\n"); + return -EINVAL; + } + } + ret = perf_session__process_events(session); if (ret) return ret; @@ -2460,6 +2519,8 @@ static int __cmd_inject(struct perf_inject *inject) } } + + session->header.data_offset = output_data_offset; 
session->header.data_size = inject->bytes_written; perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc, @@ -2569,6 +2630,8 @@ int cmd_inject(int argc, const char **argv) unwind__option), OPT_BOOLEAN(0, "convert-callchain", &inject.convert_callchain, "Generate callchains using DWARF and drop register/stack data"), + OPT_BOOLEAN(0, "aslr", &inject.aslr, + "Remap virtual memory addresses similar to ASLR"), OPT_END() }; const char * const inject_usage[] = { @@ -2576,6 +2639,7 @@ int cmd_inject(int argc, const char **argv) NULL }; bool ordered_events; + struct perf_tool *tool = &inject.tool; if (!inject.itrace_synth_opts.set) { /* Disable eager loading of kernel symbols that adds overhead to perf inject. */ @@ -2596,6 +2660,11 @@ int cmd_inject(int argc, const char **argv) if (argc) usage_with_options(inject_usage, options); + if (inject.aslr && inject.convert_callchain) { + pr_err("Error: --aslr and --convert-callchain are mutually exclusive features.\n"); + return -EINVAL; + } + if (inject.strip && !inject.itrace_synth_opts.set) { pr_err("--strip option requires --itrace option\n"); return -1; @@ -2689,18 +2758,38 @@ int cmd_inject(int argc, const char **argv) inject.tool.schedstat_domain = perf_event__repipe_op2_synth; inject.tool.dont_split_sample_group = true; inject.tool.merge_deferred_callchains = false; - inject.session = __perf_session__new(&data, &inject.tool, + if (inject.aslr) { + tool = aslr_tool__new(&inject.tool); + if (!tool) { + ret = -ENOMEM; + goto out_close_output; + } + } + inject.session = __perf_session__new(&data, tool, /*trace_event_repipe=*/inject.output.is_pipe, /*host_env=*/NULL); if (IS_ERR(inject.session)) { ret = PTR_ERR(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); goto out_close_output; } if (zstd_init(&(inject.session->zstd_data), 0) < 0) pr_warning("Decompression initialization failed.\n"); + if (inject.aslr) { + struct evsel *evsel; + + evlist__for_each_entry(inject.session->evlist, evsel) { + 
evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) + evsel->core.attr.bp_addr = 0; + } + } + /* Save original section info before feature bits change */ ret = save_section_info(&inject); if (ret) @@ -2794,6 +2883,8 @@ int cmd_inject(int argc, const char **argv) strlist__delete(inject.known_build_ids); zstd_fini(&(inject.session->zstd_data)); perf_session__delete(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); out_close_output: if (!inject.in_place_update) perf_data__close(&inject.output); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 2bb60f50f62d..98da4e263c6d 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -6,6 +6,7 @@ perf-util-y += arm64-frame-pointer-unwind-support.o perf-util-y += addr2line.o perf-util-y += addr_location.o perf-util-y += annotate.o +perf-util-y += aslr.o perf-util-y += blake2s.o perf-util-y += block-info.o perf-util-y += block-range.o diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c new file mode 100644 index 000000000000..d0b1b33377fd --- /dev/null +++ b/tools/perf/util/aslr.c @@ -0,0 +1,1106 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "aslr.h" + +#include "addr_location.h" +#include "debug.h" +#include "event.h" +#include "evsel.h" +#include "machine.h" +#include "map.h" +#include "thread.h" +#include "tool.h" +#include "session.h" +#include "data.h" +#include "dso.h" + +#include <internal/lib.h> /* page_size */ +#include <linux/compiler.h> +#include <linux/zalloc.h> +#include <inttypes.h> +#include <unistd.h> + +#define ASLR_SUPPORTED_SAMPLE_TYPE ( \ + PERF_SAMPLE_IDENTIFIER | \ + PERF_SAMPLE_IP | \ + PERF_SAMPLE_TID | \ + PERF_SAMPLE_TIME | \ + PERF_SAMPLE_ADDR | \ + PERF_SAMPLE_ID | \ + PERF_SAMPLE_STREAM_ID | \ + PERF_SAMPLE_CPU | \ + PERF_SAMPLE_PERIOD | \ + PERF_SAMPLE_READ | \ + PERF_SAMPLE_CALLCHAIN | \ + PERF_SAMPLE_RAW | \ + PERF_SAMPLE_BRANCH_STACK | \ + PERF_SAMPLE_STACK_USER | \ + 
PERF_SAMPLE_WEIGHT_TYPE | \ + PERF_SAMPLE_DATA_SRC | \ + PERF_SAMPLE_TRANSACTION | \ + PERF_SAMPLE_PHYS_ADDR | \ + PERF_SAMPLE_CGROUP | \ + PERF_SAMPLE_DATA_PAGE_SIZE | \ + PERF_SAMPLE_CODE_PAGE_SIZE | \ + PERF_SAMPLE_AUX) + +/** + * struct remap_addresses_key - Key for mapping original addresses to remapped ones. + * @dso: Pointer to the DSO (Dynamic Shared Object) associated with the mapping. + * @invariant: Unique offset invariant within the VMA (Virtual Memory Area). + * Calculated as `start - pgoff`. This value remains constant when + * perf's internal `maps__fixup_overlap_and_insert` splits a map into + * fragmented VMA pieces due to overlapping events, allowing us to + * resolve split maps consistently back to the original VMA. + * @pid: Process ID associated with the mapping. + */ +struct remap_addresses_key { + struct machine *machine; + struct dso *dso; + u64 invariant; + pid_t pid; +}; + +struct aslr_mapping { + struct list_head node; + u64 orig_start; + u64 len; + u64 remap_start; +}; + +struct aslr_tool { + /** @tool: The tool implemented here and a pointer to a delegate to process the data. */ + struct delegate_tool tool; + /** @machines: The machines with the input, not remapped, virtual address layout. */ + struct machines machines; + /** @event_copy: Buffer used to create an event to pass to the delegate. */ + char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); + /** @remap_addresses: mapping from remap_addresses_key to remapped address. */ + struct hashmap remap_addresses; + /** @top_addresses: mapping from process to max remapped address. */ + struct hashmap top_addresses; +}; + +static const pid_t kernel_pid = -1; + +/* Start remapping user processes from a small non-zero offset. 
*/ +static const u64 user_space_start = 0x200000; +static const u64 kernel_space_start = 0xffff800010000000; + +static size_t remap_addresses__hash(long _key, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key = (struct remap_addresses_key *)_key; + + return (size_t)key->machine ^ (size_t)key->dso ^ key->invariant ^ key->pid; +} + +static bool remap_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key1 = (struct remap_addresses_key *)_key1; + struct remap_addresses_key *key2 = (struct remap_addresses_key *)_key2; + + return key1->machine == key2->machine && + RC_CHK_EQUAL(key1->dso, key2->dso) && + key1->invariant == key2->invariant && + key1->pid == key2->pid; +} + +struct top_addresses_key { + struct machine *machine; + pid_t pid; +}; + +static size_t top_addresses__hash(long _key, void *ctx __maybe_unused) +{ + struct top_addresses_key *key = (struct top_addresses_key *)_key; + + return (size_t)key->machine ^ key->pid; +} + +static bool top_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) +{ + struct top_addresses_key *key1 = (struct top_addresses_key *)_key1; + struct top_addresses_key *key2 = (struct top_addresses_key *)_key2; + + return key1->machine == key2->machine && key1->pid == key2->pid; +} + +static u64 round_up_to_page_size(u64 addr) +{ + return (addr + page_size - 1) & ~((u64)page_size - 1); +} + +static u64 aslr_tool__remap_address(struct aslr_tool *aslr, + struct thread *aslr_thread, + u8 cpumode, + u64 addr) +{ + struct addr_location al; + struct remap_addresses_key key; + u64 *remapped_invariant_ptr = NULL; + u64 remap_addr = 0; + u8 effective_cpumode = cpumode; + + if (!aslr_thread) + return 0; /* No thread. */ + + addr_location__init(&al); + if (!thread__find_map(aslr_thread, cpumode, addr, &al)) { + /* + * If lookup fails with specified cpumode, try fallback to the other space + * to be robust against bad cpumode in samples. 
+ */ + if (cpumode == PERF_RECORD_MISC_KERNEL) + effective_cpumode = PERF_RECORD_MISC_USER; + else if (cpumode == PERF_RECORD_MISC_USER) + effective_cpumode = PERF_RECORD_MISC_KERNEL; + else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + effective_cpumode = PERF_RECORD_MISC_GUEST_USER; + else if (cpumode == PERF_RECORD_MISC_GUEST_USER) + effective_cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + + if (!thread__find_map(aslr_thread, effective_cpumode, addr, &al)) { + addr_location__exit(&al); + return 0; /* No mmap. */ + } + } + + key.machine = maps__machine(aslr_thread->maps); + key.dso = map__dso(al.map); + key.invariant = map__start(al.map) - map__pgoff(al.map); + key.pid = (effective_cpumode == PERF_RECORD_MISC_KERNEL || + effective_cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? + kernel_pid : aslr_thread->pid_; + + if (hashmap__find(&aslr->remap_addresses, &key, &remapped_invariant_ptr)) { + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + + (addr - map__start(al.map)); + } else { + pr_debug("Cannot find a remapped entry for address %lx in mapping %lx(%lx) for pid=%d\n", + addr, map__start(al.map), map__size(al.map), key.pid); + } + + addr_location__exit(&al); + return remap_addr; +} + +static u64 aslr_tool__findnew_mapping(struct aslr_tool *aslr, + struct thread *aslr_thread, + u8 cpumode, u64 start, + u64 len, u64 pgoff) +{ + /* Address location for dso lookup. */ + struct addr_location al; + /* Original ASLR address based key for the remap table. */ + struct remap_addresses_key remap_key; + /* The address in the ASLR sanitized address space less pgoff. */ + u64 *remapped_invariant_ptr; + /* Key for the maximum address in a process. */ + struct top_addresses_key top_addr_key; + /* Value in top address table. */ + u64 *pmax = NULL; + /* Address in ASLR sanitized address space. */ + u64 remap_addr; + /* Potentially allocated remap table key. */ + struct remap_addresses_key *new_remap_key = NULL; + /* + * Potentially allocated remap table value. 
+ * TODO: Avoid allocation necessary for perf 32-bit binary support. + */ + u64 *new_remap_val = NULL; + int err; + + if (!aslr_thread) + return 0; + + /* The key to look up an incoming address to the outgoing value. */ + addr_location__init(&al); + remap_key.machine = maps__machine(aslr_thread->maps); + remap_key.pid = (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? + kernel_pid : aslr_thread->pid_; + if (thread__find_map(aslr_thread, cpumode, start, &al)) { + remap_key.dso = map__dso(al.map); + remap_key.invariant = map__start(al.map) - map__pgoff(al.map); + } else { + remap_key.dso = NULL; + remap_key.invariant = start - pgoff; + } + + /* The key to look up top allocated address. */ + top_addr_key.machine = remap_key.machine; + top_addr_key.pid = remap_key.pid; + + if (hashmap__find(&aslr->remap_addresses, &remap_key, &remapped_invariant_ptr)) { + /* Mmap already exists. */ + u64 calculated_max; + + remap_addr = *remapped_invariant_ptr + (al.map ? map__pgoff(al.map) : pgoff); + calculated_max = remap_addr + len; + + /* See if top mapping was expanded. */ + if (hashmap__find(&aslr->top_addresses, &top_addr_key, &pmax)) { + if (calculated_max > *pmax) + *pmax = calculated_max; + } + addr_location__exit(&al); + return remap_addr; + } + /* No mmap, create an entry from the top address. */ + if (hashmap__find(&aslr->top_addresses, &top_addr_key, &pmax)) { + /* Current max allocated mmap address within the process. */ + remap_addr = *pmax; + + /* Give 1 page gap from current max page. */ + remap_addr = round_up_to_page_size(remap_addr); + remap_addr += page_size; + if (remap_addr + len > *pmax) + *pmax = remap_addr + len; + } else { + /* First address of the process, allocate key and first top address. */ + struct top_addresses_key *tk; + + remap_addr = (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? 
+ kernel_space_start : user_space_start; + remap_addr = round_up_to_page_size(remap_addr); + + tk = malloc(sizeof(*tk)); + pmax = malloc(sizeof(u64)); + if (!tk || !pmax) { + err = -ENOMEM; + } else { + *tk = top_addr_key; + *pmax = remap_addr + len; + err = hashmap__insert(&aslr->top_addresses, tk, pmax, + HASHMAP_ADD, NULL, NULL); + } + if (err) { + errno = -err; + pr_err("Failure to add ASLR process top address %m\n"); + free(tk); + free(pmax); + addr_location__exit(&al); + return 0; + } + } + /* Create remapping entry. */ + new_remap_key = malloc(sizeof(*new_remap_key)); + new_remap_val = malloc(sizeof(u64)); + if (!new_remap_key || !new_remap_val) { + err = -ENOMEM; + } else { + *new_remap_key = remap_key; + new_remap_key->dso = dso__get(remap_key.dso); + if (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) { + if (al.map) { + *new_remap_val = remap_addr - + (start - map__start(al.map)) - + map__pgoff(al.map); + } else { + *new_remap_val = remap_addr; + } + } else { + *new_remap_val = remap_addr - (al.map ? 
map__pgoff(al.map) : pgoff); + } + err = hashmap__add(&aslr->remap_addresses, new_remap_key, new_remap_val); + if (err) + dso__put(new_remap_key->dso); + } + if (err) { + errno = -err; + pr_err("Failure to add ASLR remapping %m\n"); + free(new_remap_key); + free(new_remap_val); + addr_location__exit(&al); + return 0; + } + addr_location__exit(&al); + return remap_addr; +} + +static int aslr_tool__process_mmap(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_mmap(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap.pid, event->mmap.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap, &event->mmap, event->mmap.header.size); + /* Remaps the mmap.start. 
*/ + new_event->mmap.start = aslr_tool__findnew_mapping(aslr, thread, cpumode, + event->mmap.start, + event->mmap.len, + event->mmap.pgoff); + if (cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + new_event->mmap.pgoff = new_event->mmap.start; + err = delegate->mmap(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_mmap2(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_mmap2(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap2.pid, event->mmap2.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap2, &event->mmap2, event->mmap2.header.size); + /* Remaps the mmap2.start. 
*/ + new_event->mmap2.start = aslr_tool__findnew_mapping(aslr, thread, cpumode, + event->mmap2.start, + event->mmap2.len, + event->mmap2.pgoff); + if (cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + new_event->mmap2.pgoff = new_event->mmap2.start; + err = delegate->mmap2(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_comm(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_comm(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->comm(delegate, event, sample, machine); +} + +static int aslr_tool__process_fork(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. 
*/ + err = perf_event__process_fork(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->fork(delegate, event, sample, machine); +} + +static int aslr_tool__process_exit(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_exit(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->exit(delegate, event, sample, machine); +} + +static int aslr_tool__process_text_poke(const struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + /* Drop in case the instruction encodes an ASLR revealing address. 
*/ + return 0; +} + +static int aslr_tool__process_ksymbol(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + err = perf_event__process_ksymbol(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, kernel_pid, 0); + if (!thread) + return -ENOMEM; + memcpy(&new_event->ksymbol, &event->ksymbol, event->ksymbol.header.size); + /* Remaps the ksymbol.addr */ + new_event->ksymbol.addr = aslr_tool__findnew_mapping(aslr, thread, + PERF_RECORD_MISC_KERNEL, + event->ksymbol.addr, + event->ksymbol.len, + /*pgoff=*/0); + + err = delegate->ksymbol(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_sample(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct evsel *evsel, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + int ret; + u64 sample_type; + struct thread *thread; + struct machine *aslr_machine; + __u64 max_i; + __u64 max_j; + union perf_event *new_event; + struct perf_sample new_sample; + __u64 *in_array, *out_array; + u8 cpumode; + u64 addr; + size_t i; + size_t j; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + ret = -EFAULT; + sample_type = evsel->core.attr.sample_type; 
+ max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); + max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); + new_event = (union perf_event *)aslr->event_copy; + cpumode = sample->cpumode; + i = 0; + j = 0; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + + thread = machine__findnew_thread(aslr_machine, sample->pid, sample->tid); + + if (!thread) + return -ENOMEM; + + if (max_i > PERF_SAMPLE_MAX_SIZE / sizeof(u64)) + goto out_put; + + new_event->sample.header = event->sample.header; + + in_array = &event->sample.array[0]; + out_array = &new_event->sample.array[0]; + +#define CHECK_BOUNDS(required_i, required_j) \ + (i + (required_i) > max_i || j + (required_j) > max_j) + +#define COPY_U64() \ + do { \ + if (CHECK_BOUNDS(1, 1)) { \ + ret = -EFAULT; \ + goto out_put; \ + } \ + out_array[j++] = in_array[i++]; \ + } while (0) + +#define REMAP_U64(addr_field) \ + do { \ + if (CHECK_BOUNDS(1, 1)) { \ + ret = -EFAULT; \ + goto out_put; \ + } \ + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr_field); \ + i++; \ + } while (0) + + if (sample_type & PERF_SAMPLE_IDENTIFIER) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_IP) + REMAP_U64(sample->ip); + if (sample_type & PERF_SAMPLE_TID) + COPY_U64(); /* pid, tid */ + if (sample_type & PERF_SAMPLE_TIME) + COPY_U64(); /* time */ + if (sample_type & PERF_SAMPLE_ADDR) + REMAP_U64(sample->addr); + if (sample_type & PERF_SAMPLE_ID) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_STREAM_ID) + COPY_U64(); /* stream_id */ + if (sample_type & PERF_SAMPLE_CPU) + COPY_U64(); /* cpu, res */ + if (sample_type & PERF_SAMPLE_PERIOD) + COPY_U64(); /* period */ + if (sample_type & PERF_SAMPLE_READ) { + if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ 
+ if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } else { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + nr = out_array[j++]; + i++; + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + for (u64 cntr = 0; cntr < nr; cntr++) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } + } + } + if (sample_type & PERF_SAMPLE_CALLCHAIN) { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + nr = out_array[j++]; + i++; + + for (u64 cntr = 0; cntr < nr; cntr++) { + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + addr = in_array[i++]; + if (addr >= PERF_CONTEXT_MAX) { + out_array[j++] = addr; + switch (addr) { + case PERF_CONTEXT_HV: + cpumode = PERF_RECORD_MISC_HYPERVISOR; + break; + case PERF_CONTEXT_KERNEL: + cpumode = PERF_RECORD_MISC_KERNEL; + break; + case PERF_CONTEXT_USER: + cpumode = PERF_RECORD_MISC_USER; + break; + case PERF_CONTEXT_GUEST: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_KERNEL: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_USER: + cpumode = PERF_RECORD_MISC_GUEST_USER; + break; + case PERF_CONTEXT_USER_DEFERRED: + if (cntr + 1 >= nr) { + pr_debug("Truncated callchain deferred cookie context\n"); + ret = 0; + goto out_put; + } + /* + * Immediately followed by a 64-bit + * stitching cookie. Skip/Copy it! 
+ */ + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j++] = in_array[i++]; + cntr++; + cpumode = PERF_RECORD_MISC_USER; + break; + default: + pr_debug("invalid callchain context: %"PRIx64"\n", addr); + ret = 0; + goto out_put; + } + continue; + } + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr); + } + } + if (sample_type & PERF_SAMPLE_RAW) { + size_t bytes = sizeof(u32) + sample->raw_size; + size_t u64_words = (bytes + 7) / 8; + + if (i + u64_words > max_i || j + u64_words > max_j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], bytes); + i += u64_words; + j += u64_words; + /* + * TODO: certain raw samples can be remapped, such as + * tracepoints by examining their fields. + */ + pr_debug("Dropping raw samples as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + nr = out_array[j++]; + i++; + + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) + COPY_U64(); /* hw_idx */ + + if (nr > (ULLONG_MAX / 3)) { + ret = -EFAULT; + goto out_put; + } + if (nr * 3 > max_i - i || nr * 3 > max_j - j) { + ret = -EFAULT; + goto out_put; + } + for (u64 cntr = 0; cntr < nr; cntr++) { + out_array[j++] = aslr_tool__remap_address(aslr, thread, + sample->cpumode, + in_array[i++]); /* from */ + out_array[j++] = aslr_tool__remap_address(aslr, thread, + sample->cpumode, + in_array[i++]); /* to */ + out_array[j++] = in_array[i++]; /* flags */ + } + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) { + if (nr > max_i - i || nr > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); + i += nr; + j += nr; + /* TODO: confirm branch counters don't leak ASLR information. 
*/ + pr_debug("Dropping sample branch counters as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + } + if (sample_type & PERF_SAMPLE_REGS_USER) { + u64 abi; + + if (CHECK_BOUNDS(1, 0)) { + ret = -EFAULT; + goto out_put; + } + abi = in_array[i++]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(evsel->core.attr.sample_regs_user); + + if (nr > max_i - i || nr > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); + i += nr; + j += nr; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping regs user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_STACK_USER) { + u64 size; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + size = out_array[j++]; + i++; + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + if (size % 8) { + size_t pad = 8 - (size % 8); + + memset(((char *)&out_array[j]) + size, 0, pad); + } + i += u64_words; + j += u64_words; + + COPY_U64(); /* dyn_size */ + } + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping stack user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + COPY_U64(); /* perf_sample_weight */ + if (sample_type & PERF_SAMPLE_DATA_SRC) + COPY_U64(); /* data_src */ + if (sample_type & PERF_SAMPLE_TRANSACTION) + COPY_U64(); /* transaction */ + if (sample_type & PERF_SAMPLE_REGS_INTR) { + u64 abi; + + if (CHECK_BOUNDS(1, 0)) { + ret = -EFAULT; + goto out_put; + } + abi = in_array[i++]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(evsel->core.attr.sample_regs_intr); + + if (nr > max_i - i || nr > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); + i += nr; + j += nr; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_PHYS_ADDR) { + COPY_U64(); /* phys_addr */ + /* TODO: can this be less conservative? */ + pr_debug("Dropping physical address sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_CGROUP) + COPY_U64(); /* cgroup */ + if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + COPY_U64(); /* data_page_size */ + if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + COPY_U64(); /* code_page_size */ + + if (sample_type & PERF_SAMPLE_AUX) { + u64 size; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + size = out_array[j++]; + i++; + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + if (size % 8) { + size_t pad = 8 - (size % 8); + + memset(((char *)&out_array[j]) + size, 0, pad); + } + i += u64_words; + j += u64_words; + } + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping aux sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + if (evsel__is_offcpu_event(evsel)) { + /* TODO: can this be less conservative? */ + pr_debug("Dropping off-CPU sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); + + perf_sample__init(&new_sample, /*all=*/ true); + ret = evsel__parse_sample(evsel, new_event, &new_sample); + if (ret) { + perf_sample__exit(&new_sample); + goto out_put; + } + + ret = delegate->sample(delegate, new_event, &new_sample, evsel, machine); + perf_sample__exit(&new_sample); + +out_put: + thread__put(thread); + return ret; +} + +#undef CHECK_BOUNDS +#undef COPY_U64 +#undef REMAP_U64 + +static int aslr_tool__process_attr(const struct perf_tool *tool, + union perf_event *event, + struct evlist **pevlist) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + + memcpy(&new_event->attr, &event->attr, event->attr.header.size); + if (new_event->attr.attr.type == PERF_TYPE_BREAKPOINT) + new_event->attr.attr.bp_addr = 0; /* Conservatively remove addresses. 
*/ + + new_event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + + return delegate->attr(delegate, new_event, pevlist); +} + +static int skipn(int fd, off_t n) +{ + char buf[4096]; + ssize_t ret; + + while (n > 0) { + ret = read(fd, buf, min_t(off_t, n, (off_t)sizeof(buf))); + if (ret <= 0) + return ret; + n -= ret; + } + + return 0; +} + +static s64 aslr_tool__process_auxtrace(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, + union perf_event *event) +{ + if (perf_data__is_pipe(session->data)) { + /* Copy behavior of the stub by reading all pipe data. */ + int err = skipn(perf_data__fd(session->data), event->auxtrace.size); + + if (err < 0) + return err; + } + return event->auxtrace.size; +} + +static int aslr_tool__process_auxtrace_info(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + +static int aslr_tool__process_auxtrace_error(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + +static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) +{ + delegate_tool__init(&aslr->tool, delegate); + aslr->tool.tool.ordered_events = true; + + machines__init(&aslr->machines); + + hashmap__init(&aslr->remap_addresses, + remap_addresses__hash, remap_addresses__equal, + /*ctx=*/NULL); + hashmap__init(&aslr->top_addresses, + top_addresses__hash, top_addresses__equal, + /*ctx=*/NULL); + + aslr->tool.tool.sample = aslr_tool__process_sample; + /* read - reads a counter, okay to delegate. 
*/ + aslr->tool.tool.mmap = aslr_tool__process_mmap; + aslr->tool.tool.mmap2 = aslr_tool__process_mmap2; + aslr->tool.tool.comm = aslr_tool__process_comm; + aslr->tool.tool.fork = aslr_tool__process_fork; + aslr->tool.tool.exit = aslr_tool__process_exit; + /* namespaces, cgroup, lost, lost_sample, aux, */ + /* itrace_start, aux_output_hw_id, context_switch, throttle, unthrottle */ + /* - no virtual addresses. */ + aslr->tool.tool.ksymbol = aslr_tool__process_ksymbol; + /* bpf - no virtual address. */ + aslr->tool.tool.text_poke = aslr_tool__process_text_poke; + aslr->tool.tool.attr = aslr_tool__process_attr; + /* event_update, tracing_data, finished_round, build_id, id_index, */ + /* auxtrace_info, auxtrace_error, time_conv, thread_map, cpu_map, */ + /* stat_config, stat, feature, finished_init, bpf_metadata, compressed, */ + /* auxtrace - no virtual addresses. */ + aslr->tool.tool.auxtrace = aslr_tool__process_auxtrace; + aslr->tool.tool.auxtrace_info = aslr_tool__process_auxtrace_info; + aslr->tool.tool.auxtrace_error = aslr_tool__process_auxtrace_error; +} + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate) +{ + struct aslr_tool *aslr = zalloc(sizeof(*aslr)); + + if (!aslr) + return NULL; + + aslr_tool__init(aslr, delegate); + return &aslr->tool.tool; +} + +void aslr_tool__delete(struct perf_tool *tool) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct hashmap_entry *cur; + size_t bkt; + + if (!tool) + return; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + + hashmap__for_each_entry(&aslr->remap_addresses, cur, bkt) { + struct remap_addresses_key *key = (struct remap_addresses_key *)cur->pkey; + + if (key) + dso__put(key->dso); + zfree(&cur->pkey); + zfree(&cur->pvalue); + } + hashmap__for_each_entry(&aslr->top_addresses, cur, bkt) { + zfree(&cur->pkey); + zfree(&cur->pvalue); + } + + hashmap__clear(&aslr->remap_addresses); +
hashmap__clear(&aslr->top_addresses); + machines__destroy_kernel_maps(&aslr->machines); + machines__exit(&aslr->machines); + free(aslr); +} diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h new file mode 100644 index 000000000000..a9b90bf29540 --- /dev/null +++ b/tools/perf/util/aslr.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_ASLR_H +#define __PERF_ASLR_H + +#include <linux/perf_event.h> + +#define ASLR_SUPPORTED_SAMPLE_TYPE ( \ + PERF_SAMPLE_IDENTIFIER | \ + PERF_SAMPLE_IP | \ + PERF_SAMPLE_TID | \ + PERF_SAMPLE_TIME | \ + PERF_SAMPLE_ADDR | \ + PERF_SAMPLE_ID | \ + PERF_SAMPLE_STREAM_ID | \ + PERF_SAMPLE_CPU | \ + PERF_SAMPLE_PERIOD | \ + PERF_SAMPLE_READ | \ + PERF_SAMPLE_CALLCHAIN | \ + PERF_SAMPLE_RAW | \ + PERF_SAMPLE_BRANCH_STACK | \ + PERF_SAMPLE_STACK_USER | \ + PERF_SAMPLE_WEIGHT_TYPE | \ + PERF_SAMPLE_DATA_SRC | \ + PERF_SAMPLE_TRANSACTION | \ + PERF_SAMPLE_PHYS_ADDR | \ + PERF_SAMPLE_CGROUP | \ + PERF_SAMPLE_DATA_PAGE_SIZE | \ + PERF_SAMPLE_CODE_PAGE_SIZE | \ + PERF_SAMPLE_AUX) + +struct perf_tool; +struct evsel; + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate); +void aslr_tool__delete(struct perf_tool *aslr); + +#endif /* __PERF_ASLR_H */ -- 2.54.0.631.ge1b05301d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v7 2/4] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses 2026-05-19 8:08 ` [PATCH v7 2/4] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses Ian Rogers @ 2026-05-19 9:14 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-05-19 9:14 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 3 potential issue(s) to consider: - [High] Modifying `evsel->core.attr.sample_type` upfront corrupts the input event parser, causing misaligned reads of raw payload data. - [High] Ineffective deadlock mitigation due to pre-loading kernel maps into the wrong `machines` struct and premature guest evaluation. - [High] `aslr_tool__process_ksymbol` truncates ksymbol addresses to the base address of their containing map. -- commit 9d45b9cad9df39407d7e9e5d06994ca1d17cd797 Author: Ian Rogers <irogers@google.com> perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses This commit introduces an ASLR tool for perf inject that remaps virtual addresses to prevent leaking address space layouts when sharing perf.data files. It also attempts to eliminate lazy-loading deadlocks by preloading kernel and module maps before processing events. > diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c > index a2493f1097df9..8fe924e730a16 100644 > --- a/tools/perf/builtin-inject.c > +++ b/tools/perf/builtin-inject.c [ ... ] > @@ -2267,6 +2269,56 @@ static int output_fd(struct perf_inject *inject) [ ... ] > +/* > + * Scans and force-loads all registered host and guest machine kernel and > + * module maps up front before event processing starts. 
> + */ > +static int perf_inject__load_kernel_maps(struct perf_inject *inject) > +{ > + struct machine *machine; > + struct rb_node *nd; > + int err = 0; > + > + /* Load host kernel maps up front */ > + if (machine__load_kernel_maps(&inject->session->machines.host) < 0) > + err = -1; > + > + /* Load all guest machines kernel maps up front */ > + for (nd = rb_first_cached(&inject->session->machines.guests); nd; nd = rb_next(nd)) { > + machine = rb_entry(nd, struct machine, rb_node); > + if (machine__load_kernel_maps(machine) < 0) > + err = -1; > + } > + > + return err; > +} Does this preload the kernel maps into the correct machines collection? It looks like this loads into inject->session->machines, but the ASLR tool uses its own isolated aslr->machines collection which starts empty. Because the preloaded maps are not available to the ASLR tool, will kernel maps still be lazy-loaded dynamically during runtime event processing? Also, since this is called before perf_session__process_events(), are there any guest machines discovered from the event stream yet to iterate over? [ ... ] > @@ -2689,18 +2758,38 @@ int cmd_inject(int argc, const char **argv) [ ... ] > + if (inject.aslr) { > + struct evsel *evsel; > + > + evlist__for_each_entry(inject.session->evlist, evsel) { > + evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; > + > + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) > + evsel->core.attr.bp_addr = 0; > + } > + } Does modifying the sample_type upfront cause the input event parser to misread the physical perf.data file? The input file was recorded with the original sample_type, so its payloads still contain the stripped fields. In aslr_tool__process_sample(), when it checks the masked sample_type: sample_type = evsel->core.attr.sample_type; ... if (sample_type & PERF_SAMPLE_REGS_USER) { ... 
} Won't it fail to advance the input array index over the register payload, causing subsequent checks to mistakenly read the leftover register payload data as stack sizes or lengths? > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > new file mode 100644 > index 0000000000000..d0b1b33377fd2 > --- /dev/null > +++ b/tools/perf/util/aslr.c [ ... ] > +static int aslr_tool__process_ksymbol(const struct perf_tool *tool, > + union perf_event *event, > + struct perf_sample *sample, > + struct machine *machine) > +{ [ ... ] > + /* Remaps the ksymbol.start */ > + new_event->ksymbol.addr = aslr_tool__findnew_mapping(aslr, thread, > + PERF_RECORD_MISC_KERNEL, > + event->ksymbol.addr, > + event->ksymbol.len, > + /*pgoff=*/0); > + > + err = delegate->ksymbol(delegate, new_event, sample, machine); > + thread__put(thread); > + return err; > +} Could this truncate ksymbol addresses? The aslr_tool__findnew_mapping() function appears to return the remapped base address of the entire map when an address falls into an existing map, but it does not seem to add the symbol's internal offset. Will this cause ksymbols inside an existing kernel map to be clustered at the exact start address of the mapping in the output trace? -- Sashiko AI review · https://sashiko.dev/#/patchset/20260519080824.3329601-1-irogers@google.com?part=2 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v7 3/4] perf test: Add inject ASLR test 2026-05-19 8:08 ` [PATCH v7 0/4] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-05-19 8:08 ` [PATCH v7 1/4] perf maps: Add maps__mutate_mapping Ian Rogers 2026-05-19 8:08 ` [PATCH v7 2/4] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses Ian Rogers @ 2026-05-19 8:08 ` Ian Rogers 2026-05-19 8:08 ` [PATCH v7 4/4] perf aslr: Strip sample registers Ian Rogers 2026-05-20 6:30 ` [PATCH v8 0/4] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 4 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-05-19 8:08 UTC (permalink / raw) To: irogers, acme, james.clark, namhyung Cc: adrian.hunter, gmx, jolsa, linux-kernel, linux-perf-users, mingo, peterz Add a new shell test `inject_aslr.sh` to verify the `perf inject --aslr` feature. The test covers: - Basic address remapping for user space samples. - Pipe mode coverage for `perf record` piped into `perf inject --aslr`. - Callchain address remapping. - Consistency of `perf report` output before and after injection. - Pipe mode report consistency. - Dropping of samples that leak ASLR info (physical addresses). - Kernel address remapping (using a dedicated kernel-intensive VFS `dd` workload, copying /dev/zero to /dev/null, so that the task-clock timer reliably takes samples while the task is executing in kernel mode). - Kernel report consistency with address normalization. The test suite is hardened with a global 'set -o pipefail' so that a failure anywhere in a pipeline is caught, awk filters that consume their entire input rather than exiting early so that upstream commands are not killed by SIGPIPE, and a dedicated pipe output scenario that validates the raw stdout stream of 'perf inject -o -'.
Assisted-by: Gemini-CLI:Google Gemini 3 Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/tests/shell/inject_aslr.sh | 458 ++++++++++++++++++++++++++ 1 file changed, 458 insertions(+) create mode 100755 tools/perf/tests/shell/inject_aslr.sh diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh new file mode 100755 index 000000000000..098bf1db1245 --- /dev/null +++ b/tools/perf/tests/shell/inject_aslr.sh @@ -0,0 +1,458 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# perf inject --aslr test + +set -e +set -o pipefail + +shelldir=$(dirname "$0") +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + +sym="noploop" + +skip_test_missing_symbol ${sym} + +# Create global temp directory +temp_dir=$(mktemp -d /tmp/perf-test-aslr.XXXXXXXXXX) + +prog="perf test -w noploop" +[ "$(uname -m)" = "s390x" ] && prog="$prog 3" +err=0 +kprog="dd if=/dev/zero of=/dev/null bs=1M count=500" + +cleanup() { + # Check if temp_dir is set and looks sane before removing + if [[ "${temp_dir}" =~ ^/tmp/perf-test-aslr\. 
]]; then + rm -rf "${temp_dir}" + fi +} + +trap_cleanup() { + cleanup + exit 1 +} + +trap cleanup EXIT +trap trap_cleanup TERM INT + +get_noploop_addr() { + local file=$1 + perf script -i "$file" | awk ' + BEGIN { found=0 } + { + for (i=1; i<=NF; i++) { + if ($i ~ /noploop\+/) { + if (!found) { + print $(i-1) + found=1 + } + } + } + }' +} + +test_basic_aslr() { + echo "Test basic ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.basic.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.basic.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -v --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Basic ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Basic ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Basic ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Basic ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Basic ASLR test [Success]" + fi +} + +test_pipe_aslr() { + echo "Test pipe mode ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe.XXXXXX") + + # Use tee to save the original pipe data for comparison + perf record -e task-clock:u -o - ${prog} | tee "${data}" | perf inject --aslr -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Pipe ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Pipe ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Pipe ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Pipe ASLR test [Failed - addresses are not remapped]" 
+ err=1 + else + echo "Pipe ASLR test [Success]" + fi +} + +test_callchain_aslr() { + echo "Test Callchain ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.callchain.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.callchain.XXXXXX") + + perf record -g -e task-clock:u -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Callchain ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Callchain ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Callchain ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Callchain ASLR test [Failed - addresses are not remapped]" + err=1 + else + # Extract callchain addresses (indented lines starting with hex addresses) + orig_callchain=$(perf script -i "${data}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + new_callchain=$(perf script -i "${data2}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + + if [ -z "$orig_callchain" ]; then + echo "Callchain ASLR test [Failed - no callchain samples in original file]" + err=1 + elif [ -z "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain data was dropped]" + err=1 + elif [ "$orig_callchain" = "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain addresses were not remapped]" + err=1 + else + echo "Callchain ASLR test [Success]" + fi + fi +} + +test_report_aslr() { + echo "Test perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i 
"${data}" -o "${data_clean}" + perf inject -v -b --aslr -i "${data}" -o "${data2}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Report ASLR test [Success]" + fi +} + +test_pipe_report_aslr() { + echo "Test pipe mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + # Use tee to save the original pipe data, then process it with inject -b + perf record -e task-clock:u -o - ${prog} | \ + tee "${data}" | \ + perf inject -b --aslr -o "${data2}" + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || 
true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Pipe Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Report ASLR test [Success]" + fi +} + +test_pipe_out_report_aslr() { + echo "Test pipe output mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_out_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf inject -b --aslr -i "${data}" -o - | perf report -i - --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! 
-s "${report1_clean}" ]; then + echo "Pipe Output Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Output Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Output Report ASLR test [Success]" + fi +} + +test_dropped_samples() { + echo "Test dropped samples (phys-data)" + local data + data=$(mktemp "${temp_dir}/perf.data.dropped.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.dropped.XXXXXX") + + # Check if --phys-data is supported by recording a short run + if ! perf record -e task-clock:u --phys-data -o "${data}" -- sleep 0.1 > /dev/null 2>&1; then + echo "Skipping dropped samples test as --phys-data is not supported" + return + fi + + perf record -e task-clock:u --phys-data -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + # Verify that the original file actually contained samples! + orig_samples=$(perf script -i "${data}" | wc -l) + if [ "$orig_samples" -eq 0 ]; then + echo "Dropped samples test [Failed - no samples in original file]" + err=1 + else + # Verify that samples are dropped. + samples_count=$(perf script -i "${data2}" | wc -l) + + if [ "$samples_count" -gt 0 ]; then + echo "Dropped samples test [Failed - samples were not dropped]" + err=1 + else + echo "Dropped samples test [Success]" + fi + fi +} + +test_kernel_aslr() { + echo "Test kernel ASLR remapping" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then + echo "Skipping kernel ASLR test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel ASLR test as kernel map could not be recorded (permissions restricted)" + return + fi + + perf inject -v --aslr -i "${kdata}" -o "${kdata2}" + + # Check if kernel addresses are remapped. + # Find the field that ends with :k: (the event name) and take the next field! + orig_addr=$(perf script -i "${kdata}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + new_addr=$(perf script -i "${kdata2}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + + echo "Kernel ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Kernel ASLR test [Failed - no kernel samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Kernel ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Kernel ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Kernel ASLR test [Success]" + fi +} + +test_kernel_report_aslr() { + echo "Test kernel perf report consistency" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel_report.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_report_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then + echo "Skipping kernel report test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel report test as kernel map could not be recorded (permissions restricted)" + return + fi + + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i "${kdata}" -o "${data_clean}" + perf inject -v -b --aslr -i "${kdata}" -o "${kdata2}" + + local report1="${temp_dir}/report_kernel1" + local report2="${temp_dir}/report_kernel2" + local report1_clean="${temp_dir}/report_kernel1.clean" + local report2_clean="${temp_dir}/report_kernel2.clean" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${kdata2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' > "${report2_clean}" || true + + # Normalize kernel DSOs and addresses in clean reports + # This allows kernel modules to be either a module or kernel.kallsyms + local report1_norm="${temp_dir}/report_kernel1.norm" + local report2_norm="${temp_dir}/report_kernel2.norm" + local diff_file="${temp_dir}/diff_kernel" + + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report1_clean}" | \ + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ + sort > "${report1_norm}" || true + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report2_clean}" | \ + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ + sort > "${report2_norm}" || true + + diff -u -w "${report1_norm}" "${report2_norm}" > "${diff_file}" || true + + if [ ! 
-s "${report1_norm}" ]; then + echo "Kernel Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Kernel Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Kernel Report ASLR test [Success]" + fi +} + +test_basic_aslr +test_pipe_aslr +test_callchain_aslr +test_report_aslr +test_pipe_report_aslr +test_pipe_out_report_aslr +test_dropped_samples +test_kernel_aslr +test_kernel_report_aslr + +cleanup +exit $err -- 2.54.0.631.ge1b05301d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* [PATCH v7 4/4] perf aslr: Strip sample registers 2026-05-19 8:08 ` [PATCH v7 0/4] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (2 preceding siblings ...) 2026-05-19 8:08 ` [PATCH v7 3/4] perf test: Add inject ASLR test Ian Rogers @ 2026-05-19 8:08 ` Ian Rogers 2026-05-19 9:55 ` sashiko-bot 2026-05-20 6:30 ` [PATCH v8 0/4] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 4 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-05-19 8:08 UTC (permalink / raw) To: irogers, acme, james.clark, namhyung Cc: adrian.hunter, gmx, jolsa, linux-kernel, linux-perf-users, mingo, peterz Previously, when the ASLR tracking tool encountered sample events containing user or interrupt register dumps (PERF_SAMPLE_REGS_USER / PERF_SAMPLE_REGS_INTR), it conservatively dropped the entire sample event, to avoid leaking absolute virtual addresses held in the raw register values. If a trace session was recorded with register collection enabled, this resulted in a 100% sample drop rate, and this happened by default on ARM64. Refactor the ASLR tool to strip only the register dump payload words from PERF_RECORD_SAMPLE events, shrinking the output sample header size accordingly. Incoming PERF_RECORD_ATTR events are scrubbed up front to clear the register dump sample_type bits and register masks, and the ABI words of output samples are overwritten with PERF_SAMPLE_REGS_ABI_NONE. This keeps downstream evsel parsers in sync with the rewritten samples while retaining the full sample profile with no register data in it. A dedicated test case in inject_aslr.sh compares sorted reports generated with and without --aslr to check that no samples are lost, and checks that no user register dumps remain in the injected output.
Assisted-by: Gemini-CLI:Google Gemini 3 Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/builtin-inject.c | 22 +++ tools/perf/tests/shell/inject_aslr.sh | 55 ++++++++ tools/perf/util/aslr.c | 185 ++++++++++++++++---------- tools/perf/util/aslr.h | 1 + 4 files changed, 195 insertions(+), 68 deletions(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 8fe924e730a1..4bafccf7dae4 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -2519,6 +2519,17 @@ static int __cmd_inject(struct perf_inject *inject) } } + if (inject->aslr) { + struct evsel *evsel; + + evlist__for_each_entry(session->evlist, evsel) { + evsel__reset_sample_bit(evsel, REGS_USER); + evsel__reset_sample_bit(evsel, REGS_INTR); + evsel->core.attr.sample_regs_user = 0; + evsel->core.attr.sample_regs_intr = 0; + } + } + session->header.data_offset = output_data_offset; @@ -2783,7 +2794,18 @@ int cmd_inject(int argc, const char **argv) struct evsel *evsel; evlist__for_each_entry(inject.session->evlist, evsel) { + ret = aslr_tool__cache_orig_attrs(tool, evsel); + if (ret) { + pr_err("Failed to cache original attributes: %d\n", ret); + goto out_delete; + } + + /* Strip the registers and unknown flags natively inside memory! 
*/ evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + evsel__reset_sample_bit(evsel, REGS_USER); + evsel__reset_sample_bit(evsel, REGS_INTR); + evsel->core.attr.sample_regs_user = 0; + evsel->core.attr.sample_regs_intr = 0; if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) evsel->core.attr.bp_addr = 0; diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh index 098bf1db1245..cd60e1b7d840 100755 --- a/tools/perf/tests/shell/inject_aslr.sh +++ b/tools/perf/tests/shell/inject_aslr.sh @@ -444,6 +444,60 @@ test_kernel_report_aslr() { fi } +test_regs_stripping() { + echo "Test user register stripping" + local rdata="${temp_dir}/perf.data.regs" + local rdata2="${temp_dir}/perf.data.regs.injected" + local rdata_clean="${temp_dir}/perf.data.regs.clean" + + if ! perf record --user-regs -o "${rdata}" ${prog} > /dev/null 2>&1; then + echo "Skipping user registers test as recording failed (unsupported flag/platform)" + return + fi + + perf inject -b -i "${rdata}" -o "${rdata_clean}" + perf inject -v -b --aslr -i "${rdata}" -o "${rdata2}" + + local report1="${temp_dir}/report_regs1" + local report2="${temp_dir}/report_regs2" + local report1_clean="${temp_dir}/report_regs1.clean" + local report2_clean="${temp_dir}/report_regs2.clean" + local diff_file="${temp_dir}/diff_regs" + + perf report -i "${rdata_clean}" --stdio > "${report1}" 2>/dev/null || true + perf report -i "${rdata2}" --stdio > "${report2}" 2>/dev/null || true + + grep '%' "${report1}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | \ + sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | \ + sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! 
-s "${report1_clean}" ]; then + echo "User registers stripping test [Failed - profile trace starved/empty]" + err=1 + return + elif [ -s "${diff_file}" ]; then + echo "User registers stripping test [Failed - report parsing differs]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + return + fi + + local script_dump="${temp_dir}/script_regs_dump" + perf script -D -i "${rdata2}" > "${script_dump}" 2>/dev/null || true + if grep -q "PERF_SAMPLE_REGS_USER" "${script_dump}"; then + echo "User registers stripping test [Failed - register dumps still present]" + err=1 + else + echo "User registers stripping test [Success]" + fi +} + test_basic_aslr test_pipe_aslr test_callchain_aslr @@ -453,6 +507,7 @@ test_pipe_out_report_aslr test_dropped_samples test_kernel_aslr test_kernel_report_aslr +test_regs_stripping cleanup exit $err diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c index d0b1b33377fd..ef6ba6fa5ff4 100644 --- a/tools/perf/util/aslr.c +++ b/tools/perf/util/aslr.c @@ -5,6 +5,7 @@ #include "debug.h" #include "event.h" #include "evsel.h" +#include "evlist.h" #include "machine.h" #include "map.h" #include "thread.h" @@ -16,33 +17,10 @@ #include <internal/lib.h> /* page_size */ #include <linux/compiler.h> #include <linux/zalloc.h> +#include <errno.h> #include <inttypes.h> #include <unistd.h> -#define ASLR_SUPPORTED_SAMPLE_TYPE ( \ - PERF_SAMPLE_IDENTIFIER | \ - PERF_SAMPLE_IP | \ - PERF_SAMPLE_TID | \ - PERF_SAMPLE_TIME | \ - PERF_SAMPLE_ADDR | \ - PERF_SAMPLE_ID | \ - PERF_SAMPLE_STREAM_ID | \ - PERF_SAMPLE_CPU | \ - PERF_SAMPLE_PERIOD | \ - PERF_SAMPLE_READ | \ - PERF_SAMPLE_CALLCHAIN | \ - PERF_SAMPLE_RAW | \ - PERF_SAMPLE_BRANCH_STACK | \ - PERF_SAMPLE_STACK_USER | \ - PERF_SAMPLE_WEIGHT_TYPE | \ - PERF_SAMPLE_DATA_SRC | \ - PERF_SAMPLE_TRANSACTION | \ - PERF_SAMPLE_PHYS_ADDR | \ - PERF_SAMPLE_CGROUP | \ - PERF_SAMPLE_DATA_PAGE_SIZE | \ - PERF_SAMPLE_CODE_PAGE_SIZE | \ - PERF_SAMPLE_AUX) - /** * struct remap_addresses_key - 
Key for mapping original addresses to remapped ones. * @dso: Pointer to the DSO (Dynamic Shared Object) associated with the mapping. @@ -67,6 +45,22 @@ struct aslr_mapping { u64 remap_start; }; +struct aslr_evsel_priv { + u64 orig_sample_type; + u64 orig_sample_regs_user; + u64 orig_sample_regs_intr; +}; + +static size_t evsel_hash(long key, void *ctx __maybe_unused) +{ + return (size_t)key; +} + +static bool evsel_equal(long key1, long key2, void *ctx __maybe_unused) +{ + return key1 == key2; +} + struct aslr_tool { /** @tool: The tool implemented here and a pointer to a delegate to process the data. */ struct delegate_tool tool; @@ -78,6 +72,11 @@ struct aslr_tool { struct hashmap remap_addresses; /** @top_addresses: mapping from process to max remapped address. */ struct hashmap top_addresses; + /** + * @evsel_orig_attrs: mapping from evsel pointer to its original + * unstripped sample_type and registers bitmasks. + */ + struct hashmap evsel_orig_attrs; }; static const pid_t kernel_pid = -1; @@ -167,9 +166,7 @@ static u64 aslr_tool__remap_address(struct aslr_tool *aslr, key.machine = maps__machine(aslr_thread->maps); key.dso = map__dso(al.map); key.invariant = map__start(al.map) - map__pgoff(al.map); - key.pid = (effective_cpumode == PERF_RECORD_MISC_KERNEL || - effective_cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? - kernel_pid : aslr_thread->pid_; + key.pid = effective_cpumode == PERF_RECORD_MISC_KERNEL ? 
kernel_pid : aslr_thread->pid_; if (hashmap__find(&aslr->remap_addresses, &key, &remapped_invariant_ptr)) { remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + @@ -563,12 +560,25 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, u64 addr; size_t i; size_t j; + struct aslr_evsel_priv *priv = NULL; + u64 orig_sample_type; + u64 orig_regs_user; + u64 orig_regs_intr; del_tool = container_of(tool, struct delegate_tool, tool); aslr = container_of(del_tool, struct aslr_tool, tool); delegate = aslr->tool.delegate; ret = -EFAULT; sample_type = evsel->core.attr.sample_type; + orig_sample_type = sample_type; + orig_regs_user = evsel->core.attr.sample_regs_user; + orig_regs_intr = evsel->core.attr.sample_regs_intr; + + if (hashmap__find(&aslr->evsel_orig_attrs, evsel, &priv)) { + orig_sample_type = priv->orig_sample_type; + orig_regs_user = priv->orig_sample_regs_user; + orig_regs_intr = priv->orig_sample_regs_intr; + } max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); new_event = (union perf_event *)aslr->event_copy; @@ -615,25 +625,25 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, i++; \ } while (0) - if (sample_type & PERF_SAMPLE_IDENTIFIER) + if (orig_sample_type & PERF_SAMPLE_IDENTIFIER) COPY_U64(); /* id */ - if (sample_type & PERF_SAMPLE_IP) + if (orig_sample_type & PERF_SAMPLE_IP) REMAP_U64(sample->ip); - if (sample_type & PERF_SAMPLE_TID) + if (orig_sample_type & PERF_SAMPLE_TID) COPY_U64(); /* pid, tid */ - if (sample_type & PERF_SAMPLE_TIME) + if (orig_sample_type & PERF_SAMPLE_TIME) COPY_U64(); /* time */ - if (sample_type & PERF_SAMPLE_ADDR) + if (orig_sample_type & PERF_SAMPLE_ADDR) REMAP_U64(sample->addr); - if (sample_type & PERF_SAMPLE_ID) + if (orig_sample_type & PERF_SAMPLE_ID) COPY_U64(); /* id */ - if (sample_type & PERF_SAMPLE_STREAM_ID) + if (orig_sample_type & PERF_SAMPLE_STREAM_ID) 
COPY_U64(); /* stream_id */ - if (sample_type & PERF_SAMPLE_CPU) + if (orig_sample_type & PERF_SAMPLE_CPU) COPY_U64(); /* cpu, res */ - if (sample_type & PERF_SAMPLE_PERIOD) + if (orig_sample_type & PERF_SAMPLE_PERIOD) COPY_U64(); /* period */ - if (sample_type & PERF_SAMPLE_READ) { + if (orig_sample_type & PERF_SAMPLE_READ) { if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { COPY_U64(); /* value */ if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) @@ -667,7 +677,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, } } } - if (sample_type & PERF_SAMPLE_CALLCHAIN) { + if (orig_sample_type & PERF_SAMPLE_CALLCHAIN) { u64 nr; if (CHECK_BOUNDS(1, 1)) { @@ -733,7 +743,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr); } } - if (sample_type & PERF_SAMPLE_RAW) { + if (orig_sample_type & PERF_SAMPLE_RAW) { size_t bytes = sizeof(u32) + sample->raw_size; size_t u64_words = (bytes + 7) / 8; @@ -752,7 +762,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + if (orig_sample_type & PERF_SAMPLE_BRANCH_STACK) { u64 nr; if (CHECK_BOUNDS(1, 1)) { @@ -797,7 +807,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, goto out_put; } } - if (sample_type & PERF_SAMPLE_REGS_USER) { + if (orig_sample_type & PERF_SAMPLE_REGS_USER) { u64 abi; if (CHECK_BOUNDS(1, 0)) { @@ -806,22 +816,16 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, } abi = in_array[i++]; if (abi != PERF_SAMPLE_REGS_ABI_NONE) { - u64 nr = hweight64(evsel->core.attr.sample_regs_user); + u64 nr = hweight64(orig_regs_user); - if (nr > max_i - i || nr > max_j - j) { + if (nr > max_i - i) { ret = -EFAULT; goto out_put; } - memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); i += nr; - j += nr; } - /* TODO: can this be less conservative? 
*/ - pr_debug("Dropping regs user sample as possible ASLR leak\n"); - ret = 0; - goto out_put; } - if (sample_type & PERF_SAMPLE_STACK_USER) { + if (orig_sample_type & PERF_SAMPLE_STACK_USER) { u64 size; if (CHECK_BOUNDS(1, 1)) { @@ -854,13 +858,13 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + if (orig_sample_type & PERF_SAMPLE_WEIGHT_TYPE) COPY_U64(); /* perf_sample_weight */ - if (sample_type & PERF_SAMPLE_DATA_SRC) + if (orig_sample_type & PERF_SAMPLE_DATA_SRC) COPY_U64(); /* data_src */ - if (sample_type & PERF_SAMPLE_TRANSACTION) + if (orig_sample_type & PERF_SAMPLE_TRANSACTION) COPY_U64(); /* transaction */ - if (sample_type & PERF_SAMPLE_REGS_INTR) { + if (orig_sample_type & PERF_SAMPLE_REGS_INTR) { u64 abi; if (CHECK_BOUNDS(1, 0)) { @@ -869,36 +873,30 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, } abi = in_array[i++]; if (abi != PERF_SAMPLE_REGS_ABI_NONE) { - u64 nr = hweight64(evsel->core.attr.sample_regs_intr); + u64 nr = hweight64(orig_regs_intr); - if (nr > max_i - i || nr > max_j - j) { + if (nr > max_i - i) { ret = -EFAULT; goto out_put; } - memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); i += nr; - j += nr; } - /* TODO: can this be less conservative? */ - pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); - ret = 0; - goto out_put; } - if (sample_type & PERF_SAMPLE_PHYS_ADDR) { + if (orig_sample_type & PERF_SAMPLE_PHYS_ADDR) { COPY_U64(); /* phys_addr */ /* TODO: can this be less conservative? 
*/ pr_debug("Dropping physical address sample as possible ASLR leak\n"); ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_CGROUP) + if (orig_sample_type & PERF_SAMPLE_CGROUP) COPY_U64(); /* cgroup */ - if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + if (orig_sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) COPY_U64(); /* data_page_size */ - if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + if (orig_sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) COPY_U64(); /* code_page_size */ - if (sample_type & PERF_SAMPLE_AUX) { + if (orig_sample_type & PERF_SAMPLE_AUX) { u64 size; if (CHECK_BOUNDS(1, 1)) { @@ -966,6 +964,7 @@ static int aslr_tool__process_attr(const struct perf_tool *tool, struct aslr_tool *aslr; struct perf_tool *delegate; union perf_event *new_event; + int err; del_tool = container_of(tool, struct delegate_tool, tool); aslr = container_of(del_tool, struct aslr_tool, tool); @@ -976,9 +975,32 @@ static int aslr_tool__process_attr(const struct perf_tool *tool, if (new_event->attr.attr.type == PERF_TYPE_BREAKPOINT) new_event->attr.attr.bp_addr = 0; /* Conservatively remove addresses. 
*/ + if (new_event->attr.attr.sample_type & PERF_SAMPLE_REGS_USER) { + new_event->attr.attr.sample_type &= ~PERF_SAMPLE_REGS_USER; + new_event->attr.attr.sample_regs_user = 0; + } + if (new_event->attr.attr.sample_type & PERF_SAMPLE_REGS_INTR) { + new_event->attr.attr.sample_type &= ~PERF_SAMPLE_REGS_INTR; + new_event->attr.attr.sample_regs_intr = 0; + } + new_event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; - return delegate->attr(delegate, new_event, pevlist); + err = delegate->attr(delegate, new_event, pevlist); + if (!err && pevlist && *pevlist) { + struct evsel *evsel = evlist__last(*pevlist); + struct aslr_evsel_priv *priv = zalloc(sizeof(*priv)); + + if (priv) { + priv->orig_sample_type = event->attr.attr.sample_type; + priv->orig_sample_regs_user = event->attr.attr.sample_regs_user; + priv->orig_sample_regs_intr = event->attr.attr.sample_regs_intr; + if (hashmap__add(&aslr->evsel_orig_attrs, evsel, priv) != 0) + free(priv); + } + } + + return err; } static int skipn(int fd, off_t n) @@ -1037,6 +1059,9 @@ static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) hashmap__init(&aslr->top_addresses, top_addresses__hash, top_addresses__equal, /*ctx=*/NULL); + hashmap__init(&aslr->evsel_orig_attrs, + evsel_hash, evsel_equal, + /*ctx=*/NULL); aslr->tool.tool.sample = aslr_tool__process_sample; /* read - reads a counter, okay to delegate. 
*/ @@ -1097,10 +1122,34 @@ void aslr_tool__delete(struct perf_tool *tool) zfree(&cur->pkey); zfree(&cur->pvalue); } + hashmap__for_each_entry(&aslr->evsel_orig_attrs, cur, bkt) { + zfree(&cur->pvalue); + } hashmap__clear(&aslr->remap_addresses); hashmap__clear(&aslr->top_addresses); + hashmap__clear(&aslr->evsel_orig_attrs); machines__destroy_kernel_maps(&aslr->machines); machines__exit(&aslr->machines); free(aslr); } + +int aslr_tool__cache_orig_attrs(struct perf_tool *tool, struct evsel *evsel) +{ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct aslr_evsel_priv *priv = zalloc(sizeof(*priv)); + + if (!priv) + return -ENOMEM; + + priv->orig_sample_type = evsel->core.attr.sample_type; + priv->orig_sample_regs_user = evsel->core.attr.sample_regs_user; + priv->orig_sample_regs_intr = evsel->core.attr.sample_regs_intr; + + if (hashmap__add(&aslr->evsel_orig_attrs, evsel, priv) != 0) { + free(priv); + return -EEXIST; + } + return 0; +} diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h index a9b90bf29540..e4cdb337a66f 100644 --- a/tools/perf/util/aslr.h +++ b/tools/perf/util/aslr.h @@ -33,5 +33,6 @@ struct evsel; struct perf_tool *aslr_tool__new(struct perf_tool *delegate); void aslr_tool__delete(struct perf_tool *aslr); +int aslr_tool__cache_orig_attrs(struct perf_tool *tool, struct evsel *evsel); #endif /* __PERF_ASLR_H */ -- 2.54.0.631.ge1b05301d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v7 4/4] perf aslr: Strip sample registers 2026-05-19 8:08 ` [PATCH v7 4/4] perf aslr: Strip sample registers Ian Rogers @ 2026-05-19 9:55 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-05-19 9:55 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 4 potential issue(s) to consider: - [High] Modifying `evsel->core.attr.sample_type` before input event processing corrupts the raw event stream parser. - [Low] Build failure due to undeclared `ASLR_SUPPORTED_SAMPLE_TYPE` identifier. - [Low] Commit message discrepancy regarding the handling of ABI words. - [Low] Error masking in `aslr_tool__cache_orig_attrs` hides out-of-memory errors. -- commit 828b37c8e32f7fe3bd6173589afcab0ed6408c51 Author: Ian Rogers <irogers@google.com> perf aslr: Strip sample registers This commit refactors the ASLR tool to strip register dumps from sample events rather than dropping the entire sample. It scrubs incoming attribute events and attempts to keep downstream parsers synchronized. > and output sample ABI words are safely overwritten to > PERF_SAMPLE_REGS_ABI_NONE. This isn't a bug, but the commit message states that the ABI words are overwritten to PERF_SAMPLE_REGS_ABI_NONE. However, the code in aslr_tool__process_sample() skips writing to the output array entirely, stripping the ABI words and register payloads rather than overwriting them. Is this description accurate? > diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c > index 8fe924e730a16..4bafccf7dae40 100644 > --- a/tools/perf/builtin-inject.c > +++ b/tools/perf/builtin-inject.c [ ... 
] > @@ -2783,7 +2794,18 @@ int cmd_inject(int argc, const char **argv) > struct evsel *evsel; > > evlist__for_each_entry(inject.session->evlist, evsel) { > + ret = aslr_tool__cache_orig_attrs(tool, evsel); > + if (ret) { > + pr_err("Failed to cache original attributes: %d\n", ret); > + goto out_delete; > + } > + > + /* Strip the registers and unknown flags natively inside memory! */ > evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; This macro is removed from tools/perf/util/aslr.c in this patch and doesn't appear to be relocated to a shared header. Will this cause a build failure due to an undeclared identifier? > + evsel__reset_sample_bit(evsel, REGS_USER); > + evsel__reset_sample_bit(evsel, REGS_INTR); > + evsel->core.attr.sample_regs_user = 0; > + evsel->core.attr.sample_regs_intr = 0; During trace ingestion, the core parser relies on evsel->core.attr.sample_type to deserialize raw binary samples linearly. If the register bits are cleared prematurely from the in-memory evsel here in cmd_inject() (before __cmd_inject() executes perf_session__process_events()), the parser will expect no register data and skip extracting those fields. If the raw sample contains subsequent trailing fields like PERF_SAMPLE_STACK_USER, will the unskipped raw register bytes physically present in the stream be misinterpreted as those trailing fields? Could this result in out-of-bounds memory reads and -EFAULT errors, effectively aborting trace processing for files recorded with registers? > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > index d0b1b33377fd2..ef6ba6fa5ff45 100644 > --- a/tools/perf/util/aslr.c > +++ b/tools/perf/util/aslr.c [ ... ] > @@ -1097,10 +1122,34 @@ void aslr_tool__delete(struct perf_tool *tool) [ ... 
] > +int aslr_tool__cache_orig_attrs(struct perf_tool *tool, struct evsel *evsel) > +{ > + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); > + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); > + struct aslr_evsel_priv *priv = zalloc(sizeof(*priv)); > + > + if (!priv) > + return -ENOMEM; > + > + priv->orig_sample_type = evsel->core.attr.sample_type; > + priv->orig_sample_regs_user = evsel->core.attr.sample_regs_user; > + priv->orig_sample_regs_intr = evsel->core.attr.sample_regs_intr; > + > + if (hashmap__add(&aslr->evsel_orig_attrs, evsel, priv) != 0) { > + free(priv); > + return -EEXIST; > + } > + return 0; > +} If memory allocation fails for the new bucket or entry inside hashmap__add(), it will return -ENOMEM. Since the code assumes any non-zero return indicates the entry already exists and unconditionally returns -EEXIST, does this unintentionally mask legitimate memory exhaustion errors during tool initialization? -- Sashiko AI review · https://sashiko.dev/#/patchset/20260519080824.3329601-1-irogers@google.com?part=4 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v8 0/4] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes 2026-05-19 8:08 ` [PATCH v7 0/4] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (3 preceding siblings ...) 2026-05-19 8:08 ` [PATCH v7 4/4] perf aslr: Strip sample registers Ian Rogers @ 2026-05-20 6:30 ` Ian Rogers 2026-05-20 6:30 ` [PATCH v8 1/4] perf maps: Add maps__mutate_mapping Ian Rogers ` (4 more replies) 4 siblings, 5 replies; 183+ messages in thread From: Ian Rogers @ 2026-05-20 6:30 UTC (permalink / raw) To: irogers, acme, james.clark, namhyung Cc: adrian.hunter, gmx, jolsa, linux-kernel, linux-perf-users, mingo, peterz This patch series introduces the new 'perf inject --aslr' feature, which remaps virtual addresses, or drops events that could leak address information (such as samples carrying physical addresses), when perf.data files are shared between machines. Bundled with this feature is a critical bug fix in the core map tracking code that hardens perf session analysis against concurrent lookup data races. Core Feature: 'perf inject --aslr' (Patches 2, 3, and 4) Transferring perf.data files across environments introduces a potential leak of virtual address footprints, weakening Address Space Layout Randomization (ASLR) on the originating machine. To mitigate this, we introduce the --aslr flag into perf inject. Unknown or unhandled events are dropped conservatively, while handled samples and branch stacks have their virtual addresses remapped. Events carrying virtual memory layouts are conservatively remapped or dropped, while zero-address-risk lifecycle metadata records (such as namespaces, cgroups, and BPF program info) are intentionally delegated to preserve comprehensive downstream trace tool analysis compatibility. The ASLR tracking tool virtualizes process and machine namespaces using 'struct machines' to safely isolate host mappings from unprivileged KVM guest address spaces.
Memory space layouts are tracked globally per process context to ensure linear, continuous space allocations across successive mapping runs. Each mapping is keyed by its DSO together with an invariant (start - pgoff), which uniquely identifies the mapping, providing complete collision safety against separate overlapping libraries that share the same invariant while remaining immune to boundary shifts or split fragmentations. To remain strictly conservative and guarantee security, the tool scrubs breakpoint addresses (bp_addr) from all synthesized stream headers, completely drops PERF_RECORD_TEXT_POKE events to prevent absolute immediate pointer operand leaks, and drops unsupported complex payloads (such as user register stacks, raw tracepoints, and hardware AUX tracing frames). Verification is reinforced in Patch 3 with a comprehensive POSIX shell suite ('inject_aslr.sh'), hardened against SIGPIPE signal exits with stream-consuming awk loops and robust 'set -o pipefail' assertions. The suite utilizes a highly dense, system-call-intensive VFS byte block loop workload (dd count=500) to guarantee a deterministic stream of hardware timer interrupt samples taken in kernel mode. Prerequisite Bug Fix (Patch 1) During development, a core map indexing issue was identified and resolved to prevent concurrent lookup data races during session analysis: 1. perf maps: Patch 1 replaces old remove-reinsert map boundary update cycles with a high-performance, thread-safe transactional framework maps__mutate_mapping() that enforces write semaphore lock closures around all in-place virtual address mutations and sorting invalidations, completely closing concurrent lookup race condition windows. It explicitly executes DWARF address space cache invalidation (libdw__invalidate_dwfl()) to keep debugger unwinding frames perfectly synchronized. Changes since v7: - Minor nits cleaned up.
- Concurrency & Locking (Patch 1): Add a detailed doc comment block above maps__mutate_mapping() documenting the recursive down_write() deadlock risk during lazy symbol loading. Harden maps__load_maps() to return immediately when nr_maps == 0, avoiding spurious -ENOMEM returns (a zero-sized calloc() may legitimately return NULL). - Deadlock-Free Preloading (Patch 2): Replace upfront preloading with dynamic, discovery-driven preloading of host and guest kernel/module maps using machine->priv tracking in util/aslr.c, completely bypassing lazy symbol loading deadlock risks during event loops. - Symbol Offset Preservation (Patch 2): Fix the address translation offset truncation bug inside aslr_tool__findnew_mapping() to perfectly preserve the internal symbol address offset relative to map__start(), fully resolving relocation symbol truncations. - Trace Ingestion Decoupling (Patch 4): Decouple attribute stripping from trace ingestion parsing. Keep evsel->core.attr completely unmodified in memory during ingestion, and apply format stripping dynamically inside pipe repiping and post-processing file header serialization. Implement temporary sample size and attribute overrides inside aslr_tool__process_sample() to safely parse repacked events via evsel__parse_sample().
Ian Rogers (4): perf maps: Add maps__mutate_mapping perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses perf test: Add inject ASLR test perf aslr: Strip sample registers tools/perf/builtin-inject.c | 70 +- tools/perf/tests/shell/inject_aslr.sh | 518 ++++++++++ tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 1248 +++++++++++++++++++++++++ tools/perf/util/aslr.h | 38 + tools/perf/util/machine.c | 32 +- tools/perf/util/maps.c | 76 ++ tools/perf/util/maps.h | 3 + tools/perf/util/symbol-elf.c | 41 +- tools/perf/util/symbol.c | 17 +- 10 files changed, 2012 insertions(+), 32 deletions(-) create mode 100755 tools/perf/tests/shell/inject_aslr.sh create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h -- 2.54.0.631.ge1b05301d1-goog ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v8 1/4] perf maps: Add maps__mutate_mapping 2026-05-20 6:30 ` [PATCH v8 0/4] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers @ 2026-05-20 6:30 ` Ian Rogers 2026-05-20 7:06 ` sashiko-bot 2026-05-20 6:30 ` [PATCH v8 2/4] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses Ian Rogers ` (3 subsequent siblings) 4 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-05-20 6:30 UTC (permalink / raw) To: irogers, acme, james.clark, namhyung Cc: adrian.hunter, gmx, jolsa, linux-kernel, linux-perf-users, mingo, peterz During kernel ELF symbol parsing (dso__process_kernel_symbol), proc kallsyms image loading (dso__load_kernel_sym, dso__load_guest_kernel_sym), and dynamic kernel memory map alignment updates (machine__update_kernel_mmap), the loader directly modifies live virtual address boundary keys fields on map objects. If these boundaries are mutated while the map pointer actively resides inside the parent maps cache array list (kmaps) outside of any lock closure, an unsafe concurrent window is exposed where parallel worker lookup threads (e.g., inside perf top) can mistakenly assume the cache remains sorted based on stale parameters, executing binary search queries (bsearch) across an unsorted range and triggering lookup failures. Fix this by introducing maps__mutate_mapping() that explicitly acquires the parent maps write semaphore lock, executes an incoming mutation callback block to perform the field updates under lock protection, and invalidates the sorted tracking flags prior to releasing the write lock. This guarantees synchronization invariants, closing the concurrent lookup race window. The adjacent module alignment pass inside machine__create_kernel_maps() is safely preserved as a high-performance lockless pass, as its invocation lifecycle bounds remain strictly single-threaded by contract during session initialization construction. 
To safely support this unconditional down_write write lock mutator without recursive read-to-write self-deadlock upgrades during lazy symbol loading, we introduce a public maps__load_maps() API. It copies map pointers under a brief read lock and force-loads all modules locklessly outside the lock. Callers (such as perf inject) must pre-load all kernel symbol maps up front at startup using maps__load_maps(), completely bypassing dynamic runtime mutations. Fixes: 39b12f781271 ("perf tools: Make it possible to read object code from vmlinux") Assisted-by: Antigravity:gemini-3.5-flash Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/util/machine.c | 32 +++++++++------ tools/perf/util/maps.c | 76 ++++++++++++++++++++++++++++++++++++ tools/perf/util/maps.h | 3 ++ tools/perf/util/symbol-elf.c | 41 ++++++++++++------- tools/perf/util/symbol.c | 17 ++++++-- 5 files changed, 138 insertions(+), 31 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index e76f8c86e62a..ea918f75e3ad 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1522,22 +1522,30 @@ static void machine__set_kernel_mmap(struct machine *machine, map__set_end(machine->vmlinux_map, ~0ULL); } -static int machine__update_kernel_mmap(struct machine *machine, - u64 start, u64 end) +struct kernel_mmap_mutation_ctx { + u64 start; + u64 end; +}; + +static int kernel_mmap_mutate_cb(struct map *map, void *data) { - struct map *orig, *updated; - int err; + struct kernel_mmap_mutation_ctx *ctx = data; - orig = machine->vmlinux_map; - updated = map__get(orig); + map__set_start(map, ctx->start); + map__set_end(map, ctx->end); + if (ctx->start == 0 && ctx->end == 0) + map__set_end(map, ~0ULL); + return 0; +} - machine->vmlinux_map = updated; - maps__remove(machine__kernel_maps(machine), orig); - machine__set_kernel_mmap(machine, start, end); - err = maps__insert(machine__kernel_maps(machine), updated); - map__put(orig); +static int 
machine__update_kernel_mmap(struct machine *machine, + u64 start, u64 end) +{ + struct kernel_mmap_mutation_ctx ctx = { .start = start, .end = end }; - return err; + return maps__mutate_mapping(machine__kernel_maps(machine), + machine->vmlinux_map, + kernel_mmap_mutate_cb, &ctx); } int machine__create_kernel_maps(struct machine *machine) diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index 923935ee21b6..7dce07e4d9b4 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -576,6 +576,49 @@ void maps__remove(struct maps *maps, struct map *map) #endif } +/** + * maps__mutate_mapping - Apply write-protected mutations to a map. + * @maps: The maps collection containing the map. + * @map: The map to mutate. + * @mutate_cb: Callback function that performs the actual mutations. + * @data: Private data passed to the callback. + * + * This acquires the write lock on the maps semaphore to safely protect + * concurrent readers from seeing partially mutated or unsorted map boundaries. + * + * WARNING: Acquiring down_write() here can trigger a recursive self-deadlock if + * the caller already holds the read lock (e.g., during maps__for_each_map() or + * maps__find() iteration paths that trigger lazy symbol loading). To completely + * avoid this deadlock, all kernel/module maps must be pre-loaded up-front (via + * maps__load_maps()) under a clean, single-threaded context before entering + * multi-threaded event processing loops. 
+ */ +int maps__mutate_mapping(struct maps *maps, struct map *map, + int (*mutate_cb)(struct map *map, void *data), void *data) +{ + int err = 0; + + if (maps) + down_write(maps__lock(maps)); + + err = mutate_cb(map, data); + + if (maps) { + RC_CHK_ACCESS(maps)->maps_by_address_sorted = false; + RC_CHK_ACCESS(maps)->maps_by_name_sorted = false; + } + + if (maps) + up_write(maps__lock(maps)); + +#ifdef HAVE_LIBDW_SUPPORT + if (maps) + libdw__invalidate_dwfl(maps, maps__libdw_addr_space_dwfl(maps)); +#endif + + return err; +} + bool maps__empty(struct maps *maps) { bool res; @@ -626,6 +669,39 @@ int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data) return ret; } +int maps__load_maps(struct maps *maps) +{ + struct map **maps_copy; + unsigned int nr_maps; + int err = 0; + + if (!maps) + return 0; + + down_read(maps__lock(maps)); + nr_maps = maps__nr_maps(maps); + if (nr_maps == 0) { + up_read(maps__lock(maps)); + return 0; + } + maps_copy = calloc(nr_maps, sizeof(*maps_copy)); + if (!maps_copy) { + up_read(maps__lock(maps)); + return -ENOMEM; + } + for (unsigned int i = 0; i < nr_maps; i++) + maps_copy[i] = map__get(maps__maps_by_address(maps)[i]); + up_read(maps__lock(maps)); + + for (unsigned int i = 0; i < nr_maps; i++) { + if (map__load(maps_copy[i]) < 0) + err = -1; + map__put(maps_copy[i]); + } + free(maps_copy); + return err; +} + void maps__remove_maps(struct maps *maps, bool (*cb)(struct map *map, void *data), void *data) { struct map **maps_by_address; diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h index 5b80b199685e..4ec9b7453a3b 100644 --- a/tools/perf/util/maps.h +++ b/tools/perf/util/maps.h @@ -59,8 +59,11 @@ void maps__set_libdw_addr_space_dwfl(struct maps *maps, void *dwfl); size_t maps__fprintf(struct maps *maps, FILE *fp); +int maps__load_maps(struct maps *maps); int maps__insert(struct maps *maps, struct map *map); void maps__remove(struct maps *maps, struct map *map); +int maps__mutate_mapping(struct maps 
*maps, struct map *map, + int (*mutate_cb)(struct map *map, void *data), void *data); struct map *maps__find(struct maps *maps, u64 addr); struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 7afa8a117139..dc4ab58857b3 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1341,6 +1341,24 @@ static u64 ref_reloc(struct kmap *kmap) void __weak arch__sym_update(struct symbol *s __maybe_unused, GElf_Sym *sym __maybe_unused) { } +struct remap_kernel_ctx { + u64 sh_addr; + u64 sh_size; + u64 sh_offset; + struct kmap *kmap; +}; + +static int remap_kernel_cb(struct map *map, void *data) +{ + struct remap_kernel_ctx *ctx = data; + + map__set_start(map, ctx->sh_addr + ref_reloc(ctx->kmap)); + map__set_end(map, map__start(map) + ctx->sh_size); + map__set_pgoff(map, ctx->sh_offset); + map__set_mapping_type(map, MAPPING_TYPE__DSO); + return 0; +} + static int dso__process_kernel_symbol(struct dso *dso, struct map *map, GElf_Sym *sym, GElf_Shdr *shdr, struct maps *kmaps, struct kmap *kmap, @@ -1371,22 +1389,15 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, * map to the kernel dso. 
*/ if (*remap_kernel && dso__kernel(dso) && !kmodule) { + struct remap_kernel_ctx ctx = { + .sh_addr = shdr->sh_addr, + .sh_size = shdr->sh_size, + .sh_offset = shdr->sh_offset, + .kmap = kmap + }; + *remap_kernel = false; - map__set_start(map, shdr->sh_addr + ref_reloc(kmap)); - map__set_end(map, map__start(map) + shdr->sh_size); - map__set_pgoff(map, shdr->sh_offset); - map__set_mapping_type(map, MAPPING_TYPE__DSO); - /* Ensure maps are correctly ordered */ - if (kmaps) { - int err; - struct map *tmp = map__get(map); - - maps__remove(kmaps, map); - err = maps__insert(kmaps, map); - map__put(tmp); - if (err) - return err; - } + maps__mutate_mapping(kmaps, map, remap_kernel_cb, &ctx); } /* diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index fcaeeddbbb6b..09b93e844887 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -48,6 +48,13 @@ #include <symbol/kallsyms.h> #include <sys/utsname.h> +static int map_fixup_cb(struct map *map, void *data __maybe_unused) +{ + map__fixup_start(map); + map__fixup_end(map); + return 0; +} + static int dso__load_kernel_sym(struct dso *dso, struct map *map); static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map); static bool symbol__is_idle(const char *name); @@ -2121,10 +2128,11 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map) free(kallsyms_allocated_filename); if (err > 0 && !dso__is_kcore(dso)) { + struct maps *kmaps = map__kmaps(map); + dso__set_binary_type(dso, DSO_BINARY_TYPE__KALLSYMS); dso__set_long_name(dso, DSO__NAME_KALLSYMS, false); - map__fixup_start(map); - map__fixup_end(map); + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); } return err; @@ -2164,10 +2172,11 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map) if (err > 0) pr_debug("Using %s for symbols\n", kallsyms_filename); if (err > 0 && !dso__is_kcore(dso)) { + struct maps *kmaps = map__kmaps(map); + dso__set_binary_type(dso, DSO_BINARY_TYPE__GUEST_KALLSYMS); 
dso__set_long_name(dso, machine->mmap_name, false); - map__fixup_start(map); - map__fixup_end(map); + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); } return err; -- 2.54.0.631.ge1b05301d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v8 1/4] perf maps: Add maps__mutate_mapping 2026-05-20 6:30 ` [PATCH v8 1/4] perf maps: Add maps__mutate_mapping Ian Rogers @ 2026-05-20 7:06 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-05-20 7:06 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider: - [High] The patch introduces a guaranteed recursive read-to-write self-deadlock during lazy symbol loading by acquiring a write lock inside `maps__mutate_mapping` while the thread already holds a read lock. The provided mitigation function, `maps__load_maps()`, is effectively dead code for most tools as it is never actually invoked for them. -- commit 020b20379ee4c690b3b4da226aa6a8b8bbcfe099 Author: Ian Rogers <irogers@google.com> perf maps: Add maps__mutate_mapping This commit introduces maps__mutate_mapping() to safely mutate map boundaries under a write lock, resolving concurrent lookup race windows. It also adds maps__load_maps() to pre-load kernel symbol maps upfront in order to bypass dynamic runtime mutations during lazy symbol loading. Fixes: 39b12f781271 ("perf tools: Make it possible to read object code from vmlinux") Signed-off-by: Ian Rogers <irogers@google.com> > diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c > index 923935ee21b66..7dce07e4d9b49 100644 > --- a/tools/perf/util/maps.c > +++ b/tools/perf/util/maps.c [ ... ] > @@ -576,6 +576,49 @@ void maps__remove(struct maps *maps, struct map *map) > #endif > } > > +/** > + * maps__mutate_mapping - Apply write-protected mutations to a map. > + * @maps: The maps collection containing the map. > + * @map: The map to mutate. > + * @mutate_cb: Callback function that performs the actual mutations. > + * @data: Private data passed to the callback. 
> + * > + * This acquires the write lock on the maps semaphore to safely protect > + * concurrent readers from seeing partially mutated or unsorted map boundaries. > + * > + * WARNING: Acquiring down_write() here can trigger a recursive self-deadlock if > + * the caller already holds the read lock (e.g., during maps__for_each_map() or > + * maps__find() iteration paths that trigger lazy symbol loading). To completely > + * avoid this deadlock, all kernel/module maps must be pre-loaded up-front (via > + * maps__load_maps()) under a clean, single-threaded context before entering > + * multi-threaded event processing loops. > + */ > +int maps__mutate_mapping(struct maps *maps, struct map *map, > + int (*mutate_cb)(struct map *map, void *data), void *data) > +{ > + int err = 0; > + > + if (maps) > + down_write(maps__lock(maps)); [Severity: High] Can this down_write() trigger a recursive self-deadlock for existing tools that rely on lazy kernel symbol loading? If tools like perf lock or perf probe have not been updated to call the new maps__load_maps() up front, they may still trigger lazy loading while iterating over maps. In that scenario, a read lock is already held: maps__for_each_map() down_read(maps__lock(maps)); ... maps__find_symbol_by_name_cb() map__load() dso__load() dso__load_kernel_sym() maps__mutate_mapping() down_write(maps__lock(maps)); Since the read lock is held by the same thread across the loop, will this down_write() cause a deadlock when lazy loading occurs? > + > + err = mutate_cb(map, data); > + > + if (maps) { > + RC_CHK_ACCESS(maps)->maps_by_address_sorted = false; > + RC_CHK_ACCESS(maps)->maps_by_name_sorted = false; > + } [ ... ] -- Sashiko AI review · https://sashiko.dev/#/patchset/20260520063050.3917261-1-irogers@google.com?part=1 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v8 2/4] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses 2026-05-20 6:30 ` [PATCH v8 0/4] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-05-20 6:30 ` [PATCH v8 1/4] perf maps: Add maps__mutate_mapping Ian Rogers @ 2026-05-20 6:30 ` Ian Rogers 2026-05-20 7:50 ` sashiko-bot 2026-05-23 14:44 ` kernel test robot 2026-05-20 6:30 ` [PATCH v8 3/4] perf test: Add inject ASLR test Ian Rogers ` (2 subsequent siblings) 4 siblings, 2 replies; 183+ messages in thread From: Ian Rogers @ 2026-05-20 6:30 UTC (permalink / raw) To: irogers, acme, james.clark, namhyung Cc: adrian.hunter, gmx, jolsa, linux-kernel, linux-perf-users, mingo, peterz If perf.data files are taken from one machine to another they may leak virtual addresses and so weaken ASLR on the machine they are coming from. Add an aslr option for perf inject that remaps all virtual addresses, or drops data/events, so that the virtual address information isn't leaked. Events carrying virtual memory layouts are conservatively remap-processed or dropped, while zero-address-risk lifecycle metadata records (such as namespaces, cgroups, and BPF program info) are intentionally delegated to preserve downstream trace tool analysis compatibility. The ASLR tracking tool virtualizes process and machine namespaces using 'struct machines' to safely isolate host mappings from unprivileged KVM guest address spaces. Memory space layouts are tracked globally per process context to ensure linear, continuous space allocations across successive mapping runs. To remain strictly conservative and guarantee security, the tool scrubs breakpoint addresses (bp_addr) from all synthesized stream headers, completely drops PERF_RECORD_TEXT_POKE events to prevent absolute immediate pointer operands leaks, and drops unsupported complex payloads (such as user register stacks, raw tracepoints, and hardware AUX tracing frames). 
To permanently eliminate lazy-loading deadlock risks during runtime event processing, the inject tool force-loads all host and guest kernel and module maps up front at session startup under a clean single-threaded context using the new maps__load_maps() API. Guest kernel namespace isolation is secured by tracking guest kernels under kernel_pid (-1) and allocating guest kernel mappings inside kernel_space_start ranges. Unsupported or unrecognized UAPI sample flags are cleanly stripped from attributes up front using ASLR_SUPPORTED_SAMPLE_TYPE bitmask to prevent downstream parser misalignment crashes. Assisted-by: Antigravity:gemini-3.5-flash Signed-off-by: Ian Rogers <irogers@google.com> Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> --- tools/perf/builtin-inject.c | 44 +- tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 1151 +++++++++++++++++++++++++++++++++++ tools/perf/util/aslr.h | 37 ++ 4 files changed, 1232 insertions(+), 1 deletion(-) create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index a2493f1097df..f42b315199b3 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -8,6 +8,7 @@ */ #include "builtin.h" +#include "util/aslr.h" #include "util/color.h" #include "util/dso.h" #include "util/vdso.h" @@ -124,6 +125,7 @@ struct perf_inject { bool in_place_update_dry_run; bool copy_kcore_dir; bool convert_callchain; + bool aslr; const char *input_name; struct perf_data output; u64 bytes_written; @@ -232,6 +234,14 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, if (!inject->output.is_pipe) return 0; + if (inject->aslr) { + union perf_event stripped_event; + + memcpy(&stripped_event, event, event->header.size); + stripped_event.attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + return perf_event__repipe_synth(tool, &stripped_event); + } + return 
perf_event__repipe_synth(tool, event); } @@ -2460,6 +2470,8 @@ static int __cmd_inject(struct perf_inject *inject) } } + + session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc, @@ -2569,6 +2581,8 @@ int cmd_inject(int argc, const char **argv) unwind__option), OPT_BOOLEAN(0, "convert-callchain", &inject.convert_callchain, "Generate callchains using DWARF and drop register/stack data"), + OPT_BOOLEAN(0, "aslr", &inject.aslr, + "Remap virtual memory addresses similar to ASLR"), OPT_END() }; const char * const inject_usage[] = { @@ -2576,6 +2590,7 @@ int cmd_inject(int argc, const char **argv) NULL }; bool ordered_events; + struct perf_tool *tool = &inject.tool; if (!inject.itrace_synth_opts.set) { /* Disable eager loading of kernel symbols that adds overhead to perf inject. */ @@ -2596,6 +2611,11 @@ int cmd_inject(int argc, const char **argv) if (argc) usage_with_options(inject_usage, options); + if (inject.aslr && inject.convert_callchain) { + pr_err("Error: --aslr and --convert-callchain are mutually exclusive features.\n"); + return -EINVAL; + } + if (inject.strip && !inject.itrace_synth_opts.set) { pr_err("--strip option requires --itrace option\n"); return -1; @@ -2689,12 +2709,21 @@ int cmd_inject(int argc, const char **argv) inject.tool.schedstat_domain = perf_event__repipe_op2_synth; inject.tool.dont_split_sample_group = true; inject.tool.merge_deferred_callchains = false; - inject.session = __perf_session__new(&data, &inject.tool, + if (inject.aslr) { + tool = aslr_tool__new(&inject.tool); + if (!tool) { + ret = -ENOMEM; + goto out_close_output; + } + } + inject.session = __perf_session__new(&data, tool, /*trace_event_repipe=*/inject.output.is_pipe, /*host_env=*/NULL); if (IS_ERR(inject.session)) { ret = PTR_ERR(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); goto out_close_output; } @@ -2788,12 +2817,25 @@ int cmd_inject(int 
argc, const char **argv) ret = __cmd_inject(&inject); + if (inject.aslr) { + struct evsel *evsel; + + evlist__for_each_entry(inject.session->evlist, evsel) { + evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) + evsel->core.attr.bp_addr = 0; + } + } + guest_session__exit(&inject.guest_session); out_delete: strlist__delete(inject.known_build_ids); zstd_fini(&(inject.session->zstd_data)); perf_session__delete(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); out_close_output: if (!inject.in_place_update) perf_data__close(&inject.output); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 2bb60f50f62d..98da4e263c6d 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -6,6 +6,7 @@ perf-util-y += arm64-frame-pointer-unwind-support.o perf-util-y += addr2line.o perf-util-y += addr_location.o perf-util-y += annotate.o +perf-util-y += aslr.o perf-util-y += blake2s.o perf-util-y += block-info.o perf-util-y += block-range.o diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c new file mode 100644 index 000000000000..901b563048fa --- /dev/null +++ b/tools/perf/util/aslr.c @@ -0,0 +1,1151 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "aslr.h" + +#include "addr_location.h" +#include "debug.h" +#include "event.h" +#include "evsel.h" +#include "machine.h" +#include "map.h" +#include "thread.h" +#include "tool.h" +#include "session.h" +#include "data.h" +#include "dso.h" + +#include <internal/lib.h> /* page_size */ +#include <linux/compiler.h> +#include <linux/zalloc.h> +#include <inttypes.h> +#include <unistd.h> + +/** + * struct remap_addresses_key - Key for mapping original addresses to remapped ones. + * @dso: Pointer to the DSO (Dynamic Shared Object) associated with the mapping. + * @invariant: Unique offset invariant within the VMA (Virtual Memory Area). + * Calculated as `start - pgoff`. 
This value remains constant when + * perf's internal `maps__fixup_overlap_and_insert` splits a map into + * fragmented VMA pieces due to overlapping events, allowing us to + * resolve split maps consistently back to the original VMA. + * @pid: Process ID associated with the mapping. + */ +struct remap_addresses_key { + struct machine *machine; + struct dso *dso; + u64 invariant; + pid_t pid; +}; + +struct aslr_mapping { + struct list_head node; + u64 orig_start; + u64 len; + u64 remap_start; +}; + +struct aslr_tool { + /** @tool: The tool implemented here and a pointer to a delegate to process the data. */ + struct delegate_tool tool; + /** @machines: The machines with the input, not remapped, virtual address layout. */ + struct machines machines; + /** @event_copy: Buffer used to create an event to pass to the delegate. */ + char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); + /** @remap_addresses: mapping from remap_addresses_key to remapped address. */ + struct hashmap remap_addresses; + /** @top_addresses: mapping from process to max remapped address. */ + struct hashmap top_addresses; +}; + +static const pid_t kernel_pid = -1; + +/* Start remapping user processes from a small non-zero offset. 
*/ +static const u64 user_space_start = 0x200000; +static const u64 kernel_space_start = 0xffff800010000000; + +static size_t remap_addresses__hash(long _key, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key = (struct remap_addresses_key *)_key; + + return (size_t)key->machine ^ (size_t)key->dso ^ key->invariant ^ key->pid; +} + +static bool remap_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key1 = (struct remap_addresses_key *)_key1; + struct remap_addresses_key *key2 = (struct remap_addresses_key *)_key2; + + return key1->machine == key2->machine && + RC_CHK_EQUAL(key1->dso, key2->dso) && + key1->invariant == key2->invariant && + key1->pid == key2->pid; +} + +struct top_addresses_key { + struct machine *machine; + pid_t pid; +}; + +static size_t top_addresses__hash(long _key, void *ctx __maybe_unused) +{ + struct top_addresses_key *key = (struct top_addresses_key *)_key; + + return (size_t)key->machine ^ key->pid; +} + +static bool top_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) +{ + struct top_addresses_key *key1 = (struct top_addresses_key *)_key1; + struct top_addresses_key *key2 = (struct top_addresses_key *)_key2; + + return key1->machine == key2->machine && key1->pid == key2->pid; +} + +static u64 round_up_to_page_size(u64 addr) +{ + return (addr + page_size - 1) & ~((u64)page_size - 1); +} + +static u64 aslr_tool__remap_address(struct aslr_tool *aslr, + struct thread *aslr_thread, + u8 cpumode, + u64 addr) +{ + struct addr_location al; + struct remap_addresses_key key; + u64 *remapped_invariant_ptr = NULL; + u64 remap_addr = 0; + u8 effective_cpumode = cpumode; + + if (!aslr_thread) + return 0; /* No thread. */ + + addr_location__init(&al); + if (!thread__find_map(aslr_thread, cpumode, addr, &al)) { + /* + * If lookup fails with specified cpumode, try fallback to the other space + * to be robust against bad cpumode in samples. 
+ */ + if (cpumode == PERF_RECORD_MISC_KERNEL) + effective_cpumode = PERF_RECORD_MISC_USER; + else if (cpumode == PERF_RECORD_MISC_USER) + effective_cpumode = PERF_RECORD_MISC_KERNEL; + else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + effective_cpumode = PERF_RECORD_MISC_GUEST_USER; + else if (cpumode == PERF_RECORD_MISC_GUEST_USER) + effective_cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + + if (!thread__find_map(aslr_thread, effective_cpumode, addr, &al)) { + addr_location__exit(&al); + return 0; /* No mmap. */ + } + } + + key.machine = maps__machine(aslr_thread->maps); + key.dso = map__dso(al.map); + key.invariant = map__start(al.map) - map__pgoff(al.map); + key.pid = (effective_cpumode == PERF_RECORD_MISC_KERNEL || + effective_cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? + kernel_pid : aslr_thread->pid_; + + if (hashmap__find(&aslr->remap_addresses, &key, &remapped_invariant_ptr)) { + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + + (addr - map__start(al.map)); + } else { + pr_debug("Cannot find a remapped entry for address %lx in mapping %lx(%lx) for pid=%d\n", + addr, map__start(al.map), map__size(al.map), key.pid); + } + + addr_location__exit(&al); + return remap_addr; +} + +struct aslr_machine_priv { + bool kernel_maps_loaded; +}; + +static int aslr_tool__preload_kernel_maps(struct machine *machine) +{ + struct aslr_machine_priv *mpriv = machine->priv; + + if (!mpriv) { + mpriv = zalloc(sizeof(*mpriv)); + if (!mpriv) + return -ENOMEM; + machine->priv = mpriv; + } + + if (!mpriv->kernel_maps_loaded) { + struct maps *kmaps = machine__kernel_maps(machine); + + if (kmaps) { + int err = maps__load_maps(kmaps); + + if (err < 0) { + pr_err("ASLR: Failed to preload kernel maps for machine pid %d\n", + machine->pid); + return err; + } + } + mpriv->kernel_maps_loaded = true; + } + return 0; +} + +static void aslr_tool__free_machine_priv(struct machine *machine) +{ + free(machine->priv); + machine->priv = NULL; +} + +static void 
aslr_tool__destroy_machines_priv(struct machines *machines) +{ + struct rb_node *nd; + + aslr_tool__free_machine_priv(&machines->host); + for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) { + struct machine *machine = rb_entry(nd, struct machine, rb_node); + aslr_tool__free_machine_priv(machine); + } +} + +static u64 aslr_tool__findnew_mapping(struct aslr_tool *aslr, + struct thread *aslr_thread, + u8 cpumode, u64 start, + u64 len, u64 pgoff) +{ + /* Address location for dso lookup. */ + struct addr_location al; + /* Original ASLR address based key for the remap table. */ + struct remap_addresses_key remap_key; + /* The address in the ASLR sanitized address space less pg_off. */ + u64 *remapped_invariant_ptr; + /* Key for the maximum address in a process. */ + struct top_addresses_key top_addr_key; + /* Value in top address table. */ + u64 *pmax = NULL; + /* Address in ASLR sanitized address space. */ + u64 remap_addr; + /* Potentially allocated remap table key. */ + struct remap_addresses_key *new_remap_key = NULL; + /* + * Potentially allocated remap table value. + * TODO: Avoid allocation necessary for perf 32-bit binary support. + */ + u64 *new_remap_val = NULL; + int err; + + if (!aslr_thread) + return 0; + + /* The key to look up an incoming address to the outgoing value. */ + addr_location__init(&al); + remap_key.machine = maps__machine(aslr_thread->maps); + remap_key.pid = (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? + kernel_pid : aslr_thread->pid_; + if (thread__find_map(aslr_thread, cpumode, start, &al)) { + remap_key.dso = map__dso(al.map); + remap_key.invariant = map__start(al.map) - map__pgoff(al.map); + } else { + remap_key.dso = NULL; + remap_key.invariant = start - pgoff; + } + + /* The key to look up top allocated address. 
*/ + top_addr_key.machine = remap_key.machine; + top_addr_key.pid = remap_key.pid; + + if (hashmap__find(&aslr->remap_addresses, &remap_key, &remapped_invariant_ptr)) { + /* Mmap already exists. */ + u64 calculated_max; + + if (al.map) { + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + + (start - map__start(al.map)); + } else { + remap_addr = *remapped_invariant_ptr + pgoff; + } + calculated_max = remap_addr + len; + + /* See if top mapping was expanded. */ + if (hashmap__find(&aslr->top_addresses, &top_addr_key, &pmax)) { + if (calculated_max > *pmax) + *pmax = calculated_max; + } + addr_location__exit(&al); + return remap_addr; + } + /* No mmap, create an entry from the top address. */ + if (hashmap__find(&aslr->top_addresses, &top_addr_key, &pmax)) { + /* Current max allocated mmap address within the process. */ + remap_addr = *pmax; + + /* Give 1 page gap from current max page. */ + remap_addr = round_up_to_page_size(remap_addr); + remap_addr += page_size; + if (remap_addr + len > *pmax) + *pmax = remap_addr + len; + } else { + /* First address of the process, allocate key and first top address. */ + struct top_addresses_key *tk; + + remap_addr = (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? + kernel_space_start : user_space_start; + remap_addr = round_up_to_page_size(remap_addr); + + tk = malloc(sizeof(*tk)); + pmax = malloc(sizeof(u64)); + if (!tk || !pmax) { + err = -ENOMEM; + } else { + *tk = top_addr_key; + *pmax = remap_addr + len; + err = hashmap__insert(&aslr->top_addresses, tk, pmax, + HASHMAP_ADD, NULL, NULL); + } + if (err) { + errno = -err; + pr_err("Failure to add ASLR process top address %m\n"); + free(tk); + free(pmax); + addr_location__exit(&al); + return 0; + } + } + /* Create remapping entry. 
*/ + new_remap_key = malloc(sizeof(*new_remap_key)); + new_remap_val = malloc(sizeof(u64)); + if (!new_remap_key || !new_remap_val) { + err = -ENOMEM; + } else { + *new_remap_key = remap_key; + new_remap_key->dso = dso__get(remap_key.dso); + if (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) { + if (al.map) { + *new_remap_val = remap_addr - + (start - map__start(al.map)) - + map__pgoff(al.map); + } else { + *new_remap_val = remap_addr; + } + } else { + *new_remap_val = remap_addr - (al.map ? map__pgoff(al.map) : pgoff); + } + err = hashmap__add(&aslr->remap_addresses, new_remap_key, new_remap_val); + if (err) + dso__put(new_remap_key->dso); + } + if (err) { + errno = -err; + pr_err("Failure to add ASLR remapping %m\n"); + free(new_remap_key); + free(new_remap_val); + addr_location__exit(&al); + return 0; + } + addr_location__exit(&al); + return remap_addr; +} + +static int aslr_tool__process_mmap(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. 
*/ + err = perf_event__process_mmap(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap.pid, event->mmap.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap, &event->mmap, event->mmap.header.size); + /* Remaps the mmap.start. */ + new_event->mmap.start = aslr_tool__findnew_mapping(aslr, thread, cpumode, + event->mmap.start, + event->mmap.len, + event->mmap.pgoff); + if (cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + new_event->mmap.pgoff = new_event->mmap.start; + err = delegate->mmap(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_mmap2(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_mmap2(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap2.pid, event->mmap2.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap2, &event->mmap2, event->mmap2.header.size); + /* Remaps the mmap.start. 
*/ + new_event->mmap2.start = aslr_tool__findnew_mapping(aslr, thread, cpumode, + event->mmap2.start, + event->mmap2.len, + event->mmap2.pgoff); + if (cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + new_event->mmap2.pgoff = new_event->mmap2.start; + err = delegate->mmap2(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_comm(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_comm(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->comm(delegate, event, sample, machine); +} + +static int aslr_tool__process_fork(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. 
*/ + err = perf_event__process_fork(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->fork(delegate, event, sample, machine); +} + +static int aslr_tool__process_exit(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_exit(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->exit(delegate, event, sample, machine); +} + +static int aslr_tool__process_text_poke(const struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + /* Drop in case the instruction encodes an ASLR revealing address. 
*/ + return 0; +} + +static int aslr_tool__process_ksymbol(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + err = perf_event__process_ksymbol(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, kernel_pid, 0); + if (!thread) + return -ENOMEM; + memcpy(&new_event->ksymbol, &event->ksymbol, event->ksymbol.header.size); + /* Remaps the ksymbol.addr. */ + new_event->ksymbol.addr = aslr_tool__findnew_mapping(aslr, thread, + PERF_RECORD_MISC_KERNEL, + event->ksymbol.addr, + event->ksymbol.len, + /*pgoff=*/0); + + err = delegate->ksymbol(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_sample(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct evsel *evsel, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + int ret; + u64 sample_type; + struct thread *thread; + struct machine *aslr_machine; + __u64 max_i; + __u64 max_j; + union perf_event *new_event; + struct perf_sample new_sample; + __u64 *in_array, *out_array; + u8 cpumode; + u64 addr; + size_t i; + size_t j; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = 
aslr->tool.delegate; + ret = -EFAULT; + sample_type = evsel->core.attr.sample_type; + max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); + max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); + new_event = (union perf_event *)aslr->event_copy; + cpumode = sample->cpumode; + i = 0; + j = 0; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + thread = machine__findnew_thread(aslr_machine, sample->pid, sample->tid); + + if (!thread) + return -ENOMEM; + + if (max_i > PERF_SAMPLE_MAX_SIZE / sizeof(u64)) + goto out_put; + + new_event->sample.header = event->sample.header; + + in_array = &event->sample.array[0]; + out_array = &new_event->sample.array[0]; + +#define CHECK_BOUNDS(required_i, required_j) \ + (i + (required_i) > max_i || j + (required_j) > max_j) + +#define COPY_U64() \ + do { \ + if (CHECK_BOUNDS(1, 1)) { \ + ret = -EFAULT; \ + goto out_put; \ + } \ + out_array[j++] = in_array[i++]; \ + } while (0) + +#define REMAP_U64(addr_field) \ + do { \ + if (CHECK_BOUNDS(1, 1)) { \ + ret = -EFAULT; \ + goto out_put; \ + } \ + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr_field); \ + i++; \ + } while (0) + + if (sample_type & PERF_SAMPLE_IDENTIFIER) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_IP) + REMAP_U64(sample->ip); + if (sample_type & PERF_SAMPLE_TID) + COPY_U64(); /* pid, tid */ + if (sample_type & PERF_SAMPLE_TIME) + COPY_U64(); /* time */ + if (sample_type & PERF_SAMPLE_ADDR) + REMAP_U64(sample->addr); + if (sample_type & PERF_SAMPLE_ID) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_STREAM_ID) + COPY_U64(); /* stream_id */ + if (sample_type & PERF_SAMPLE_CPU) + COPY_U64(); /* cpu, res */ + if (sample_type & PERF_SAMPLE_PERIOD) + COPY_U64(); /* period */ + if (sample_type & PERF_SAMPLE_READ) { + if ((evsel->core.attr.read_format 
& PERF_FORMAT_GROUP) == 0) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } else { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + nr = out_array[j++]; + i++; + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + for (u64 cntr = 0; cntr < nr; cntr++) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } + } + } + if (sample_type & PERF_SAMPLE_CALLCHAIN) { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + nr = out_array[j++]; + i++; + + for (u64 cntr = 0; cntr < nr; cntr++) { + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + addr = in_array[i++]; + if (addr >= PERF_CONTEXT_MAX) { + out_array[j++] = addr; + switch (addr) { + case PERF_CONTEXT_HV: + cpumode = PERF_RECORD_MISC_HYPERVISOR; + break; + case PERF_CONTEXT_KERNEL: + cpumode = PERF_RECORD_MISC_KERNEL; + break; + case PERF_CONTEXT_USER: + cpumode = PERF_RECORD_MISC_USER; + break; + case PERF_CONTEXT_GUEST: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_KERNEL: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_USER: + cpumode = PERF_RECORD_MISC_GUEST_USER; + break; + case PERF_CONTEXT_USER_DEFERRED: + if (cntr + 1 >= nr) { + pr_debug("Truncated callchain deferred cookie context\n"); + ret = 0; + goto 
out_put; + } + /* + * Immediately followed by a 64-bit + * stitching cookie. Skip/Copy it! + */ + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j++] = in_array[i++]; + cntr++; + cpumode = PERF_RECORD_MISC_USER; + break; + default: + pr_debug("invalid callchain context: %"PRIx64"\n", addr); + ret = 0; + goto out_put; + } + continue; + } + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr); + } + } + if (sample_type & PERF_SAMPLE_RAW) { + size_t bytes = sizeof(u32) + sample->raw_size; + size_t u64_words = (bytes + 7) / 8; + + if (i + u64_words > max_i || j + u64_words > max_j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], bytes); + i += u64_words; + j += u64_words; + /* + * TODO: certain raw samples can be remapped, such as + * tracepoints by examining their fields. + */ + pr_debug("Dropping raw samples as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + nr = out_array[j++]; + i++; + + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) + COPY_U64(); /* hw_idx */ + + if (nr > (ULLONG_MAX / 3)) { + ret = -EFAULT; + goto out_put; + } + if (nr * 3 > max_i - i || nr * 3 > max_j - j) { + ret = -EFAULT; + goto out_put; + } + for (u64 cntr = 0; cntr < nr; cntr++) { + out_array[j++] = aslr_tool__remap_address(aslr, thread, + sample->cpumode, + in_array[i++]); /* from */ + out_array[j++] = aslr_tool__remap_address(aslr, thread, + sample->cpumode, + in_array[i++]); /* to */ + out_array[j++] = in_array[i++]; /* flags */ + } + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) { + if (nr > max_i - i || nr > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); + i += nr; + j += nr; + /* TODO: confirm branch counters don't leak ASLR information. 
*/ + pr_debug("Dropping sample branch counters as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + } + if (sample_type & PERF_SAMPLE_REGS_USER) { + u64 abi; + + if (CHECK_BOUNDS(1, 0)) { + ret = -EFAULT; + goto out_put; + } + abi = in_array[i++]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(evsel->core.attr.sample_regs_user); + + if (nr > max_i - i || nr > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); + i += nr; + j += nr; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping regs user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_STACK_USER) { + u64 size; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + size = out_array[j++]; + i++; + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + if (size % 8) { + size_t pad = 8 - (size % 8); + + memset(((char *)&out_array[j]) + size, 0, pad); + } + i += u64_words; + j += u64_words; + + COPY_U64(); /* dyn_size */ + } + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping stack user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + COPY_U64(); /* perf_sample_weight */ + if (sample_type & PERF_SAMPLE_DATA_SRC) + COPY_U64(); /* data_src */ + if (sample_type & PERF_SAMPLE_TRANSACTION) + COPY_U64(); /* transaction */ + if (sample_type & PERF_SAMPLE_REGS_INTR) { + u64 abi; + + if (CHECK_BOUNDS(1, 0)) { + ret = -EFAULT; + goto out_put; + } + abi = in_array[i++]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(evsel->core.attr.sample_regs_intr); + + if (nr > max_i - i || nr > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); + i += nr; + j += nr; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_PHYS_ADDR) { + COPY_U64(); /* phys_addr */ + /* TODO: can this be less conservative? */ + pr_debug("Dropping physical address sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_CGROUP) + COPY_U64(); /* cgroup */ + if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + COPY_U64(); /* data_page_size */ + if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + COPY_U64(); /* code_page_size */ + + if (sample_type & PERF_SAMPLE_AUX) { + u64 size; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + size = out_array[j++]; + i++; + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + if (size % 8) { + size_t pad = 8 - (size % 8); + + memset(((char *)&out_array[j]) + size, 0, pad); + } + i += u64_words; + j += u64_words; + } + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping aux sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + if (evsel__is_offcpu_event(evsel)) { + /* TODO: can this be less conservative? */ + pr_debug("Dropping off-CPU sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); + + perf_sample__init(&new_sample, /*all=*/ true); + ret = evsel__parse_sample(evsel, new_event, &new_sample); + if (ret) { + perf_sample__exit(&new_sample); + goto out_put; + } + + ret = delegate->sample(delegate, new_event, &new_sample, evsel, machine); + perf_sample__exit(&new_sample); + +out_put: + thread__put(thread); + return ret; +} + +#undef CHECK_BOUNDS +#undef COPY_U64 +#undef REMAP_U64 + +static int aslr_tool__process_attr(const struct perf_tool *tool, + union perf_event *event, + struct evlist **pevlist) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + + memcpy(&new_event->attr, &event->attr, event->attr.header.size); + if (new_event->attr.attr.type == PERF_TYPE_BREAKPOINT) + new_event->attr.attr.bp_addr = 0; /* Conservatively remove addresses. 
*/ + + new_event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + + return delegate->attr(delegate, new_event, pevlist); +} + +static int skipn(int fd, off_t n) +{ + char buf[4096]; + ssize_t ret; + + while (n > 0) { + ret = read(fd, buf, min_t(off_t, n, (off_t)sizeof(buf))); + if (ret <= 0) + return ret; + n -= ret; + } + + return 0; +} + +static s64 aslr_tool__process_auxtrace(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, + union perf_event *event) +{ + if (perf_data__is_pipe(session->data)) { + /* Copy behavior of the stub by reading all pipe data. */ + int err = skipn(perf_data__fd(session->data), event->auxtrace.size); + + if (err < 0) + return err; + } + return event->auxtrace.size; +} + +static int aslr_tool__process_auxtrace_info(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + +static int aslr_tool__process_auxtrace_error(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + +static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) +{ + delegate_tool__init(&aslr->tool, delegate); + aslr->tool.tool.ordered_events = true; + + machines__init(&aslr->machines); + + hashmap__init(&aslr->remap_addresses, + remap_addresses__hash, remap_addresses__equal, + /*ctx=*/NULL); + hashmap__init(&aslr->top_addresses, + top_addresses__hash, top_addresses__equal, + /*ctx=*/NULL); + + aslr->tool.tool.sample = aslr_tool__process_sample; + /* read - reads a counter, okay to delegate. 
*/ + aslr->tool.tool.mmap = aslr_tool__process_mmap; + aslr->tool.tool.mmap2 = aslr_tool__process_mmap2; + aslr->tool.tool.comm = aslr_tool__process_comm; + aslr->tool.tool.fork = aslr_tool__process_fork; + aslr->tool.tool.exit = aslr_tool__process_exit; + /* namespaces, cgroup, lost, lost_sample, aux, */ + /* itrace_start, aux_output_hw_id, context_switch, throttle, unthrottle */ + /* - no virtual addresses. */ + aslr->tool.tool.ksymbol = aslr_tool__process_ksymbol; + /* bpf - no virtual address. */ + aslr->tool.tool.text_poke = aslr_tool__process_text_poke; + aslr->tool.tool.attr = aslr_tool__process_attr; + /* event_update, tracing_data, finished_round, build_id, id_index, */ + /* auxtrace_info, auxtrace_error, time_conv, thread_map, cpu_map, */ + /* stat_config, stat, feature, finished_init, bpf_metadata, compressed, */ + /* auxtrace - no virtual addresses. */ + aslr->tool.tool.auxtrace = aslr_tool__process_auxtrace; + aslr->tool.tool.auxtrace_info = aslr_tool__process_auxtrace_info; + aslr->tool.tool.auxtrace_error = aslr_tool__process_auxtrace_error; +} + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate) +{ + struct aslr_tool *aslr = zalloc(sizeof(*aslr)); + + if (!aslr) + return NULL; + + aslr_tool__init(aslr, delegate); + return &aslr->tool.tool; +} + +void aslr_tool__delete(struct perf_tool *tool) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct hashmap_entry *cur; + size_t bkt; + + if (!tool) + return; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + + hashmap__for_each_entry(&aslr->remap_addresses, cur, bkt) { + struct remap_addresses_key *key = (struct remap_addresses_key *)cur->pkey; + + if (key) + dso__put(key->dso); + zfree(&cur->pkey); + zfree(&cur->pvalue); + } + hashmap__for_each_entry(&aslr->top_addresses, cur, bkt) { + zfree(&cur->pkey); + zfree(&cur->pvalue); + } + + hashmap__clear(&aslr->remap_addresses); + 
hashmap__clear(&aslr->top_addresses); + aslr_tool__destroy_machines_priv(&aslr->machines); + machines__destroy_kernel_maps(&aslr->machines); + machines__exit(&aslr->machines); + free(aslr); +} diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h new file mode 100644 index 000000000000..a9b90bf29540 --- /dev/null +++ b/tools/perf/util/aslr.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_ASLR_H +#define __PERF_ASLR_H + +#include <linux/perf_event.h> + +#define ASLR_SUPPORTED_SAMPLE_TYPE ( \ + PERF_SAMPLE_IDENTIFIER | \ + PERF_SAMPLE_IP | \ + PERF_SAMPLE_TID | \ + PERF_SAMPLE_TIME | \ + PERF_SAMPLE_ADDR | \ + PERF_SAMPLE_ID | \ + PERF_SAMPLE_STREAM_ID | \ + PERF_SAMPLE_CPU | \ + PERF_SAMPLE_PERIOD | \ + PERF_SAMPLE_READ | \ + PERF_SAMPLE_CALLCHAIN | \ + PERF_SAMPLE_RAW | \ + PERF_SAMPLE_BRANCH_STACK | \ + PERF_SAMPLE_STACK_USER | \ + PERF_SAMPLE_WEIGHT_TYPE | \ + PERF_SAMPLE_DATA_SRC | \ + PERF_SAMPLE_TRANSACTION | \ + PERF_SAMPLE_PHYS_ADDR | \ + PERF_SAMPLE_CGROUP | \ + PERF_SAMPLE_DATA_PAGE_SIZE | \ + PERF_SAMPLE_CODE_PAGE_SIZE | \ + PERF_SAMPLE_AUX) + +struct perf_tool; +struct evsel; + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate); +void aslr_tool__delete(struct perf_tool *aslr); + +#endif /* __PERF_ASLR_H */ -- 2.54.0.631.ge1b05301d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v8 2/4] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses 2026-05-20 6:30 ` [PATCH v8 2/4] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses Ian Rogers @ 2026-05-20 7:50 ` sashiko-bot 2026-05-23 14:44 ` kernel test robot 1 sibling, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-05-20 7:50 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 2 potential issue(s) to consider: - [Critical] A malicious perf.data file can trigger a stack buffer overflow in `perf_event__repipe_attr` by providing an event with a maximized size. - [High] Prematurely masking `sample_type` in `aslr_tool__process_attr()` corrupts the parsing of subsequent sample payloads. -- commit 8f4f252e9954c1c30aaccf15ba0d851aee75c524 Author: Ian Rogers <irogers@google.com> perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses If perf.data files are taken from one machine to another they may leak virtual addresses and so weaken ASLR on the machine they are coming from. Add an aslr option for perf inject that remaps all virtual addresses, or drops data/events, so that the virtual address information isn't leaked. > diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c > index a2493f1097df9..f42b315199b3d 100644 > --- a/tools/perf/builtin-inject.c > +++ b/tools/perf/builtin-inject.c > @@ -232,6 +234,14 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, > if (!inject->output.is_pipe) > return 0; > > + if (inject->aslr) { > + union perf_event stripped_event; > + > + memcpy(&stripped_event, event, event->header.size); [Severity: Critical] Is it possible for event->header.size to exceed sizeof(union perf_event)? perf_session__process_events() reads events from the perf.data file and dynamically reallocates its heap buffer if an event's header.size exceeds sizeof(union perf_event). The format allows a maximum size of 65535 bytes. 
If a malicious or unusually large event is processed, could this memcpy overflow the local stripped_event buffer on the stack, potentially leading to a return address overwrite? > + stripped_event.attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; > + return perf_event__repipe_synth(tool, &stripped_event); > + } > + > return perf_event__repipe_synth(tool, event); > } [ ... ] > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > new file mode 100644 > index 0000000000000..901b563048fa2 > --- /dev/null > +++ b/tools/perf/util/aslr.c [ ... ] > +static int aslr_tool__process_attr(const struct perf_tool *tool, > + union perf_event *event, > + struct evlist **pevlist) > +{ > + struct delegate_tool *del_tool; > + struct aslr_tool *aslr; > + struct perf_tool *delegate; > + union perf_event *new_event; > + > + del_tool = container_of(tool, struct delegate_tool, tool); > + aslr = container_of(del_tool, struct aslr_tool, tool); > + delegate = aslr->tool.delegate; > + new_event = (union perf_event *)aslr->event_copy; > + > + memcpy(&new_event->attr, &event->attr, event->attr.header.size); > + if (new_event->attr.attr.type == PERF_TYPE_BREAKPOINT) > + new_event->attr.attr.bp_addr = 0; /* Conservatively remove addresses. */ > + > + new_event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; [Severity: High] Does prematurely masking sample_type here corrupt the parsing of subsequent sample payloads? If evsel is created in the core session lacking the masked bits (e.g., PERF_SAMPLE_REGS_USER), evlist__parse_sample() will rely on this masked sample_type when a raw PERF_RECORD_SAMPLE event is later read from the file. However, the raw event on disk still contains the data bytes for the masked fields. Could the parser fail to skip them and instead misalign, reading these raw bytes as subsequent fields? This might cause evsel__parse_sample to fail with -EFAULT or corrupt the parsed data. 
> + > + return delegate->attr(delegate, new_event, pevlist); > +} -- Sashiko AI review · https://sashiko.dev/#/patchset/20260520063050.3917261-1-irogers@google.com?part=2 ^ permalink raw reply [flat|nested] 183+ messages in thread
* Re: [PATCH v8 2/4] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses 2026-05-20 6:30 ` [PATCH v8 2/4] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses Ian Rogers 2026-05-20 7:50 ` sashiko-bot @ 2026-05-23 14:44 ` kernel test robot 1 sibling, 0 replies; 183+ messages in thread From: kernel test robot @ 2026-05-23 14:44 UTC (permalink / raw) To: Ian Rogers Cc: oe-lkp, lkp, Gabriel Marin, linux-perf-users, linux-kernel, irogers, acme, james.clark, namhyung, adrian.hunter, jolsa, mingo, peterz, oliver.sang Hello, kernel test robot noticed "perf-sanity-tests.perf.make.fail" on: commit: ac4569348731804907b1e2e53b53583534fc0f5b ("[PATCH v8 2/4] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses") url: https://github.com/intel-lab-lkp/linux/commits/Ian-Rogers/perf-maps-Add-maps__mutate_mapping/20260520-143636 patch link: https://lore.kernel.org/all/20260520063050.3917261-3-irogers@google.com/ patch subject: [PATCH v8 2/4] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses in testcase: perf-sanity-tests version: with following parameters: perf_compiler: gcc group: group-01 config: x86_64-rhel-9.4-bpf compiler: gcc-14 test machine: 16 threads Intel(R) Core(TM) i7-13620H (Raptor Lake) with 32G memory (please refer to attached dmesg/kmsg for entire log/backtrace) If you fix the issue in a separate patch/commit (i.e. 
not just a new version of the same patch/commit), kindly add following tags | Reported-by: kernel test robot <oliver.sang@intel.com> | Closes: https://lore.kernel.org/oe-lkp/202605231644.ac628f76-lkp@intel.com The kernel config and materials to reproduce are available at: https://download.01.org/0day-ci/archive/20260523/202605231644.ac628f76-lkp@intel.com user :err : [ 174.437942] [ T434] util/aslr.c: In function ‘aslr_tool__remap_address’: user :err : [ 175.421882] [ T434] util/aslr.c:143:48: error: ‘struct thread’ has no member named ‘maps’ user :err : [ 175.439227] [ T434] 143 | key.machine = maps__machine(aslr_thread->maps); user :err : [ 175.455264] [ T434] | ^~ user :err : [ 175.480862] [ T434] util/aslr.c:148:43: error: ‘struct thread’ has no member named ‘pid_’ user :err : [ 175.498459] [ T434] 148 | kernel_pid : aslr_thread->pid_; user :err : [ 175.513973] [ T434] | ^~ user :err : [ 175.775147] [ T434] util/aslr.c: In function ‘aslr_tool__findnew_mapping’: user :err : [ 175.800930] [ T434] util/aslr.c:242:54: error: ‘struct thread’ has no member named ‘maps’ user :err : [ 175.818508] [ T434] 242 | remap_key.machine = maps__machine(aslr_thread->maps); user :err : [ 175.835226] [ T434] | ^~ user :err : [ 175.861033] [ T434] util/aslr.c:245:49: error: ‘struct thread’ has no member named ‘pid_’ user :err : [ 175.878595] [ T434] 245 | kernel_pid : aslr_thread->pid_; user :err : [ 175.894692] [ T434] | ^~ user :err : [ 181.727159] [ T434] make[4]: *** [/usr/src/perf_selftests-x86_64-rhel-9.4-bpf-ac4569348731804907b1e2e53b53583534fc0f5b/tools/build/Makefile.build:96: util/aslr.o] Error 1 user :err : [ 181.748550] [ T434] make[4]: *** Waiting for unfinished jobs.... 
user :err : [ 181.761820] [ T434] make[3]: *** [/usr/src/perf_selftests-x86_64-rhel-9.4-bpf-ac4569348731804907b1e2e53b53583534fc0f5b/tools/build/Makefile.build:158: util] Error 2 user :err : [ 181.783098] [ T434] make[2]: *** [Makefile.perf:797: perf-util-in.o] Error 2 user :err : [ 181.796142] [ T434] make[2]: *** Waiting for unfinished jobs.... user :err : [ 183.966859] [ T434] make[1]: *** [Makefile.perf:289: sub-make] Error 2 user :notice: [ 183.977452] [ T432] make PERF failed user :err : [ 183.978411] [ T434] make: *** [Makefile:76: all] Error 2 -- 0-DAY CI Kernel Test Service https://github.com/intel/lkp-tests/wiki ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v8 3/4] perf test: Add inject ASLR test 2026-05-20 6:30 ` [PATCH v8 0/4] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-05-20 6:30 ` [PATCH v8 1/4] perf maps: Add maps__mutate_mapping Ian Rogers 2026-05-20 6:30 ` [PATCH v8 2/4] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses Ian Rogers @ 2026-05-20 6:30 ` Ian Rogers 2026-05-20 8:02 ` sashiko-bot 2026-05-20 6:30 ` [PATCH v8 4/4] perf aslr: Strip sample registers Ian Rogers 2026-06-04 17:28 ` [PATCH v9 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 4 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-05-20 6:30 UTC (permalink / raw) To: irogers, acme, james.clark, namhyung Cc: adrian.hunter, gmx, jolsa, linux-kernel, linux-perf-users, mingo, peterz Add a new shell test `inject_aslr.sh` to verify the `perf inject --aslr` feature. The test covers: - Basic address remapping for user space samples. - Pipe mode coverage for `perf record` piped into `perf inject --aslr`. - Callchain address remapping. - Consistency of `perf report` output before and after injection. - Pipe mode report consistency. - Dropping of samples that leak ASLR info (physical addresses). - Kernel address remapping (using a dedicated kernel-intensive VFS `dd` workload so that timer-interrupt samples are taken continuously while executing in kernel mode). - Kernel report consistency with address normalization. The test suite is hardened with a global 'set -o pipefail' to catch pipeline failures, awk processors that consume their entire input so that upstream commands do not receive SIGPIPE, and a dedicated pipe output scenario validating raw 'perf inject -o -' stdout streams. 
Assisted-by: Antigravity:gemini-3.5-flash Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/tests/shell/inject_aslr.sh | 463 ++++++++++++++++++++++++++ 1 file changed, 463 insertions(+) create mode 100755 tools/perf/tests/shell/inject_aslr.sh diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh new file mode 100755 index 000000000000..d921287e849b --- /dev/null +++ b/tools/perf/tests/shell/inject_aslr.sh @@ -0,0 +1,463 @@ +#!/bin/bash +# perf inject --aslr test +# SPDX-License-Identifier: GPL-2.0 + +set -e +set -o pipefail + +shelldir=$(dirname "$0") +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + +sym="noploop" + +skip_test_missing_symbol ${sym} + +# Create global temp directory +temp_dir=$(mktemp -d /tmp/perf-test-aslr.XXXXXXXXXX) + +prog="perf test -w noploop" +[ "$(uname -m)" = "s390x" ] && prog="$prog 3" +err=0 +kprog="dd if=/dev/zero of=/dev/null bs=1M count=500" + +cleanup() { + local exit_code=$? + if [ "${exit_code}" -ne 0 ] || [ "${err}" -ne 0 ]; then + echo "Test failed! Preserving temp directory: ${temp_dir}" + return + fi + # Check if temp_dir is set and looks sane before removing + if [[ "${temp_dir}" =~ ^/tmp/perf-test-aslr\. 
]]; then + rm -rf "${temp_dir}" + fi +} + +trap_cleanup() { + cleanup + exit 1 +} + +trap cleanup EXIT +trap trap_cleanup TERM INT + +get_noploop_addr() { + local file=$1 + perf script -i "$file" | awk ' + BEGIN { found=0 } + { + for (i=1; i<=NF; i++) { + if ($i ~ /noploop\+/) { + if (!found) { + print $(i-1) + found=1 + } + } + } + }' +} + +test_basic_aslr() { + echo "Test basic ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.basic.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.basic.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -v --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Basic ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Basic ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Basic ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Basic ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Basic ASLR test [Success]" + fi +} + +test_pipe_aslr() { + echo "Test pipe mode ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe.XXXXXX") + + # Use tee to save the original pipe data for comparison + perf record -e task-clock:u -o - ${prog} | tee "${data}" | perf inject --aslr -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Pipe ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Pipe ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Pipe ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Pipe ASLR test [Failed - addresses are not remapped]" 
+ err=1 + else + echo "Pipe ASLR test [Success]" + fi +} + +test_callchain_aslr() { + echo "Test Callchain ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.callchain.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.callchain.XXXXXX") + + perf record -g -e task-clock:u -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Callchain ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Callchain ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Callchain ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Callchain ASLR test [Failed - addresses are not remapped]" + err=1 + else + # Extract callchain addresses (indented lines starting with hex addresses) + orig_callchain=$(perf script -i "${data}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + new_callchain=$(perf script -i "${data2}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + + if [ -z "$orig_callchain" ]; then + echo "Callchain ASLR test [Failed - no callchain samples in original file]" + err=1 + elif [ -z "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain data was dropped]" + err=1 + elif [ "$orig_callchain" = "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain addresses were not remapped]" + err=1 + else + echo "Callchain ASLR test [Success]" + fi + fi +} + +test_report_aslr() { + echo "Test perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i 
"${data}" -o "${data_clean}" + perf inject -v -b --aslr -i "${data}" -o "${data2}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Report ASLR test [Success]" + fi +} + +test_pipe_report_aslr() { + echo "Test pipe mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + # Use tee to save the original pipe data, then process it with inject -b + perf record -e task-clock:u -o - ${prog} | \ + tee "${data}" | \ + perf inject -b --aslr -o "${data2}" + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || 
true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Pipe Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Report ASLR test [Success]" + fi +} + +test_pipe_out_report_aslr() { + echo "Test pipe output mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_out_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf inject -b --aslr -i "${data}" -o - | perf report -i - --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! 
-s "${report1_clean}" ]; then + echo "Pipe Output Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Output Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Output Report ASLR test [Success]" + fi +} + +test_dropped_samples() { + echo "Test dropped samples (phys-data)" + local data + data=$(mktemp "${temp_dir}/perf.data.dropped.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.dropped.XXXXXX") + + # Check if --phys-data is supported by recording a short run + if ! perf record -e task-clock:u --phys-data -o "${data}" -- sleep 0.1 > /dev/null 2>&1; then + echo "Skipping dropped samples test as --phys-data is not supported" + return + fi + + perf record -e task-clock:u --phys-data -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + # Verify that the original file actually contained samples! + orig_samples=$(perf script -i "${data}" | wc -l) + if [ "$orig_samples" -eq 0 ]; then + echo "Dropped samples test [Failed - no samples in original file]" + err=1 + else + # Verify that samples are dropped. + samples_count=$(perf script -i "${data2}" | wc -l) + + if [ "$samples_count" -gt 0 ]; then + echo "Dropped samples test [Failed - samples were not dropped]" + err=1 + else + echo "Dropped samples test [Success]" + fi + fi +} + +test_kernel_aslr() { + echo "Test kernel ASLR remapping" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then + echo "Skipping kernel ASLR test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel ASLR test as kernel map could not be recorded (permissions restricted)" + return + fi + + perf inject -v --aslr -i "${kdata}" -o "${kdata2}" + + # Check if kernel addresses are remapped. + # Find the field that ends with :k: (the event name) and take the next field! + orig_addr=$(perf script -i "${kdata}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + new_addr=$(perf script -i "${kdata2}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + + echo "Kernel ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Kernel ASLR test [Failed - no kernel samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Kernel ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Kernel ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Kernel ASLR test [Success]" + fi +} + +test_kernel_report_aslr() { + echo "Test kernel perf report consistency" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel_report.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_report_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then + echo "Skipping kernel report test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel report test as kernel map could not be recorded (permissions restricted)" + return + fi + + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i "${kdata}" -o "${data_clean}" + perf inject -v -b --aslr -i "${kdata}" -o "${kdata2}" + + local report1="${temp_dir}/report_kernel1" + local report2="${temp_dir}/report_kernel2" + local report1_clean="${temp_dir}/report_kernel1.clean" + local report2_clean="${temp_dir}/report_kernel2.clean" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${kdata2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' > "${report2_clean}" || true + + # Normalize kernel DSOs and addresses in clean reports + # This allows kernel modules to be either a module or kernel.kallsyms + local report1_norm="${temp_dir}/report_kernel1.norm" + local report2_norm="${temp_dir}/report_kernel2.norm" + local diff_file="${temp_dir}/diff_kernel" + + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report1_clean}" | \ + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ + sort > "${report1_norm}" || true + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report2_clean}" | \ + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ + sort > "${report2_norm}" || true + + diff -u -w "${report1_norm}" "${report2_norm}" > "${diff_file}" || true + + if [ ! 
-s "${report1_norm}" ]; then + echo "Kernel Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Kernel Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Kernel Report ASLR test [Success]" + fi +} + +test_basic_aslr +test_pipe_aslr +test_callchain_aslr +test_report_aslr +test_pipe_report_aslr +test_pipe_out_report_aslr +test_dropped_samples +test_kernel_aslr +test_kernel_report_aslr + +cleanup +exit $err -- 2.54.0.631.ge1b05301d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v8 3/4] perf test: Add inject ASLR test 2026-05-20 6:30 ` [PATCH v8 3/4] perf test: Add inject ASLR test Ian Rogers @ 2026-05-20 8:02 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-05-20 8:02 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider: - [Medium] The signal handler `trap_cleanup` incorrectly deletes the temporary directory on interruption under certain conditions, while falsely claiming to preserve it, and prints duplicate messages on SIGINT. -- commit 7275f0955025f2f2ba770bab4992dc63529b0af5 Author: Ian Rogers <irogers@google.com> perf test: Add inject ASLR test This commit adds a shell script to verify the perf inject --aslr feature. It covers address remapping, pipe mode, callchain remapping, dropped samples, and kernel address normalization. > diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh > new file mode 100755 > index 0000000000000..d921287e849b3 > --- /dev/null > +++ b/tools/perf/tests/shell/inject_aslr.sh [ ... ] > +cleanup() { > + local exit_code=$? > + if [ "${exit_code}" -ne 0 ] || [ "${err}" -ne 0 ]; then > + echo "Test failed! Preserving temp directory: ${temp_dir}" > + return > + fi > + # Check if temp_dir is set and looks sane before removing > + if [[ "${temp_dir}" =~ ^/tmp/perf-test-aslr\. ]]; then > + rm -rf "${temp_dir}" > + fi > +} > + > +trap_cleanup() { > + cleanup > + exit 1 > +} > + > +trap cleanup EXIT > +trap trap_cleanup TERM INT [Severity: Medium] Will this cause the cleanup routine to run twice when a signal is received? Since trap_cleanup explicitly calls cleanup and then exits with code 1, won't the exit 1 statement natively trigger the EXIT trap and invoke cleanup a second time? 
If the script receives a targeted signal (like SIGTERM) while the foreground command successfully completes, the first cleanup execution evaluates the exit status as successful and irrevocably deletes the temporary directory. The immediate exit 1 then triggers the second cleanup execution, which evaluates the failure status and prints the preservation message for a directory that no longer exists. Under a standard interruption like SIGINT, it prints the preservation message twice. Could trap_cleanup simply execute exit 1 and rely entirely on the EXIT trap to handle unified cleanup? -- Sashiko AI review · https://sashiko.dev/#/patchset/20260520063050.3917261-1-irogers@google.com?part=3 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v8 4/4] perf aslr: Strip sample registers 2026-05-20 6:30 ` [PATCH v8 0/4] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (2 preceding siblings ...) 2026-05-20 6:30 ` [PATCH v8 3/4] perf test: Add inject ASLR test Ian Rogers @ 2026-05-20 6:30 ` Ian Rogers 2026-05-20 8:41 ` sashiko-bot 2026-06-04 17:28 ` [PATCH v9 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 4 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-05-20 6:30 UTC (permalink / raw) To: irogers, acme, james.clark, namhyung Cc: adrian.hunter, gmx, jolsa, linux-kernel, linux-perf-users, mingo, peterz When the ASLR tracking tool encounters sample events containing user or interrupt register dumps (PERF_SAMPLE_REGS_USER / PERF_SAMPLE_REGS_INTR), it previously dropped the entire sample event conservatively to prevent leaking absolute virtual memory pointers embedded inside raw register frames. If a trace session was recorded with register collection flags enabled, this resulted in 100% sample drop rates, and this happened by default for ARM64. Refactor the ASLR tool to strip out only the register dump payload by masking out the relevant perf_event_attr fields when the delegated tool is handling the data. struct aslr_evsel_priv maintains the original perf_event_attr values and is looked up via the evsel_orig_attrs hashmap. The inject_aslr.sh test is extended to ensure sampled registers aren't present after the --aslr pass. 
Assisted-by: Antigravity:gemini-3.5-flash Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/builtin-inject.c | 26 ++++ tools/perf/tests/shell/inject_aslr.sh | 55 ++++++++ tools/perf/util/aslr.c | 187 +++++++++++++++++++------- tools/perf/util/aslr.h | 1 + 4 files changed, 224 insertions(+), 45 deletions(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index f42b315199b3..a34902ff0e77 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -239,6 +239,8 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, memcpy(&stripped_event, event, event->header.size); stripped_event.attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + stripped_event.attr.attr.sample_regs_user = 0; + stripped_event.attr.attr.sample_regs_intr = 0; return perf_event__repipe_synth(tool, &stripped_event); } @@ -2470,7 +2472,16 @@ static int __cmd_inject(struct perf_inject *inject) } } + if (inject->aslr) { + struct evsel *evsel; + evlist__for_each_entry(session->evlist, evsel) { + evsel__reset_sample_bit(evsel, REGS_USER); + evsel__reset_sample_bit(evsel, REGS_INTR); + evsel->core.attr.sample_regs_user = 0; + evsel->core.attr.sample_regs_intr = 0; + } + } session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; @@ -2730,6 +2741,17 @@ int cmd_inject(int argc, const char **argv) if (zstd_init(&(inject.session->zstd_data), 0) < 0) pr_warning("Decompression initialization failed.\n"); + if (inject.aslr) { + struct evsel *evsel; + + evlist__for_each_entry(inject.session->evlist, evsel) { + ret = aslr_tool__cache_orig_attrs(tool, evsel); + if (ret) { + pr_err("Failed to cache original attributes: %d\n", ret); + goto out_delete; + } + } + } /* Save original section info before feature bits change */ ret = save_section_info(&inject); if (ret) @@ -2822,6 +2844,10 @@ int cmd_inject(int argc, const char **argv) evlist__for_each_entry(inject.session->evlist, evsel) { 
evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + evsel__reset_sample_bit(evsel, REGS_USER); + evsel__reset_sample_bit(evsel, REGS_INTR); + evsel->core.attr.sample_regs_user = 0; + evsel->core.attr.sample_regs_intr = 0; if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) evsel->core.attr.bp_addr = 0; diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh index d921287e849b..7eaa553371ce 100755 --- a/tools/perf/tests/shell/inject_aslr.sh +++ b/tools/perf/tests/shell/inject_aslr.sh @@ -449,6 +449,60 @@ test_kernel_report_aslr() { fi } +test_regs_stripping() { + echo "Test user register stripping" + local rdata="${temp_dir}/perf.data.regs" + local rdata2="${temp_dir}/perf.data.regs.injected" + local rdata_clean="${temp_dir}/perf.data.regs.clean" + + if ! perf record --user-regs -o "${rdata}" ${prog} > /dev/null 2>&1; then + echo "Skipping user registers test as recording failed (unsupported flag/platform)" + return + fi + + perf inject -b -i "${rdata}" -o "${rdata_clean}" + perf inject -v -b --aslr -i "${rdata}" -o "${rdata2}" + + local report1="${temp_dir}/report_regs1" + local report2="${temp_dir}/report_regs2" + local report1_clean="${temp_dir}/report_regs1.clean" + local report2_clean="${temp_dir}/report_regs2.clean" + local diff_file="${temp_dir}/diff_regs" + + perf report -i "${rdata_clean}" --stdio > "${report1}" 2>/dev/null || true + perf report -i "${rdata2}" --stdio > "${report2}" 2>/dev/null || true + + grep '%' "${report1}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | \ + sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | \ + sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! 
-s "${report1_clean}" ]; then + echo "User registers stripping test [Failed - profile trace starved/empty]" + err=1 + return + elif [ -s "${diff_file}" ]; then + echo "User registers stripping test [Failed - report parsing differs]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + return + fi + + local script_dump="${temp_dir}/script_regs_dump" + perf script -D -i "${rdata2}" > "${script_dump}" 2>/dev/null || true + if grep -q "PERF_SAMPLE_REGS_USER" "${script_dump}"; then + echo "User registers stripping test [Failed - register dumps still present]" + err=1 + else + echo "User registers stripping test [Success]" + fi +} + test_basic_aslr test_pipe_aslr test_callchain_aslr @@ -458,6 +512,7 @@ test_pipe_out_report_aslr test_dropped_samples test_kernel_aslr test_kernel_report_aslr +test_regs_stripping cleanup exit $err diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c index 901b563048fa..457813812a34 100644 --- a/tools/perf/util/aslr.c +++ b/tools/perf/util/aslr.c @@ -5,6 +5,7 @@ #include "debug.h" #include "event.h" #include "evsel.h" +#include "evlist.h" #include "machine.h" #include "map.h" #include "thread.h" @@ -16,6 +17,7 @@ #include <internal/lib.h> /* page_size */ #include <linux/compiler.h> #include <linux/zalloc.h> +#include <errno.h> #include <inttypes.h> #include <unistd.h> @@ -43,6 +45,22 @@ struct aslr_mapping { u64 remap_start; }; +struct aslr_evsel_priv { + u64 orig_sample_type; + u64 orig_sample_regs_user; + u64 orig_sample_regs_intr; +}; + +static size_t evsel_hash(long key, void *ctx __maybe_unused) +{ + return (size_t)key; +} + +static bool evsel_equal(long key1, long key2, void *ctx __maybe_unused) +{ + return key1 == key2; +} + struct aslr_tool { /** @tool: The tool implemented here and a pointer to a delegate to process the data. */ struct delegate_tool tool; @@ -54,6 +72,11 @@ struct aslr_tool { struct hashmap remap_addresses; /** @top_addresses: mapping from process to max remapped address. 
*/ struct hashmap top_addresses; + /** + * @evsel_orig_attrs: mapping from evsel pointer to its original + * unstripped sample_type and registers bitmasks. + */ + struct hashmap evsel_orig_attrs; }; static const pid_t kernel_pid = -1; @@ -143,9 +166,7 @@ static u64 aslr_tool__remap_address(struct aslr_tool *aslr, key.machine = maps__machine(aslr_thread->maps); key.dso = map__dso(al.map); key.invariant = map__start(al.map) - map__pgoff(al.map); - key.pid = (effective_cpumode == PERF_RECORD_MISC_KERNEL || - effective_cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? - kernel_pid : aslr_thread->pid_; + key.pid = effective_cpumode == PERF_RECORD_MISC_KERNEL ? kernel_pid : aslr_thread->pid_; if (hashmap__find(&aslr->remap_addresses, &key, &remapped_invariant_ptr)) { remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + @@ -593,6 +614,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, struct aslr_tool *aslr; struct perf_tool *delegate; int ret; + int orig_sample_size; u64 sample_type; struct thread *thread; struct machine *aslr_machine; @@ -605,12 +627,32 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, u64 addr; size_t i; size_t j; + struct aslr_evsel_priv *priv = NULL; + u64 orig_sample_type; + u64 orig_regs_user; + u64 orig_regs_intr; del_tool = container_of(tool, struct delegate_tool, tool); aslr = container_of(del_tool, struct aslr_tool, tool); delegate = aslr->tool.delegate; + + if (evsel__is_dummy_event(evsel)) + return delegate->sample(delegate, event, sample, evsel, machine); + ret = -EFAULT; - sample_type = evsel->core.attr.sample_type; + orig_sample_type = evsel->core.attr.sample_type; + orig_regs_user = evsel->core.attr.sample_regs_user; + orig_regs_intr = evsel->core.attr.sample_regs_intr; + + if (hashmap__find(&aslr->evsel_orig_attrs, evsel, &priv)) { + orig_sample_type = priv->orig_sample_type; + orig_regs_user = priv->orig_sample_regs_user; + orig_regs_intr = priv->orig_sample_regs_intr; + } + + sample_type = 
orig_sample_type & ASLR_SUPPORTED_SAMPLE_TYPE; + sample_type &= ~PERF_SAMPLE_REGS_USER; + sample_type &= ~PERF_SAMPLE_REGS_INTR; max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); new_event = (union perf_event *)aslr->event_copy; @@ -659,25 +701,25 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, i++; \ } while (0) - if (sample_type & PERF_SAMPLE_IDENTIFIER) + if (orig_sample_type & PERF_SAMPLE_IDENTIFIER) COPY_U64(); /* id */ - if (sample_type & PERF_SAMPLE_IP) + if (orig_sample_type & PERF_SAMPLE_IP) REMAP_U64(sample->ip); - if (sample_type & PERF_SAMPLE_TID) + if (orig_sample_type & PERF_SAMPLE_TID) COPY_U64(); /* pid, tid */ - if (sample_type & PERF_SAMPLE_TIME) + if (orig_sample_type & PERF_SAMPLE_TIME) COPY_U64(); /* time */ - if (sample_type & PERF_SAMPLE_ADDR) + if (orig_sample_type & PERF_SAMPLE_ADDR) REMAP_U64(sample->addr); - if (sample_type & PERF_SAMPLE_ID) + if (orig_sample_type & PERF_SAMPLE_ID) COPY_U64(); /* id */ - if (sample_type & PERF_SAMPLE_STREAM_ID) + if (orig_sample_type & PERF_SAMPLE_STREAM_ID) COPY_U64(); /* stream_id */ - if (sample_type & PERF_SAMPLE_CPU) + if (orig_sample_type & PERF_SAMPLE_CPU) COPY_U64(); /* cpu, res */ - if (sample_type & PERF_SAMPLE_PERIOD) + if (orig_sample_type & PERF_SAMPLE_PERIOD) COPY_U64(); /* period */ - if (sample_type & PERF_SAMPLE_READ) { + if (orig_sample_type & PERF_SAMPLE_READ) { if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { COPY_U64(); /* value */ if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) @@ -711,7 +753,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, } } } - if (sample_type & PERF_SAMPLE_CALLCHAIN) { + if (orig_sample_type & PERF_SAMPLE_CALLCHAIN) { u64 nr; if (CHECK_BOUNDS(1, 1)) { @@ -777,7 +819,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, out_array[j++] = 
aslr_tool__remap_address(aslr, thread, cpumode, addr); } } - if (sample_type & PERF_SAMPLE_RAW) { + if (orig_sample_type & PERF_SAMPLE_RAW) { size_t bytes = sizeof(u32) + sample->raw_size; size_t u64_words = (bytes + 7) / 8; @@ -796,7 +838,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + if (orig_sample_type & PERF_SAMPLE_BRANCH_STACK) { u64 nr; if (CHECK_BOUNDS(1, 1)) { @@ -841,7 +883,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, goto out_put; } } - if (sample_type & PERF_SAMPLE_REGS_USER) { + if (orig_sample_type & PERF_SAMPLE_REGS_USER) { u64 abi; if (CHECK_BOUNDS(1, 0)) { @@ -850,22 +892,16 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, } abi = in_array[i++]; if (abi != PERF_SAMPLE_REGS_ABI_NONE) { - u64 nr = hweight64(evsel->core.attr.sample_regs_user); + u64 nr = hweight64(orig_regs_user); - if (nr > max_i - i || nr > max_j - j) { + if (nr > max_i - i) { ret = -EFAULT; goto out_put; } - memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); i += nr; - j += nr; } - /* TODO: can this be less conservative? 
*/ - pr_debug("Dropping regs user sample as possible ASLR leak\n"); - ret = 0; - goto out_put; } - if (sample_type & PERF_SAMPLE_STACK_USER) { + if (orig_sample_type & PERF_SAMPLE_STACK_USER) { u64 size; if (CHECK_BOUNDS(1, 1)) { @@ -898,13 +934,13 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + if (orig_sample_type & PERF_SAMPLE_WEIGHT_TYPE) COPY_U64(); /* perf_sample_weight */ - if (sample_type & PERF_SAMPLE_DATA_SRC) + if (orig_sample_type & PERF_SAMPLE_DATA_SRC) COPY_U64(); /* data_src */ - if (sample_type & PERF_SAMPLE_TRANSACTION) + if (orig_sample_type & PERF_SAMPLE_TRANSACTION) COPY_U64(); /* transaction */ - if (sample_type & PERF_SAMPLE_REGS_INTR) { + if (orig_sample_type & PERF_SAMPLE_REGS_INTR) { u64 abi; if (CHECK_BOUNDS(1, 0)) { @@ -913,36 +949,30 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, } abi = in_array[i++]; if (abi != PERF_SAMPLE_REGS_ABI_NONE) { - u64 nr = hweight64(evsel->core.attr.sample_regs_intr); + u64 nr = hweight64(orig_regs_intr); - if (nr > max_i - i || nr > max_j - j) { + if (nr > max_i - i) { ret = -EFAULT; goto out_put; } - memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); i += nr; - j += nr; } - /* TODO: can this be less conservative? */ - pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); - ret = 0; - goto out_put; } - if (sample_type & PERF_SAMPLE_PHYS_ADDR) { + if (orig_sample_type & PERF_SAMPLE_PHYS_ADDR) { COPY_U64(); /* phys_addr */ /* TODO: can this be less conservative? 
*/ pr_debug("Dropping physical address sample as possible ASLR leak\n"); ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_CGROUP) + if (orig_sample_type & PERF_SAMPLE_CGROUP) COPY_U64(); /* cgroup */ - if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + if (orig_sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) COPY_U64(); /* data_page_size */ - if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + if (orig_sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) COPY_U64(); /* code_page_size */ - if (sample_type & PERF_SAMPLE_AUX) { + if (orig_sample_type & PERF_SAMPLE_AUX) { u64 size; if (CHECK_BOUNDS(1, 1)) { @@ -983,8 +1013,22 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); + /* Temporarily override evsel attributes to match the stripped new_event format! */ + orig_sample_size = evsel->sample_size; + evsel->sample_size = __evsel__sample_size(sample_type); + evsel->core.attr.sample_type = sample_type; + evsel->core.attr.sample_regs_user = 0; + evsel->core.attr.sample_regs_intr = 0; + perf_sample__init(&new_sample, /*all=*/ true); ret = evsel__parse_sample(evsel, new_event, &new_sample); + + /* Restore original attributes immediately so trace ingestion never desynchronizes! 
*/ + evsel->sample_size = orig_sample_size; + evsel->core.attr.sample_type = orig_sample_type; + evsel->core.attr.sample_regs_user = orig_regs_user; + evsel->core.attr.sample_regs_intr = orig_regs_intr; + if (ret) { perf_sample__exit(&new_sample); goto out_put; @@ -1010,6 +1054,7 @@ static int aslr_tool__process_attr(const struct perf_tool *tool, struct aslr_tool *aslr; struct perf_tool *delegate; union perf_event *new_event; + int err; del_tool = container_of(tool, struct delegate_tool, tool); aslr = container_of(del_tool, struct aslr_tool, tool); @@ -1020,9 +1065,32 @@ static int aslr_tool__process_attr(const struct perf_tool *tool, if (new_event->attr.attr.type == PERF_TYPE_BREAKPOINT) new_event->attr.attr.bp_addr = 0; /* Conservatively remove addresses. */ + if (new_event->attr.attr.sample_type & PERF_SAMPLE_REGS_USER) { + new_event->attr.attr.sample_type &= ~PERF_SAMPLE_REGS_USER; + new_event->attr.attr.sample_regs_user = 0; + } + if (new_event->attr.attr.sample_type & PERF_SAMPLE_REGS_INTR) { + new_event->attr.attr.sample_type &= ~PERF_SAMPLE_REGS_INTR; + new_event->attr.attr.sample_regs_intr = 0; + } + new_event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; - return delegate->attr(delegate, new_event, pevlist); + err = delegate->attr(delegate, new_event, pevlist); + if (!err && pevlist && *pevlist) { + struct evsel *evsel = evlist__last(*pevlist); + struct aslr_evsel_priv *priv = zalloc(sizeof(*priv)); + + if (priv) { + priv->orig_sample_type = event->attr.attr.sample_type; + priv->orig_sample_regs_user = event->attr.attr.sample_regs_user; + priv->orig_sample_regs_intr = event->attr.attr.sample_regs_intr; + if (hashmap__add(&aslr->evsel_orig_attrs, evsel, priv) != 0) + free(priv); + } + } + + return err; } static int skipn(int fd, off_t n) @@ -1081,6 +1149,9 @@ static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) hashmap__init(&aslr->top_addresses, top_addresses__hash, top_addresses__equal, /*ctx=*/NULL); + 
hashmap__init(&aslr->evsel_orig_attrs, + evsel_hash, evsel_equal, + /*ctx=*/NULL); aslr->tool.tool.sample = aslr_tool__process_sample; /* read - reads a counter, okay to delegate. */ @@ -1141,11 +1212,37 @@ void aslr_tool__delete(struct perf_tool *tool) zfree(&cur->pkey); zfree(&cur->pvalue); } + hashmap__for_each_entry(&aslr->evsel_orig_attrs, cur, bkt) { + zfree(&cur->pvalue); + } hashmap__clear(&aslr->remap_addresses); hashmap__clear(&aslr->top_addresses); + hashmap__clear(&aslr->evsel_orig_attrs); aslr_tool__destroy_machines_priv(&aslr->machines); machines__destroy_kernel_maps(&aslr->machines); machines__exit(&aslr->machines); free(aslr); } + +int aslr_tool__cache_orig_attrs(struct perf_tool *tool, struct evsel *evsel) +{ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct aslr_evsel_priv *priv = zalloc(sizeof(*priv)); + int err; + + if (!priv) + return -ENOMEM; + + priv->orig_sample_type = evsel->core.attr.sample_type; + priv->orig_sample_regs_user = evsel->core.attr.sample_regs_user; + priv->orig_sample_regs_intr = evsel->core.attr.sample_regs_intr; + + err = hashmap__add(&aslr->evsel_orig_attrs, evsel, priv); + if (err) { + free(priv); + return err; + } + return 0; +} diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h index a9b90bf29540..e4cdb337a66f 100644 --- a/tools/perf/util/aslr.h +++ b/tools/perf/util/aslr.h @@ -33,5 +33,6 @@ struct evsel; struct perf_tool *aslr_tool__new(struct perf_tool *delegate); void aslr_tool__delete(struct perf_tool *aslr); +int aslr_tool__cache_orig_attrs(struct perf_tool *tool, struct evsel *evsel); #endif /* __PERF_ASLR_H */ -- 2.54.0.631.ge1b05301d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v8 4/4] perf aslr: Strip sample registers 2026-05-20 6:30 ` [PATCH v8 4/4] perf aslr: Strip sample registers Ian Rogers @ 2026-05-20 8:41 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-05-20 8:41 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 6 potential issue(s) to consider: New issues: - [High] Restoring unstripped `evsel` attributes before delegating the stripped sample event corrupts downstream processors. - [High] In pipe mode, synthesized headers bypass `aslr_tool__process_attr`, causing the output stream to start with unstripped attributes. - [High] The manual attribute stripping loop in `__cmd_inject` misses `bp_addr` and unsupported sample types, leaking ASLR data in the file header. - [High] Stripping `sample_type` attributes during pipe mode inline attribute processing corrupts subsequent input sample parsing. - [Medium] Silently ignoring allocation failures when caching original attributes leads to trace parsing desynchronization and potential out-of-bounds reads. Pre-existing issues: - [Critical] Stack buffer overflow when copying a variable-length `perf_event` into a statically sized `union perf_event` local variable. -- commit fb30fb1bb4e2af08887645a167b8d9c8ef4d7b2b Author: Ian Rogers <irogers@google.com> perf aslr: Strip sample registers When the ASLR tracking tool encounters sample events containing user or interrupt register dumps, it previously dropped the entire sample event conservatively to prevent absolute virtual memory pointers leakage. Refactor the ASLR tool to strip out only the register dump payload by masking out the relevant perf_event_attr fields when the delegated tool is handling the data. 
> diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c > index f42b315199b3d..a34902ff0e77b 100644 > --- a/tools/perf/builtin-inject.c > +++ b/tools/perf/builtin-inject.c > @@ -239,6 +239,8 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, > > memcpy(&stripped_event, event, event->header.size); [Severity: Critical] This is a pre-existing issue, but stripped_event is a statically sized union on the stack. Since PERF_RECORD_HEADER_ATTR events can be almost 64KB due to the variable-length ID array, could copying event->header.size bytes cause a massive stack buffer overflow? > stripped_event.attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; > + stripped_event.attr.attr.sample_regs_user = 0; > + stripped_event.attr.attr.sample_regs_intr = 0; > return perf_event__repipe_synth(tool, &stripped_event); > } [ ... ] > @@ -2470,7 +2472,16 @@ static int __cmd_inject(struct perf_inject *inject) > } > } > > + if (inject->aslr) { > + struct evsel *evsel; > > + evlist__for_each_entry(session->evlist, evsel) { > + evsel__reset_sample_bit(evsel, REGS_USER); > + evsel__reset_sample_bit(evsel, REGS_INTR); > + evsel->core.attr.sample_regs_user = 0; > + evsel->core.attr.sample_regs_intr = 0; > + } > + } > > session->header.data_offset = output_data_offset; > session->header.data_size = inject->bytes_written; [Severity: High] This loop strips REGS_USER and REGS_INTR before the file header is serialized, but it doesn't clear bp_addr for breakpoints or apply the ASLR_SUPPORTED_SAMPLE_TYPE mask. Since those are only cleared at the end of cmd_inject() after the header is written, does this leak breakpoint virtual addresses and defeat ASLR for those events? [ ... 
] > @@ -2730,6 +2741,17 @@ int cmd_inject(int argc, const char **argv) > if (zstd_init(&(inject.session->zstd_data), 0) < 0) > pr_warning("Decompression initialization failed.\n"); > > + if (inject.aslr) { > + struct evsel *evsel; > + > + evlist__for_each_entry(inject.session->evlist, evsel) { > + ret = aslr_tool__cache_orig_attrs(tool, evsel); > + if (ret) { > + pr_err("Failed to cache original attributes: %d\n", ret); > + goto out_delete; > + } > + } > + } [Severity: High] When processing pipe output, perf_event__synthesize_for_pipe() uses perf_event__repipe directly rather than the tool's configured attr callback. Will the output pipe stream begin with unstripped ATTR headers claiming registers are present? Does this cause trace consumers to fail when parsing the stripped SAMPLE records? [ ... ] > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > index 901b563048fa2..457813812a34a 100644 > --- a/tools/perf/util/aslr.c > +++ b/tools/perf/util/aslr.c [ ... ] > @@ -983,8 +1013,22 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, > > new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); > > + /* Temporarily override evsel attributes to match the stripped new_event format! */ > + orig_sample_size = evsel->sample_size; > + evsel->sample_size = __evsel__sample_size(sample_type); > + evsel->core.attr.sample_type = sample_type; > + evsel->core.attr.sample_regs_user = 0; > + evsel->core.attr.sample_regs_intr = 0; > + > perf_sample__init(&new_sample, /*all=*/ true); > ret = evsel__parse_sample(evsel, new_event, &new_sample); > + > + /* Restore original attributes immediately so trace ingestion never desynchronizes! 
*/ > + evsel->sample_size = orig_sample_size; > + evsel->core.attr.sample_type = orig_sample_type; > + evsel->core.attr.sample_regs_user = orig_regs_user; > + evsel->core.attr.sample_regs_intr = orig_regs_intr; > + > if (ret) { > perf_sample__exit(&new_sample); > goto out_put; [Severity: High] If downstream delegates like perf_event__convert_sample_callchain use evsel->core.attr.sample_type to parse the event, will they expect register data that is no longer present in new_event? Since we restored the original attributes before calling the delegate, could this read past the end of new_event and cause data corruption? Should we restore the attributes after the delegate returns? [ ... ] > @@ -1010,6 +1054,7 @@ static int aslr_tool__process_attr(const struct perf_tool *tool, > struct aslr_tool *aslr; > struct perf_tool *delegate; > union perf_event *new_event; > + int err; > > del_tool = container_of(tool, struct delegate_tool, tool); > aslr = container_of(del_tool, struct aslr_tool, tool); > @@ -1020,9 +1065,32 @@ static int aslr_tool__process_attr(const struct perf_tool *tool, > if (new_event->attr.attr.type == PERF_TYPE_BREAKPOINT) > new_event->attr.attr.bp_addr = 0; /* Conservatively remove addresses. */ > > + if (new_event->attr.attr.sample_type & PERF_SAMPLE_REGS_USER) { > + new_event->attr.attr.sample_type &= ~PERF_SAMPLE_REGS_USER; > + new_event->attr.attr.sample_regs_user = 0; > + } > + if (new_event->attr.attr.sample_type & PERF_SAMPLE_REGS_INTR) { > + new_event->attr.attr.sample_type &= ~PERF_SAMPLE_REGS_INTR; > + new_event->attr.attr.sample_regs_intr = 0; > + } > + [Severity: High] When processing inline attributes in pipe mode, this strips the register flags before delegating to perf_event__repipe_attr(). When subsequent PERF_RECORD_SAMPLE events arrive from the input pipe, will evsel__parse_sample() use this stripped evsel to parse the unstripped raw input buffer? 
Could this cause the parser to misinterpret the unread register data as subsequent fields and drop the samples? > new_event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; > > - return delegate->attr(delegate, new_event, pevlist); > + err = delegate->attr(delegate, new_event, pevlist); > + if (!err && pevlist && *pevlist) { > + struct evsel *evsel = evlist__last(*pevlist); > + struct aslr_evsel_priv *priv = zalloc(sizeof(*priv)); > + > + if (priv) { > + priv->orig_sample_type = event->attr.attr.sample_type; > + priv->orig_sample_regs_user = event->attr.attr.sample_regs_user; > + priv->orig_sample_regs_intr = event->attr.attr.sample_regs_intr; > + if (hashmap__add(&aslr->evsel_orig_attrs, evsel, priv) != 0) > + free(priv); > + } > + } > + > + return err; > } [Severity: Medium] If zalloc() or hashmap__add() fails here, the error is silently ignored. When aslr_tool__process_sample() later fails the hashmap lookup, it will fall back to an evsel->core.attr.sample_type that lacks the register flags. Will parsing the unstripped raw kernel sample array with this stripped mask cause the manual parser to desynchronize and misinterpret the unread register data? -- Sashiko AI review · https://sashiko.dev/#/patchset/20260520063050.3917261-1-irogers@google.com?part=4 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v9 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes 2026-05-20 6:30 ` [PATCH v8 0/4] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (3 preceding siblings ...) 2026-05-20 6:30 ` [PATCH v8 4/4] perf aslr: Strip sample registers Ian Rogers @ 2026-06-04 17:28 ` Ian Rogers 2026-06-04 17:28 ` [PATCH v9 1/5] perf maps: Add maps__mutate_mapping Ian Rogers ` (5 more replies) 4 siblings, 6 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-04 17:28 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz This patch series introduces the new 'perf inject --aslr' feature to remap virtual memory addresses or drop physical memory event leaks when profile record data is shared between machines. Bundled with this feature is a bug fix inside the core map tracking tool that hardens perf session analysis against concurrent lookup data races. Detailed Mechanism of MMAP Mapping and ASLR Virtual Address Allocation: The ASLR tool virtualizes the address space of the recorded processes by intercepting MMAP and MMAP2 events to build a consistent translation database, which is subsequently used to rewrite sample addresses. It maintains two primary lookup databases using hash maps: 1. 'remap_addresses': Maps an original mapping key to its new remapped base address. The key is a tuple of topologically invariant coordinates: (machine, pid, dso, invariant). The invariant is computed as (start - pgoff) for DSO-backed mappings. This invariant remains constant even when perf's internal overlap-resolution splits a VMA into fragmented pieces, ensuring split maps resolve consistently back to the same remapped base. 2. 'top_addresses': Tracks the allocation state per process (machine, pid).
It maintains 'remapped_max' (the highest allocated address in the virtualized space) and 'orig_last_end' (the end address of the last processed original mapping). For each MMAP/MMAP2 event: - We look up the DSO and invariant key in 'remap_addresses'. If found, we reuse the translation, preserving the offset within the mapping. - If not found, we allocate a new remapped address space: - If the new mapping is contiguous with the previous one in the original address space (start == orig_last_end), we place it contiguously in the remapped space. This is critical to preserve the contiguity of mappings for downstream merging (e.g. symbols split by HugeTLB, or anonymous .bss segments adjacent to initialized data). - If not contiguous, we insert a 1-page gap (using page_size) from the previous maximum allocated address to prevent accidental merging of unrelated VMAs. - The event's start address (and pgoff for kernel maps) is rewritten, and the event is delegated to the output writer. To remain strictly conservative and guarantee security, the tool scrubs breakpoint addresses (bp_addr) from all synthesized stream headers, completely drops PERF_RECORD_TEXT_POKE events to prevent leaks of absolute immediate pointer operands, and drops unsupported complex payloads (such as user register stacks, raw tracepoints, and hardware AUX tracing frames). Verification is reinforced with a shell test ('inject_aslr.sh'). Prerequisite Bug Fix (Patch 1): During development, a core map indexing issue was identified and resolved to prevent concurrent lookup data races during session analysis. Changes since v8: - Split the large "Add aslr tool" commit into two patches: - Patch 2: Add ASLR tool infrastructure and MMAP tracking - Patch 3: Implement sample address remapping - Address Sashiko and internal feedback.
Ian Rogers (5): perf maps: Add maps__mutate_mapping perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking perf inject/aslr: Implement sample address remapping perf test: Add inject ASLR test perf aslr: Strip sample registers tools/perf/builtin-inject.c | 70 +- tools/perf/tests/shell/inject_aslr.sh | 517 ++++++++++ tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 1263 +++++++++++++++++++++++++ tools/perf/util/aslr.h | 41 + tools/perf/util/machine.c | 32 +- tools/perf/util/maps.c | 77 ++ tools/perf/util/maps.h | 3 + tools/perf/util/symbol-elf.c | 41 +- tools/perf/util/symbol.c | 17 +- 10 files changed, 2029 insertions(+), 33 deletions(-) create mode 100755 tools/perf/tests/shell/inject_aslr.sh create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v9 1/5] perf maps: Add maps__mutate_mapping 2026-06-04 17:28 ` [PATCH v9 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers @ 2026-06-04 17:28 ` Ian Rogers 2026-06-04 17:46 ` sashiko-bot 2026-06-04 17:28 ` [PATCH v9 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers ` (4 subsequent siblings) 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-04 17:28 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz During kernel ELF symbol parsing (dso__process_kernel_symbol), proc kallsyms image loading (dso__load_kernel_sym, dso__load_guest_kernel_sym), and dynamic kernel memory map alignment updates (machine__update_kernel_mmap), the loader directly modifies the live virtual address boundary fields (the start/end sort keys) of map objects. If these boundaries are mutated while the map still resides inside the parent maps cache array (kmaps) and no lock is held, an unsafe concurrent window is exposed where parallel worker lookup threads (e.g., inside perf top) can mistakenly assume the cache remains sorted based on stale parameters, executing binary search queries (bsearch) across an unsorted range and triggering lookup failures. Fix this by introducing maps__mutate_mapping(), which explicitly acquires the parent maps write semaphore lock, executes a caller-supplied mutation callback to perform the field updates under lock protection, and invalidates the sorted tracking flags prior to releasing the write lock. This guarantees synchronization invariants, closing the concurrent lookup race window. The adjacent module alignment pass inside machine__create_kernel_maps() is safely preserved as a high-performance lockless pass, as it is only ever invoked single-threaded, by contract, during session initialization.
To safely support this unconditional down_write write lock mutator without recursive read-to-write self-deadlock upgrades during lazy symbol loading, we introduce a public maps__load_maps() API. It copies map pointers under a brief read lock and force-loads all modules locklessly outside the lock. Callers (such as perf inject) must pre-load all kernel symbol maps up front at startup using maps__load_maps(), completely bypassing dynamic runtime mutations. Fixes: 39b12f781271 ("perf tools: Make it possible to read object code from vmlinux") Assisted-by: Antigravity:gemini-3.5-flash Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/util/machine.c | 32 +++++++++------ tools/perf/util/maps.c | 77 ++++++++++++++++++++++++++++++++++++ tools/perf/util/maps.h | 3 ++ tools/perf/util/symbol-elf.c | 41 ++++++++++++------- tools/perf/util/symbol.c | 17 ++++++-- 5 files changed, 139 insertions(+), 31 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index e5d1e8b882a9..621205f26523 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1538,22 +1538,30 @@ static void machine__set_kernel_mmap(struct machine *machine, map__set_end(machine->vmlinux_map, ~0ULL); } -static int machine__update_kernel_mmap(struct machine *machine, - u64 start, u64 end) +struct kernel_mmap_mutation_ctx { + u64 start; + u64 end; +}; + +static int kernel_mmap_mutate_cb(struct map *map, void *data) { - struct map *orig, *updated; - int err; + struct kernel_mmap_mutation_ctx *ctx = data; - orig = machine->vmlinux_map; - updated = map__get(orig); + map__set_start(map, ctx->start); + map__set_end(map, ctx->end); + if (ctx->start == 0 && ctx->end == 0) + map__set_end(map, ~0ULL); + return 0; +} - machine->vmlinux_map = updated; - maps__remove(machine__kernel_maps(machine), orig); - machine__set_kernel_mmap(machine, start, end); - err = maps__insert(machine__kernel_maps(machine), updated); - map__put(orig); +static int 
machine__update_kernel_mmap(struct machine *machine, + u64 start, u64 end) +{ + struct kernel_mmap_mutation_ctx ctx = { .start = start, .end = end }; - return err; + return maps__mutate_mapping(machine__kernel_maps(machine), + machine->vmlinux_map, + kernel_mmap_mutate_cb, &ctx); } int machine__create_kernel_maps(struct machine *machine) diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index 923935ee21b6..332222365f23 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -576,6 +576,49 @@ void maps__remove(struct maps *maps, struct map *map) #endif } +/** + * maps__mutate_mapping - Apply write-protected mutations to a map. + * @maps: The maps collection containing the map. + * @map: The map to mutate. + * @mutate_cb: Callback function that performs the actual mutations. + * @data: Private data passed to the callback. + * + * This acquires the write lock on the maps semaphore to safely protect + * concurrent readers from seeing partially mutated or unsorted map boundaries. + * + * WARNING: Acquiring down_write() here can trigger a recursive self-deadlock if + * the caller already holds the read lock (e.g., during maps__for_each_map() or + * maps__find() iteration paths that trigger lazy symbol loading). To completely + * avoid this deadlock, all kernel/module maps must be pre-loaded up-front (via + * maps__load_maps()) under a clean, single-threaded context before entering + * multi-threaded event processing loops. 
+ */ +int maps__mutate_mapping(struct maps *maps, struct map *map, + int (*mutate_cb)(struct map *map, void *data), void *data) +{ + int err = 0; + + if (maps) + down_write(maps__lock(maps)); + + err = mutate_cb(map, data); + + if (maps) { + RC_CHK_ACCESS(maps)->maps_by_address_sorted = false; + RC_CHK_ACCESS(maps)->maps_by_name_sorted = false; + } + + if (maps) + up_write(maps__lock(maps)); + +#ifdef HAVE_LIBDW_SUPPORT + if (maps) + libdw__invalidate_dwfl(maps, maps__libdw_addr_space_dwfl(maps)); +#endif + + return err; +} + bool maps__empty(struct maps *maps) { bool res; @@ -626,6 +669,39 @@ int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data) return ret; } +int maps__load_maps(struct maps *maps) +{ + struct map **maps_copy; + unsigned int nr_maps; + int err = 0; + + if (!maps) + return 0; + + down_read(maps__lock(maps)); + nr_maps = maps__nr_maps(maps); + if (nr_maps == 0) { + up_read(maps__lock(maps)); + return 0; + } + maps_copy = calloc(nr_maps, sizeof(*maps_copy)); + if (!maps_copy) { + up_read(maps__lock(maps)); + return -ENOMEM; + } + for (unsigned int i = 0; i < nr_maps; i++) + maps_copy[i] = map__get(maps__maps_by_address(maps)[i]); + up_read(maps__lock(maps)); + + for (unsigned int i = 0; i < nr_maps; i++) { + if (map__load(maps_copy[i]) < 0) + err = -1; + map__put(maps_copy[i]); + } + free(maps_copy); + return err; +} + void maps__remove_maps(struct maps *maps, bool (*cb)(struct map *map, void *data), void *data) { struct map **maps_by_address; @@ -700,6 +776,7 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, st .sym = NULL, }; + maps__load_maps(maps); maps__for_each_map(maps, maps__find_symbol_by_name_cb, &args); return args.sym; } diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h index 5b80b199685e..4ec9b7453a3b 100644 --- a/tools/perf/util/maps.h +++ b/tools/perf/util/maps.h @@ -59,8 +59,11 @@ void maps__set_libdw_addr_space_dwfl(struct maps *maps, void *dwfl); size_t 
maps__fprintf(struct maps *maps, FILE *fp); +int maps__load_maps(struct maps *maps); int maps__insert(struct maps *maps, struct map *map); void maps__remove(struct maps *maps, struct map *map); +int maps__mutate_mapping(struct maps *maps, struct map *map, + int (*mutate_cb)(struct map *map, void *data), void *data); struct map *maps__find(struct maps *maps, u64 addr); struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 77e6dcba8fda..7c575efeff24 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1341,6 +1341,24 @@ static u64 ref_reloc(struct kmap *kmap) void __weak arch__sym_update(struct symbol *s __maybe_unused, GElf_Sym *sym __maybe_unused) { } +struct remap_kernel_ctx { + u64 sh_addr; + u64 sh_size; + u64 sh_offset; + struct kmap *kmap; +}; + +static int remap_kernel_cb(struct map *map, void *data) +{ + struct remap_kernel_ctx *ctx = data; + + map__set_start(map, ctx->sh_addr + ref_reloc(ctx->kmap)); + map__set_end(map, map__start(map) + ctx->sh_size); + map__set_pgoff(map, ctx->sh_offset); + map__set_mapping_type(map, MAPPING_TYPE__DSO); + return 0; +} + static int dso__process_kernel_symbol(struct dso *dso, struct map *map, GElf_Sym *sym, GElf_Shdr *shdr, struct maps *kmaps, struct kmap *kmap, @@ -1371,22 +1389,15 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, * map to the kernel dso. 
*/ if (*remap_kernel && dso__kernel(dso) && !kmodule) { + struct remap_kernel_ctx ctx = { + .sh_addr = shdr->sh_addr, + .sh_size = shdr->sh_size, + .sh_offset = shdr->sh_offset, + .kmap = kmap + }; + *remap_kernel = false; - map__set_start(map, shdr->sh_addr + ref_reloc(kmap)); - map__set_end(map, map__start(map) + shdr->sh_size); - map__set_pgoff(map, shdr->sh_offset); - map__set_mapping_type(map, MAPPING_TYPE__DSO); - /* Ensure maps are correctly ordered */ - if (kmaps) { - int err; - struct map *tmp = map__get(map); - - maps__remove(kmaps, map); - err = maps__insert(kmaps, map); - map__put(tmp); - if (err) - return err; - } + maps__mutate_mapping(kmaps, map, remap_kernel_cb, &ctx); } /* diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 714b6e6048fa..72be310a8221 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -48,6 +48,13 @@ #include <symbol/kallsyms.h> #include <sys/utsname.h> +static int map_fixup_cb(struct map *map, void *data __maybe_unused) +{ + map__fixup_start(map); + map__fixup_end(map); + return 0; +} + static int dso__load_kernel_sym(struct dso *dso, struct map *map); static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map); static bool symbol__is_idle(const char *name); @@ -2121,10 +2128,11 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map) free(kallsyms_allocated_filename); if (err > 0 && !dso__is_kcore(dso)) { + struct maps *kmaps = map__kmaps(map); + dso__set_binary_type(dso, DSO_BINARY_TYPE__KALLSYMS); dso__set_long_name(dso, DSO__NAME_KALLSYMS, false); - map__fixup_start(map); - map__fixup_end(map); + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); } return err; @@ -2164,10 +2172,11 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map) if (err > 0) pr_debug("Using %s for symbols\n", kallsyms_filename); if (err > 0 && !dso__is_kcore(dso)) { + struct maps *kmaps = map__kmaps(map); + dso__set_binary_type(dso, DSO_BINARY_TYPE__GUEST_KALLSYMS); 
dso__set_long_name(dso, machine->mmap_name, false); - map__fixup_start(map); - map__fixup_end(map); + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); } return err; -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v9 1/5] perf maps: Add maps__mutate_mapping 2026-06-04 17:28 ` [PATCH v9 1/5] perf maps: Add maps__mutate_mapping Ian Rogers @ 2026-06-04 17:46 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-06-04 17:46 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider: - [Medium] Ignoring the return value of maps__load_maps() and a TOCTOU race expose a recursive read-to-write self-deadlock in maps__find_symbol_by_name(). -- commit 0d10c2abb7793e097fe64599f4a0512e9d3d888d Author: Ian Rogers <irogers@google.com> perf maps: Add maps__mutate_mapping This patch introduces maps__mutate_mapping() to safely update map boundaries under a write lock. It also adds maps__load_maps() to pre-load kernel symbol maps up front, aiming to avoid recursive read-to-write self-deadlocks during lazy symbol loading. > diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c > index 923935ee21b66..332222365f237 100644 > --- a/tools/perf/util/maps.c > +++ b/tools/perf/util/maps.c [ ... ] > @@ -626,6 +669,39 @@ int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data) > return ret; > } > > +int maps__load_maps(struct maps *maps) > +{ [ ... ] > + maps_copy = calloc(nr_maps, sizeof(*maps_copy)); > + if (!maps_copy) { > + up_read(maps__lock(maps)); > + return -ENOMEM; > + } [ ... ] > @@ -700,6 +776,7 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, st > .sym = NULL, > }; > > + maps__load_maps(maps); > maps__for_each_map(maps, maps__find_symbol_by_name_cb, &args); [Severity: Medium] Does this expose a recursive read-to-write self-deadlock if maps__load_maps() fails? If the maps_copy allocation in maps__load_maps() fails with -ENOMEM, it returns early and leaves the maps unloaded. Because the return value is ignored here, execution proceeds to maps__for_each_map() which takes a read lock. 
When it attempts to lazy-load the unloaded maps, it will call maps__mutate_mapping() which attempts to acquire the write lock, triggering the deadlock. Is there also a race window between these two calls? If a concurrent thread inserts a new map after maps__load_maps() releases its lock, but before or during maps__for_each_map(), the new unloaded map will be iterated over under the read lock. Would this again trigger the same self-deadlock when lazy loading calls maps__mutate_mapping()? -- Sashiko AI review · https://sashiko.dev/#/patchset/20260604172850.683329-1-irogers@google.com?part=1 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v9 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking 2026-06-04 17:28 ` [PATCH v9 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-04 17:28 ` [PATCH v9 1/5] perf maps: Add maps__mutate_mapping Ian Rogers @ 2026-06-04 17:28 ` Ian Rogers 2026-06-04 17:45 ` sashiko-bot 2026-06-04 17:28 ` [PATCH v9 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers ` (3 subsequent siblings) 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-04 17:28 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz If perf.data files are taken from one machine to another they may leak virtual addresses and so weaken ASLR on the machine they are coming from. Add an aslr option for perf inject that remaps all virtual addresses, or drops data/events, so that the virtual address information isn't leaked. This patch introduces the core ASLR remapping tool infrastructure and implements remapping/tracking for metadata events (MMAP, MMAP2, COMM, FORK, EXIT, KSYMBOL, TEXT_POKE). Sample events are delegated without remapping for now. 
Assisted-by: Antigravity:gemini-3.5-flash Signed-off-by: Ian Rogers <irogers@google.com> Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> --- tools/perf/builtin-inject.c | 57 ++- tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 688 ++++++++++++++++++++++++++++++++++++ tools/perf/util/aslr.h | 37 ++ 4 files changed, 781 insertions(+), 2 deletions(-) create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index d8cb1f562f69..a9f0a3901e7b 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -8,6 +8,7 @@ */ #include "builtin.h" +#include "util/aslr.h" #include "util/color.h" #include "util/dso.h" #include "util/vdso.h" @@ -124,6 +125,7 @@ struct perf_inject { bool in_place_update_dry_run; bool copy_kcore_dir; bool convert_callchain; + bool aslr; const char *input_name; struct perf_data output; u64 bytes_written; @@ -242,8 +244,25 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, if (!inject->output.is_pipe) return 0; - if (!inject->itrace_synth_opts.set) + if (!inject->itrace_synth_opts.set) { + if (inject->aslr) { + union perf_event *stripped_event = malloc(event->header.size); + int err; + + if (!stripped_event) + return -ENOMEM; + memcpy(stripped_event, event, event->header.size); + stripped_event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + + if (stripped_event->attr.attr.type == PERF_TYPE_BREAKPOINT) + stripped_event->attr.attr.bp_addr = 0; + + err = perf_event__repipe_synth(tool, stripped_event); + free(stripped_event); + return err; + } return perf_event__repipe_synth(tool, event); + } if (event->header.size < sizeof(struct perf_event_header) + PERF_ATTR_SIZE_VER0) { pr_err("Attribute event size %u is too small\n", event->header.size); @@ -276,6 +295,8 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, attr.size = sizeof(struct perf_event_attr); 
attr.sample_type &= ~PERF_SAMPLE_AUX; + if (inject->aslr) + attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; if (inject->itrace_synth_opts.add_last_branch) { attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; @@ -2597,6 +2618,8 @@ static int __cmd_inject(struct perf_inject *inject) } } + + session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc, @@ -2706,6 +2729,8 @@ int cmd_inject(int argc, const char **argv) unwind__option), OPT_BOOLEAN(0, "convert-callchain", &inject.convert_callchain, "Generate callchains using DWARF and drop register/stack data"), + OPT_BOOLEAN(0, "aslr", &inject.aslr, + "Remap virtual memory addresses similar to ASLR"), OPT_END() }; const char * const inject_usage[] = { @@ -2713,6 +2738,7 @@ int cmd_inject(int argc, const char **argv) NULL }; bool ordered_events; + struct perf_tool *tool = &inject.tool; if (!inject.itrace_synth_opts.set) { /* Disable eager loading of kernel symbols that adds overhead to perf inject. 
*/ @@ -2733,6 +2759,11 @@ int cmd_inject(int argc, const char **argv) if (argc) usage_with_options(inject_usage, options); + if (inject.aslr && inject.convert_callchain) { + pr_err("Error: --aslr and --convert-callchain are mutually exclusive features.\n"); + return -EINVAL; + } + if (inject.strip && !inject.itrace_synth_opts.set) { pr_err("--strip option requires --itrace option\n"); return -1; @@ -2826,12 +2857,21 @@ int cmd_inject(int argc, const char **argv) inject.tool.schedstat_domain = perf_event__repipe_op2_synth; inject.tool.dont_split_sample_group = true; inject.tool.merge_deferred_callchains = false; - inject.session = __perf_session__new(&data, &inject.tool, + if (inject.aslr) { + tool = aslr_tool__new(&inject.tool); + if (!tool) { + ret = -ENOMEM; + goto out_close_output; + } + } + inject.session = __perf_session__new(&data, tool, /*trace_event_repipe=*/inject.output.is_pipe, /*host_env=*/NULL); if (IS_ERR(inject.session)) { ret = PTR_ERR(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); goto out_close_output; } @@ -2925,12 +2965,25 @@ int cmd_inject(int argc, const char **argv) ret = __cmd_inject(&inject); + if (inject.aslr) { + struct evsel *evsel; + + evlist__for_each_entry(inject.session->evlist, evsel) { + evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) + evsel->core.attr.bp_addr = 0; + } + } + guest_session__exit(&inject.guest_session); out_delete: strlist__delete(inject.known_build_ids); zstd_fini(&(inject.session->zstd_data)); perf_session__delete(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); out_close_output: if (!inject.in_place_update) perf_data__close(&inject.output); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 4bbc78b1f741..19994e026ae5 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -6,6 +6,7 @@ perf-util-y += arm64-frame-pointer-unwind-support.o perf-util-y += addr2line.o perf-util-y += addr_location.o 
perf-util-y += annotate.o +perf-util-y += aslr.o perf-util-y += blake2s.o perf-util-y += block-info.o perf-util-y += block-range.o diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c new file mode 100644 index 000000000000..be7280f88430 --- /dev/null +++ b/tools/perf/util/aslr.c @@ -0,0 +1,688 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "aslr.h" + +#include "addr_location.h" +#include "debug.h" +#include "event.h" +#include "evsel.h" +#include "machine.h" +#include "map.h" +#include "thread.h" +#include "tool.h" +#include "session.h" +#include "data.h" +#include "dso.h" + +#include <internal/lib.h> /* page_size */ +#include <linux/compiler.h> +#include <linux/zalloc.h> +#include <inttypes.h> +#include <unistd.h> + +/** + * struct remap_addresses_key - Key for mapping original addresses to remapped ones. + * @dso: Pointer to the DSO (Dynamic Shared Object) associated with the mapping. + * @invariant: Unique offset invariant within the VMA (Virtual Memory Area). + * Calculated as `start - pgoff`. This value remains constant when + * perf's internal `maps__fixup_overlap_and_insert` splits a map into + * fragmented VMA pieces due to overlapping events, allowing us to + * resolve split maps consistently back to the original VMA. + * @pid: Process ID associated with the mapping. + */ +struct remap_addresses_key { + struct machine *machine; + struct dso *dso; + u64 invariant; + pid_t pid; +}; + +struct aslr_mapping { + struct list_head node; + u64 orig_start; + u64 len; + u64 remap_start; +}; + +struct process_top_address { + u64 remapped_max; + u64 orig_last_end; +}; +struct aslr_tool { + /** @tool: The tool implemented here and a pointer to a delegate to process the data. */ + struct delegate_tool tool; + /** @machines: The machines with the input, not remapped, virtual address layout. */ + struct machines machines; + /** @event_copy: Buffer used to create an event to pass to the delegate. 
*/ + char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); + /** @remap_addresses: mapping from remap_addresses_key to remapped address. */ + struct hashmap remap_addresses; + /** @top_addresses: mapping from process to max remapped address. */ + struct hashmap top_addresses; +}; + +static const pid_t kernel_pid = -1; + +/* Start remapping user processes from a small non-zero offset. */ +static const u64 user_space_start = 0x200000; +static const u64 kernel_space_start = 0xffff800010000000; + +static size_t remap_addresses__hash(long _key, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key = (struct remap_addresses_key *)_key; + + return (size_t)key->machine ^ (size_t)key->dso ^ key->invariant ^ key->pid; +} + +static bool remap_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key1 = (struct remap_addresses_key *)_key1; + struct remap_addresses_key *key2 = (struct remap_addresses_key *)_key2; + + return key1->machine == key2->machine && + RC_CHK_EQUAL(key1->dso, key2->dso) && + key1->invariant == key2->invariant && + key1->pid == key2->pid; +} + +struct top_addresses_key { + struct machine *machine; + pid_t pid; +}; + +static size_t top_addresses__hash(long _key, void *ctx __maybe_unused) +{ + struct top_addresses_key *key = (struct top_addresses_key *)_key; + + return (size_t)key->machine ^ key->pid; +} + +static bool top_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) +{ + struct top_addresses_key *key1 = (struct top_addresses_key *)_key1; + struct top_addresses_key *key2 = (struct top_addresses_key *)_key2; + + return key1->machine == key2->machine && key1->pid == key2->pid; +} + +static u64 round_up_to_page_size(u64 addr) +{ + return (addr + page_size - 1) & ~((u64)page_size - 1); +} + +struct aslr_machine_priv { + bool kernel_maps_loaded; +}; + +static int aslr_tool__preload_kernel_maps(struct machine *machine) +{ + struct aslr_machine_priv *mpriv = machine->priv; + + if (!mpriv) { 
+ mpriv = zalloc(sizeof(*mpriv)); + if (!mpriv) + return -ENOMEM; + machine->priv = mpriv; + } + + if (!mpriv->kernel_maps_loaded) { + struct maps *kmaps = machine__kernel_maps(machine); + + if (kmaps) { + int err = maps__load_maps(kmaps); + + if (err < 0) { + pr_err("ASLR: Failed to preload kernel maps for machine pid %d\n", + machine->pid); + return err; + } + } + mpriv->kernel_maps_loaded = true; + } + return 0; +} + +static void aslr_tool__free_machine_priv(struct machine *machine) +{ + free(machine->priv); + machine->priv = NULL; +} + +static void aslr_tool__destroy_machines_priv(struct machines *machines) +{ + struct rb_node *nd; + + aslr_tool__free_machine_priv(&machines->host); + for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) { + struct machine *machine = rb_entry(nd, struct machine, rb_node); + + aslr_tool__free_machine_priv(machine); + } +} + +static u64 aslr_tool__findnew_mapping(struct aslr_tool *aslr, + struct thread *aslr_thread, + u8 cpumode, u64 start, + u64 len, u64 pgoff) +{ + /* Address location for dso lookup. */ + struct addr_location al; + /* Original ASLR address based key for the remap table. */ + struct remap_addresses_key remap_key; + /* The address in the ASLR sanitized address space less pg_off. */ + u64 *remapped_invariant_ptr; + /* Key for the maximum address in a process. */ + struct top_addresses_key top_addr_key; + /* Value in top address table. */ + struct process_top_address *top = NULL; + /* Address in ASLR sanitized address space. */ + u64 remap_addr; + /* Potentially allocated remap table key. */ + struct remap_addresses_key *new_remap_key = NULL; + /* + * Potentially allocated remap table key. + * TODO: Avoid allocation necessary for perf 32-bit binary support. + */ + u64 *new_remap_val = NULL; + int err; + + if (!aslr_thread) + return 0; + + /* The key to look up an incoming address to the outgoing value. 
*/ + addr_location__init(&al); + remap_key.machine = maps__machine(aslr_thread->maps); + remap_key.pid = (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? + kernel_pid : aslr_thread->pid_; + if (thread__find_map(aslr_thread, cpumode, start, &al)) { + struct dso *dso = map__dso(al.map); + const char *dso_name = dso ? dso__long_name(dso) : NULL; + + remap_key.dso = dso; + if (dso && !is_anon_memory(dso_name) && !is_no_dso_memory(dso_name)) + remap_key.invariant = map__start(al.map) - map__pgoff(al.map); + else + remap_key.invariant = map__start(al.map); + } else { + remap_key.dso = NULL; + remap_key.invariant = start; + } + + /* The key to look up top allocated address. */ + top_addr_key.machine = remap_key.machine; + top_addr_key.pid = remap_key.pid; + + if (hashmap__find(&aslr->remap_addresses, &remap_key, &remapped_invariant_ptr)) { + /* Mmap already exists. */ + u64 calculated_max; + + if (al.map) { + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + + (start - map__start(al.map)); + } else { + remap_addr = *remapped_invariant_ptr + pgoff; + } + + calculated_max = remap_addr + len; + + /* See if top mapping was expanded. */ + if (hashmap__find(&aslr->top_addresses, &top_addr_key, &top)) { + if (calculated_max > top->remapped_max) + top->remapped_max = calculated_max; + } + addr_location__exit(&al); + return remap_addr; + } + /* No mmap, create an entry from the top address. */ + if (hashmap__find(&aslr->top_addresses, &top_addr_key, &top)) { + /* Current max allocated mmap address within the process. */ + remap_addr = top->remapped_max; + + if (start == top->orig_last_end) { + /* Contiguous mapping, do not add 1 page gap! */ + remap_addr = round_up_to_page_size(remap_addr); + } else { + /* Give 1 page gap from current max page. 
*/ + remap_addr = round_up_to_page_size(remap_addr); + remap_addr += page_size; + } + top->orig_last_end = start + len; + if (remap_addr + len > top->remapped_max) + top->remapped_max = remap_addr + len; + } else { + /* First address of the process, allocate key and first top address. */ + struct top_addresses_key *tk; + struct process_top_address *top_val; + + remap_addr = (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? + kernel_space_start : user_space_start; + remap_addr = round_up_to_page_size(remap_addr); + + tk = malloc(sizeof(*tk)); + top_val = malloc(sizeof(*top_val)); + if (!tk || !top_val) { + err = -ENOMEM; + } else { + *tk = top_addr_key; + top_val->remapped_max = remap_addr + len; + top_val->orig_last_end = start + len; + err = hashmap__insert(&aslr->top_addresses, tk, top_val, + HASHMAP_ADD, NULL, NULL); + } + if (err) { + errno = -err; + pr_err("Failure to add ASLR process top address %m\n"); + free(tk); + free(top_val); + addr_location__exit(&al); + return 0; + } + } + /* Create remapping entry. */ + new_remap_key = malloc(sizeof(*new_remap_key)); + new_remap_val = malloc(sizeof(u64)); + if (!new_remap_key || !new_remap_val) { + err = -ENOMEM; + } else { + *new_remap_key = remap_key; + new_remap_key->dso = dso__get(remap_key.dso); + if (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) { + if (al.map) { + *new_remap_val = remap_addr - + (start - map__start(al.map)) - + map__pgoff(al.map); + } else { + *new_remap_val = remap_addr; + } + } else { + *new_remap_val = remap_addr - (al.map ? 
map__pgoff(al.map) : pgoff); + } + err = hashmap__add(&aslr->remap_addresses, new_remap_key, new_remap_val); + if (err) + dso__put(new_remap_key->dso); + } + if (err) { + errno = -err; + pr_err("Failure to add ASLR remapping %m\n"); + free(new_remap_key); + free(new_remap_val); + addr_location__exit(&al); + return 0; + } + addr_location__exit(&al); + return remap_addr; +} + +static int aslr_tool__process_mmap(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_mmap(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap.pid, event->mmap.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap, &event->mmap, event->mmap.header.size); + /* Remaps the mmap.start. 
*/ + new_event->mmap.start = aslr_tool__findnew_mapping(aslr, thread, cpumode, + event->mmap.start, + event->mmap.len, + event->mmap.pgoff); + if (cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + new_event->mmap.pgoff = new_event->mmap.start; + err = delegate->mmap(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_mmap2(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_mmap2(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap2.pid, event->mmap2.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap2, &event->mmap2, event->mmap2.header.size); + /* Remaps the mmap.start. 
*/ + new_event->mmap2.start = aslr_tool__findnew_mapping(aslr, thread, cpumode, + event->mmap2.start, + event->mmap2.len, + event->mmap2.pgoff); + if (cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + new_event->mmap2.pgoff = new_event->mmap2.start; + err = delegate->mmap2(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_comm(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_comm(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->comm(delegate, event, sample, machine); +} + +static int aslr_tool__process_fork(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. 
*/ + err = perf_event__process_fork(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->fork(delegate, event, sample, machine); +} + +static int aslr_tool__process_exit(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_exit(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->exit(delegate, event, sample, machine); +} + +static int aslr_tool__process_text_poke(const struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + /* Drop in case the instruction encodes an ASLR revealing address. 
*/ + return 0; +} + +static int aslr_tool__process_ksymbol(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + err = perf_event__process_ksymbol(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, kernel_pid, 0); + if (!thread) + return -ENOMEM; + memcpy(&new_event->ksymbol, &event->ksymbol, event->ksymbol.header.size); + /* Remaps the ksymbol.addr */ + new_event->ksymbol.addr = aslr_tool__findnew_mapping(aslr, thread, + PERF_RECORD_MISC_KERNEL, + event->ksymbol.addr, + event->ksymbol.len, + /*pgoff=*/0); + + err = delegate->ksymbol(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_sample(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct perf_tool *delegate = aslr->tool.delegate; + + return delegate->sample(delegate, event, sample, machine); +} + +static int skipn(int fd, off_t n) +{ + char buf[4096]; + ssize_t ret; + + while (n > 0) { + ret = read(fd, buf, min_t(off_t, n, (off_t)sizeof(buf))); + if (ret <= 0) + return ret; + n -= ret; + } + + return 0; +} + +static s64 
aslr_tool__process_auxtrace(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, + union perf_event *event) +{ + if (perf_data__is_pipe(session->data)) { + /* Copy behavior of the stub by reading all pipe data. */ + int err = skipn(perf_data__fd(session->data), event->auxtrace.size); + + if (err < 0) + return err; + } + return event->auxtrace.size; +} + +static int aslr_tool__process_auxtrace_info(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + +static int aslr_tool__process_auxtrace_error(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + +static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) +{ + delegate_tool__init(&aslr->tool, delegate); + aslr->tool.tool.ordered_events = true; + + machines__init(&aslr->machines); + + hashmap__init(&aslr->remap_addresses, + remap_addresses__hash, remap_addresses__equal, + /*ctx=*/NULL); + hashmap__init(&aslr->top_addresses, + top_addresses__hash, top_addresses__equal, + /*ctx=*/NULL); + + aslr->tool.tool.sample = aslr_tool__process_sample; + /* read - reads a counter, okay to delegate. */ + aslr->tool.tool.mmap = aslr_tool__process_mmap; + aslr->tool.tool.mmap2 = aslr_tool__process_mmap2; + aslr->tool.tool.comm = aslr_tool__process_comm; + aslr->tool.tool.fork = aslr_tool__process_fork; + aslr->tool.tool.exit = aslr_tool__process_exit; + /* namespaces, cgroup, lost, lost_sample, aux, */ + /* itrace_start, aux_output_hw_id, context_switch, throttle, unthrottle */ + /* - no virtual addresses. */ + aslr->tool.tool.ksymbol = aslr_tool__process_ksymbol; + /* bpf - no virtual address. 
*/ + aslr->tool.tool.text_poke = aslr_tool__process_text_poke; + /* + * event_update, tracing_data, finished_round, build_id, id_index, + * auxtrace_info, auxtrace_error, time_conv, thread_map, cpu_map, + * stat_config, stat, feature, finished_init, bpf_metadata, compressed, + * auxtrace - no virtual addresses. + */ + aslr->tool.tool.auxtrace = aslr_tool__process_auxtrace; + aslr->tool.tool.auxtrace_info = aslr_tool__process_auxtrace_info; + aslr->tool.tool.auxtrace_error = aslr_tool__process_auxtrace_error; +} + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate) +{ + struct aslr_tool *aslr = zalloc(sizeof(*aslr)); + + if (!aslr) + return NULL; + + aslr_tool__init(aslr, delegate); + return &aslr->tool.tool; +} + +void aslr_tool__delete(struct perf_tool *tool) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct hashmap_entry *cur; + size_t bkt; + + if (!tool) + return; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + + hashmap__for_each_entry(&aslr->remap_addresses, cur, bkt) { + struct remap_addresses_key *key = (struct remap_addresses_key *)cur->pkey; + + if (key) + dso__put(key->dso); + zfree(&cur->pkey); + zfree(&cur->pvalue); + } + hashmap__for_each_entry(&aslr->top_addresses, cur, bkt) { + zfree(&cur->pkey); + zfree(&cur->pvalue); + } + + hashmap__clear(&aslr->remap_addresses); + hashmap__clear(&aslr->top_addresses); + aslr_tool__destroy_machines_priv(&aslr->machines); + machines__destroy_kernel_maps(&aslr->machines); + machines__exit(&aslr->machines); + free(aslr); +} diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h new file mode 100644 index 000000000000..a9b90bf29540 --- /dev/null +++ b/tools/perf/util/aslr.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_ASLR_H +#define __PERF_ASLR_H + +#include <linux/perf_event.h> + +#define ASLR_SUPPORTED_SAMPLE_TYPE ( \ + PERF_SAMPLE_IDENTIFIER | \ + PERF_SAMPLE_IP | \ + 
PERF_SAMPLE_TID | \ + PERF_SAMPLE_TIME | \ + PERF_SAMPLE_ADDR | \ + PERF_SAMPLE_ID | \ + PERF_SAMPLE_STREAM_ID | \ + PERF_SAMPLE_CPU | \ + PERF_SAMPLE_PERIOD | \ + PERF_SAMPLE_READ | \ + PERF_SAMPLE_CALLCHAIN | \ + PERF_SAMPLE_RAW | \ + PERF_SAMPLE_BRANCH_STACK | \ + PERF_SAMPLE_STACK_USER | \ + PERF_SAMPLE_WEIGHT_TYPE | \ + PERF_SAMPLE_DATA_SRC | \ + PERF_SAMPLE_TRANSACTION | \ + PERF_SAMPLE_PHYS_ADDR | \ + PERF_SAMPLE_CGROUP | \ + PERF_SAMPLE_DATA_PAGE_SIZE | \ + PERF_SAMPLE_CODE_PAGE_SIZE | \ + PERF_SAMPLE_AUX) + +struct perf_tool; +struct evsel; + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate); +void aslr_tool__delete(struct perf_tool *aslr); + +#endif /* __PERF_ASLR_H */ -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v9 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking 2026-06-04 17:28 ` [PATCH v9 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers @ 2026-06-04 17:45 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-06-04 17:45 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 2 potential issue(s) to consider: - [High] Breakpoint addresses and register masks leak when `--itrace` is combined with `--aslr` in pipe mode due to missing attribute zeroing. - [High] Auxiliary trace events (`auxtrace`, `auxtrace_info`, `auxtrace_error`) are silently dropped when `--aslr` is used, breaking `--itrace` decoding. -- commit 54e7ffb94c135676309f30095bfd44c906c79590 Author: Ian Rogers <irogers@google.com> perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking If perf.data files are taken from one machine to another they may leak virtual addresses and so weaken ASLR on the machine they are coming from. Add an aslr option for perf inject that remaps all virtual addresses, or drops data/events, so that the virtual address information isn't leaked. > diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c > index d8cb1f562f690..a9f0a3901e7bc 100644 > --- a/tools/perf/builtin-inject.c > +++ b/tools/perf/builtin-inject.c [ ... ] > @@ -276,6 +295,8 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, > > attr.size = sizeof(struct perf_event_attr); > attr.sample_type &= ~PERF_SAMPLE_AUX; > + if (inject->aslr) > + attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; [Severity: High] Does this alternative synthesis path for itrace also need to clear bp_addr, sample_regs_user, and sample_regs_intr? When perf inject is run in pipe mode with both the aslr and itrace options, perf_event__repipe_attr takes this path. 
The non-itrace path correctly clears the breakpoint addresses and register masks for ASLR sanitization, but this path only masks the sample type. Could this leak sensitive kernel virtual addresses encoded in breakpoint attributes into the output stream, bypassing the ASLR protections? > > if (inject->itrace_synth_opts.add_last_branch) { > attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; [ ... ] > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > new file mode 100644 > index 0000000000000..be7280f884308 > --- /dev/null > +++ b/tools/perf/util/aslr.c [ ... ] > +static s64 aslr_tool__process_auxtrace(const struct perf_tool *tool __maybe_unused, > + struct perf_session *session, > + union perf_event *event) > +{ > + if (perf_data__is_pipe(session->data)) { > + /* Copy behavior of the stub by reading all pipe data. */ > + int err = skipn(perf_data__fd(session->data), event->auxtrace.size); > + > + if (err < 0) > + return err; > + } > + return event->auxtrace.size; > +} [ ... ] > +static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) > +{ > + delegate_tool__init(&aslr->tool, delegate); > + aslr->tool.tool.ordered_events = true; [ ... ] > + /* > + * event_update, tracing_data, finished_round, build_id, id_index, > + * auxtrace_info, auxtrace_error, time_conv, thread_map, cpu_map, > + * stat_config, stat, feature, finished_init, bpf_metadata, compressed, > + * auxtrace - no virtual addresses. > + */ > + aslr->tool.tool.auxtrace = aslr_tool__process_auxtrace; > + aslr->tool.tool.auxtrace_info = aslr_tool__process_auxtrace_info; > + aslr->tool.tool.auxtrace_error = aslr_tool__process_auxtrace_error; > +} [Severity: High] Will overwriting the auxtrace delegate callbacks cause silent data loss? The auxtrace, auxtrace_info, and auxtrace_error callbacks are overridden here with internal stubs. As seen in aslr_tool__process_auxtrace above, the stub consumes the pipe data but returns without forwarding the event to the delegate tool. 
Does this mean raw hardware traces like Intel PT are silently dropped, preventing them from reaching perf_event__process_auxtrace to generate synthetic samples? This looks like it would break itrace decoding. -- Sashiko AI review · https://sashiko.dev/#/patchset/20260604172850.683329-1-irogers@google.com?part=2 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v9 3/5] perf inject/aslr: Implement sample address remapping 2026-06-04 17:28 ` [PATCH v9 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-04 17:28 ` [PATCH v9 1/5] perf maps: Add maps__mutate_mapping Ian Rogers 2026-06-04 17:28 ` [PATCH v9 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers @ 2026-06-04 17:28 ` Ian Rogers 2026-06-04 17:45 ` sashiko-bot 2026-06-04 17:28 ` [PATCH v9 4/5] perf test: Add inject ASLR test Ian Rogers ` (2 subsequent siblings) 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-04 17:28 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz Add the sample address remapping logic to the ASLR tool. This patch implements aslr_tool__process_sample, which parses sample events, remaps IPs, ADDRs, callchains, and branch stacks using the mappings collected from metadata events, and drops potentially leaking raw, register, stack, physical address, and aux samples. Also adds the aslr_tool__remap_address helper function. 
Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/util/aslr.c | 448 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 444 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c index be7280f88430..fc619b9f1f40 100644 --- a/tools/perf/util/aslr.c +++ b/tools/perf/util/aslr.c @@ -109,6 +109,60 @@ static u64 round_up_to_page_size(u64 addr) return (addr + page_size - 1) & ~((u64)page_size - 1); } +static u64 aslr_tool__remap_address(struct aslr_tool *aslr, + struct thread *aslr_thread, + u8 cpumode, + u64 addr) +{ + struct addr_location al; + struct remap_addresses_key key; + u64 *remapped_invariant_ptr = NULL; + u64 remap_addr = 0; + u8 effective_cpumode = cpumode; + + if (!aslr_thread) + return 0; /* No thread. */ + + addr_location__init(&al); + if (!thread__find_map(aslr_thread, cpumode, addr, &al)) { + /* + * If lookup fails with specified cpumode, try fallback to the other space + * to be robust against bad cpumode in samples. + */ + if (cpumode == PERF_RECORD_MISC_KERNEL) + effective_cpumode = PERF_RECORD_MISC_USER; + else if (cpumode == PERF_RECORD_MISC_USER) + effective_cpumode = PERF_RECORD_MISC_KERNEL; + else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + effective_cpumode = PERF_RECORD_MISC_GUEST_USER; + else if (cpumode == PERF_RECORD_MISC_GUEST_USER) + effective_cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + + if (!thread__find_map(aslr_thread, effective_cpumode, addr, &al)) { + addr_location__exit(&al); + return 0; /* No mmap. */ + } + } + + key.machine = maps__machine(aslr_thread->maps); + key.dso = map__dso(al.map); + key.invariant = map__start(al.map) - map__pgoff(al.map); + key.pid = (effective_cpumode == PERF_RECORD_MISC_KERNEL || + effective_cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? 
+ kernel_pid : aslr_thread->pid_; + + if (hashmap__find(&aslr->remap_addresses, &key, &remapped_invariant_ptr)) { + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + + (addr - map__start(al.map)); + } else { + pr_debug("Cannot find a remapped entry for address %lx in mapping %lx(%lx) for pid=%d\n", + addr, map__start(al.map), map__size(al.map), key.pid); + } + + addr_location__exit(&al); + return remap_addr; +} + struct aslr_machine_priv { bool kernel_maps_loaded; }; @@ -554,13 +608,399 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, struct perf_sample *sample, struct machine *machine) { - struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); - struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); - struct perf_tool *delegate = aslr->tool.delegate; + struct evsel *evsel = sample->evsel; + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + int ret; + u64 sample_type; + struct thread *thread; + struct machine *aslr_machine; + __u64 max_i; + __u64 max_j; + union perf_event *new_event; + struct perf_sample new_sample; + __u64 *in_array, *out_array; + u8 cpumode; + u64 addr; + size_t i; + size_t j; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + if (evsel__is_dummy_event(evsel)) + return delegate->sample(delegate, event, sample, machine); + + ret = -EFAULT; + sample_type = evsel->core.attr.sample_type; + max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); + max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); + new_event = (union perf_event *)aslr->event_copy; + cpumode = sample->cpumode; + i = 0; + j = 0; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + 
thread = machine__findnew_thread(aslr_machine, sample->pid, sample->tid); + + if (!thread) + return -ENOMEM; + + if (max_i > PERF_SAMPLE_MAX_SIZE / sizeof(u64)) + goto out_put; + + new_event->sample.header = event->sample.header; + + in_array = &event->sample.array[0]; + out_array = &new_event->sample.array[0]; + +#define CHECK_BOUNDS(required_i, required_j) \ + (i + (required_i) > max_i || j + (required_j) > max_j) + +#define COPY_U64() \ + do { \ + if (CHECK_BOUNDS(1, 1)) { \ + ret = -EFAULT; \ + goto out_put; \ + } \ + out_array[j++] = in_array[i++]; \ + } while (0) + +#define REMAP_U64(addr_field) \ + do { \ + if (CHECK_BOUNDS(1, 1)) { \ + ret = -EFAULT; \ + goto out_put; \ + } \ + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr_field); \ + i++; \ + } while (0) + + if (sample_type & PERF_SAMPLE_IDENTIFIER) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_IP) + REMAP_U64(sample->ip); + if (sample_type & PERF_SAMPLE_TID) + COPY_U64(); /* pid, tid */ + if (sample_type & PERF_SAMPLE_TIME) + COPY_U64(); /* time */ + if (sample_type & PERF_SAMPLE_ADDR) + REMAP_U64(sample->addr); + if (sample_type & PERF_SAMPLE_ID) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_STREAM_ID) + COPY_U64(); /* stream_id */ + if (sample_type & PERF_SAMPLE_CPU) + COPY_U64(); /* cpu, res */ + if (sample_type & PERF_SAMPLE_PERIOD) + COPY_U64(); /* period */ + if (sample_type & PERF_SAMPLE_READ) { + if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } else { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + 
nr = out_array[j++]; + i++; + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + for (u64 cntr = 0; cntr < nr; cntr++) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } + } + } + if (sample_type & PERF_SAMPLE_CALLCHAIN) { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + nr = out_array[j++]; + i++; + + for (u64 cntr = 0; cntr < nr; cntr++) { + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + addr = in_array[i++]; + if (addr >= PERF_CONTEXT_MAX) { + out_array[j++] = addr; + switch (addr) { + case PERF_CONTEXT_HV: + cpumode = PERF_RECORD_MISC_HYPERVISOR; + break; + case PERF_CONTEXT_KERNEL: + cpumode = PERF_RECORD_MISC_KERNEL; + break; + case PERF_CONTEXT_USER: + cpumode = PERF_RECORD_MISC_USER; + break; + case PERF_CONTEXT_GUEST: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_KERNEL: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_USER: + cpumode = PERF_RECORD_MISC_GUEST_USER; + break; + case PERF_CONTEXT_USER_DEFERRED: + if (cntr + 1 >= nr) { + pr_debug("Truncated callchain deferred cookie context\n"); + ret = 0; + goto out_put; + } + /* + * Immediately followed by a 64-bit + * stitching cookie. Skip/Copy it! 
+ */ + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j++] = in_array[i++]; + cntr++; + cpumode = PERF_RECORD_MISC_USER; + break; + default: + pr_debug("invalid callchain context: %"PRIx64"\n", addr); + ret = 0; + goto out_put; + } + continue; + } + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr); + } + } + if (sample_type & PERF_SAMPLE_RAW) { + size_t bytes = sizeof(u32) + sample->raw_size; + size_t u64_words = (bytes + 7) / 8; + + if (i + u64_words > max_i || j + u64_words > max_j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], bytes); + i += u64_words; + j += u64_words; + /* + * TODO: certain raw samples can be remapped, such as + * tracepoints by examining their fields. + */ + pr_debug("Dropping raw samples as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + nr = out_array[j++]; + i++; + + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) + COPY_U64(); /* hw_idx */ + + if (nr > (ULLONG_MAX / 3)) { + ret = -EFAULT; + goto out_put; + } + if (nr * 3 > max_i - i || nr * 3 > max_j - j) { + ret = -EFAULT; + goto out_put; + } + for (u64 cntr = 0; cntr < nr; cntr++) { + out_array[j++] = aslr_tool__remap_address(aslr, thread, + sample->cpumode, + in_array[i++]); /* from */ + out_array[j++] = aslr_tool__remap_address(aslr, thread, + sample->cpumode, + in_array[i++]); /* to */ + out_array[j++] = in_array[i++]; /* flags */ + } + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) { + if (nr > max_i - i || nr > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); + i += nr; + j += nr; + /* TODO: confirm branch counters don't leak ASLR information. 
*/ + pr_debug("Dropping sample branch counters as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + } + if (sample_type & PERF_SAMPLE_REGS_USER) { + if (CHECK_BOUNDS(1, 0)) { + ret = -EFAULT; + goto out_put; + } + /* abi */ + COPY_U64(); + /* TODO: can this be less conservative? */ + pr_debug("Dropping regs user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_STACK_USER) { + u64 size; - return delegate->sample(delegate, event, sample, machine); + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + size = out_array[j++]; + i++; + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + if (size % 8) { + size_t pad = 8 - (size % 8); + + memset(((char *)&out_array[j]) + size, 0, pad); + } + i += u64_words; + j += u64_words; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping stack user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + COPY_U64(); /* perf_sample_weight */ + if (sample_type & PERF_SAMPLE_DATA_SRC) + COPY_U64(); /* data_src */ + if (sample_type & PERF_SAMPLE_TRANSACTION) + COPY_U64(); /* transaction */ + if (sample_type & PERF_SAMPLE_REGS_INTR) { + if (CHECK_BOUNDS(1, 0)) { + ret = -EFAULT; + goto out_put; + } + /* abi */ + COPY_U64(); + /* TODO: can this be less conservative? */ + pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_PHYS_ADDR) { + COPY_U64(); /* phys_addr */ + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping physical address sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_CGROUP) + COPY_U64(); /* cgroup */ + if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + COPY_U64(); /* data_page_size */ + if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + COPY_U64(); /* code_page_size */ + + if (sample_type & PERF_SAMPLE_AUX) { + u64 size; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + size = out_array[j++]; + i++; + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + if (size % 8) { + size_t pad = 8 - (size % 8); + + memset(((char *)&out_array[j]) + size, 0, pad); + } + i += u64_words; + j += u64_words; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping aux sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + if (evsel__is_offcpu_event(evsel)) { + /* TODO: can this be less conservative? */ + pr_debug("Dropping off-CPU sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); + + perf_sample__init(&new_sample, /*all=*/ true); + ret = evsel__parse_sample(evsel, new_event, &new_sample); + if (ret) { + perf_sample__exit(&new_sample); + goto out_put; + } + + new_sample.evsel = evsel; + ret = delegate->sample(delegate, new_event, &new_sample, machine); + perf_sample__exit(&new_sample); + +out_put: + thread__put(thread); + return ret; } +#undef CHECK_BOUNDS +#undef COPY_U64 +#undef REMAP_U64 + static int skipn(int fd, off_t n) { char buf[4096]; -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v9 3/5] perf inject/aslr: Implement sample address remapping 2026-06-04 17:28 ` [PATCH v9 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers @ 2026-06-04 17:45 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-06-04 17:45 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 2 potential issue(s) to consider: - [High] Direct access to fields of the reference-counted `struct thread` breaks the build when `REFCNT_CHECKING` is enabled. - [High] Double-swapping of `PERF_SAMPLE_BRANCH_STACK` flags corrupts branch data when analyzing cross-endian perf.data files. -- commit 22641fe8dc789606c2c20387f60f64010f88923f Author: Ian Rogers <irogers@google.com> perf inject/aslr: Implement sample address remapping This commit adds the sample address remapping logic to the ASLR tool. It parses sample events and remaps IPs, ADDRs, callchains, and branch stacks using the mappings collected from metadata events. > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > index be7280f884308..fc619b9f1f40a 100644 > --- a/tools/perf/util/aslr.c > +++ b/tools/perf/util/aslr.c > @@ -109,6 +109,60 @@ static u64 round_up_to_page_size(u64 addr) > return (addr + page_size - 1) & ~((u64)page_size - 1); > } > > +static u64 aslr_tool__remap_address(struct aslr_tool *aslr, > + struct thread *aslr_thread, > + u8 cpumode, > + u64 addr) > +{ [ ... ] > + if (!thread__find_map(aslr_thread, effective_cpumode, addr, &al)) { > + addr_location__exit(&al); > + return 0; /* No mmap. */ > + } > + } > + > + key.machine = maps__machine(aslr_thread->maps); > + key.dso = map__dso(al.map); > + key.invariant = map__start(al.map) - map__pgoff(al.map); > + key.pid = (effective_cpumode == PERF_RECORD_MISC_KERNEL || > + effective_cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? 
> + kernel_pid : aslr_thread->pid_; [Severity: High] Will this direct field access break the build when REFCNT_CHECKING is enabled? When perf is built with REFCNT_CHECKING=1 for ASAN or LSAN builds, the thread struct is wrapped in an opaque pointer, hiding the actual fields. Direct access to maps and pid_ usually causes compilation failures under these builds. Should this use the proper thread__maps() and thread__pid() accessors instead? > @@ -554,13 +608,399 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, > struct perf_sample *sample, > struct machine *machine) > { [ ... ] > + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { > + u64 nr; [ ... ] > + for (u64 cntr = 0; cntr < nr; cntr++) { > + out_array[j++] = aslr_tool__remap_address(aslr, thread, > + sample->cpumode, > + in_array[i++]); /* from */ > + out_array[j++] = aslr_tool__remap_address(aslr, thread, > + sample->cpumode, > + in_array[i++]); /* to */ > + out_array[j++] = in_array[i++]; /* flags */ > + } [ ... ] > + new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); > + > + perf_sample__init(&new_sample, /*all=*/ true); > + ret = evsel__parse_sample(evsel, new_event, &new_sample); [Severity: High] Does this second call to evsel__parse_sample() corrupt branch stack flags when analyzing cross-endian perf.data files? The initial event processing loop calls evsel__parse_sample() on the original event, which mutates the event array in place by swapping the branch stack flags. When the already-mutated, native-endian payload is copied into new_event here, calling evsel__parse_sample() a second time will bitfield-swap the flags again. Does this inadvertently restore the original foreign-endian layout and feed corrupted branch flags to the downstream delegate tool? 
> + if (ret) { > + perf_sample__exit(&new_sample); > + goto out_put; > + } > + > + new_sample.evsel = evsel; > + ret = delegate->sample(delegate, new_event, &new_sample, machine); > + perf_sample__exit(&new_sample); > + > +out_put: > + thread__put(thread); > + return ret; > } -- Sashiko AI review · https://sashiko.dev/#/patchset/20260604172850.683329-1-irogers@google.com?part=3 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v9 4/5] perf test: Add inject ASLR test 2026-06-04 17:28 ` [PATCH v9 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (2 preceding siblings ...) 2026-06-04 17:28 ` [PATCH v9 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers @ 2026-06-04 17:28 ` Ian Rogers 2026-06-04 17:40 ` sashiko-bot 2026-06-04 17:28 ` [PATCH v9 5/5] perf aslr: Strip sample registers Ian Rogers 2026-06-05 6:06 ` [PATCH v10 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-04 17:28 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz Add a new shell test to verify the 'perf inject --aslr' feature. The test covers: - Basic address remapping for user space samples. - Pipe mode coverage for 'perf record' piped into 'perf inject --aslr'. - Callchain address remapping. - Consistency of 'perf report' output before and after injection. - Pipe mode report consistency. - Dropping of samples that leak ASLR info (physical addresses). - Kernel address remapping (utilizing a dedicated kernel-intensive VFS dd workload to guarantee continuous timer interrupts sampling flow inside kernel privilege states). - Kernel report consistency with address normalization. The test suite is hardened with global 'set -o pipefail' assertions to catch pipeline failures, stream-consuming awk processors to handle SIGPIPE signals, and a dedicated pipe output scenario validating raw 'perf inject -o -' stdout streams. 
Assisted-by: Antigravity:gemini-3.5-flash Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/tests/shell/inject_aslr.sh | 462 ++++++++++++++++++++++++++ 1 file changed, 462 insertions(+) create mode 100755 tools/perf/tests/shell/inject_aslr.sh diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh new file mode 100755 index 000000000000..ea0db1d5faf9 --- /dev/null +++ b/tools/perf/tests/shell/inject_aslr.sh @@ -0,0 +1,462 @@ +#!/bin/bash +# perf inject --aslr test +# SPDX-License-Identifier: GPL-2.0 + +set -e +set -o pipefail + +shelldir=$(dirname "$0") +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + +sym="noploop" + +skip_test_missing_symbol ${sym} + +# Create global temp directory +temp_dir=$(mktemp -d /tmp/perf-test-aslr.XXXXXXXXXX) + +prog="perf test -w noploop" +[ "$(uname -m)" = "s390x" ] && prog="$prog 3" +err=0 +kprog="dd if=/dev/zero of=/dev/null bs=1M count=500" + +cleanup() { + local exit_code=$? + if [ "${exit_code}" -ne 0 ] || [ "${err}" -ne 0 ]; then + echo "Test failed! Preserving temp directory: ${temp_dir}" + return + fi + # Check if temp_dir is set and looks sane before removing + if [[ "${temp_dir}" =~ ^/tmp/perf-test-aslr\. 
]]; then + rm -rf "${temp_dir}" + fi +} + +trap_cleanup() { + exit 1 +} + +trap cleanup EXIT +trap trap_cleanup TERM INT + +get_noploop_addr() { + local file=$1 + perf script -i "$file" | awk ' + BEGIN { found=0 } + { + for (i=1; i<=NF; i++) { + if ($i ~ /noploop\+/) { + if (!found) { + print $(i-1) + found=1 + } + } + } + }' +} + +test_basic_aslr() { + echo "Test basic ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.basic.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.basic.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -v --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Basic ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Basic ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Basic ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Basic ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Basic ASLR test [Success]" + fi +} + +test_pipe_aslr() { + echo "Test pipe mode ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe.XXXXXX") + + # Use tee to save the original pipe data for comparison + perf record -e task-clock:u -o - ${prog} | tee "${data}" | perf inject --aslr -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Pipe ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Pipe ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Pipe ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Pipe ASLR test [Failed - addresses are not remapped]" + err=1 + 
else + echo "Pipe ASLR test [Success]" + fi +} + +test_callchain_aslr() { + echo "Test Callchain ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.callchain.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.callchain.XXXXXX") + + perf record -g -e task-clock:u -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Callchain ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Callchain ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Callchain ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Callchain ASLR test [Failed - addresses are not remapped]" + err=1 + else + # Extract callchain addresses (indented lines starting with hex addresses) + orig_callchain=$(perf script -i "${data}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + new_callchain=$(perf script -i "${data2}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + + if [ -z "$orig_callchain" ]; then + echo "Callchain ASLR test [Failed - no callchain samples in original file]" + err=1 + elif [ -z "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain data was dropped]" + err=1 + elif [ "$orig_callchain" = "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain addresses were not remapped]" + err=1 + else + echo "Callchain ASLR test [Success]" + fi + fi +} + +test_report_aslr() { + echo "Test perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i "${data}" 
-o "${data_clean}" + perf inject -v -b --aslr -i "${data}" -o "${data2}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Report ASLR test [Success]" + fi +} + +test_pipe_report_aslr() { + echo "Test pipe mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + # Use tee to save the original pipe data, then process it with inject -b + perf record -e task-clock:u -o - ${prog} | \ + tee "${data}" | \ + perf inject -b --aslr -o "${data2}" + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true + grep 
'%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Pipe Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Report ASLR test [Success]" + fi +} + +test_pipe_out_report_aslr() { + echo "Test pipe output mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_out_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf inject -b --aslr -i "${data}" -o - | perf report -i - --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! 
-s "${report1_clean}" ]; then + echo "Pipe Output Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Output Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Output Report ASLR test [Success]" + fi +} + +test_dropped_samples() { + echo "Test dropped samples (phys-data)" + local data + data=$(mktemp "${temp_dir}/perf.data.dropped.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.dropped.XXXXXX") + + # Check if --phys-data is supported by recording a short run + if ! perf record -e task-clock:u --phys-data -o "${data}" -- sleep 0.1 > /dev/null 2>&1; then + echo "Skipping dropped samples test as --phys-data is not supported" + return + fi + + perf record -e task-clock:u --phys-data -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + # Verify that the original file actually contained samples! + orig_samples=$(perf script -i "${data}" | wc -l) + if [ "$orig_samples" -eq 0 ]; then + echo "Dropped samples test [Failed - no samples in original file]" + err=1 + else + # Verify that samples are dropped. + samples_count=$(perf script -i "${data2}" | wc -l) + + if [ "$samples_count" -gt 0 ]; then + echo "Dropped samples test [Failed - samples were not dropped]" + err=1 + else + echo "Dropped samples test [Success]" + fi + fi +} + +test_kernel_aslr() { + echo "Test kernel ASLR remapping" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then + echo "Skipping kernel ASLR test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel ASLR test as kernel map could not be recorded (permissions restricted)" + return + fi + + perf inject -v --aslr -i "${kdata}" -o "${kdata2}" + + # Check if kernel addresses are remapped. + # Find the field that ends with :k: (the event name) and take the next field! + orig_addr=$(perf script -i "${kdata}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + new_addr=$(perf script -i "${kdata2}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + + echo "Kernel ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Kernel ASLR test [Failed - no kernel samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Kernel ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Kernel ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Kernel ASLR test [Success]" + fi +} + +test_kernel_report_aslr() { + echo "Test kernel perf report consistency" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel_report.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_report_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then + echo "Skipping kernel report test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel report test as kernel map could not be recorded (permissions restricted)" + return + fi + + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i "${kdata}" -o "${data_clean}" + perf inject -v -b --aslr -i "${kdata}" -o "${kdata2}" + + local report1="${temp_dir}/report_kernel1" + local report2="${temp_dir}/report_kernel2" + local report1_clean="${temp_dir}/report_kernel1.clean" + local report2_clean="${temp_dir}/report_kernel2.clean" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${kdata2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' > "${report2_clean}" || true + + # Normalize kernel DSOs and addresses in clean reports + # This allows kernel modules to be either a module or kernel.kallsyms + local report1_norm="${temp_dir}/report_kernel1.norm" + local report2_norm="${temp_dir}/report_kernel2.norm" + local diff_file="${temp_dir}/diff_kernel" + + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report1_clean}" | \ + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ + sort > "${report1_norm}" || true + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report2_clean}" | \ + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ + sort > "${report2_norm}" || true + + diff -u -w "${report1_norm}" "${report2_norm}" > "${diff_file}" || true + + if [ ! 
-s "${report1_norm}" ]; then + echo "Kernel Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Kernel Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Kernel Report ASLR test [Success]" + fi +} + +test_basic_aslr +test_pipe_aslr +test_callchain_aslr +test_report_aslr +test_pipe_report_aslr +test_pipe_out_report_aslr +test_dropped_samples +test_kernel_aslr +test_kernel_report_aslr + +cleanup +exit $err -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v9 4/5] perf test: Add inject ASLR test 2026-06-04 17:28 ` [PATCH v9 4/5] perf test: Add inject ASLR test Ian Rogers @ 2026-06-04 17:40 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-06-04 17:40 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider: - [Low] Double execution of the cleanup function on script termination. -- commit 15a772bdaa14c7c9486c156f196dc344720b9656 Author: Ian Rogers <irogers@google.com> perf test: Add inject ASLR test Add a new shell test to verify the feature. The test covers basic address remapping for user space samples, pipe mode coverage, callchain address remapping, report consistency, and kernel address remapping. > diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh > new file mode 100755 > index 0000000000000..ea0db1d5faf9b > --- /dev/null > +++ b/tools/perf/tests/shell/inject_aslr.sh [ ... ] > +cleanup() { > + local exit_code=$? > + if [ "${exit_code}" -ne 0 ] || [ "${err}" -ne 0 ]; then > + echo "Test failed! Preserving temp directory: ${temp_dir}" > + return > + fi > + # Check if temp_dir is set and looks sane before removing > + if [[ "${temp_dir}" =~ ^/tmp/perf-test-aslr\. ]]; then > + rm -rf "${temp_dir}" > + fi > +} > + > +trap_cleanup() { > + exit 1 > +} > + > +trap cleanup EXIT > +trap trap_cleanup TERM INT [ ... ] > +test_kernel_report_aslr > + > +cleanup > +exit $err [Severity: Low] Does this code execute the cleanup function twice? The script registers cleanup on the EXIT trap, but explicitly calls it here right before exit. If a test fails, could this result in the failure message being printed twice, or if tests succeed, might it attempt to remove the temporary directory a second time? 
-- Sashiko AI review · https://sashiko.dev/#/patchset/20260604172850.683329-1-irogers@google.com?part=4 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v9 5/5] perf aslr: Strip sample registers 2026-06-04 17:28 ` [PATCH v9 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (3 preceding siblings ...) 2026-06-04 17:28 ` [PATCH v9 4/5] perf test: Add inject ASLR test Ian Rogers @ 2026-06-04 17:28 ` Ian Rogers 2026-06-04 17:45 ` sashiko-bot 2026-06-05 6:06 ` [PATCH v10 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-04 17:28 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz Refactor the ASLR tool to strip out only the register dump payload by masking out the relevant perf_event_attr fields when the delegated tool is handling the data. struct aslr_evsel_priv maintains the original perf_event_attr values and is looked up via the evsel_orig_attrs hashmap. This allows us to keep samples that would otherwise be dropped because they contain registers, while still obfuscating the registers. 
Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/builtin-inject.c | 35 +++-- tools/perf/tests/shell/inject_aslr.sh | 55 +++++++ tools/perf/util/aslr.c | 209 +++++++++++++++++++++----- tools/perf/util/aslr.h | 4 + 4 files changed, 255 insertions(+), 48 deletions(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index a9f0a3901e7b..e0f59ef8b97b 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -253,6 +253,8 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, return -ENOMEM; memcpy(stripped_event, event, event->header.size); stripped_event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + stripped_event->attr.attr.sample_regs_user = 0; + stripped_event->attr.attr.sample_regs_intr = 0; if (stripped_event->attr.attr.type == PERF_TYPE_BREAKPOINT) stripped_event->attr.attr.bp_addr = 0; @@ -2620,6 +2622,9 @@ static int __cmd_inject(struct perf_inject *inject) + if (inject->aslr) + aslr_tool__strip_evlist(inject->session->tool, session->evlist); + session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc, @@ -2878,6 +2883,18 @@ int cmd_inject(int argc, const char **argv) if (zstd_init(&(inject.session->zstd_data), 0) < 0) pr_warning("Decompression initialization failed.\n"); + if (inject.aslr) { + struct evsel *evsel; + + evlist__for_each_entry(inject.session->evlist, evsel) { + ret = aslr_tool__cache_orig_attrs(tool, evsel); + if (ret) { + pr_err("Failed to cache original attributes: %d\n", ret); + goto out_delete; + } + } + } + /* Save original section info before feature bits change */ ret = save_section_info(&inject); if (ret) @@ -2896,10 +2913,17 @@ int cmd_inject(int argc, const char **argv) * the input. 
*/ if (!data.is_pipe) { + if (inject.aslr) + aslr_tool__strip_evlist(tool, inject.session->evlist); + ret = perf_event__synthesize_for_pipe(&inject.tool, inject.session, &inject.output, perf_event__repipe); + + if (inject.aslr) + aslr_tool__restore_evlist(tool, inject.session->evlist); + if (ret < 0) goto out_delete; } @@ -2965,17 +2989,6 @@ int cmd_inject(int argc, const char **argv) ret = __cmd_inject(&inject); - if (inject.aslr) { - struct evsel *evsel; - - evlist__for_each_entry(inject.session->evlist, evsel) { - evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; - - if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) - evsel->core.attr.bp_addr = 0; - } - } - guest_session__exit(&inject.guest_session); out_delete: diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh index ea0db1d5faf9..8fe33a8e6fc8 100755 --- a/tools/perf/tests/shell/inject_aslr.sh +++ b/tools/perf/tests/shell/inject_aslr.sh @@ -448,6 +448,60 @@ test_kernel_report_aslr() { fi } +test_regs_stripping() { + echo "Test user register stripping" + local rdata="${temp_dir}/perf.data.regs" + local rdata2="${temp_dir}/perf.data.regs.injected" + local rdata_clean="${temp_dir}/perf.data.regs.clean" + + if ! 
perf record --user-regs -o "${rdata}" ${prog} > /dev/null 2>&1; then + echo "Skipping user registers test as recording failed (unsupported flag/platform)" + return + fi + + perf inject -b -i "${rdata}" -o "${rdata_clean}" + perf inject -v -b --aslr -i "${rdata}" -o "${rdata2}" + + local report1="${temp_dir}/report_regs1" + local report2="${temp_dir}/report_regs2" + local report1_clean="${temp_dir}/report_regs1.clean" + local report2_clean="${temp_dir}/report_regs2.clean" + local diff_file="${temp_dir}/diff_regs" + + perf report -i "${rdata_clean}" --stdio > "${report1}" 2>/dev/null || true + perf report -i "${rdata2}" --stdio > "${report2}" 2>/dev/null || true + + grep '%' "${report1}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | \ + sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | \ + sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! 
-s "${report1_clean}" ]; then + echo "User registers stripping test [Failed - profile trace starved/empty]" + err=1 + return + elif [ -s "${diff_file}" ]; then + echo "User registers stripping test [Failed - report parsing differs]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + return + fi + + local script_dump="${temp_dir}/script_regs_dump" + perf script -D -i "${rdata2}" > "${script_dump}" 2>/dev/null || true + if grep -q "PERF_SAMPLE_REGS_USER" "${script_dump}"; then + echo "User registers stripping test [Failed - register dumps still present]" + err=1 + else + echo "User registers stripping test [Success]" + fi +} + test_basic_aslr test_pipe_aslr test_callchain_aslr @@ -457,6 +511,7 @@ test_pipe_out_report_aslr test_dropped_samples test_kernel_aslr test_kernel_report_aslr +test_regs_stripping cleanup exit $err diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c index fc619b9f1f40..6ccd6290620a 100644 --- a/tools/perf/util/aslr.c +++ b/tools/perf/util/aslr.c @@ -5,6 +5,7 @@ #include "debug.h" #include "event.h" #include "evsel.h" +#include "evlist.h" #include "machine.h" #include "map.h" #include "thread.h" @@ -16,6 +17,7 @@ #include <internal/lib.h> /* page_size */ #include <linux/compiler.h> #include <linux/zalloc.h> +#include <errno.h> #include <inttypes.h> #include <unistd.h> @@ -43,6 +45,22 @@ struct aslr_mapping { u64 remap_start; }; +struct aslr_evsel_priv { + u64 orig_sample_type; + u64 orig_sample_regs_user; + u64 orig_sample_regs_intr; +}; + +static size_t evsel_hash(long key, void *ctx __maybe_unused) +{ + return (size_t)key; +} + +static bool evsel_equal(long key1, long key2, void *ctx __maybe_unused) +{ + return key1 == key2; +} + struct process_top_address { u64 remapped_max; u64 orig_last_end; @@ -58,6 +76,11 @@ struct aslr_tool { struct hashmap remap_addresses; /** @top_addresses: mapping from process to max remapped address. 
*/ struct hashmap top_addresses; + /** + * @evsel_orig_attrs: mapping from evsel pointer to its original + * unstripped sample_type and registers bitmasks. + */ + struct hashmap evsel_orig_attrs; }; static const pid_t kernel_pid = -1; @@ -613,6 +636,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, struct aslr_tool *aslr; struct perf_tool *delegate; int ret; + int orig_sample_size; u64 sample_type; struct thread *thread; struct machine *aslr_machine; @@ -625,6 +649,10 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, u64 addr; size_t i; size_t j; + struct aslr_evsel_priv *priv = NULL; + u64 orig_sample_type; + u64 orig_regs_user; + u64 orig_regs_intr; del_tool = container_of(tool, struct delegate_tool, tool); aslr = container_of(del_tool, struct aslr_tool, tool); @@ -634,7 +662,23 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, return delegate->sample(delegate, event, sample, machine); ret = -EFAULT; - sample_type = evsel->core.attr.sample_type; + + if (hashmap__find(&aslr->evsel_orig_attrs, evsel, &priv)) { + orig_sample_type = priv->orig_sample_type; + orig_regs_user = priv->orig_sample_regs_user; + orig_regs_intr = priv->orig_sample_regs_intr; + } else { + orig_sample_type = evsel->core.attr.sample_type; + orig_regs_user = evsel->core.attr.sample_regs_user; + orig_regs_intr = evsel->core.attr.sample_regs_intr; + } + + orig_sample_size = evsel->sample_size; + + sample_type = orig_sample_type; + sample_type &= ~PERF_SAMPLE_REGS_USER; + sample_type &= ~PERF_SAMPLE_REGS_INTR; + max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); new_event = (union perf_event *)aslr->event_copy; @@ -683,25 +727,25 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, i++; \ } while (0) - if (sample_type & PERF_SAMPLE_IDENTIFIER) + if (orig_sample_type & PERF_SAMPLE_IDENTIFIER) COPY_U64(); /* id 
*/ - if (sample_type & PERF_SAMPLE_IP) + if (orig_sample_type & PERF_SAMPLE_IP) REMAP_U64(sample->ip); - if (sample_type & PERF_SAMPLE_TID) + if (orig_sample_type & PERF_SAMPLE_TID) COPY_U64(); /* pid, tid */ - if (sample_type & PERF_SAMPLE_TIME) + if (orig_sample_type & PERF_SAMPLE_TIME) COPY_U64(); /* time */ - if (sample_type & PERF_SAMPLE_ADDR) + if (orig_sample_type & PERF_SAMPLE_ADDR) REMAP_U64(sample->addr); - if (sample_type & PERF_SAMPLE_ID) + if (orig_sample_type & PERF_SAMPLE_ID) COPY_U64(); /* id */ - if (sample_type & PERF_SAMPLE_STREAM_ID) + if (orig_sample_type & PERF_SAMPLE_STREAM_ID) COPY_U64(); /* stream_id */ - if (sample_type & PERF_SAMPLE_CPU) + if (orig_sample_type & PERF_SAMPLE_CPU) COPY_U64(); /* cpu, res */ - if (sample_type & PERF_SAMPLE_PERIOD) + if (orig_sample_type & PERF_SAMPLE_PERIOD) COPY_U64(); /* period */ - if (sample_type & PERF_SAMPLE_READ) { + if (orig_sample_type & PERF_SAMPLE_READ) { if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { COPY_U64(); /* value */ if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) @@ -735,7 +779,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, } } } - if (sample_type & PERF_SAMPLE_CALLCHAIN) { + if (orig_sample_type & PERF_SAMPLE_CALLCHAIN) { u64 nr; if (CHECK_BOUNDS(1, 1)) { @@ -801,7 +845,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr); } } - if (sample_type & PERF_SAMPLE_RAW) { + if (orig_sample_type & PERF_SAMPLE_RAW) { size_t bytes = sizeof(u32) + sample->raw_size; size_t u64_words = (bytes + 7) / 8; @@ -820,7 +864,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + if (orig_sample_type & PERF_SAMPLE_BRANCH_STACK) { u64 nr; if (CHECK_BOUNDS(1, 1)) { @@ -865,19 +909,25 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, goto out_put; } } - 
if (sample_type & PERF_SAMPLE_REGS_USER) { + if (orig_sample_type & PERF_SAMPLE_REGS_USER) { + u64 abi; + if (CHECK_BOUNDS(1, 0)) { ret = -EFAULT; goto out_put; } - /* abi */ - COPY_U64(); - /* TODO: can this be less conservative? */ - pr_debug("Dropping regs user sample as possible ASLR leak\n"); - ret = 0; - goto out_put; + abi = in_array[i++]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(orig_regs_user); + + if (nr > max_i - i) { + ret = -EFAULT; + goto out_put; + } + i += nr; + } } - if (sample_type & PERF_SAMPLE_STACK_USER) { + if (orig_sample_type & PERF_SAMPLE_STACK_USER) { u64 size; if (CHECK_BOUNDS(1, 1)) { @@ -908,39 +958,45 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + if (orig_sample_type & PERF_SAMPLE_WEIGHT_TYPE) COPY_U64(); /* perf_sample_weight */ - if (sample_type & PERF_SAMPLE_DATA_SRC) + if (orig_sample_type & PERF_SAMPLE_DATA_SRC) COPY_U64(); /* data_src */ - if (sample_type & PERF_SAMPLE_TRANSACTION) + if (orig_sample_type & PERF_SAMPLE_TRANSACTION) COPY_U64(); /* transaction */ - if (sample_type & PERF_SAMPLE_REGS_INTR) { + if (orig_sample_type & PERF_SAMPLE_REGS_INTR) { + u64 abi; + if (CHECK_BOUNDS(1, 0)) { ret = -EFAULT; goto out_put; } - /* abi */ - COPY_U64(); - /* TODO: can this be less conservative? */ - pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); - ret = 0; - goto out_put; + abi = in_array[i++]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(orig_regs_intr); + + if (nr > max_i - i) { + ret = -EFAULT; + goto out_put; + } + i += nr; + } } - if (sample_type & PERF_SAMPLE_PHYS_ADDR) { + if (orig_sample_type & PERF_SAMPLE_PHYS_ADDR) { COPY_U64(); /* phys_addr */ /* TODO: can this be less conservative? 
*/ pr_debug("Dropping physical address sample as possible ASLR leak\n"); ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_CGROUP) + if (orig_sample_type & PERF_SAMPLE_CGROUP) COPY_U64(); /* cgroup */ - if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + if (orig_sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) COPY_U64(); /* data_page_size */ - if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + if (orig_sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) COPY_U64(); /* code_page_size */ - if (sample_type & PERF_SAMPLE_AUX) { + if (orig_sample_type & PERF_SAMPLE_AUX) { u64 size; if (CHECK_BOUNDS(1, 1)) { @@ -981,9 +1037,21 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); + /* Temporarily override evsel attributes to match the stripped new_event format! */ + evsel->sample_size = __evsel__sample_size(sample_type); + evsel->core.attr.sample_type = sample_type; + evsel->core.attr.sample_regs_user = 0; + evsel->core.attr.sample_regs_intr = 0; + perf_sample__init(&new_sample, /*all=*/ true); ret = evsel__parse_sample(evsel, new_event, &new_sample); + if (ret) { + /* Restore original attributes immediately if parsing fails */ + evsel->sample_size = orig_sample_size; + evsel->core.attr.sample_type = orig_sample_type; + evsel->core.attr.sample_regs_user = orig_regs_user; + evsel->core.attr.sample_regs_intr = orig_regs_intr; perf_sample__exit(&new_sample); goto out_put; } @@ -992,6 +1060,12 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = delegate->sample(delegate, new_event, &new_sample, machine); perf_sample__exit(&new_sample); + /* Restore original attributes so trace ingestion never desynchronizes! 
*/ + evsel->sample_size = orig_sample_size; + evsel->core.attr.sample_type = orig_sample_type; + evsel->core.attr.sample_regs_user = orig_regs_user; + evsel->core.attr.sample_regs_intr = orig_regs_intr; + out_put: thread__put(thread); return ret; @@ -1057,6 +1131,9 @@ static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) hashmap__init(&aslr->top_addresses, top_addresses__hash, top_addresses__equal, /*ctx=*/NULL); + hashmap__init(&aslr->evsel_orig_attrs, + evsel_hash, evsel_equal, + /*ctx=*/NULL); aslr->tool.tool.sample = aslr_tool__process_sample; /* read - reads a counter, okay to delegate. */ @@ -1118,11 +1195,69 @@ void aslr_tool__delete(struct perf_tool *tool) zfree(&cur->pkey); zfree(&cur->pvalue); } + hashmap__for_each_entry(&aslr->evsel_orig_attrs, cur, bkt) { + zfree(&cur->pvalue); + } hashmap__clear(&aslr->remap_addresses); hashmap__clear(&aslr->top_addresses); + hashmap__clear(&aslr->evsel_orig_attrs); aslr_tool__destroy_machines_priv(&aslr->machines); machines__destroy_kernel_maps(&aslr->machines); machines__exit(&aslr->machines); free(aslr); } + +int aslr_tool__cache_orig_attrs(struct perf_tool *tool, struct evsel *evsel) +{ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct aslr_evsel_priv *priv = zalloc(sizeof(*priv)); + int err; + + if (!priv) + return -ENOMEM; + + priv->orig_sample_type = evsel->core.attr.sample_type; + priv->orig_sample_regs_user = evsel->core.attr.sample_regs_user; + priv->orig_sample_regs_intr = evsel->core.attr.sample_regs_intr; + + err = hashmap__add(&aslr->evsel_orig_attrs, evsel, priv); + if (err) { + free(priv); + return err; + } + return 0; +} + +void aslr_tool__strip_evlist(const struct perf_tool *tool __maybe_unused, struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + 
evsel__reset_sample_bit(evsel, REGS_USER); + evsel__reset_sample_bit(evsel, REGS_INTR); + evsel->core.attr.sample_regs_user = 0; + evsel->core.attr.sample_regs_intr = 0; + + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) + evsel->core.attr.bp_addr = 0; + } +} + +void aslr_tool__restore_evlist(const struct perf_tool *tool, struct evlist *evlist) +{ + const struct delegate_tool *del_tool = container_of(tool, const struct delegate_tool, tool); + const struct aslr_tool *aslr = container_of(del_tool, const struct aslr_tool, tool); + struct evsel *evsel; + struct aslr_evsel_priv *priv; + + evlist__for_each_entry(evlist, evsel) { + if (hashmap__find(&aslr->evsel_orig_attrs, evsel, &priv)) { + evsel->core.attr.sample_type = priv->orig_sample_type; + evsel->core.attr.sample_regs_user = priv->orig_sample_regs_user; + evsel->core.attr.sample_regs_intr = priv->orig_sample_regs_intr; + } + } +} diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h index a9b90bf29540..4c2cffc0e500 100644 --- a/tools/perf/util/aslr.h +++ b/tools/perf/util/aslr.h @@ -30,8 +30,12 @@ struct perf_tool; struct evsel; +struct evlist; struct perf_tool *aslr_tool__new(struct perf_tool *delegate); void aslr_tool__delete(struct perf_tool *aslr); +int aslr_tool__cache_orig_attrs(struct perf_tool *tool, struct evsel *evsel); +void aslr_tool__strip_evlist(const struct perf_tool *tool, struct evlist *evlist); +void aslr_tool__restore_evlist(const struct perf_tool *tool, struct evlist *evlist); #endif /* __PERF_ASLR_H */ -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v9 5/5] perf aslr: Strip sample registers 2026-06-04 17:28 ` [PATCH v9 5/5] perf aslr: Strip sample registers Ian Rogers @ 2026-06-04 17:45 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-06-04 17:45 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 2 potential issue(s) to consider: - [Critical] Allocating `stripped_event` using unvalidated `event->header.size` and unconditionally writing to `sample_regs_user` and `sample_regs_intr` causes a severe heap buffer overflow. - [High] Missing register stripping in the `itrace`-enabled path of `perf_event__repipe_attr` corrupts the trace output. -- commit 5f1dabd94a15919e76927871536447a8782dba24 Author: Ian Rogers <irogers@google.com> perf aslr: Strip sample registers Refactor the ASLR tool to strip out only the register dump payload by masking out the relevant perf_event_attr fields when the delegated tool is handling the data. struct aslr_evsel_priv maintains the original perf_event_attr values and is looked up via the evsel_orig_attrs hashmap. > diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c > index a9f0a3901e7bc..e0f59ef8b97b2 100644 > --- a/tools/perf/builtin-inject.c > +++ b/tools/perf/builtin-inject.c > @@ -253,6 +253,8 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, > return -ENOMEM; > memcpy(stripped_event, event, event->header.size); > stripped_event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; > + stripped_event->attr.attr.sample_regs_user = 0; > + stripped_event->attr.attr.sample_regs_intr = 0; [Severity: Critical] Can this write out of bounds and corrupt the heap? In perf_event__repipe_attr(), stripped_event is allocated using event->header.size. For older perf.data formats (like PERF_ATTR_SIZE_VER0), the size can be as small as 72 bytes. 
However, sample_regs_user and sample_regs_intr are at offsets 88 and 104 in the perf_event_attr structure. Writing to these fields unconditionally on a 72-byte allocation will corrupt the heap when processing older traces. [Severity: High] Also in perf_event__repipe_attr(), when itrace_synth_opts.set is true, should the register bits and masks be cleared from the synthesized attr? When --itrace and --aslr are used together in pipe mode, the code branches to the itrace_synth_opts.set path in perf_event__repipe_attr(): perf_event__repipe_attr() { ... if (!inject->itrace_synth_opts.set) { /* registers are stripped in this branch */ } ... attr.sample_type &= ~PERF_SAMPLE_AUX; if (inject->aslr) attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; /* register clearing is missing here */ ... perf_event__synthesize_attr(...) } This patch strips the repiped samples of their register payloads via aslr_tool__process_sample(), but the itrace path above leaves the register flags in the attr event. This mismatch between the attr event and the payload can corrupt the output pipe trace and cause parsers to fail. > if (stripped_event->attr.attr.type == PERF_TYPE_BREAKPOINT) > stripped_event->attr.attr.bp_addr = 0; [ ... ] -- Sashiko AI review · https://sashiko.dev/#/patchset/20260604172850.683329-1-irogers@google.com?part=5 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v10 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes 2026-06-04 17:28 ` [PATCH v9 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (4 preceding siblings ...) 2026-06-04 17:28 ` [PATCH v9 5/5] perf aslr: Strip sample registers Ian Rogers @ 2026-06-05 6:06 ` Ian Rogers 2026-06-05 6:06 ` [PATCH v10 1/5] perf maps: Add maps__mutate_mapping Ian Rogers ` (5 more replies) 5 siblings, 6 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-05 6:06 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz This patch series introduces the new 'perf inject --aslr' feature, which remaps virtual memory addresses, and drops events that would leak physical memory addresses, so that recorded profile data can be shared between machines. Bundled with this feature is a bug fix inside the core map tracking tool that hardens perf session analysis against concurrent lookup data races. Detailed Mechanism of MMAP Mapping and ASLR Virtual Address Allocation: The ASLR tool virtualizes the address space of the recorded processes by intercepting MMAP and MMAP2 events to build a consistent translation database, which is subsequently used to rewrite sample addresses. It maintains two primary lookup databases using hash maps: 1. 'remap_addresses': Maps an original mapping key to its new remapped base address. The key uses topological invariant coordinates: (machine, dso, invariant). The invariant is computed as (start - pgoff) for DSO-backed mappings. This invariant remains constant even when perf's internal overlap-resolution splits a VMA into fragmented pieces, ensuring split maps resolve consistently back to the same remapped base. 2. 'top_addresses': Tracks the allocation state per process (machine, pid).
It maintains 'remapped_max' (the highest allocated address in the virtualized space) and 'orig_last_end' (the end address of the last processed original mapping). For each MMAP/MMAP2 event: - We look up the DSO and invariant key in 'remap_addresses'. If found, we reuse the translation, preserving the offset within the mapping. - If not found, we allocate a new remapped address space: - If the new mapping is contiguous with the previous one in the original address space (start == orig_last_end), we place it contiguously in the remapped space. This is critical to preserve the contiguity of mappings for downstream merging (e.g. symbols split by HugeTLB, or anonymous .bss segments adjacent to initialized data). - If not contiguous, we insert a 1-page gap (using page_size) from the previous maximum allocated address to prevent accidental merging of unrelated VMAs. - The event's start address (and pgoff for kernel maps) is rewritten, and the event is delegated to the output writer. To remain strictly conservative and guarantee security, the tool scrubs breakpoint addresses (bp_addr) from all synthesized stream headers, completely drops PERF_RECORD_TEXT_POKE events to prevent leaking absolute immediate pointer operands, and drops unsupported complex payloads (such as user register stacks, raw tracepoints, and hardware AUX tracing frames). Verification is reinforced with a shell test ('inject_aslr.sh'). Prerequisite Bug Fix (Patch 1): During development, a core map indexing issue was identified and resolved to prevent concurrent lookup data races during session analysis. Changes since v9: - Patch 1: Added an `-ENOMEM` error check inside `maps__find_symbol_by_name()` that returns `NULL` early. Added map sorting state invalidation on early return in `maps__load_maps()`. - Patch 2: Fixed encapsulation by using `thread__maps()` and `thread__pid()` accessors in `aslr_tool__findnew_mapping()`. Added a `pr_warning_once` warning when raw auxtrace data is dropped.
- Patch 3: Fixed encapsulation by using `thread__maps()` and `thread__pid()` accessors in `aslr_tool__remap_address()`. Wrapped `evsel__parse_sample()` to temporarily disable `needs_swap` to avoid branch stack endianness corruption on cross-endian files. Fixed ISO C90 warning for declaration-after-statement for `orig_needs_swap`. - Patch 4: Fixed duplicate cleanup by explicitly removing trap handlers (`trap - EXIT TERM INT`) inside the `cleanup()` function. - Patch 5: Fixed heap corruption by adding size bounds checking before writing to `sample_regs_user` and `sample_regs_intr` fields. Added missing register mask clearing logic for the `itrace` synthesis path of `perf_event__repipe_attr()`. Ian Rogers (5): perf maps: Add maps__mutate_mapping perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking perf inject/aslr: Implement sample address remapping perf test: Add inject ASLR test perf aslr: Strip sample registers tools/perf/builtin-inject.c | 79 +- tools/perf/tests/shell/inject_aslr.sh | 518 ++++++++++ tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 1269 +++++++++++++++++++++++++ tools/perf/util/aslr.h | 41 + tools/perf/util/machine.c | 32 +- tools/perf/util/maps.c | 80 ++ tools/perf/util/maps.h | 3 + tools/perf/util/symbol-elf.c | 41 +- tools/perf/util/symbol.c | 17 +- 10 files changed, 2048 insertions(+), 33 deletions(-) create mode 100755 tools/perf/tests/shell/inject_aslr.sh create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v10 1/5] perf maps: Add maps__mutate_mapping 2026-06-05 6:06 ` [PATCH v10 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers @ 2026-06-05 6:06 ` Ian Rogers 2026-06-05 6:20 ` sashiko-bot 2026-06-05 6:06 ` [PATCH v10 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers ` (4 subsequent siblings) 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-05 6:06 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz During kernel ELF symbol parsing (dso__process_kernel_symbol), /proc/kallsyms image loading (dso__load_kernel_sym, dso__load_guest_kernel_sym), and dynamic kernel memory map alignment updates (machine__update_kernel_mmap), the loader directly modifies the live virtual address boundary key fields on map objects. If these boundaries are mutated while the map pointer actively resides inside the parent maps cache array list (kmaps) and no lock is held, an unsafe concurrent window is exposed where parallel worker lookup threads (e.g., inside perf top) can mistakenly assume the cache remains sorted based on stale parameters, executing binary search queries (bsearch) across an unsorted range and triggering lookup failures. Fix this by introducing maps__mutate_mapping(), which explicitly acquires the parent maps write semaphore lock, executes an incoming mutation callback block to perform the field updates under lock protection, and invalidates the sorted tracking flags prior to releasing the write lock. This guarantees synchronization invariants, closing the concurrent lookup race window. The adjacent module alignment pass inside machine__create_kernel_maps() is safely preserved as a high-performance lockless pass, as its invocation lifecycle bounds remain strictly single-threaded by contract during session initialization construction.
To safely support this unconditional down_write write lock mutator without recursive read-to-write self-deadlock upgrades during lazy symbol loading, we introduce a public maps__load_maps() API. It copies map pointers under a brief read lock and force-loads all modules locklessly outside the lock. Callers (such as perf inject) must pre-load all kernel symbol maps up front at startup using maps__load_maps(), completely bypassing dynamic runtime mutations. Fixes: 39b12f781271 ("perf tools: Make it possible to read object code from vmlinux") Assisted-by: Antigravity:gemini-3.5-flash Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/util/machine.c | 32 +++++++++------ tools/perf/util/maps.c | 80 ++++++++++++++++++++++++++++++++++++ tools/perf/util/maps.h | 3 ++ tools/perf/util/symbol-elf.c | 41 +++++++++++------- tools/perf/util/symbol.c | 17 ++++++-- 5 files changed, 142 insertions(+), 31 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index da1ad58758af..1ea06fde14e0 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1539,22 +1539,30 @@ static void machine__set_kernel_mmap(struct machine *machine, map__set_end(machine->vmlinux_map, ~0ULL); } -static int machine__update_kernel_mmap(struct machine *machine, - u64 start, u64 end) +struct kernel_mmap_mutation_ctx { + u64 start; + u64 end; +}; + +static int kernel_mmap_mutate_cb(struct map *map, void *data) { - struct map *orig, *updated; - int err; + struct kernel_mmap_mutation_ctx *ctx = data; - orig = machine->vmlinux_map; - updated = map__get(orig); + map__set_start(map, ctx->start); + map__set_end(map, ctx->end); + if (ctx->start == 0 && ctx->end == 0) + map__set_end(map, ~0ULL); + return 0; +} - machine->vmlinux_map = updated; - maps__remove(machine__kernel_maps(machine), orig); - machine__set_kernel_mmap(machine, start, end); - err = maps__insert(machine__kernel_maps(machine), updated); - map__put(orig); +static int 
machine__update_kernel_mmap(struct machine *machine, + u64 start, u64 end) +{ + struct kernel_mmap_mutation_ctx ctx = { .start = start, .end = end }; - return err; + return maps__mutate_mapping(machine__kernel_maps(machine), + machine->vmlinux_map, + kernel_mmap_mutate_cb, &ctx); } int machine__create_kernel_maps(struct machine *machine) diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index 923935ee21b6..be77dee16041 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -576,6 +576,49 @@ void maps__remove(struct maps *maps, struct map *map) #endif } +/** + * maps__mutate_mapping - Apply write-protected mutations to a map. + * @maps: The maps collection containing the map. + * @map: The map to mutate. + * @mutate_cb: Callback function that performs the actual mutations. + * @data: Private data passed to the callback. + * + * This acquires the write lock on the maps semaphore to safely protect + * concurrent readers from seeing partially mutated or unsorted map boundaries. + * + * WARNING: Acquiring down_write() here can trigger a recursive self-deadlock if + * the caller already holds the read lock (e.g., during maps__for_each_map() or + * maps__find() iteration paths that trigger lazy symbol loading). To completely + * avoid this deadlock, all kernel/module maps must be pre-loaded up-front (via + * maps__load_maps()) under a clean, single-threaded context before entering + * multi-threaded event processing loops. 
+ */ +int maps__mutate_mapping(struct maps *maps, struct map *map, + int (*mutate_cb)(struct map *map, void *data), void *data) +{ + int err = 0; + + if (maps) + down_write(maps__lock(maps)); + + err = mutate_cb(map, data); + + if (maps) { + RC_CHK_ACCESS(maps)->maps_by_address_sorted = false; + RC_CHK_ACCESS(maps)->maps_by_name_sorted = false; + } + + if (maps) + up_write(maps__lock(maps)); + +#ifdef HAVE_LIBDW_SUPPORT + if (maps) + libdw__invalidate_dwfl(maps, maps__libdw_addr_space_dwfl(maps)); +#endif + + return err; +} + bool maps__empty(struct maps *maps) { bool res; @@ -626,6 +669,41 @@ int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data) return ret; } +int maps__load_maps(struct maps *maps) +{ + struct map **maps_copy; + unsigned int nr_maps; + int err = 0; + + if (!maps) + return 0; + + down_read(maps__lock(maps)); + nr_maps = maps__nr_maps(maps); + if (nr_maps == 0) { + up_read(maps__lock(maps)); + return 0; + } + maps_copy = calloc(nr_maps, sizeof(*maps_copy)); + if (!maps_copy) { + RC_CHK_ACCESS(maps)->maps_by_address_sorted = false; + RC_CHK_ACCESS(maps)->maps_by_name_sorted = false; + up_read(maps__lock(maps)); + return -ENOMEM; + } + for (unsigned int i = 0; i < nr_maps; i++) + maps_copy[i] = map__get(maps__maps_by_address(maps)[i]); + up_read(maps__lock(maps)); + + for (unsigned int i = 0; i < nr_maps; i++) { + if (map__load(maps_copy[i]) < 0) + err = -1; + map__put(maps_copy[i]); + } + free(maps_copy); + return err; +} + void maps__remove_maps(struct maps *maps, bool (*cb)(struct map *map, void *data), void *data) { struct map **maps_by_address; @@ -700,6 +778,8 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, st .sym = NULL, }; + if (maps__load_maps(maps) < 0) + return NULL; maps__for_each_map(maps, maps__find_symbol_by_name_cb, &args); return args.sym; } diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h index 5b80b199685e..4ec9b7453a3b 100644 --- a/tools/perf/util/maps.h +++ 
b/tools/perf/util/maps.h @@ -59,8 +59,11 @@ void maps__set_libdw_addr_space_dwfl(struct maps *maps, void *dwfl); size_t maps__fprintf(struct maps *maps, FILE *fp); +int maps__load_maps(struct maps *maps); int maps__insert(struct maps *maps, struct map *map); void maps__remove(struct maps *maps, struct map *map); +int maps__mutate_mapping(struct maps *maps, struct map *map, + int (*mutate_cb)(struct map *map, void *data), void *data); struct map *maps__find(struct maps *maps, u64 addr); struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 186e6d92ac3d..d1e93c0556dd 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1342,6 +1342,24 @@ static u64 ref_reloc(struct kmap *kmap) void __weak arch__sym_update(struct symbol *s __maybe_unused, GElf_Sym *sym __maybe_unused) { } +struct remap_kernel_ctx { + u64 sh_addr; + u64 sh_size; + u64 sh_offset; + struct kmap *kmap; +}; + +static int remap_kernel_cb(struct map *map, void *data) +{ + struct remap_kernel_ctx *ctx = data; + + map__set_start(map, ctx->sh_addr + ref_reloc(ctx->kmap)); + map__set_end(map, map__start(map) + ctx->sh_size); + map__set_pgoff(map, ctx->sh_offset); + map__set_mapping_type(map, MAPPING_TYPE__DSO); + return 0; +} + static int dso__process_kernel_symbol(struct dso *dso, struct map *map, GElf_Sym *sym, GElf_Shdr *shdr, struct maps *kmaps, struct kmap *kmap, @@ -1372,22 +1390,15 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, * map to the kernel dso. 
*/ if (*remap_kernel && dso__kernel(dso) && !kmodule) { + struct remap_kernel_ctx ctx = { + .sh_addr = shdr->sh_addr, + .sh_size = shdr->sh_size, + .sh_offset = shdr->sh_offset, + .kmap = kmap + }; + *remap_kernel = false; - map__set_start(map, shdr->sh_addr + ref_reloc(kmap)); - map__set_end(map, map__start(map) + shdr->sh_size); - map__set_pgoff(map, shdr->sh_offset); - map__set_mapping_type(map, MAPPING_TYPE__DSO); - /* Ensure maps are correctly ordered */ - if (kmaps) { - int err; - struct map *tmp = map__get(map); - - maps__remove(kmaps, map); - err = maps__insert(kmaps, map); - map__put(tmp); - if (err) - return err; - } + maps__mutate_mapping(kmaps, map, remap_kernel_cb, &ctx); } /* diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 0c46b24ee098..2cc911af8c81 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -48,6 +48,13 @@ #include <symbol/kallsyms.h> #include <sys/utsname.h> +static int map_fixup_cb(struct map *map, void *data __maybe_unused) +{ + map__fixup_start(map); + map__fixup_end(map); + return 0; +} + static int dso__load_kernel_sym(struct dso *dso, struct map *map); static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map); @@ -2240,10 +2247,11 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map) free(kallsyms_allocated_filename); if (err > 0 && !dso__is_kcore(dso)) { + struct maps *kmaps = map__kmaps(map); + dso__set_binary_type(dso, DSO_BINARY_TYPE__KALLSYMS); dso__set_long_name(dso, DSO__NAME_KALLSYMS, false); - map__fixup_start(map); - map__fixup_end(map); + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); } return err; @@ -2283,10 +2291,11 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map) if (err > 0) pr_debug("Using %s for symbols\n", kallsyms_filename); if (err > 0 && !dso__is_kcore(dso)) { + struct maps *kmaps = map__kmaps(map); + dso__set_binary_type(dso, DSO_BINARY_TYPE__GUEST_KALLSYMS); dso__set_long_name(dso, machine->mmap_name, 
false); - map__fixup_start(map); - map__fixup_end(map); + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); } return err; -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v10 1/5] perf maps: Add maps__mutate_mapping 2026-06-05 6:06 ` [PATCH v10 1/5] perf maps: Add maps__mutate_mapping Ian Rogers @ 2026-06-05 6:20 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-06-05 6:20 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 3 potential issue(s) to consider: - [High] Data race and logical corruption by modifying map sorting flags under a read lock on the memory allocation error path in `maps__load_maps()`. - [High] A single module load failure in `maps__load_maps()` causes `maps__find_symbol_by_name()` to abort the entire symbol lookup. - [High] The lockless map pre-loading exposes a TOCTOU race that still leads to the read-to-write recursive self-deadlock. -- commit 2675e44926570b5a06b75a048fcec8958d572142 Author: Ian Rogers <irogers@google.com> perf maps: Add maps__mutate_mapping This patch introduces maps__mutate_mapping() to apply map boundary mutations under a write lock to fix a concurrent lookup race. It also introduces maps__load_maps() to pre-load modules and bypass dynamic runtime mutations to prevent read-to-write self-deadlocks. [ ... ] > diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c > index 923935ee21b66..be77dee16041b 100644 > --- a/tools/perf/util/maps.c > +++ b/tools/perf/util/maps.c [ ... 
] > @@ -626,6 +669,41 @@ int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data) > return ret; > } > > +int maps__load_maps(struct maps *maps) > +{ > + struct map **maps_copy; > + unsigned int nr_maps; > + int err = 0; > + > + if (!maps) > + return 0; > + > + down_read(maps__lock(maps)); > + nr_maps = maps__nr_maps(maps); > + if (nr_maps == 0) { > + up_read(maps__lock(maps)); > + return 0; > + } > + maps_copy = calloc(nr_maps, sizeof(*maps_copy)); > + if (!maps_copy) { > + RC_CHK_ACCESS(maps)->maps_by_address_sorted = false; > + RC_CHK_ACCESS(maps)->maps_by_name_sorted = false; [Severity: High] Does this code create a data race by modifying shared state under a read lock? We only hold the down_read() semaphore here, but we are writing to the shared boolean flags. Also, since the memory allocation failed and no maps were actually mutated, is it necessary to invalidate the sorted state? This seems like it will unnecessarily force future readers to acquire a write lock to re-sort an already sorted array. > + up_read(maps__lock(maps)); > + return -ENOMEM; > + } > + for (unsigned int i = 0; i < nr_maps; i++) > + maps_copy[i] = map__get(maps__maps_by_address(maps)[i]); > + up_read(maps__lock(maps)); > + > + for (unsigned int i = 0; i < nr_maps; i++) { > + if (map__load(maps_copy[i]) < 0) > + err = -1; > + map__put(maps_copy[i]); > + } > + free(maps_copy); > + return err; > +} [ ... ] > @@ -700,6 +778,8 @@ struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, st > .sym = NULL, > }; > > + if (maps__load_maps(maps) < 0) > + return NULL; [Severity: High] Will a single module load failure cause this to abort the entire symbol lookup? If maps__load_maps() encounters a single failure (for example, due to a missing kernel module debuginfo), it propagates an error of -1. By returning NULL immediately here, don't we bypass searching all the successfully loaded modules? 
Previously, individual map load failures were silently skipped, allowing the search to gracefully continue. [Severity: High] Can a concurrent insertion defeat this lockless pre-loading and still trigger the recursive self-deadlock? maps__load_maps() drops the read lock after copying the map pointers. What happens if a concurrent thread inserts a new, unloaded kernel map right after maps__load_maps() returns, but before maps__for_each_map() executes? It looks like maps__for_each_map() would then iterate over the newly inserted map, invoke map__load() while holding the down_read() lock, and trigger maps__mutate_mapping(). Won't maps__mutate_mapping() then unconditionally attempt to acquire the down_write() lock, triggering the exact read-to-write self-deadlock this patch intends to fix? > maps__for_each_map(maps, maps__find_symbol_by_name_cb, &args); > return args.sym; > } [ ... ] -- Sashiko AI review · https://sashiko.dev/#/patchset/20260605060610.1529996-1-irogers@google.com?part=1 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v10 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking 2026-06-05 6:06 ` [PATCH v10 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-05 6:06 ` [PATCH v10 1/5] perf maps: Add maps__mutate_mapping Ian Rogers @ 2026-06-05 6:06 ` Ian Rogers 2026-06-05 6:06 ` [PATCH v10 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers ` (3 subsequent siblings) 5 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-05 6:06 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz If perf.data files are taken from one machine to another they may leak virtual addresses and so weaken ASLR on the machine they are coming from. Add an aslr option for perf inject that remaps all virtual addresses, or drops data/events, so that the virtual address information isn't leaked. This patch introduces the core ASLR remapping tool infrastructure and implements remapping/tracking for metadata events (MMAP, MMAP2, COMM, FORK, EXIT, KSYMBOL, TEXT_POKE). Sample events are delegated without remapping for now. 
Assisted-by: Antigravity:gemini-3.5-flash Signed-off-by: Ian Rogers <irogers@google.com> Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> --- tools/perf/builtin-inject.c | 57 ++- tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 689 ++++++++++++++++++++++++++++++++++++ tools/perf/util/aslr.h | 37 ++ 4 files changed, 782 insertions(+), 2 deletions(-) create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 75ffe31d03fe..65c7eccccf4d 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -8,6 +8,7 @@ */ #include "builtin.h" +#include "util/aslr.h" #include "util/color.h" #include "util/dso.h" #include "util/vdso.h" @@ -124,6 +125,7 @@ struct perf_inject { bool in_place_update_dry_run; bool copy_kcore_dir; bool convert_callchain; + bool aslr; const char *input_name; struct perf_data output; u64 bytes_written; @@ -242,8 +244,25 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, if (!inject->output.is_pipe) return 0; - if (!inject->itrace_synth_opts.set) + if (!inject->itrace_synth_opts.set) { + if (inject->aslr) { + union perf_event *stripped_event = malloc(event->header.size); + int err; + + if (!stripped_event) + return -ENOMEM; + memcpy(stripped_event, event, event->header.size); + stripped_event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + + if (stripped_event->attr.attr.type == PERF_TYPE_BREAKPOINT) + stripped_event->attr.attr.bp_addr = 0; + + err = perf_event__repipe_synth(tool, stripped_event); + free(stripped_event); + return err; + } return perf_event__repipe_synth(tool, event); + } if (event->header.size < sizeof(struct perf_event_header) + PERF_ATTR_SIZE_VER0) { pr_err("Attribute event size %u is too small\n", event->header.size); @@ -276,6 +295,8 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, attr.size = sizeof(struct perf_event_attr); 
attr.sample_type &= ~PERF_SAMPLE_AUX; + if (inject->aslr) + attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; if (inject->itrace_synth_opts.add_last_branch) { attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; @@ -2595,6 +2616,8 @@ static int __cmd_inject(struct perf_inject *inject) } } + + session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc, @@ -2704,6 +2727,8 @@ int cmd_inject(int argc, const char **argv) unwind__option), OPT_BOOLEAN(0, "convert-callchain", &inject.convert_callchain, "Generate callchains using DWARF and drop register/stack data"), + OPT_BOOLEAN(0, "aslr", &inject.aslr, + "Remap virtual memory addresses similar to ASLR"), OPT_END() }; const char * const inject_usage[] = { @@ -2711,6 +2736,7 @@ int cmd_inject(int argc, const char **argv) NULL }; bool ordered_events; + struct perf_tool *tool = &inject.tool; if (!inject.itrace_synth_opts.set) { /* Disable eager loading of kernel symbols that adds overhead to perf inject. 
*/ @@ -2731,6 +2757,11 @@ int cmd_inject(int argc, const char **argv) if (argc) usage_with_options(inject_usage, options); + if (inject.aslr && inject.convert_callchain) { + pr_err("Error: --aslr and --convert-callchain are mutually exclusive features.\n"); + return -EINVAL; + } + if (inject.strip && !inject.itrace_synth_opts.set) { pr_err("--strip option requires --itrace option\n"); return -1; @@ -2824,12 +2855,21 @@ int cmd_inject(int argc, const char **argv) inject.tool.schedstat_domain = perf_event__repipe_op2_synth; inject.tool.dont_split_sample_group = true; inject.tool.merge_deferred_callchains = false; - inject.session = __perf_session__new(&data, &inject.tool, + if (inject.aslr) { + tool = aslr_tool__new(&inject.tool); + if (!tool) { + ret = -ENOMEM; + goto out_close_output; + } + } + inject.session = __perf_session__new(&data, tool, /*trace_event_repipe=*/inject.output.is_pipe, /*host_env=*/NULL); if (IS_ERR(inject.session)) { ret = PTR_ERR(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); goto out_close_output; } @@ -2923,12 +2963,25 @@ int cmd_inject(int argc, const char **argv) ret = __cmd_inject(&inject); + if (inject.aslr) { + struct evsel *evsel; + + evlist__for_each_entry(inject.session->evlist, evsel) { + evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) + evsel->core.attr.bp_addr = 0; + } + } + guest_session__exit(&inject.guest_session); out_delete: strlist__delete(inject.known_build_ids); zstd_fini(&(inject.session->zstd_data)); perf_session__delete(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); out_close_output: if (!inject.in_place_update) perf_data__close(&inject.output); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 4bbc78b1f741..19994e026ae5 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -6,6 +6,7 @@ perf-util-y += arm64-frame-pointer-unwind-support.o perf-util-y += addr2line.o perf-util-y += addr_location.o 
perf-util-y += annotate.o +perf-util-y += aslr.o perf-util-y += blake2s.o perf-util-y += block-info.o perf-util-y += block-range.o diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c new file mode 100644 index 000000000000..375a0355f281 --- /dev/null +++ b/tools/perf/util/aslr.c @@ -0,0 +1,689 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "aslr.h" + +#include "addr_location.h" +#include "debug.h" +#include "event.h" +#include "evsel.h" +#include "machine.h" +#include "map.h" +#include "thread.h" +#include "tool.h" +#include "session.h" +#include "data.h" +#include "dso.h" + +#include <internal/lib.h> /* page_size */ +#include <linux/compiler.h> +#include <linux/zalloc.h> +#include <inttypes.h> +#include <unistd.h> + +/** + * struct remap_addresses_key - Key for mapping original addresses to remapped ones. + * @dso: Pointer to the DSO (Dynamic Shared Object) associated with the mapping. + * @invariant: Unique offset invariant within the VMA (Virtual Memory Area). + * Calculated as `start - pgoff`. This value remains constant when + * perf's internal `maps__fixup_overlap_and_insert` splits a map into + * fragmented VMA pieces due to overlapping events, allowing us to + * resolve split maps consistently back to the original VMA. + * @pid: Process ID associated with the mapping. + */ +struct remap_addresses_key { + struct machine *machine; + struct dso *dso; + u64 invariant; + pid_t pid; +}; + +struct aslr_mapping { + struct list_head node; + u64 orig_start; + u64 len; + u64 remap_start; +}; + +struct process_top_address { + u64 remapped_max; + u64 orig_last_end; +}; +struct aslr_tool { + /** @tool: The tool implemented here and a pointer to a delegate to process the data. */ + struct delegate_tool tool; + /** @machines: The machines with the input, not remapped, virtual address layout. */ + struct machines machines; + /** @event_copy: Buffer used to create an event to pass to the delegate. 
*/ + char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); + /** @remap_addresses: mapping from remap_addresses_key to remapped address. */ + struct hashmap remap_addresses; + /** @top_addresses: mapping from process to max remapped address. */ + struct hashmap top_addresses; +}; + +static const pid_t kernel_pid = -1; + +/* Start remapping user processes from a small non-zero offset. */ +static const u64 user_space_start = 0x200000; +static const u64 kernel_space_start = 0xffff800010000000; + +static size_t remap_addresses__hash(long _key, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key = (struct remap_addresses_key *)_key; + + return (size_t)key->machine ^ (size_t)key->dso ^ key->invariant ^ key->pid; +} + +static bool remap_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key1 = (struct remap_addresses_key *)_key1; + struct remap_addresses_key *key2 = (struct remap_addresses_key *)_key2; + + return key1->machine == key2->machine && + RC_CHK_EQUAL(key1->dso, key2->dso) && + key1->invariant == key2->invariant && + key1->pid == key2->pid; +} + +struct top_addresses_key { + struct machine *machine; + pid_t pid; +}; + +static size_t top_addresses__hash(long _key, void *ctx __maybe_unused) +{ + struct top_addresses_key *key = (struct top_addresses_key *)_key; + + return (size_t)key->machine ^ key->pid; +} + +static bool top_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) +{ + struct top_addresses_key *key1 = (struct top_addresses_key *)_key1; + struct top_addresses_key *key2 = (struct top_addresses_key *)_key2; + + return key1->machine == key2->machine && key1->pid == key2->pid; +} + +static u64 round_up_to_page_size(u64 addr) +{ + return (addr + page_size - 1) & ~((u64)page_size - 1); +} + +struct aslr_machine_priv { + bool kernel_maps_loaded; +}; + +static int aslr_tool__preload_kernel_maps(struct machine *machine) +{ + struct aslr_machine_priv *mpriv = machine->priv; + + if (!mpriv) { 
+ mpriv = zalloc(sizeof(*mpriv)); + if (!mpriv) + return -ENOMEM; + machine->priv = mpriv; + } + + if (!mpriv->kernel_maps_loaded) { + struct maps *kmaps = machine__kernel_maps(machine); + + if (kmaps) { + int err = maps__load_maps(kmaps); + + if (err < 0) { + pr_err("ASLR: Failed to preload kernel maps for machine pid %d\n", + machine->pid); + return err; + } + } + mpriv->kernel_maps_loaded = true; + } + return 0; +} + +static void aslr_tool__free_machine_priv(struct machine *machine) +{ + free(machine->priv); + machine->priv = NULL; +} + +static void aslr_tool__destroy_machines_priv(struct machines *machines) +{ + struct rb_node *nd; + + aslr_tool__free_machine_priv(&machines->host); + for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) { + struct machine *machine = rb_entry(nd, struct machine, rb_node); + + aslr_tool__free_machine_priv(machine); + } +} + +static u64 aslr_tool__findnew_mapping(struct aslr_tool *aslr, + struct thread *aslr_thread, + u8 cpumode, u64 start, + u64 len, u64 pgoff) +{ + /* Address location for dso lookup. */ + struct addr_location al; + /* Original ASLR address based key for the remap table. */ + struct remap_addresses_key remap_key; + /* The address in the ASLR sanitized address space less pg_off. */ + u64 *remapped_invariant_ptr; + /* Key for the maximum address in a process. */ + struct top_addresses_key top_addr_key; + /* Value in top address table. */ + struct process_top_address *top = NULL; + /* Address in ASLR sanitized address space. */ + u64 remap_addr; + /* Potentially allocated remap table key. */ + struct remap_addresses_key *new_remap_key = NULL; + /* + * Potentially allocated remap table value. + * TODO: Avoid allocation necessary for perf 32-bit binary support. + */ + u64 *new_remap_val = NULL; + int err; + + if (!aslr_thread) + return 0; + + /* The key to look up an incoming address to the outgoing value. 
*/ + addr_location__init(&al); + remap_key.machine = maps__machine(thread__maps(aslr_thread)); + remap_key.pid = (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? + kernel_pid : thread__pid(aslr_thread); + if (thread__find_map(aslr_thread, cpumode, start, &al)) { + struct dso *dso = map__dso(al.map); + const char *dso_name = dso ? dso__long_name(dso) : NULL; + + remap_key.dso = dso; + if (dso && !is_anon_memory(dso_name) && !is_no_dso_memory(dso_name)) + remap_key.invariant = map__start(al.map) - map__pgoff(al.map); + else + remap_key.invariant = map__start(al.map); + } else { + remap_key.dso = NULL; + remap_key.invariant = start; + } + + /* The key to look up top allocated address. */ + top_addr_key.machine = remap_key.machine; + top_addr_key.pid = remap_key.pid; + + if (hashmap__find(&aslr->remap_addresses, &remap_key, &remapped_invariant_ptr)) { + /* Mmap already exists. */ + u64 calculated_max; + + if (al.map) { + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + + (start - map__start(al.map)); + } else { + remap_addr = *remapped_invariant_ptr + pgoff; + } + + calculated_max = remap_addr + len; + + /* See if top mapping was expanded. */ + if (hashmap__find(&aslr->top_addresses, &top_addr_key, &top)) { + if (calculated_max > top->remapped_max) + top->remapped_max = calculated_max; + } + addr_location__exit(&al); + return remap_addr; + } + /* No mmap, create an entry from the top address. */ + if (hashmap__find(&aslr->top_addresses, &top_addr_key, &top)) { + /* Current max allocated mmap address within the process. */ + remap_addr = top->remapped_max; + + if (start == top->orig_last_end) { + /* Contiguous mapping, do not add 1 page gap! */ + remap_addr = round_up_to_page_size(remap_addr); + } else { + /* Give 1 page gap from current max page. 
*/ + remap_addr = round_up_to_page_size(remap_addr); + remap_addr += page_size; + } + top->orig_last_end = start + len; + if (remap_addr + len > top->remapped_max) + top->remapped_max = remap_addr + len; + } else { + /* First address of the process, allocate key and first top address. */ + struct top_addresses_key *tk; + struct process_top_address *top_val; + + remap_addr = (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? + kernel_space_start : user_space_start; + remap_addr = round_up_to_page_size(remap_addr); + + tk = malloc(sizeof(*tk)); + top_val = malloc(sizeof(*top_val)); + if (!tk || !top_val) { + err = -ENOMEM; + } else { + *tk = top_addr_key; + top_val->remapped_max = remap_addr + len; + top_val->orig_last_end = start + len; + err = hashmap__insert(&aslr->top_addresses, tk, top_val, + HASHMAP_ADD, NULL, NULL); + } + if (err) { + errno = -err; + pr_err("Failure to add ASLR process top address %m\n"); + free(tk); + free(top_val); + addr_location__exit(&al); + return 0; + } + } + /* Create remapping entry. */ + new_remap_key = malloc(sizeof(*new_remap_key)); + new_remap_val = malloc(sizeof(u64)); + if (!new_remap_key || !new_remap_val) { + err = -ENOMEM; + } else { + *new_remap_key = remap_key; + new_remap_key->dso = dso__get(remap_key.dso); + if (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) { + if (al.map) { + *new_remap_val = remap_addr - + (start - map__start(al.map)) - + map__pgoff(al.map); + } else { + *new_remap_val = remap_addr; + } + } else { + *new_remap_val = remap_addr - (al.map ? 
map__pgoff(al.map) : pgoff); + } + err = hashmap__add(&aslr->remap_addresses, new_remap_key, new_remap_val); + if (err) + dso__put(new_remap_key->dso); + } + if (err) { + errno = -err; + pr_err("Failure to add ASLR remapping %m\n"); + free(new_remap_key); + free(new_remap_val); + addr_location__exit(&al); + return 0; + } + addr_location__exit(&al); + return remap_addr; +} + +static int aslr_tool__process_mmap(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_mmap(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap.pid, event->mmap.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap, &event->mmap, event->mmap.header.size); + /* Remaps the mmap.start. 
*/ + new_event->mmap.start = aslr_tool__findnew_mapping(aslr, thread, cpumode, + event->mmap.start, + event->mmap.len, + event->mmap.pgoff); + if (cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + new_event->mmap.pgoff = new_event->mmap.start; + err = delegate->mmap(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_mmap2(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_mmap2(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap2.pid, event->mmap2.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap2, &event->mmap2, event->mmap2.header.size); + /* Remaps the mmap.start. 
*/ + new_event->mmap2.start = aslr_tool__findnew_mapping(aslr, thread, cpumode, + event->mmap2.start, + event->mmap2.len, + event->mmap2.pgoff); + if (cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + new_event->mmap2.pgoff = new_event->mmap2.start; + err = delegate->mmap2(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_comm(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_comm(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->comm(delegate, event, sample, machine); +} + +static int aslr_tool__process_fork(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. 
*/ + err = perf_event__process_fork(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->fork(delegate, event, sample, machine); +} + +static int aslr_tool__process_exit(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_exit(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->exit(delegate, event, sample, machine); +} + +static int aslr_tool__process_text_poke(const struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + /* Drop in case the instruction encodes an ASLR revealing address. 
*/ + return 0; +} + +static int aslr_tool__process_ksymbol(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + err = perf_event__process_ksymbol(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, kernel_pid, 0); + if (!thread) + return -ENOMEM; + memcpy(&new_event->ksymbol, &event->ksymbol, event->ksymbol.header.size); + /* Remaps the ksymbol.addr */ + new_event->ksymbol.addr = aslr_tool__findnew_mapping(aslr, thread, + PERF_RECORD_MISC_KERNEL, + event->ksymbol.addr, + event->ksymbol.len, + /*pgoff=*/0); + + err = delegate->ksymbol(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_sample(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct perf_tool *delegate = aslr->tool.delegate; + + return delegate->sample(delegate, event, sample, machine); +} + +static int skipn(int fd, off_t n) +{ + char buf[4096]; + ssize_t ret; + + while (n > 0) { + ret = read(fd, buf, min_t(off_t, n, (off_t)sizeof(buf))); + if (ret <= 0) + return ret; + n -= ret; + } + + return 0; +} + +static s64 
aslr_tool__process_auxtrace(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, + union perf_event *event) +{ + pr_warning_once("ASLR: Dropping auxtrace data as it cannot be obfuscated.\n"); + if (perf_data__is_pipe(session->data)) { + /* Copy behavior of the stub by reading all pipe data. */ + int err = skipn(perf_data__fd(session->data), event->auxtrace.size); + + if (err < 0) + return err; + } + return event->auxtrace.size; +} + +static int aslr_tool__process_auxtrace_info(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + +static int aslr_tool__process_auxtrace_error(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + +static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) +{ + delegate_tool__init(&aslr->tool, delegate); + aslr->tool.tool.ordered_events = true; + + machines__init(&aslr->machines); + + hashmap__init(&aslr->remap_addresses, + remap_addresses__hash, remap_addresses__equal, + /*ctx=*/NULL); + hashmap__init(&aslr->top_addresses, + top_addresses__hash, top_addresses__equal, + /*ctx=*/NULL); + + aslr->tool.tool.sample = aslr_tool__process_sample; + /* read - reads a counter, okay to delegate. */ + aslr->tool.tool.mmap = aslr_tool__process_mmap; + aslr->tool.tool.mmap2 = aslr_tool__process_mmap2; + aslr->tool.tool.comm = aslr_tool__process_comm; + aslr->tool.tool.fork = aslr_tool__process_fork; + aslr->tool.tool.exit = aslr_tool__process_exit; + /* namespaces, cgroup, lost, lost_sample, aux, */ + /* itrace_start, aux_output_hw_id, context_switch, throttle, unthrottle */ + /* - no virtual addresses. */ + aslr->tool.tool.ksymbol = aslr_tool__process_ksymbol; + /* bpf - no virtual address. 
*/ + aslr->tool.tool.text_poke = aslr_tool__process_text_poke; + /* + * event_update, tracing_data, finished_round, build_id, id_index, + * auxtrace_info, auxtrace_error, time_conv, thread_map, cpu_map, + * stat_config, stat, feature, finished_init, bpf_metadata, compressed, + * auxtrace - no virtual addresses. + */ + aslr->tool.tool.auxtrace = aslr_tool__process_auxtrace; + aslr->tool.tool.auxtrace_info = aslr_tool__process_auxtrace_info; + aslr->tool.tool.auxtrace_error = aslr_tool__process_auxtrace_error; +} + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate) +{ + struct aslr_tool *aslr = zalloc(sizeof(*aslr)); + + if (!aslr) + return NULL; + + aslr_tool__init(aslr, delegate); + return &aslr->tool.tool; +} + +void aslr_tool__delete(struct perf_tool *tool) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct hashmap_entry *cur; + size_t bkt; + + if (!tool) + return; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + + hashmap__for_each_entry(&aslr->remap_addresses, cur, bkt) { + struct remap_addresses_key *key = (struct remap_addresses_key *)cur->pkey; + + if (key) + dso__put(key->dso); + zfree(&cur->pkey); + zfree(&cur->pvalue); + } + hashmap__for_each_entry(&aslr->top_addresses, cur, bkt) { + zfree(&cur->pkey); + zfree(&cur->pvalue); + } + + hashmap__clear(&aslr->remap_addresses); + hashmap__clear(&aslr->top_addresses); + aslr_tool__destroy_machines_priv(&aslr->machines); + machines__destroy_kernel_maps(&aslr->machines); + machines__exit(&aslr->machines); + free(aslr); +} diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h new file mode 100644 index 000000000000..a9b90bf29540 --- /dev/null +++ b/tools/perf/util/aslr.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_ASLR_H +#define __PERF_ASLR_H + +#include <linux/perf_event.h> + +#define ASLR_SUPPORTED_SAMPLE_TYPE ( \ + PERF_SAMPLE_IDENTIFIER | \ + PERF_SAMPLE_IP | \ + 
PERF_SAMPLE_TID | \ + PERF_SAMPLE_TIME | \ + PERF_SAMPLE_ADDR | \ + PERF_SAMPLE_ID | \ + PERF_SAMPLE_STREAM_ID | \ + PERF_SAMPLE_CPU | \ + PERF_SAMPLE_PERIOD | \ + PERF_SAMPLE_READ | \ + PERF_SAMPLE_CALLCHAIN | \ + PERF_SAMPLE_RAW | \ + PERF_SAMPLE_BRANCH_STACK | \ + PERF_SAMPLE_STACK_USER | \ + PERF_SAMPLE_WEIGHT_TYPE | \ + PERF_SAMPLE_DATA_SRC | \ + PERF_SAMPLE_TRANSACTION | \ + PERF_SAMPLE_PHYS_ADDR | \ + PERF_SAMPLE_CGROUP | \ + PERF_SAMPLE_DATA_PAGE_SIZE | \ + PERF_SAMPLE_CODE_PAGE_SIZE | \ + PERF_SAMPLE_AUX) + +struct perf_tool; +struct evsel; + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate); +void aslr_tool__delete(struct perf_tool *aslr); + +#endif /* __PERF_ASLR_H */ -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* [PATCH v10 3/5] perf inject/aslr: Implement sample address remapping 2026-06-05 6:06 ` [PATCH v10 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-05 6:06 ` [PATCH v10 1/5] perf maps: Add maps__mutate_mapping Ian Rogers 2026-06-05 6:06 ` [PATCH v10 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers @ 2026-06-05 6:06 ` Ian Rogers 2026-06-05 6:30 ` sashiko-bot 2026-06-05 6:06 ` [PATCH v10 4/5] perf test: Add inject ASLR test Ian Rogers ` (2 subsequent siblings) 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-05 6:06 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz Add the sample address remapping logic to the ASLR tool. This patch implements aslr_tool__process_sample, which parses sample events, remaps IPs, ADDRs, callchains, and branch stacks using the mappings collected from metadata events, and drops potentially leaking raw, register, stack, physical address, and aux samples. Also adds the aslr_tool__remap_address helper function. 
Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/util/aslr.c | 454 ++++++++++++++++++++++++++++++++++++++++- 1 file changed, 450 insertions(+), 4 deletions(-) diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c index 375a0355f281..8cfefa23030d 100644 --- a/tools/perf/util/aslr.c +++ b/tools/perf/util/aslr.c @@ -109,6 +109,60 @@ static u64 round_up_to_page_size(u64 addr) return (addr + page_size - 1) & ~((u64)page_size - 1); } +static u64 aslr_tool__remap_address(struct aslr_tool *aslr, + struct thread *aslr_thread, + u8 cpumode, + u64 addr) +{ + struct addr_location al; + struct remap_addresses_key key; + u64 *remapped_invariant_ptr = NULL; + u64 remap_addr = 0; + u8 effective_cpumode = cpumode; + + if (!aslr_thread) + return 0; /* No thread. */ + + addr_location__init(&al); + if (!thread__find_map(aslr_thread, cpumode, addr, &al)) { + /* + * If lookup fails with specified cpumode, try fallback to the other space + * to be robust against bad cpumode in samples. + */ + if (cpumode == PERF_RECORD_MISC_KERNEL) + effective_cpumode = PERF_RECORD_MISC_USER; + else if (cpumode == PERF_RECORD_MISC_USER) + effective_cpumode = PERF_RECORD_MISC_KERNEL; + else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + effective_cpumode = PERF_RECORD_MISC_GUEST_USER; + else if (cpumode == PERF_RECORD_MISC_GUEST_USER) + effective_cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + + if (!thread__find_map(aslr_thread, effective_cpumode, addr, &al)) { + addr_location__exit(&al); + return 0; /* No mmap. */ + } + } + + key.machine = maps__machine(thread__maps(aslr_thread)); + key.dso = map__dso(al.map); + key.invariant = map__start(al.map) - map__pgoff(al.map); + key.pid = (effective_cpumode == PERF_RECORD_MISC_KERNEL || + effective_cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? 
+ kernel_pid : thread__pid(aslr_thread); + + if (hashmap__find(&aslr->remap_addresses, &key, &remapped_invariant_ptr)) { + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + + (addr - map__start(al.map)); + } else { + pr_debug("Cannot find a remapped entry for address %lx in mapping %lx(%lx) for pid=%d\n", + addr, map__start(al.map), map__size(al.map), key.pid); + } + + addr_location__exit(&al); + return remap_addr; +} + struct aslr_machine_priv { bool kernel_maps_loaded; }; @@ -554,13 +608,405 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, struct perf_sample *sample, struct machine *machine) { - struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); - struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); - struct perf_tool *delegate = aslr->tool.delegate; + struct evsel *evsel = sample->evsel; + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + int ret; + u64 sample_type; + struct thread *thread; + struct machine *aslr_machine; + __u64 max_i; + __u64 max_j; + union perf_event *new_event; + struct perf_sample new_sample; + __u64 *in_array, *out_array; + u8 cpumode; + u64 addr; + size_t i; + size_t j; + bool orig_needs_swap; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + if (evsel__is_dummy_event(evsel)) + return delegate->sample(delegate, event, sample, machine); + + ret = -EFAULT; + sample_type = evsel->core.attr.sample_type; + max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); + max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); + new_event = (union perf_event *)aslr->event_copy; + cpumode = sample->cpumode; + i = 0; + j = 0; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if 
(aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + thread = machine__findnew_thread(aslr_machine, sample->pid, sample->tid); + + if (!thread) + return -ENOMEM; + + if (max_i > PERF_SAMPLE_MAX_SIZE / sizeof(u64)) + goto out_put; + + new_event->sample.header = event->sample.header; + + in_array = &event->sample.array[0]; + out_array = &new_event->sample.array[0]; + +#define CHECK_BOUNDS(required_i, required_j) \ + (i + (required_i) > max_i || j + (required_j) > max_j) + +#define COPY_U64() \ + do { \ + if (CHECK_BOUNDS(1, 1)) { \ + ret = -EFAULT; \ + goto out_put; \ + } \ + out_array[j++] = in_array[i++]; \ + } while (0) + +#define REMAP_U64(addr_field) \ + do { \ + if (CHECK_BOUNDS(1, 1)) { \ + ret = -EFAULT; \ + goto out_put; \ + } \ + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr_field); \ + i++; \ + } while (0) + + if (sample_type & PERF_SAMPLE_IDENTIFIER) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_IP) + REMAP_U64(sample->ip); + if (sample_type & PERF_SAMPLE_TID) + COPY_U64(); /* pid, tid */ + if (sample_type & PERF_SAMPLE_TIME) + COPY_U64(); /* time */ + if (sample_type & PERF_SAMPLE_ADDR) + REMAP_U64(sample->addr); + if (sample_type & PERF_SAMPLE_ID) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_STREAM_ID) + COPY_U64(); /* stream_id */ + if (sample_type & PERF_SAMPLE_CPU) + COPY_U64(); /* cpu, res */ + if (sample_type & PERF_SAMPLE_PERIOD) + COPY_U64(); /* period */ + if (sample_type & PERF_SAMPLE_READ) { + if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } else { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) 
{ + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + nr = out_array[j++]; + i++; + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + for (u64 cntr = 0; cntr < nr; cntr++) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } + } + } + if (sample_type & PERF_SAMPLE_CALLCHAIN) { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + nr = out_array[j++]; + i++; + + for (u64 cntr = 0; cntr < nr; cntr++) { + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + addr = in_array[i++]; + if (addr >= PERF_CONTEXT_MAX) { + out_array[j++] = addr; + switch (addr) { + case PERF_CONTEXT_HV: + cpumode = PERF_RECORD_MISC_HYPERVISOR; + break; + case PERF_CONTEXT_KERNEL: + cpumode = PERF_RECORD_MISC_KERNEL; + break; + case PERF_CONTEXT_USER: + cpumode = PERF_RECORD_MISC_USER; + break; + case PERF_CONTEXT_GUEST: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_KERNEL: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_USER: + cpumode = PERF_RECORD_MISC_GUEST_USER; + break; + case PERF_CONTEXT_USER_DEFERRED: + if (cntr + 1 >= nr) { + pr_debug("Truncated callchain deferred cookie context\n"); + ret = 0; + goto out_put; + } + /* + * Immediately followed by a 64-bit + * stitching cookie. Skip/Copy it! 
+ */ + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j++] = in_array[i++]; + cntr++; + cpumode = PERF_RECORD_MISC_USER; + break; + default: + pr_debug("invalid callchain context: %"PRIx64"\n", addr); + ret = 0; + goto out_put; + } + continue; + } + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr); + } + } + if (sample_type & PERF_SAMPLE_RAW) { + size_t bytes = sizeof(u32) + sample->raw_size; + size_t u64_words = (bytes + 7) / 8; + + if (i + u64_words > max_i || j + u64_words > max_j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], bytes); + i += u64_words; + j += u64_words; + /* + * TODO: certain raw samples can be remapped, such as + * tracepoints by examining their fields. + */ + pr_debug("Dropping raw samples as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + nr = out_array[j++]; + i++; + + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) + COPY_U64(); /* hw_idx */ + + if (nr > (ULLONG_MAX / 3)) { + ret = -EFAULT; + goto out_put; + } + if (nr * 3 > max_i - i || nr * 3 > max_j - j) { + ret = -EFAULT; + goto out_put; + } + for (u64 cntr = 0; cntr < nr; cntr++) { + out_array[j++] = aslr_tool__remap_address(aslr, thread, + sample->cpumode, + in_array[i++]); /* from */ + out_array[j++] = aslr_tool__remap_address(aslr, thread, + sample->cpumode, + in_array[i++]); /* to */ + out_array[j++] = in_array[i++]; /* flags */ + } + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) { + if (nr > max_i - i || nr > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); + i += nr; + j += nr; + /* TODO: confirm branch counters don't leak ASLR information. 
*/ + pr_debug("Dropping sample branch counters as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + } + if (sample_type & PERF_SAMPLE_REGS_USER) { + if (CHECK_BOUNDS(1, 0)) { + ret = -EFAULT; + goto out_put; + } + /* abi */ + COPY_U64(); + /* TODO: can this be less conservative? */ + pr_debug("Dropping regs user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_STACK_USER) { + u64 size; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + size = out_array[j++]; + i++; + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + if (size % 8) { + size_t pad = 8 - (size % 8); + + memset(((char *)&out_array[j]) + size, 0, pad); + } + i += u64_words; + j += u64_words; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping stack user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + COPY_U64(); /* perf_sample_weight */ + if (sample_type & PERF_SAMPLE_DATA_SRC) + COPY_U64(); /* data_src */ + if (sample_type & PERF_SAMPLE_TRANSACTION) + COPY_U64(); /* transaction */ + if (sample_type & PERF_SAMPLE_REGS_INTR) { + if (CHECK_BOUNDS(1, 0)) { + ret = -EFAULT; + goto out_put; + } + /* abi */ + COPY_U64(); + /* TODO: can this be less conservative? */ + pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_PHYS_ADDR) { + COPY_U64(); /* phys_addr */ + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping physical address sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_CGROUP) + COPY_U64(); /* cgroup */ + if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + COPY_U64(); /* data_page_size */ + if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + COPY_U64(); /* code_page_size */ + + if (sample_type & PERF_SAMPLE_AUX) { + u64 size; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + size = out_array[j++]; + i++; + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + if (size % 8) { + size_t pad = 8 - (size % 8); + + memset(((char *)&out_array[j]) + size, 0, pad); + } + i += u64_words; + j += u64_words; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping aux sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + if (evsel__is_offcpu_event(evsel)) { + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping off-CPU sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); + + orig_needs_swap = evsel->needs_swap; - return delegate->sample(delegate, event, sample, machine); + perf_sample__init(&new_sample, /*all=*/ true); + evsel->needs_swap = false; + ret = evsel__parse_sample(evsel, new_event, &new_sample); + evsel->needs_swap = orig_needs_swap; + + if (ret) { + perf_sample__exit(&new_sample); + goto out_put; + } + + new_sample.evsel = evsel; + ret = delegate->sample(delegate, new_event, &new_sample, machine); + perf_sample__exit(&new_sample); + +out_put: + thread__put(thread); + return ret; } +#undef CHECK_BOUNDS +#undef COPY_U64 +#undef REMAP_U64 + static int skipn(int fd, off_t n) { char buf[4096]; -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v10 3/5] perf inject/aslr: Implement sample address remapping 2026-06-05 6:06 ` [PATCH v10 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers @ 2026-06-05 6:30 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-06-05 6:30 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider: - [High] aslr_tool__process_sample disables needs_swap when parsing new_event, causing 32-bit sample fields (like PID and TID) to be swapped during cross-endian analysis, which silently breaks downstream sample processing like build ID injection. -- commit d0d9af33f8edcc0ac1964813a5b4d04938f7d3bf Author: Ian Rogers <irogers@google.com> perf inject/aslr: Implement sample address remapping This patch introduces the logic for aslr_tool__process_sample to parse sample events and remap addresses using mapping metadata. It filters out specific sample types to prevent ASLR leaks and delegates the modified sample to the next tool. > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > index 375a0355f2812..8cfefa23030d2 100644 > --- a/tools/perf/util/aslr.c > +++ b/tools/perf/util/aslr.c [ ... ] > @@ -554,13 +608,405 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, [ ... ] > + new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); > + > + orig_needs_swap = evsel->needs_swap; > > - return delegate->sample(delegate, event, sample, machine); > + perf_sample__init(&new_sample, /*all=*/ true); > + evsel->needs_swap = false; > + ret = evsel__parse_sample(evsel, new_event, &new_sample); [Severity: High] Does disabling evsel->needs_swap here break 32-bit fields when processing cross-endian perf.data files? When processing a cross-endian perf.data file, the core perf session loop blindly 64-bit swaps the sample array before invoking the tool callback. 
For fields containing packed 32-bit values (like PERF_SAMPLE_TID containing pid and tid), the 64-bit swap reverses their order in memory. evsel__parse_sample() normally corrects this by undoing the 64-bit swap and reswapping the 32-bit words if evsel->needs_swap is true. By explicitly setting it to false in aslr_tool__process_sample(), we bypass this 32-bit unswapping logic. Could this cause new_sample.pid to receive the TID and new_sample.tid to receive the PID (and similarly for PERF_SAMPLE_CPU)? If these fields are swapped, wouldn't this silently break downstream sample processing, such as build ID injection failing to find the correct thread because the PID is completely invalid? > + evsel->needs_swap = orig_needs_swap; > + > + if (ret) { > + perf_sample__exit(&new_sample); > + goto out_put; > + } > + > + new_sample.evsel = evsel; > + ret = delegate->sample(delegate, new_event, &new_sample, machine); > + perf_sample__exit(&new_sample); > + > +out_put: > + thread__put(thread); > + return ret; > } -- Sashiko AI review · https://sashiko.dev/#/patchset/20260605060610.1529996-1-irogers@google.com?part=3 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v10 4/5] perf test: Add inject ASLR test 2026-06-05 6:06 ` [PATCH v10 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (2 preceding siblings ...) 2026-06-05 6:06 ` [PATCH v10 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers @ 2026-06-05 6:06 ` Ian Rogers 2026-06-05 6:13 ` sashiko-bot 2026-06-05 6:06 ` [PATCH v10 5/5] perf aslr: Strip sample registers Ian Rogers 2026-06-05 18:52 ` [PATCH v11 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-05 6:06 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz Add a new shell test to verify the 'perf inject --aslr' feature. The test covers: - Basic address remapping for user space samples. - Pipe mode coverage for 'perf record' piped into 'perf inject'. - Callchain address remapping. - Consistency of 'perf report' output before and after injection. - Pipe mode report consistency. - Dropping of samples that leak ASLR info (physical addresses). - Kernel address remapping (utilizing a dedicated kernel-intensive VFS dd workload to guarantee continuous timer interrupts sampling flow inside kernel privilege states). - Kernel report consistency with address normalization. The test suite is hardened with global 'set -o pipefail' assertions to catch pipeline failures, stream-consuming awk processors to handle SIGPIPE signals, and a dedicated pipe output scenario validating raw 'perf inject -o -' stdout streams. 
Assisted-by: Antigravity:gemini-3.5-flash Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/tests/shell/inject_aslr.sh | 463 ++++++++++++++++++++++++++ 1 file changed, 463 insertions(+) create mode 100755 tools/perf/tests/shell/inject_aslr.sh diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh new file mode 100755 index 000000000000..9dd0a4e5f903 --- /dev/null +++ b/tools/perf/tests/shell/inject_aslr.sh @@ -0,0 +1,463 @@ +#!/bin/bash +# perf inject --aslr test +# SPDX-License-Identifier: GPL-2.0 + +set -e +set -o pipefail + +shelldir=$(dirname "$0") +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + +sym="noploop" + +skip_test_missing_symbol ${sym} + +# Create global temp directory +temp_dir=$(mktemp -d /tmp/perf-test-aslr.XXXXXXXXXX) + +prog="perf test -w noploop" +[ "$(uname -m)" = "s390x" ] && prog="$prog 3" +err=0 +kprog="dd if=/dev/zero of=/dev/null bs=1M count=500" + +cleanup() { + trap - EXIT TERM INT + local exit_code=$? + if [ "${exit_code}" -ne 0 ] || [ "${err}" -ne 0 ]; then + echo "Test failed! Preserving temp directory: ${temp_dir}" + return + fi + # Check if temp_dir is set and looks sane before removing + if [[ "${temp_dir}" =~ ^/tmp/perf-test-aslr\. 
]]; then + rm -rf "${temp_dir}" + fi +} + +trap_cleanup() { + echo "Unexpected signal in ${FUNCNAME[1]}" + cleanup + exit 1 +} +trap trap_cleanup EXIT TERM INT + +get_noploop_addr() { + local file=$1 + perf script -i "$file" | awk ' + BEGIN { found=0 } + { + for (i=1; i<=NF; i++) { + if ($i ~ /noploop\+/) { + if (!found) { + print $(i-1) + found=1 + } + } + } + }' +} + +test_basic_aslr() { + echo "Test basic ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.basic.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.basic.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -v --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Basic ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Basic ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Basic ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Basic ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Basic ASLR test [Success]" + fi +} + +test_pipe_aslr() { + echo "Test pipe mode ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe.XXXXXX") + + # Use tee to save the original pipe data for comparison + perf record -e task-clock:u -o - ${prog} | tee "${data}" | perf inject --aslr -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Pipe ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Pipe ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Pipe ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Pipe ASLR test [Failed - 
addresses are not remapped]" + err=1 + else + echo "Pipe ASLR test [Success]" + fi +} + +test_callchain_aslr() { + echo "Test Callchain ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.callchain.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.callchain.XXXXXX") + + perf record -g -e task-clock:u -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Callchain ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Callchain ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Callchain ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Callchain ASLR test [Failed - addresses are not remapped]" + err=1 + else + # Extract callchain addresses (indented lines starting with hex addresses) + orig_callchain=$(perf script -i "${data}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + new_callchain=$(perf script -i "${data2}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + + if [ -z "$orig_callchain" ]; then + echo "Callchain ASLR test [Failed - no callchain samples in original file]" + err=1 + elif [ -z "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain data was dropped]" + err=1 + elif [ "$orig_callchain" = "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain addresses were not remapped]" + err=1 + else + echo "Callchain ASLR test [Success]" + fi + fi +} + +test_report_aslr() { + echo "Test perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + # Use -b to inject build-ids and force ordered events processing 
in both + perf inject -b -i "${data}" -o "${data_clean}" + perf inject -v -b --aslr -i "${data}" -o "${data2}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Report ASLR test [Success]" + fi +} + +test_pipe_report_aslr() { + echo "Test pipe mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + # Use tee to save the original pipe data, then process it with inject -b + perf record -e task-clock:u -o - ${prog} | \ + tee "${data}" | \ + perf inject -b --aslr -o "${data2}" + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort 
> "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Pipe Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Report ASLR test [Success]" + fi +} + +test_pipe_out_report_aslr() { + echo "Test pipe output mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_out_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf inject -b --aslr -i "${data}" -o - | perf report -i - --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! 
-s "${report1_clean}" ]; then + echo "Pipe Output Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Output Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Output Report ASLR test [Success]" + fi +} + +test_dropped_samples() { + echo "Test dropped samples (phys-data)" + local data + data=$(mktemp "${temp_dir}/perf.data.dropped.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.dropped.XXXXXX") + + # Check if --phys-data is supported by recording a short run + if ! perf record -e task-clock:u --phys-data -o "${data}" -- sleep 0.1 > /dev/null 2>&1; then + echo "Skipping dropped samples test as --phys-data is not supported" + return + fi + + perf record -e task-clock:u --phys-data -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + # Verify that the original file actually contained samples! + orig_samples=$(perf script -i "${data}" | wc -l) + if [ "$orig_samples" -eq 0 ]; then + echo "Dropped samples test [Failed - no samples in original file]" + err=1 + else + # Verify that samples are dropped. + samples_count=$(perf script -i "${data2}" | wc -l) + + if [ "$samples_count" -gt 0 ]; then + echo "Dropped samples test [Failed - samples were not dropped]" + err=1 + else + echo "Dropped samples test [Success]" + fi + fi +} + +test_kernel_aslr() { + echo "Test kernel ASLR remapping" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then + echo "Skipping kernel ASLR test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel ASLR test as kernel map could not be recorded (permissions restricted)" + return + fi + + perf inject -v --aslr -i "${kdata}" -o "${kdata2}" + + # Check if kernel addresses are remapped. + # Find the field that ends with :k: (the event name) and take the next field! + orig_addr=$(perf script -i "${kdata}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + new_addr=$(perf script -i "${kdata2}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + + echo "Kernel ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Kernel ASLR test [Failed - no kernel samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Kernel ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Kernel ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Kernel ASLR test [Success]" + fi +} + +test_kernel_report_aslr() { + echo "Test kernel perf report consistency" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel_report.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_report_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then + echo "Skipping kernel report test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel report test as kernel map could not be recorded (permissions restricted)" + return + fi + + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i "${kdata}" -o "${data_clean}" + perf inject -v -b --aslr -i "${kdata}" -o "${kdata2}" + + local report1="${temp_dir}/report_kernel1" + local report2="${temp_dir}/report_kernel2" + local report1_clean="${temp_dir}/report_kernel1.clean" + local report2_clean="${temp_dir}/report_kernel2.clean" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${kdata2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' > "${report2_clean}" || true + + # Normalize kernel DSOs and addresses in clean reports + # This allows kernel modules to be either a module or kernel.kallsyms + local report1_norm="${temp_dir}/report_kernel1.norm" + local report2_norm="${temp_dir}/report_kernel2.norm" + local diff_file="${temp_dir}/diff_kernel" + + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report1_clean}" | \ + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ + sort > "${report1_norm}" || true + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report2_clean}" | \ + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ + sort > "${report2_norm}" || true + + diff -u -w "${report1_norm}" "${report2_norm}" > "${diff_file}" || true + + if [ ! 
-s "${report1_norm}" ]; then + echo "Kernel Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Kernel Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Kernel Report ASLR test [Success]" + fi +} + +test_basic_aslr +test_pipe_aslr +test_callchain_aslr +test_report_aslr +test_pipe_report_aslr +test_pipe_out_report_aslr +test_dropped_samples +test_kernel_aslr +test_kernel_report_aslr + +cleanup +exit $err -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v10 4/5] perf test: Add inject ASLR test 2026-06-05 6:06 ` [PATCH v10 4/5] perf test: Add inject ASLR test Ian Rogers @ 2026-06-05 6:13 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-06-05 6:13 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider: - [Medium] The trap and cleanup logic incorrectly clobbers the script's exit status (`$?`), preventing the temporary directory from being preserved on unexpected script failure. -- commit c84fe24323edc3828dba95662be22cc7389443c1 Author: Ian Rogers <irogers@google.com> perf test: Add inject ASLR test This commit adds a new shell test suite for validating the perf inject ASLR feature. It provides coverage for basic address remapping, pipe mode processing, callchain normalization, and report consistency across both user space and kernel workloads. > diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh > new file mode 100755 > index 0000000000000..9dd0a4e5f903e > --- /dev/null > +++ b/tools/perf/tests/shell/inject_aslr.sh [ ... ] > +cleanup() { > + trap - EXIT TERM INT > + local exit_code=$? [Severity: Medium] Does this code accidentally clobber the exit status? The command trap - EXIT TERM INT will execute successfully and set $? to 0. That 0 is then saved into exit_code, masking the original failure status that triggered the cleanup. > + if [ "${exit_code}" -ne 0 ] || [ "${err}" -ne 0 ]; then > + echo "Test failed! Preserving temp directory: ${temp_dir}" > + return > + fi > + # Check if temp_dir is set and looks sane before removing > + if [[ "${temp_dir}" =~ ^/tmp/perf-test-aslr\. ]]; then > + rm -rf "${temp_dir}" > + fi > +} > + > +trap_cleanup() { > + echo "Unexpected signal in ${FUNCNAME[1]}" > + cleanup [Severity: Medium] Similarly, does the echo command here overwrite the actual script failure status in $? before cleanup is invoked? 
If so, this would prevent the temporary directory from being preserved as intended during an unexpected failure, making debugging test failures more difficult. > + exit 1 > +} -- Sashiko AI review · https://sashiko.dev/#/patchset/20260605060610.1529996-1-irogers@google.com?part=4 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v10 5/5] perf aslr: Strip sample registers 2026-06-05 6:06 ` [PATCH v10 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (3 preceding siblings ...) 2026-06-05 6:06 ` [PATCH v10 4/5] perf test: Add inject ASLR test Ian Rogers @ 2026-06-05 6:06 ` Ian Rogers 2026-06-05 18:52 ` [PATCH v11 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 5 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-05 6:06 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz Refactor the ASLR tool to strip out only the register dump payload by masking out the relevant perf_event_attr fields when the delegated tool is handling the data. struct aslr_evsel_priv maintains the original perf_event_attr values and is looked up via the evsel_orig_attrs hashmap. This allows us to keep samples that would otherwise be dropped because they contain registers, while still obfuscating the registers. 
Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/builtin-inject.c | 46 ++++-- tools/perf/tests/shell/inject_aslr.sh | 55 +++++++ tools/perf/util/aslr.c | 208 +++++++++++++++++++++----- tools/perf/util/aslr.h | 4 + 4 files changed, 264 insertions(+), 49 deletions(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 65c7eccccf4d..de315bb334b3 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -253,6 +253,12 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, return -ENOMEM; memcpy(stripped_event, event, event->header.size); stripped_event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + if (stripped_event->attr.attr.size >= + (offsetof(struct perf_event_attr, sample_regs_user) + sizeof(u64))) + stripped_event->attr.attr.sample_regs_user = 0; + if (stripped_event->attr.attr.size >= + (offsetof(struct perf_event_attr, sample_regs_intr) + sizeof(u64))) + stripped_event->attr.attr.sample_regs_intr = 0; if (stripped_event->attr.attr.type == PERF_TYPE_BREAKPOINT) stripped_event->attr.attr.bp_addr = 0; @@ -295,8 +301,13 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, attr.size = sizeof(struct perf_event_attr); attr.sample_type &= ~PERF_SAMPLE_AUX; - if (inject->aslr) + if (inject->aslr) { attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + if (attr.type == PERF_TYPE_BREAKPOINT) + attr.bp_addr = 0; + attr.sample_regs_user = 0; + attr.sample_regs_intr = 0; + } if (inject->itrace_synth_opts.add_last_branch) { attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; @@ -2618,6 +2629,9 @@ static int __cmd_inject(struct perf_inject *inject) + if (inject->aslr) + aslr_tool__strip_evlist(inject->session->tool, session->evlist); + session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; perf_session__inject_header(session, session->evlist, fd, 
&inj_fc.fc, @@ -2876,6 +2890,18 @@ int cmd_inject(int argc, const char **argv) if (zstd_init(&(inject.session->zstd_data), 0) < 0) pr_warning("Decompression initialization failed.\n"); + if (inject.aslr) { + struct evsel *evsel; + + evlist__for_each_entry(inject.session->evlist, evsel) { + ret = aslr_tool__cache_orig_attrs(tool, evsel); + if (ret) { + pr_err("Failed to cache original attributes: %d\n", ret); + goto out_delete; + } + } + } + /* Save original section info before feature bits change */ ret = save_section_info(&inject); if (ret) @@ -2894,10 +2920,17 @@ int cmd_inject(int argc, const char **argv) * the input. */ if (!data.is_pipe) { + if (inject.aslr) + aslr_tool__strip_evlist(tool, inject.session->evlist); + ret = perf_event__synthesize_for_pipe(&inject.tool, inject.session, &inject.output, perf_event__repipe); + + if (inject.aslr) + aslr_tool__restore_evlist(tool, inject.session->evlist); + if (ret < 0) goto out_delete; } @@ -2963,17 +2996,6 @@ int cmd_inject(int argc, const char **argv) ret = __cmd_inject(&inject); - if (inject.aslr) { - struct evsel *evsel; - - evlist__for_each_entry(inject.session->evlist, evsel) { - evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; - - if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) - evsel->core.attr.bp_addr = 0; - } - } - guest_session__exit(&inject.guest_session); out_delete: diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh index 9dd0a4e5f903..e9ce6891ac12 100755 --- a/tools/perf/tests/shell/inject_aslr.sh +++ b/tools/perf/tests/shell/inject_aslr.sh @@ -449,6 +449,60 @@ test_kernel_report_aslr() { fi } +test_regs_stripping() { + echo "Test user register stripping" + local rdata="${temp_dir}/perf.data.regs" + local rdata2="${temp_dir}/perf.data.regs.injected" + local rdata_clean="${temp_dir}/perf.data.regs.clean" + + if ! 
perf record --user-regs -o "${rdata}" ${prog} > /dev/null 2>&1; then + echo "Skipping user registers test as recording failed (unsupported flag/platform)" + return + fi + + perf inject -b -i "${rdata}" -o "${rdata_clean}" + perf inject -v -b --aslr -i "${rdata}" -o "${rdata2}" + + local report1="${temp_dir}/report_regs1" + local report2="${temp_dir}/report_regs2" + local report1_clean="${temp_dir}/report_regs1.clean" + local report2_clean="${temp_dir}/report_regs2.clean" + local diff_file="${temp_dir}/diff_regs" + + perf report -i "${rdata_clean}" --stdio > "${report1}" 2>/dev/null || true + perf report -i "${rdata2}" --stdio > "${report2}" 2>/dev/null || true + + grep '%' "${report1}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | \ + sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | \ + sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! 
-s "${report1_clean}" ]; then + echo "User registers stripping test [Failed - profile trace starved/empty]" + err=1 + return + elif [ -s "${diff_file}" ]; then + echo "User registers stripping test [Failed - report parsing differs]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + return + fi + + local script_dump="${temp_dir}/script_regs_dump" + perf script -D -i "${rdata2}" > "${script_dump}" 2>/dev/null || true + if grep -q "PERF_SAMPLE_REGS_USER" "${script_dump}"; then + echo "User registers stripping test [Failed - register dumps still present]" + err=1 + else + echo "User registers stripping test [Success]" + fi +} + test_basic_aslr test_pipe_aslr test_callchain_aslr @@ -458,6 +512,7 @@ test_pipe_out_report_aslr test_dropped_samples test_kernel_aslr test_kernel_report_aslr +test_regs_stripping cleanup exit $err diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c index 8cfefa23030d..33b7fca68686 100644 --- a/tools/perf/util/aslr.c +++ b/tools/perf/util/aslr.c @@ -5,6 +5,7 @@ #include "debug.h" #include "event.h" #include "evsel.h" +#include "evlist.h" #include "machine.h" #include "map.h" #include "thread.h" @@ -16,6 +17,7 @@ #include <internal/lib.h> /* page_size */ #include <linux/compiler.h> #include <linux/zalloc.h> +#include <errno.h> #include <inttypes.h> #include <unistd.h> @@ -43,6 +45,22 @@ struct aslr_mapping { u64 remap_start; }; +struct aslr_evsel_priv { + u64 orig_sample_type; + u64 orig_sample_regs_user; + u64 orig_sample_regs_intr; +}; + +static size_t evsel_hash(long key, void *ctx __maybe_unused) +{ + return (size_t)key; +} + +static bool evsel_equal(long key1, long key2, void *ctx __maybe_unused) +{ + return key1 == key2; +} + struct process_top_address { u64 remapped_max; u64 orig_last_end; @@ -58,6 +76,11 @@ struct aslr_tool { struct hashmap remap_addresses; /** @top_addresses: mapping from process to max remapped address. 
*/ struct hashmap top_addresses; + /** + * @evsel_orig_attrs: mapping from evsel pointer to its original + * unstripped sample_type and registers bitmasks. + */ + struct hashmap evsel_orig_attrs; }; static const pid_t kernel_pid = -1; @@ -613,6 +636,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, struct aslr_tool *aslr; struct perf_tool *delegate; int ret; + int orig_sample_size; u64 sample_type; struct thread *thread; struct machine *aslr_machine; @@ -626,6 +650,10 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, size_t i; size_t j; bool orig_needs_swap; + struct aslr_evsel_priv *priv = NULL; + u64 orig_sample_type; + u64 orig_regs_user; + u64 orig_regs_intr; del_tool = container_of(tool, struct delegate_tool, tool); aslr = container_of(del_tool, struct aslr_tool, tool); @@ -635,7 +663,23 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, return delegate->sample(delegate, event, sample, machine); ret = -EFAULT; - sample_type = evsel->core.attr.sample_type; + + if (hashmap__find(&aslr->evsel_orig_attrs, evsel, &priv)) { + orig_sample_type = priv->orig_sample_type; + orig_regs_user = priv->orig_sample_regs_user; + orig_regs_intr = priv->orig_sample_regs_intr; + } else { + orig_sample_type = evsel->core.attr.sample_type; + orig_regs_user = evsel->core.attr.sample_regs_user; + orig_regs_intr = evsel->core.attr.sample_regs_intr; + } + + orig_sample_size = evsel->sample_size; + + sample_type = orig_sample_type; + sample_type &= ~PERF_SAMPLE_REGS_USER; + sample_type &= ~PERF_SAMPLE_REGS_INTR; + max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); new_event = (union perf_event *)aslr->event_copy; @@ -684,25 +728,25 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, i++; \ } while (0) - if (sample_type & PERF_SAMPLE_IDENTIFIER) + if (orig_sample_type & PERF_SAMPLE_IDENTIFIER) 
COPY_U64(); /* id */ - if (sample_type & PERF_SAMPLE_IP) + if (orig_sample_type & PERF_SAMPLE_IP) REMAP_U64(sample->ip); - if (sample_type & PERF_SAMPLE_TID) + if (orig_sample_type & PERF_SAMPLE_TID) COPY_U64(); /* pid, tid */ - if (sample_type & PERF_SAMPLE_TIME) + if (orig_sample_type & PERF_SAMPLE_TIME) COPY_U64(); /* time */ - if (sample_type & PERF_SAMPLE_ADDR) + if (orig_sample_type & PERF_SAMPLE_ADDR) REMAP_U64(sample->addr); - if (sample_type & PERF_SAMPLE_ID) + if (orig_sample_type & PERF_SAMPLE_ID) COPY_U64(); /* id */ - if (sample_type & PERF_SAMPLE_STREAM_ID) + if (orig_sample_type & PERF_SAMPLE_STREAM_ID) COPY_U64(); /* stream_id */ - if (sample_type & PERF_SAMPLE_CPU) + if (orig_sample_type & PERF_SAMPLE_CPU) COPY_U64(); /* cpu, res */ - if (sample_type & PERF_SAMPLE_PERIOD) + if (orig_sample_type & PERF_SAMPLE_PERIOD) COPY_U64(); /* period */ - if (sample_type & PERF_SAMPLE_READ) { + if (orig_sample_type & PERF_SAMPLE_READ) { if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { COPY_U64(); /* value */ if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) @@ -736,7 +780,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, } } } - if (sample_type & PERF_SAMPLE_CALLCHAIN) { + if (orig_sample_type & PERF_SAMPLE_CALLCHAIN) { u64 nr; if (CHECK_BOUNDS(1, 1)) { @@ -802,7 +846,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr); } } - if (sample_type & PERF_SAMPLE_RAW) { + if (orig_sample_type & PERF_SAMPLE_RAW) { size_t bytes = sizeof(u32) + sample->raw_size; size_t u64_words = (bytes + 7) / 8; @@ -821,7 +865,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + if (orig_sample_type & PERF_SAMPLE_BRANCH_STACK) { u64 nr; if (CHECK_BOUNDS(1, 1)) { @@ -866,19 +910,25 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, 
goto out_put; } } - if (sample_type & PERF_SAMPLE_REGS_USER) { + if (orig_sample_type & PERF_SAMPLE_REGS_USER) { + u64 abi; + if (CHECK_BOUNDS(1, 0)) { ret = -EFAULT; goto out_put; } - /* abi */ - COPY_U64(); - /* TODO: can this be less conservative? */ - pr_debug("Dropping regs user sample as possible ASLR leak\n"); - ret = 0; - goto out_put; + abi = in_array[i++]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(orig_regs_user); + + if (nr > max_i - i) { + ret = -EFAULT; + goto out_put; + } + i += nr; + } } - if (sample_type & PERF_SAMPLE_STACK_USER) { + if (orig_sample_type & PERF_SAMPLE_STACK_USER) { u64 size; if (CHECK_BOUNDS(1, 1)) { @@ -909,39 +959,45 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + if (orig_sample_type & PERF_SAMPLE_WEIGHT_TYPE) COPY_U64(); /* perf_sample_weight */ - if (sample_type & PERF_SAMPLE_DATA_SRC) + if (orig_sample_type & PERF_SAMPLE_DATA_SRC) COPY_U64(); /* data_src */ - if (sample_type & PERF_SAMPLE_TRANSACTION) + if (orig_sample_type & PERF_SAMPLE_TRANSACTION) COPY_U64(); /* transaction */ - if (sample_type & PERF_SAMPLE_REGS_INTR) { + if (orig_sample_type & PERF_SAMPLE_REGS_INTR) { + u64 abi; + if (CHECK_BOUNDS(1, 0)) { ret = -EFAULT; goto out_put; } - /* abi */ - COPY_U64(); - /* TODO: can this be less conservative? */ - pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); - ret = 0; - goto out_put; + abi = in_array[i++]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(orig_regs_intr); + + if (nr > max_i - i) { + ret = -EFAULT; + goto out_put; + } + i += nr; + } } - if (sample_type & PERF_SAMPLE_PHYS_ADDR) { + if (orig_sample_type & PERF_SAMPLE_PHYS_ADDR) { COPY_U64(); /* phys_addr */ /* TODO: can this be less conservative? 
*/ pr_debug("Dropping physical address sample as possible ASLR leak\n"); ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_CGROUP) + if (orig_sample_type & PERF_SAMPLE_CGROUP) COPY_U64(); /* cgroup */ - if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + if (orig_sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) COPY_U64(); /* data_page_size */ - if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + if (orig_sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) COPY_U64(); /* code_page_size */ - if (sample_type & PERF_SAMPLE_AUX) { + if (orig_sample_type & PERF_SAMPLE_AUX) { u64 size; if (CHECK_BOUNDS(1, 1)) { @@ -982,6 +1038,12 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); + /* Temporarily override evsel attributes to match the stripped new_event format! */ + evsel->sample_size = __evsel__sample_size(sample_type); + evsel->core.attr.sample_type = sample_type; + evsel->core.attr.sample_regs_user = 0; + evsel->core.attr.sample_regs_intr = 0; + orig_needs_swap = evsel->needs_swap; perf_sample__init(&new_sample, /*all=*/ true); @@ -990,6 +1052,11 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, evsel->needs_swap = orig_needs_swap; if (ret) { + /* Restore original attributes immediately if parsing fails */ + evsel->sample_size = orig_sample_size; + evsel->core.attr.sample_type = orig_sample_type; + evsel->core.attr.sample_regs_user = orig_regs_user; + evsel->core.attr.sample_regs_intr = orig_regs_intr; perf_sample__exit(&new_sample); goto out_put; } @@ -998,6 +1065,12 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = delegate->sample(delegate, new_event, &new_sample, machine); perf_sample__exit(&new_sample); + /* Restore original attributes so trace ingestion never desynchronizes! 
*/ + evsel->sample_size = orig_sample_size; + evsel->core.attr.sample_type = orig_sample_type; + evsel->core.attr.sample_regs_user = orig_regs_user; + evsel->core.attr.sample_regs_intr = orig_regs_intr; + out_put: thread__put(thread); return ret; @@ -1064,6 +1137,9 @@ static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) hashmap__init(&aslr->top_addresses, top_addresses__hash, top_addresses__equal, /*ctx=*/NULL); + hashmap__init(&aslr->evsel_orig_attrs, + evsel_hash, evsel_equal, + /*ctx=*/NULL); aslr->tool.tool.sample = aslr_tool__process_sample; /* read - reads a counter, okay to delegate. */ @@ -1125,11 +1201,69 @@ void aslr_tool__delete(struct perf_tool *tool) zfree(&cur->pkey); zfree(&cur->pvalue); } + hashmap__for_each_entry(&aslr->evsel_orig_attrs, cur, bkt) { + zfree(&cur->pvalue); + } hashmap__clear(&aslr->remap_addresses); hashmap__clear(&aslr->top_addresses); + hashmap__clear(&aslr->evsel_orig_attrs); aslr_tool__destroy_machines_priv(&aslr->machines); machines__destroy_kernel_maps(&aslr->machines); machines__exit(&aslr->machines); free(aslr); } + +int aslr_tool__cache_orig_attrs(struct perf_tool *tool, struct evsel *evsel) +{ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct aslr_evsel_priv *priv = zalloc(sizeof(*priv)); + int err; + + if (!priv) + return -ENOMEM; + + priv->orig_sample_type = evsel->core.attr.sample_type; + priv->orig_sample_regs_user = evsel->core.attr.sample_regs_user; + priv->orig_sample_regs_intr = evsel->core.attr.sample_regs_intr; + + err = hashmap__add(&aslr->evsel_orig_attrs, evsel, priv); + if (err) { + free(priv); + return err; + } + return 0; +} + +void aslr_tool__strip_evlist(const struct perf_tool *tool __maybe_unused, struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + 
evsel__reset_sample_bit(evsel, REGS_USER); + evsel__reset_sample_bit(evsel, REGS_INTR); + evsel->core.attr.sample_regs_user = 0; + evsel->core.attr.sample_regs_intr = 0; + + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) + evsel->core.attr.bp_addr = 0; + } +} + +void aslr_tool__restore_evlist(const struct perf_tool *tool, struct evlist *evlist) +{ + const struct delegate_tool *del_tool = container_of(tool, const struct delegate_tool, tool); + const struct aslr_tool *aslr = container_of(del_tool, const struct aslr_tool, tool); + struct evsel *evsel; + struct aslr_evsel_priv *priv; + + evlist__for_each_entry(evlist, evsel) { + if (hashmap__find(&aslr->evsel_orig_attrs, evsel, &priv)) { + evsel->core.attr.sample_type = priv->orig_sample_type; + evsel->core.attr.sample_regs_user = priv->orig_sample_regs_user; + evsel->core.attr.sample_regs_intr = priv->orig_sample_regs_intr; + } + } +} diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h index a9b90bf29540..4c2cffc0e500 100644 --- a/tools/perf/util/aslr.h +++ b/tools/perf/util/aslr.h @@ -30,8 +30,12 @@ struct perf_tool; struct evsel; +struct evlist; struct perf_tool *aslr_tool__new(struct perf_tool *delegate); void aslr_tool__delete(struct perf_tool *aslr); +int aslr_tool__cache_orig_attrs(struct perf_tool *tool, struct evsel *evsel); +void aslr_tool__strip_evlist(const struct perf_tool *tool, struct evlist *evlist); +void aslr_tool__restore_evlist(const struct perf_tool *tool, struct evlist *evlist); #endif /* __PERF_ASLR_H */ -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* [PATCH v11 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes 2026-06-05 6:06 ` [PATCH v10 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (4 preceding siblings ...) 2026-06-05 6:06 ` [PATCH v10 5/5] perf aslr: Strip sample registers Ian Rogers @ 2026-06-05 18:52 ` Ian Rogers 2026-06-05 18:52 ` [PATCH v11 1/5] perf maps: Add maps__mutate_mapping Ian Rogers ` (5 more replies) 5 siblings, 6 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-05 18:52 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz This patch series introduces the new 'perf inject --aslr' feature to remap virtual memory addresses, or drop events that would leak physical memory addresses, when profile record data is shared between machines. Bundled with this feature is a bug fix inside the core map tracking tool that hardens perf session analysis against concurrent lookup data races. Detailed Mechanism of MMAP Mapping and ASLR Virtual Address Allocation: The ASLR tool virtualizes the address space of the recorded processes by intercepting MMAP and MMAP2 events to build a consistent translation database, which is subsequently used to rewrite sample addresses. It maintains two primary lookup databases using hash maps: 1. 'remap_addresses': Maps an original mapping key to its new remapped base address. The key uses topological invariant coordinates: (machine, dso, invariant). The invariant is computed as (start - pgoff) for DSO-backed mappings. This invariant remains constant even when perf's internal overlap-resolution splits a VMA into fragmented pieces, ensuring split maps resolve consistently back to the same remapped base. 2. 'top_addresses': Tracks the allocation state per process (machine, pid). 
It maintains 'remapped_max' (the highest allocated address in the virtualized space) and 'orig_last_end' (the end address of the last processed original mapping). For each MMAP/MMAP2 event: - We look up the DSO and invariant key in 'remap_addresses'. If found, we reuse the translation, preserving the offset within the mapping. - If not found, we allocate a new remapped address space: - If the new mapping is contiguous to the previous one in the original address space (start == orig_last_end), we place it contiguously in the remapped space. This is critical to preserve the contiguity of mappings for downstream merging (e.g. symbols split by HugeTLB, or anonymous .bss segments adjacent to initialized data). - If not contiguous, we insert a 1-page gap (using page_size) from the previous maximum allocated address to prevent accidental merging of unrelated VMAs. - The event's start address (and pgoff for kernel maps) is rewritten, and the event is delegated to the output writer. To remain strictly conservative and guarantee security, the tool scrubs breakpoint addresses (bp_addr) from all synthesized stream headers, completely drops PERF_RECORD_TEXT_POKE events to prevent absolute immediate pointer operand leaks, and drops unsupported complex payloads (such as user register stacks, raw tracepoints, and hardware AUX tracing frames). Verification is reinforced with a shell test ('inject_aslr.sh'). Prerequisite Bug Fix (Patch 1): During development, a core map indexing issue was identified and resolved to prevent concurrent lookup data races during session analysis. Changes since v10: - Patch 1: Added explicit tracking array logic in maps__load_maps() to correctly accumulate valid maps (skipping NULL entries after failures) and safely return the exact populated count, resolving out-of-bounds pointer iteration panics. 
- Patch 3: Fixed endianness bug during cross-endian sample parsing by passing evsel->needs_swap instead of false to __evsel__parse_sample in aslr.c, ensuring correct 32-bit field byte unswapping for packed fields. Refactored evsel__parse_sample to take a needs_swap argument via __evsel__parse_sample. - Patch 4: Fixed inject_aslr.sh exit code handling in trap functions to capture and propagate the correct pipeline failure status code instead of unconditionally returning success or failing the test. Changes since v9: - Patch 1: Added `-ENOMEM` error check inside `maps__find_symbol_by_name()` and return `NULL` early. Added map sorting state invalidation on early return in `maps__load_maps()`. - Patch 2: Fixed encapsulation by using `thread__maps()` and `thread__pid()` accessors in `aslr_tool__findnew_mapping()`. Added `pr_warning_once` warning when raw auxtrace data is dropped. - Patch 3: Fixed encapsulation by using `thread__maps()` and `thread__pid()` accessors in `aslr_tool__remap_address()`. Wrapped `evsel__parse_sample()` to temporarily disable `needs_swap` to avoid branch stack endianness corruption on cross-endian files. Fixed ISO C90 warning for declaration-after-statement for `orig_needs_swap`. - Patch 4: Fixed duplicate cleanup by explicitly removing trap handlers (`trap - EXIT TERM INT`) inside the `cleanup()` function. - Patch 5: Fixed heap corruption by adding size bounds checking before writing to `sample_regs_user` and `sample_regs_intr` fields. Added missing register mask clearing logic for the `itrace` synthesis path of `perf_event__repipe_attr()`. 
Ian Rogers (5): perf maps: Add maps__mutate_mapping perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking perf inject/aslr: Implement sample address remapping perf test: Add inject ASLR test perf aslr: Strip sample registers tools/perf/builtin-inject.c | 79 +- tools/perf/tests/shell/inject_aslr.sh | 519 ++++++++++ tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 1262 +++++++++++++++++++++++++ tools/perf/util/aslr.h | 41 + tools/perf/util/evsel.c | 6 +- tools/perf/util/evsel.h | 10 +- tools/perf/util/machine.c | 32 +- tools/perf/util/maps.c | 149 ++- tools/perf/util/maps.h | 3 + tools/perf/util/symbol-elf.c | 41 +- tools/perf/util/symbol.c | 17 +- 12 files changed, 2095 insertions(+), 65 deletions(-) create mode 100755 tools/perf/tests/shell/inject_aslr.sh create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v11 1/5] perf maps: Add maps__mutate_mapping 2026-06-05 18:52 ` [PATCH v11 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers @ 2026-06-05 18:52 ` Ian Rogers 2026-06-05 19:06 ` sashiko-bot 2026-06-05 18:52 ` [PATCH v11 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers ` (4 subsequent siblings) 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-05 18:52 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz During kernel ELF symbol parsing (dso__process_kernel_symbol), proc kallsyms image loading (dso__load_kernel_sym, dso__load_guest_kernel_sym), and dynamic kernel memory map alignment updates (machine__update_kernel_mmap), the loader directly modifies live virtual address boundary key fields on map objects. If these boundaries are mutated while the map pointer actively resides inside the parent maps cache array list (kmaps) outside of any lock closure, an unsafe concurrent window is exposed where parallel worker lookup threads (e.g., inside perf top) can mistakenly assume the cache remains sorted based on stale parameters, executing binary search queries (bsearch) across an unsorted range and triggering lookup failures. Fix this by introducing maps__mutate_mapping(), which explicitly acquires the parent maps write semaphore lock, executes an incoming mutation callback block to perform the field updates under lock protection, and invalidates the sorted tracking flags prior to releasing the write lock. This guarantees synchronization invariants, closing the concurrent lookup race window. The adjacent module alignment pass inside machine__create_kernel_maps() is safely preserved as a high-performance lockless pass, as its invocation lifecycle bounds remain strictly single-threaded by contract during session initialization. 
To safely support this unconditional down_write write lock mutator without recursive read-to-write self-deadlock upgrades during lazy symbol loading, we introduce a public maps__load_maps() API. It copies map pointers under a brief read lock and force-loads all modules locklessly outside the lock. Callers (such as perf inject) must pre-load all kernel symbol maps up front at startup using maps__load_maps(), completely bypassing dynamic runtime mutations. Fixes: 39b12f781271 ("perf tools: Make it possible to read object code from vmlinux") Assisted-by: Antigravity:gemini-3.5-flash Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/util/machine.c | 32 +++++--- tools/perf/util/maps.c | 149 ++++++++++++++++++++++++++++------- tools/perf/util/maps.h | 3 + tools/perf/util/symbol-elf.c | 41 ++++++---- tools/perf/util/symbol.c | 17 +++- 5 files changed, 184 insertions(+), 58 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index da1ad58758af..1ea06fde14e0 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1539,22 +1539,30 @@ static void machine__set_kernel_mmap(struct machine *machine, map__set_end(machine->vmlinux_map, ~0ULL); } -static int machine__update_kernel_mmap(struct machine *machine, - u64 start, u64 end) +struct kernel_mmap_mutation_ctx { + u64 start; + u64 end; +}; + +static int kernel_mmap_mutate_cb(struct map *map, void *data) { - struct map *orig, *updated; - int err; + struct kernel_mmap_mutation_ctx *ctx = data; - orig = machine->vmlinux_map; - updated = map__get(orig); + map__set_start(map, ctx->start); + map__set_end(map, ctx->end); + if (ctx->start == 0 && ctx->end == 0) + map__set_end(map, ~0ULL); + return 0; +} - machine->vmlinux_map = updated; - maps__remove(machine__kernel_maps(machine), orig); - machine__set_kernel_mmap(machine, start, end); - err = maps__insert(machine__kernel_maps(machine), updated); - map__put(orig); +static int machine__update_kernel_mmap(struct machine *machine, 
+ u64 start, u64 end) +{ + struct kernel_mmap_mutation_ctx ctx = { .start = start, .end = end }; - return err; + return maps__mutate_mapping(machine__kernel_maps(machine), + machine->vmlinux_map, + kernel_mmap_mutate_cb, &ctx); } int machine__create_kernel_maps(struct machine *machine) diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index 923935ee21b6..aed72c9f0a50 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -576,6 +576,49 @@ void maps__remove(struct maps *maps, struct map *map) #endif } +/** + * maps__mutate_mapping - Apply write-protected mutations to a map. + * @maps: The maps collection containing the map. + * @map: The map to mutate. + * @mutate_cb: Callback function that performs the actual mutations. + * @data: Private data passed to the callback. + * + * This acquires the write lock on the maps semaphore to safely protect + * concurrent readers from seeing partially mutated or unsorted map boundaries. + * + * WARNING: Acquiring down_write() here can trigger a recursive self-deadlock if + * the caller already holds the read lock (e.g., during maps__for_each_map() or + * maps__find() iteration paths that trigger lazy symbol loading). To completely + * avoid this deadlock, all kernel/module maps must be pre-loaded up-front (via + * maps__load_maps()) under a clean, single-threaded context before entering + * multi-threaded event processing loops. 
+ */ +int maps__mutate_mapping(struct maps *maps, struct map *map, + int (*mutate_cb)(struct map *map, void *data), void *data) +{ + int err = 0; + + if (maps) + down_write(maps__lock(maps)); + + err = mutate_cb(map, data); + + if (maps) { + RC_CHK_ACCESS(maps)->maps_by_address_sorted = false; + RC_CHK_ACCESS(maps)->maps_by_name_sorted = false; + } + + if (maps) + up_write(maps__lock(maps)); + +#ifdef HAVE_LIBDW_SUPPORT + if (maps) + libdw__invalidate_dwfl(maps, maps__libdw_addr_space_dwfl(maps)); +#endif + + return err; +} + bool maps__empty(struct maps *maps) { bool res; @@ -626,6 +669,41 @@ int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data) return ret; } +int maps__load_maps(struct maps *maps) +{ + struct map **maps_copy; + unsigned int nr_maps; + int err = 0; + + if (!maps) + return 0; + + down_read(maps__lock(maps)); + nr_maps = maps__nr_maps(maps); + if (nr_maps == 0) { + up_read(maps__lock(maps)); + return 0; + } + maps_copy = calloc(nr_maps, sizeof(*maps_copy)); + if (!maps_copy) { + up_read(maps__lock(maps)); + return -ENOMEM; + } + for (unsigned int i = 0; i < nr_maps; i++) + maps_copy[i] = map__get(maps__maps_by_address(maps)[i]); + up_read(maps__lock(maps)); + + for (unsigned int i = 0; i < nr_maps; i++) { + if (map__load(maps_copy[i]) < 0) { + pr_warning("Failed to load map %s\n", map__dso(maps_copy[i])->name); + err = -1; + } + map__put(maps_copy[i]); + } + free(maps_copy); + return err; +} + void maps__remove_maps(struct maps *maps, bool (*cb)(struct map *map, void *data), void *data) { struct map **maps_by_address; @@ -668,40 +746,57 @@ struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp) return result; } -struct maps__find_symbol_by_name_args { - struct map **mapp; - const char *name; - struct symbol *sym; -}; - -static int maps__find_symbol_by_name_cb(struct map *map, void *data) +struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) { - struct 
maps__find_symbol_by_name_args *args = data; + struct map **maps_copy; + unsigned int nr_maps; + struct symbol *sym = NULL; - args->sym = map__find_symbol_by_name(map, args->name); - if (!args->sym) - return 0; + if (!maps) + return NULL; - if (!map__contains_symbol(map, args->sym)) { - args->sym = NULL; - return 0; + /* + * First, ensure all maps are loaded. We pre-load them outside of any + * read-to-write locks to avoid deadlocks. Even if some fail, we proceed. + */ + maps__load_maps(maps); + + /* + * Create a local snapshot of the maps while holding the read lock. + * This prevents deadlocking if iteration triggers further map insertions. + */ + down_read(maps__lock(maps)); + nr_maps = maps__nr_maps(maps); + maps_copy = calloc(nr_maps, sizeof(*maps_copy)); + if (maps_copy) { + for (unsigned int i = 0; i < nr_maps; i++) { + struct map *map = maps__maps_by_address(maps)[i]; + + maps_copy[i] = map__get(map); + } } + up_read(maps__lock(maps)); - if (args->mapp != NULL) - *args->mapp = map__get(map); - return 1; -} + if (!maps_copy) + return NULL; -struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) -{ - struct maps__find_symbol_by_name_args args = { - .mapp = mapp, - .name = name, - .sym = NULL, - }; + for (unsigned int i = 0; i < nr_maps; i++) { + struct map *map = maps_copy[i]; + + sym = map__find_symbol_by_name(map, name); + if (sym && map__contains_symbol(map, sym)) { + if (mapp) + *mapp = map__get(map); + break; + } + sym = NULL; + } + + for (unsigned int i = 0; i < nr_maps; i++) + map__put(maps_copy[i]); - maps__for_each_map(maps, maps__find_symbol_by_name_cb, &args); - return args.sym; + free(maps_copy); + return sym; } int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams) diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h index 5b80b199685e..4ec9b7453a3b 100644 --- a/tools/perf/util/maps.h +++ b/tools/perf/util/maps.h @@ -59,8 +59,11 @@ void maps__set_libdw_addr_space_dwfl(struct maps 
*maps, void *dwfl); size_t maps__fprintf(struct maps *maps, FILE *fp); +int maps__load_maps(struct maps *maps); int maps__insert(struct maps *maps, struct map *map); void maps__remove(struct maps *maps, struct map *map); +int maps__mutate_mapping(struct maps *maps, struct map *map, + int (*mutate_cb)(struct map *map, void *data), void *data); struct map *maps__find(struct maps *maps, u64 addr); struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 186e6d92ac3d..d1e93c0556dd 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1342,6 +1342,24 @@ static u64 ref_reloc(struct kmap *kmap) void __weak arch__sym_update(struct symbol *s __maybe_unused, GElf_Sym *sym __maybe_unused) { } +struct remap_kernel_ctx { + u64 sh_addr; + u64 sh_size; + u64 sh_offset; + struct kmap *kmap; +}; + +static int remap_kernel_cb(struct map *map, void *data) +{ + struct remap_kernel_ctx *ctx = data; + + map__set_start(map, ctx->sh_addr + ref_reloc(ctx->kmap)); + map__set_end(map, map__start(map) + ctx->sh_size); + map__set_pgoff(map, ctx->sh_offset); + map__set_mapping_type(map, MAPPING_TYPE__DSO); + return 0; +} + static int dso__process_kernel_symbol(struct dso *dso, struct map *map, GElf_Sym *sym, GElf_Shdr *shdr, struct maps *kmaps, struct kmap *kmap, @@ -1372,22 +1390,15 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, * map to the kernel dso. 
*/ if (*remap_kernel && dso__kernel(dso) && !kmodule) { + struct remap_kernel_ctx ctx = { + .sh_addr = shdr->sh_addr, + .sh_size = shdr->sh_size, + .sh_offset = shdr->sh_offset, + .kmap = kmap + }; + *remap_kernel = false; - map__set_start(map, shdr->sh_addr + ref_reloc(kmap)); - map__set_end(map, map__start(map) + shdr->sh_size); - map__set_pgoff(map, shdr->sh_offset); - map__set_mapping_type(map, MAPPING_TYPE__DSO); - /* Ensure maps are correctly ordered */ - if (kmaps) { - int err; - struct map *tmp = map__get(map); - - maps__remove(kmaps, map); - err = maps__insert(kmaps, map); - map__put(tmp); - if (err) - return err; - } + maps__mutate_mapping(kmaps, map, remap_kernel_cb, &ctx); } /* diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 0c46b24ee098..2cc911af8c81 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -48,6 +48,13 @@ #include <symbol/kallsyms.h> #include <sys/utsname.h> +static int map_fixup_cb(struct map *map, void *data __maybe_unused) +{ + map__fixup_start(map); + map__fixup_end(map); + return 0; +} + static int dso__load_kernel_sym(struct dso *dso, struct map *map); static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map); @@ -2240,10 +2247,11 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map) free(kallsyms_allocated_filename); if (err > 0 && !dso__is_kcore(dso)) { + struct maps *kmaps = map__kmaps(map); + dso__set_binary_type(dso, DSO_BINARY_TYPE__KALLSYMS); dso__set_long_name(dso, DSO__NAME_KALLSYMS, false); - map__fixup_start(map); - map__fixup_end(map); + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); } return err; @@ -2283,10 +2291,11 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map) if (err > 0) pr_debug("Using %s for symbols\n", kallsyms_filename); if (err > 0 && !dso__is_kcore(dso)) { + struct maps *kmaps = map__kmaps(map); + dso__set_binary_type(dso, DSO_BINARY_TYPE__GUEST_KALLSYMS); dso__set_long_name(dso, machine->mmap_name, 
false); - map__fixup_start(map); - map__fixup_end(map); + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); } return err; -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v11 1/5] perf maps: Add maps__mutate_mapping 2026-06-05 18:52 ` [PATCH v11 1/5] perf maps: Add maps__mutate_mapping Ian Rogers @ 2026-06-05 19:06 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-06-05 19:06 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider: - [Low] Compiler error in `maps__load_maps` due to accessing non-existent `name` field on `struct dso`. -- commit 42ed9a7e77f954259213b06a013da93fc23180c3 Author: Ian Rogers <irogers@google.com> perf maps: Add maps__mutate_mapping This patch introduces maps__mutate_mapping() to safely perform field updates under lock protection, resolving a concurrent lookup race window. It also adds maps__load_maps() to pre-load all kernel symbol maps up front at startup to avoid deadlocks. > diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c > index 923935ee21b66..aed72c9f0a509 100644 > --- a/tools/perf/util/maps.c > +++ b/tools/perf/util/maps.c [ ... ] > @@ -626,6 +669,41 @@ int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data) > return ret; > } > > +int maps__load_maps(struct maps *maps) > +{ > + struct map **maps_copy; > + unsigned int nr_maps; > + int err = 0; [ ... ] > + for (unsigned int i = 0; i < nr_maps; i++) { > + if (map__load(maps_copy[i]) < 0) { > + pr_warning("Failed to load map %s\n", map__dso(maps_copy[i])->name); [Severity: Low] Does this cause a build failure? It looks like struct dso does not expose a name field directly. Should this use an accessor function like dso__name(map__dso(maps_copy[i])) or dso__short_name() instead to retrieve the name string? > + err = -1; > + } > + map__put(maps_copy[i]); > + } > + free(maps_copy); > + return err; > +} -- Sashiko AI review · https://sashiko.dev/#/patchset/20260605185215.2359881-1-irogers@google.com?part=1 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v11 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking 2026-06-05 18:52 ` [PATCH v11 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-05 18:52 ` [PATCH v11 1/5] perf maps: Add maps__mutate_mapping Ian Rogers @ 2026-06-05 18:52 ` Ian Rogers 2026-06-05 19:07 ` sashiko-bot 2026-06-05 18:52 ` [PATCH v11 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers ` (3 subsequent siblings) 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-05 18:52 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz If perf.data files are taken from one machine to another they may leak virtual addresses and so weaken ASLR on the machine they are coming from. Add an aslr option for perf inject that remaps all virtual addresses, or drops data/events, so that the virtual address information isn't leaked. This patch introduces the core ASLR remapping tool infrastructure and implements remapping/tracking for metadata events (MMAP, MMAP2, COMM, FORK, EXIT, KSYMBOL, TEXT_POKE). Sample events are delegated without remapping for now. 
Assisted-by: Antigravity:gemini-3.5-flash Signed-off-by: Ian Rogers <irogers@google.com> Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> --- tools/perf/builtin-inject.c | 57 ++- tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 689 ++++++++++++++++++++++++++++++++++++ tools/perf/util/aslr.h | 37 ++ 4 files changed, 782 insertions(+), 2 deletions(-) create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 75ffe31d03fe..65c7eccccf4d 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -8,6 +8,7 @@ */ #include "builtin.h" +#include "util/aslr.h" #include "util/color.h" #include "util/dso.h" #include "util/vdso.h" @@ -124,6 +125,7 @@ struct perf_inject { bool in_place_update_dry_run; bool copy_kcore_dir; bool convert_callchain; + bool aslr; const char *input_name; struct perf_data output; u64 bytes_written; @@ -242,8 +244,25 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, if (!inject->output.is_pipe) return 0; - if (!inject->itrace_synth_opts.set) + if (!inject->itrace_synth_opts.set) { + if (inject->aslr) { + union perf_event *stripped_event = malloc(event->header.size); + int err; + + if (!stripped_event) + return -ENOMEM; + memcpy(stripped_event, event, event->header.size); + stripped_event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + + if (stripped_event->attr.attr.type == PERF_TYPE_BREAKPOINT) + stripped_event->attr.attr.bp_addr = 0; + + err = perf_event__repipe_synth(tool, stripped_event); + free(stripped_event); + return err; + } return perf_event__repipe_synth(tool, event); + } if (event->header.size < sizeof(struct perf_event_header) + PERF_ATTR_SIZE_VER0) { pr_err("Attribute event size %u is too small\n", event->header.size); @@ -276,6 +295,8 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, attr.size = sizeof(struct perf_event_attr); 
attr.sample_type &= ~PERF_SAMPLE_AUX; + if (inject->aslr) + attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; if (inject->itrace_synth_opts.add_last_branch) { attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; @@ -2595,6 +2616,8 @@ static int __cmd_inject(struct perf_inject *inject) } } + + session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc, @@ -2704,6 +2727,8 @@ int cmd_inject(int argc, const char **argv) unwind__option), OPT_BOOLEAN(0, "convert-callchain", &inject.convert_callchain, "Generate callchains using DWARF and drop register/stack data"), + OPT_BOOLEAN(0, "aslr", &inject.aslr, + "Remap virtual memory addresses similar to ASLR"), OPT_END() }; const char * const inject_usage[] = { @@ -2711,6 +2736,7 @@ int cmd_inject(int argc, const char **argv) NULL }; bool ordered_events; + struct perf_tool *tool = &inject.tool; if (!inject.itrace_synth_opts.set) { /* Disable eager loading of kernel symbols that adds overhead to perf inject. 
*/ @@ -2731,6 +2757,11 @@ int cmd_inject(int argc, const char **argv) if (argc) usage_with_options(inject_usage, options); + if (inject.aslr && inject.convert_callchain) { + pr_err("Error: --aslr and --convert-callchain are mutually exclusive features.\n"); + return -EINVAL; + } + if (inject.strip && !inject.itrace_synth_opts.set) { pr_err("--strip option requires --itrace option\n"); return -1; @@ -2824,12 +2855,21 @@ int cmd_inject(int argc, const char **argv) inject.tool.schedstat_domain = perf_event__repipe_op2_synth; inject.tool.dont_split_sample_group = true; inject.tool.merge_deferred_callchains = false; - inject.session = __perf_session__new(&data, &inject.tool, + if (inject.aslr) { + tool = aslr_tool__new(&inject.tool); + if (!tool) { + ret = -ENOMEM; + goto out_close_output; + } + } + inject.session = __perf_session__new(&data, tool, /*trace_event_repipe=*/inject.output.is_pipe, /*host_env=*/NULL); if (IS_ERR(inject.session)) { ret = PTR_ERR(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); goto out_close_output; } @@ -2923,12 +2963,25 @@ int cmd_inject(int argc, const char **argv) ret = __cmd_inject(&inject); + if (inject.aslr) { + struct evsel *evsel; + + evlist__for_each_entry(inject.session->evlist, evsel) { + evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) + evsel->core.attr.bp_addr = 0; + } + } + guest_session__exit(&inject.guest_session); out_delete: strlist__delete(inject.known_build_ids); zstd_fini(&(inject.session->zstd_data)); perf_session__delete(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); out_close_output: if (!inject.in_place_update) perf_data__close(&inject.output); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 4bbc78b1f741..19994e026ae5 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -6,6 +6,7 @@ perf-util-y += arm64-frame-pointer-unwind-support.o perf-util-y += addr2line.o perf-util-y += addr_location.o 
perf-util-y += annotate.o +perf-util-y += aslr.o perf-util-y += blake2s.o perf-util-y += block-info.o perf-util-y += block-range.o diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c new file mode 100644 index 000000000000..375a0355f281 --- /dev/null +++ b/tools/perf/util/aslr.c @@ -0,0 +1,689 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "aslr.h" + +#include "addr_location.h" +#include "debug.h" +#include "event.h" +#include "evsel.h" +#include "machine.h" +#include "map.h" +#include "thread.h" +#include "tool.h" +#include "session.h" +#include "data.h" +#include "dso.h" + +#include <internal/lib.h> /* page_size */ +#include <linux/compiler.h> +#include <linux/zalloc.h> +#include <inttypes.h> +#include <unistd.h> + +/** + * struct remap_addresses_key - Key for mapping original addresses to remapped ones. + * @dso: Pointer to the DSO (Dynamic Shared Object) associated with the mapping. + * @invariant: Unique offset invariant within the VMA (Virtual Memory Area). + * Calculated as `start - pgoff`. This value remains constant when + * perf's internal `maps__fixup_overlap_and_insert` splits a map into + * fragmented VMA pieces due to overlapping events, allowing us to + * resolve split maps consistently back to the original VMA. + * @pid: Process ID associated with the mapping. + */ +struct remap_addresses_key { + struct machine *machine; + struct dso *dso; + u64 invariant; + pid_t pid; +}; + +struct aslr_mapping { + struct list_head node; + u64 orig_start; + u64 len; + u64 remap_start; +}; + +struct process_top_address { + u64 remapped_max; + u64 orig_last_end; +}; +struct aslr_tool { + /** @tool: The tool implemented here and a pointer to a delegate to process the data. */ + struct delegate_tool tool; + /** @machines: The machines with the input, not remapped, virtual address layout. */ + struct machines machines; + /** @event_copy: Buffer used to create an event to pass to the delegate. 
*/ + char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); + /** @remap_addresses: mapping from remap_addresses_key to remapped address. */ + struct hashmap remap_addresses; + /** @top_addresses: mapping from process to max remapped address. */ + struct hashmap top_addresses; +}; + +static const pid_t kernel_pid = -1; + +/* Start remapping user processes from a small non-zero offset. */ +static const u64 user_space_start = 0x200000; +static const u64 kernel_space_start = 0xffff800010000000; + +static size_t remap_addresses__hash(long _key, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key = (struct remap_addresses_key *)_key; + + return (size_t)key->machine ^ (size_t)key->dso ^ key->invariant ^ key->pid; +} + +static bool remap_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key1 = (struct remap_addresses_key *)_key1; + struct remap_addresses_key *key2 = (struct remap_addresses_key *)_key2; + + return key1->machine == key2->machine && + RC_CHK_EQUAL(key1->dso, key2->dso) && + key1->invariant == key2->invariant && + key1->pid == key2->pid; +} + +struct top_addresses_key { + struct machine *machine; + pid_t pid; +}; + +static size_t top_addresses__hash(long _key, void *ctx __maybe_unused) +{ + struct top_addresses_key *key = (struct top_addresses_key *)_key; + + return (size_t)key->machine ^ key->pid; +} + +static bool top_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) +{ + struct top_addresses_key *key1 = (struct top_addresses_key *)_key1; + struct top_addresses_key *key2 = (struct top_addresses_key *)_key2; + + return key1->machine == key2->machine && key1->pid == key2->pid; +} + +static u64 round_up_to_page_size(u64 addr) +{ + return (addr + page_size - 1) & ~((u64)page_size - 1); +} + +struct aslr_machine_priv { + bool kernel_maps_loaded; +}; + +static int aslr_tool__preload_kernel_maps(struct machine *machine) +{ + struct aslr_machine_priv *mpriv = machine->priv; + + if (!mpriv) { 
+ mpriv = zalloc(sizeof(*mpriv)); + if (!mpriv) + return -ENOMEM; + machine->priv = mpriv; + } + + if (!mpriv->kernel_maps_loaded) { + struct maps *kmaps = machine__kernel_maps(machine); + + if (kmaps) { + int err = maps__load_maps(kmaps); + + if (err < 0) { + pr_err("ASLR: Failed to preload kernel maps for machine pid %d\n", + machine->pid); + return err; + } + } + mpriv->kernel_maps_loaded = true; + } + return 0; +} + +static void aslr_tool__free_machine_priv(struct machine *machine) +{ + free(machine->priv); + machine->priv = NULL; +} + +static void aslr_tool__destroy_machines_priv(struct machines *machines) +{ + struct rb_node *nd; + + aslr_tool__free_machine_priv(&machines->host); + for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) { + struct machine *machine = rb_entry(nd, struct machine, rb_node); + + aslr_tool__free_machine_priv(machine); + } +} + +static u64 aslr_tool__findnew_mapping(struct aslr_tool *aslr, + struct thread *aslr_thread, + u8 cpumode, u64 start, + u64 len, u64 pgoff) +{ + /* Address location for dso lookup. */ + struct addr_location al; + /* Original ASLR address based key for the remap table. */ + struct remap_addresses_key remap_key; + /* The address in the ASLR sanitized address space less pg_off. */ + u64 *remapped_invariant_ptr; + /* Key for the maximum address in a process. */ + struct top_addresses_key top_addr_key; + /* Value in top address table. */ + struct process_top_address *top = NULL; + /* Address in ASLR sanitized address space. */ + u64 remap_addr; + /* Potentially allocated remap table key. */ + struct remap_addresses_key *new_remap_key = NULL; + /* + * Potentially allocated remap table value. + * TODO: Avoid allocation necessary for perf 32-bit binary support. + */ + u64 *new_remap_val = NULL; + int err; + + if (!aslr_thread) + return 0; + + /* The key to look up an incoming address to the outgoing value. 
*/ + addr_location__init(&al); + remap_key.machine = maps__machine(thread__maps(aslr_thread)); + remap_key.pid = (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? + kernel_pid : thread__pid(aslr_thread); + if (thread__find_map(aslr_thread, cpumode, start, &al)) { + struct dso *dso = map__dso(al.map); + const char *dso_name = dso ? dso__long_name(dso) : NULL; + + remap_key.dso = dso; + if (dso && !is_anon_memory(dso_name) && !is_no_dso_memory(dso_name)) + remap_key.invariant = map__start(al.map) - map__pgoff(al.map); + else + remap_key.invariant = map__start(al.map); + } else { + remap_key.dso = NULL; + remap_key.invariant = start; + } + + /* The key to look up top allocated address. */ + top_addr_key.machine = remap_key.machine; + top_addr_key.pid = remap_key.pid; + + if (hashmap__find(&aslr->remap_addresses, &remap_key, &remapped_invariant_ptr)) { + /* Mmap already exists. */ + u64 calculated_max; + + if (al.map) { + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + + (start - map__start(al.map)); + } else { + remap_addr = *remapped_invariant_ptr + pgoff; + } + + calculated_max = remap_addr + len; + + /* See if top mapping was expanded. */ + if (hashmap__find(&aslr->top_addresses, &top_addr_key, &top)) { + if (calculated_max > top->remapped_max) + top->remapped_max = calculated_max; + } + addr_location__exit(&al); + return remap_addr; + } + /* No mmap, create an entry from the top address. */ + if (hashmap__find(&aslr->top_addresses, &top_addr_key, &top)) { + /* Current max allocated mmap address within the process. */ + remap_addr = top->remapped_max; + + if (start == top->orig_last_end) { + /* Contiguous mapping, do not add 1 page gap! */ + remap_addr = round_up_to_page_size(remap_addr); + } else { + /* Give 1 page gap from current max page. 
*/ + remap_addr = round_up_to_page_size(remap_addr); + remap_addr += page_size; + } + top->orig_last_end = start + len; + if (remap_addr + len > top->remapped_max) + top->remapped_max = remap_addr + len; + } else { + /* First address of the process, allocate key and first top address. */ + struct top_addresses_key *tk; + struct process_top_address *top_val; + + remap_addr = (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? + kernel_space_start : user_space_start; + remap_addr = round_up_to_page_size(remap_addr); + + tk = malloc(sizeof(*tk)); + top_val = malloc(sizeof(*top_val)); + if (!tk || !top_val) { + err = -ENOMEM; + } else { + *tk = top_addr_key; + top_val->remapped_max = remap_addr + len; + top_val->orig_last_end = start + len; + err = hashmap__insert(&aslr->top_addresses, tk, top_val, + HASHMAP_ADD, NULL, NULL); + } + if (err) { + errno = -err; + pr_err("Failure to add ASLR process top address %m\n"); + free(tk); + free(top_val); + addr_location__exit(&al); + return 0; + } + } + /* Create remapping entry. */ + new_remap_key = malloc(sizeof(*new_remap_key)); + new_remap_val = malloc(sizeof(u64)); + if (!new_remap_key || !new_remap_val) { + err = -ENOMEM; + } else { + *new_remap_key = remap_key; + new_remap_key->dso = dso__get(remap_key.dso); + if (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) { + if (al.map) { + *new_remap_val = remap_addr - + (start - map__start(al.map)) - + map__pgoff(al.map); + } else { + *new_remap_val = remap_addr; + } + } else { + *new_remap_val = remap_addr - (al.map ? 
map__pgoff(al.map) : pgoff); + } + err = hashmap__add(&aslr->remap_addresses, new_remap_key, new_remap_val); + if (err) + dso__put(new_remap_key->dso); + } + if (err) { + errno = -err; + pr_err("Failure to add ASLR remapping %m\n"); + free(new_remap_key); + free(new_remap_val); + addr_location__exit(&al); + return 0; + } + addr_location__exit(&al); + return remap_addr; +} + +static int aslr_tool__process_mmap(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_mmap(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap.pid, event->mmap.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap, &event->mmap, event->mmap.header.size); + /* Remaps the mmap.start. 
*/ + new_event->mmap.start = aslr_tool__findnew_mapping(aslr, thread, cpumode, + event->mmap.start, + event->mmap.len, + event->mmap.pgoff); + if (cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + new_event->mmap.pgoff = new_event->mmap.start; + err = delegate->mmap(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_mmap2(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_mmap2(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap2.pid, event->mmap2.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap2, &event->mmap2, event->mmap2.header.size); + /* Remaps the mmap2.start. 
*/ + new_event->mmap2.start = aslr_tool__findnew_mapping(aslr, thread, cpumode, + event->mmap2.start, + event->mmap2.len, + event->mmap2.pgoff); + if (cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + new_event->mmap2.pgoff = new_event->mmap2.start; + err = delegate->mmap2(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_comm(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_comm(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->comm(delegate, event, sample, machine); +} + +static int aslr_tool__process_fork(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. 
*/ + err = perf_event__process_fork(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->fork(delegate, event, sample, machine); +} + +static int aslr_tool__process_exit(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_exit(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->exit(delegate, event, sample, machine); +} + +static int aslr_tool__process_text_poke(const struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + /* Drop in case the instruction encodes an ASLR revealing address. 
*/ + return 0; +} + +static int aslr_tool__process_ksymbol(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + err = perf_event__process_ksymbol(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, kernel_pid, 0); + if (!thread) + return -ENOMEM; + memcpy(&new_event->ksymbol, &event->ksymbol, event->ksymbol.header.size); + /* Remaps the ksymbol.addr. */ + new_event->ksymbol.addr = aslr_tool__findnew_mapping(aslr, thread, + PERF_RECORD_MISC_KERNEL, + event->ksymbol.addr, + event->ksymbol.len, + /*pgoff=*/0); + + err = delegate->ksymbol(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_sample(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct perf_tool *delegate = aslr->tool.delegate; + + return delegate->sample(delegate, event, sample, machine); +} + +static int skipn(int fd, off_t n) +{ + char buf[4096]; + ssize_t ret; + + while (n > 0) { + ret = read(fd, buf, min_t(off_t, n, (off_t)sizeof(buf))); + if (ret <= 0) + return ret; + n -= ret; + } + + return 0; +} + +static s64 
aslr_tool__process_auxtrace(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, + union perf_event *event) +{ + pr_warning_once("ASLR: Dropping auxtrace data as it cannot be obfuscated.\n"); + if (perf_data__is_pipe(session->data)) { + /* Copy behavior of the stub by reading all pipe data. */ + int err = skipn(perf_data__fd(session->data), event->auxtrace.size); + + if (err < 0) + return err; + } + return event->auxtrace.size; +} + +static int aslr_tool__process_auxtrace_info(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + +static int aslr_tool__process_auxtrace_error(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + +static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) +{ + delegate_tool__init(&aslr->tool, delegate); + aslr->tool.tool.ordered_events = true; + + machines__init(&aslr->machines); + + hashmap__init(&aslr->remap_addresses, + remap_addresses__hash, remap_addresses__equal, + /*ctx=*/NULL); + hashmap__init(&aslr->top_addresses, + top_addresses__hash, top_addresses__equal, + /*ctx=*/NULL); + + aslr->tool.tool.sample = aslr_tool__process_sample; + /* read - reads a counter, okay to delegate. */ + aslr->tool.tool.mmap = aslr_tool__process_mmap; + aslr->tool.tool.mmap2 = aslr_tool__process_mmap2; + aslr->tool.tool.comm = aslr_tool__process_comm; + aslr->tool.tool.fork = aslr_tool__process_fork; + aslr->tool.tool.exit = aslr_tool__process_exit; + /* namespaces, cgroup, lost, lost_sample, aux, */ + /* itrace_start, aux_output_hw_id, context_switch, throttle, unthrottle */ + /* - no virtual addresses. */ + aslr->tool.tool.ksymbol = aslr_tool__process_ksymbol; + /* bpf - no virtual address. 
*/ + aslr->tool.tool.text_poke = aslr_tool__process_text_poke; + /* + * event_update, tracing_data, finished_round, build_id, id_index, + * auxtrace_info, auxtrace_error, time_conv, thread_map, cpu_map, + * stat_config, stat, feature, finished_init, bpf_metadata, compressed, + * auxtrace - no virtual addresses. + */ + aslr->tool.tool.auxtrace = aslr_tool__process_auxtrace; + aslr->tool.tool.auxtrace_info = aslr_tool__process_auxtrace_info; + aslr->tool.tool.auxtrace_error = aslr_tool__process_auxtrace_error; +} + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate) +{ + struct aslr_tool *aslr = zalloc(sizeof(*aslr)); + + if (!aslr) + return NULL; + + aslr_tool__init(aslr, delegate); + return &aslr->tool.tool; +} + +void aslr_tool__delete(struct perf_tool *tool) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct hashmap_entry *cur; + size_t bkt; + + if (!tool) + return; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + + hashmap__for_each_entry(&aslr->remap_addresses, cur, bkt) { + struct remap_addresses_key *key = (struct remap_addresses_key *)cur->pkey; + + if (key) + dso__put(key->dso); + zfree(&cur->pkey); + zfree(&cur->pvalue); + } + hashmap__for_each_entry(&aslr->top_addresses, cur, bkt) { + zfree(&cur->pkey); + zfree(&cur->pvalue); + } + + hashmap__clear(&aslr->remap_addresses); + hashmap__clear(&aslr->top_addresses); + aslr_tool__destroy_machines_priv(&aslr->machines); + machines__destroy_kernel_maps(&aslr->machines); + machines__exit(&aslr->machines); + free(aslr); +} diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h new file mode 100644 index 000000000000..a9b90bf29540 --- /dev/null +++ b/tools/perf/util/aslr.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_ASLR_H +#define __PERF_ASLR_H + +#include <linux/perf_event.h> + +#define ASLR_SUPPORTED_SAMPLE_TYPE ( \ + PERF_SAMPLE_IDENTIFIER | \ + PERF_SAMPLE_IP | \ + 
PERF_SAMPLE_TID | \ + PERF_SAMPLE_TIME | \ + PERF_SAMPLE_ADDR | \ + PERF_SAMPLE_ID | \ + PERF_SAMPLE_STREAM_ID | \ + PERF_SAMPLE_CPU | \ + PERF_SAMPLE_PERIOD | \ + PERF_SAMPLE_READ | \ + PERF_SAMPLE_CALLCHAIN | \ + PERF_SAMPLE_RAW | \ + PERF_SAMPLE_BRANCH_STACK | \ + PERF_SAMPLE_STACK_USER | \ + PERF_SAMPLE_WEIGHT_TYPE | \ + PERF_SAMPLE_DATA_SRC | \ + PERF_SAMPLE_TRANSACTION | \ + PERF_SAMPLE_PHYS_ADDR | \ + PERF_SAMPLE_CGROUP | \ + PERF_SAMPLE_DATA_PAGE_SIZE | \ + PERF_SAMPLE_CODE_PAGE_SIZE | \ + PERF_SAMPLE_AUX) + +struct perf_tool; +struct evsel; + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate); +void aslr_tool__delete(struct perf_tool *aslr); + +#endif /* __PERF_ASLR_H */ -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v11 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking 2026-06-05 18:52 ` [PATCH v11 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers @ 2026-06-05 19:07 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-06-05 19:07 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider: - [High] `remap_addresses__hash` hashes the RC pointer handle for `dso` directly, breaking hashmap lookups when `REFCNT_CHECKING` is enabled. -- commit 4e905ffc55cc982a92f6bbc5744803fa9093aab4 Author: Ian Rogers <irogers@google.com> perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking This patch adds an --aslr option to perf inject that remaps virtual addresses to prevent ASLR leaks when transferring perf.data files. It introduces the core remapping infrastructure and implements tracking for metadata events. > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > new file mode 100644 > index 0000000000000..375a0355f2812 > --- /dev/null > +++ b/tools/perf/util/aslr.c [ ... ] > +static size_t remap_addresses__hash(long _key, void *ctx __maybe_unused) > +{ > + struct remap_addresses_key *key = (struct remap_addresses_key *)_key; > + > + return (size_t)key->machine ^ (size_t)key->dso ^ key->invariant ^ key->pid; [Severity: High] Will this hash function work correctly when REFCNT_CHECKING is enabled? The struct dso is reference-counted. Under REFCNT_CHECKING (such as when built with ASAN/LSAN), dso__get() allocates and returns a completely new pointer handle. Because the pointer handle itself is hashed here rather than the underlying object, two handles pointing to the same dso will hash to different buckets. Since aslr_tool__findnew_mapping() acquires a new reference via dso__get() before insertion, hashmap__find() will systematically fail to locate previously inserted keys. 
This breaks the ASLR remapping lookup and causes memory leaks. Should this resolve the underlying pointer before hashing, for example by using RC_CHK_ACCESS(key->dso) or similar? > +} [ ... ] -- Sashiko AI review · https://sashiko.dev/#/patchset/20260605185215.2359881-1-irogers@google.com?part=2 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v11 3/5] perf inject/aslr: Implement sample address remapping 2026-06-05 18:52 ` [PATCH v11 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-05 18:52 ` [PATCH v11 1/5] perf maps: Add maps__mutate_mapping Ian Rogers 2026-06-05 18:52 ` [PATCH v11 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers @ 2026-06-05 18:52 ` Ian Rogers 2026-06-05 18:52 ` [PATCH v11 4/5] perf test: Add inject ASLR test Ian Rogers ` (2 subsequent siblings) 5 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-05 18:52 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz Add the sample address remapping logic to the ASLR tool. This patch implements aslr_tool__process_sample, which parses sample events, remaps IPs, ADDRs, callchains, and branch stacks using the mappings collected from metadata events, and drops potentially leaking raw, register, stack, physical address, and aux samples. Also adds the aslr_tool__remap_address helper function. 
Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/util/aslr.c | 449 +++++++++++++++++++++++++++++++++++++++- tools/perf/util/evsel.c | 6 +- tools/perf/util/evsel.h | 10 +- 3 files changed, 456 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c index 375a0355f281..6d081dfae480 100644 --- a/tools/perf/util/aslr.c +++ b/tools/perf/util/aslr.c @@ -109,6 +109,60 @@ static u64 round_up_to_page_size(u64 addr) return (addr + page_size - 1) & ~((u64)page_size - 1); } +static u64 aslr_tool__remap_address(struct aslr_tool *aslr, + struct thread *aslr_thread, + u8 cpumode, + u64 addr) +{ + struct addr_location al; + struct remap_addresses_key key; + u64 *remapped_invariant_ptr = NULL; + u64 remap_addr = 0; + u8 effective_cpumode = cpumode; + + if (!aslr_thread) + return 0; /* No thread. */ + + addr_location__init(&al); + if (!thread__find_map(aslr_thread, cpumode, addr, &al)) { + /* + * If lookup fails with specified cpumode, try fallback to the other space + * to be robust against bad cpumode in samples. + */ + if (cpumode == PERF_RECORD_MISC_KERNEL) + effective_cpumode = PERF_RECORD_MISC_USER; + else if (cpumode == PERF_RECORD_MISC_USER) + effective_cpumode = PERF_RECORD_MISC_KERNEL; + else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + effective_cpumode = PERF_RECORD_MISC_GUEST_USER; + else if (cpumode == PERF_RECORD_MISC_GUEST_USER) + effective_cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + + if (!thread__find_map(aslr_thread, effective_cpumode, addr, &al)) { + addr_location__exit(&al); + return 0; /* No mmap. */ + } + } + + key.machine = maps__machine(thread__maps(aslr_thread)); + key.dso = map__dso(al.map); + key.invariant = map__start(al.map) - map__pgoff(al.map); + key.pid = (effective_cpumode == PERF_RECORD_MISC_KERNEL || + effective_cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? 
+ kernel_pid : thread__pid(aslr_thread); + + if (hashmap__find(&aslr->remap_addresses, &key, &remapped_invariant_ptr)) { + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + + (addr - map__start(al.map)); + } else { + pr_debug("Cannot find a remapped entry for address %lx in mapping %lx(%lx) for pid=%d\n", + addr, map__start(al.map), map__size(al.map), key.pid); + } + + addr_location__exit(&al); + return remap_addr; +} + struct aslr_machine_priv { bool kernel_maps_loaded; }; @@ -554,13 +608,400 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, struct perf_sample *sample, struct machine *machine) { - struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); - struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); - struct perf_tool *delegate = aslr->tool.delegate; + struct evsel *evsel = sample->evsel; + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + int ret; + u64 sample_type; + struct thread *thread; + struct machine *aslr_machine; + __u64 max_i; + __u64 max_j; + union perf_event *new_event; + struct perf_sample new_sample; + __u64 *in_array, *out_array; + u8 cpumode; + u64 addr; + size_t i; + size_t j; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + if (evsel__is_dummy_event(evsel)) + return delegate->sample(delegate, event, sample, machine); + + ret = -EFAULT; + sample_type = evsel->core.attr.sample_type; + max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); + max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); + new_event = (union perf_event *)aslr->event_copy; + cpumode = sample->cpumode; + i = 0; + j = 0; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return 
-ENOMEM; + + thread = machine__findnew_thread(aslr_machine, sample->pid, sample->tid); + + if (!thread) + return -ENOMEM; - return delegate->sample(delegate, event, sample, machine); + if (max_i > PERF_SAMPLE_MAX_SIZE / sizeof(u64)) + goto out_put; + + new_event->sample.header = event->sample.header; + + in_array = &event->sample.array[0]; + out_array = &new_event->sample.array[0]; + +#define CHECK_BOUNDS(required_i, required_j) \ + (i + (required_i) > max_i || j + (required_j) > max_j) + +#define COPY_U64() \ + do { \ + if (CHECK_BOUNDS(1, 1)) { \ + ret = -EFAULT; \ + goto out_put; \ + } \ + out_array[j++] = in_array[i++]; \ + } while (0) + +#define REMAP_U64(addr_field) \ + do { \ + if (CHECK_BOUNDS(1, 1)) { \ + ret = -EFAULT; \ + goto out_put; \ + } \ + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr_field); \ + i++; \ + } while (0) + + if (sample_type & PERF_SAMPLE_IDENTIFIER) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_IP) + REMAP_U64(sample->ip); + if (sample_type & PERF_SAMPLE_TID) + COPY_U64(); /* pid, tid */ + if (sample_type & PERF_SAMPLE_TIME) + COPY_U64(); /* time */ + if (sample_type & PERF_SAMPLE_ADDR) + REMAP_U64(sample->addr); + if (sample_type & PERF_SAMPLE_ID) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_STREAM_ID) + COPY_U64(); /* stream_id */ + if (sample_type & PERF_SAMPLE_CPU) + COPY_U64(); /* cpu, res */ + if (sample_type & PERF_SAMPLE_PERIOD) + COPY_U64(); /* period */ + if (sample_type & PERF_SAMPLE_READ) { + if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } else { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) 
{ + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + nr = out_array[j++]; + i++; + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + for (u64 cntr = 0; cntr < nr; cntr++) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } + } + } + if (sample_type & PERF_SAMPLE_CALLCHAIN) { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + nr = out_array[j++]; + i++; + + for (u64 cntr = 0; cntr < nr; cntr++) { + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + addr = in_array[i++]; + if (addr >= PERF_CONTEXT_MAX) { + out_array[j++] = addr; + switch (addr) { + case PERF_CONTEXT_HV: + cpumode = PERF_RECORD_MISC_HYPERVISOR; + break; + case PERF_CONTEXT_KERNEL: + cpumode = PERF_RECORD_MISC_KERNEL; + break; + case PERF_CONTEXT_USER: + cpumode = PERF_RECORD_MISC_USER; + break; + case PERF_CONTEXT_GUEST: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_KERNEL: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_USER: + cpumode = PERF_RECORD_MISC_GUEST_USER; + break; + case PERF_CONTEXT_USER_DEFERRED: + if (cntr + 1 >= nr) { + pr_debug("Truncated callchain deferred cookie context\n"); + ret = 0; + goto out_put; + } + /* + * Immediately followed by a 64-bit + * stitching cookie. Skip/Copy it! 
+ */ + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j++] = in_array[i++]; + cntr++; + cpumode = PERF_RECORD_MISC_USER; + break; + default: + pr_debug("invalid callchain context: %"PRIx64"\n", addr); + ret = 0; + goto out_put; + } + continue; + } + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr); + } + } + if (sample_type & PERF_SAMPLE_RAW) { + size_t bytes = sizeof(u32) + sample->raw_size; + size_t u64_words = (bytes + 7) / 8; + + if (i + u64_words > max_i || j + u64_words > max_j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], bytes); + i += u64_words; + j += u64_words; + /* + * TODO: certain raw samples can be remapped, such as + * tracepoints by examining their fields. + */ + pr_debug("Dropping raw samples as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + nr = out_array[j++]; + i++; + + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) + COPY_U64(); /* hw_idx */ + + if (nr > (ULLONG_MAX / 3)) { + ret = -EFAULT; + goto out_put; + } + if (nr * 3 > max_i - i || nr * 3 > max_j - j) { + ret = -EFAULT; + goto out_put; + } + for (u64 cntr = 0; cntr < nr; cntr++) { + out_array[j++] = aslr_tool__remap_address(aslr, thread, + sample->cpumode, + in_array[i++]); /* from */ + out_array[j++] = aslr_tool__remap_address(aslr, thread, + sample->cpumode, + in_array[i++]); /* to */ + out_array[j++] = in_array[i++]; /* flags */ + } + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) { + if (nr > max_i - i || nr > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); + i += nr; + j += nr; + /* TODO: confirm branch counters don't leak ASLR information. 
*/ + pr_debug("Dropping sample branch counters as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + } + if (sample_type & PERF_SAMPLE_REGS_USER) { + if (CHECK_BOUNDS(1, 0)) { + ret = -EFAULT; + goto out_put; + } + /* abi */ + COPY_U64(); + /* TODO: can this be less conservative? */ + pr_debug("Dropping regs user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_STACK_USER) { + u64 size; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + size = out_array[j++]; + i++; + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + if (size % 8) { + size_t pad = 8 - (size % 8); + + memset(((char *)&out_array[j]) + size, 0, pad); + } + i += u64_words; + j += u64_words; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping stack user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + COPY_U64(); /* perf_sample_weight */ + if (sample_type & PERF_SAMPLE_DATA_SRC) + COPY_U64(); /* data_src */ + if (sample_type & PERF_SAMPLE_TRANSACTION) + COPY_U64(); /* transaction */ + if (sample_type & PERF_SAMPLE_REGS_INTR) { + if (CHECK_BOUNDS(1, 0)) { + ret = -EFAULT; + goto out_put; + } + /* abi */ + COPY_U64(); + /* TODO: can this be less conservative? */ + pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_PHYS_ADDR) { + COPY_U64(); /* phys_addr */ + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping physical address sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_CGROUP) + COPY_U64(); /* cgroup */ + if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + COPY_U64(); /* data_page_size */ + if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + COPY_U64(); /* code_page_size */ + + if (sample_type & PERF_SAMPLE_AUX) { + u64 size; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + size = out_array[j++]; + i++; + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + if (size % 8) { + size_t pad = 8 - (size % 8); + + memset(((char *)&out_array[j]) + size, 0, pad); + } + i += u64_words; + j += u64_words; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping aux sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + if (evsel__is_offcpu_event(evsel)) { + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping off-CPU sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); + + perf_sample__init(&new_sample, /*all=*/ true); + ret = evsel__parse_sample(evsel, new_event, &new_sample); + + if (ret) { + perf_sample__exit(&new_sample); + goto out_put; + } + + new_sample.evsel = evsel; + ret = delegate->sample(delegate, new_event, &new_sample, machine); + perf_sample__exit(&new_sample); + +out_put: + thread__put(thread); + return ret; } +#undef CHECK_BOUNDS +#undef COPY_U64 +#undef REMAP_U64 + static int skipn(int fd, off_t n) { char buf[4096]; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 34c03f47a913..05fa0010c858 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -3337,11 +3337,11 @@ static int __set_offcpu_sample(struct perf_sample *data) return -EFAULT; } -int evsel__parse_sample(struct evsel *evsel, union perf_event *event, - struct perf_sample *data) +int __evsel__parse_sample(struct evsel *evsel, union perf_event *event, + struct perf_sample *data, bool needs_swap) { u64 type = evsel->core.attr.sample_type; - bool swapped = evsel->needs_swap; + bool swapped = needs_swap; const __u64 *array; u16 max_size = event->header.size; const void *endp = (void *)event + max_size; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 8178858d168a..8009be22cc3f 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -432,8 +432,14 @@ static inline int evsel__read_on_cpu_scaled(struct evsel *evsel, int cpu_map_idx return __evsel__read_on_cpu(evsel, cpu_map_idx, thread, true); } -int evsel__parse_sample(struct evsel *evsel, union perf_event *event, - struct perf_sample *sample); +int __evsel__parse_sample(struct evsel *evsel, union perf_event *event, + struct perf_sample *data, bool needs_swap); + +static inline int evsel__parse_sample(struct evsel *evsel, union perf_event *event, + struct 
perf_sample *data) +{ + return __evsel__parse_sample(evsel, event, data, evsel->needs_swap); +} int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event, u64 *timestamp); -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* [PATCH v11 4/5] perf test: Add inject ASLR test 2026-06-05 18:52 ` [PATCH v11 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (2 preceding siblings ...) 2026-06-05 18:52 ` [PATCH v11 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers @ 2026-06-05 18:52 ` Ian Rogers 2026-06-05 18:52 ` [PATCH v11 5/5] perf aslr: Strip sample registers Ian Rogers 2026-06-05 19:24 ` [PATCH v12 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 5 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-05 18:52 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz Add a new shell test to verify the 'perf inject --aslr' feature. The test covers: - Basic address remapping for user space samples. - Pipe mode coverage for 'perf record' piped into 'perf inject'. - Callchain address remapping. - Consistency of 'perf report' output before and after injection. - Pipe mode report consistency. - Dropping of samples that leak ASLR info (physical addresses). - Kernel address remapping (utilizing a dedicated kernel-intensive VFS dd workload to guarantee continuous timer interrupts sampling flow inside kernel privilege states). - Kernel report consistency with address normalization. The test suite is hardened with global 'set -o pipefail' assertions to catch pipeline failures, stream-consuming awk processors to handle SIGPIPE signals, and a dedicated pipe output scenario validating raw 'perf inject -o -' stdout streams.
Assisted-by: Antigravity:gemini-3.5-flash Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/tests/shell/inject_aslr.sh | 464 ++++++++++++++++++++++++++ 1 file changed, 464 insertions(+) create mode 100755 tools/perf/tests/shell/inject_aslr.sh diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh new file mode 100755 index 000000000000..d8ded16ba905 --- /dev/null +++ b/tools/perf/tests/shell/inject_aslr.sh @@ -0,0 +1,464 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# perf inject --aslr test + +set -e +set -o pipefail + +shelldir=$(dirname "$0") +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + +sym="noploop" + +skip_test_missing_symbol ${sym} + +# Create global temp directory +temp_dir=$(mktemp -d /tmp/perf-test-aslr.XXXXXXXXXX) + +prog="perf test -w noploop" +[ "$(uname -m)" = "s390x" ] && prog="$prog 3" +err=0 +kprog="dd if=/dev/zero of=/dev/null bs=1M count=500" + +cleanup() { + local exit_code=${1:-$?} + trap - EXIT TERM INT + if [ "${exit_code}" -ne 0 ] || [ "${err}" -ne 0 ]; then + echo "Test failed! Preserving temp directory: ${temp_dir}" + return + fi + # Check if temp_dir is set and looks sane before removing + if [[ "${temp_dir}" =~ ^/tmp/perf-test-aslr\. ]]; then + rm -rf "${temp_dir}" + fi +} + +trap_cleanup() { + local exit_code=$? 
+ echo "Unexpected signal in ${FUNCNAME[1]}" + cleanup ${exit_code} + exit ${exit_code} +} +trap trap_cleanup EXIT TERM INT + +get_noploop_addr() { + local file=$1 + perf script -i "$file" | awk ' + BEGIN { found=0 } + { + for (i=1; i<=NF; i++) { + if ($i ~ /noploop\+/) { + if (!found) { + print $(i-1) + found=1 + } + } + } + }' +} + +test_basic_aslr() { + echo "Test basic ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.basic.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.basic.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -v --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Basic ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Basic ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Basic ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Basic ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Basic ASLR test [Success]" + fi +} + +test_pipe_aslr() { + echo "Test pipe mode ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe.XXXXXX") + + # Use tee to save the original pipe data for comparison + perf record -e task-clock:u -o - ${prog} | tee "${data}" | perf inject --aslr -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Pipe ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Pipe ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Pipe ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Pipe ASLR test [Failed - addresses are not remapped]" + 
err=1 + else + echo "Pipe ASLR test [Success]" + fi +} + +test_callchain_aslr() { + echo "Test Callchain ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.callchain.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.callchain.XXXXXX") + + perf record -g -e task-clock:u -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Callchain ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Callchain ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Callchain ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Callchain ASLR test [Failed - addresses are not remapped]" + err=1 + else + # Extract callchain addresses (indented lines starting with hex addresses) + orig_callchain=$(perf script -i "${data}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + new_callchain=$(perf script -i "${data2}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + + if [ -z "$orig_callchain" ]; then + echo "Callchain ASLR test [Failed - no callchain samples in original file]" + err=1 + elif [ -z "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain data was dropped]" + err=1 + elif [ "$orig_callchain" = "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain addresses were not remapped]" + err=1 + else + echo "Callchain ASLR test [Success]" + fi + fi +} + +test_report_aslr() { + echo "Test perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i 
"${data}" -o "${data_clean}" + perf inject -v -b --aslr -i "${data}" -o "${data2}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Report ASLR test [Success]" + fi +} + +test_pipe_report_aslr() { + echo "Test pipe mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + # Use tee to save the original pipe data, then process it with inject -b + perf record -e task-clock:u -o - ${prog} | \ + tee "${data}" | \ + perf inject -b --aslr -o "${data2}" + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || 
true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Pipe Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Report ASLR test [Success]" + fi +} + +test_pipe_out_report_aslr() { + echo "Test pipe output mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_out_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf inject -b --aslr -i "${data}" -o - | perf report -i - --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! 
-s "${report1_clean}" ]; then + echo "Pipe Output Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Output Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Output Report ASLR test [Success]" + fi +} + +test_dropped_samples() { + echo "Test dropped samples (phys-data)" + local data + data=$(mktemp "${temp_dir}/perf.data.dropped.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.dropped.XXXXXX") + + # Check if --phys-data is supported by recording a short run + if ! perf record -e task-clock:u --phys-data -o "${data}" -- sleep 0.1 > /dev/null 2>&1; then + echo "Skipping dropped samples test as --phys-data is not supported" + return + fi + + perf record -e task-clock:u --phys-data -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + # Verify that the original file actually contained samples! + orig_samples=$(perf script -i "${data}" | wc -l) + if [ "$orig_samples" -eq 0 ]; then + echo "Dropped samples test [Failed - no samples in original file]" + err=1 + else + # Verify that samples are dropped. + samples_count=$(perf script -i "${data2}" | wc -l) + + if [ "$samples_count" -gt 0 ]; then + echo "Dropped samples test [Failed - samples were not dropped]" + err=1 + else + echo "Dropped samples test [Success]" + fi + fi +} + +test_kernel_aslr() { + echo "Test kernel ASLR remapping" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then + echo "Skipping kernel ASLR test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel ASLR test as kernel map could not be recorded (permissions restricted)" + return + fi + + perf inject -v --aslr -i "${kdata}" -o "${kdata2}" + + # Check if kernel addresses are remapped. + # Find the field that ends with :k: (the event name) and take the next field! + orig_addr=$(perf script -i "${kdata}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + new_addr=$(perf script -i "${kdata2}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + + echo "Kernel ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Kernel ASLR test [Failed - no kernel samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Kernel ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Kernel ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Kernel ASLR test [Success]" + fi +} + +test_kernel_report_aslr() { + echo "Test kernel perf report consistency" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel_report.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_report_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then + echo "Skipping kernel report test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel report test as kernel map could not be recorded (permissions restricted)" + return + fi + + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i "${kdata}" -o "${data_clean}" + perf inject -v -b --aslr -i "${kdata}" -o "${kdata2}" + + local report1="${temp_dir}/report_kernel1" + local report2="${temp_dir}/report_kernel2" + local report1_clean="${temp_dir}/report_kernel1.clean" + local report2_clean="${temp_dir}/report_kernel2.clean" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${kdata2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' > "${report2_clean}" || true + + # Normalize kernel DSOs and addresses in clean reports + # This allows kernel modules to be either a module or kernel.kallsyms + local report1_norm="${temp_dir}/report_kernel1.norm" + local report2_norm="${temp_dir}/report_kernel2.norm" + local diff_file="${temp_dir}/diff_kernel" + + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report1_clean}" | \ + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ + sort > "${report1_norm}" || true + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report2_clean}" | \ + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ + sort > "${report2_norm}" || true + + diff -u -w "${report1_norm}" "${report2_norm}" > "${diff_file}" || true + + if [ ! 
-s "${report1_norm}" ]; then + echo "Kernel Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Kernel Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Kernel Report ASLR test [Success]" + fi +} + +test_basic_aslr +test_pipe_aslr +test_callchain_aslr +test_report_aslr +test_pipe_report_aslr +test_pipe_out_report_aslr +test_dropped_samples +test_kernel_aslr +test_kernel_report_aslr + +cleanup ${err} +exit $err -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* [PATCH v11 5/5] perf aslr: Strip sample registers 2026-06-05 18:52 ` [PATCH v11 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (3 preceding siblings ...) 2026-06-05 18:52 ` [PATCH v11 4/5] perf test: Add inject ASLR test Ian Rogers @ 2026-06-05 18:52 ` Ian Rogers 2026-06-05 19:24 ` [PATCH v12 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 5 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-05 18:52 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz Refactor the ASLR tool to strip out only the register dump payload by masking out the relevant perf_event_attr fields when the delegated tool is handling the data. struct aslr_evsel_priv maintains the original perf_event_attr values and is looked up via the evsel_orig_attrs hashmap. This allows us to keep samples that would otherwise be dropped because they contain registers, while still obfuscating the registers. 
Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/builtin-inject.c | 46 ++++-- tools/perf/tests/shell/inject_aslr.sh | 55 +++++++ tools/perf/util/aslr.c | 208 +++++++++++++++++++++----- tools/perf/util/aslr.h | 4 + 4 files changed, 263 insertions(+), 50 deletions(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 65c7eccccf4d..de315bb334b3 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -253,6 +253,12 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, return -ENOMEM; memcpy(stripped_event, event, event->header.size); stripped_event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + if (stripped_event->attr.attr.size >= + (offsetof(struct perf_event_attr, sample_regs_user) + sizeof(u64))) + stripped_event->attr.attr.sample_regs_user = 0; + if (stripped_event->attr.attr.size >= + (offsetof(struct perf_event_attr, sample_regs_intr) + sizeof(u64))) + stripped_event->attr.attr.sample_regs_intr = 0; if (stripped_event->attr.attr.type == PERF_TYPE_BREAKPOINT) stripped_event->attr.attr.bp_addr = 0; @@ -295,8 +301,13 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, attr.size = sizeof(struct perf_event_attr); attr.sample_type &= ~PERF_SAMPLE_AUX; - if (inject->aslr) + if (inject->aslr) { attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + if (attr.type == PERF_TYPE_BREAKPOINT) + attr.bp_addr = 0; + attr.sample_regs_user = 0; + attr.sample_regs_intr = 0; + } if (inject->itrace_synth_opts.add_last_branch) { attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; @@ -2618,6 +2629,9 @@ static int __cmd_inject(struct perf_inject *inject) + if (inject->aslr) + aslr_tool__strip_evlist(inject->session->tool, session->evlist); + session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; perf_session__inject_header(session, session->evlist, fd, 
&inj_fc.fc, @@ -2876,6 +2890,18 @@ int cmd_inject(int argc, const char **argv) if (zstd_init(&(inject.session->zstd_data), 0) < 0) pr_warning("Decompression initialization failed.\n"); + if (inject.aslr) { + struct evsel *evsel; + + evlist__for_each_entry(inject.session->evlist, evsel) { + ret = aslr_tool__cache_orig_attrs(tool, evsel); + if (ret) { + pr_err("Failed to cache original attributes: %d\n", ret); + goto out_delete; + } + } + } + /* Save original section info before feature bits change */ ret = save_section_info(&inject); if (ret) @@ -2894,10 +2920,17 @@ int cmd_inject(int argc, const char **argv) * the input. */ if (!data.is_pipe) { + if (inject.aslr) + aslr_tool__strip_evlist(tool, inject.session->evlist); + ret = perf_event__synthesize_for_pipe(&inject.tool, inject.session, &inject.output, perf_event__repipe); + + if (inject.aslr) + aslr_tool__restore_evlist(tool, inject.session->evlist); + if (ret < 0) goto out_delete; } @@ -2963,17 +2996,6 @@ int cmd_inject(int argc, const char **argv) ret = __cmd_inject(&inject); - if (inject.aslr) { - struct evsel *evsel; - - evlist__for_each_entry(inject.session->evlist, evsel) { - evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; - - if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) - evsel->core.attr.bp_addr = 0; - } - } - guest_session__exit(&inject.guest_session); out_delete: diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh index d8ded16ba905..21d306a0ff2f 100755 --- a/tools/perf/tests/shell/inject_aslr.sh +++ b/tools/perf/tests/shell/inject_aslr.sh @@ -450,6 +450,60 @@ test_kernel_report_aslr() { fi } +test_regs_stripping() { + echo "Test user register stripping" + local rdata="${temp_dir}/perf.data.regs" + local rdata2="${temp_dir}/perf.data.regs.injected" + local rdata_clean="${temp_dir}/perf.data.regs.clean" + + if ! 
perf record --user-regs -o "${rdata}" ${prog} > /dev/null 2>&1; then + echo "Skipping user registers test as recording failed (unsupported flag/platform)" + return + fi + + perf inject -b -i "${rdata}" -o "${rdata_clean}" + perf inject -v -b --aslr -i "${rdata}" -o "${rdata2}" + + local report1="${temp_dir}/report_regs1" + local report2="${temp_dir}/report_regs2" + local report1_clean="${temp_dir}/report_regs1.clean" + local report2_clean="${temp_dir}/report_regs2.clean" + local diff_file="${temp_dir}/diff_regs" + + perf report -i "${rdata_clean}" --stdio > "${report1}" 2>/dev/null || true + perf report -i "${rdata2}" --stdio > "${report2}" 2>/dev/null || true + + grep '%' "${report1}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | \ + sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | \ + sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! 
-s "${report1_clean}" ]; then + echo "User registers stripping test [Failed - profile trace starved/empty]" + err=1 + return + elif [ -s "${diff_file}" ]; then + echo "User registers stripping test [Failed - report parsing differs]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + return + fi + + local script_dump="${temp_dir}/script_regs_dump" + perf script -D -i "${rdata2}" > "${script_dump}" 2>/dev/null || true + if grep -q "PERF_SAMPLE_REGS_USER" "${script_dump}"; then + echo "User registers stripping test [Failed - register dumps still present]" + err=1 + else + echo "User registers stripping test [Success]" + fi +} + test_basic_aslr test_pipe_aslr test_callchain_aslr @@ -459,6 +513,7 @@ test_pipe_out_report_aslr test_dropped_samples test_kernel_aslr test_kernel_report_aslr +test_regs_stripping cleanup ${err} exit $err diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c index 6d081dfae480..f9c00caf79b7 100644 --- a/tools/perf/util/aslr.c +++ b/tools/perf/util/aslr.c @@ -5,6 +5,7 @@ #include "debug.h" #include "event.h" #include "evsel.h" +#include "evlist.h" #include "machine.h" #include "map.h" #include "thread.h" @@ -16,6 +17,7 @@ #include <internal/lib.h> /* page_size */ #include <linux/compiler.h> #include <linux/zalloc.h> +#include <errno.h> #include <inttypes.h> #include <unistd.h> @@ -43,6 +45,22 @@ struct aslr_mapping { u64 remap_start; }; +struct aslr_evsel_priv { + u64 orig_sample_type; + u64 orig_sample_regs_user; + u64 orig_sample_regs_intr; +}; + +static size_t evsel_hash(long key, void *ctx __maybe_unused) +{ + return (size_t)key; +} + +static bool evsel_equal(long key1, long key2, void *ctx __maybe_unused) +{ + return key1 == key2; +} + struct process_top_address { u64 remapped_max; u64 orig_last_end; @@ -58,6 +76,11 @@ struct aslr_tool { struct hashmap remap_addresses; /** @top_addresses: mapping from process to max remapped address. 
*/ struct hashmap top_addresses; + /** + * @evsel_orig_attrs: mapping from evsel pointer to its original + * unstripped sample_type and registers bitmasks. + */ + struct hashmap evsel_orig_attrs; }; static const pid_t kernel_pid = -1; @@ -613,6 +636,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, struct aslr_tool *aslr; struct perf_tool *delegate; int ret; + int orig_sample_size; u64 sample_type; struct thread *thread; struct machine *aslr_machine; @@ -625,7 +649,10 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, u64 addr; size_t i; size_t j; - + struct aslr_evsel_priv *priv = NULL; + u64 orig_sample_type; + u64 orig_regs_user; + u64 orig_regs_intr; del_tool = container_of(tool, struct delegate_tool, tool); aslr = container_of(del_tool, struct aslr_tool, tool); delegate = aslr->tool.delegate; @@ -634,7 +661,23 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, return delegate->sample(delegate, event, sample, machine); ret = -EFAULT; - sample_type = evsel->core.attr.sample_type; + + if (hashmap__find(&aslr->evsel_orig_attrs, evsel, &priv)) { + orig_sample_type = priv->orig_sample_type; + orig_regs_user = priv->orig_sample_regs_user; + orig_regs_intr = priv->orig_sample_regs_intr; + } else { + orig_sample_type = evsel->core.attr.sample_type; + orig_regs_user = evsel->core.attr.sample_regs_user; + orig_regs_intr = evsel->core.attr.sample_regs_intr; + } + + orig_sample_size = evsel->sample_size; + + sample_type = orig_sample_type; + sample_type &= ~PERF_SAMPLE_REGS_USER; + sample_type &= ~PERF_SAMPLE_REGS_INTR; + max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); new_event = (union perf_event *)aslr->event_copy; @@ -683,25 +726,25 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, i++; \ } while (0) - if (sample_type & PERF_SAMPLE_IDENTIFIER) + if (orig_sample_type & 
PERF_SAMPLE_IDENTIFIER) COPY_U64(); /* id */ - if (sample_type & PERF_SAMPLE_IP) + if (orig_sample_type & PERF_SAMPLE_IP) REMAP_U64(sample->ip); - if (sample_type & PERF_SAMPLE_TID) + if (orig_sample_type & PERF_SAMPLE_TID) COPY_U64(); /* pid, tid */ - if (sample_type & PERF_SAMPLE_TIME) + if (orig_sample_type & PERF_SAMPLE_TIME) COPY_U64(); /* time */ - if (sample_type & PERF_SAMPLE_ADDR) + if (orig_sample_type & PERF_SAMPLE_ADDR) REMAP_U64(sample->addr); - if (sample_type & PERF_SAMPLE_ID) + if (orig_sample_type & PERF_SAMPLE_ID) COPY_U64(); /* id */ - if (sample_type & PERF_SAMPLE_STREAM_ID) + if (orig_sample_type & PERF_SAMPLE_STREAM_ID) COPY_U64(); /* stream_id */ - if (sample_type & PERF_SAMPLE_CPU) + if (orig_sample_type & PERF_SAMPLE_CPU) COPY_U64(); /* cpu, res */ - if (sample_type & PERF_SAMPLE_PERIOD) + if (orig_sample_type & PERF_SAMPLE_PERIOD) COPY_U64(); /* period */ - if (sample_type & PERF_SAMPLE_READ) { + if (orig_sample_type & PERF_SAMPLE_READ) { if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { COPY_U64(); /* value */ if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) @@ -735,7 +778,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, } } } - if (sample_type & PERF_SAMPLE_CALLCHAIN) { + if (orig_sample_type & PERF_SAMPLE_CALLCHAIN) { u64 nr; if (CHECK_BOUNDS(1, 1)) { @@ -801,7 +844,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr); } } - if (sample_type & PERF_SAMPLE_RAW) { + if (orig_sample_type & PERF_SAMPLE_RAW) { size_t bytes = sizeof(u32) + sample->raw_size; size_t u64_words = (bytes + 7) / 8; @@ -820,7 +863,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + if (orig_sample_type & PERF_SAMPLE_BRANCH_STACK) { u64 nr; if (CHECK_BOUNDS(1, 1)) { @@ -865,19 +908,25 @@ static int aslr_tool__process_sample(const 
struct perf_tool *tool, goto out_put; } } - if (sample_type & PERF_SAMPLE_REGS_USER) { + if (orig_sample_type & PERF_SAMPLE_REGS_USER) { + u64 abi; + if (CHECK_BOUNDS(1, 0)) { ret = -EFAULT; goto out_put; } - /* abi */ - COPY_U64(); - /* TODO: can this be less conservative? */ - pr_debug("Dropping regs user sample as possible ASLR leak\n"); - ret = 0; - goto out_put; + abi = in_array[i++]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(orig_regs_user); + + if (nr > max_i - i) { + ret = -EFAULT; + goto out_put; + } + i += nr; + } } - if (sample_type & PERF_SAMPLE_STACK_USER) { + if (orig_sample_type & PERF_SAMPLE_STACK_USER) { u64 size; if (CHECK_BOUNDS(1, 1)) { @@ -908,39 +957,45 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + if (orig_sample_type & PERF_SAMPLE_WEIGHT_TYPE) COPY_U64(); /* perf_sample_weight */ - if (sample_type & PERF_SAMPLE_DATA_SRC) + if (orig_sample_type & PERF_SAMPLE_DATA_SRC) COPY_U64(); /* data_src */ - if (sample_type & PERF_SAMPLE_TRANSACTION) + if (orig_sample_type & PERF_SAMPLE_TRANSACTION) COPY_U64(); /* transaction */ - if (sample_type & PERF_SAMPLE_REGS_INTR) { + if (orig_sample_type & PERF_SAMPLE_REGS_INTR) { + u64 abi; + if (CHECK_BOUNDS(1, 0)) { ret = -EFAULT; goto out_put; } - /* abi */ - COPY_U64(); - /* TODO: can this be less conservative? */ - pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); - ret = 0; - goto out_put; + abi = in_array[i++]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(orig_regs_intr); + + if (nr > max_i - i) { + ret = -EFAULT; + goto out_put; + } + i += nr; + } } - if (sample_type & PERF_SAMPLE_PHYS_ADDR) { + if (orig_sample_type & PERF_SAMPLE_PHYS_ADDR) { COPY_U64(); /* phys_addr */ /* TODO: can this be less conservative? 
*/ pr_debug("Dropping physical address sample as possible ASLR leak\n"); ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_CGROUP) + if (orig_sample_type & PERF_SAMPLE_CGROUP) COPY_U64(); /* cgroup */ - if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + if (orig_sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) COPY_U64(); /* data_page_size */ - if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + if (orig_sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) COPY_U64(); /* code_page_size */ - if (sample_type & PERF_SAMPLE_AUX) { + if (orig_sample_type & PERF_SAMPLE_AUX) { u64 size; if (CHECK_BOUNDS(1, 1)) { @@ -981,10 +1036,20 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); + /* Temporarily override evsel attributes to match the stripped new_event format! */ + evsel->sample_size = __evsel__sample_size(sample_type); + evsel->core.attr.sample_type = sample_type; + evsel->core.attr.sample_regs_user = 0; + evsel->core.attr.sample_regs_intr = 0; perf_sample__init(&new_sample, /*all=*/ true); ret = evsel__parse_sample(evsel, new_event, &new_sample); if (ret) { + /* Restore original attributes immediately if parsing fails */ + evsel->sample_size = orig_sample_size; + evsel->core.attr.sample_type = orig_sample_type; + evsel->core.attr.sample_regs_user = orig_regs_user; + evsel->core.attr.sample_regs_intr = orig_regs_intr; perf_sample__exit(&new_sample); goto out_put; } @@ -993,6 +1058,12 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = delegate->sample(delegate, new_event, &new_sample, machine); perf_sample__exit(&new_sample); + /* Restore original attributes so trace ingestion never desynchronizes! 
*/ + evsel->sample_size = orig_sample_size; + evsel->core.attr.sample_type = orig_sample_type; + evsel->core.attr.sample_regs_user = orig_regs_user; + evsel->core.attr.sample_regs_intr = orig_regs_intr; + out_put: thread__put(thread); return ret; @@ -1059,6 +1130,9 @@ static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) hashmap__init(&aslr->top_addresses, top_addresses__hash, top_addresses__equal, /*ctx=*/NULL); + hashmap__init(&aslr->evsel_orig_attrs, + evsel_hash, evsel_equal, + /*ctx=*/NULL); aslr->tool.tool.sample = aslr_tool__process_sample; /* read - reads a counter, okay to delegate. */ @@ -1120,11 +1194,69 @@ void aslr_tool__delete(struct perf_tool *tool) zfree(&cur->pkey); zfree(&cur->pvalue); } + hashmap__for_each_entry(&aslr->evsel_orig_attrs, cur, bkt) { + zfree(&cur->pvalue); + } hashmap__clear(&aslr->remap_addresses); hashmap__clear(&aslr->top_addresses); + hashmap__clear(&aslr->evsel_orig_attrs); aslr_tool__destroy_machines_priv(&aslr->machines); machines__destroy_kernel_maps(&aslr->machines); machines__exit(&aslr->machines); free(aslr); } + +int aslr_tool__cache_orig_attrs(struct perf_tool *tool, struct evsel *evsel) +{ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct aslr_evsel_priv *priv = zalloc(sizeof(*priv)); + int err; + + if (!priv) + return -ENOMEM; + + priv->orig_sample_type = evsel->core.attr.sample_type; + priv->orig_sample_regs_user = evsel->core.attr.sample_regs_user; + priv->orig_sample_regs_intr = evsel->core.attr.sample_regs_intr; + + err = hashmap__add(&aslr->evsel_orig_attrs, evsel, priv); + if (err) { + free(priv); + return err; + } + return 0; +} + +void aslr_tool__strip_evlist(const struct perf_tool *tool __maybe_unused, struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + 
evsel__reset_sample_bit(evsel, REGS_USER); + evsel__reset_sample_bit(evsel, REGS_INTR); + evsel->core.attr.sample_regs_user = 0; + evsel->core.attr.sample_regs_intr = 0; + + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) + evsel->core.attr.bp_addr = 0; + } +} + +void aslr_tool__restore_evlist(const struct perf_tool *tool, struct evlist *evlist) +{ + const struct delegate_tool *del_tool = container_of(tool, const struct delegate_tool, tool); + const struct aslr_tool *aslr = container_of(del_tool, const struct aslr_tool, tool); + struct evsel *evsel; + struct aslr_evsel_priv *priv; + + evlist__for_each_entry(evlist, evsel) { + if (hashmap__find(&aslr->evsel_orig_attrs, evsel, &priv)) { + evsel->core.attr.sample_type = priv->orig_sample_type; + evsel->core.attr.sample_regs_user = priv->orig_sample_regs_user; + evsel->core.attr.sample_regs_intr = priv->orig_sample_regs_intr; + } + } +} diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h index a9b90bf29540..4c2cffc0e500 100644 --- a/tools/perf/util/aslr.h +++ b/tools/perf/util/aslr.h @@ -30,8 +30,12 @@ struct perf_tool; struct evsel; +struct evlist; struct perf_tool *aslr_tool__new(struct perf_tool *delegate); void aslr_tool__delete(struct perf_tool *aslr); +int aslr_tool__cache_orig_attrs(struct perf_tool *tool, struct evsel *evsel); +void aslr_tool__strip_evlist(const struct perf_tool *tool, struct evlist *evlist); +void aslr_tool__restore_evlist(const struct perf_tool *tool, struct evlist *evlist); #endif /* __PERF_ASLR_H */ -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* [PATCH v12 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes 2026-06-05 18:52 ` [PATCH v11 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (4 preceding siblings ...) 2026-06-05 18:52 ` [PATCH v11 5/5] perf aslr: Strip sample registers Ian Rogers @ 2026-06-05 19:24 ` Ian Rogers 2026-06-05 19:24 ` [PATCH v12 1/5] perf maps: Add maps__mutate_mapping Ian Rogers ` (5 more replies) 5 siblings, 6 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-05 19:24 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz This patch series introduces the new 'perf inject --aslr' feature to remap virtual memory addresses or drop physical memory event leaks when profile record data is shared between machines. Bundled with this feature is a bug fix inside the core map tracking tool that hardens perf session analysis against concurrent lookup data races. Detailed Mechanism of MMAP Mapping and ASLR virtual Address Allocation: The ASLR tool virtualizes the address space of the recorded processes by intercepting MMAP and MMAP2 events to build a consistent translation database, which is subsequently used to rewrite sample addresses. It maintains two primary lookup databases using hash maps: 1. 'remap_addresses': Maps an original mapping key to its new remapped base address. The key uses topological invariant coordinates: (machine, dso, invariant). The invariant is computed as (start - pgoff) for DSO-backed mappings. This invariant remains constant even when perf's internal overlap-resolution splits a VMA into fragmented pieces, ensuring split maps resolve consistently back to the same remapped base. 2. 'top_addresses': Tracks the allocation state per process (machine, pid). 
It maintains 'remapped_max' (the highest allocated address in the virtualized space) and 'orig_last_end' (the end address of the last processed original mapping). For each MMAP/MMAP2 event: - We look up the DSO and invariant key in 'remap_addresses'. If found, we reuse the translation, preserving the offset within the mapping. - If not found, we allocate a new remapped address space: - If the new mapping is contiguous with the previous one in the original address space (start == orig_last_end), we place it contiguously in the remapped space. This is critical to preserve the contiguity of mappings for downstream merging (e.g. symbols split by HugeTLB, or anonymous .bss segments adjacent to initialized data). - If not contiguous, we insert a 1-page gap (using page_size) from the previous maximum allocated address to prevent accidental merging of unrelated VMAs. - The event's start address (and pgoff for kernel maps) is rewritten, and the event is delegated to the output writer. To remain strictly conservative and guarantee security, the tool scrubs breakpoint addresses (bp_addr) from all synthesized stream headers, completely drops PERF_RECORD_TEXT_POKE events to prevent absolute immediate pointer operand leaks, and drops unsupported complex payloads (such as user register stacks, raw tracepoints, and hardware AUX tracing frames). Verification is reinforced with a shell test ('inject_aslr.sh'). Prerequisite Bug Fix (Patch 1): During development, a core map indexing issue was identified and resolved to prevent concurrent lookup data races during session analysis. Changes since v11: - Patch 1: Fixed struct dso name accessor in maps.c by using dso__name() instead of ->name. - Patch 2: Fixed hash function in aslr.c to hash the underlying dso pointer using RC_CHK_ACCESS to support reference count checking.
Changes since v10: - Patch 1: Added explicit tracking array logic in maps__load_maps() to correctly accumulate valid maps (skipping NULL entries after failures) and safely return the exact populated count, resolving out-of-bounds pointer iteration panics. - Patch 3: Fixed endianness bug during cross-endian sample parsing by passing evsel->needs_swap instead of false to __evsel__parse_sample in aslr.c, ensuring correct 32-bit field byte unswapping for packed fields. Refactored evsel__parse_sample to take a needs_swap argument via __evsel__parse_sample. - Patch 4: Fixed inject_aslr.sh exit code handling in trap functions to capture and propagate the correct pipeline failure status code instead of unconditionally returning success or failing the test. Changes since v9: - Patch 1: Added `-ENOMEM` error check inside `maps__find_symbol_by_name()` and return `NULL` early. Added map sorting state invalidation on early return in `maps__load_maps()`. - Patch 2: Fixed encapsulation by using `thread__maps()` and `thread__pid()` accessors in `aslr_tool__findnew_mapping()`. Added `pr_warning_once` warning when raw auxtrace data is dropped. - Patch 3: Fixed encapsulation by using `thread__maps()` and `thread__pid()` accessors in `aslr_tool__remap_address()`. Wrapped `evsel__parse_sample()` to temporarily disable `needs_swap` to avoid branch stack endianness corruption on cross-endian files. Fixed ISO C90 warning for declaration-after-statement for `orig_needs_swap`. - Patch 4: Fixed duplicate cleanup by explicitly removing trap handlers (`trap - EXIT TERM INT`) inside the `cleanup()` function. - Patch 5: Fixed heap corruption by adding size bounds checking before writing to `sample_regs_user` and `sample_regs_intr` fields. Added missing register mask clearing logic for the `itrace` synthesis path of `perf_event__repipe_attr()`. 
Ian Rogers (5): perf maps: Add maps__mutate_mapping perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking perf inject/aslr: Implement sample address remapping perf test: Add inject ASLR test perf aslr: Strip sample registers tools/perf/builtin-inject.c | 79 +- tools/perf/tests/shell/inject_aslr.sh | 519 ++++++++++ tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 1262 +++++++++++++++++++++++++ tools/perf/util/aslr.h | 41 + tools/perf/util/evsel.c | 6 +- tools/perf/util/evsel.h | 10 +- tools/perf/util/machine.c | 32 +- tools/perf/util/maps.c | 149 ++- tools/perf/util/maps.h | 3 + tools/perf/util/symbol-elf.c | 41 +- tools/perf/util/symbol.c | 17 +- 12 files changed, 2095 insertions(+), 65 deletions(-) create mode 100755 tools/perf/tests/shell/inject_aslr.sh create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v12 1/5] perf maps: Add maps__mutate_mapping 2026-06-05 19:24 ` [PATCH v12 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers @ 2026-06-05 19:24 ` Ian Rogers 2026-06-05 19:24 ` [PATCH v12 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers ` (4 subsequent siblings) 5 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-05 19:24 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz During kernel ELF symbol parsing (dso__process_kernel_symbol), proc kallsyms image loading (dso__load_kernel_sym, dso__load_guest_kernel_sym), and dynamic kernel memory map alignment updates (machine__update_kernel_mmap), the loader directly modifies live virtual address boundary key fields on map objects. If these boundaries are mutated while the map pointer actively resides inside the parent maps cache array list (kmaps) outside of any lock closure, an unsafe concurrent window is exposed where parallel worker lookup threads (e.g., inside perf top) can mistakenly assume the cache remains sorted based on stale parameters, executing binary search queries (bsearch) across an unsorted range and triggering lookup failures. Fix this by introducing maps__mutate_mapping() that explicitly acquires the parent maps write semaphore lock, executes an incoming mutation callback block to perform the field updates under lock protection, and invalidates the sorted tracking flags prior to releasing the write lock. This guarantees synchronization invariants, closing the concurrent lookup race window. The adjacent module alignment pass inside machine__create_kernel_maps() is safely preserved as a high-performance lockless pass, as its invocation lifecycle bounds remain strictly single-threaded by contract during session initialization construction.
To safely support this unconditional down_write write lock mutator without recursive read-to-write self-deadlock upgrades during lazy symbol loading, we introduce a public maps__load_maps() API. It copies map pointers under a brief read lock and force-loads all modules locklessly outside the lock. Callers (such as perf inject) must pre-load all kernel symbol maps up front at startup using maps__load_maps(), completely bypassing dynamic runtime mutations. Fixes: 39b12f781271 ("perf tools: Make it possible to read object code from vmlinux") Assisted-by: Antigravity:gemini-3.5-flash Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/util/machine.c | 32 +++++--- tools/perf/util/maps.c | 149 ++++++++++++++++++++++++++++------- tools/perf/util/maps.h | 3 + tools/perf/util/symbol-elf.c | 41 ++++++---- tools/perf/util/symbol.c | 17 +++- 5 files changed, 184 insertions(+), 58 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index da1ad58758af..1ea06fde14e0 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1539,22 +1539,30 @@ static void machine__set_kernel_mmap(struct machine *machine, map__set_end(machine->vmlinux_map, ~0ULL); } -static int machine__update_kernel_mmap(struct machine *machine, - u64 start, u64 end) +struct kernel_mmap_mutation_ctx { + u64 start; + u64 end; +}; + +static int kernel_mmap_mutate_cb(struct map *map, void *data) { - struct map *orig, *updated; - int err; + struct kernel_mmap_mutation_ctx *ctx = data; - orig = machine->vmlinux_map; - updated = map__get(orig); + map__set_start(map, ctx->start); + map__set_end(map, ctx->end); + if (ctx->start == 0 && ctx->end == 0) + map__set_end(map, ~0ULL); + return 0; +} - machine->vmlinux_map = updated; - maps__remove(machine__kernel_maps(machine), orig); - machine__set_kernel_mmap(machine, start, end); - err = maps__insert(machine__kernel_maps(machine), updated); - map__put(orig); +static int machine__update_kernel_mmap(struct machine *machine, 
+ u64 start, u64 end) +{ + struct kernel_mmap_mutation_ctx ctx = { .start = start, .end = end }; - return err; + return maps__mutate_mapping(machine__kernel_maps(machine), + machine->vmlinux_map, + kernel_mmap_mutate_cb, &ctx); } int machine__create_kernel_maps(struct machine *machine) diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index 923935ee21b6..b1b8efe42149 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -576,6 +576,49 @@ void maps__remove(struct maps *maps, struct map *map) #endif } +/** + * maps__mutate_mapping - Apply write-protected mutations to a map. + * @maps: The maps collection containing the map. + * @map: The map to mutate. + * @mutate_cb: Callback function that performs the actual mutations. + * @data: Private data passed to the callback. + * + * This acquires the write lock on the maps semaphore to safely protect + * concurrent readers from seeing partially mutated or unsorted map boundaries. + * + * WARNING: Acquiring down_write() here can trigger a recursive self-deadlock if + * the caller already holds the read lock (e.g., during maps__for_each_map() or + * maps__find() iteration paths that trigger lazy symbol loading). To completely + * avoid this deadlock, all kernel/module maps must be pre-loaded up-front (via + * maps__load_maps()) under a clean, single-threaded context before entering + * multi-threaded event processing loops. 
+ */ +int maps__mutate_mapping(struct maps *maps, struct map *map, + int (*mutate_cb)(struct map *map, void *data), void *data) +{ + int err = 0; + + if (maps) + down_write(maps__lock(maps)); + + err = mutate_cb(map, data); + + if (maps) { + RC_CHK_ACCESS(maps)->maps_by_address_sorted = false; + RC_CHK_ACCESS(maps)->maps_by_name_sorted = false; + } + + if (maps) + up_write(maps__lock(maps)); + +#ifdef HAVE_LIBDW_SUPPORT + if (maps) + libdw__invalidate_dwfl(maps, maps__libdw_addr_space_dwfl(maps)); +#endif + + return err; +} + bool maps__empty(struct maps *maps) { bool res; @@ -626,6 +669,41 @@ int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data) return ret; } +int maps__load_maps(struct maps *maps) +{ + struct map **maps_copy; + unsigned int nr_maps; + int err = 0; + + if (!maps) + return 0; + + down_read(maps__lock(maps)); + nr_maps = maps__nr_maps(maps); + if (nr_maps == 0) { + up_read(maps__lock(maps)); + return 0; + } + maps_copy = calloc(nr_maps, sizeof(*maps_copy)); + if (!maps_copy) { + up_read(maps__lock(maps)); + return -ENOMEM; + } + for (unsigned int i = 0; i < nr_maps; i++) + maps_copy[i] = map__get(maps__maps_by_address(maps)[i]); + up_read(maps__lock(maps)); + + for (unsigned int i = 0; i < nr_maps; i++) { + if (map__load(maps_copy[i]) < 0) { + pr_warning("Failed to load map %s\n", dso__name(map__dso(maps_copy[i]))); + err = -1; + } + map__put(maps_copy[i]); + } + free(maps_copy); + return err; +} + void maps__remove_maps(struct maps *maps, bool (*cb)(struct map *map, void *data), void *data) { struct map **maps_by_address; @@ -668,40 +746,57 @@ struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp) return result; } -struct maps__find_symbol_by_name_args { - struct map **mapp; - const char *name; - struct symbol *sym; -}; - -static int maps__find_symbol_by_name_cb(struct map *map, void *data) +struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) { - 
struct maps__find_symbol_by_name_args *args = data; + struct map **maps_copy; + unsigned int nr_maps; + struct symbol *sym = NULL; - args->sym = map__find_symbol_by_name(map, args->name); - if (!args->sym) - return 0; + if (!maps) + return NULL; - if (!map__contains_symbol(map, args->sym)) { - args->sym = NULL; - return 0; + /* + * First, ensure all maps are loaded. We pre-load them outside of any + * read-to-write locks to avoid deadlocks. Even if some fail, we proceed. + */ + maps__load_maps(maps); + + /* + * Create a local snapshot of the maps while holding the read lock. + * This prevents deadlocking if iteration triggers further map insertions. + */ + down_read(maps__lock(maps)); + nr_maps = maps__nr_maps(maps); + maps_copy = calloc(nr_maps, sizeof(*maps_copy)); + if (maps_copy) { + for (unsigned int i = 0; i < nr_maps; i++) { + struct map *map = maps__maps_by_address(maps)[i]; + + maps_copy[i] = map__get(map); + } } + up_read(maps__lock(maps)); - if (args->mapp != NULL) - *args->mapp = map__get(map); - return 1; -} + if (!maps_copy) + return NULL; -struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) -{ - struct maps__find_symbol_by_name_args args = { - .mapp = mapp, - .name = name, - .sym = NULL, - }; + for (unsigned int i = 0; i < nr_maps; i++) { + struct map *map = maps_copy[i]; + + sym = map__find_symbol_by_name(map, name); + if (sym && map__contains_symbol(map, sym)) { + if (mapp) + *mapp = map__get(map); + break; + } + sym = NULL; + } + + for (unsigned int i = 0; i < nr_maps; i++) + map__put(maps_copy[i]); - maps__for_each_map(maps, maps__find_symbol_by_name_cb, &args); - return args.sym; + free(maps_copy); + return sym; } int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams) diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h index 5b80b199685e..4ec9b7453a3b 100644 --- a/tools/perf/util/maps.h +++ b/tools/perf/util/maps.h @@ -59,8 +59,11 @@ void maps__set_libdw_addr_space_dwfl(struct 
maps *maps, void *dwfl); size_t maps__fprintf(struct maps *maps, FILE *fp); +int maps__load_maps(struct maps *maps); int maps__insert(struct maps *maps, struct map *map); void maps__remove(struct maps *maps, struct map *map); +int maps__mutate_mapping(struct maps *maps, struct map *map, + int (*mutate_cb)(struct map *map, void *data), void *data); struct map *maps__find(struct maps *maps, u64 addr); struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 186e6d92ac3d..d1e93c0556dd 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1342,6 +1342,24 @@ static u64 ref_reloc(struct kmap *kmap) void __weak arch__sym_update(struct symbol *s __maybe_unused, GElf_Sym *sym __maybe_unused) { } +struct remap_kernel_ctx { + u64 sh_addr; + u64 sh_size; + u64 sh_offset; + struct kmap *kmap; +}; + +static int remap_kernel_cb(struct map *map, void *data) +{ + struct remap_kernel_ctx *ctx = data; + + map__set_start(map, ctx->sh_addr + ref_reloc(ctx->kmap)); + map__set_end(map, map__start(map) + ctx->sh_size); + map__set_pgoff(map, ctx->sh_offset); + map__set_mapping_type(map, MAPPING_TYPE__DSO); + return 0; +} + static int dso__process_kernel_symbol(struct dso *dso, struct map *map, GElf_Sym *sym, GElf_Shdr *shdr, struct maps *kmaps, struct kmap *kmap, @@ -1372,22 +1390,15 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, * map to the kernel dso. 
*/ if (*remap_kernel && dso__kernel(dso) && !kmodule) { + struct remap_kernel_ctx ctx = { + .sh_addr = shdr->sh_addr, + .sh_size = shdr->sh_size, + .sh_offset = shdr->sh_offset, + .kmap = kmap + }; + *remap_kernel = false; - map__set_start(map, shdr->sh_addr + ref_reloc(kmap)); - map__set_end(map, map__start(map) + shdr->sh_size); - map__set_pgoff(map, shdr->sh_offset); - map__set_mapping_type(map, MAPPING_TYPE__DSO); - /* Ensure maps are correctly ordered */ - if (kmaps) { - int err; - struct map *tmp = map__get(map); - - maps__remove(kmaps, map); - err = maps__insert(kmaps, map); - map__put(tmp); - if (err) - return err; - } + maps__mutate_mapping(kmaps, map, remap_kernel_cb, &ctx); } /* diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 0c46b24ee098..2cc911af8c81 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -48,6 +48,13 @@ #include <symbol/kallsyms.h> #include <sys/utsname.h> +static int map_fixup_cb(struct map *map, void *data __maybe_unused) +{ + map__fixup_start(map); + map__fixup_end(map); + return 0; +} + static int dso__load_kernel_sym(struct dso *dso, struct map *map); static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map); @@ -2240,10 +2247,11 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map) free(kallsyms_allocated_filename); if (err > 0 && !dso__is_kcore(dso)) { + struct maps *kmaps = map__kmaps(map); + dso__set_binary_type(dso, DSO_BINARY_TYPE__KALLSYMS); dso__set_long_name(dso, DSO__NAME_KALLSYMS, false); - map__fixup_start(map); - map__fixup_end(map); + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); } return err; @@ -2283,10 +2291,11 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map) if (err > 0) pr_debug("Using %s for symbols\n", kallsyms_filename); if (err > 0 && !dso__is_kcore(dso)) { + struct maps *kmaps = map__kmaps(map); + dso__set_binary_type(dso, DSO_BINARY_TYPE__GUEST_KALLSYMS); dso__set_long_name(dso, machine->mmap_name, 
false); - map__fixup_start(map); - map__fixup_end(map); + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); } return err; -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* [PATCH v12 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking 2026-06-05 19:24 ` [PATCH v12 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-05 19:24 ` [PATCH v12 1/5] perf maps: Add maps__mutate_mapping Ian Rogers @ 2026-06-05 19:24 ` Ian Rogers 2026-06-05 19:38 ` sashiko-bot 2026-06-05 19:24 ` [PATCH v12 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers ` (3 subsequent siblings) 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-05 19:24 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz If perf.data files are taken from one machine to another they may leak virtual addresses and so weaken ASLR on the machine they are coming from. Add an aslr option for perf inject that remaps all virtual addresses, or drops data/events, so that the virtual address information isn't leaked. This patch introduces the core ASLR remapping tool infrastructure and implements remapping/tracking for metadata events (MMAP, MMAP2, COMM, FORK, EXIT, KSYMBOL, TEXT_POKE). Sample events are delegated without remapping for now. 
Assisted-by: Antigravity:gemini-3.5-flash Signed-off-by: Ian Rogers <irogers@google.com> Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> --- tools/perf/builtin-inject.c | 57 ++- tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 689 ++++++++++++++++++++++++++++++++++++ tools/perf/util/aslr.h | 37 ++ 4 files changed, 782 insertions(+), 2 deletions(-) create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 75ffe31d03fe..65c7eccccf4d 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -8,6 +8,7 @@ */ #include "builtin.h" +#include "util/aslr.h" #include "util/color.h" #include "util/dso.h" #include "util/vdso.h" @@ -124,6 +125,7 @@ struct perf_inject { bool in_place_update_dry_run; bool copy_kcore_dir; bool convert_callchain; + bool aslr; const char *input_name; struct perf_data output; u64 bytes_written; @@ -242,8 +244,25 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, if (!inject->output.is_pipe) return 0; - if (!inject->itrace_synth_opts.set) + if (!inject->itrace_synth_opts.set) { + if (inject->aslr) { + union perf_event *stripped_event = malloc(event->header.size); + int err; + + if (!stripped_event) + return -ENOMEM; + memcpy(stripped_event, event, event->header.size); + stripped_event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + + if (stripped_event->attr.attr.type == PERF_TYPE_BREAKPOINT) + stripped_event->attr.attr.bp_addr = 0; + + err = perf_event__repipe_synth(tool, stripped_event); + free(stripped_event); + return err; + } return perf_event__repipe_synth(tool, event); + } if (event->header.size < sizeof(struct perf_event_header) + PERF_ATTR_SIZE_VER0) { pr_err("Attribute event size %u is too small\n", event->header.size); @@ -276,6 +295,8 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, attr.size = sizeof(struct perf_event_attr); 
attr.sample_type &= ~PERF_SAMPLE_AUX; + if (inject->aslr) + attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; if (inject->itrace_synth_opts.add_last_branch) { attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; @@ -2595,6 +2616,8 @@ static int __cmd_inject(struct perf_inject *inject) } } + + session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc, @@ -2704,6 +2727,8 @@ int cmd_inject(int argc, const char **argv) unwind__option), OPT_BOOLEAN(0, "convert-callchain", &inject.convert_callchain, "Generate callchains using DWARF and drop register/stack data"), + OPT_BOOLEAN(0, "aslr", &inject.aslr, + "Remap virtual memory addresses similar to ASLR"), OPT_END() }; const char * const inject_usage[] = { @@ -2711,6 +2736,7 @@ int cmd_inject(int argc, const char **argv) NULL }; bool ordered_events; + struct perf_tool *tool = &inject.tool; if (!inject.itrace_synth_opts.set) { /* Disable eager loading of kernel symbols that adds overhead to perf inject. 
*/ @@ -2731,6 +2757,11 @@ int cmd_inject(int argc, const char **argv) if (argc) usage_with_options(inject_usage, options); + if (inject.aslr && inject.convert_callchain) { + pr_err("Error: --aslr and --convert-callchain are mutually exclusive features.\n"); + return -EINVAL; + } + if (inject.strip && !inject.itrace_synth_opts.set) { pr_err("--strip option requires --itrace option\n"); return -1; @@ -2824,12 +2855,21 @@ int cmd_inject(int argc, const char **argv) inject.tool.schedstat_domain = perf_event__repipe_op2_synth; inject.tool.dont_split_sample_group = true; inject.tool.merge_deferred_callchains = false; - inject.session = __perf_session__new(&data, &inject.tool, + if (inject.aslr) { + tool = aslr_tool__new(&inject.tool); + if (!tool) { + ret = -ENOMEM; + goto out_close_output; + } + } + inject.session = __perf_session__new(&data, tool, /*trace_event_repipe=*/inject.output.is_pipe, /*host_env=*/NULL); if (IS_ERR(inject.session)) { ret = PTR_ERR(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); goto out_close_output; } @@ -2923,12 +2963,25 @@ int cmd_inject(int argc, const char **argv) ret = __cmd_inject(&inject); + if (inject.aslr) { + struct evsel *evsel; + + evlist__for_each_entry(inject.session->evlist, evsel) { + evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) + evsel->core.attr.bp_addr = 0; + } + } + guest_session__exit(&inject.guest_session); out_delete: strlist__delete(inject.known_build_ids); zstd_fini(&(inject.session->zstd_data)); perf_session__delete(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); out_close_output: if (!inject.in_place_update) perf_data__close(&inject.output); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 4bbc78b1f741..19994e026ae5 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -6,6 +6,7 @@ perf-util-y += arm64-frame-pointer-unwind-support.o perf-util-y += addr2line.o perf-util-y += addr_location.o 
perf-util-y += annotate.o +perf-util-y += aslr.o perf-util-y += blake2s.o perf-util-y += block-info.o perf-util-y += block-range.o diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c new file mode 100644 index 000000000000..ac24fda658a5 --- /dev/null +++ b/tools/perf/util/aslr.c @@ -0,0 +1,689 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "aslr.h" + +#include "addr_location.h" +#include "debug.h" +#include "event.h" +#include "evsel.h" +#include "machine.h" +#include "map.h" +#include "thread.h" +#include "tool.h" +#include "session.h" +#include "data.h" +#include "dso.h" + +#include <internal/lib.h> /* page_size */ +#include <linux/compiler.h> +#include <linux/zalloc.h> +#include <inttypes.h> +#include <unistd.h> + +/** + * struct remap_addresses_key - Key for mapping original addresses to remapped ones. + * @dso: Pointer to the DSO (Dynamic Shared Object) associated with the mapping. + * @invariant: Unique offset invariant within the VMA (Virtual Memory Area). + * Calculated as `start - pgoff`. This value remains constant when + * perf's internal `maps__fixup_overlap_and_insert` splits a map into + * fragmented VMA pieces due to overlapping events, allowing us to + * resolve split maps consistently back to the original VMA. + * @pid: Process ID associated with the mapping. + */ +struct remap_addresses_key { + struct machine *machine; + struct dso *dso; + u64 invariant; + pid_t pid; +}; + +struct aslr_mapping { + struct list_head node; + u64 orig_start; + u64 len; + u64 remap_start; +}; + +struct process_top_address { + u64 remapped_max; + u64 orig_last_end; +}; +struct aslr_tool { + /** @tool: The tool implemented here and a pointer to a delegate to process the data. */ + struct delegate_tool tool; + /** @machines: The machines with the input, not remapped, virtual address layout. */ + struct machines machines; + /** @event_copy: Buffer used to create an event to pass to the delegate. 
*/ + char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); + /** @remap_addresses: mapping from remap_addresses_key to remapped address. */ + struct hashmap remap_addresses; + /** @top_addresses: mapping from process to max remapped address. */ + struct hashmap top_addresses; +}; + +static const pid_t kernel_pid = -1; + +/* Start remapping user processes from a small non-zero offset. */ +static const u64 user_space_start = 0x200000; +static const u64 kernel_space_start = 0xffff800010000000; + +static size_t remap_addresses__hash(long _key, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key = (struct remap_addresses_key *)_key; + + return (size_t)key->machine ^ (size_t)RC_CHK_ACCESS(key->dso) ^ key->invariant ^ key->pid; +} + +static bool remap_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key1 = (struct remap_addresses_key *)_key1; + struct remap_addresses_key *key2 = (struct remap_addresses_key *)_key2; + + return key1->machine == key2->machine && + RC_CHK_EQUAL(key1->dso, key2->dso) && + key1->invariant == key2->invariant && + key1->pid == key2->pid; +} + +struct top_addresses_key { + struct machine *machine; + pid_t pid; +}; + +static size_t top_addresses__hash(long _key, void *ctx __maybe_unused) +{ + struct top_addresses_key *key = (struct top_addresses_key *)_key; + + return (size_t)key->machine ^ key->pid; +} + +static bool top_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) +{ + struct top_addresses_key *key1 = (struct top_addresses_key *)_key1; + struct top_addresses_key *key2 = (struct top_addresses_key *)_key2; + + return key1->machine == key2->machine && key1->pid == key2->pid; +} + +static u64 round_up_to_page_size(u64 addr) +{ + return (addr + page_size - 1) & ~((u64)page_size - 1); +} + +struct aslr_machine_priv { + bool kernel_maps_loaded; +}; + +static int aslr_tool__preload_kernel_maps(struct machine *machine) +{ + struct aslr_machine_priv *mpriv = machine->priv; + 
+ if (!mpriv) { + mpriv = zalloc(sizeof(*mpriv)); + if (!mpriv) + return -ENOMEM; + machine->priv = mpriv; + } + + if (!mpriv->kernel_maps_loaded) { + struct maps *kmaps = machine__kernel_maps(machine); + + if (kmaps) { + int err = maps__load_maps(kmaps); + + if (err < 0) { + pr_err("ASLR: Failed to preload kernel maps for machine pid %d\n", + machine->pid); + return err; + } + } + mpriv->kernel_maps_loaded = true; + } + return 0; +} + +static void aslr_tool__free_machine_priv(struct machine *machine) +{ + free(machine->priv); + machine->priv = NULL; +} + +static void aslr_tool__destroy_machines_priv(struct machines *machines) +{ + struct rb_node *nd; + + aslr_tool__free_machine_priv(&machines->host); + for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) { + struct machine *machine = rb_entry(nd, struct machine, rb_node); + + aslr_tool__free_machine_priv(machine); + } +} + +static u64 aslr_tool__findnew_mapping(struct aslr_tool *aslr, + struct thread *aslr_thread, + u8 cpumode, u64 start, + u64 len, u64 pgoff) +{ + /* Address location for dso lookup. */ + struct addr_location al; + /* Original ASLR address based key for the remap table. */ + struct remap_addresses_key remap_key; + /* The address in the ASLR sanitized address space less pg_off. */ + u64 *remapped_invariant_ptr; + /* Key for the maximum address in a process. */ + struct top_addresses_key top_addr_key; + /* Value in top address table. */ + struct process_top_address *top = NULL; + /* Address in ASLR sanitized address space. */ + u64 remap_addr; + /* Potentially allocated remap table key. */ + struct remap_addresses_key *new_remap_key = NULL; + /* + * Potentially allocated remap table value. + * TODO: Avoid allocation necessary for perf 32-bit binary support. + */ + u64 *new_remap_val = NULL; + int err; + + if (!aslr_thread) + return 0; + + /* The key to look up an incoming address to the outgoing value.
*/ + addr_location__init(&al); + remap_key.machine = maps__machine(thread__maps(aslr_thread)); + remap_key.pid = (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? + kernel_pid : thread__pid(aslr_thread); + if (thread__find_map(aslr_thread, cpumode, start, &al)) { + struct dso *dso = map__dso(al.map); + const char *dso_name = dso ? dso__long_name(dso) : NULL; + + remap_key.dso = dso; + if (dso && !is_anon_memory(dso_name) && !is_no_dso_memory(dso_name)) + remap_key.invariant = map__start(al.map) - map__pgoff(al.map); + else + remap_key.invariant = map__start(al.map); + } else { + remap_key.dso = NULL; + remap_key.invariant = start; + } + + /* The key to look up top allocated address. */ + top_addr_key.machine = remap_key.machine; + top_addr_key.pid = remap_key.pid; + + if (hashmap__find(&aslr->remap_addresses, &remap_key, &remapped_invariant_ptr)) { + /* Mmap already exists. */ + u64 calculated_max; + + if (al.map) { + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + + (start - map__start(al.map)); + } else { + remap_addr = *remapped_invariant_ptr + pgoff; + } + + calculated_max = remap_addr + len; + + /* See if top mapping was expanded. */ + if (hashmap__find(&aslr->top_addresses, &top_addr_key, &top)) { + if (calculated_max > top->remapped_max) + top->remapped_max = calculated_max; + } + addr_location__exit(&al); + return remap_addr; + } + /* No mmap, create an entry from the top address. */ + if (hashmap__find(&aslr->top_addresses, &top_addr_key, &top)) { + /* Current max allocated mmap address within the process. */ + remap_addr = top->remapped_max; + + if (start == top->orig_last_end) { + /* Contiguous mapping, do not add 1 page gap! */ + remap_addr = round_up_to_page_size(remap_addr); + } else { + /* Give 1 page gap from current max page. 
*/ + remap_addr = round_up_to_page_size(remap_addr); + remap_addr += page_size; + } + top->orig_last_end = start + len; + if (remap_addr + len > top->remapped_max) + top->remapped_max = remap_addr + len; + } else { + /* First address of the process, allocate key and first top address. */ + struct top_addresses_key *tk; + struct process_top_address *top_val; + + remap_addr = (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? + kernel_space_start : user_space_start; + remap_addr = round_up_to_page_size(remap_addr); + + tk = malloc(sizeof(*tk)); + top_val = malloc(sizeof(*top_val)); + if (!tk || !top_val) { + err = -ENOMEM; + } else { + *tk = top_addr_key; + top_val->remapped_max = remap_addr + len; + top_val->orig_last_end = start + len; + err = hashmap__insert(&aslr->top_addresses, tk, top_val, + HASHMAP_ADD, NULL, NULL); + } + if (err) { + errno = -err; + pr_err("Failure to add ASLR process top address %m\n"); + free(tk); + free(top_val); + addr_location__exit(&al); + return 0; + } + } + /* Create remapping entry. */ + new_remap_key = malloc(sizeof(*new_remap_key)); + new_remap_val = malloc(sizeof(u64)); + if (!new_remap_key || !new_remap_val) { + err = -ENOMEM; + } else { + *new_remap_key = remap_key; + new_remap_key->dso = dso__get(remap_key.dso); + if (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) { + if (al.map) { + *new_remap_val = remap_addr - + (start - map__start(al.map)) - + map__pgoff(al.map); + } else { + *new_remap_val = remap_addr; + } + } else { + *new_remap_val = remap_addr - (al.map ?
map__pgoff(al.map) : pgoff); + } + err = hashmap__add(&aslr->remap_addresses, new_remap_key, new_remap_val); + if (err) + dso__put(new_remap_key->dso); + } + if (err) { + errno = -err; + pr_err("Failure to add ASLR remapping %m\n"); + free(new_remap_key); + free(new_remap_val); + addr_location__exit(&al); + return 0; + } + addr_location__exit(&al); + return remap_addr; +} + +static int aslr_tool__process_mmap(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_mmap(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap.pid, event->mmap.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap, &event->mmap, event->mmap.header.size); + /* Remaps the mmap.start. 
*/ + new_event->mmap.start = aslr_tool__findnew_mapping(aslr, thread, cpumode, + event->mmap.start, + event->mmap.len, + event->mmap.pgoff); + if (cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + new_event->mmap.pgoff = new_event->mmap.start; + err = delegate->mmap(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_mmap2(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_mmap2(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap2.pid, event->mmap2.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap2, &event->mmap2, event->mmap2.header.size); + /* Remaps the mmap.start. 
*/ + new_event->mmap2.start = aslr_tool__findnew_mapping(aslr, thread, cpumode, + event->mmap2.start, + event->mmap2.len, + event->mmap2.pgoff); + if (cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + new_event->mmap2.pgoff = new_event->mmap2.start; + err = delegate->mmap2(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_comm(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_comm(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->comm(delegate, event, sample, machine); +} + +static int aslr_tool__process_fork(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. 
*/ + err = perf_event__process_fork(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->fork(delegate, event, sample, machine); +} + +static int aslr_tool__process_exit(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_exit(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->exit(delegate, event, sample, machine); +} + +static int aslr_tool__process_text_poke(const struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + /* Drop in case the instruction encodes an ASLR revealing address. 
*/ + return 0; +} + +static int aslr_tool__process_ksymbol(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + err = perf_event__process_ksymbol(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, kernel_pid, 0); + if (!thread) + return -ENOMEM; + memcpy(&new_event->ksymbol, &event->ksymbol, event->ksymbol.header.size); + /* Remaps the ksymbol.start */ + new_event->ksymbol.addr = aslr_tool__findnew_mapping(aslr, thread, + PERF_RECORD_MISC_KERNEL, + event->ksymbol.addr, + event->ksymbol.len, + /*pgoff=*/0); + + err = delegate->ksymbol(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_sample(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct perf_tool *delegate = aslr->tool.delegate; + + return delegate->sample(delegate, event, sample, machine); +} + +static int skipn(int fd, off_t n) +{ + char buf[4096]; + ssize_t ret; + + while (n > 0) { + ret = read(fd, buf, min_t(off_t, n, (off_t)sizeof(buf))); + if (ret <= 0) + return ret; + n -= ret; + } + + return 0; +} + +static s64 
aslr_tool__process_auxtrace(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, + union perf_event *event) +{ + pr_warning_once("ASLR: Dropping auxtrace data as it cannot be obfuscated.\n"); + if (perf_data__is_pipe(session->data)) { + /* Copy behavior of the stub by reading all pipe data. */ + int err = skipn(perf_data__fd(session->data), event->auxtrace.size); + + if (err < 0) + return err; + } + return event->auxtrace.size; +} + +static int aslr_tool__process_auxtrace_info(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + +static int aslr_tool__process_auxtrace_error(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + +static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) +{ + delegate_tool__init(&aslr->tool, delegate); + aslr->tool.tool.ordered_events = true; + + machines__init(&aslr->machines); + + hashmap__init(&aslr->remap_addresses, + remap_addresses__hash, remap_addresses__equal, + /*ctx=*/NULL); + hashmap__init(&aslr->top_addresses, + top_addresses__hash, top_addresses__equal, + /*ctx=*/NULL); + + aslr->tool.tool.sample = aslr_tool__process_sample; + /* read - reads a counter, okay to delegate. */ + aslr->tool.tool.mmap = aslr_tool__process_mmap; + aslr->tool.tool.mmap2 = aslr_tool__process_mmap2; + aslr->tool.tool.comm = aslr_tool__process_comm; + aslr->tool.tool.fork = aslr_tool__process_fork; + aslr->tool.tool.exit = aslr_tool__process_exit; + /* namesspaces, cgroup, lost, lost_sample, aux, */ + /* itrace_start, aux_output_hw_id, context_switch, throttle, unthrottle */ + /* - no virtual addresses. */ + aslr->tool.tool.ksymbol = aslr_tool__process_ksymbol; + /* bpf - no virtual address. 
*/ + aslr->tool.tool.text_poke = aslr_tool__process_text_poke; + /* + * event_update, tracing_data, finished_round, build_id, id_index, + * auxtrace_info, auxtrace_error, time_conv, thread_map, cpu_map, + * stat_config, stat, feature, finished_init, bpf_metadata, compressed, + * auxtrace - no virtual addresses. + */ + aslr->tool.tool.auxtrace = aslr_tool__process_auxtrace; + aslr->tool.tool.auxtrace_info = aslr_tool__process_auxtrace_info; + aslr->tool.tool.auxtrace_error = aslr_tool__process_auxtrace_error; +} + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate) +{ + struct aslr_tool *aslr = zalloc(sizeof(*aslr)); + + if (!aslr) + return NULL; + + aslr_tool__init(aslr, delegate); + return &aslr->tool.tool; +} + +void aslr_tool__delete(struct perf_tool *tool) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct hashmap_entry *cur; + size_t bkt; + + if (!tool) + return; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + + hashmap__for_each_entry(&aslr->remap_addresses, cur, bkt) { + struct remap_addresses_key *key = (struct remap_addresses_key *)cur->pkey; + + if (key) + dso__put(key->dso); + zfree(&cur->pkey); + zfree(&cur->pvalue); + } + hashmap__for_each_entry(&aslr->top_addresses, cur, bkt) { + zfree(&cur->pkey); + zfree(&cur->pvalue); + } + + hashmap__clear(&aslr->remap_addresses); + hashmap__clear(&aslr->top_addresses); + aslr_tool__destroy_machines_priv(&aslr->machines); + machines__destroy_kernel_maps(&aslr->machines); + machines__exit(&aslr->machines); + free(aslr); +} diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h new file mode 100644 index 000000000000..a9b90bf29540 --- /dev/null +++ b/tools/perf/util/aslr.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_ASLR_H +#define __PERF_ASLR_H + +#include <linux/perf_event.h> + +#define ASLR_SUPPORTED_SAMPLE_TYPE ( \ + PERF_SAMPLE_IDENTIFIER | \ + PERF_SAMPLE_IP | \ + 
PERF_SAMPLE_TID | \ + PERF_SAMPLE_TIME | \ + PERF_SAMPLE_ADDR | \ + PERF_SAMPLE_ID | \ + PERF_SAMPLE_STREAM_ID | \ + PERF_SAMPLE_CPU | \ + PERF_SAMPLE_PERIOD | \ + PERF_SAMPLE_READ | \ + PERF_SAMPLE_CALLCHAIN | \ + PERF_SAMPLE_RAW | \ + PERF_SAMPLE_BRANCH_STACK | \ + PERF_SAMPLE_STACK_USER | \ + PERF_SAMPLE_WEIGHT_TYPE | \ + PERF_SAMPLE_DATA_SRC | \ + PERF_SAMPLE_TRANSACTION | \ + PERF_SAMPLE_PHYS_ADDR | \ + PERF_SAMPLE_CGROUP | \ + PERF_SAMPLE_DATA_PAGE_SIZE | \ + PERF_SAMPLE_CODE_PAGE_SIZE | \ + PERF_SAMPLE_AUX) + +struct perf_tool; +struct evsel; + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate); +void aslr_tool__delete(struct perf_tool *aslr); + +#endif /* __PERF_ASLR_H */ -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v12 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking 2026-06-05 19:24 ` [PATCH v12 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers @ 2026-06-05 19:38 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-06-05 19:38 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 2 potential issue(s) to consider: - [High] Dereferencing a NULL pointer in the hashmap hash function when built with REFCNT_CHECKING. - [Medium] Hardcoded 64-bit kernel address breaks cross-platform analysis for 32-bit architectures. -- commit 0eb1d131e7e278ed75ab636cb567e8964c56e531 Author: Ian Rogers <irogers@google.com> perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking This patch introduces the core ASLR remapping tool infrastructure and implements remapping/tracking for metadata events. > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > new file mode 100644 > index 0000000000000..ac24fda658a57 > --- /dev/null > +++ b/tools/perf/util/aslr.c [ ... ] > +static size_t remap_addresses__hash(long _key, void *ctx __maybe_unused) > +{ > + struct remap_addresses_key *key = (struct remap_addresses_key *)_key; > + > + return (size_t)key->machine ^ (size_t)RC_CHK_ACCESS(key->dso) ^ key->invariant ^ key->pid; [Severity: High] Does this code crash when built with REFCNT_CHECKING? In aslr_tool__findnew_mapping(), if thread__find_map() fails, the fallback code explicitly sets remap_key.dso to NULL: } else { remap_key.dso = NULL; remap_key.invariant = start; } When this key is passed to hashmap__find(), the hash function remap_addresses__hash() uses RC_CHK_ACCESS(key->dso). When perf is built with REFCNT_CHECKING (e.g., via Address Sanitizer), RC_CHK_ACCESS(object) expands strictly to object->orig. 
Since key->dso is NULL, this expands to NULL->orig, resulting in an unconditional segmentation fault when processing unmapped or anonymous memory events. [ ... ] > +static u64 aslr_tool__findnew_mapping(struct aslr_tool *aslr, > + struct thread *aslr_thread, > + u8 cpumode, u64 start, > + u64 len, u64 pgoff) > +{ [ ... ] > + } else { > + /* First address of the process, allocate key and first top address. */ > + struct top_addresses_key *tk; > + struct process_top_address *top_val; > + > + remap_addr = (cpumode == PERF_RECORD_MISC_KERNEL || > + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? > + kernel_space_start : user_space_start; > + remap_addr = round_up_to_page_size(remap_addr); [Severity: Medium] Will this hardcoded 64-bit kernel address break cross-platform analysis for 32-bit architectures? The code hardcodes kernel_space_start to 0xffff800010000000 and unconditionally applies it to any mapping with a kernel cpumode. If perf inject --aslr is run on a perf.data file collected from a 32-bit machine, this will inject 64-bit virtual addresses into the output stream. Downstream analysis tools expecting 32-bit address spaces based on the ELF e_machine type might encounter out-of-bounds addresses and fail. Should the tool dynamically determine the architecture's address space bounds via struct perf_env instead? -- Sashiko AI review · https://sashiko.dev/#/patchset/20260605192425.2523260-1-irogers@google.com?part=2 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v12 3/5] perf inject/aslr: Implement sample address remapping 2026-06-05 19:24 ` [PATCH v12 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-05 19:24 ` [PATCH v12 1/5] perf maps: Add maps__mutate_mapping Ian Rogers 2026-06-05 19:24 ` [PATCH v12 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers @ 2026-06-05 19:24 ` Ian Rogers 2026-06-05 19:24 ` [PATCH v12 4/5] perf test: Add inject ASLR test Ian Rogers ` (2 subsequent siblings) 5 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-05 19:24 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz Add the sample address remapping logic to the ASLR tool. This patch implements aslr_tool__process_sample, which parses sample events, remaps IPs, ADDRs, callchains, and branch stacks using the mappings collected from metadata events, and drops potentially leaking raw, register, stack, physical address, and aux samples. Also adds the aslr_tool__remap_address helper function. 
Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/util/aslr.c | 449 +++++++++++++++++++++++++++++++++++++++- tools/perf/util/evsel.c | 6 +- tools/perf/util/evsel.h | 10 +- 3 files changed, 456 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c index ac24fda658a5..dece5fb43dd8 100644 --- a/tools/perf/util/aslr.c +++ b/tools/perf/util/aslr.c @@ -109,6 +109,60 @@ static u64 round_up_to_page_size(u64 addr) return (addr + page_size - 1) & ~((u64)page_size - 1); } +static u64 aslr_tool__remap_address(struct aslr_tool *aslr, + struct thread *aslr_thread, + u8 cpumode, + u64 addr) +{ + struct addr_location al; + struct remap_addresses_key key; + u64 *remapped_invariant_ptr = NULL; + u64 remap_addr = 0; + u8 effective_cpumode = cpumode; + + if (!aslr_thread) + return 0; /* No thread. */ + + addr_location__init(&al); + if (!thread__find_map(aslr_thread, cpumode, addr, &al)) { + /* + * If lookup fails with specified cpumode, try fallback to the other space + * to be robust against bad cpumode in samples. + */ + if (cpumode == PERF_RECORD_MISC_KERNEL) + effective_cpumode = PERF_RECORD_MISC_USER; + else if (cpumode == PERF_RECORD_MISC_USER) + effective_cpumode = PERF_RECORD_MISC_KERNEL; + else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + effective_cpumode = PERF_RECORD_MISC_GUEST_USER; + else if (cpumode == PERF_RECORD_MISC_GUEST_USER) + effective_cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + + if (!thread__find_map(aslr_thread, effective_cpumode, addr, &al)) { + addr_location__exit(&al); + return 0; /* No mmap. */ + } + } + + key.machine = maps__machine(thread__maps(aslr_thread)); + key.dso = map__dso(al.map); + key.invariant = map__start(al.map) - map__pgoff(al.map); + key.pid = (effective_cpumode == PERF_RECORD_MISC_KERNEL || + effective_cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? 
+ kernel_pid : thread__pid(aslr_thread); + + if (hashmap__find(&aslr->remap_addresses, &key, &remapped_invariant_ptr)) { + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + + (addr - map__start(al.map)); + } else { + pr_debug("Cannot find a remapped entry for address %lx in mapping %lx(%lx) for pid=%d\n", + addr, map__start(al.map), map__size(al.map), key.pid); + } + + addr_location__exit(&al); + return remap_addr; +} + struct aslr_machine_priv { bool kernel_maps_loaded; }; @@ -554,13 +608,400 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, struct perf_sample *sample, struct machine *machine) { - struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); - struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); - struct perf_tool *delegate = aslr->tool.delegate; + struct evsel *evsel = sample->evsel; + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + int ret; + u64 sample_type; + struct thread *thread; + struct machine *aslr_machine; + __u64 max_i; + __u64 max_j; + union perf_event *new_event; + struct perf_sample new_sample; + __u64 *in_array, *out_array; + u8 cpumode; + u64 addr; + size_t i; + size_t j; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + if (evsel__is_dummy_event(evsel)) + return delegate->sample(delegate, event, sample, machine); + + ret = -EFAULT; + sample_type = evsel->core.attr.sample_type; + max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); + max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); + new_event = (union perf_event *)aslr->event_copy; + cpumode = sample->cpumode; + i = 0; + j = 0; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return 
-ENOMEM; + + thread = machine__findnew_thread(aslr_machine, sample->pid, sample->tid); + + if (!thread) + return -ENOMEM; - return delegate->sample(delegate, event, sample, machine); + if (max_i > PERF_SAMPLE_MAX_SIZE / sizeof(u64)) + goto out_put; + + new_event->sample.header = event->sample.header; + + in_array = &event->sample.array[0]; + out_array = &new_event->sample.array[0]; + +#define CHECK_BOUNDS(required_i, required_j) \ + (i + (required_i) > max_i || j + (required_j) > max_j) + +#define COPY_U64() \ + do { \ + if (CHECK_BOUNDS(1, 1)) { \ + ret = -EFAULT; \ + goto out_put; \ + } \ + out_array[j++] = in_array[i++]; \ + } while (0) + +#define REMAP_U64(addr_field) \ + do { \ + if (CHECK_BOUNDS(1, 1)) { \ + ret = -EFAULT; \ + goto out_put; \ + } \ + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr_field); \ + i++; \ + } while (0) + + if (sample_type & PERF_SAMPLE_IDENTIFIER) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_IP) + REMAP_U64(sample->ip); + if (sample_type & PERF_SAMPLE_TID) + COPY_U64(); /* pid, tid */ + if (sample_type & PERF_SAMPLE_TIME) + COPY_U64(); /* time */ + if (sample_type & PERF_SAMPLE_ADDR) + REMAP_U64(sample->addr); + if (sample_type & PERF_SAMPLE_ID) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_STREAM_ID) + COPY_U64(); /* stream_id */ + if (sample_type & PERF_SAMPLE_CPU) + COPY_U64(); /* cpu, res */ + if (sample_type & PERF_SAMPLE_PERIOD) + COPY_U64(); /* period */ + if (sample_type & PERF_SAMPLE_READ) { + if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } else { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) 
{ + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + nr = out_array[j++]; + i++; + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + for (u64 cntr = 0; cntr < nr; cntr++) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } + } + } + if (sample_type & PERF_SAMPLE_CALLCHAIN) { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + nr = out_array[j++]; + i++; + + for (u64 cntr = 0; cntr < nr; cntr++) { + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + addr = in_array[i++]; + if (addr >= PERF_CONTEXT_MAX) { + out_array[j++] = addr; + switch (addr) { + case PERF_CONTEXT_HV: + cpumode = PERF_RECORD_MISC_HYPERVISOR; + break; + case PERF_CONTEXT_KERNEL: + cpumode = PERF_RECORD_MISC_KERNEL; + break; + case PERF_CONTEXT_USER: + cpumode = PERF_RECORD_MISC_USER; + break; + case PERF_CONTEXT_GUEST: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_KERNEL: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_USER: + cpumode = PERF_RECORD_MISC_GUEST_USER; + break; + case PERF_CONTEXT_USER_DEFERRED: + if (cntr + 1 >= nr) { + pr_debug("Truncated callchain deferred cookie context\n"); + ret = 0; + goto out_put; + } + /* + * Immediately followed by a 64-bit + * stitching cookie. Skip/Copy it! 
+ */ + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j++] = in_array[i++]; + cntr++; + cpumode = PERF_RECORD_MISC_USER; + break; + default: + pr_debug("invalid callchain context: %"PRIx64"\n", addr); + ret = 0; + goto out_put; + } + continue; + } + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr); + } + } + if (sample_type & PERF_SAMPLE_RAW) { + size_t bytes = sizeof(u32) + sample->raw_size; + size_t u64_words = (bytes + 7) / 8; + + if (i + u64_words > max_i || j + u64_words > max_j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], bytes); + i += u64_words; + j += u64_words; + /* + * TODO: certain raw samples can be remapped, such as + * tracepoints by examining their fields. + */ + pr_debug("Dropping raw samples as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + nr = out_array[j++]; + i++; + + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) + COPY_U64(); /* hw_idx */ + + if (nr > (ULLONG_MAX / 3)) { + ret = -EFAULT; + goto out_put; + } + if (nr * 3 > max_i - i || nr * 3 > max_j - j) { + ret = -EFAULT; + goto out_put; + } + for (u64 cntr = 0; cntr < nr; cntr++) { + out_array[j++] = aslr_tool__remap_address(aslr, thread, + sample->cpumode, + in_array[i++]); /* from */ + out_array[j++] = aslr_tool__remap_address(aslr, thread, + sample->cpumode, + in_array[i++]); /* to */ + out_array[j++] = in_array[i++]; /* flags */ + } + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) { + if (nr > max_i - i || nr > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); + i += nr; + j += nr; + /* TODO: confirm branch counters don't leak ASLR information. 
*/ + pr_debug("Dropping sample branch counters as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + } + if (sample_type & PERF_SAMPLE_REGS_USER) { + if (CHECK_BOUNDS(1, 0)) { + ret = -EFAULT; + goto out_put; + } + /* abi */ + COPY_U64(); + /* TODO: can this be less conservative? */ + pr_debug("Dropping regs user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_STACK_USER) { + u64 size; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + size = out_array[j++]; + i++; + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + if (size % 8) { + size_t pad = 8 - (size % 8); + + memset(((char *)&out_array[j]) + size, 0, pad); + } + i += u64_words; + j += u64_words; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping stack user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + COPY_U64(); /* perf_sample_weight */ + if (sample_type & PERF_SAMPLE_DATA_SRC) + COPY_U64(); /* data_src */ + if (sample_type & PERF_SAMPLE_TRANSACTION) + COPY_U64(); /* transaction */ + if (sample_type & PERF_SAMPLE_REGS_INTR) { + if (CHECK_BOUNDS(1, 0)) { + ret = -EFAULT; + goto out_put; + } + /* abi */ + COPY_U64(); + /* TODO: can this be less conservative? */ + pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_PHYS_ADDR) { + COPY_U64(); /* phys_addr */ + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping physical address sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_CGROUP) + COPY_U64(); /* cgroup */ + if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + COPY_U64(); /* data_page_size */ + if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + COPY_U64(); /* code_page_size */ + + if (sample_type & PERF_SAMPLE_AUX) { + u64 size; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + size = out_array[j++]; + i++; + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + if (size % 8) { + size_t pad = 8 - (size % 8); + + memset(((char *)&out_array[j]) + size, 0, pad); + } + i += u64_words; + j += u64_words; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping aux sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + if (evsel__is_offcpu_event(evsel)) { + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping off-CPU sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); + + perf_sample__init(&new_sample, /*all=*/ true); + ret = evsel__parse_sample(evsel, new_event, &new_sample); + + if (ret) { + perf_sample__exit(&new_sample); + goto out_put; + } + + new_sample.evsel = evsel; + ret = delegate->sample(delegate, new_event, &new_sample, machine); + perf_sample__exit(&new_sample); + +out_put: + thread__put(thread); + return ret; } +#undef CHECK_BOUNDS +#undef COPY_U64 +#undef REMAP_U64 + static int skipn(int fd, off_t n) { char buf[4096]; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 34c03f47a913..05fa0010c858 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -3337,11 +3337,11 @@ static int __set_offcpu_sample(struct perf_sample *data) return -EFAULT; } -int evsel__parse_sample(struct evsel *evsel, union perf_event *event, - struct perf_sample *data) +int __evsel__parse_sample(struct evsel *evsel, union perf_event *event, + struct perf_sample *data, bool needs_swap) { u64 type = evsel->core.attr.sample_type; - bool swapped = evsel->needs_swap; + bool swapped = needs_swap; const __u64 *array; u16 max_size = event->header.size; const void *endp = (void *)event + max_size; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 8178858d168a..8009be22cc3f 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -432,8 +432,14 @@ static inline int evsel__read_on_cpu_scaled(struct evsel *evsel, int cpu_map_idx return __evsel__read_on_cpu(evsel, cpu_map_idx, thread, true); } -int evsel__parse_sample(struct evsel *evsel, union perf_event *event, - struct perf_sample *sample); +int __evsel__parse_sample(struct evsel *evsel, union perf_event *event, + struct perf_sample *data, bool needs_swap); + +static inline int evsel__parse_sample(struct evsel *evsel, union perf_event *event, + struct 
perf_sample *data) +{ + return __evsel__parse_sample(evsel, event, data, evsel->needs_swap); +} int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event, u64 *timestamp); -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* [PATCH v12 4/5] perf test: Add inject ASLR test 2026-06-05 19:24 ` [PATCH v12 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (2 preceding siblings ...) 2026-06-05 19:24 ` [PATCH v12 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers @ 2026-06-05 19:24 ` Ian Rogers 2026-06-05 19:24 ` [PATCH v12 5/5] perf aslr: Strip sample registers Ian Rogers 2026-06-05 19:48 ` [PATCH v13 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 5 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-05 19:24 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz Add a new shell test to verify the 'perf inject --aslr' feature. The test covers: - Basic address remapping for user space samples. - Pipe mode coverage for 'perf record' piped into 'perf inject'. - Callchain address remapping. - Consistency of 'perf report' output before and after injection. - Pipe mode report consistency. - Dropping of samples that leak ASLR info (physical addresses). - Kernel address remapping (utilizing a dedicated kernel-intensive VFS dd workload to guarantee continuous timer interrupts sampling flow inside kernel privilege states). - Kernel report consistency with address normalization. The test suite is hardened with global 'set -o pipefail' assertions to catch pipeline failures, stream-consuming awk processors to handle SIGPIPE signals, and a dedicated pipe output scenario validating raw 'perf inject -o -' stdout streams.
Assisted-by: Antigravity:gemini-3.5-flash Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/tests/shell/inject_aslr.sh | 464 ++++++++++++++++++++++++++ 1 file changed, 464 insertions(+) create mode 100755 tools/perf/tests/shell/inject_aslr.sh diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh new file mode 100755 index 000000000000..d8ded16ba905 --- /dev/null +++ b/tools/perf/tests/shell/inject_aslr.sh @@ -0,0 +1,464 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# perf inject --aslr test + +set -e +set -o pipefail + +shelldir=$(dirname "$0") +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + +sym="noploop" + +skip_test_missing_symbol ${sym} + +# Create global temp directory +temp_dir=$(mktemp -d /tmp/perf-test-aslr.XXXXXXXXXX) + +prog="perf test -w noploop" +[ "$(uname -m)" = "s390x" ] && prog="$prog 3" +err=0 +kprog="dd if=/dev/zero of=/dev/null bs=1M count=500" + +cleanup() { + local exit_code=${1:-$?} + trap - EXIT TERM INT + if [ "${exit_code}" -ne 0 ] || [ "${err}" -ne 0 ]; then + echo "Test failed! Preserving temp directory: ${temp_dir}" + return + fi + # Check if temp_dir is set and looks sane before removing + if [[ "${temp_dir}" =~ ^/tmp/perf-test-aslr\. ]]; then + rm -rf "${temp_dir}" + fi +} + +trap_cleanup() { + local exit_code=$? 
+ echo "Unexpected signal in ${FUNCNAME[1]}" + cleanup ${exit_code} + exit ${exit_code} +} +trap trap_cleanup EXIT TERM INT + +get_noploop_addr() { + local file=$1 + perf script -i "$file" | awk ' + BEGIN { found=0 } + { + for (i=1; i<=NF; i++) { + if ($i ~ /noploop\+/) { + if (!found) { + print $(i-1) + found=1 + } + } + } + }' +} + +test_basic_aslr() { + echo "Test basic ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.basic.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.basic.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -v --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Basic ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Basic ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Basic ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Basic ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Basic ASLR test [Success]" + fi +} + +test_pipe_aslr() { + echo "Test pipe mode ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe.XXXXXX") + + # Use tee to save the original pipe data for comparison + perf record -e task-clock:u -o - ${prog} | tee "${data}" | perf inject --aslr -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Pipe ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Pipe ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Pipe ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Pipe ASLR test [Failed - addresses are not remapped]" + 
err=1 + else + echo "Pipe ASLR test [Success]" + fi +} + +test_callchain_aslr() { + echo "Test Callchain ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.callchain.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.callchain.XXXXXX") + + perf record -g -e task-clock:u -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Callchain ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Callchain ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Callchain ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Callchain ASLR test [Failed - addresses are not remapped]" + err=1 + else + # Extract callchain addresses (indented lines starting with hex addresses) + orig_callchain=$(perf script -i "${data}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + new_callchain=$(perf script -i "${data2}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + + if [ -z "$orig_callchain" ]; then + echo "Callchain ASLR test [Failed - no callchain samples in original file]" + err=1 + elif [ -z "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain data was dropped]" + err=1 + elif [ "$orig_callchain" = "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain addresses were not remapped]" + err=1 + else + echo "Callchain ASLR test [Success]" + fi + fi +} + +test_report_aslr() { + echo "Test perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i 
"${data}" -o "${data_clean}" + perf inject -v -b --aslr -i "${data}" -o "${data2}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Report ASLR test [Success]" + fi +} + +test_pipe_report_aslr() { + echo "Test pipe mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + # Use tee to save the original pipe data, then process it with inject -b + perf record -e task-clock:u -o - ${prog} | \ + tee "${data}" | \ + perf inject -b --aslr -o "${data2}" + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || 
true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Pipe Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Report ASLR test [Success]" + fi +} + +test_pipe_out_report_aslr() { + echo "Test pipe output mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_out_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf inject -b --aslr -i "${data}" -o - | perf report -i - --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! 
-s "${report1_clean}" ]; then + echo "Pipe Output Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Output Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Output Report ASLR test [Success]" + fi +} + +test_dropped_samples() { + echo "Test dropped samples (phys-data)" + local data + data=$(mktemp "${temp_dir}/perf.data.dropped.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.dropped.XXXXXX") + + # Check if --phys-data is supported by recording a short run + if ! perf record -e task-clock:u --phys-data -o "${data}" -- sleep 0.1 > /dev/null 2>&1; then + echo "Skipping dropped samples test as --phys-data is not supported" + return + fi + + perf record -e task-clock:u --phys-data -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + # Verify that the original file actually contained samples! + orig_samples=$(perf script -i "${data}" | wc -l) + if [ "$orig_samples" -eq 0 ]; then + echo "Dropped samples test [Failed - no samples in original file]" + err=1 + else + # Verify that samples are dropped. + samples_count=$(perf script -i "${data2}" | wc -l) + + if [ "$samples_count" -gt 0 ]; then + echo "Dropped samples test [Failed - samples were not dropped]" + err=1 + else + echo "Dropped samples test [Success]" + fi + fi +} + +test_kernel_aslr() { + echo "Test kernel ASLR remapping" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then + echo "Skipping kernel ASLR test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel ASLR test as kernel map could not be recorded (permissions restricted)" + return + fi + + perf inject -v --aslr -i "${kdata}" -o "${kdata2}" + + # Check if kernel addresses are remapped. + # Find the field that ends with :k: (the event name) and take the next field! + orig_addr=$(perf script -i "${kdata}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + new_addr=$(perf script -i "${kdata2}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + + echo "Kernel ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Kernel ASLR test [Failed - no kernel samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Kernel ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Kernel ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Kernel ASLR test [Success]" + fi +} + +test_kernel_report_aslr() { + echo "Test kernel perf report consistency" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel_report.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_report_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then + echo "Skipping kernel report test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel report test as kernel map could not be recorded (permissions restricted)" + return + fi + + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i "${kdata}" -o "${data_clean}" + perf inject -v -b --aslr -i "${kdata}" -o "${kdata2}" + + local report1="${temp_dir}/report_kernel1" + local report2="${temp_dir}/report_kernel2" + local report1_clean="${temp_dir}/report_kernel1.clean" + local report2_clean="${temp_dir}/report_kernel2.clean" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${kdata2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' > "${report2_clean}" || true + + # Normalize kernel DSOs and addresses in clean reports + # This allows kernel modules to be either a module or kernel.kallsyms + local report1_norm="${temp_dir}/report_kernel1.norm" + local report2_norm="${temp_dir}/report_kernel2.norm" + local diff_file="${temp_dir}/diff_kernel" + + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report1_clean}" | \ + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ + sort > "${report1_norm}" || true + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report2_clean}" | \ + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ + sort > "${report2_norm}" || true + + diff -u -w "${report1_norm}" "${report2_norm}" > "${diff_file}" || true + + if [ ! 
-s "${report1_norm}" ]; then + echo "Kernel Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Kernel Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Kernel Report ASLR test [Success]" + fi +} + +test_basic_aslr +test_pipe_aslr +test_callchain_aslr +test_report_aslr +test_pipe_report_aslr +test_pipe_out_report_aslr +test_dropped_samples +test_kernel_aslr +test_kernel_report_aslr + +cleanup ${err} +exit $err -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* [PATCH v12 5/5] perf aslr: Strip sample registers 2026-06-05 19:24 ` [PATCH v12 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (3 preceding siblings ...) 2026-06-05 19:24 ` [PATCH v12 4/5] perf test: Add inject ASLR test Ian Rogers @ 2026-06-05 19:24 ` Ian Rogers 2026-06-05 19:48 ` [PATCH v13 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 5 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-05 19:24 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz Refactor the ASLR tool to strip out only the register dump payload by masking out the relevant perf_event_attr fields when the delegated tool is handling the data. struct aslr_evsel_priv caches the original perf_event_attr values (sample_type, sample_regs_user and sample_regs_intr) and is looked up via the evsel_orig_attrs hashmap. This allows us to keep samples that would otherwise be dropped because they contain registers, while still removing the register values from the output.
Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/builtin-inject.c | 46 ++++-- tools/perf/tests/shell/inject_aslr.sh | 55 +++++++ tools/perf/util/aslr.c | 208 +++++++++++++++++++++----- tools/perf/util/aslr.h | 4 + 4 files changed, 263 insertions(+), 50 deletions(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 65c7eccccf4d..de315bb334b3 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -253,6 +253,12 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, return -ENOMEM; memcpy(stripped_event, event, event->header.size); stripped_event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + if (stripped_event->attr.attr.size >= + (offsetof(struct perf_event_attr, sample_regs_user) + sizeof(u64))) + stripped_event->attr.attr.sample_regs_user = 0; + if (stripped_event->attr.attr.size >= + (offsetof(struct perf_event_attr, sample_regs_intr) + sizeof(u64))) + stripped_event->attr.attr.sample_regs_intr = 0; if (stripped_event->attr.attr.type == PERF_TYPE_BREAKPOINT) stripped_event->attr.attr.bp_addr = 0; @@ -295,8 +301,13 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, attr.size = sizeof(struct perf_event_attr); attr.sample_type &= ~PERF_SAMPLE_AUX; - if (inject->aslr) + if (inject->aslr) { attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + if (attr.type == PERF_TYPE_BREAKPOINT) + attr.bp_addr = 0; + attr.sample_regs_user = 0; + attr.sample_regs_intr = 0; + } if (inject->itrace_synth_opts.add_last_branch) { attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; @@ -2618,6 +2629,9 @@ static int __cmd_inject(struct perf_inject *inject) + if (inject->aslr) + aslr_tool__strip_evlist(inject->session->tool, session->evlist); + session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; perf_session__inject_header(session, session->evlist, fd, 
&inj_fc.fc, @@ -2876,6 +2890,18 @@ int cmd_inject(int argc, const char **argv) if (zstd_init(&(inject.session->zstd_data), 0) < 0) pr_warning("Decompression initialization failed.\n"); + if (inject.aslr) { + struct evsel *evsel; + + evlist__for_each_entry(inject.session->evlist, evsel) { + ret = aslr_tool__cache_orig_attrs(tool, evsel); + if (ret) { + pr_err("Failed to cache original attributes: %d\n", ret); + goto out_delete; + } + } + } + /* Save original section info before feature bits change */ ret = save_section_info(&inject); if (ret) @@ -2894,10 +2920,17 @@ int cmd_inject(int argc, const char **argv) * the input. */ if (!data.is_pipe) { + if (inject.aslr) + aslr_tool__strip_evlist(tool, inject.session->evlist); + ret = perf_event__synthesize_for_pipe(&inject.tool, inject.session, &inject.output, perf_event__repipe); + + if (inject.aslr) + aslr_tool__restore_evlist(tool, inject.session->evlist); + if (ret < 0) goto out_delete; } @@ -2963,17 +2996,6 @@ int cmd_inject(int argc, const char **argv) ret = __cmd_inject(&inject); - if (inject.aslr) { - struct evsel *evsel; - - evlist__for_each_entry(inject.session->evlist, evsel) { - evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; - - if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) - evsel->core.attr.bp_addr = 0; - } - } - guest_session__exit(&inject.guest_session); out_delete: diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh index d8ded16ba905..21d306a0ff2f 100755 --- a/tools/perf/tests/shell/inject_aslr.sh +++ b/tools/perf/tests/shell/inject_aslr.sh @@ -450,6 +450,60 @@ test_kernel_report_aslr() { fi } +test_regs_stripping() { + echo "Test user register stripping" + local rdata="${temp_dir}/perf.data.regs" + local rdata2="${temp_dir}/perf.data.regs.injected" + local rdata_clean="${temp_dir}/perf.data.regs.clean" + + if ! 
perf record --user-regs -o "${rdata}" ${prog} > /dev/null 2>&1; then + echo "Skipping user registers test as recording failed (unsupported flag/platform)" + return + fi + + perf inject -b -i "${rdata}" -o "${rdata_clean}" + perf inject -v -b --aslr -i "${rdata}" -o "${rdata2}" + + local report1="${temp_dir}/report_regs1" + local report2="${temp_dir}/report_regs2" + local report1_clean="${temp_dir}/report_regs1.clean" + local report2_clean="${temp_dir}/report_regs2.clean" + local diff_file="${temp_dir}/diff_regs" + + perf report -i "${rdata_clean}" --stdio > "${report1}" 2>/dev/null || true + perf report -i "${rdata2}" --stdio > "${report2}" 2>/dev/null || true + + grep '%' "${report1}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | \ + sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | \ + sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! 
-s "${report1_clean}" ]; then + echo "User registers stripping test [Failed - profile trace starved/empty]" + err=1 + return + elif [ -s "${diff_file}" ]; then + echo "User registers stripping test [Failed - report parsing differs]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + return + fi + + local script_dump="${temp_dir}/script_regs_dump" + perf script -D -i "${rdata2}" > "${script_dump}" 2>/dev/null || true + if grep -q "PERF_SAMPLE_REGS_USER" "${script_dump}"; then + echo "User registers stripping test [Failed - register dumps still present]" + err=1 + else + echo "User registers stripping test [Success]" + fi +} + test_basic_aslr test_pipe_aslr test_callchain_aslr @@ -459,6 +513,7 @@ test_pipe_out_report_aslr test_dropped_samples test_kernel_aslr test_kernel_report_aslr +test_regs_stripping cleanup ${err} exit $err diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c index dece5fb43dd8..14c8589d7453 100644 --- a/tools/perf/util/aslr.c +++ b/tools/perf/util/aslr.c @@ -5,6 +5,7 @@ #include "debug.h" #include "event.h" #include "evsel.h" +#include "evlist.h" #include "machine.h" #include "map.h" #include "thread.h" @@ -16,6 +17,7 @@ #include <internal/lib.h> /* page_size */ #include <linux/compiler.h> #include <linux/zalloc.h> +#include <errno.h> #include <inttypes.h> #include <unistd.h> @@ -43,6 +45,22 @@ struct aslr_mapping { u64 remap_start; }; +struct aslr_evsel_priv { + u64 orig_sample_type; + u64 orig_sample_regs_user; + u64 orig_sample_regs_intr; +}; + +static size_t evsel_hash(long key, void *ctx __maybe_unused) +{ + return (size_t)key; +} + +static bool evsel_equal(long key1, long key2, void *ctx __maybe_unused) +{ + return key1 == key2; +} + struct process_top_address { u64 remapped_max; u64 orig_last_end; @@ -58,6 +76,11 @@ struct aslr_tool { struct hashmap remap_addresses; /** @top_addresses: mapping from process to max remapped address. 
*/ struct hashmap top_addresses; + /** + * @evsel_orig_attrs: mapping from evsel pointer to its original + * unstripped sample_type and registers bitmasks. + */ + struct hashmap evsel_orig_attrs; }; static const pid_t kernel_pid = -1; @@ -613,6 +636,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, struct aslr_tool *aslr; struct perf_tool *delegate; int ret; + int orig_sample_size; u64 sample_type; struct thread *thread; struct machine *aslr_machine; @@ -625,7 +649,10 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, u64 addr; size_t i; size_t j; - + struct aslr_evsel_priv *priv = NULL; + u64 orig_sample_type; + u64 orig_regs_user; + u64 orig_regs_intr; del_tool = container_of(tool, struct delegate_tool, tool); aslr = container_of(del_tool, struct aslr_tool, tool); delegate = aslr->tool.delegate; @@ -634,7 +661,23 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, return delegate->sample(delegate, event, sample, machine); ret = -EFAULT; - sample_type = evsel->core.attr.sample_type; + + if (hashmap__find(&aslr->evsel_orig_attrs, evsel, &priv)) { + orig_sample_type = priv->orig_sample_type; + orig_regs_user = priv->orig_sample_regs_user; + orig_regs_intr = priv->orig_sample_regs_intr; + } else { + orig_sample_type = evsel->core.attr.sample_type; + orig_regs_user = evsel->core.attr.sample_regs_user; + orig_regs_intr = evsel->core.attr.sample_regs_intr; + } + + orig_sample_size = evsel->sample_size; + + sample_type = orig_sample_type; + sample_type &= ~PERF_SAMPLE_REGS_USER; + sample_type &= ~PERF_SAMPLE_REGS_INTR; + max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); new_event = (union perf_event *)aslr->event_copy; @@ -683,25 +726,25 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, i++; \ } while (0) - if (sample_type & PERF_SAMPLE_IDENTIFIER) + if (orig_sample_type & 
PERF_SAMPLE_IDENTIFIER) COPY_U64(); /* id */ - if (sample_type & PERF_SAMPLE_IP) + if (orig_sample_type & PERF_SAMPLE_IP) REMAP_U64(sample->ip); - if (sample_type & PERF_SAMPLE_TID) + if (orig_sample_type & PERF_SAMPLE_TID) COPY_U64(); /* pid, tid */ - if (sample_type & PERF_SAMPLE_TIME) + if (orig_sample_type & PERF_SAMPLE_TIME) COPY_U64(); /* time */ - if (sample_type & PERF_SAMPLE_ADDR) + if (orig_sample_type & PERF_SAMPLE_ADDR) REMAP_U64(sample->addr); - if (sample_type & PERF_SAMPLE_ID) + if (orig_sample_type & PERF_SAMPLE_ID) COPY_U64(); /* id */ - if (sample_type & PERF_SAMPLE_STREAM_ID) + if (orig_sample_type & PERF_SAMPLE_STREAM_ID) COPY_U64(); /* stream_id */ - if (sample_type & PERF_SAMPLE_CPU) + if (orig_sample_type & PERF_SAMPLE_CPU) COPY_U64(); /* cpu, res */ - if (sample_type & PERF_SAMPLE_PERIOD) + if (orig_sample_type & PERF_SAMPLE_PERIOD) COPY_U64(); /* period */ - if (sample_type & PERF_SAMPLE_READ) { + if (orig_sample_type & PERF_SAMPLE_READ) { if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { COPY_U64(); /* value */ if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) @@ -735,7 +778,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, } } } - if (sample_type & PERF_SAMPLE_CALLCHAIN) { + if (orig_sample_type & PERF_SAMPLE_CALLCHAIN) { u64 nr; if (CHECK_BOUNDS(1, 1)) { @@ -801,7 +844,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr); } } - if (sample_type & PERF_SAMPLE_RAW) { + if (orig_sample_type & PERF_SAMPLE_RAW) { size_t bytes = sizeof(u32) + sample->raw_size; size_t u64_words = (bytes + 7) / 8; @@ -820,7 +863,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + if (orig_sample_type & PERF_SAMPLE_BRANCH_STACK) { u64 nr; if (CHECK_BOUNDS(1, 1)) { @@ -865,19 +908,25 @@ static int aslr_tool__process_sample(const 
struct perf_tool *tool, goto out_put; } } - if (sample_type & PERF_SAMPLE_REGS_USER) { + if (orig_sample_type & PERF_SAMPLE_REGS_USER) { + u64 abi; + if (CHECK_BOUNDS(1, 0)) { ret = -EFAULT; goto out_put; } - /* abi */ - COPY_U64(); - /* TODO: can this be less conservative? */ - pr_debug("Dropping regs user sample as possible ASLR leak\n"); - ret = 0; - goto out_put; + abi = in_array[i++]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(orig_regs_user); + + if (nr > max_i - i) { + ret = -EFAULT; + goto out_put; + } + i += nr; + } } - if (sample_type & PERF_SAMPLE_STACK_USER) { + if (orig_sample_type & PERF_SAMPLE_STACK_USER) { u64 size; if (CHECK_BOUNDS(1, 1)) { @@ -908,39 +957,45 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + if (orig_sample_type & PERF_SAMPLE_WEIGHT_TYPE) COPY_U64(); /* perf_sample_weight */ - if (sample_type & PERF_SAMPLE_DATA_SRC) + if (orig_sample_type & PERF_SAMPLE_DATA_SRC) COPY_U64(); /* data_src */ - if (sample_type & PERF_SAMPLE_TRANSACTION) + if (orig_sample_type & PERF_SAMPLE_TRANSACTION) COPY_U64(); /* transaction */ - if (sample_type & PERF_SAMPLE_REGS_INTR) { + if (orig_sample_type & PERF_SAMPLE_REGS_INTR) { + u64 abi; + if (CHECK_BOUNDS(1, 0)) { ret = -EFAULT; goto out_put; } - /* abi */ - COPY_U64(); - /* TODO: can this be less conservative? */ - pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); - ret = 0; - goto out_put; + abi = in_array[i++]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(orig_regs_intr); + + if (nr > max_i - i) { + ret = -EFAULT; + goto out_put; + } + i += nr; + } } - if (sample_type & PERF_SAMPLE_PHYS_ADDR) { + if (orig_sample_type & PERF_SAMPLE_PHYS_ADDR) { COPY_U64(); /* phys_addr */ /* TODO: can this be less conservative? 
*/ pr_debug("Dropping physical address sample as possible ASLR leak\n"); ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_CGROUP) + if (orig_sample_type & PERF_SAMPLE_CGROUP) COPY_U64(); /* cgroup */ - if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + if (orig_sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) COPY_U64(); /* data_page_size */ - if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + if (orig_sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) COPY_U64(); /* code_page_size */ - if (sample_type & PERF_SAMPLE_AUX) { + if (orig_sample_type & PERF_SAMPLE_AUX) { u64 size; if (CHECK_BOUNDS(1, 1)) { @@ -981,10 +1036,20 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); + /* Temporarily override evsel attributes to match the stripped new_event format! */ + evsel->sample_size = __evsel__sample_size(sample_type); + evsel->core.attr.sample_type = sample_type; + evsel->core.attr.sample_regs_user = 0; + evsel->core.attr.sample_regs_intr = 0; perf_sample__init(&new_sample, /*all=*/ true); ret = evsel__parse_sample(evsel, new_event, &new_sample); if (ret) { + /* Restore original attributes immediately if parsing fails */ + evsel->sample_size = orig_sample_size; + evsel->core.attr.sample_type = orig_sample_type; + evsel->core.attr.sample_regs_user = orig_regs_user; + evsel->core.attr.sample_regs_intr = orig_regs_intr; perf_sample__exit(&new_sample); goto out_put; } @@ -993,6 +1058,12 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = delegate->sample(delegate, new_event, &new_sample, machine); perf_sample__exit(&new_sample); + /* Restore original attributes so trace ingestion never desynchronizes! 
*/ + evsel->sample_size = orig_sample_size; + evsel->core.attr.sample_type = orig_sample_type; + evsel->core.attr.sample_regs_user = orig_regs_user; + evsel->core.attr.sample_regs_intr = orig_regs_intr; + out_put: thread__put(thread); return ret; @@ -1059,6 +1130,9 @@ static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) hashmap__init(&aslr->top_addresses, top_addresses__hash, top_addresses__equal, /*ctx=*/NULL); + hashmap__init(&aslr->evsel_orig_attrs, + evsel_hash, evsel_equal, + /*ctx=*/NULL); aslr->tool.tool.sample = aslr_tool__process_sample; /* read - reads a counter, okay to delegate. */ @@ -1120,11 +1194,69 @@ void aslr_tool__delete(struct perf_tool *tool) zfree(&cur->pkey); zfree(&cur->pvalue); } + hashmap__for_each_entry(&aslr->evsel_orig_attrs, cur, bkt) { + zfree(&cur->pvalue); + } hashmap__clear(&aslr->remap_addresses); hashmap__clear(&aslr->top_addresses); + hashmap__clear(&aslr->evsel_orig_attrs); aslr_tool__destroy_machines_priv(&aslr->machines); machines__destroy_kernel_maps(&aslr->machines); machines__exit(&aslr->machines); free(aslr); } + +int aslr_tool__cache_orig_attrs(struct perf_tool *tool, struct evsel *evsel) +{ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct aslr_evsel_priv *priv = zalloc(sizeof(*priv)); + int err; + + if (!priv) + return -ENOMEM; + + priv->orig_sample_type = evsel->core.attr.sample_type; + priv->orig_sample_regs_user = evsel->core.attr.sample_regs_user; + priv->orig_sample_regs_intr = evsel->core.attr.sample_regs_intr; + + err = hashmap__add(&aslr->evsel_orig_attrs, evsel, priv); + if (err) { + free(priv); + return err; + } + return 0; +} + +void aslr_tool__strip_evlist(const struct perf_tool *tool __maybe_unused, struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + 
evsel__reset_sample_bit(evsel, REGS_USER); + evsel__reset_sample_bit(evsel, REGS_INTR); + evsel->core.attr.sample_regs_user = 0; + evsel->core.attr.sample_regs_intr = 0; + + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) + evsel->core.attr.bp_addr = 0; + } +} + +void aslr_tool__restore_evlist(const struct perf_tool *tool, struct evlist *evlist) +{ + const struct delegate_tool *del_tool = container_of(tool, const struct delegate_tool, tool); + const struct aslr_tool *aslr = container_of(del_tool, const struct aslr_tool, tool); + struct evsel *evsel; + struct aslr_evsel_priv *priv; + + evlist__for_each_entry(evlist, evsel) { + if (hashmap__find(&aslr->evsel_orig_attrs, evsel, &priv)) { + evsel->core.attr.sample_type = priv->orig_sample_type; + evsel->core.attr.sample_regs_user = priv->orig_sample_regs_user; + evsel->core.attr.sample_regs_intr = priv->orig_sample_regs_intr; + } + } +} diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h index a9b90bf29540..4c2cffc0e500 100644 --- a/tools/perf/util/aslr.h +++ b/tools/perf/util/aslr.h @@ -30,8 +30,12 @@ struct perf_tool; struct evsel; +struct evlist; struct perf_tool *aslr_tool__new(struct perf_tool *delegate); void aslr_tool__delete(struct perf_tool *aslr); +int aslr_tool__cache_orig_attrs(struct perf_tool *tool, struct evsel *evsel); +void aslr_tool__strip_evlist(const struct perf_tool *tool, struct evlist *evlist); +void aslr_tool__restore_evlist(const struct perf_tool *tool, struct evlist *evlist); #endif /* __PERF_ASLR_H */ -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* [PATCH v13 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes 2026-06-05 19:24 ` [PATCH v12 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (4 preceding siblings ...) 2026-06-05 19:24 ` [PATCH v12 5/5] perf aslr: Strip sample registers Ian Rogers @ 2026-06-05 19:48 ` Ian Rogers 2026-06-05 19:48 ` [PATCH v13 1/5] perf maps: Add maps__mutate_mapping Ian Rogers ` (5 more replies) 5 siblings, 6 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-05 19:48 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz This patch series introduces the new 'perf inject --aslr' feature, which remaps virtual memory addresses and drops events that would leak physical memory addresses, so that recorded profile data can be shared between machines. Bundled with this feature is a bug fix inside the core map tracking tool that hardens perf session analysis against concurrent lookup data races. Detailed Mechanism of MMAP Mapping and ASLR Virtual Address Allocation: The ASLR tool virtualizes the address space of the recorded processes by intercepting MMAP and MMAP2 events to build a consistent translation database, which is subsequently used to rewrite sample addresses. It maintains two primary lookup databases using hash maps: 1. 'remap_addresses': Maps an original mapping key to its new remapped base address. The key uses topological invariant coordinates: (machine, dso, invariant). The invariant is computed as (start - pgoff) for DSO-backed mappings. This invariant remains constant even when perf's internal overlap-resolution splits a VMA into fragmented pieces, ensuring split maps resolve consistently back to the same remapped base. 2. 'top_addresses': Tracks the allocation state per process (machine, pid).
It maintains 'remapped_max' (the highest allocated address in the virtualized space) and 'orig_last_end' (the end address of the last processed original mapping). For each MMAP/MMAP2 event: - We look up the DSO and invariant key in 'remap_addresses'. If found, we reuse the translation, preserving the offset within the mapping. - If not found, we allocate a new remapped address space: - If the new mapping is contiguous with the previous one in the original address space (start == orig_last_end), we place it contiguously in the remapped space. This is critical to preserve the contiguity of mappings for downstream merging (e.g. symbols split by HugeTLB, or anonymous .bss segments adjacent to initialized data). - If not contiguous, we insert a 1-page gap (using page_size) from the previous maximum allocated address to prevent accidental merging of unrelated VMAs. - The event's start address (and pgoff for kernel maps) is rewritten, and the event is delegated to the output writer. To remain strictly conservative and guarantee security, the tool scrubs breakpoint addresses (bp_addr) from all synthesized stream headers, completely drops PERF_RECORD_TEXT_POKE events to prevent leaks of absolute pointers embedded as immediate operands, and drops unsupported complex payloads (such as user register stacks, raw tracepoints, and hardware AUX tracing frames). Verification is reinforced with a shell test ('inject_aslr.sh'). Prerequisite Bug Fix (Patch 1): During development, a core map indexing issue was identified and resolved to prevent concurrent lookup data races during session analysis. Changes since v12: - Patch 2: Fixed potential NULL pointer dereference in remap_addresses__hash() when handling unmapped memory events (key->dso is NULL) under REFCNT_CHECKING. - Patch 2: Dynamically detect machine architecture bitness via perf_env__kernel_is_64_bit() to select appropriate kernel_space_start boundaries, avoiding 64-bit address injection on 32-bit platforms.
Changes since v11: - Patch 1: Fixed struct dso name accessor in maps.c by using dso__name() instead of ->name. - Patch 2: Fixed hash function in aslr.c to hash the underlying dso pointer using RC_CHK_ACCESS to support reference count checking. Changes since v10: - Patch 1: Added explicit tracking array logic in maps__load_maps() to correctly accumulate valid maps (skipping NULL entries after failures) and safely return the exact populated count, resolving out-of-bounds pointer iteration panics. - Patch 3: Fixed endianness bug during cross-endian sample parsing by passing evsel->needs_swap instead of false to __evsel__parse_sample in aslr.c, ensuring correct 32-bit field byte unswapping for packed fields. Refactored evsel__parse_sample to take a needs_swap argument via __evsel__parse_sample. - Patch 4: Fixed inject_aslr.sh exit code handling in trap functions to capture and propagate the correct pipeline failure status code instead of unconditionally returning success or failing the test. Changes since v9: - Patch 1: Added `-ENOMEM` error check inside `maps__find_symbol_by_name()` and return `NULL` early. Added map sorting state invalidation on early return in `maps__load_maps()`. - Patch 2: Fixed encapsulation by using `thread__maps()` and `thread__pid()` accessors in `aslr_tool__findnew_mapping()`. Added `pr_warning_once` warning when raw auxtrace data is dropped. - Patch 3: Fixed encapsulation by using `thread__maps()` and `thread__pid()` accessors in `aslr_tool__remap_address()`. Wrapped `evsel__parse_sample()` to temporarily disable `needs_swap` to avoid branch stack endianness corruption on cross-endian files. Fixed ISO C90 warning for declaration-after-statement for `orig_needs_swap`. - Patch 4: Fixed duplicate cleanup by explicitly removing trap handlers (`trap - EXIT TERM INT`) inside the `cleanup()` function. - Patch 5: Fixed heap corruption by adding size bounds checking before writing to `sample_regs_user` and `sample_regs_intr` fields. 
Added missing register mask clearing logic for the `itrace` synthesis path of `perf_event__repipe_attr()`. Ian Rogers (5): perf maps: Add maps__mutate_mapping perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking perf inject/aslr: Implement sample address remapping perf test: Add inject ASLR test perf aslr: Strip sample registers tools/perf/builtin-inject.c | 79 +- tools/perf/tests/shell/inject_aslr.sh | 519 ++++++++++ tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 1268 +++++++++++++++++++++++++ tools/perf/util/aslr.h | 41 + tools/perf/util/evsel.c | 6 +- tools/perf/util/evsel.h | 10 +- tools/perf/util/machine.c | 32 +- tools/perf/util/maps.c | 149 ++- tools/perf/util/maps.h | 3 + tools/perf/util/symbol-elf.c | 41 +- tools/perf/util/symbol.c | 17 +- 12 files changed, 2101 insertions(+), 65 deletions(-) create mode 100755 tools/perf/tests/shell/inject_aslr.sh create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v13 1/5] perf maps: Add maps__mutate_mapping 2026-06-05 19:48 ` [PATCH v13 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers @ 2026-06-05 19:48 ` Ian Rogers 2026-06-05 19:48 ` [PATCH v13 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers ` (4 subsequent siblings) 5 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-05 19:48 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz During kernel ELF symbol parsing (dso__process_kernel_symbol), proc kallsyms image loading (dso__load_kernel_sym, dso__load_guest_kernel_sym), and dynamic kernel memory map alignment updates (machine__update_kernel_mmap), the loader directly modifies live virtual address boundary key fields on map objects. If these boundaries are mutated while the map pointer actively resides inside the parent maps cache array list (kmaps) outside of any lock closure, an unsafe concurrent window is exposed where parallel worker lookup threads (e.g., inside perf top) can mistakenly assume the cache remains sorted based on stale parameters, executing binary search queries (bsearch) across an unsorted range and triggering lookup failures. Fix this by introducing maps__mutate_mapping() that explicitly acquires the parent maps write semaphore lock, executes an incoming mutation callback block to perform the field updates under lock protection, and invalidates the sorted tracking flags prior to releasing the write lock. This guarantees synchronization invariants, closing the concurrent lookup race window. The adjacent module alignment pass inside machine__create_kernel_maps() is safely preserved as a high-performance lockless pass, as its invocation lifecycle bounds remain strictly single-threaded by contract during session initialization. 
To safely support this unconditional down_write write lock mutator without recursive read-to-write self-deadlock upgrades during lazy symbol loading, we introduce a public maps__load_maps() API. It copies map pointers under a brief read lock and force-loads all modules locklessly outside the lock. Callers (such as perf inject) must pre-load all kernel symbol maps up front at startup using maps__load_maps(), completely bypassing dynamic runtime mutations. Fixes: 39b12f781271 ("perf tools: Make it possible to read object code from vmlinux") Assisted-by: Antigravity:gemini-3.5-flash Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/util/machine.c | 32 +++++--- tools/perf/util/maps.c | 149 ++++++++++++++++++++++++++++------- tools/perf/util/maps.h | 3 + tools/perf/util/symbol-elf.c | 41 ++++++---- tools/perf/util/symbol.c | 17 +++- 5 files changed, 184 insertions(+), 58 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index da1ad58758af..1ea06fde14e0 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1539,22 +1539,30 @@ static void machine__set_kernel_mmap(struct machine *machine, map__set_end(machine->vmlinux_map, ~0ULL); } -static int machine__update_kernel_mmap(struct machine *machine, - u64 start, u64 end) +struct kernel_mmap_mutation_ctx { + u64 start; + u64 end; +}; + +static int kernel_mmap_mutate_cb(struct map *map, void *data) { - struct map *orig, *updated; - int err; + struct kernel_mmap_mutation_ctx *ctx = data; - orig = machine->vmlinux_map; - updated = map__get(orig); + map__set_start(map, ctx->start); + map__set_end(map, ctx->end); + if (ctx->start == 0 && ctx->end == 0) + map__set_end(map, ~0ULL); + return 0; +} - machine->vmlinux_map = updated; - maps__remove(machine__kernel_maps(machine), orig); - machine__set_kernel_mmap(machine, start, end); - err = maps__insert(machine__kernel_maps(machine), updated); - map__put(orig); +static int machine__update_kernel_mmap(struct machine *machine, 
+ u64 start, u64 end) +{ + struct kernel_mmap_mutation_ctx ctx = { .start = start, .end = end }; - return err; + return maps__mutate_mapping(machine__kernel_maps(machine), + machine->vmlinux_map, + kernel_mmap_mutate_cb, &ctx); } int machine__create_kernel_maps(struct machine *machine) diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index 923935ee21b6..b1b8efe42149 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -576,6 +576,49 @@ void maps__remove(struct maps *maps, struct map *map) #endif } +/** + * maps__mutate_mapping - Apply write-protected mutations to a map. + * @maps: The maps collection containing the map. + * @map: The map to mutate. + * @mutate_cb: Callback function that performs the actual mutations. + * @data: Private data passed to the callback. + * + * This acquires the write lock on the maps semaphore to safely protect + * concurrent readers from seeing partially mutated or unsorted map boundaries. + * + * WARNING: Acquiring down_write() here can trigger a recursive self-deadlock if + * the caller already holds the read lock (e.g., during maps__for_each_map() or + * maps__find() iteration paths that trigger lazy symbol loading). To completely + * avoid this deadlock, all kernel/module maps must be pre-loaded up-front (via + * maps__load_maps()) under a clean, single-threaded context before entering + * multi-threaded event processing loops. 
+ */ +int maps__mutate_mapping(struct maps *maps, struct map *map, + int (*mutate_cb)(struct map *map, void *data), void *data) +{ + int err = 0; + + if (maps) + down_write(maps__lock(maps)); + + err = mutate_cb(map, data); + + if (maps) { + RC_CHK_ACCESS(maps)->maps_by_address_sorted = false; + RC_CHK_ACCESS(maps)->maps_by_name_sorted = false; + } + + if (maps) + up_write(maps__lock(maps)); + +#ifdef HAVE_LIBDW_SUPPORT + if (maps) + libdw__invalidate_dwfl(maps, maps__libdw_addr_space_dwfl(maps)); +#endif + + return err; +} + bool maps__empty(struct maps *maps) { bool res; @@ -626,6 +669,41 @@ int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data) return ret; } +int maps__load_maps(struct maps *maps) +{ + struct map **maps_copy; + unsigned int nr_maps; + int err = 0; + + if (!maps) + return 0; + + down_read(maps__lock(maps)); + nr_maps = maps__nr_maps(maps); + if (nr_maps == 0) { + up_read(maps__lock(maps)); + return 0; + } + maps_copy = calloc(nr_maps, sizeof(*maps_copy)); + if (!maps_copy) { + up_read(maps__lock(maps)); + return -ENOMEM; + } + for (unsigned int i = 0; i < nr_maps; i++) + maps_copy[i] = map__get(maps__maps_by_address(maps)[i]); + up_read(maps__lock(maps)); + + for (unsigned int i = 0; i < nr_maps; i++) { + if (map__load(maps_copy[i]) < 0) { + pr_warning("Failed to load map %s\n", dso__name(map__dso(maps_copy[i]))); + err = -1; + } + map__put(maps_copy[i]); + } + free(maps_copy); + return err; +} + void maps__remove_maps(struct maps *maps, bool (*cb)(struct map *map, void *data), void *data) { struct map **maps_by_address; @@ -668,40 +746,57 @@ struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp) return result; } -struct maps__find_symbol_by_name_args { - struct map **mapp; - const char *name; - struct symbol *sym; -}; - -static int maps__find_symbol_by_name_cb(struct map *map, void *data) +struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) { - 
struct maps__find_symbol_by_name_args *args = data; + struct map **maps_copy; + unsigned int nr_maps; + struct symbol *sym = NULL; - args->sym = map__find_symbol_by_name(map, args->name); - if (!args->sym) - return 0; + if (!maps) + return NULL; - if (!map__contains_symbol(map, args->sym)) { - args->sym = NULL; - return 0; + /* + * First, ensure all maps are loaded. We pre-load them outside of any + * read-to-write locks to avoid deadlocks. Even if some fail, we proceed. + */ + maps__load_maps(maps); + + /* + * Create a local snapshot of the maps while holding the read lock. + * This prevents deadlocking if iteration triggers further map insertions. + */ + down_read(maps__lock(maps)); + nr_maps = maps__nr_maps(maps); + maps_copy = calloc(nr_maps, sizeof(*maps_copy)); + if (maps_copy) { + for (unsigned int i = 0; i < nr_maps; i++) { + struct map *map = maps__maps_by_address(maps)[i]; + + maps_copy[i] = map__get(map); + } } + up_read(maps__lock(maps)); - if (args->mapp != NULL) - *args->mapp = map__get(map); - return 1; -} + if (!maps_copy) + return NULL; -struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) -{ - struct maps__find_symbol_by_name_args args = { - .mapp = mapp, - .name = name, - .sym = NULL, - }; + for (unsigned int i = 0; i < nr_maps; i++) { + struct map *map = maps_copy[i]; + + sym = map__find_symbol_by_name(map, name); + if (sym && map__contains_symbol(map, sym)) { + if (mapp) + *mapp = map__get(map); + break; + } + sym = NULL; + } + + for (unsigned int i = 0; i < nr_maps; i++) + map__put(maps_copy[i]); - maps__for_each_map(maps, maps__find_symbol_by_name_cb, &args); - return args.sym; + free(maps_copy); + return sym; } int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams) diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h index 5b80b199685e..4ec9b7453a3b 100644 --- a/tools/perf/util/maps.h +++ b/tools/perf/util/maps.h @@ -59,8 +59,11 @@ void maps__set_libdw_addr_space_dwfl(struct 
maps *maps, void *dwfl); size_t maps__fprintf(struct maps *maps, FILE *fp); +int maps__load_maps(struct maps *maps); int maps__insert(struct maps *maps, struct map *map); void maps__remove(struct maps *maps, struct map *map); +int maps__mutate_mapping(struct maps *maps, struct map *map, + int (*mutate_cb)(struct map *map, void *data), void *data); struct map *maps__find(struct maps *maps, u64 addr); struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 186e6d92ac3d..d1e93c0556dd 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1342,6 +1342,24 @@ static u64 ref_reloc(struct kmap *kmap) void __weak arch__sym_update(struct symbol *s __maybe_unused, GElf_Sym *sym __maybe_unused) { } +struct remap_kernel_ctx { + u64 sh_addr; + u64 sh_size; + u64 sh_offset; + struct kmap *kmap; +}; + +static int remap_kernel_cb(struct map *map, void *data) +{ + struct remap_kernel_ctx *ctx = data; + + map__set_start(map, ctx->sh_addr + ref_reloc(ctx->kmap)); + map__set_end(map, map__start(map) + ctx->sh_size); + map__set_pgoff(map, ctx->sh_offset); + map__set_mapping_type(map, MAPPING_TYPE__DSO); + return 0; +} + static int dso__process_kernel_symbol(struct dso *dso, struct map *map, GElf_Sym *sym, GElf_Shdr *shdr, struct maps *kmaps, struct kmap *kmap, @@ -1372,22 +1390,15 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, * map to the kernel dso. 
*/ if (*remap_kernel && dso__kernel(dso) && !kmodule) { + struct remap_kernel_ctx ctx = { + .sh_addr = shdr->sh_addr, + .sh_size = shdr->sh_size, + .sh_offset = shdr->sh_offset, + .kmap = kmap + }; + *remap_kernel = false; - map__set_start(map, shdr->sh_addr + ref_reloc(kmap)); - map__set_end(map, map__start(map) + shdr->sh_size); - map__set_pgoff(map, shdr->sh_offset); - map__set_mapping_type(map, MAPPING_TYPE__DSO); - /* Ensure maps are correctly ordered */ - if (kmaps) { - int err; - struct map *tmp = map__get(map); - - maps__remove(kmaps, map); - err = maps__insert(kmaps, map); - map__put(tmp); - if (err) - return err; - } + maps__mutate_mapping(kmaps, map, remap_kernel_cb, &ctx); } /* diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 0c46b24ee098..2cc911af8c81 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -48,6 +48,13 @@ #include <symbol/kallsyms.h> #include <sys/utsname.h> +static int map_fixup_cb(struct map *map, void *data __maybe_unused) +{ + map__fixup_start(map); + map__fixup_end(map); + return 0; +} + static int dso__load_kernel_sym(struct dso *dso, struct map *map); static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map); @@ -2240,10 +2247,11 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map) free(kallsyms_allocated_filename); if (err > 0 && !dso__is_kcore(dso)) { + struct maps *kmaps = map__kmaps(map); + dso__set_binary_type(dso, DSO_BINARY_TYPE__KALLSYMS); dso__set_long_name(dso, DSO__NAME_KALLSYMS, false); - map__fixup_start(map); - map__fixup_end(map); + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); } return err; @@ -2283,10 +2291,11 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map) if (err > 0) pr_debug("Using %s for symbols\n", kallsyms_filename); if (err > 0 && !dso__is_kcore(dso)) { + struct maps *kmaps = map__kmaps(map); + dso__set_binary_type(dso, DSO_BINARY_TYPE__GUEST_KALLSYMS); dso__set_long_name(dso, machine->mmap_name, 
false); - map__fixup_start(map); - map__fixup_end(map); + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); } return err; -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* [PATCH v13 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking 2026-06-05 19:48 ` [PATCH v13 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-05 19:48 ` [PATCH v13 1/5] perf maps: Add maps__mutate_mapping Ian Rogers @ 2026-06-05 19:48 ` Ian Rogers 2026-06-05 20:06 ` sashiko-bot 2026-06-05 19:48 ` [PATCH v13 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers ` (3 subsequent siblings) 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-05 19:48 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz If perf.data files are taken from one machine to another they may leak virtual addresses and so weaken ASLR on the machine they are coming from. Add an aslr option for perf inject that remaps all virtual addresses, or drops data/events, so that the virtual address information isn't leaked. This patch introduces the core ASLR remapping tool infrastructure and implements remapping/tracking for metadata events (MMAP, MMAP2, COMM, FORK, EXIT, KSYMBOL, TEXT_POKE). Sample events are delegated without remapping for now. 
Assisted-by: Antigravity:gemini-3.5-flash Signed-off-by: Ian Rogers <irogers@google.com> Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> --- tools/perf/builtin-inject.c | 57 ++- tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 695 ++++++++++++++++++++++++++++++++++++ tools/perf/util/aslr.h | 37 ++ 4 files changed, 788 insertions(+), 2 deletions(-) create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 75ffe31d03fe..65c7eccccf4d 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -8,6 +8,7 @@ */ #include "builtin.h" +#include "util/aslr.h" #include "util/color.h" #include "util/dso.h" #include "util/vdso.h" @@ -124,6 +125,7 @@ struct perf_inject { bool in_place_update_dry_run; bool copy_kcore_dir; bool convert_callchain; + bool aslr; const char *input_name; struct perf_data output; u64 bytes_written; @@ -242,8 +244,25 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, if (!inject->output.is_pipe) return 0; - if (!inject->itrace_synth_opts.set) + if (!inject->itrace_synth_opts.set) { + if (inject->aslr) { + union perf_event *stripped_event = malloc(event->header.size); + int err; + + if (!stripped_event) + return -ENOMEM; + memcpy(stripped_event, event, event->header.size); + stripped_event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + + if (stripped_event->attr.attr.type == PERF_TYPE_BREAKPOINT) + stripped_event->attr.attr.bp_addr = 0; + + err = perf_event__repipe_synth(tool, stripped_event); + free(stripped_event); + return err; + } return perf_event__repipe_synth(tool, event); + } if (event->header.size < sizeof(struct perf_event_header) + PERF_ATTR_SIZE_VER0) { pr_err("Attribute event size %u is too small\n", event->header.size); @@ -276,6 +295,8 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, attr.size = sizeof(struct perf_event_attr); 
attr.sample_type &= ~PERF_SAMPLE_AUX; + if (inject->aslr) + attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; if (inject->itrace_synth_opts.add_last_branch) { attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; @@ -2595,6 +2616,8 @@ static int __cmd_inject(struct perf_inject *inject) } } + + session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc, @@ -2704,6 +2727,8 @@ int cmd_inject(int argc, const char **argv) unwind__option), OPT_BOOLEAN(0, "convert-callchain", &inject.convert_callchain, "Generate callchains using DWARF and drop register/stack data"), + OPT_BOOLEAN(0, "aslr", &inject.aslr, + "Remap virtual memory addresses similar to ASLR"), OPT_END() }; const char * const inject_usage[] = { @@ -2711,6 +2736,7 @@ int cmd_inject(int argc, const char **argv) NULL }; bool ordered_events; + struct perf_tool *tool = &inject.tool; if (!inject.itrace_synth_opts.set) { /* Disable eager loading of kernel symbols that adds overhead to perf inject. 
*/ @@ -2731,6 +2757,11 @@ int cmd_inject(int argc, const char **argv) if (argc) usage_with_options(inject_usage, options); + if (inject.aslr && inject.convert_callchain) { + pr_err("Error: --aslr and --convert-callchain are mutually exclusive features.\n"); + return -EINVAL; + } + if (inject.strip && !inject.itrace_synth_opts.set) { pr_err("--strip option requires --itrace option\n"); return -1; @@ -2824,12 +2855,21 @@ int cmd_inject(int argc, const char **argv) inject.tool.schedstat_domain = perf_event__repipe_op2_synth; inject.tool.dont_split_sample_group = true; inject.tool.merge_deferred_callchains = false; - inject.session = __perf_session__new(&data, &inject.tool, + if (inject.aslr) { + tool = aslr_tool__new(&inject.tool); + if (!tool) { + ret = -ENOMEM; + goto out_close_output; + } + } + inject.session = __perf_session__new(&data, tool, /*trace_event_repipe=*/inject.output.is_pipe, /*host_env=*/NULL); if (IS_ERR(inject.session)) { ret = PTR_ERR(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); goto out_close_output; } @@ -2923,12 +2963,25 @@ int cmd_inject(int argc, const char **argv) ret = __cmd_inject(&inject); + if (inject.aslr) { + struct evsel *evsel; + + evlist__for_each_entry(inject.session->evlist, evsel) { + evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) + evsel->core.attr.bp_addr = 0; + } + } + guest_session__exit(&inject.guest_session); out_delete: strlist__delete(inject.known_build_ids); zstd_fini(&(inject.session->zstd_data)); perf_session__delete(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); out_close_output: if (!inject.in_place_update) perf_data__close(&inject.output); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 4bbc78b1f741..19994e026ae5 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -6,6 +6,7 @@ perf-util-y += arm64-frame-pointer-unwind-support.o perf-util-y += addr2line.o perf-util-y += addr_location.o 
perf-util-y += annotate.o +perf-util-y += aslr.o perf-util-y += blake2s.o perf-util-y += block-info.o perf-util-y += block-range.o diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c new file mode 100644 index 000000000000..5a002dcecb8f --- /dev/null +++ b/tools/perf/util/aslr.c @@ -0,0 +1,695 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "aslr.h" + +#include "addr_location.h" +#include "debug.h" +#include "event.h" +#include "evsel.h" +#include "machine.h" +#include "map.h" +#include "thread.h" +#include "tool.h" +#include "session.h" +#include "data.h" +#include "dso.h" + +#include <internal/lib.h> /* page_size */ +#include <linux/compiler.h> +#include <linux/zalloc.h> +#include <inttypes.h> +#include <unistd.h> + +/** + * struct remap_addresses_key - Key for mapping original addresses to remapped ones. + * @dso: Pointer to the DSO (Dynamic Shared Object) associated with the mapping. + * @invariant: Unique offset invariant within the VMA (Virtual Memory Area). + * Calculated as `start - pgoff`. This value remains constant when + * perf's internal `maps__fixup_overlap_and_insert` splits a map into + * fragmented VMA pieces due to overlapping events, allowing us to + * resolve split maps consistently back to the original VMA. + * @pid: Process ID associated with the mapping. + */ +struct remap_addresses_key { + struct machine *machine; + struct dso *dso; + u64 invariant; + pid_t pid; +}; + +struct aslr_mapping { + struct list_head node; + u64 orig_start; + u64 len; + u64 remap_start; +}; + +struct process_top_address { + u64 remapped_max; + u64 orig_last_end; +}; +struct aslr_tool { + /** @tool: The tool implemented here and a pointer to a delegate to process the data. */ + struct delegate_tool tool; + /** @machines: The machines with the input, not remapped, virtual address layout. */ + struct machines machines; + /** @event_copy: Buffer used to create an event to pass to the delegate. 
*/ + char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); + /** @remap_addresses: mapping from remap_addresses_key to remapped address. */ + struct hashmap remap_addresses; + /** @top_addresses: mapping from process to max remapped address. */ + struct hashmap top_addresses; +}; + +static const pid_t kernel_pid = -1; + +/* Start remapping user processes from a small non-zero offset. */ +static const u64 user_space_start = 0x200000; +static const u64 kernel_space_start_64 = 0xffff800010000000ULL; +static const u64 kernel_space_start_32 = 0x80000000ULL; + +static size_t remap_addresses__hash(long _key, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key = (struct remap_addresses_key *)_key; + void *dso_ptr = key->dso ? RC_CHK_ACCESS(key->dso) : NULL; + + return (size_t)key->machine ^ (size_t)dso_ptr ^ key->invariant ^ key->pid; +} + +static bool remap_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key1 = (struct remap_addresses_key *)_key1; + struct remap_addresses_key *key2 = (struct remap_addresses_key *)_key2; + + return key1->machine == key2->machine && + RC_CHK_EQUAL(key1->dso, key2->dso) && + key1->invariant == key2->invariant && + key1->pid == key2->pid; +} + +struct top_addresses_key { + struct machine *machine; + pid_t pid; +}; + +static size_t top_addresses__hash(long _key, void *ctx __maybe_unused) +{ + struct top_addresses_key *key = (struct top_addresses_key *)_key; + + return (size_t)key->machine ^ key->pid; +} + +static bool top_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) +{ + struct top_addresses_key *key1 = (struct top_addresses_key *)_key1; + struct top_addresses_key *key2 = (struct top_addresses_key *)_key2; + + return key1->machine == key2->machine && key1->pid == key2->pid; +} + +static u64 round_up_to_page_size(u64 addr) +{ + return (addr + page_size - 1) & ~((u64)page_size - 1); +} + +struct aslr_machine_priv { + bool kernel_maps_loaded; +}; + +static int 
aslr_tool__preload_kernel_maps(struct machine *machine) +{ + struct aslr_machine_priv *mpriv = machine->priv; + + if (!mpriv) { + mpriv = zalloc(sizeof(*mpriv)); + if (!mpriv) + return -ENOMEM; + machine->priv = mpriv; + } + + if (!mpriv->kernel_maps_loaded) { + struct maps *kmaps = machine__kernel_maps(machine); + + if (kmaps) { + int err = maps__load_maps(kmaps); + + if (err < 0) { + pr_err("ASLR: Failed to preload kernel maps for machine pid %d\n", + machine->pid); + return err; + } + } + mpriv->kernel_maps_loaded = true; + } + return 0; +} + +static void aslr_tool__free_machine_priv(struct machine *machine) +{ + free(machine->priv); + machine->priv = NULL; +} + +static void aslr_tool__destroy_machines_priv(struct machines *machines) +{ + struct rb_node *nd; + + aslr_tool__free_machine_priv(&machines->host); + for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) { + struct machine *machine = rb_entry(nd, struct machine, rb_node); + + aslr_tool__free_machine_priv(machine); + } +} + +static u64 aslr_tool__findnew_mapping(struct aslr_tool *aslr, + struct thread *aslr_thread, + u8 cpumode, u64 start, + u64 len, u64 pgoff) +{ + /* Address location for dso lookup. */ + struct addr_location al; + /* Original ASLR address based key for the remap table. */ + struct remap_addresses_key remap_key; + /* The address in the ASLR sanitized address space less pg_off. */ + u64 *remapped_invariant_ptr; + /* Key for the maximum address in a process. */ + struct top_addresses_key top_addr_key; + /* Value in top address table. */ + struct process_top_address *top = NULL; + /* Address in ASLR sanitized address space. */ + u64 remap_addr; + /* Potentially allocated remap table key. */ + struct remap_addresses_key *new_remap_key = NULL; + /* + * Potentially allocated remap table value. + * TODO: Avoid allocation necessary for perf 32-bit binary support. 
+ */ + u64 *new_remap_val = NULL; + int err; + + if (!aslr_thread) + return 0; + + /* The key to look up an incoming address to the outgoing value. */ + addr_location__init(&al); + remap_key.machine = maps__machine(thread__maps(aslr_thread)); + remap_key.pid = (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? + kernel_pid : thread__pid(aslr_thread); + if (thread__find_map(aslr_thread, cpumode, start, &al)) { + struct dso *dso = map__dso(al.map); + const char *dso_name = dso ? dso__long_name(dso) : NULL; + + remap_key.dso = dso; + if (dso && !is_anon_memory(dso_name) && !is_no_dso_memory(dso_name)) + remap_key.invariant = map__start(al.map) - map__pgoff(al.map); + else + remap_key.invariant = map__start(al.map); + } else { + remap_key.dso = NULL; + remap_key.invariant = start; + } + + /* The key to look up top allocated address. */ + top_addr_key.machine = remap_key.machine; + top_addr_key.pid = remap_key.pid; + + if (hashmap__find(&aslr->remap_addresses, &remap_key, &remapped_invariant_ptr)) { + /* Mmap already exists. */ + u64 calculated_max; + + if (al.map) { + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + + (start - map__start(al.map)); + } else { + remap_addr = *remapped_invariant_ptr + pgoff; + } + + calculated_max = remap_addr + len; + + /* See if top mapping was expanded. */ + if (hashmap__find(&aslr->top_addresses, &top_addr_key, &top)) { + if (calculated_max > top->remapped_max) + top->remapped_max = calculated_max; + } + addr_location__exit(&al); + return remap_addr; + } + /* No mmap, create an entry from the top address. */ + if (hashmap__find(&aslr->top_addresses, &top_addr_key, &top)) { + /* Current max allocated mmap address within the process. */ + remap_addr = top->remapped_max; + + if (start == top->orig_last_end) { + /* Contiguous mapping, do not add 1 page gap! */ + remap_addr = round_up_to_page_size(remap_addr); + } else { + /* Give 1 page gap from current max page. 
*/ + remap_addr = round_up_to_page_size(remap_addr); + remap_addr += page_size; + } + top->orig_last_end = start + len; + if (remap_addr + len > top->remapped_max) + top->remapped_max = remap_addr + len; + } else { + /* First address of the process, allocate key and first top address. */ + struct top_addresses_key *tk; + struct process_top_address *top_val; + struct machine *machine = remap_key.machine; + struct perf_env *env = machine ? machine->env : NULL; + u64 kernel_start_addr = perf_env__kernel_is_64_bit(env) ? + kernel_space_start_64 : kernel_space_start_32; + + remap_addr = (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? + kernel_start_addr : user_space_start; + remap_addr = round_up_to_page_size(remap_addr); + + tk = malloc(sizeof(*tk)); + top_val = malloc(sizeof(*top_val)); + if (!tk || !top_val) { + err = -ENOMEM; + } else { + *tk = top_addr_key; + top_val->remapped_max = remap_addr + len; + top_val->orig_last_end = start + len; + err = hashmap__insert(&aslr->top_addresses, tk, top_val, + HASHMAP_ADD, NULL, NULL); + } + if (err) { + errno = -err; + pr_err("Failure to add ASLR process top address %m\n"); + free(tk); + free(top_val); + addr_location__exit(&al); + return 0; + } + } + /* Create remapping entry. */ + new_remap_key = malloc(sizeof(*new_remap_key)); + new_remap_val = malloc(sizeof(u64)); + if (!new_remap_key || !new_remap_val) { + err = -ENOMEM; + } else { + *new_remap_key = remap_key; + new_remap_key->dso = dso__get(remap_key.dso); + if (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) { + if (al.map) { + *new_remap_val = remap_addr - + (start - map__start(al.map)) - + map__pgoff(al.map); + } else { + *new_remap_val = remap_addr; + } + } else { + *new_remap_val = remap_addr - (al.map ? 
map__pgoff(al.map) : pgoff); + } + err = hashmap__add(&aslr->remap_addresses, new_remap_key, new_remap_val); + if (err) + dso__put(new_remap_key->dso); + } + if (err) { + errno = -err; + pr_err("Failure to add ASLR remapping %m\n"); + free(new_remap_key); + free(new_remap_val); + addr_location__exit(&al); + return 0; + } + addr_location__exit(&al); + return remap_addr; +} + +static int aslr_tool__process_mmap(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_mmap(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap.pid, event->mmap.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap, &event->mmap, event->mmap.header.size); + /* Remaps the mmap.start. 
*/ + new_event->mmap.start = aslr_tool__findnew_mapping(aslr, thread, cpumode, + event->mmap.start, + event->mmap.len, + event->mmap.pgoff); + if (cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + new_event->mmap.pgoff = new_event->mmap.start; + err = delegate->mmap(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_mmap2(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_mmap2(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap2.pid, event->mmap2.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap2, &event->mmap2, event->mmap2.header.size); + /* Remaps the mmap.start. 
*/ + new_event->mmap2.start = aslr_tool__findnew_mapping(aslr, thread, cpumode, + event->mmap2.start, + event->mmap2.len, + event->mmap2.pgoff); + if (cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + new_event->mmap2.pgoff = new_event->mmap2.start; + err = delegate->mmap2(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_comm(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_comm(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->comm(delegate, event, sample, machine); +} + +static int aslr_tool__process_fork(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. 
*/ + err = perf_event__process_fork(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->fork(delegate, event, sample, machine); +} + +static int aslr_tool__process_exit(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_exit(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->exit(delegate, event, sample, machine); +} + +static int aslr_tool__process_text_poke(const struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + /* Drop in case the instruction encodes an ASLR revealing address. 
*/ + return 0; +} + +static int aslr_tool__process_ksymbol(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + err = perf_event__process_ksymbol(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, kernel_pid, 0); + if (!thread) + return -ENOMEM; + memcpy(&new_event->ksymbol, &event->ksymbol, event->ksymbol.header.size); + /* Remaps the ksymbol.start */ + new_event->ksymbol.addr = aslr_tool__findnew_mapping(aslr, thread, + PERF_RECORD_MISC_KERNEL, + event->ksymbol.addr, + event->ksymbol.len, + /*pgoff=*/0); + + err = delegate->ksymbol(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_sample(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct perf_tool *delegate = aslr->tool.delegate; + + return delegate->sample(delegate, event, sample, machine); +} + +static int skipn(int fd, off_t n) +{ + char buf[4096]; + ssize_t ret; + + while (n > 0) { + ret = read(fd, buf, min_t(off_t, n, (off_t)sizeof(buf))); + if (ret <= 0) + return ret; + n -= ret; + } + + return 0; +} + +static s64 
aslr_tool__process_auxtrace(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, + union perf_event *event) +{ + pr_warning_once("ASLR: Dropping auxtrace data as it cannot be obfuscated.\n"); + if (perf_data__is_pipe(session->data)) { + /* Copy behavior of the stub by reading all pipe data. */ + int err = skipn(perf_data__fd(session->data), event->auxtrace.size); + + if (err < 0) + return err; + } + return event->auxtrace.size; +} + +static int aslr_tool__process_auxtrace_info(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + +static int aslr_tool__process_auxtrace_error(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + +static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) +{ + delegate_tool__init(&aslr->tool, delegate); + aslr->tool.tool.ordered_events = true; + + machines__init(&aslr->machines); + + hashmap__init(&aslr->remap_addresses, + remap_addresses__hash, remap_addresses__equal, + /*ctx=*/NULL); + hashmap__init(&aslr->top_addresses, + top_addresses__hash, top_addresses__equal, + /*ctx=*/NULL); + + aslr->tool.tool.sample = aslr_tool__process_sample; + /* read - reads a counter, okay to delegate. */ + aslr->tool.tool.mmap = aslr_tool__process_mmap; + aslr->tool.tool.mmap2 = aslr_tool__process_mmap2; + aslr->tool.tool.comm = aslr_tool__process_comm; + aslr->tool.tool.fork = aslr_tool__process_fork; + aslr->tool.tool.exit = aslr_tool__process_exit; + /* namesspaces, cgroup, lost, lost_sample, aux, */ + /* itrace_start, aux_output_hw_id, context_switch, throttle, unthrottle */ + /* - no virtual addresses. */ + aslr->tool.tool.ksymbol = aslr_tool__process_ksymbol; + /* bpf - no virtual address. 
*/ + aslr->tool.tool.text_poke = aslr_tool__process_text_poke; + /* + * event_update, tracing_data, finished_round, build_id, id_index, + * auxtrace_info, auxtrace_error, time_conv, thread_map, cpu_map, + * stat_config, stat, feature, finished_init, bpf_metadata, compressed, + * auxtrace - no virtual addresses. + */ + aslr->tool.tool.auxtrace = aslr_tool__process_auxtrace; + aslr->tool.tool.auxtrace_info = aslr_tool__process_auxtrace_info; + aslr->tool.tool.auxtrace_error = aslr_tool__process_auxtrace_error; +} + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate) +{ + struct aslr_tool *aslr = zalloc(sizeof(*aslr)); + + if (!aslr) + return NULL; + + aslr_tool__init(aslr, delegate); + return &aslr->tool.tool; +} + +void aslr_tool__delete(struct perf_tool *tool) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct hashmap_entry *cur; + size_t bkt; + + if (!tool) + return; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + + hashmap__for_each_entry(&aslr->remap_addresses, cur, bkt) { + struct remap_addresses_key *key = (struct remap_addresses_key *)cur->pkey; + + if (key) + dso__put(key->dso); + zfree(&cur->pkey); + zfree(&cur->pvalue); + } + hashmap__for_each_entry(&aslr->top_addresses, cur, bkt) { + zfree(&cur->pkey); + zfree(&cur->pvalue); + } + + hashmap__clear(&aslr->remap_addresses); + hashmap__clear(&aslr->top_addresses); + aslr_tool__destroy_machines_priv(&aslr->machines); + machines__destroy_kernel_maps(&aslr->machines); + machines__exit(&aslr->machines); + free(aslr); +} diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h new file mode 100644 index 000000000000..a9b90bf29540 --- /dev/null +++ b/tools/perf/util/aslr.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_ASLR_H +#define __PERF_ASLR_H + +#include <linux/perf_event.h> + +#define ASLR_SUPPORTED_SAMPLE_TYPE ( \ + PERF_SAMPLE_IDENTIFIER | \ + PERF_SAMPLE_IP | \ + 
PERF_SAMPLE_TID | \ + PERF_SAMPLE_TIME | \ + PERF_SAMPLE_ADDR | \ + PERF_SAMPLE_ID | \ + PERF_SAMPLE_STREAM_ID | \ + PERF_SAMPLE_CPU | \ + PERF_SAMPLE_PERIOD | \ + PERF_SAMPLE_READ | \ + PERF_SAMPLE_CALLCHAIN | \ + PERF_SAMPLE_RAW | \ + PERF_SAMPLE_BRANCH_STACK | \ + PERF_SAMPLE_STACK_USER | \ + PERF_SAMPLE_WEIGHT_TYPE | \ + PERF_SAMPLE_DATA_SRC | \ + PERF_SAMPLE_TRANSACTION | \ + PERF_SAMPLE_PHYS_ADDR | \ + PERF_SAMPLE_CGROUP | \ + PERF_SAMPLE_DATA_PAGE_SIZE | \ + PERF_SAMPLE_CODE_PAGE_SIZE | \ + PERF_SAMPLE_AUX) + +struct perf_tool; +struct evsel; + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate); +void aslr_tool__delete(struct perf_tool *aslr); + +#endif /* __PERF_ASLR_H */ -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v13 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking 2026-06-05 19:48 ` [PATCH v13 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers @ 2026-06-05 20:06 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-06-05 20:06 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider: - [Medium] Calling `perf_env__kernel_is_64_bit()` with a NULL `env` pointer in `aslr_tool__findnew_mapping()` causes an immediate crash when processing mmap events. -- commit 8bfb4707677f93e242641f872dc50c16c412d50e Author: Ian Rogers <irogers@google.com> perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking This patch adds an --aslr option to perf inject to remap virtual addresses and drop data/events to prevent leaking ASLR information. It implements remapping and tracking for metadata events. > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > new file mode 100644 > index 0000000000000..5a002dcecb8f1 > --- /dev/null > +++ b/tools/perf/util/aslr.c [ ... ] > +static u64 aslr_tool__findnew_mapping(struct aslr_tool *aslr, > + struct thread *aslr_thread, > + u8 cpumode, u64 start, > + u64 len, u64 pgoff) > +{ [ ... ] > + } else { > + /* First address of the process, allocate key and first top address. */ > + struct top_addresses_key *tk; > + struct process_top_address *top_val; > + struct machine *machine = remap_key.machine; > + struct perf_env *env = machine ? machine->env : NULL; > + u64 kernel_start_addr = perf_env__kernel_is_64_bit(env) ? > + kernel_space_start_64 : kernel_space_start_32; [Severity: Medium] Will this crash if env is NULL? If machine->env is NULL, env is assigned NULL. 
Passing it to perf_env__kernel_is_64_bit() will unconditionally dereference it: tools/perf/util/env.c:perf_env__kernel_is_64_bit() { if (env->kernel_is_64_bit == -1) perf_env__init_kernel_mode(env); ... } Because machines__init() leaves the host machine's env pointer as NULL, could this cause a segmentation fault when processing the first mapped event? -- Sashiko AI review · https://sashiko.dev/#/patchset/20260605194851.2533568-1-irogers@google.com?part=2 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v13 3/5] perf inject/aslr: Implement sample address remapping 2026-06-05 19:48 ` [PATCH v13 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-05 19:48 ` [PATCH v13 1/5] perf maps: Add maps__mutate_mapping Ian Rogers 2026-06-05 19:48 ` [PATCH v13 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers @ 2026-06-05 19:48 ` Ian Rogers 2026-06-05 19:48 ` [PATCH v13 4/5] perf test: Add inject ASLR test Ian Rogers ` (2 subsequent siblings) 5 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-05 19:48 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz Add the sample address remapping logic to the ASLR tool. This patch implements aslr_tool__process_sample, which parses sample events, remaps IPs, ADDRs, callchains, and branch stacks using the mappings collected from metadata events, and drops potentially leaking raw, register, stack, physical address, and aux samples. Also adds the aslr_tool__remap_address helper function. 
Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/util/aslr.c | 449 +++++++++++++++++++++++++++++++++++++++- tools/perf/util/evsel.c | 6 +- tools/perf/util/evsel.h | 10 +- 3 files changed, 456 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c index 5a002dcecb8f..c62ae5bcc124 100644 --- a/tools/perf/util/aslr.c +++ b/tools/perf/util/aslr.c @@ -111,6 +111,60 @@ static u64 round_up_to_page_size(u64 addr) return (addr + page_size - 1) & ~((u64)page_size - 1); } +static u64 aslr_tool__remap_address(struct aslr_tool *aslr, + struct thread *aslr_thread, + u8 cpumode, + u64 addr) +{ + struct addr_location al; + struct remap_addresses_key key; + u64 *remapped_invariant_ptr = NULL; + u64 remap_addr = 0; + u8 effective_cpumode = cpumode; + + if (!aslr_thread) + return 0; /* No thread. */ + + addr_location__init(&al); + if (!thread__find_map(aslr_thread, cpumode, addr, &al)) { + /* + * If lookup fails with specified cpumode, try fallback to the other space + * to be robust against bad cpumode in samples. + */ + if (cpumode == PERF_RECORD_MISC_KERNEL) + effective_cpumode = PERF_RECORD_MISC_USER; + else if (cpumode == PERF_RECORD_MISC_USER) + effective_cpumode = PERF_RECORD_MISC_KERNEL; + else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + effective_cpumode = PERF_RECORD_MISC_GUEST_USER; + else if (cpumode == PERF_RECORD_MISC_GUEST_USER) + effective_cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + + if (!thread__find_map(aslr_thread, effective_cpumode, addr, &al)) { + addr_location__exit(&al); + return 0; /* No mmap. */ + } + } + + key.machine = maps__machine(thread__maps(aslr_thread)); + key.dso = map__dso(al.map); + key.invariant = map__start(al.map) - map__pgoff(al.map); + key.pid = (effective_cpumode == PERF_RECORD_MISC_KERNEL || + effective_cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? 
+ kernel_pid : thread__pid(aslr_thread); + + if (hashmap__find(&aslr->remap_addresses, &key, &remapped_invariant_ptr)) { + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + + (addr - map__start(al.map)); + } else { + pr_debug("Cannot find a remapped entry for address %lx in mapping %lx(%lx) for pid=%d\n", + addr, map__start(al.map), map__size(al.map), key.pid); + } + + addr_location__exit(&al); + return remap_addr; +} + struct aslr_machine_priv { bool kernel_maps_loaded; }; @@ -560,13 +614,400 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, struct perf_sample *sample, struct machine *machine) { - struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); - struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); - struct perf_tool *delegate = aslr->tool.delegate; + struct evsel *evsel = sample->evsel; + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + int ret; + u64 sample_type; + struct thread *thread; + struct machine *aslr_machine; + __u64 max_i; + __u64 max_j; + union perf_event *new_event; + struct perf_sample new_sample; + __u64 *in_array, *out_array; + u8 cpumode; + u64 addr; + size_t i; + size_t j; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + if (evsel__is_dummy_event(evsel)) + return delegate->sample(delegate, event, sample, machine); + + ret = -EFAULT; + sample_type = evsel->core.attr.sample_type; + max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); + max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); + new_event = (union perf_event *)aslr->event_copy; + cpumode = sample->cpumode; + i = 0; + j = 0; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return 
-ENOMEM; + + thread = machine__findnew_thread(aslr_machine, sample->pid, sample->tid); + + if (!thread) + return -ENOMEM; - return delegate->sample(delegate, event, sample, machine); + if (max_i > PERF_SAMPLE_MAX_SIZE / sizeof(u64)) + goto out_put; + + new_event->sample.header = event->sample.header; + + in_array = &event->sample.array[0]; + out_array = &new_event->sample.array[0]; + +#define CHECK_BOUNDS(required_i, required_j) \ + (i + (required_i) > max_i || j + (required_j) > max_j) + +#define COPY_U64() \ + do { \ + if (CHECK_BOUNDS(1, 1)) { \ + ret = -EFAULT; \ + goto out_put; \ + } \ + out_array[j++] = in_array[i++]; \ + } while (0) + +#define REMAP_U64(addr_field) \ + do { \ + if (CHECK_BOUNDS(1, 1)) { \ + ret = -EFAULT; \ + goto out_put; \ + } \ + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr_field); \ + i++; \ + } while (0) + + if (sample_type & PERF_SAMPLE_IDENTIFIER) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_IP) + REMAP_U64(sample->ip); + if (sample_type & PERF_SAMPLE_TID) + COPY_U64(); /* pid, tid */ + if (sample_type & PERF_SAMPLE_TIME) + COPY_U64(); /* time */ + if (sample_type & PERF_SAMPLE_ADDR) + REMAP_U64(sample->addr); + if (sample_type & PERF_SAMPLE_ID) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_STREAM_ID) + COPY_U64(); /* stream_id */ + if (sample_type & PERF_SAMPLE_CPU) + COPY_U64(); /* cpu, res */ + if (sample_type & PERF_SAMPLE_PERIOD) + COPY_U64(); /* period */ + if (sample_type & PERF_SAMPLE_READ) { + if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } else { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) 
{ + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + nr = out_array[j++]; + i++; + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + for (u64 cntr = 0; cntr < nr; cntr++) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } + } + } + if (sample_type & PERF_SAMPLE_CALLCHAIN) { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + nr = out_array[j++]; + i++; + + for (u64 cntr = 0; cntr < nr; cntr++) { + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + addr = in_array[i++]; + if (addr >= PERF_CONTEXT_MAX) { + out_array[j++] = addr; + switch (addr) { + case PERF_CONTEXT_HV: + cpumode = PERF_RECORD_MISC_HYPERVISOR; + break; + case PERF_CONTEXT_KERNEL: + cpumode = PERF_RECORD_MISC_KERNEL; + break; + case PERF_CONTEXT_USER: + cpumode = PERF_RECORD_MISC_USER; + break; + case PERF_CONTEXT_GUEST: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_KERNEL: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_USER: + cpumode = PERF_RECORD_MISC_GUEST_USER; + break; + case PERF_CONTEXT_USER_DEFERRED: + if (cntr + 1 >= nr) { + pr_debug("Truncated callchain deferred cookie context\n"); + ret = 0; + goto out_put; + } + /* + * Immediately followed by a 64-bit + * stitching cookie. Skip/Copy it! 
+ */ + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j++] = in_array[i++]; + cntr++; + cpumode = PERF_RECORD_MISC_USER; + break; + default: + pr_debug("invalid callchain context: %"PRIx64"\n", addr); + ret = 0; + goto out_put; + } + continue; + } + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr); + } + } + if (sample_type & PERF_SAMPLE_RAW) { + size_t bytes = sizeof(u32) + sample->raw_size; + size_t u64_words = (bytes + 7) / 8; + + if (i + u64_words > max_i || j + u64_words > max_j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], bytes); + i += u64_words; + j += u64_words; + /* + * TODO: certain raw samples can be remapped, such as + * tracepoints by examining their fields. + */ + pr_debug("Dropping raw samples as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + nr = out_array[j++]; + i++; + + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) + COPY_U64(); /* hw_idx */ + + if (nr > (ULLONG_MAX / 3)) { + ret = -EFAULT; + goto out_put; + } + if (nr * 3 > max_i - i || nr * 3 > max_j - j) { + ret = -EFAULT; + goto out_put; + } + for (u64 cntr = 0; cntr < nr; cntr++) { + out_array[j++] = aslr_tool__remap_address(aslr, thread, + sample->cpumode, + in_array[i++]); /* from */ + out_array[j++] = aslr_tool__remap_address(aslr, thread, + sample->cpumode, + in_array[i++]); /* to */ + out_array[j++] = in_array[i++]; /* flags */ + } + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) { + if (nr > max_i - i || nr > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); + i += nr; + j += nr; + /* TODO: confirm branch counters don't leak ASLR information. 
*/ + pr_debug("Dropping sample branch counters as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + } + if (sample_type & PERF_SAMPLE_REGS_USER) { + if (CHECK_BOUNDS(1, 0)) { + ret = -EFAULT; + goto out_put; + } + /* abi */ + COPY_U64(); + /* TODO: can this be less conservative? */ + pr_debug("Dropping regs user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_STACK_USER) { + u64 size; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + size = out_array[j++]; + i++; + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + if (size % 8) { + size_t pad = 8 - (size % 8); + + memset(((char *)&out_array[j]) + size, 0, pad); + } + i += u64_words; + j += u64_words; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping stack user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + COPY_U64(); /* perf_sample_weight */ + if (sample_type & PERF_SAMPLE_DATA_SRC) + COPY_U64(); /* data_src */ + if (sample_type & PERF_SAMPLE_TRANSACTION) + COPY_U64(); /* transaction */ + if (sample_type & PERF_SAMPLE_REGS_INTR) { + if (CHECK_BOUNDS(1, 0)) { + ret = -EFAULT; + goto out_put; + } + /* abi */ + COPY_U64(); + /* TODO: can this be less conservative? */ + pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_PHYS_ADDR) { + COPY_U64(); /* phys_addr */ + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping physical address sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_CGROUP) + COPY_U64(); /* cgroup */ + if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + COPY_U64(); /* data_page_size */ + if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + COPY_U64(); /* code_page_size */ + + if (sample_type & PERF_SAMPLE_AUX) { + u64 size; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + size = out_array[j++]; + i++; + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + if (size % 8) { + size_t pad = 8 - (size % 8); + + memset(((char *)&out_array[j]) + size, 0, pad); + } + i += u64_words; + j += u64_words; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping aux sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + if (evsel__is_offcpu_event(evsel)) { + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping off-CPU sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); + + perf_sample__init(&new_sample, /*all=*/ true); + ret = evsel__parse_sample(evsel, new_event, &new_sample); + + if (ret) { + perf_sample__exit(&new_sample); + goto out_put; + } + + new_sample.evsel = evsel; + ret = delegate->sample(delegate, new_event, &new_sample, machine); + perf_sample__exit(&new_sample); + +out_put: + thread__put(thread); + return ret; } +#undef CHECK_BOUNDS +#undef COPY_U64 +#undef REMAP_U64 + static int skipn(int fd, off_t n) { char buf[4096]; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 34c03f47a913..05fa0010c858 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -3337,11 +3337,11 @@ static int __set_offcpu_sample(struct perf_sample *data) return -EFAULT; } -int evsel__parse_sample(struct evsel *evsel, union perf_event *event, - struct perf_sample *data) +int __evsel__parse_sample(struct evsel *evsel, union perf_event *event, + struct perf_sample *data, bool needs_swap) { u64 type = evsel->core.attr.sample_type; - bool swapped = evsel->needs_swap; + bool swapped = needs_swap; const __u64 *array; u16 max_size = event->header.size; const void *endp = (void *)event + max_size; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 8178858d168a..8009be22cc3f 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -432,8 +432,14 @@ static inline int evsel__read_on_cpu_scaled(struct evsel *evsel, int cpu_map_idx return __evsel__read_on_cpu(evsel, cpu_map_idx, thread, true); } -int evsel__parse_sample(struct evsel *evsel, union perf_event *event, - struct perf_sample *sample); +int __evsel__parse_sample(struct evsel *evsel, union perf_event *event, + struct perf_sample *data, bool needs_swap); + +static inline int evsel__parse_sample(struct evsel *evsel, union perf_event *event, + struct 
perf_sample *data) +{ + return __evsel__parse_sample(evsel, event, data, evsel->needs_swap); +} int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event, u64 *timestamp); -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* [PATCH v13 4/5] perf test: Add inject ASLR test 2026-06-05 19:48 ` [PATCH v13 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (2 preceding siblings ...) 2026-06-05 19:48 ` [PATCH v13 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers @ 2026-06-05 19:48 ` Ian Rogers 2026-06-05 19:48 ` [PATCH v13 5/5] perf aslr: Strip sample registers Ian Rogers 2026-06-05 20:56 ` [PATCH v14 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 5 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-05 19:48 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz Add a new shell test to verify the 'perf inject --aslr' feature. The test covers: - Basic address remapping for user space samples. - Pipe mode coverage for 'perf record' piped into 'perf inject'. - Callchain address remapping. - Consistency of 'perf report' output before and after injection. - Pipe mode report consistency. - Dropping of samples that leak ASLR info (physical addresses). - Kernel address remapping (utilizing a dedicated kernel-intensive VFS dd workload to guarantee a continuous flow of timer-interrupt samples taken in kernel mode). - Kernel report consistency with address normalization. The test suite is hardened with global 'set -o pipefail' assertions to catch pipeline failures, stream-consuming awk processors to handle SIGPIPE signals, and a dedicated pipe output scenario validating raw 'perf inject -o -' stdout streams. 
Assisted-by: Antigravity:gemini-3.5-flash Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/tests/shell/inject_aslr.sh | 464 ++++++++++++++++++++++++++ 1 file changed, 464 insertions(+) create mode 100755 tools/perf/tests/shell/inject_aslr.sh diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh new file mode 100755 index 000000000000..d8ded16ba905 --- /dev/null +++ b/tools/perf/tests/shell/inject_aslr.sh @@ -0,0 +1,464 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# perf inject --aslr test + +set -e +set -o pipefail + +shelldir=$(dirname "$0") +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + +sym="noploop" + +skip_test_missing_symbol ${sym} + +# Create global temp directory +temp_dir=$(mktemp -d /tmp/perf-test-aslr.XXXXXXXXXX) + +prog="perf test -w noploop" +[ "$(uname -m)" = "s390x" ] && prog="$prog 3" +err=0 +kprog="dd if=/dev/zero of=/dev/null bs=1M count=500" + +cleanup() { + local exit_code=${1:-$?} + trap - EXIT TERM INT + if [ "${exit_code}" -ne 0 ] || [ "${err}" -ne 0 ]; then + echo "Test failed! Preserving temp directory: ${temp_dir}" + return + fi + # Check if temp_dir is set and looks sane before removing + if [[ "${temp_dir}" =~ ^/tmp/perf-test-aslr\. ]]; then + rm -rf "${temp_dir}" + fi +} + +trap_cleanup() { + local exit_code=$? 
+ echo "Unexpected signal in ${FUNCNAME[1]}" + cleanup ${exit_code} + exit ${exit_code} +} +trap trap_cleanup EXIT TERM INT + +get_noploop_addr() { + local file=$1 + perf script -i "$file" | awk ' + BEGIN { found=0 } + { + for (i=1; i<=NF; i++) { + if ($i ~ /noploop\+/) { + if (!found) { + print $(i-1) + found=1 + } + } + } + }' +} + +test_basic_aslr() { + echo "Test basic ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.basic.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.basic.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -v --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Basic ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Basic ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Basic ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Basic ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Basic ASLR test [Success]" + fi +} + +test_pipe_aslr() { + echo "Test pipe mode ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe.XXXXXX") + + # Use tee to save the original pipe data for comparison + perf record -e task-clock:u -o - ${prog} | tee "${data}" | perf inject --aslr -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Pipe ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Pipe ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Pipe ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Pipe ASLR test [Failed - addresses are not remapped]" + 
err=1 + else + echo "Pipe ASLR test [Success]" + fi +} + +test_callchain_aslr() { + echo "Test Callchain ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.callchain.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.callchain.XXXXXX") + + perf record -g -e task-clock:u -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Callchain ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Callchain ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Callchain ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Callchain ASLR test [Failed - addresses are not remapped]" + err=1 + else + # Extract callchain addresses (indented lines starting with hex addresses) + orig_callchain=$(perf script -i "${data}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + new_callchain=$(perf script -i "${data2}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + + if [ -z "$orig_callchain" ]; then + echo "Callchain ASLR test [Failed - no callchain samples in original file]" + err=1 + elif [ -z "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain data was dropped]" + err=1 + elif [ "$orig_callchain" = "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain addresses were not remapped]" + err=1 + else + echo "Callchain ASLR test [Success]" + fi + fi +} + +test_report_aslr() { + echo "Test perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i 
"${data}" -o "${data_clean}" + perf inject -v -b --aslr -i "${data}" -o "${data2}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Report ASLR test [Success]" + fi +} + +test_pipe_report_aslr() { + echo "Test pipe mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + # Use tee to save the original pipe data, then process it with inject -b + perf record -e task-clock:u -o - ${prog} | \ + tee "${data}" | \ + perf inject -b --aslr -o "${data2}" + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || 
true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Pipe Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Report ASLR test [Success]" + fi +} + +test_pipe_out_report_aslr() { + echo "Test pipe output mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_out_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf inject -b --aslr -i "${data}" -o - | perf report -i - --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! 
-s "${report1_clean}" ]; then + echo "Pipe Output Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Output Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Output Report ASLR test [Success]" + fi +} + +test_dropped_samples() { + echo "Test dropped samples (phys-data)" + local data + data=$(mktemp "${temp_dir}/perf.data.dropped.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.dropped.XXXXXX") + + # Check if --phys-data is supported by recording a short run + if ! perf record -e task-clock:u --phys-data -o "${data}" -- sleep 0.1 > /dev/null 2>&1; then + echo "Skipping dropped samples test as --phys-data is not supported" + return + fi + + perf record -e task-clock:u --phys-data -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + # Verify that the original file actually contained samples! + orig_samples=$(perf script -i "${data}" | wc -l) + if [ "$orig_samples" -eq 0 ]; then + echo "Dropped samples test [Failed - no samples in original file]" + err=1 + else + # Verify that samples are dropped. + samples_count=$(perf script -i "${data2}" | wc -l) + + if [ "$samples_count" -gt 0 ]; then + echo "Dropped samples test [Failed - samples were not dropped]" + err=1 + else + echo "Dropped samples test [Success]" + fi + fi +} + +test_kernel_aslr() { + echo "Test kernel ASLR remapping" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then + echo "Skipping kernel ASLR test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel ASLR test as kernel map could not be recorded (permissions restricted)" + return + fi + + perf inject -v --aslr -i "${kdata}" -o "${kdata2}" + + # Check if kernel addresses are remapped. + # Find the field that ends with :k: (the event name) and take the next field! + orig_addr=$(perf script -i "${kdata}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + new_addr=$(perf script -i "${kdata2}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + + echo "Kernel ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Kernel ASLR test [Failed - no kernel samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Kernel ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Kernel ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Kernel ASLR test [Success]" + fi +} + +test_kernel_report_aslr() { + echo "Test kernel perf report consistency" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel_report.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_report_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then + echo "Skipping kernel report test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel report test as kernel map could not be recorded (permissions restricted)" + return + fi + + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i "${kdata}" -o "${data_clean}" + perf inject -v -b --aslr -i "${kdata}" -o "${kdata2}" + + local report1="${temp_dir}/report_kernel1" + local report2="${temp_dir}/report_kernel2" + local report1_clean="${temp_dir}/report_kernel1.clean" + local report2_clean="${temp_dir}/report_kernel2.clean" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${kdata2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' > "${report2_clean}" || true + + # Normalize kernel DSOs and addresses in clean reports + # This allows kernel modules to be either a module or kernel.kallsyms + local report1_norm="${temp_dir}/report_kernel1.norm" + local report2_norm="${temp_dir}/report_kernel2.norm" + local diff_file="${temp_dir}/diff_kernel" + + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report1_clean}" | \ + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ + sort > "${report1_norm}" || true + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report2_clean}" | \ + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ + sort > "${report2_norm}" || true + + diff -u -w "${report1_norm}" "${report2_norm}" > "${diff_file}" || true + + if [ ! 
-s "${report1_norm}" ]; then + echo "Kernel Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Kernel Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Kernel Report ASLR test [Success]" + fi +} + +test_basic_aslr +test_pipe_aslr +test_callchain_aslr +test_report_aslr +test_pipe_report_aslr +test_pipe_out_report_aslr +test_dropped_samples +test_kernel_aslr +test_kernel_report_aslr + +cleanup ${err} +exit $err -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* [PATCH v13 5/5] perf aslr: Strip sample registers 2026-06-05 19:48 ` [PATCH v13 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (3 preceding siblings ...) 2026-06-05 19:48 ` [PATCH v13 4/5] perf test: Add inject ASLR test Ian Rogers @ 2026-06-05 19:48 ` Ian Rogers 2026-06-05 20:04 ` sashiko-bot 2026-06-05 20:56 ` [PATCH v14 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-05 19:48 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz Refactor the ASLR tool to strip out only the register dump payload by masking out the relevant perf_event_attr fields when the delegated tool is handling the data. struct aslr_evsel_priv maintains the original perf_event_attr values and is looked up via the evsel_orig_attrs hashmap. This allows us to keep samples that would otherwise be dropped because they contain registers, while still stripping the register values from the output so they cannot leak addresses.
Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/builtin-inject.c | 46 ++++-- tools/perf/tests/shell/inject_aslr.sh | 55 +++++++ tools/perf/util/aslr.c | 208 +++++++++++++++++++++----- tools/perf/util/aslr.h | 4 + 4 files changed, 263 insertions(+), 50 deletions(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 65c7eccccf4d..de315bb334b3 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -253,6 +253,12 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, return -ENOMEM; memcpy(stripped_event, event, event->header.size); stripped_event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + if (stripped_event->attr.attr.size >= + (offsetof(struct perf_event_attr, sample_regs_user) + sizeof(u64))) + stripped_event->attr.attr.sample_regs_user = 0; + if (stripped_event->attr.attr.size >= + (offsetof(struct perf_event_attr, sample_regs_intr) + sizeof(u64))) + stripped_event->attr.attr.sample_regs_intr = 0; if (stripped_event->attr.attr.type == PERF_TYPE_BREAKPOINT) stripped_event->attr.attr.bp_addr = 0; @@ -295,8 +301,13 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, attr.size = sizeof(struct perf_event_attr); attr.sample_type &= ~PERF_SAMPLE_AUX; - if (inject->aslr) + if (inject->aslr) { attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + if (attr.type == PERF_TYPE_BREAKPOINT) + attr.bp_addr = 0; + attr.sample_regs_user = 0; + attr.sample_regs_intr = 0; + } if (inject->itrace_synth_opts.add_last_branch) { attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; @@ -2618,6 +2629,9 @@ static int __cmd_inject(struct perf_inject *inject) + if (inject->aslr) + aslr_tool__strip_evlist(inject->session->tool, session->evlist); + session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; perf_session__inject_header(session, session->evlist, fd, 
&inj_fc.fc, @@ -2876,6 +2890,18 @@ int cmd_inject(int argc, const char **argv) if (zstd_init(&(inject.session->zstd_data), 0) < 0) pr_warning("Decompression initialization failed.\n"); + if (inject.aslr) { + struct evsel *evsel; + + evlist__for_each_entry(inject.session->evlist, evsel) { + ret = aslr_tool__cache_orig_attrs(tool, evsel); + if (ret) { + pr_err("Failed to cache original attributes: %d\n", ret); + goto out_delete; + } + } + } + /* Save original section info before feature bits change */ ret = save_section_info(&inject); if (ret) @@ -2894,10 +2920,17 @@ int cmd_inject(int argc, const char **argv) * the input. */ if (!data.is_pipe) { + if (inject.aslr) + aslr_tool__strip_evlist(tool, inject.session->evlist); + ret = perf_event__synthesize_for_pipe(&inject.tool, inject.session, &inject.output, perf_event__repipe); + + if (inject.aslr) + aslr_tool__restore_evlist(tool, inject.session->evlist); + if (ret < 0) goto out_delete; } @@ -2963,17 +2996,6 @@ int cmd_inject(int argc, const char **argv) ret = __cmd_inject(&inject); - if (inject.aslr) { - struct evsel *evsel; - - evlist__for_each_entry(inject.session->evlist, evsel) { - evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; - - if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) - evsel->core.attr.bp_addr = 0; - } - } - guest_session__exit(&inject.guest_session); out_delete: diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh index d8ded16ba905..21d306a0ff2f 100755 --- a/tools/perf/tests/shell/inject_aslr.sh +++ b/tools/perf/tests/shell/inject_aslr.sh @@ -450,6 +450,60 @@ test_kernel_report_aslr() { fi } +test_regs_stripping() { + echo "Test user register stripping" + local rdata="${temp_dir}/perf.data.regs" + local rdata2="${temp_dir}/perf.data.regs.injected" + local rdata_clean="${temp_dir}/perf.data.regs.clean" + + if ! 
perf record --user-regs -o "${rdata}" ${prog} > /dev/null 2>&1; then + echo "Skipping user registers test as recording failed (unsupported flag/platform)" + return + fi + + perf inject -b -i "${rdata}" -o "${rdata_clean}" + perf inject -v -b --aslr -i "${rdata}" -o "${rdata2}" + + local report1="${temp_dir}/report_regs1" + local report2="${temp_dir}/report_regs2" + local report1_clean="${temp_dir}/report_regs1.clean" + local report2_clean="${temp_dir}/report_regs2.clean" + local diff_file="${temp_dir}/diff_regs" + + perf report -i "${rdata_clean}" --stdio > "${report1}" 2>/dev/null || true + perf report -i "${rdata2}" --stdio > "${report2}" 2>/dev/null || true + + grep '%' "${report1}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | \ + sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | \ + sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! 
-s "${report1_clean}" ]; then + echo "User registers stripping test [Failed - profile trace starved/empty]" + err=1 + return + elif [ -s "${diff_file}" ]; then + echo "User registers stripping test [Failed - report parsing differs]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + return + fi + + local script_dump="${temp_dir}/script_regs_dump" + perf script -D -i "${rdata2}" > "${script_dump}" 2>/dev/null || true + if grep -q "PERF_SAMPLE_REGS_USER" "${script_dump}"; then + echo "User registers stripping test [Failed - register dumps still present]" + err=1 + else + echo "User registers stripping test [Success]" + fi +} + test_basic_aslr test_pipe_aslr test_callchain_aslr @@ -459,6 +513,7 @@ test_pipe_out_report_aslr test_dropped_samples test_kernel_aslr test_kernel_report_aslr +test_regs_stripping cleanup ${err} exit $err diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c index c62ae5bcc124..19fca84a7405 100644 --- a/tools/perf/util/aslr.c +++ b/tools/perf/util/aslr.c @@ -5,6 +5,7 @@ #include "debug.h" #include "event.h" #include "evsel.h" +#include "evlist.h" #include "machine.h" #include "map.h" #include "thread.h" @@ -16,6 +17,7 @@ #include <internal/lib.h> /* page_size */ #include <linux/compiler.h> #include <linux/zalloc.h> +#include <errno.h> #include <inttypes.h> #include <unistd.h> @@ -43,6 +45,22 @@ struct aslr_mapping { u64 remap_start; }; +struct aslr_evsel_priv { + u64 orig_sample_type; + u64 orig_sample_regs_user; + u64 orig_sample_regs_intr; +}; + +static size_t evsel_hash(long key, void *ctx __maybe_unused) +{ + return (size_t)key; +} + +static bool evsel_equal(long key1, long key2, void *ctx __maybe_unused) +{ + return key1 == key2; +} + struct process_top_address { u64 remapped_max; u64 orig_last_end; @@ -58,6 +76,11 @@ struct aslr_tool { struct hashmap remap_addresses; /** @top_addresses: mapping from process to max remapped address. 
*/ struct hashmap top_addresses; + /** + * @evsel_orig_attrs: mapping from evsel pointer to its original + * unstripped sample_type and registers bitmasks. + */ + struct hashmap evsel_orig_attrs; }; static const pid_t kernel_pid = -1; @@ -619,6 +642,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, struct aslr_tool *aslr; struct perf_tool *delegate; int ret; + int orig_sample_size; u64 sample_type; struct thread *thread; struct machine *aslr_machine; @@ -631,7 +655,10 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, u64 addr; size_t i; size_t j; - + struct aslr_evsel_priv *priv = NULL; + u64 orig_sample_type; + u64 orig_regs_user; + u64 orig_regs_intr; del_tool = container_of(tool, struct delegate_tool, tool); aslr = container_of(del_tool, struct aslr_tool, tool); delegate = aslr->tool.delegate; @@ -640,7 +667,23 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, return delegate->sample(delegate, event, sample, machine); ret = -EFAULT; - sample_type = evsel->core.attr.sample_type; + + if (hashmap__find(&aslr->evsel_orig_attrs, evsel, &priv)) { + orig_sample_type = priv->orig_sample_type; + orig_regs_user = priv->orig_sample_regs_user; + orig_regs_intr = priv->orig_sample_regs_intr; + } else { + orig_sample_type = evsel->core.attr.sample_type; + orig_regs_user = evsel->core.attr.sample_regs_user; + orig_regs_intr = evsel->core.attr.sample_regs_intr; + } + + orig_sample_size = evsel->sample_size; + + sample_type = orig_sample_type; + sample_type &= ~PERF_SAMPLE_REGS_USER; + sample_type &= ~PERF_SAMPLE_REGS_INTR; + max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); new_event = (union perf_event *)aslr->event_copy; @@ -689,25 +732,25 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, i++; \ } while (0) - if (sample_type & PERF_SAMPLE_IDENTIFIER) + if (orig_sample_type & 
PERF_SAMPLE_IDENTIFIER) COPY_U64(); /* id */ - if (sample_type & PERF_SAMPLE_IP) + if (orig_sample_type & PERF_SAMPLE_IP) REMAP_U64(sample->ip); - if (sample_type & PERF_SAMPLE_TID) + if (orig_sample_type & PERF_SAMPLE_TID) COPY_U64(); /* pid, tid */ - if (sample_type & PERF_SAMPLE_TIME) + if (orig_sample_type & PERF_SAMPLE_TIME) COPY_U64(); /* time */ - if (sample_type & PERF_SAMPLE_ADDR) + if (orig_sample_type & PERF_SAMPLE_ADDR) REMAP_U64(sample->addr); - if (sample_type & PERF_SAMPLE_ID) + if (orig_sample_type & PERF_SAMPLE_ID) COPY_U64(); /* id */ - if (sample_type & PERF_SAMPLE_STREAM_ID) + if (orig_sample_type & PERF_SAMPLE_STREAM_ID) COPY_U64(); /* stream_id */ - if (sample_type & PERF_SAMPLE_CPU) + if (orig_sample_type & PERF_SAMPLE_CPU) COPY_U64(); /* cpu, res */ - if (sample_type & PERF_SAMPLE_PERIOD) + if (orig_sample_type & PERF_SAMPLE_PERIOD) COPY_U64(); /* period */ - if (sample_type & PERF_SAMPLE_READ) { + if (orig_sample_type & PERF_SAMPLE_READ) { if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { COPY_U64(); /* value */ if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) @@ -741,7 +784,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, } } } - if (sample_type & PERF_SAMPLE_CALLCHAIN) { + if (orig_sample_type & PERF_SAMPLE_CALLCHAIN) { u64 nr; if (CHECK_BOUNDS(1, 1)) { @@ -807,7 +850,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr); } } - if (sample_type & PERF_SAMPLE_RAW) { + if (orig_sample_type & PERF_SAMPLE_RAW) { size_t bytes = sizeof(u32) + sample->raw_size; size_t u64_words = (bytes + 7) / 8; @@ -826,7 +869,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + if (orig_sample_type & PERF_SAMPLE_BRANCH_STACK) { u64 nr; if (CHECK_BOUNDS(1, 1)) { @@ -871,19 +914,25 @@ static int aslr_tool__process_sample(const 
struct perf_tool *tool, goto out_put; } } - if (sample_type & PERF_SAMPLE_REGS_USER) { + if (orig_sample_type & PERF_SAMPLE_REGS_USER) { + u64 abi; + if (CHECK_BOUNDS(1, 0)) { ret = -EFAULT; goto out_put; } - /* abi */ - COPY_U64(); - /* TODO: can this be less conservative? */ - pr_debug("Dropping regs user sample as possible ASLR leak\n"); - ret = 0; - goto out_put; + abi = in_array[i++]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(orig_regs_user); + + if (nr > max_i - i) { + ret = -EFAULT; + goto out_put; + } + i += nr; + } } - if (sample_type & PERF_SAMPLE_STACK_USER) { + if (orig_sample_type & PERF_SAMPLE_STACK_USER) { u64 size; if (CHECK_BOUNDS(1, 1)) { @@ -914,39 +963,45 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + if (orig_sample_type & PERF_SAMPLE_WEIGHT_TYPE) COPY_U64(); /* perf_sample_weight */ - if (sample_type & PERF_SAMPLE_DATA_SRC) + if (orig_sample_type & PERF_SAMPLE_DATA_SRC) COPY_U64(); /* data_src */ - if (sample_type & PERF_SAMPLE_TRANSACTION) + if (orig_sample_type & PERF_SAMPLE_TRANSACTION) COPY_U64(); /* transaction */ - if (sample_type & PERF_SAMPLE_REGS_INTR) { + if (orig_sample_type & PERF_SAMPLE_REGS_INTR) { + u64 abi; + if (CHECK_BOUNDS(1, 0)) { ret = -EFAULT; goto out_put; } - /* abi */ - COPY_U64(); - /* TODO: can this be less conservative? */ - pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); - ret = 0; - goto out_put; + abi = in_array[i++]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(orig_regs_intr); + + if (nr > max_i - i) { + ret = -EFAULT; + goto out_put; + } + i += nr; + } } - if (sample_type & PERF_SAMPLE_PHYS_ADDR) { + if (orig_sample_type & PERF_SAMPLE_PHYS_ADDR) { COPY_U64(); /* phys_addr */ /* TODO: can this be less conservative? 
*/ pr_debug("Dropping physical address sample as possible ASLR leak\n"); ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_CGROUP) + if (orig_sample_type & PERF_SAMPLE_CGROUP) COPY_U64(); /* cgroup */ - if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + if (orig_sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) COPY_U64(); /* data_page_size */ - if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + if (orig_sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) COPY_U64(); /* code_page_size */ - if (sample_type & PERF_SAMPLE_AUX) { + if (orig_sample_type & PERF_SAMPLE_AUX) { u64 size; if (CHECK_BOUNDS(1, 1)) { @@ -987,10 +1042,20 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); + /* Temporarily override evsel attributes to match the stripped new_event format! */ + evsel->sample_size = __evsel__sample_size(sample_type); + evsel->core.attr.sample_type = sample_type; + evsel->core.attr.sample_regs_user = 0; + evsel->core.attr.sample_regs_intr = 0; perf_sample__init(&new_sample, /*all=*/ true); ret = evsel__parse_sample(evsel, new_event, &new_sample); if (ret) { + /* Restore original attributes immediately if parsing fails */ + evsel->sample_size = orig_sample_size; + evsel->core.attr.sample_type = orig_sample_type; + evsel->core.attr.sample_regs_user = orig_regs_user; + evsel->core.attr.sample_regs_intr = orig_regs_intr; perf_sample__exit(&new_sample); goto out_put; } @@ -999,6 +1064,12 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = delegate->sample(delegate, new_event, &new_sample, machine); perf_sample__exit(&new_sample); + /* Restore original attributes so trace ingestion never desynchronizes! 
*/ + evsel->sample_size = orig_sample_size; + evsel->core.attr.sample_type = orig_sample_type; + evsel->core.attr.sample_regs_user = orig_regs_user; + evsel->core.attr.sample_regs_intr = orig_regs_intr; + out_put: thread__put(thread); return ret; @@ -1065,6 +1136,9 @@ static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) hashmap__init(&aslr->top_addresses, top_addresses__hash, top_addresses__equal, /*ctx=*/NULL); + hashmap__init(&aslr->evsel_orig_attrs, + evsel_hash, evsel_equal, + /*ctx=*/NULL); aslr->tool.tool.sample = aslr_tool__process_sample; /* read - reads a counter, okay to delegate. */ @@ -1126,11 +1200,69 @@ void aslr_tool__delete(struct perf_tool *tool) zfree(&cur->pkey); zfree(&cur->pvalue); } + hashmap__for_each_entry(&aslr->evsel_orig_attrs, cur, bkt) { + zfree(&cur->pvalue); + } hashmap__clear(&aslr->remap_addresses); hashmap__clear(&aslr->top_addresses); + hashmap__clear(&aslr->evsel_orig_attrs); aslr_tool__destroy_machines_priv(&aslr->machines); machines__destroy_kernel_maps(&aslr->machines); machines__exit(&aslr->machines); free(aslr); } + +int aslr_tool__cache_orig_attrs(struct perf_tool *tool, struct evsel *evsel) +{ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct aslr_evsel_priv *priv = zalloc(sizeof(*priv)); + int err; + + if (!priv) + return -ENOMEM; + + priv->orig_sample_type = evsel->core.attr.sample_type; + priv->orig_sample_regs_user = evsel->core.attr.sample_regs_user; + priv->orig_sample_regs_intr = evsel->core.attr.sample_regs_intr; + + err = hashmap__add(&aslr->evsel_orig_attrs, evsel, priv); + if (err) { + free(priv); + return err; + } + return 0; +} + +void aslr_tool__strip_evlist(const struct perf_tool *tool __maybe_unused, struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + 
evsel__reset_sample_bit(evsel, REGS_USER); + evsel__reset_sample_bit(evsel, REGS_INTR); + evsel->core.attr.sample_regs_user = 0; + evsel->core.attr.sample_regs_intr = 0; + + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) + evsel->core.attr.bp_addr = 0; + } +} + +void aslr_tool__restore_evlist(const struct perf_tool *tool, struct evlist *evlist) +{ + const struct delegate_tool *del_tool = container_of(tool, const struct delegate_tool, tool); + const struct aslr_tool *aslr = container_of(del_tool, const struct aslr_tool, tool); + struct evsel *evsel; + struct aslr_evsel_priv *priv; + + evlist__for_each_entry(evlist, evsel) { + if (hashmap__find(&aslr->evsel_orig_attrs, evsel, &priv)) { + evsel->core.attr.sample_type = priv->orig_sample_type; + evsel->core.attr.sample_regs_user = priv->orig_sample_regs_user; + evsel->core.attr.sample_regs_intr = priv->orig_sample_regs_intr; + } + } +} diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h index a9b90bf29540..4c2cffc0e500 100644 --- a/tools/perf/util/aslr.h +++ b/tools/perf/util/aslr.h @@ -30,8 +30,12 @@ struct perf_tool; struct evsel; +struct evlist; struct perf_tool *aslr_tool__new(struct perf_tool *delegate); void aslr_tool__delete(struct perf_tool *aslr); +int aslr_tool__cache_orig_attrs(struct perf_tool *tool, struct evsel *evsel); +void aslr_tool__strip_evlist(const struct perf_tool *tool, struct evlist *evlist); +void aslr_tool__restore_evlist(const struct perf_tool *tool, struct evlist *evlist); #endif /* __PERF_ASLR_H */ -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v13 5/5] perf aslr: Strip sample registers 2026-06-05 19:48 ` [PATCH v13 5/5] perf aslr: Strip sample registers Ian Rogers @ 2026-06-05 20:04 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-06-05 20:04 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider: - [Low] Useless calls to `evsel__reset_sample_bit` due to prior bitwise AND, masking a potential desynchronization hazard. -- commit 407d38620d5d09a61109d6eb3da76fc9aad40847 Author: Ian Rogers <irogers@google.com> perf aslr: Strip sample registers Refactor the ASLR tool to strip out only the register dump payload by masking out the relevant perf_event_attr fields when the delegated tool is handling the data. > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > index c62ae5bcc124f..19fca84a7405e 100644 > --- a/tools/perf/util/aslr.c > +++ b/tools/perf/util/aslr.c [ ... ] > @@ -1126,11 +1200,69 @@ void aslr_tool__delete(struct perf_tool *tool) [ ... ] > +void aslr_tool__strip_evlist(const struct perf_tool *tool __maybe_unused, struct evlist *evlist) > +{ > + struct evsel *evsel; > + > + evlist__for_each_entry(evlist, evsel) { > + evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; > + evsel__reset_sample_bit(evsel, REGS_USER); > + evsel__reset_sample_bit(evsel, REGS_INTR); [Severity: Low] Are these evsel__reset_sample_bit() calls acting as no-ops? Because sample_type is already bitwise ANDed with ASLR_SUPPORTED_SAMPLE_TYPE just above, the bits are cleared before the helper functions are called. This leaves the internal sample_size state temporarily unsynchronized with the sample_type flags since evsel__reset_sample_bit() will see the bits are already cleared and return silently without decrementing sample_size. 
> + evsel->core.attr.sample_regs_user = 0; > + evsel->core.attr.sample_regs_intr = 0; > + > + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) > + evsel->core.attr.bp_addr = 0; > + } > +} > + > +void aslr_tool__restore_evlist(const struct perf_tool *tool, struct evlist *evlist) > +{ > + const struct delegate_tool *del_tool = container_of(tool, const struct delegate_tool, tool); > + const struct aslr_tool *aslr = container_of(del_tool, const struct aslr_tool, tool); > + struct evsel *evsel; > + struct aslr_evsel_priv *priv; > + > + evlist__for_each_entry(evlist, evsel) { > + if (hashmap__find(&aslr->evsel_orig_attrs, evsel, &priv)) { > + evsel->core.attr.sample_type = priv->orig_sample_type; [Severity: Low] If sample_size had actually been modified by the reset helpers in aslr_tool__strip_evlist(), would it remain corrupted here? Only the sample_type and registers are restored. If evsel__reset_sample_bit() had worked as expected and decremented sample_size, the sample_size would remain incorrect here when the sample_type flags are put back. > + evsel->core.attr.sample_regs_user = priv->orig_sample_regs_user; > + evsel->core.attr.sample_regs_intr = priv->orig_sample_regs_intr; > + } > + } > +} -- Sashiko AI review · https://sashiko.dev/#/patchset/20260605194851.2533568-1-irogers@google.com?part=5 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v14 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes 2026-06-05 19:48 ` [PATCH v13 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (4 preceding siblings ...) 2026-06-05 19:48 ` [PATCH v13 5/5] perf aslr: Strip sample registers Ian Rogers @ 2026-06-05 20:56 ` Ian Rogers 2026-06-05 20:56 ` [PATCH v14 1/5] perf maps: Add maps__mutate_mapping Ian Rogers ` (5 more replies) 5 siblings, 6 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-05 20:56 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz This patch series introduces the new 'perf inject --aslr' feature to remap virtual memory addresses or drop physical memory event leaks when profile record data is shared between machines. Bundled with this feature is a bug fix inside the core map tracking tool that hardens perf session analysis against concurrent lookup data races. Detailed Mechanism of MMAP Mapping and ASLR virtual Address Allocation: The ASLR tool virtualizes the address space of the recorded processes by intercepting MMAP and MMAP2 events to build a consistent translation database, which is subsequently used to rewrite sample addresses. It maintains two primary lookup databases using hash maps: 1. 'remap_addresses': Maps an original mapping key to its new remapped base address. The key uses topological invariant coordinates: (machine, dso, invariant). The invariant is computed as (start - pgoff) for DSO-backed mappings. This invariant remains constant even when perf's internal overlap-resolution splits a VMA into fragmented pieces, ensuring split maps resolve consistently back to the same remapped base. 2. 'top_addresses': Tracks the allocation state per process (machine, pid). 
It maintains 'remapped_max' (the highest allocated address in the virtualized space) and 'orig_last_end' (the end address of the last processed original mapping). For each MMAP/MMAP2 event: - We look up the DSO and invariant key in 'remap_addresses'. If found, we reuse the translation, preserving the offset within the mapping. - If not found, we allocate a new remapped address space: - If the new mapping is contiguous to the previous one in the original address space (start == orig_last_end), we place it contiguously in the remapped space. This is critical to preserve the contiguity of mappings for downstream merging (e.g. symbols split by HugeTLB, or anonymous .bss segments adjacent to initialized data). - If not contiguous, we insert a 1-page gap (using page_size) from the previous maximum allocated address to prevent accidental merging of unrelated VMAs. - The event's start address (and pgoff for kernel maps) is rewritten, and the event is delegated to the output writer. To remain strictly conservative and guarantee security, the tool scrubs breakpoint addresses (bp_addr) from all synthesized stream headers, completely drops PERF_RECORD_TEXT_POKE events to prevent absolute immediate pointer operand leaks, and drops unsupported complex payloads (such as user register stacks, raw tracepoints, and hardware AUX tracing frames). Verification is reinforced with a shell test ('inject_aslr.sh'). Prerequisite Bug Fix (Patch 1): During development, a core map indexing issue was identified and resolved to prevent concurrent lookup data races during session analysis. Changes since v13: - Patch 2: Added a NULL check for env before calling perf_env__kernel_is_64_bit(env) to prevent potential segfaults if the recorded environment has no headers. - Patch 5: Fixed sample_size and id_pos going out of sync during aslr_tool__strip_evlist() and aslr_tool__restore_evlist().
Instead of using evsel__reset_sample_bit(), which was acting as a no-op due to early bit clearing and corrupted sample_size, the tool now directly updates sample_type and recomputes sample_size/id_pos dynamically. Added orig_sample_size to aslr_evsel_priv to correctly restore the state. Changes since v12: - Patch 2: Fixed potential NULL pointer dereference in remap_addresses__hash() when handling unmapped memory events (key->dso is NULL) under REFCNT_CHECKING. - Patch 2: Dynamically detect machine architecture bitness via perf_env__kernel_is_64_bit() to select appropriate kernel_space_start boundaries, avoiding 64-bit address injection on 32-bit platforms. Changes since v11: - Patch 1: Fixed struct dso name accessor in maps.c by using dso__name() instead of ->name. - Patch 2: Fixed hash function in aslr.c to hash the underlying dso pointer using RC_CHK_ACCESS to support reference count checking. Changes since v10: - Patch 1: Added explicit tracking array logic in maps__load_maps() to correctly accumulate valid maps (skipping NULL entries after failures) and safely return the exact populated count, resolving out-of-bounds pointer iteration panics. - Patch 3: Fixed endianness bug during cross-endian sample parsing by passing evsel->needs_swap instead of false to __evsel__parse_sample in aslr.c, ensuring correct 32-bit field byte unswapping for packed fields. Refactored evsel__parse_sample to take a needs_swap argument via __evsel__parse_sample. - Patch 4: Fixed inject_aslr.sh exit code handling in trap functions to capture and propagate the correct pipeline failure status code instead of unconditionally returning success or failing the test. Changes since v9: - Patch 1: Added `-ENOMEM` error check inside `maps__find_symbol_by_name()` and return `NULL` early. Added map sorting state invalidation on early return in `maps__load_maps()`. - Patch 2: Fixed encapsulation by using `thread__maps()` and `thread__pid()` accessors in `aslr_tool__findnew_mapping()`. 
Added `pr_warning_once` warning when raw auxtrace data is dropped. - Patch 3: Fixed encapsulation by using `thread__maps()` and `thread__pid()` accessors in `aslr_tool__remap_address()`. Wrapped `evsel__parse_sample()` to temporarily disable `needs_swap` to avoid branch stack endianness corruption on cross-endian files. Fixed ISO C90 warning for declaration-after-statement for `orig_needs_swap`. - Patch 4: Fixed duplicate cleanup by explicitly removing trap handlers (`trap - EXIT TERM INT`) inside the `cleanup()` function. - Patch 5: Fixed heap corruption by adding size bounds checking before writing to `sample_regs_user` and `sample_regs_intr` fields. Added missing register mask clearing logic for the `itrace` synthesis path of `perf_event__repipe_attr()`. Ian Rogers (5): perf maps: Add maps__mutate_mapping perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking perf inject/aslr: Implement sample address remapping perf test: Add inject ASLR test perf aslr: Strip sample registers tools/perf/builtin-inject.c | 79 +- tools/perf/tests/shell/inject_aslr.sh | 519 ++++++++++ tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 1272 +++++++++++++++++++++++++ tools/perf/util/aslr.h | 41 + tools/perf/util/evsel.c | 6 +- tools/perf/util/evsel.h | 10 +- tools/perf/util/machine.c | 32 +- tools/perf/util/maps.c | 149 ++- tools/perf/util/maps.h | 3 + tools/perf/util/symbol-elf.c | 41 +- tools/perf/util/symbol.c | 17 +- 12 files changed, 2105 insertions(+), 65 deletions(-) create mode 100755 tools/perf/tests/shell/inject_aslr.sh create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v14 1/5] perf maps: Add maps__mutate_mapping 2026-06-05 20:56 ` [PATCH v14 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers @ 2026-06-05 20:56 ` Ian Rogers 2026-06-05 20:56 ` [PATCH v14 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers ` (4 subsequent siblings) 5 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-05 20:56 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz During kernel ELF symbol parsing (dso__process_kernel_symbol), proc kallsyms image loading (dso__load_kernel_sym, dso__load_guest_kernel_sym), and dynamic kernel memory map alignment updates (machine__update_kernel_mmap), the loader directly modifies live virtual address boundary key fields on map objects. If these boundaries are mutated while the map pointer actively resides inside the parent maps cache array list (kmaps) outside of any lock closure, an unsafe concurrent window is exposed where parallel worker lookup threads (e.g., inside perf top) can mistakenly assume the cache remains sorted based on stale parameters, executing binary search queries (bsearch) across an unsorted range and triggering lookup failures. Fix this by introducing maps__mutate_mapping(), which explicitly acquires the parent maps write semaphore lock, executes an incoming mutation callback block to perform the field updates under lock protection, and invalidates the sorted tracking flags prior to releasing the write lock. This guarantees synchronization invariants, closing the concurrent lookup race window. The adjacent module alignment pass inside machine__create_kernel_maps() is safely preserved as a high-performance lockless pass, as its invocation lifecycle bounds remain strictly single-threaded by contract during session initialization construction.
To safely support this unconditional down_write write lock mutator without recursive read-to-write self-deadlock upgrades during lazy symbol loading, we introduce a public maps__load_maps() API. It copies map pointers under a brief read lock and force-loads all modules locklessly outside the lock. Callers (such as perf inject) must pre-load all kernel symbol maps up front at startup using maps__load_maps(), completely bypassing dynamic runtime mutations. Fixes: 39b12f781271 ("perf tools: Make it possible to read object code from vmlinux") Assisted-by: Antigravity:gemini-3.5-flash Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/util/machine.c | 32 +++++--- tools/perf/util/maps.c | 149 ++++++++++++++++++++++++++++------- tools/perf/util/maps.h | 3 + tools/perf/util/symbol-elf.c | 41 ++++++---- tools/perf/util/symbol.c | 17 +++- 5 files changed, 184 insertions(+), 58 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index da1ad58758af..1ea06fde14e0 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1539,22 +1539,30 @@ static void machine__set_kernel_mmap(struct machine *machine, map__set_end(machine->vmlinux_map, ~0ULL); } -static int machine__update_kernel_mmap(struct machine *machine, - u64 start, u64 end) +struct kernel_mmap_mutation_ctx { + u64 start; + u64 end; +}; + +static int kernel_mmap_mutate_cb(struct map *map, void *data) { - struct map *orig, *updated; - int err; + struct kernel_mmap_mutation_ctx *ctx = data; - orig = machine->vmlinux_map; - updated = map__get(orig); + map__set_start(map, ctx->start); + map__set_end(map, ctx->end); + if (ctx->start == 0 && ctx->end == 0) + map__set_end(map, ~0ULL); + return 0; +} - machine->vmlinux_map = updated; - maps__remove(machine__kernel_maps(machine), orig); - machine__set_kernel_mmap(machine, start, end); - err = maps__insert(machine__kernel_maps(machine), updated); - map__put(orig); +static int machine__update_kernel_mmap(struct machine *machine, 
+ u64 start, u64 end) +{ + struct kernel_mmap_mutation_ctx ctx = { .start = start, .end = end }; - return err; + return maps__mutate_mapping(machine__kernel_maps(machine), + machine->vmlinux_map, + kernel_mmap_mutate_cb, &ctx); } int machine__create_kernel_maps(struct machine *machine) diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index 923935ee21b6..b1b8efe42149 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -576,6 +576,49 @@ void maps__remove(struct maps *maps, struct map *map) #endif } +/** + * maps__mutate_mapping - Apply write-protected mutations to a map. + * @maps: The maps collection containing the map. + * @map: The map to mutate. + * @mutate_cb: Callback function that performs the actual mutations. + * @data: Private data passed to the callback. + * + * This acquires the write lock on the maps semaphore to safely protect + * concurrent readers from seeing partially mutated or unsorted map boundaries. + * + * WARNING: Acquiring down_write() here can trigger a recursive self-deadlock if + * the caller already holds the read lock (e.g., during maps__for_each_map() or + * maps__find() iteration paths that trigger lazy symbol loading). To completely + * avoid this deadlock, all kernel/module maps must be pre-loaded up-front (via + * maps__load_maps()) under a clean, single-threaded context before entering + * multi-threaded event processing loops. 
+ */ +int maps__mutate_mapping(struct maps *maps, struct map *map, + int (*mutate_cb)(struct map *map, void *data), void *data) +{ + int err = 0; + + if (maps) + down_write(maps__lock(maps)); + + err = mutate_cb(map, data); + + if (maps) { + RC_CHK_ACCESS(maps)->maps_by_address_sorted = false; + RC_CHK_ACCESS(maps)->maps_by_name_sorted = false; + } + + if (maps) + up_write(maps__lock(maps)); + +#ifdef HAVE_LIBDW_SUPPORT + if (maps) + libdw__invalidate_dwfl(maps, maps__libdw_addr_space_dwfl(maps)); +#endif + + return err; +} + bool maps__empty(struct maps *maps) { bool res; @@ -626,6 +669,41 @@ int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data) return ret; } +int maps__load_maps(struct maps *maps) +{ + struct map **maps_copy; + unsigned int nr_maps; + int err = 0; + + if (!maps) + return 0; + + down_read(maps__lock(maps)); + nr_maps = maps__nr_maps(maps); + if (nr_maps == 0) { + up_read(maps__lock(maps)); + return 0; + } + maps_copy = calloc(nr_maps, sizeof(*maps_copy)); + if (!maps_copy) { + up_read(maps__lock(maps)); + return -ENOMEM; + } + for (unsigned int i = 0; i < nr_maps; i++) + maps_copy[i] = map__get(maps__maps_by_address(maps)[i]); + up_read(maps__lock(maps)); + + for (unsigned int i = 0; i < nr_maps; i++) { + if (map__load(maps_copy[i]) < 0) { + pr_warning("Failed to load map %s\n", dso__name(map__dso(maps_copy[i]))); + err = -1; + } + map__put(maps_copy[i]); + } + free(maps_copy); + return err; +} + void maps__remove_maps(struct maps *maps, bool (*cb)(struct map *map, void *data), void *data) { struct map **maps_by_address; @@ -668,40 +746,57 @@ struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp) return result; } -struct maps__find_symbol_by_name_args { - struct map **mapp; - const char *name; - struct symbol *sym; -}; - -static int maps__find_symbol_by_name_cb(struct map *map, void *data) +struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) { - 
struct maps__find_symbol_by_name_args *args = data; + struct map **maps_copy; + unsigned int nr_maps; + struct symbol *sym = NULL; - args->sym = map__find_symbol_by_name(map, args->name); - if (!args->sym) - return 0; + if (!maps) + return NULL; - if (!map__contains_symbol(map, args->sym)) { - args->sym = NULL; - return 0; + /* + * First, ensure all maps are loaded. We pre-load them outside of any + * read-to-write locks to avoid deadlocks. Even if some fail, we proceed. + */ + maps__load_maps(maps); + + /* + * Create a local snapshot of the maps while holding the read lock. + * This prevents deadlocking if iteration triggers further map insertions. + */ + down_read(maps__lock(maps)); + nr_maps = maps__nr_maps(maps); + maps_copy = calloc(nr_maps, sizeof(*maps_copy)); + if (maps_copy) { + for (unsigned int i = 0; i < nr_maps; i++) { + struct map *map = maps__maps_by_address(maps)[i]; + + maps_copy[i] = map__get(map); + } } + up_read(maps__lock(maps)); - if (args->mapp != NULL) - *args->mapp = map__get(map); - return 1; -} + if (!maps_copy) + return NULL; -struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) -{ - struct maps__find_symbol_by_name_args args = { - .mapp = mapp, - .name = name, - .sym = NULL, - }; + for (unsigned int i = 0; i < nr_maps; i++) { + struct map *map = maps_copy[i]; + + sym = map__find_symbol_by_name(map, name); + if (sym && map__contains_symbol(map, sym)) { + if (mapp) + *mapp = map__get(map); + break; + } + sym = NULL; + } + + for (unsigned int i = 0; i < nr_maps; i++) + map__put(maps_copy[i]); - maps__for_each_map(maps, maps__find_symbol_by_name_cb, &args); - return args.sym; + free(maps_copy); + return sym; } int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams) diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h index 5b80b199685e..4ec9b7453a3b 100644 --- a/tools/perf/util/maps.h +++ b/tools/perf/util/maps.h @@ -59,8 +59,11 @@ void maps__set_libdw_addr_space_dwfl(struct 
maps *maps, void *dwfl); size_t maps__fprintf(struct maps *maps, FILE *fp); +int maps__load_maps(struct maps *maps); int maps__insert(struct maps *maps, struct map *map); void maps__remove(struct maps *maps, struct map *map); +int maps__mutate_mapping(struct maps *maps, struct map *map, + int (*mutate_cb)(struct map *map, void *data), void *data); struct map *maps__find(struct maps *maps, u64 addr); struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 186e6d92ac3d..d1e93c0556dd 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1342,6 +1342,24 @@ static u64 ref_reloc(struct kmap *kmap) void __weak arch__sym_update(struct symbol *s __maybe_unused, GElf_Sym *sym __maybe_unused) { } +struct remap_kernel_ctx { + u64 sh_addr; + u64 sh_size; + u64 sh_offset; + struct kmap *kmap; +}; + +static int remap_kernel_cb(struct map *map, void *data) +{ + struct remap_kernel_ctx *ctx = data; + + map__set_start(map, ctx->sh_addr + ref_reloc(ctx->kmap)); + map__set_end(map, map__start(map) + ctx->sh_size); + map__set_pgoff(map, ctx->sh_offset); + map__set_mapping_type(map, MAPPING_TYPE__DSO); + return 0; +} + static int dso__process_kernel_symbol(struct dso *dso, struct map *map, GElf_Sym *sym, GElf_Shdr *shdr, struct maps *kmaps, struct kmap *kmap, @@ -1372,22 +1390,15 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, * map to the kernel dso. 
*/ if (*remap_kernel && dso__kernel(dso) && !kmodule) { + struct remap_kernel_ctx ctx = { + .sh_addr = shdr->sh_addr, + .sh_size = shdr->sh_size, + .sh_offset = shdr->sh_offset, + .kmap = kmap + }; + *remap_kernel = false; - map__set_start(map, shdr->sh_addr + ref_reloc(kmap)); - map__set_end(map, map__start(map) + shdr->sh_size); - map__set_pgoff(map, shdr->sh_offset); - map__set_mapping_type(map, MAPPING_TYPE__DSO); - /* Ensure maps are correctly ordered */ - if (kmaps) { - int err; - struct map *tmp = map__get(map); - - maps__remove(kmaps, map); - err = maps__insert(kmaps, map); - map__put(tmp); - if (err) - return err; - } + maps__mutate_mapping(kmaps, map, remap_kernel_cb, &ctx); } /* diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 0c46b24ee098..2cc911af8c81 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -48,6 +48,13 @@ #include <symbol/kallsyms.h> #include <sys/utsname.h> +static int map_fixup_cb(struct map *map, void *data __maybe_unused) +{ + map__fixup_start(map); + map__fixup_end(map); + return 0; +} + static int dso__load_kernel_sym(struct dso *dso, struct map *map); static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map); @@ -2240,10 +2247,11 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map) free(kallsyms_allocated_filename); if (err > 0 && !dso__is_kcore(dso)) { + struct maps *kmaps = map__kmaps(map); + dso__set_binary_type(dso, DSO_BINARY_TYPE__KALLSYMS); dso__set_long_name(dso, DSO__NAME_KALLSYMS, false); - map__fixup_start(map); - map__fixup_end(map); + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); } return err; @@ -2283,10 +2291,11 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map) if (err > 0) pr_debug("Using %s for symbols\n", kallsyms_filename); if (err > 0 && !dso__is_kcore(dso)) { + struct maps *kmaps = map__kmaps(map); + dso__set_binary_type(dso, DSO_BINARY_TYPE__GUEST_KALLSYMS); dso__set_long_name(dso, machine->mmap_name, 
false); - map__fixup_start(map); - map__fixup_end(map); + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); } return err; -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* [PATCH v14 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking 2026-06-05 20:56 ` [PATCH v14 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-05 20:56 ` [PATCH v14 1/5] perf maps: Add maps__mutate_mapping Ian Rogers @ 2026-06-05 20:56 ` Ian Rogers 2026-06-05 21:12 ` sashiko-bot 2026-06-05 20:56 ` [PATCH v14 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers ` (3 subsequent siblings) 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-05 20:56 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz If perf.data files are taken from one machine to another they may leak virtual addresses and so weaken ASLR on the machine they are coming from. Add an aslr option for perf inject that remaps all virtual addresses, or drops data/events, so that the virtual address information isn't leaked. This patch introduces the core ASLR remapping tool infrastructure and implements remapping/tracking for metadata events (MMAP, MMAP2, COMM, FORK, EXIT, KSYMBOL, TEXT_POKE). Sample events are delegated without remapping for now. 
Assisted-by: Antigravity:gemini-3.5-flash Signed-off-by: Ian Rogers <irogers@google.com> Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> --- tools/perf/builtin-inject.c | 57 ++- tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 695 ++++++++++++++++++++++++++++++++++++ tools/perf/util/aslr.h | 37 ++ 4 files changed, 788 insertions(+), 2 deletions(-) create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 75ffe31d03fe..65c7eccccf4d 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -8,6 +8,7 @@ */ #include "builtin.h" +#include "util/aslr.h" #include "util/color.h" #include "util/dso.h" #include "util/vdso.h" @@ -124,6 +125,7 @@ struct perf_inject { bool in_place_update_dry_run; bool copy_kcore_dir; bool convert_callchain; + bool aslr; const char *input_name; struct perf_data output; u64 bytes_written; @@ -242,8 +244,25 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, if (!inject->output.is_pipe) return 0; - if (!inject->itrace_synth_opts.set) + if (!inject->itrace_synth_opts.set) { + if (inject->aslr) { + union perf_event *stripped_event = malloc(event->header.size); + int err; + + if (!stripped_event) + return -ENOMEM; + memcpy(stripped_event, event, event->header.size); + stripped_event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + + if (stripped_event->attr.attr.type == PERF_TYPE_BREAKPOINT) + stripped_event->attr.attr.bp_addr = 0; + + err = perf_event__repipe_synth(tool, stripped_event); + free(stripped_event); + return err; + } return perf_event__repipe_synth(tool, event); + } if (event->header.size < sizeof(struct perf_event_header) + PERF_ATTR_SIZE_VER0) { pr_err("Attribute event size %u is too small\n", event->header.size); @@ -276,6 +295,8 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, attr.size = sizeof(struct perf_event_attr); 
attr.sample_type &= ~PERF_SAMPLE_AUX; + if (inject->aslr) + attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; if (inject->itrace_synth_opts.add_last_branch) { attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; @@ -2595,6 +2616,8 @@ static int __cmd_inject(struct perf_inject *inject) } } + + session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc, @@ -2704,6 +2727,8 @@ int cmd_inject(int argc, const char **argv) unwind__option), OPT_BOOLEAN(0, "convert-callchain", &inject.convert_callchain, "Generate callchains using DWARF and drop register/stack data"), + OPT_BOOLEAN(0, "aslr", &inject.aslr, + "Remap virtual memory addresses similar to ASLR"), OPT_END() }; const char * const inject_usage[] = { @@ -2711,6 +2736,7 @@ int cmd_inject(int argc, const char **argv) NULL }; bool ordered_events; + struct perf_tool *tool = &inject.tool; if (!inject.itrace_synth_opts.set) { /* Disable eager loading of kernel symbols that adds overhead to perf inject. 
*/ @@ -2731,6 +2757,11 @@ int cmd_inject(int argc, const char **argv) if (argc) usage_with_options(inject_usage, options); + if (inject.aslr && inject.convert_callchain) { + pr_err("Error: --aslr and --convert-callchain are mutually exclusive features.\n"); + return -EINVAL; + } + if (inject.strip && !inject.itrace_synth_opts.set) { pr_err("--strip option requires --itrace option\n"); return -1; @@ -2824,12 +2855,21 @@ int cmd_inject(int argc, const char **argv) inject.tool.schedstat_domain = perf_event__repipe_op2_synth; inject.tool.dont_split_sample_group = true; inject.tool.merge_deferred_callchains = false; - inject.session = __perf_session__new(&data, &inject.tool, + if (inject.aslr) { + tool = aslr_tool__new(&inject.tool); + if (!tool) { + ret = -ENOMEM; + goto out_close_output; + } + } + inject.session = __perf_session__new(&data, tool, /*trace_event_repipe=*/inject.output.is_pipe, /*host_env=*/NULL); if (IS_ERR(inject.session)) { ret = PTR_ERR(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); goto out_close_output; } @@ -2923,12 +2963,25 @@ int cmd_inject(int argc, const char **argv) ret = __cmd_inject(&inject); + if (inject.aslr) { + struct evsel *evsel; + + evlist__for_each_entry(inject.session->evlist, evsel) { + evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) + evsel->core.attr.bp_addr = 0; + } + } + guest_session__exit(&inject.guest_session); out_delete: strlist__delete(inject.known_build_ids); zstd_fini(&(inject.session->zstd_data)); perf_session__delete(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); out_close_output: if (!inject.in_place_update) perf_data__close(&inject.output); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 4bbc78b1f741..19994e026ae5 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -6,6 +6,7 @@ perf-util-y += arm64-frame-pointer-unwind-support.o perf-util-y += addr2line.o perf-util-y += addr_location.o 
perf-util-y += annotate.o +perf-util-y += aslr.o perf-util-y += blake2s.o perf-util-y += block-info.o perf-util-y += block-range.o diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c new file mode 100644 index 000000000000..4b4b00b4d52d --- /dev/null +++ b/tools/perf/util/aslr.c @@ -0,0 +1,695 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "aslr.h" + +#include "addr_location.h" +#include "debug.h" +#include "event.h" +#include "evsel.h" +#include "machine.h" +#include "map.h" +#include "thread.h" +#include "tool.h" +#include "session.h" +#include "data.h" +#include "dso.h" + +#include <internal/lib.h> /* page_size */ +#include <linux/compiler.h> +#include <linux/zalloc.h> +#include <inttypes.h> +#include <unistd.h> + +/** + * struct remap_addresses_key - Key for mapping original addresses to remapped ones. + * @dso: Pointer to the DSO (Dynamic Shared Object) associated with the mapping. + * @invariant: Unique offset invariant within the VMA (Virtual Memory Area). + * Calculated as `start - pgoff`. This value remains constant when + * perf's internal `maps__fixup_overlap_and_insert` splits a map into + * fragmented VMA pieces due to overlapping events, allowing us to + * resolve split maps consistently back to the original VMA. + * @pid: Process ID associated with the mapping. + */ +struct remap_addresses_key { + struct machine *machine; + struct dso *dso; + u64 invariant; + pid_t pid; +}; + +struct aslr_mapping { + struct list_head node; + u64 orig_start; + u64 len; + u64 remap_start; +}; + +struct process_top_address { + u64 remapped_max; + u64 orig_last_end; +}; +struct aslr_tool { + /** @tool: The tool implemented here and a pointer to a delegate to process the data. */ + struct delegate_tool tool; + /** @machines: The machines with the input, not remapped, virtual address layout. */ + struct machines machines; + /** @event_copy: Buffer used to create an event to pass to the delegate. 
*/ + char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); + /** @remap_addresses: mapping from remap_addresses_key to remapped address. */ + struct hashmap remap_addresses; + /** @top_addresses: mapping from process to max remapped address. */ + struct hashmap top_addresses; +}; + +static const pid_t kernel_pid = -1; + +/* Start remapping user processes from a small non-zero offset. */ +static const u64 user_space_start = 0x200000; +static const u64 kernel_space_start_64 = 0xffff800010000000ULL; +static const u64 kernel_space_start_32 = 0x80000000ULL; + +static size_t remap_addresses__hash(long _key, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key = (struct remap_addresses_key *)_key; + void *dso_ptr = key->dso ? RC_CHK_ACCESS(key->dso) : NULL; + + return (size_t)key->machine ^ (size_t)dso_ptr ^ key->invariant ^ key->pid; +} + +static bool remap_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key1 = (struct remap_addresses_key *)_key1; + struct remap_addresses_key *key2 = (struct remap_addresses_key *)_key2; + + return key1->machine == key2->machine && + RC_CHK_EQUAL(key1->dso, key2->dso) && + key1->invariant == key2->invariant && + key1->pid == key2->pid; +} + +struct top_addresses_key { + struct machine *machine; + pid_t pid; +}; + +static size_t top_addresses__hash(long _key, void *ctx __maybe_unused) +{ + struct top_addresses_key *key = (struct top_addresses_key *)_key; + + return (size_t)key->machine ^ key->pid; +} + +static bool top_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) +{ + struct top_addresses_key *key1 = (struct top_addresses_key *)_key1; + struct top_addresses_key *key2 = (struct top_addresses_key *)_key2; + + return key1->machine == key2->machine && key1->pid == key2->pid; +} + +static u64 round_up_to_page_size(u64 addr) +{ + return (addr + page_size - 1) & ~((u64)page_size - 1); +} + +struct aslr_machine_priv { + bool kernel_maps_loaded; +}; + +static int 
aslr_tool__preload_kernel_maps(struct machine *machine) +{ + struct aslr_machine_priv *mpriv = machine->priv; + + if (!mpriv) { + mpriv = zalloc(sizeof(*mpriv)); + if (!mpriv) + return -ENOMEM; + machine->priv = mpriv; + } + + if (!mpriv->kernel_maps_loaded) { + struct maps *kmaps = machine__kernel_maps(machine); + + if (kmaps) { + int err = maps__load_maps(kmaps); + + if (err < 0) { + pr_err("ASLR: Failed to preload kernel maps for machine pid %d\n", + machine->pid); + return err; + } + } + mpriv->kernel_maps_loaded = true; + } + return 0; +} + +static void aslr_tool__free_machine_priv(struct machine *machine) +{ + free(machine->priv); + machine->priv = NULL; +} + +static void aslr_tool__destroy_machines_priv(struct machines *machines) +{ + struct rb_node *nd; + + aslr_tool__free_machine_priv(&machines->host); + for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) { + struct machine *machine = rb_entry(nd, struct machine, rb_node); + + aslr_tool__free_machine_priv(machine); + } +} + +static u64 aslr_tool__findnew_mapping(struct aslr_tool *aslr, + struct thread *aslr_thread, + u8 cpumode, u64 start, + u64 len, u64 pgoff) +{ + /* Address location for dso lookup. */ + struct addr_location al; + /* Original ASLR address based key for the remap table. */ + struct remap_addresses_key remap_key; + /* The address in the ASLR sanitized address space less pg_off. */ + u64 *remapped_invariant_ptr; + /* Key for the maximum address in a process. */ + struct top_addresses_key top_addr_key; + /* Value in top address table. */ + struct process_top_address *top = NULL; + /* Address in ASLR sanitized address space. */ + u64 remap_addr; + /* Potentially allocated remap table key. */ + struct remap_addresses_key *new_remap_key = NULL; + /* + * Potentially allocated remap table key. + * TODO: Avoid allocation necessary for perf 32-bit binary support. 
+ */ + u64 *new_remap_val = NULL; + int err; + + if (!aslr_thread) + return 0; + + /* The key to look up an incoming address to the outgoing value. */ + addr_location__init(&al); + remap_key.machine = maps__machine(thread__maps(aslr_thread)); + remap_key.pid = (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? + kernel_pid : thread__pid(aslr_thread); + if (thread__find_map(aslr_thread, cpumode, start, &al)) { + struct dso *dso = map__dso(al.map); + const char *dso_name = dso ? dso__long_name(dso) : NULL; + + remap_key.dso = dso; + if (dso && !is_anon_memory(dso_name) && !is_no_dso_memory(dso_name)) + remap_key.invariant = map__start(al.map) - map__pgoff(al.map); + else + remap_key.invariant = map__start(al.map); + } else { + remap_key.dso = NULL; + remap_key.invariant = start; + } + + /* The key to look up top allocated address. */ + top_addr_key.machine = remap_key.machine; + top_addr_key.pid = remap_key.pid; + + if (hashmap__find(&aslr->remap_addresses, &remap_key, &remapped_invariant_ptr)) { + /* Mmap already exists. */ + u64 calculated_max; + + if (al.map) { + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + + (start - map__start(al.map)); + } else { + remap_addr = *remapped_invariant_ptr + pgoff; + } + + calculated_max = remap_addr + len; + + /* See if top mapping was expanded. */ + if (hashmap__find(&aslr->top_addresses, &top_addr_key, &top)) { + if (calculated_max > top->remapped_max) + top->remapped_max = calculated_max; + } + addr_location__exit(&al); + return remap_addr; + } + /* No mmap, create an entry from the top address. */ + if (hashmap__find(&aslr->top_addresses, &top_addr_key, &top)) { + /* Current max allocated mmap address within the process. */ + remap_addr = top->remapped_max; + + if (start == top->orig_last_end) { + /* Contiguous mapping, do not add 1 page gap! */ + remap_addr = round_up_to_page_size(remap_addr); + } else { + /* Give 1 page gap from current max page. 
*/ + remap_addr = round_up_to_page_size(remap_addr); + remap_addr += page_size; + } + top->orig_last_end = start + len; + if (remap_addr + len > top->remapped_max) + top->remapped_max = remap_addr + len; + } else { + /* First address of the process, allocate key and first top address. */ + struct top_addresses_key *tk; + struct process_top_address *top_val; + struct machine *machine = remap_key.machine; + struct perf_env *env = machine ? machine->env : NULL; + bool is_64 = env ? perf_env__kernel_is_64_bit(env) : (sizeof(void *) == 8); + u64 kernel_start_addr = is_64 ? kernel_space_start_64 : kernel_space_start_32; + + remap_addr = (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? + kernel_start_addr : user_space_start; + remap_addr = round_up_to_page_size(remap_addr); + + tk = malloc(sizeof(*tk)); + top_val = malloc(sizeof(*top_val)); + if (!tk || !top_val) { + err = -ENOMEM; + } else { + *tk = top_addr_key; + top_val->remapped_max = remap_addr + len; + top_val->orig_last_end = start + len; + err = hashmap__insert(&aslr->top_addresses, tk, top_val, + HASHMAP_ADD, NULL, NULL); + } + if (err) { + errno = -err; + pr_err("Failure to add ASLR process top address %m\n"); + free(tk); + free(top_val); + addr_location__exit(&al); + return 0; + } + } + /* Create remapping entry. */ + new_remap_key = malloc(sizeof(*new_remap_key)); + new_remap_val = malloc(sizeof(u64)); + if (!new_remap_key || !new_remap_val) { + err = -ENOMEM; + } else { + *new_remap_key = remap_key; + new_remap_key->dso = dso__get(remap_key.dso); + if (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) { + if (al.map) { + *new_remap_val = remap_addr - + (start - map__start(al.map)) - + map__pgoff(al.map); + } else { + *new_remap_val = remap_addr; + } + } else { + *new_remap_val = remap_addr - (al.map ?
map__pgoff(al.map) : pgoff); + } + err = hashmap__add(&aslr->remap_addresses, new_remap_key, new_remap_val); + if (err) + dso__put(new_remap_key->dso); + } + if (err) { + errno = -err; + pr_err("Failure to add ASLR remapping %m\n"); + free(new_remap_key); + free(new_remap_val); + addr_location__exit(&al); + return 0; + } + addr_location__exit(&al); + return remap_addr; +} + +static int aslr_tool__process_mmap(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_mmap(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap.pid, event->mmap.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap, &event->mmap, event->mmap.header.size); + /* Remaps the mmap.start. 
*/ + new_event->mmap.start = aslr_tool__findnew_mapping(aslr, thread, cpumode, + event->mmap.start, + event->mmap.len, + event->mmap.pgoff); + if (cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + new_event->mmap.pgoff = new_event->mmap.start; + err = delegate->mmap(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_mmap2(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_mmap2(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap2.pid, event->mmap2.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap2, &event->mmap2, event->mmap2.header.size); + /* Remaps the mmap.start. 
*/ + new_event->mmap2.start = aslr_tool__findnew_mapping(aslr, thread, cpumode, + event->mmap2.start, + event->mmap2.len, + event->mmap2.pgoff); + if (cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + new_event->mmap2.pgoff = new_event->mmap2.start; + err = delegate->mmap2(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_comm(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_comm(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->comm(delegate, event, sample, machine); +} + +static int aslr_tool__process_fork(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. 
*/ + err = perf_event__process_fork(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->fork(delegate, event, sample, machine); +} + +static int aslr_tool__process_exit(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_exit(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->exit(delegate, event, sample, machine); +} + +static int aslr_tool__process_text_poke(const struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + /* Drop in case the instruction encodes an ASLR revealing address. 
*/ + return 0; +} + +static int aslr_tool__process_ksymbol(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + err = perf_event__process_ksymbol(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, kernel_pid, 0); + if (!thread) + return -ENOMEM; + memcpy(&new_event->ksymbol, &event->ksymbol, event->ksymbol.header.size); + /* Remaps the ksymbol.start */ + new_event->ksymbol.addr = aslr_tool__findnew_mapping(aslr, thread, + PERF_RECORD_MISC_KERNEL, + event->ksymbol.addr, + event->ksymbol.len, + /*pgoff=*/0); + + err = delegate->ksymbol(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_sample(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct perf_tool *delegate = aslr->tool.delegate; + + return delegate->sample(delegate, event, sample, machine); +} + +static int skipn(int fd, off_t n) +{ + char buf[4096]; + ssize_t ret; + + while (n > 0) { + ret = read(fd, buf, min_t(off_t, n, (off_t)sizeof(buf))); + if (ret <= 0) + return ret; + n -= ret; + } + + return 0; +} + +static s64 
aslr_tool__process_auxtrace(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, + union perf_event *event) +{ + pr_warning_once("ASLR: Dropping auxtrace data as it cannot be obfuscated.\n"); + if (perf_data__is_pipe(session->data)) { + /* Copy behavior of the stub by reading all pipe data. */ + int err = skipn(perf_data__fd(session->data), event->auxtrace.size); + + if (err < 0) + return err; + } + return event->auxtrace.size; +} + +static int aslr_tool__process_auxtrace_info(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + +static int aslr_tool__process_auxtrace_error(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + +static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) +{ + delegate_tool__init(&aslr->tool, delegate); + aslr->tool.tool.ordered_events = true; + + machines__init(&aslr->machines); + + hashmap__init(&aslr->remap_addresses, + remap_addresses__hash, remap_addresses__equal, + /*ctx=*/NULL); + hashmap__init(&aslr->top_addresses, + top_addresses__hash, top_addresses__equal, + /*ctx=*/NULL); + + aslr->tool.tool.sample = aslr_tool__process_sample; + /* read - reads a counter, okay to delegate. */ + aslr->tool.tool.mmap = aslr_tool__process_mmap; + aslr->tool.tool.mmap2 = aslr_tool__process_mmap2; + aslr->tool.tool.comm = aslr_tool__process_comm; + aslr->tool.tool.fork = aslr_tool__process_fork; + aslr->tool.tool.exit = aslr_tool__process_exit; + /* namespaces, cgroup, lost, lost_sample, aux, */ + /* itrace_start, aux_output_hw_id, context_switch, throttle, unthrottle */ + /* - no virtual addresses. */ + aslr->tool.tool.ksymbol = aslr_tool__process_ksymbol; + /* bpf - no virtual address.
*/ + aslr->tool.tool.text_poke = aslr_tool__process_text_poke; + /* + * event_update, tracing_data, finished_round, build_id, id_index, + * auxtrace_info, auxtrace_error, time_conv, thread_map, cpu_map, + * stat_config, stat, feature, finished_init, bpf_metadata, compressed, + * auxtrace - no virtual addresses. + */ + aslr->tool.tool.auxtrace = aslr_tool__process_auxtrace; + aslr->tool.tool.auxtrace_info = aslr_tool__process_auxtrace_info; + aslr->tool.tool.auxtrace_error = aslr_tool__process_auxtrace_error; +} + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate) +{ + struct aslr_tool *aslr = zalloc(sizeof(*aslr)); + + if (!aslr) + return NULL; + + aslr_tool__init(aslr, delegate); + return &aslr->tool.tool; +} + +void aslr_tool__delete(struct perf_tool *tool) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct hashmap_entry *cur; + size_t bkt; + + if (!tool) + return; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + + hashmap__for_each_entry(&aslr->remap_addresses, cur, bkt) { + struct remap_addresses_key *key = (struct remap_addresses_key *)cur->pkey; + + if (key) + dso__put(key->dso); + zfree(&cur->pkey); + zfree(&cur->pvalue); + } + hashmap__for_each_entry(&aslr->top_addresses, cur, bkt) { + zfree(&cur->pkey); + zfree(&cur->pvalue); + } + + hashmap__clear(&aslr->remap_addresses); + hashmap__clear(&aslr->top_addresses); + aslr_tool__destroy_machines_priv(&aslr->machines); + machines__destroy_kernel_maps(&aslr->machines); + machines__exit(&aslr->machines); + free(aslr); +} diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h new file mode 100644 index 000000000000..a9b90bf29540 --- /dev/null +++ b/tools/perf/util/aslr.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_ASLR_H +#define __PERF_ASLR_H + +#include <linux/perf_event.h> + +#define ASLR_SUPPORTED_SAMPLE_TYPE ( \ + PERF_SAMPLE_IDENTIFIER | \ + PERF_SAMPLE_IP | \ + 
PERF_SAMPLE_TID | \ + PERF_SAMPLE_TIME | \ + PERF_SAMPLE_ADDR | \ + PERF_SAMPLE_ID | \ + PERF_SAMPLE_STREAM_ID | \ + PERF_SAMPLE_CPU | \ + PERF_SAMPLE_PERIOD | \ + PERF_SAMPLE_READ | \ + PERF_SAMPLE_CALLCHAIN | \ + PERF_SAMPLE_RAW | \ + PERF_SAMPLE_BRANCH_STACK | \ + PERF_SAMPLE_STACK_USER | \ + PERF_SAMPLE_WEIGHT_TYPE | \ + PERF_SAMPLE_DATA_SRC | \ + PERF_SAMPLE_TRANSACTION | \ + PERF_SAMPLE_PHYS_ADDR | \ + PERF_SAMPLE_CGROUP | \ + PERF_SAMPLE_DATA_PAGE_SIZE | \ + PERF_SAMPLE_CODE_PAGE_SIZE | \ + PERF_SAMPLE_AUX) + +struct perf_tool; +struct evsel; + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate); +void aslr_tool__delete(struct perf_tool *aslr); + +#endif /* __PERF_ASLR_H */ -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v14 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking 2026-06-05 20:56 ` [PATCH v14 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers @ 2026-06-05 21:12 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-06-05 21:12 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 2 potential issue(s) to consider: - [High] The ASLR tool fails to remap the `pgoff` field for user-space anonymous `MMAP` and `MMAP2` events, leaking original randomized virtual addresses. - [High] The tool leaks kernel ASLR addresses through unhandled `kprobe_addr` and `kprobe_func` fields in `perf_event_attr`. -- commit 5f7118d61e85f58390f8e97238a20e6193ea5f8d Author: Ian Rogers <irogers@google.com> perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking If perf.data files are taken from one machine to another they may leak virtual addresses and so weaken ASLR on the machine they are coming from. Add an aslr option for perf inject that remaps all virtual addresses, or drops data/events, so that the virtual address information isn't leaked. This patch introduces the core ASLR remapping tool infrastructure and implements remapping/tracking for metadata events (MMAP, MMAP2, COMM, FORK, EXIT, KSYMBOL, TEXT_POKE). Sample events are delegated without remapping for now. > diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c > index 75ffe31d03fea..65c7eccccf4d2 100644 > --- a/tools/perf/builtin-inject.c > +++ b/tools/perf/builtin-inject.c [ ... 
] > @@ -2923,12 +2963,25 @@ int cmd_inject(int argc, const char **argv) > > ret = __cmd_inject(&inject); > > + if (inject.aslr) { > + struct evsel *evsel; > + > + evlist__for_each_entry(inject.session->evlist, evsel) { > + evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; > + > + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) > + evsel->core.attr.bp_addr = 0; [Severity: High] Does this code leak kernel ASLR addresses through unhandled kprobe_addr and kprobe_func fields? For dynamic PMUs like perf_kprobe and perf_uprobe, attr.type is assigned a dynamic identifier rather than a fixed constant like PERF_TYPE_BREAKPOINT. The perf_event_attr struct utilizes the same config1 and config2 union fields to store kprobe_func/uprobe_path and kprobe_addr/probe_offset. If this only checks for PERF_TYPE_BREAKPOINT, it fails to scrub these union fields for kprobe events, which could leak exact kernel ASLR addresses and perf record process pointers into the output perf.data file's attribute headers. > + } > + } > + > guest_session__exit(&inject.guest_session); > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > new file mode 100644 > index 0000000000000..4b4b00b4d52d9 > --- /dev/null > +++ b/tools/perf/util/aslr.c [ ... ] > +static int aslr_tool__process_mmap(const struct perf_tool *tool, > + union perf_event *event, > + struct perf_sample *sample, > + struct machine *machine) > +{ [ ... ] > + /* Remaps the mmap.start. */ > + new_event->mmap.start = aslr_tool__findnew_mapping(aslr, thread, cpumode, > + event->mmap.start, > + event->mmap.len, > + event->mmap.pgoff); > + if (cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL) > + new_event->mmap.pgoff = new_event->mmap.start; [Severity: High] Does this fail to remap the pgoff field for user-space anonymous MMAP events, potentially leaking original randomized virtual addresses? 
The Linux kernel's perf_event_mmap_event() function unconditionally sets pgoff = (u64)vma->vm_pgoff << PAGE_SHIFT for all mappings. For anonymous mappings ([anon], [stack], [heap]), vma->vm_pgoff is initialized as addr >> PAGE_SHIFT, meaning pgoff literally contains the original randomized virtual address. If pgoff is copied verbatim from the original event for user-space mappings, it could directly leak the original ASLR addresses into the output perf.data file. > + err = delegate->mmap(delegate, new_event, sample, machine); > + thread__put(thread); > + return err; > +} > + > +static int aslr_tool__process_mmap2(const struct perf_tool *tool, > + union perf_event *event, > + struct perf_sample *sample, > + struct machine *machine) > +{ [ ... ] > + /* Remaps the mmap.start. */ > + new_event->mmap2.start = aslr_tool__findnew_mapping(aslr, thread, cpumode, > + event->mmap2.start, > + event->mmap2.len, > + event->mmap2.pgoff); > + if (cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL) > + new_event->mmap2.pgoff = new_event->mmap2.start; [Severity: High] Can this similarly leak original randomized virtual addresses for MMAP2 events? Like with aslr_tool__process_mmap() above, skipping the pgoff remapping for user-space anonymous mappings here leaves the original ASLR addresses unmodified. > + err = delegate->mmap2(delegate, new_event, sample, machine); > + thread__put(thread); > + return err; > +} -- Sashiko AI review · https://sashiko.dev/#/patchset/20260605205649.2566948-1-irogers@google.com?part=2 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v14 3/5] perf inject/aslr: Implement sample address remapping 2026-06-05 20:56 ` [PATCH v14 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-05 20:56 ` [PATCH v14 1/5] perf maps: Add maps__mutate_mapping Ian Rogers 2026-06-05 20:56 ` [PATCH v14 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers @ 2026-06-05 20:56 ` Ian Rogers 2026-06-05 23:17 ` sashiko-bot 2026-06-05 20:56 ` [PATCH v14 4/5] perf test: Add inject ASLR test Ian Rogers ` (2 subsequent siblings) 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-05 20:56 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz Add the sample address remapping logic to the ASLR tool. This patch implements aslr_tool__process_sample, which parses sample events, remaps IPs, ADDRs, callchains, and branch stacks using the mappings collected from metadata events, and drops potentially leaking raw, register, stack, physical address, and aux samples. Also adds the aslr_tool__remap_address helper function. 
Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/util/aslr.c | 449 +++++++++++++++++++++++++++++++++++++++- tools/perf/util/evsel.c | 6 +- tools/perf/util/evsel.h | 10 +- 3 files changed, 456 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c index 4b4b00b4d52d..ebae4617b158 100644 --- a/tools/perf/util/aslr.c +++ b/tools/perf/util/aslr.c @@ -111,6 +111,60 @@ static u64 round_up_to_page_size(u64 addr) return (addr + page_size - 1) & ~((u64)page_size - 1); } +static u64 aslr_tool__remap_address(struct aslr_tool *aslr, + struct thread *aslr_thread, + u8 cpumode, + u64 addr) +{ + struct addr_location al; + struct remap_addresses_key key; + u64 *remapped_invariant_ptr = NULL; + u64 remap_addr = 0; + u8 effective_cpumode = cpumode; + + if (!aslr_thread) + return 0; /* No thread. */ + + addr_location__init(&al); + if (!thread__find_map(aslr_thread, cpumode, addr, &al)) { + /* + * If lookup fails with specified cpumode, try fallback to the other space + * to be robust against bad cpumode in samples. + */ + if (cpumode == PERF_RECORD_MISC_KERNEL) + effective_cpumode = PERF_RECORD_MISC_USER; + else if (cpumode == PERF_RECORD_MISC_USER) + effective_cpumode = PERF_RECORD_MISC_KERNEL; + else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + effective_cpumode = PERF_RECORD_MISC_GUEST_USER; + else if (cpumode == PERF_RECORD_MISC_GUEST_USER) + effective_cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + + if (!thread__find_map(aslr_thread, effective_cpumode, addr, &al)) { + addr_location__exit(&al); + return 0; /* No mmap. */ + } + } + + key.machine = maps__machine(thread__maps(aslr_thread)); + key.dso = map__dso(al.map); + key.invariant = map__start(al.map) - map__pgoff(al.map); + key.pid = (effective_cpumode == PERF_RECORD_MISC_KERNEL || + effective_cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? 
+ kernel_pid : thread__pid(aslr_thread); + + if (hashmap__find(&aslr->remap_addresses, &key, &remapped_invariant_ptr)) { + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + + (addr - map__start(al.map)); + } else { + pr_debug("Cannot find a remapped entry for address %lx in mapping %lx(%lx) for pid=%d\n", + addr, map__start(al.map), map__size(al.map), key.pid); + } + + addr_location__exit(&al); + return remap_addr; +} + struct aslr_machine_priv { bool kernel_maps_loaded; }; @@ -560,13 +614,400 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, struct perf_sample *sample, struct machine *machine) { - struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); - struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); - struct perf_tool *delegate = aslr->tool.delegate; + struct evsel *evsel = sample->evsel; + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + int ret; + u64 sample_type; + struct thread *thread; + struct machine *aslr_machine; + __u64 max_i; + __u64 max_j; + union perf_event *new_event; + struct perf_sample new_sample; + __u64 *in_array, *out_array; + u8 cpumode; + u64 addr; + size_t i; + size_t j; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + if (evsel__is_dummy_event(evsel)) + return delegate->sample(delegate, event, sample, machine); + + ret = -EFAULT; + sample_type = evsel->core.attr.sample_type; + max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); + max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); + new_event = (union perf_event *)aslr->event_copy; + cpumode = sample->cpumode; + i = 0; + j = 0; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return 
-ENOMEM; + + thread = machine__findnew_thread(aslr_machine, sample->pid, sample->tid); + + if (!thread) + return -ENOMEM; - return delegate->sample(delegate, event, sample, machine); + if (max_i > PERF_SAMPLE_MAX_SIZE / sizeof(u64)) + goto out_put; + + new_event->sample.header = event->sample.header; + + in_array = &event->sample.array[0]; + out_array = &new_event->sample.array[0]; + +#define CHECK_BOUNDS(required_i, required_j) \ + (i + (required_i) > max_i || j + (required_j) > max_j) + +#define COPY_U64() \ + do { \ + if (CHECK_BOUNDS(1, 1)) { \ + ret = -EFAULT; \ + goto out_put; \ + } \ + out_array[j++] = in_array[i++]; \ + } while (0) + +#define REMAP_U64(addr_field) \ + do { \ + if (CHECK_BOUNDS(1, 1)) { \ + ret = -EFAULT; \ + goto out_put; \ + } \ + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr_field); \ + i++; \ + } while (0) + + if (sample_type & PERF_SAMPLE_IDENTIFIER) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_IP) + REMAP_U64(sample->ip); + if (sample_type & PERF_SAMPLE_TID) + COPY_U64(); /* pid, tid */ + if (sample_type & PERF_SAMPLE_TIME) + COPY_U64(); /* time */ + if (sample_type & PERF_SAMPLE_ADDR) + REMAP_U64(sample->addr); + if (sample_type & PERF_SAMPLE_ID) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_STREAM_ID) + COPY_U64(); /* stream_id */ + if (sample_type & PERF_SAMPLE_CPU) + COPY_U64(); /* cpu, res */ + if (sample_type & PERF_SAMPLE_PERIOD) + COPY_U64(); /* period */ + if (sample_type & PERF_SAMPLE_READ) { + if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } else { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) 
{ + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + nr = out_array[j++]; + i++; + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + for (u64 cntr = 0; cntr < nr; cntr++) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } + } + } + if (sample_type & PERF_SAMPLE_CALLCHAIN) { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + nr = out_array[j++]; + i++; + + for (u64 cntr = 0; cntr < nr; cntr++) { + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + addr = in_array[i++]; + if (addr >= PERF_CONTEXT_MAX) { + out_array[j++] = addr; + switch (addr) { + case PERF_CONTEXT_HV: + cpumode = PERF_RECORD_MISC_HYPERVISOR; + break; + case PERF_CONTEXT_KERNEL: + cpumode = PERF_RECORD_MISC_KERNEL; + break; + case PERF_CONTEXT_USER: + cpumode = PERF_RECORD_MISC_USER; + break; + case PERF_CONTEXT_GUEST: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_KERNEL: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_USER: + cpumode = PERF_RECORD_MISC_GUEST_USER; + break; + case PERF_CONTEXT_USER_DEFERRED: + if (cntr + 1 >= nr) { + pr_debug("Truncated callchain deferred cookie context\n"); + ret = 0; + goto out_put; + } + /* + * Immediately followed by a 64-bit + * stitching cookie. Skip/Copy it! 
+ */ + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j++] = in_array[i++]; + cntr++; + cpumode = PERF_RECORD_MISC_USER; + break; + default: + pr_debug("invalid callchain context: %"PRIx64"\n", addr); + ret = 0; + goto out_put; + } + continue; + } + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr); + } + } + if (sample_type & PERF_SAMPLE_RAW) { + size_t bytes = sizeof(u32) + sample->raw_size; + size_t u64_words = (bytes + 7) / 8; + + if (i + u64_words > max_i || j + u64_words > max_j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], bytes); + i += u64_words; + j += u64_words; + /* + * TODO: certain raw samples can be remapped, such as + * tracepoints by examining their fields. + */ + pr_debug("Dropping raw samples as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + nr = out_array[j++]; + i++; + + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) + COPY_U64(); /* hw_idx */ + + if (nr > (ULLONG_MAX / 3)) { + ret = -EFAULT; + goto out_put; + } + if (nr * 3 > max_i - i || nr * 3 > max_j - j) { + ret = -EFAULT; + goto out_put; + } + for (u64 cntr = 0; cntr < nr; cntr++) { + out_array[j++] = aslr_tool__remap_address(aslr, thread, + sample->cpumode, + in_array[i++]); /* from */ + out_array[j++] = aslr_tool__remap_address(aslr, thread, + sample->cpumode, + in_array[i++]); /* to */ + out_array[j++] = in_array[i++]; /* flags */ + } + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) { + if (nr > max_i - i || nr > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); + i += nr; + j += nr; + /* TODO: confirm branch counters don't leak ASLR information. 
*/ + pr_debug("Dropping sample branch counters as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + } + if (sample_type & PERF_SAMPLE_REGS_USER) { + if (CHECK_BOUNDS(1, 0)) { + ret = -EFAULT; + goto out_put; + } + /* abi */ + COPY_U64(); + /* TODO: can this be less conservative? */ + pr_debug("Dropping regs user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_STACK_USER) { + u64 size; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + size = out_array[j++]; + i++; + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + if (size % 8) { + size_t pad = 8 - (size % 8); + + memset(((char *)&out_array[j]) + size, 0, pad); + } + i += u64_words; + j += u64_words; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping stack user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + COPY_U64(); /* perf_sample_weight */ + if (sample_type & PERF_SAMPLE_DATA_SRC) + COPY_U64(); /* data_src */ + if (sample_type & PERF_SAMPLE_TRANSACTION) + COPY_U64(); /* transaction */ + if (sample_type & PERF_SAMPLE_REGS_INTR) { + if (CHECK_BOUNDS(1, 0)) { + ret = -EFAULT; + goto out_put; + } + /* abi */ + COPY_U64(); + /* TODO: can this be less conservative? */ + pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_PHYS_ADDR) { + COPY_U64(); /* phys_addr */ + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping physical address sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_CGROUP) + COPY_U64(); /* cgroup */ + if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + COPY_U64(); /* data_page_size */ + if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + COPY_U64(); /* code_page_size */ + + if (sample_type & PERF_SAMPLE_AUX) { + u64 size; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + size = out_array[j++]; + i++; + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + if (size % 8) { + size_t pad = 8 - (size % 8); + + memset(((char *)&out_array[j]) + size, 0, pad); + } + i += u64_words; + j += u64_words; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping aux sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + if (evsel__is_offcpu_event(evsel)) { + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping off-CPU sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); + + perf_sample__init(&new_sample, /*all=*/ true); + ret = evsel__parse_sample(evsel, new_event, &new_sample); + + if (ret) { + perf_sample__exit(&new_sample); + goto out_put; + } + + new_sample.evsel = evsel; + ret = delegate->sample(delegate, new_event, &new_sample, machine); + perf_sample__exit(&new_sample); + +out_put: + thread__put(thread); + return ret; } +#undef CHECK_BOUNDS +#undef COPY_U64 +#undef REMAP_U64 + static int skipn(int fd, off_t n) { char buf[4096]; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 34c03f47a913..05fa0010c858 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -3337,11 +3337,11 @@ static int __set_offcpu_sample(struct perf_sample *data) return -EFAULT; } -int evsel__parse_sample(struct evsel *evsel, union perf_event *event, - struct perf_sample *data) +int __evsel__parse_sample(struct evsel *evsel, union perf_event *event, + struct perf_sample *data, bool needs_swap) { u64 type = evsel->core.attr.sample_type; - bool swapped = evsel->needs_swap; + bool swapped = needs_swap; const __u64 *array; u16 max_size = event->header.size; const void *endp = (void *)event + max_size; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 8178858d168a..8009be22cc3f 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -432,8 +432,14 @@ static inline int evsel__read_on_cpu_scaled(struct evsel *evsel, int cpu_map_idx return __evsel__read_on_cpu(evsel, cpu_map_idx, thread, true); } -int evsel__parse_sample(struct evsel *evsel, union perf_event *event, - struct perf_sample *sample); +int __evsel__parse_sample(struct evsel *evsel, union perf_event *event, + struct perf_sample *data, bool needs_swap); + +static inline int evsel__parse_sample(struct evsel *evsel, union perf_event *event, + struct 
perf_sample *data) +{ + return __evsel__parse_sample(evsel, event, data, evsel->needs_swap); +} int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event, u64 *timestamp); -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v14 3/5] perf inject/aslr: Implement sample address remapping 2026-06-05 20:56 ` [PATCH v14 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers @ 2026-06-05 23:17 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-06-05 23:17 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider: - [Medium] Double-swapping of branch stack flags on cross-endian hosts corrupts output file branch metadata. -- commit dcb8e448dca91e01727a6cd53948bd906dde4043 Author: Ian Rogers <irogers@google.com> perf inject/aslr: Implement sample address remapping This commit adds sample address remapping logic to the ASLR tool. It parses sample events, remaps IPs, addresses, callchains, and branch stacks, and filters out potential leak sources like raw samples. > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > --- a/tools/perf/util/aslr.c > +++ b/tools/perf/util/aslr.c [ ... ] > @@ -560,13 +614,400 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, [ ... ] > + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { [ ... ] > + for (u64 cntr = 0; cntr < nr; cntr++) { > + out_array[j++] = aslr_tool__remap_address(aslr, thread, > + sample->cpumode, > + in_array[i++]); /* from */ > + out_array[j++] = aslr_tool__remap_address(aslr, thread, > + sample->cpumode, > + in_array[i++]); /* to */ > + out_array[j++] = in_array[i++]; /* flags */ > + } [ ... ] > + new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); > + > + perf_sample__init(&new_sample, /*all=*/ true); > + ret = evsel__parse_sample(evsel, new_event, &new_sample); [Severity: Medium] On cross-endian hosts, does calling evsel__parse_sample() here corrupt the branch stack flags? 
The initial pipeline already called evsel__parse_sample() on the original event, which performs an in-place bitfield swap on the branch stack flags to match the host layout when evsel->needs_swap is true. When aslr_tool__process_sample() copies these already-swapped flags into the new event array and then calls evsel__parse_sample() a second time on the new event, won't this trigger the in-place bitfield swap again? This double-swap would revert the flags back to the file endianness layout within a host-endian output file, potentially corrupting branch stack metadata (like mispredicts or cycles) for downstream tools. > + > + if (ret) { > + perf_sample__exit(&new_sample); > + goto out_put; > + } > + > + new_sample.evsel = evsel; > + ret = delegate->sample(delegate, new_event, &new_sample, machine); > + perf_sample__exit(&new_sample); -- Sashiko AI review · https://sashiko.dev/#/patchset/20260605205649.2566948-1-irogers@google.com?part=3 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v14 4/5] perf test: Add inject ASLR test 2026-06-05 20:56 ` [PATCH v14 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (2 preceding siblings ...) 2026-06-05 20:56 ` [PATCH v14 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers @ 2026-06-05 20:56 ` Ian Rogers 2026-06-05 21:05 ` sashiko-bot 2026-06-05 20:56 ` [PATCH v14 5/5] perf aslr: Strip sample registers Ian Rogers 2026-06-06 7:21 ` [PATCH v15 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-05 20:56 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz Add a new shell test to verify the 'perf inject --aslr' feature. The test covers: - Basic address remapping for user space samples. - Pipe mode coverage for 'perf record' piped into 'perf inject'. - Callchain address remapping. - Consistency of 'perf report' output before and after injection. - Pipe mode report consistency. - Dropping of samples that leak ASLR info (physical addresses). - Kernel address remapping (using a dedicated kernel-intensive VFS dd workload so that timer-interrupt samples are reliably taken while running in kernel mode). - Kernel report consistency with address normalization. The test suite is hardened with global 'set -o pipefail' assertions to catch pipeline failures, stream-consuming awk processors to handle SIGPIPE signals, and a dedicated pipe output scenario validating raw 'perf inject -o -' stdout streams.
Assisted-by: Antigravity:gemini-3.5-flash Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/tests/shell/inject_aslr.sh | 464 ++++++++++++++++++++++++++ 1 file changed, 464 insertions(+) create mode 100755 tools/perf/tests/shell/inject_aslr.sh diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh new file mode 100755 index 000000000000..d8ded16ba905 --- /dev/null +++ b/tools/perf/tests/shell/inject_aslr.sh @@ -0,0 +1,464 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# perf inject --aslr test + +set -e +set -o pipefail + +shelldir=$(dirname "$0") +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + +sym="noploop" + +skip_test_missing_symbol ${sym} + +# Create global temp directory +temp_dir=$(mktemp -d /tmp/perf-test-aslr.XXXXXXXXXX) + +prog="perf test -w noploop" +[ "$(uname -m)" = "s390x" ] && prog="$prog 3" +err=0 +kprog="dd if=/dev/zero of=/dev/null bs=1M count=500" + +cleanup() { + local exit_code=${1:-$?} + trap - EXIT TERM INT + if [ "${exit_code}" -ne 0 ] || [ "${err}" -ne 0 ]; then + echo "Test failed! Preserving temp directory: ${temp_dir}" + return + fi + # Check if temp_dir is set and looks sane before removing + if [[ "${temp_dir}" =~ ^/tmp/perf-test-aslr\. ]]; then + rm -rf "${temp_dir}" + fi +} + +trap_cleanup() { + local exit_code=$? 
+ echo "Unexpected signal in ${FUNCNAME[1]}" + cleanup ${exit_code} + exit ${exit_code} +} +trap trap_cleanup EXIT TERM INT + +get_noploop_addr() { + local file=$1 + perf script -i "$file" | awk ' + BEGIN { found=0 } + { + for (i=1; i<=NF; i++) { + if ($i ~ /noploop\+/) { + if (!found) { + print $(i-1) + found=1 + } + } + } + }' +} + +test_basic_aslr() { + echo "Test basic ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.basic.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.basic.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -v --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Basic ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Basic ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Basic ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Basic ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Basic ASLR test [Success]" + fi +} + +test_pipe_aslr() { + echo "Test pipe mode ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe.XXXXXX") + + # Use tee to save the original pipe data for comparison + perf record -e task-clock:u -o - ${prog} | tee "${data}" | perf inject --aslr -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Pipe ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Pipe ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Pipe ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Pipe ASLR test [Failed - addresses are not remapped]" + 
err=1 + else + echo "Pipe ASLR test [Success]" + fi +} + +test_callchain_aslr() { + echo "Test Callchain ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.callchain.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.callchain.XXXXXX") + + perf record -g -e task-clock:u -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Callchain ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Callchain ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Callchain ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Callchain ASLR test [Failed - addresses are not remapped]" + err=1 + else + # Extract callchain addresses (indented lines starting with hex addresses) + orig_callchain=$(perf script -i "${data}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + new_callchain=$(perf script -i "${data2}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + + if [ -z "$orig_callchain" ]; then + echo "Callchain ASLR test [Failed - no callchain samples in original file]" + err=1 + elif [ -z "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain data was dropped]" + err=1 + elif [ "$orig_callchain" = "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain addresses were not remapped]" + err=1 + else + echo "Callchain ASLR test [Success]" + fi + fi +} + +test_report_aslr() { + echo "Test perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i 
"${data}" -o "${data_clean}" + perf inject -v -b --aslr -i "${data}" -o "${data2}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Report ASLR test [Success]" + fi +} + +test_pipe_report_aslr() { + echo "Test pipe mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + # Use tee to save the original pipe data, then process it with inject -b + perf record -e task-clock:u -o - ${prog} | \ + tee "${data}" | \ + perf inject -b --aslr -o "${data2}" + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || 
true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Pipe Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Report ASLR test [Success]" + fi +} + +test_pipe_out_report_aslr() { + echo "Test pipe output mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_out_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf inject -b --aslr -i "${data}" -o - | perf report -i - --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! 
-s "${report1_clean}" ]; then + echo "Pipe Output Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Output Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Output Report ASLR test [Success]" + fi +} + +test_dropped_samples() { + echo "Test dropped samples (phys-data)" + local data + data=$(mktemp "${temp_dir}/perf.data.dropped.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.dropped.XXXXXX") + + # Check if --phys-data is supported by recording a short run + if ! perf record -e task-clock:u --phys-data -o "${data}" -- sleep 0.1 > /dev/null 2>&1; then + echo "Skipping dropped samples test as --phys-data is not supported" + return + fi + + perf record -e task-clock:u --phys-data -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + # Verify that the original file actually contained samples! + orig_samples=$(perf script -i "${data}" | wc -l) + if [ "$orig_samples" -eq 0 ]; then + echo "Dropped samples test [Failed - no samples in original file]" + err=1 + else + # Verify that samples are dropped. + samples_count=$(perf script -i "${data2}" | wc -l) + + if [ "$samples_count" -gt 0 ]; then + echo "Dropped samples test [Failed - samples were not dropped]" + err=1 + else + echo "Dropped samples test [Success]" + fi + fi +} + +test_kernel_aslr() { + echo "Test kernel ASLR remapping" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then + echo "Skipping kernel ASLR test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel ASLR test as kernel map could not be recorded (permissions restricted)" + return + fi + + perf inject -v --aslr -i "${kdata}" -o "${kdata2}" + + # Check if kernel addresses are remapped. + # Find the field that ends with :k: (the event name) and take the next field! + orig_addr=$(perf script -i "${kdata}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + new_addr=$(perf script -i "${kdata2}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + + echo "Kernel ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Kernel ASLR test [Failed - no kernel samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Kernel ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Kernel ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Kernel ASLR test [Success]" + fi +} + +test_kernel_report_aslr() { + echo "Test kernel perf report consistency" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel_report.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_report_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then + echo "Skipping kernel report test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel report test as kernel map could not be recorded (permissions restricted)" + return + fi + + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i "${kdata}" -o "${data_clean}" + perf inject -v -b --aslr -i "${kdata}" -o "${kdata2}" + + local report1="${temp_dir}/report_kernel1" + local report2="${temp_dir}/report_kernel2" + local report1_clean="${temp_dir}/report_kernel1.clean" + local report2_clean="${temp_dir}/report_kernel2.clean" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${kdata2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' > "${report2_clean}" || true + + # Normalize kernel DSOs and addresses in clean reports + # This allows kernel modules to be either a module or kernel.kallsyms + local report1_norm="${temp_dir}/report_kernel1.norm" + local report2_norm="${temp_dir}/report_kernel2.norm" + local diff_file="${temp_dir}/diff_kernel" + + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report1_clean}" | \ + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ + sort > "${report1_norm}" || true + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report2_clean}" | \ + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ + sort > "${report2_norm}" || true + + diff -u -w "${report1_norm}" "${report2_norm}" > "${diff_file}" || true + + if [ ! 
-s "${report1_norm}" ]; then + echo "Kernel Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Kernel Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Kernel Report ASLR test [Success]" + fi +} + +test_basic_aslr +test_pipe_aslr +test_callchain_aslr +test_report_aslr +test_pipe_report_aslr +test_pipe_out_report_aslr +test_dropped_samples +test_kernel_aslr +test_kernel_report_aslr + +cleanup ${err} +exit $err -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v14 4/5] perf test: Add inject ASLR test 2026-06-05 20:56 ` [PATCH v14 4/5] perf test: Add inject ASLR test Ian Rogers @ 2026-06-05 21:05 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-06-05 21:05 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider: - [Medium] The kernel workload for sampling (`dd if=/dev/zero of=/dev/null bs=1M count=500`) executes too quickly on modern systems, risking 0 kernel samples and causing flaky test failures. -- commit d9af52a5de1d8869d352d74aa99a8ba4f6d3011b Author: Ian Rogers <irogers@google.com> perf test: Add inject ASLR test This commit adds a new shell test inject_aslr.sh to verify ASLR address remapping. The test validates sample generation and address remapping across basic operations, pipe modes, callchains, and kernel sampling. > diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh > new file mode 100755 > index 0000000000000..d8ded16ba9052 > --- /dev/null > +++ b/tools/perf/tests/shell/inject_aslr.sh > @@ -0,0 +1,464 @@ > +#!/bin/bash > +# SPDX-License-Identifier: GPL-2.0 > +# perf inject --aslr test > + > +set -e > +set -o pipefail > + > +shelldir=$(dirname "$0") > +# shellcheck source=lib/perf_has_symbol.sh > +. "${shelldir}"/lib/perf_has_symbol.sh > + > +sym="noploop" > + > +skip_test_missing_symbol ${sym} > + > +# Create global temp directory > +temp_dir=$(mktemp -d /tmp/perf-test-aslr.XXXXXXXXXX) > + > +prog="perf test -w noploop" > +[ "$(uname -m)" = "s390x" ] && prog="$prog 3" > +err=0 > +kprog="dd if=/dev/zero of=/dev/null bs=1M count=500" [Severity: Medium] Will this workload execute too quickly on modern systems? The command transfers 500MB in 1MB chunks, resulting in only 1000 syscalls which could complete in just a few milliseconds. 
With perf record defaulting to a ~4000Hz sampling rate, a workload completing this fast might generate very few or exactly 0 task-clock:k samples, potentially causing flaky test failures. Could we replace this with a syscall-bound workload, like using the default 512B block size with a larger count, to ensure consistent kernel execution time irrespective of memory bandwidth? -- Sashiko AI review · https://sashiko.dev/#/patchset/20260605205649.2566948-1-irogers@google.com?part=4 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v14 5/5] perf aslr: Strip sample registers 2026-06-05 20:56 ` [PATCH v14 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (3 preceding siblings ...) 2026-06-05 20:56 ` [PATCH v14 4/5] perf test: Add inject ASLR test Ian Rogers @ 2026-06-05 20:56 ` Ian Rogers 2026-06-06 7:21 ` [PATCH v15 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 5 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-05 20:56 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz Refactor the ASLR tool to strip out only the register dump payload by masking out the relevant perf_event_attr fields when the delegated tool is handling the data. struct aslr_evsel_priv maintains the original perf_event_attr values and is looked up via the evsel_orig_attrs hashmap. This allows us to keep samples that would otherwise be dropped because they contain registers, while still stripping the register values from the output.
Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/builtin-inject.c | 46 ++++-- tools/perf/tests/shell/inject_aslr.sh | 55 +++++++ tools/perf/util/aslr.c | 212 +++++++++++++++++++++----- tools/perf/util/aslr.h | 4 + 4 files changed, 267 insertions(+), 50 deletions(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 65c7eccccf4d..de315bb334b3 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -253,6 +253,12 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, return -ENOMEM; memcpy(stripped_event, event, event->header.size); stripped_event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + if (stripped_event->attr.attr.size >= + (offsetof(struct perf_event_attr, sample_regs_user) + sizeof(u64))) + stripped_event->attr.attr.sample_regs_user = 0; + if (stripped_event->attr.attr.size >= + (offsetof(struct perf_event_attr, sample_regs_intr) + sizeof(u64))) + stripped_event->attr.attr.sample_regs_intr = 0; if (stripped_event->attr.attr.type == PERF_TYPE_BREAKPOINT) stripped_event->attr.attr.bp_addr = 0; @@ -295,8 +301,13 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, attr.size = sizeof(struct perf_event_attr); attr.sample_type &= ~PERF_SAMPLE_AUX; - if (inject->aslr) + if (inject->aslr) { attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + if (attr.type == PERF_TYPE_BREAKPOINT) + attr.bp_addr = 0; + attr.sample_regs_user = 0; + attr.sample_regs_intr = 0; + } if (inject->itrace_synth_opts.add_last_branch) { attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; @@ -2618,6 +2629,9 @@ static int __cmd_inject(struct perf_inject *inject) + if (inject->aslr) + aslr_tool__strip_evlist(inject->session->tool, session->evlist); + session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; perf_session__inject_header(session, session->evlist, fd, 
&inj_fc.fc, @@ -2876,6 +2890,18 @@ int cmd_inject(int argc, const char **argv) if (zstd_init(&(inject.session->zstd_data), 0) < 0) pr_warning("Decompression initialization failed.\n"); + if (inject.aslr) { + struct evsel *evsel; + + evlist__for_each_entry(inject.session->evlist, evsel) { + ret = aslr_tool__cache_orig_attrs(tool, evsel); + if (ret) { + pr_err("Failed to cache original attributes: %d\n", ret); + goto out_delete; + } + } + } + /* Save original section info before feature bits change */ ret = save_section_info(&inject); if (ret) @@ -2894,10 +2920,17 @@ int cmd_inject(int argc, const char **argv) * the input. */ if (!data.is_pipe) { + if (inject.aslr) + aslr_tool__strip_evlist(tool, inject.session->evlist); + ret = perf_event__synthesize_for_pipe(&inject.tool, inject.session, &inject.output, perf_event__repipe); + + if (inject.aslr) + aslr_tool__restore_evlist(tool, inject.session->evlist); + if (ret < 0) goto out_delete; } @@ -2963,17 +2996,6 @@ int cmd_inject(int argc, const char **argv) ret = __cmd_inject(&inject); - if (inject.aslr) { - struct evsel *evsel; - - evlist__for_each_entry(inject.session->evlist, evsel) { - evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; - - if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) - evsel->core.attr.bp_addr = 0; - } - } - guest_session__exit(&inject.guest_session); out_delete: diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh index d8ded16ba905..21d306a0ff2f 100755 --- a/tools/perf/tests/shell/inject_aslr.sh +++ b/tools/perf/tests/shell/inject_aslr.sh @@ -450,6 +450,60 @@ test_kernel_report_aslr() { fi } +test_regs_stripping() { + echo "Test user register stripping" + local rdata="${temp_dir}/perf.data.regs" + local rdata2="${temp_dir}/perf.data.regs.injected" + local rdata_clean="${temp_dir}/perf.data.regs.clean" + + if ! 
perf record --user-regs -o "${rdata}" ${prog} > /dev/null 2>&1; then + echo "Skipping user registers test as recording failed (unsupported flag/platform)" + return + fi + + perf inject -b -i "${rdata}" -o "${rdata_clean}" + perf inject -v -b --aslr -i "${rdata}" -o "${rdata2}" + + local report1="${temp_dir}/report_regs1" + local report2="${temp_dir}/report_regs2" + local report1_clean="${temp_dir}/report_regs1.clean" + local report2_clean="${temp_dir}/report_regs2.clean" + local diff_file="${temp_dir}/diff_regs" + + perf report -i "${rdata_clean}" --stdio > "${report1}" 2>/dev/null || true + perf report -i "${rdata2}" --stdio > "${report2}" 2>/dev/null || true + + grep '%' "${report1}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | \ + sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | \ + sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! 
-s "${report1_clean}" ]; then + echo "User registers stripping test [Failed - profile trace starved/empty]" + err=1 + return + elif [ -s "${diff_file}" ]; then + echo "User registers stripping test [Failed - report parsing differs]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + return + fi + + local script_dump="${temp_dir}/script_regs_dump" + perf script -D -i "${rdata2}" > "${script_dump}" 2>/dev/null || true + if grep -q "PERF_SAMPLE_REGS_USER" "${script_dump}"; then + echo "User registers stripping test [Failed - register dumps still present]" + err=1 + else + echo "User registers stripping test [Success]" + fi +} + test_basic_aslr test_pipe_aslr test_callchain_aslr @@ -459,6 +513,7 @@ test_pipe_out_report_aslr test_dropped_samples test_kernel_aslr test_kernel_report_aslr +test_regs_stripping cleanup ${err} exit $err diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c index ebae4617b158..995159d56ff5 100644 --- a/tools/perf/util/aslr.c +++ b/tools/perf/util/aslr.c @@ -5,6 +5,7 @@ #include "debug.h" #include "event.h" #include "evsel.h" +#include "evlist.h" #include "machine.h" #include "map.h" #include "thread.h" @@ -16,6 +17,7 @@ #include <internal/lib.h> /* page_size */ #include <linux/compiler.h> #include <linux/zalloc.h> +#include <errno.h> #include <inttypes.h> #include <unistd.h> @@ -43,6 +45,23 @@ struct aslr_mapping { u64 remap_start; }; +struct aslr_evsel_priv { + u64 orig_sample_type; + u64 orig_sample_regs_user; + u64 orig_sample_regs_intr; + int orig_sample_size; +}; + +static size_t evsel_hash(long key, void *ctx __maybe_unused) +{ + return (size_t)key; +} + +static bool evsel_equal(long key1, long key2, void *ctx __maybe_unused) +{ + return key1 == key2; +} + struct process_top_address { u64 remapped_max; u64 orig_last_end; @@ -58,6 +77,11 @@ struct aslr_tool { struct hashmap remap_addresses; /** @top_addresses: mapping from process to max remapped address. 
*/ struct hashmap top_addresses; + /** + * @evsel_orig_attrs: mapping from evsel pointer to its original + * unstripped sample_type and registers bitmasks. + */ + struct hashmap evsel_orig_attrs; }; static const pid_t kernel_pid = -1; @@ -619,6 +643,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, struct aslr_tool *aslr; struct perf_tool *delegate; int ret; + int orig_sample_size; u64 sample_type; struct thread *thread; struct machine *aslr_machine; @@ -631,7 +656,10 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, u64 addr; size_t i; size_t j; - + struct aslr_evsel_priv *priv = NULL; + u64 orig_sample_type; + u64 orig_regs_user; + u64 orig_regs_intr; del_tool = container_of(tool, struct delegate_tool, tool); aslr = container_of(del_tool, struct aslr_tool, tool); delegate = aslr->tool.delegate; @@ -640,7 +668,23 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, return delegate->sample(delegate, event, sample, machine); ret = -EFAULT; - sample_type = evsel->core.attr.sample_type; + + if (hashmap__find(&aslr->evsel_orig_attrs, evsel, &priv)) { + orig_sample_type = priv->orig_sample_type; + orig_regs_user = priv->orig_sample_regs_user; + orig_regs_intr = priv->orig_sample_regs_intr; + } else { + orig_sample_type = evsel->core.attr.sample_type; + orig_regs_user = evsel->core.attr.sample_regs_user; + orig_regs_intr = evsel->core.attr.sample_regs_intr; + } + + orig_sample_size = evsel->sample_size; + + sample_type = orig_sample_type; + sample_type &= ~PERF_SAMPLE_REGS_USER; + sample_type &= ~PERF_SAMPLE_REGS_INTR; + max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); new_event = (union perf_event *)aslr->event_copy; @@ -689,25 +733,25 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, i++; \ } while (0) - if (sample_type & PERF_SAMPLE_IDENTIFIER) + if (orig_sample_type & 
PERF_SAMPLE_IDENTIFIER) COPY_U64(); /* id */ - if (sample_type & PERF_SAMPLE_IP) + if (orig_sample_type & PERF_SAMPLE_IP) REMAP_U64(sample->ip); - if (sample_type & PERF_SAMPLE_TID) + if (orig_sample_type & PERF_SAMPLE_TID) COPY_U64(); /* pid, tid */ - if (sample_type & PERF_SAMPLE_TIME) + if (orig_sample_type & PERF_SAMPLE_TIME) COPY_U64(); /* time */ - if (sample_type & PERF_SAMPLE_ADDR) + if (orig_sample_type & PERF_SAMPLE_ADDR) REMAP_U64(sample->addr); - if (sample_type & PERF_SAMPLE_ID) + if (orig_sample_type & PERF_SAMPLE_ID) COPY_U64(); /* id */ - if (sample_type & PERF_SAMPLE_STREAM_ID) + if (orig_sample_type & PERF_SAMPLE_STREAM_ID) COPY_U64(); /* stream_id */ - if (sample_type & PERF_SAMPLE_CPU) + if (orig_sample_type & PERF_SAMPLE_CPU) COPY_U64(); /* cpu, res */ - if (sample_type & PERF_SAMPLE_PERIOD) + if (orig_sample_type & PERF_SAMPLE_PERIOD) COPY_U64(); /* period */ - if (sample_type & PERF_SAMPLE_READ) { + if (orig_sample_type & PERF_SAMPLE_READ) { if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { COPY_U64(); /* value */ if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) @@ -741,7 +785,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, } } } - if (sample_type & PERF_SAMPLE_CALLCHAIN) { + if (orig_sample_type & PERF_SAMPLE_CALLCHAIN) { u64 nr; if (CHECK_BOUNDS(1, 1)) { @@ -807,7 +851,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr); } } - if (sample_type & PERF_SAMPLE_RAW) { + if (orig_sample_type & PERF_SAMPLE_RAW) { size_t bytes = sizeof(u32) + sample->raw_size; size_t u64_words = (bytes + 7) / 8; @@ -826,7 +870,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + if (orig_sample_type & PERF_SAMPLE_BRANCH_STACK) { u64 nr; if (CHECK_BOUNDS(1, 1)) { @@ -871,19 +915,25 @@ static int aslr_tool__process_sample(const 
struct perf_tool *tool, goto out_put; } } - if (sample_type & PERF_SAMPLE_REGS_USER) { + if (orig_sample_type & PERF_SAMPLE_REGS_USER) { + u64 abi; + if (CHECK_BOUNDS(1, 0)) { ret = -EFAULT; goto out_put; } - /* abi */ - COPY_U64(); - /* TODO: can this be less conservative? */ - pr_debug("Dropping regs user sample as possible ASLR leak\n"); - ret = 0; - goto out_put; + abi = in_array[i++]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(orig_regs_user); + + if (nr > max_i - i) { + ret = -EFAULT; + goto out_put; + } + i += nr; + } } - if (sample_type & PERF_SAMPLE_STACK_USER) { + if (orig_sample_type & PERF_SAMPLE_STACK_USER) { u64 size; if (CHECK_BOUNDS(1, 1)) { @@ -914,39 +964,45 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + if (orig_sample_type & PERF_SAMPLE_WEIGHT_TYPE) COPY_U64(); /* perf_sample_weight */ - if (sample_type & PERF_SAMPLE_DATA_SRC) + if (orig_sample_type & PERF_SAMPLE_DATA_SRC) COPY_U64(); /* data_src */ - if (sample_type & PERF_SAMPLE_TRANSACTION) + if (orig_sample_type & PERF_SAMPLE_TRANSACTION) COPY_U64(); /* transaction */ - if (sample_type & PERF_SAMPLE_REGS_INTR) { + if (orig_sample_type & PERF_SAMPLE_REGS_INTR) { + u64 abi; + if (CHECK_BOUNDS(1, 0)) { ret = -EFAULT; goto out_put; } - /* abi */ - COPY_U64(); - /* TODO: can this be less conservative? */ - pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); - ret = 0; - goto out_put; + abi = in_array[i++]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(orig_regs_intr); + + if (nr > max_i - i) { + ret = -EFAULT; + goto out_put; + } + i += nr; + } } - if (sample_type & PERF_SAMPLE_PHYS_ADDR) { + if (orig_sample_type & PERF_SAMPLE_PHYS_ADDR) { COPY_U64(); /* phys_addr */ /* TODO: can this be less conservative? 
*/ pr_debug("Dropping physical address sample as possible ASLR leak\n"); ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_CGROUP) + if (orig_sample_type & PERF_SAMPLE_CGROUP) COPY_U64(); /* cgroup */ - if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + if (orig_sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) COPY_U64(); /* data_page_size */ - if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + if (orig_sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) COPY_U64(); /* code_page_size */ - if (sample_type & PERF_SAMPLE_AUX) { + if (orig_sample_type & PERF_SAMPLE_AUX) { u64 size; if (CHECK_BOUNDS(1, 1)) { @@ -987,10 +1043,20 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); + /* Temporarily override evsel attributes to match the stripped new_event format! */ + evsel->sample_size = __evsel__sample_size(sample_type); + evsel->core.attr.sample_type = sample_type; + evsel->core.attr.sample_regs_user = 0; + evsel->core.attr.sample_regs_intr = 0; perf_sample__init(&new_sample, /*all=*/ true); ret = evsel__parse_sample(evsel, new_event, &new_sample); if (ret) { + /* Restore original attributes immediately if parsing fails */ + evsel->sample_size = orig_sample_size; + evsel->core.attr.sample_type = orig_sample_type; + evsel->core.attr.sample_regs_user = orig_regs_user; + evsel->core.attr.sample_regs_intr = orig_regs_intr; perf_sample__exit(&new_sample); goto out_put; } @@ -999,6 +1065,12 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = delegate->sample(delegate, new_event, &new_sample, machine); perf_sample__exit(&new_sample); + /* Restore original attributes so trace ingestion never desynchronizes! 
*/ + evsel->sample_size = orig_sample_size; + evsel->core.attr.sample_type = orig_sample_type; + evsel->core.attr.sample_regs_user = orig_regs_user; + evsel->core.attr.sample_regs_intr = orig_regs_intr; + out_put: thread__put(thread); return ret; @@ -1065,6 +1137,9 @@ static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) hashmap__init(&aslr->top_addresses, top_addresses__hash, top_addresses__equal, /*ctx=*/NULL); + hashmap__init(&aslr->evsel_orig_attrs, + evsel_hash, evsel_equal, + /*ctx=*/NULL); aslr->tool.tool.sample = aslr_tool__process_sample; /* read - reads a counter, okay to delegate. */ @@ -1126,11 +1201,72 @@ void aslr_tool__delete(struct perf_tool *tool) zfree(&cur->pkey); zfree(&cur->pvalue); } + hashmap__for_each_entry(&aslr->evsel_orig_attrs, cur, bkt) { + zfree(&cur->pvalue); + } hashmap__clear(&aslr->remap_addresses); hashmap__clear(&aslr->top_addresses); + hashmap__clear(&aslr->evsel_orig_attrs); aslr_tool__destroy_machines_priv(&aslr->machines); machines__destroy_kernel_maps(&aslr->machines); machines__exit(&aslr->machines); free(aslr); } + +int aslr_tool__cache_orig_attrs(struct perf_tool *tool, struct evsel *evsel) +{ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct aslr_evsel_priv *priv = zalloc(sizeof(*priv)); + int err; + + if (!priv) + return -ENOMEM; + + priv->orig_sample_type = evsel->core.attr.sample_type; + priv->orig_sample_regs_user = evsel->core.attr.sample_regs_user; + priv->orig_sample_regs_intr = evsel->core.attr.sample_regs_intr; + priv->orig_sample_size = evsel->sample_size; + + err = hashmap__add(&aslr->evsel_orig_attrs, evsel, priv); + if (err) { + free(priv); + return err; + } + return 0; +} + +void aslr_tool__strip_evlist(const struct perf_tool *tool __maybe_unused, struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + 
evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + evsel->core.attr.sample_regs_user = 0; + evsel->core.attr.sample_regs_intr = 0; + evsel->sample_size = __evsel__sample_size(evsel->core.attr.sample_type); + evsel__calc_id_pos(evsel); + + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) + evsel->core.attr.bp_addr = 0; + } +} + +void aslr_tool__restore_evlist(const struct perf_tool *tool, struct evlist *evlist) +{ + const struct delegate_tool *del_tool = container_of(tool, const struct delegate_tool, tool); + const struct aslr_tool *aslr = container_of(del_tool, const struct aslr_tool, tool); + struct evsel *evsel; + struct aslr_evsel_priv *priv; + + evlist__for_each_entry(evlist, evsel) { + if (hashmap__find(&aslr->evsel_orig_attrs, evsel, &priv)) { + evsel->core.attr.sample_type = priv->orig_sample_type; + evsel->core.attr.sample_regs_user = priv->orig_sample_regs_user; + evsel->core.attr.sample_regs_intr = priv->orig_sample_regs_intr; + evsel->sample_size = priv->orig_sample_size; + evsel__calc_id_pos(evsel); + } + } +} diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h index a9b90bf29540..4c2cffc0e500 100644 --- a/tools/perf/util/aslr.h +++ b/tools/perf/util/aslr.h @@ -30,8 +30,12 @@ struct perf_tool; struct evsel; +struct evlist; struct perf_tool *aslr_tool__new(struct perf_tool *delegate); void aslr_tool__delete(struct perf_tool *aslr); +int aslr_tool__cache_orig_attrs(struct perf_tool *tool, struct evsel *evsel); +void aslr_tool__strip_evlist(const struct perf_tool *tool, struct evlist *evlist); +void aslr_tool__restore_evlist(const struct perf_tool *tool, struct evlist *evlist); #endif /* __PERF_ASLR_H */ -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* [PATCH v15 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes 2026-06-05 20:56 ` [PATCH v14 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (4 preceding siblings ...) 2026-06-05 20:56 ` [PATCH v14 5/5] perf aslr: Strip sample registers Ian Rogers @ 2026-06-06 7:21 ` Ian Rogers 2026-06-06 7:21 ` [PATCH v15 1/5] perf maps: Add maps__mutate_mapping Ian Rogers ` (5 more replies) 5 siblings, 6 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-06 7:21 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz This patch series introduces the new 'perf inject --aslr' feature to remap virtual memory addresses, or drop events that could leak physical memory addresses, when profile record data is shared between machines. Bundled with this feature is a bug fix inside the core map tracking tool that hardens perf session analysis against concurrent lookup data races. Detailed Mechanism of MMAP Mapping and ASLR Virtual Address Allocation: The ASLR tool virtualizes the address space of the recorded processes by intercepting MMAP and MMAP2 events to build a consistent translation database, which is subsequently used to rewrite sample addresses. It maintains two primary lookup databases using hash maps: 1. 'remap_addresses': Maps an original mapping key to its new remapped base address. The key uses topological invariant coordinates: (machine, dso, invariant). The invariant is computed as (start - pgoff) for DSO-backed mappings. This invariant remains constant even when perf's internal overlap-resolution splits a VMA into fragmented pieces, ensuring split maps resolve consistently back to the same remapped base. 2. 'top_addresses': Tracks the allocation state per process (machine, pid). It maintains 'remapped_max' (the highest allocated address in the virtualized space).
For each MMAP/MMAP2 event: - We look up the DSO and invariant key in 'remap_addresses'. If found, we reuse the translation, preserving the offset within the mapping. - If not found, we allocate a new remapped address space: - We use thread__find_map to look up the mapping immediately preceding the new one in the original address space (at start - 1). If the preceding mapping was also remapped, we place the new mapping contiguously after it in the remapped space. This preserves contiguity of split mappings (e.g., symbols split by HugeTLB, or anonymous .bss segments adjacent to initialized data). - If no contiguous mapping is found, we insert a 1-page gap from the highest allocated address (remapped_max) to prevent accidental merging of unrelated VMAs. - The event's start address (and pgoff for kernel maps) is rewritten, and the event is delegated to the output writer. To remain strictly conservative and guarantee security, the tool scrubs breakpoint addresses (bp_addr) from all synthesized stream headers, completely drops PERF_RECORD_TEXT_POKE events to prevent leaks of absolute immediate pointer operands, and drops unsupported complex payloads (such as user register stacks, raw tracepoints, and hardware AUX tracing frames). Verification is reinforced with a shell test ('inject_aslr.sh'). Prerequisite Bug Fix (Patch 1): During development, a core map indexing issue was identified and resolved to prevent concurrent lookup data races during session analysis. Changes since v14: - Patch 2: Removed unnecessary vertical whitespace in builtin-inject.c. - Patch 2: Added comments explaining why pgoff is assigned for anonymous memory maps to prevent ASLR leaks. - Patch 2: Removed orig_last_end tracking and refactored contiguous mapping detection to use thread__find_map(..., start - 1, ...) based on Gabriel's feedback. - Patch 2: Scrub kprobe/uprobe event config1 and config2 fields to prevent address leaks.
- Patch 2: Overwrite pgoff with the remapped start address for anonymous mappings (detected via is_anon_memory and is_no_dso_memory). - Patch 3: Fix C90 mixed declaration error for orig_needs_swap. - Patch 3: Temporarily disable evsel->needs_swap during the secondary evsel__parse_sample() call to prevent branch stack double-swapping bugs. Changes since v13: - Patch 2: Added a NULL check for env before calling perf_env__kernel_is_64_bit(env) to prevent potential segfaults if the recorded environment has no headers. - Patch 5: Fixed sample_size and id_pos going out of sync during aslr_tool__strip_evlist() and aslr_tool__restore_evlist(). Instead of using evsel__reset_sample_bit(), which was acting as a no-op due to early bit clearing and corrupted sample_size, the tool now directly updates sample_type and recomputes sample_size/id_pos dynamically. Added orig_sample_size to aslr_evsel_priv to correctly restore the state. Changes since v12: - Patch 2: Fixed potential NULL pointer dereference in remap_addresses__hash() when handling unmapped memory events (key->dso is NULL) under REFCNT_CHECKING. - Patch 2: Dynamically detect machine architecture bitness via perf_env__kernel_is_64_bit() to select appropriate kernel_space_start boundaries, avoiding 64-bit address injection on 32-bit platforms. Changes since v11: - Patch 1: Fixed struct dso name accessor in maps.c by using dso__name() instead of ->name. - Patch 2: Fixed hash function in aslr.c to hash the underlying dso pointer using RC_CHK_ACCESS to support reference count checking. Changes since v10: - Patch 1: Added explicit tracking array logic in maps__load_maps() to correctly accumulate valid maps (skipping NULL entries after failures) and safely return the exact populated count, resolving out-of-bounds pointer iteration panics. 
- Patch 3: Fixed endianness bug during cross-endian sample parsing by passing evsel->needs_swap instead of false to __evsel__parse_sample in aslr.c, ensuring correct 32-bit field byte unswapping for packed fields. Refactored evsel__parse_sample to take a needs_swap argument via __evsel__parse_sample. - Patch 4: Fixed inject_aslr.sh exit code handling in trap functions to capture and propagate the correct pipeline failure status code instead of unconditionally returning success or failing the test. Changes since v9: - Patch 1: Added `-ENOMEM` error check inside `maps__find_symbol_by_name()` and return `NULL` early. Added map sorting state invalidation on early return in `maps__load_maps()`. - Patch 2: Fixed encapsulation by using `thread__maps()` and `thread__pid()` accessors in `aslr_tool__findnew_mapping()`. Added `pr_warning_once` warning when raw auxtrace data is dropped. - Patch 3: Fixed encapsulation by using `thread__maps()` and `thread__pid()` accessors in `aslr_tool__remap_address()`. Wrapped `evsel__parse_sample()` to temporarily disable `needs_swap` to avoid branch stack endianness corruption on cross-endian files. Fixed ISO C90 warning for declaration-after-statement for `orig_needs_swap`. - Patch 4: Fixed duplicate cleanup by explicitly removing trap handlers (`trap - EXIT TERM INT`) inside the `cleanup()` function. - Patch 5: Fixed heap corruption by adding size bounds checking before writing to `sample_regs_user` and `sample_regs_intr` fields. Added missing register mask clearing logic for the `itrace` synthesis path of `perf_event__repipe_attr()`. 
Ian Rogers (5): perf maps: Add maps__mutate_mapping perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking perf inject/aslr: Implement sample address remapping perf test: Add inject ASLR test perf aslr: Strip sample registers tools/perf/builtin-inject.c | 96 +- tools/perf/tests/shell/inject_aslr.sh | 519 ++++++++++ tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 1299 +++++++++++++++++++++++++ tools/perf/util/aslr.h | 41 + tools/perf/util/evsel.c | 6 +- tools/perf/util/evsel.h | 10 +- tools/perf/util/machine.c | 32 +- tools/perf/util/maps.c | 149 ++- tools/perf/util/maps.h | 3 + tools/perf/util/symbol-elf.c | 41 +- tools/perf/util/symbol.c | 17 +- 12 files changed, 2149 insertions(+), 65 deletions(-) create mode 100755 tools/perf/tests/shell/inject_aslr.sh create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v15 1/5] perf maps: Add maps__mutate_mapping 2026-06-06 7:21 ` [PATCH v15 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers @ 2026-06-06 7:21 ` Ian Rogers 2026-06-06 7:21 ` [PATCH v15 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers ` (4 subsequent siblings) 5 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-06 7:21 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz During kernel ELF symbol parsing (dso__process_kernel_symbol), proc kallsyms image loading (dso__load_kernel_sym, dso__load_guest_kernel_sym), and dynamic kernel memory map alignment updates (machine__update_kernel_mmap), the loader directly modifies the live virtual address boundary key fields of map objects. If these boundaries are mutated while the map pointer actively resides inside the parent maps cache array list (kmaps) outside of any locked section, an unsafe concurrent window is exposed where parallel worker lookup threads (e.g., inside perf top) can mistakenly assume the cache remains sorted based on stale parameters, executing binary search queries (bsearch) across an unsorted range and triggering lookup failures. Fix this by introducing maps__mutate_mapping() that explicitly acquires the parent maps write semaphore lock, executes an incoming mutation callback block to perform the field updates under lock protection, and invalidates the sorted tracking flags prior to releasing the write lock. This guarantees synchronization invariants, closing the concurrent lookup race window. The adjacent module alignment pass inside machine__create_kernel_maps() is safely preserved as a high-performance lockless pass, as its invocation lifecycle bounds remain strictly single-threaded by contract during session initialization.
To safely support this unconditional down_write write lock mutator without recursive read-to-write self-deadlock upgrades during lazy symbol loading, we introduce a public maps__load_maps() API. It copies map pointers under a brief read lock and force-loads all modules locklessly outside the lock. Callers (such as perf inject) must pre-load all kernel symbol maps up front at startup using maps__load_maps(), completely bypassing dynamic runtime mutations. Fixes: 39b12f781271 ("perf tools: Make it possible to read object code from vmlinux") Assisted-by: Antigravity:gemini-3.5-flash Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/util/machine.c | 32 +++++--- tools/perf/util/maps.c | 149 ++++++++++++++++++++++++++++------- tools/perf/util/maps.h | 3 + tools/perf/util/symbol-elf.c | 41 ++++++---- tools/perf/util/symbol.c | 17 +++- 5 files changed, 184 insertions(+), 58 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index da1ad58758af..1ea06fde14e0 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1539,22 +1539,30 @@ static void machine__set_kernel_mmap(struct machine *machine, map__set_end(machine->vmlinux_map, ~0ULL); } -static int machine__update_kernel_mmap(struct machine *machine, - u64 start, u64 end) +struct kernel_mmap_mutation_ctx { + u64 start; + u64 end; +}; + +static int kernel_mmap_mutate_cb(struct map *map, void *data) { - struct map *orig, *updated; - int err; + struct kernel_mmap_mutation_ctx *ctx = data; - orig = machine->vmlinux_map; - updated = map__get(orig); + map__set_start(map, ctx->start); + map__set_end(map, ctx->end); + if (ctx->start == 0 && ctx->end == 0) + map__set_end(map, ~0ULL); + return 0; +} - machine->vmlinux_map = updated; - maps__remove(machine__kernel_maps(machine), orig); - machine__set_kernel_mmap(machine, start, end); - err = maps__insert(machine__kernel_maps(machine), updated); - map__put(orig); +static int machine__update_kernel_mmap(struct machine *machine, 
+ u64 start, u64 end) +{ + struct kernel_mmap_mutation_ctx ctx = { .start = start, .end = end }; - return err; + return maps__mutate_mapping(machine__kernel_maps(machine), + machine->vmlinux_map, + kernel_mmap_mutate_cb, &ctx); } int machine__create_kernel_maps(struct machine *machine) diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index 923935ee21b6..b1b8efe42149 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -576,6 +576,49 @@ void maps__remove(struct maps *maps, struct map *map) #endif } +/** + * maps__mutate_mapping - Apply write-protected mutations to a map. + * @maps: The maps collection containing the map. + * @map: The map to mutate. + * @mutate_cb: Callback function that performs the actual mutations. + * @data: Private data passed to the callback. + * + * This acquires the write lock on the maps semaphore to safely protect + * concurrent readers from seeing partially mutated or unsorted map boundaries. + * + * WARNING: Acquiring down_write() here can trigger a recursive self-deadlock if + * the caller already holds the read lock (e.g., during maps__for_each_map() or + * maps__find() iteration paths that trigger lazy symbol loading). To completely + * avoid this deadlock, all kernel/module maps must be pre-loaded up-front (via + * maps__load_maps()) under a clean, single-threaded context before entering + * multi-threaded event processing loops. 
+ */ +int maps__mutate_mapping(struct maps *maps, struct map *map, + int (*mutate_cb)(struct map *map, void *data), void *data) +{ + int err = 0; + + if (maps) + down_write(maps__lock(maps)); + + err = mutate_cb(map, data); + + if (maps) { + RC_CHK_ACCESS(maps)->maps_by_address_sorted = false; + RC_CHK_ACCESS(maps)->maps_by_name_sorted = false; + } + + if (maps) + up_write(maps__lock(maps)); + +#ifdef HAVE_LIBDW_SUPPORT + if (maps) + libdw__invalidate_dwfl(maps, maps__libdw_addr_space_dwfl(maps)); +#endif + + return err; +} + bool maps__empty(struct maps *maps) { bool res; @@ -626,6 +669,41 @@ int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data) return ret; } +int maps__load_maps(struct maps *maps) +{ + struct map **maps_copy; + unsigned int nr_maps; + int err = 0; + + if (!maps) + return 0; + + down_read(maps__lock(maps)); + nr_maps = maps__nr_maps(maps); + if (nr_maps == 0) { + up_read(maps__lock(maps)); + return 0; + } + maps_copy = calloc(nr_maps, sizeof(*maps_copy)); + if (!maps_copy) { + up_read(maps__lock(maps)); + return -ENOMEM; + } + for (unsigned int i = 0; i < nr_maps; i++) + maps_copy[i] = map__get(maps__maps_by_address(maps)[i]); + up_read(maps__lock(maps)); + + for (unsigned int i = 0; i < nr_maps; i++) { + if (map__load(maps_copy[i]) < 0) { + pr_warning("Failed to load map %s\n", dso__name(map__dso(maps_copy[i]))); + err = -1; + } + map__put(maps_copy[i]); + } + free(maps_copy); + return err; +} + void maps__remove_maps(struct maps *maps, bool (*cb)(struct map *map, void *data), void *data) { struct map **maps_by_address; @@ -668,40 +746,57 @@ struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp) return result; } -struct maps__find_symbol_by_name_args { - struct map **mapp; - const char *name; - struct symbol *sym; -}; - -static int maps__find_symbol_by_name_cb(struct map *map, void *data) +struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) { - 
struct maps__find_symbol_by_name_args *args = data; + struct map **maps_copy; + unsigned int nr_maps; + struct symbol *sym = NULL; - args->sym = map__find_symbol_by_name(map, args->name); - if (!args->sym) - return 0; + if (!maps) + return NULL; - if (!map__contains_symbol(map, args->sym)) { - args->sym = NULL; - return 0; + /* + * First, ensure all maps are loaded. We pre-load them outside of any + * read-to-write locks to avoid deadlocks. Even if some fail, we proceed. + */ + maps__load_maps(maps); + + /* + * Create a local snapshot of the maps while holding the read lock. + * This prevents deadlocking if iteration triggers further map insertions. + */ + down_read(maps__lock(maps)); + nr_maps = maps__nr_maps(maps); + maps_copy = calloc(nr_maps, sizeof(*maps_copy)); + if (maps_copy) { + for (unsigned int i = 0; i < nr_maps; i++) { + struct map *map = maps__maps_by_address(maps)[i]; + + maps_copy[i] = map__get(map); + } } + up_read(maps__lock(maps)); - if (args->mapp != NULL) - *args->mapp = map__get(map); - return 1; -} + if (!maps_copy) + return NULL; -struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) -{ - struct maps__find_symbol_by_name_args args = { - .mapp = mapp, - .name = name, - .sym = NULL, - }; + for (unsigned int i = 0; i < nr_maps; i++) { + struct map *map = maps_copy[i]; + + sym = map__find_symbol_by_name(map, name); + if (sym && map__contains_symbol(map, sym)) { + if (mapp) + *mapp = map__get(map); + break; + } + sym = NULL; + } + + for (unsigned int i = 0; i < nr_maps; i++) + map__put(maps_copy[i]); - maps__for_each_map(maps, maps__find_symbol_by_name_cb, &args); - return args.sym; + free(maps_copy); + return sym; } int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams) diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h index 5b80b199685e..4ec9b7453a3b 100644 --- a/tools/perf/util/maps.h +++ b/tools/perf/util/maps.h @@ -59,8 +59,11 @@ void maps__set_libdw_addr_space_dwfl(struct 
maps *maps, void *dwfl); size_t maps__fprintf(struct maps *maps, FILE *fp); +int maps__load_maps(struct maps *maps); int maps__insert(struct maps *maps, struct map *map); void maps__remove(struct maps *maps, struct map *map); +int maps__mutate_mapping(struct maps *maps, struct map *map, + int (*mutate_cb)(struct map *map, void *data), void *data); struct map *maps__find(struct maps *maps, u64 addr); struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 186e6d92ac3d..d1e93c0556dd 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1342,6 +1342,24 @@ static u64 ref_reloc(struct kmap *kmap) void __weak arch__sym_update(struct symbol *s __maybe_unused, GElf_Sym *sym __maybe_unused) { } +struct remap_kernel_ctx { + u64 sh_addr; + u64 sh_size; + u64 sh_offset; + struct kmap *kmap; +}; + +static int remap_kernel_cb(struct map *map, void *data) +{ + struct remap_kernel_ctx *ctx = data; + + map__set_start(map, ctx->sh_addr + ref_reloc(ctx->kmap)); + map__set_end(map, map__start(map) + ctx->sh_size); + map__set_pgoff(map, ctx->sh_offset); + map__set_mapping_type(map, MAPPING_TYPE__DSO); + return 0; +} + static int dso__process_kernel_symbol(struct dso *dso, struct map *map, GElf_Sym *sym, GElf_Shdr *shdr, struct maps *kmaps, struct kmap *kmap, @@ -1372,22 +1390,15 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, * map to the kernel dso. 
*/ if (*remap_kernel && dso__kernel(dso) && !kmodule) { + struct remap_kernel_ctx ctx = { + .sh_addr = shdr->sh_addr, + .sh_size = shdr->sh_size, + .sh_offset = shdr->sh_offset, + .kmap = kmap + }; + *remap_kernel = false; - map__set_start(map, shdr->sh_addr + ref_reloc(kmap)); - map__set_end(map, map__start(map) + shdr->sh_size); - map__set_pgoff(map, shdr->sh_offset); - map__set_mapping_type(map, MAPPING_TYPE__DSO); - /* Ensure maps are correctly ordered */ - if (kmaps) { - int err; - struct map *tmp = map__get(map); - - maps__remove(kmaps, map); - err = maps__insert(kmaps, map); - map__put(tmp); - if (err) - return err; - } + maps__mutate_mapping(kmaps, map, remap_kernel_cb, &ctx); } /* diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 0c46b24ee098..2cc911af8c81 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -48,6 +48,13 @@ #include <symbol/kallsyms.h> #include <sys/utsname.h> +static int map_fixup_cb(struct map *map, void *data __maybe_unused) +{ + map__fixup_start(map); + map__fixup_end(map); + return 0; +} + static int dso__load_kernel_sym(struct dso *dso, struct map *map); static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map); @@ -2240,10 +2247,11 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map) free(kallsyms_allocated_filename); if (err > 0 && !dso__is_kcore(dso)) { + struct maps *kmaps = map__kmaps(map); + dso__set_binary_type(dso, DSO_BINARY_TYPE__KALLSYMS); dso__set_long_name(dso, DSO__NAME_KALLSYMS, false); - map__fixup_start(map); - map__fixup_end(map); + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); } return err; @@ -2283,10 +2291,11 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map) if (err > 0) pr_debug("Using %s for symbols\n", kallsyms_filename); if (err > 0 && !dso__is_kcore(dso)) { + struct maps *kmaps = map__kmaps(map); + dso__set_binary_type(dso, DSO_BINARY_TYPE__GUEST_KALLSYMS); dso__set_long_name(dso, machine->mmap_name, 
false); - map__fixup_start(map); - map__fixup_end(map); + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); } return err; -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* [PATCH v15 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking 2026-06-06 7:21 ` [PATCH v15 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-06 7:21 ` [PATCH v15 1/5] perf maps: Add maps__mutate_mapping Ian Rogers @ 2026-06-06 7:21 ` Ian Rogers 2026-06-06 7:38 ` sashiko-bot 2026-06-06 7:21 ` [PATCH v15 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers ` (3 subsequent siblings) 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-06 7:21 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz If perf.data files are taken from one machine to another they may leak virtual addresses and so weaken ASLR on the machine they are coming from. Add an aslr option for perf inject that remaps all virtual addresses, or drops data/events, so that the virtual address information isn't leaked. This patch introduces the core ASLR remapping tool infrastructure and implements remapping/tracking for metadata events (MMAP, MMAP2, COMM, FORK, EXIT, KSYMBOL, TEXT_POKE). Sample events are delegated without remapping for now. 
Assisted-by: Antigravity:gemini-3.5-flash Signed-off-by: Ian Rogers <irogers@google.com> Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> --- tools/perf/builtin-inject.c | 76 +++- tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 718 ++++++++++++++++++++++++++++++++++++ tools/perf/util/aslr.h | 37 ++ 4 files changed, 829 insertions(+), 3 deletions(-) create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 75ffe31d03fe..6ac6e6fb3b47 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -8,6 +8,7 @@ */ #include "builtin.h" +#include "util/aslr.h" #include "util/color.h" #include "util/dso.h" #include "util/vdso.h" @@ -24,6 +25,7 @@ #include "util/string2.h" #include "util/symbol.h" #include "util/synthetic-events.h" +#include "util/pmus.h" #include "util/thread.h" #include "util/namespaces.h" #include "util/unwind.h" @@ -124,6 +126,7 @@ struct perf_inject { bool in_place_update_dry_run; bool copy_kcore_dir; bool convert_callchain; + bool aslr; const char *input_name; struct perf_data output; u64 bytes_written; @@ -242,8 +245,35 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, if (!inject->output.is_pipe) return 0; - if (!inject->itrace_synth_opts.set) + if (!inject->itrace_synth_opts.set) { + if (inject->aslr) { + union perf_event *stripped_event = malloc(event->header.size); + int err; + + if (!stripped_event) + return -ENOMEM; + memcpy(stripped_event, event, event->header.size); + stripped_event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + + if (stripped_event->attr.attr.type == PERF_TYPE_BREAKPOINT) { + stripped_event->attr.attr.bp_addr = 0; + } else if (stripped_event->attr.attr.type >= PERF_TYPE_MAX) { + struct perf_pmu *pmu; + + pmu = perf_pmus__find_by_type(stripped_event->attr.attr.type); + if (pmu && (!strcmp(pmu->name, "kprobe") || + !strcmp(pmu->name, 
"uprobe"))) { + stripped_event->attr.attr.config1 = 0; + stripped_event->attr.attr.config2 = 0; + } + } + + err = perf_event__repipe_synth(tool, stripped_event); + free(stripped_event); + return err; + } return perf_event__repipe_synth(tool, event); + } if (event->header.size < sizeof(struct perf_event_header) + PERF_ATTR_SIZE_VER0) { pr_err("Attribute event size %u is too small\n", event->header.size); @@ -276,6 +306,17 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, attr.size = sizeof(struct perf_event_attr); attr.sample_type &= ~PERF_SAMPLE_AUX; + if (inject->aslr) { + attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + if (attr.type >= PERF_TYPE_MAX) { + struct perf_pmu *pmu = perf_pmus__find_by_type(attr.type); + + if (pmu && (!strcmp(pmu->name, "kprobe") || !strcmp(pmu->name, "uprobe"))) { + attr.config1 = 0; + attr.config2 = 0; + } + } + } if (inject->itrace_synth_opts.add_last_branch) { attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; @@ -2594,7 +2635,6 @@ static int __cmd_inject(struct perf_inject *inject) evsel->core.attr.exclude_callchain_user = 0; } } - session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc, @@ -2704,6 +2744,8 @@ int cmd_inject(int argc, const char **argv) unwind__option), OPT_BOOLEAN(0, "convert-callchain", &inject.convert_callchain, "Generate callchains using DWARF and drop register/stack data"), + OPT_BOOLEAN(0, "aslr", &inject.aslr, + "Remap virtual memory addresses similar to ASLR"), OPT_END() }; const char * const inject_usage[] = { @@ -2711,6 +2753,7 @@ int cmd_inject(int argc, const char **argv) NULL }; bool ordered_events; + struct perf_tool *tool = &inject.tool; if (!inject.itrace_synth_opts.set) { /* Disable eager loading of kernel symbols that adds overhead to perf inject. 
*/ @@ -2731,6 +2774,11 @@ int cmd_inject(int argc, const char **argv) if (argc) usage_with_options(inject_usage, options); + if (inject.aslr && inject.convert_callchain) { + pr_err("Error: --aslr and --convert-callchain are mutually exclusive features.\n"); + return -EINVAL; + } + if (inject.strip && !inject.itrace_synth_opts.set) { pr_err("--strip option requires --itrace option\n"); return -1; @@ -2824,12 +2872,21 @@ int cmd_inject(int argc, const char **argv) inject.tool.schedstat_domain = perf_event__repipe_op2_synth; inject.tool.dont_split_sample_group = true; inject.tool.merge_deferred_callchains = false; - inject.session = __perf_session__new(&data, &inject.tool, + if (inject.aslr) { + tool = aslr_tool__new(&inject.tool); + if (!tool) { + ret = -ENOMEM; + goto out_close_output; + } + } + inject.session = __perf_session__new(&data, tool, /*trace_event_repipe=*/inject.output.is_pipe, /*host_env=*/NULL); if (IS_ERR(inject.session)) { ret = PTR_ERR(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); goto out_close_output; } @@ -2923,12 +2980,25 @@ int cmd_inject(int argc, const char **argv) ret = __cmd_inject(&inject); + if (inject.aslr) { + struct evsel *evsel; + + evlist__for_each_entry(inject.session->evlist, evsel) { + evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) + evsel->core.attr.bp_addr = 0; + } + } + guest_session__exit(&inject.guest_session); out_delete: strlist__delete(inject.known_build_ids); zstd_fini(&(inject.session->zstd_data)); perf_session__delete(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); out_close_output: if (!inject.in_place_update) perf_data__close(&inject.output); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 4bbc78b1f741..19994e026ae5 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -6,6 +6,7 @@ perf-util-y += arm64-frame-pointer-unwind-support.o perf-util-y += addr2line.o perf-util-y += addr_location.o 
perf-util-y += annotate.o +perf-util-y += aslr.o perf-util-y += blake2s.o perf-util-y += block-info.o perf-util-y += block-range.o diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c new file mode 100644 index 000000000000..16537e0e1bbb --- /dev/null +++ b/tools/perf/util/aslr.c @@ -0,0 +1,718 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "aslr.h" + +#include "addr_location.h" +#include "debug.h" +#include "event.h" +#include "evsel.h" +#include "machine.h" +#include "map.h" +#include "thread.h" +#include "tool.h" +#include "session.h" +#include "data.h" +#include "dso.h" + +#include <internal/lib.h> /* page_size */ +#include <linux/compiler.h> +#include <linux/zalloc.h> +#include <inttypes.h> +#include <unistd.h> + +/** + * struct remap_addresses_key - Key for mapping original addresses to remapped ones. + * @dso: Pointer to the DSO (Dynamic Shared Object) associated with the mapping. + * @invariant: Unique offset invariant within the VMA (Virtual Memory Area). + * Calculated as `start - pgoff`. This value remains constant when + * perf's internal `maps__fixup_overlap_and_insert` splits a map into + * fragmented VMA pieces due to overlapping events, allowing us to + * resolve split maps consistently back to the original VMA. + * @pid: Process ID associated with the mapping. + */ +struct remap_addresses_key { + struct machine *machine; + struct dso *dso; + u64 invariant; + pid_t pid; +}; + +struct aslr_mapping { + struct list_head node; + u64 orig_start; + u64 len; + u64 remap_start; +}; + +struct process_top_address { + u64 remapped_max; +}; +struct aslr_tool { + /** @tool: The tool implemented here and a pointer to a delegate to process the data. */ + struct delegate_tool tool; + /** @machines: The machines with the input, not remapped, virtual address layout. */ + struct machines machines; + /** @event_copy: Buffer used to create an event to pass to the delegate. 
*/ + char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); + /** @remap_addresses: mapping from remap_addresses_key to remapped address. */ + struct hashmap remap_addresses; + /** @top_addresses: mapping from process to max remapped address. */ + struct hashmap top_addresses; +}; + +static const pid_t kernel_pid = -1; + +/* Start remapping user processes from a small non-zero offset. */ +static const u64 user_space_start = 0x200000; +static const u64 kernel_space_start_64 = 0xffff800010000000ULL; +static const u64 kernel_space_start_32 = 0x80000000ULL; + +static size_t remap_addresses__hash(long _key, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key = (struct remap_addresses_key *)_key; + void *dso_ptr = key->dso ? RC_CHK_ACCESS(key->dso) : NULL; + + return (size_t)key->machine ^ (size_t)dso_ptr ^ key->invariant ^ key->pid; +} + +static bool remap_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key1 = (struct remap_addresses_key *)_key1; + struct remap_addresses_key *key2 = (struct remap_addresses_key *)_key2; + + return key1->machine == key2->machine && + RC_CHK_EQUAL(key1->dso, key2->dso) && + key1->invariant == key2->invariant && + key1->pid == key2->pid; +} + +struct top_addresses_key { + struct machine *machine; + pid_t pid; +}; + +static size_t top_addresses__hash(long _key, void *ctx __maybe_unused) +{ + struct top_addresses_key *key = (struct top_addresses_key *)_key; + + return (size_t)key->machine ^ key->pid; +} + +static bool top_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) +{ + struct top_addresses_key *key1 = (struct top_addresses_key *)_key1; + struct top_addresses_key *key2 = (struct top_addresses_key *)_key2; + + return key1->machine == key2->machine && key1->pid == key2->pid; +} + +static u64 round_up_to_page_size(u64 addr) +{ + return (addr + page_size - 1) & ~((u64)page_size - 1); +} + +struct aslr_machine_priv { + bool kernel_maps_loaded; +}; + +static int 
aslr_tool__preload_kernel_maps(struct machine *machine) +{ + struct aslr_machine_priv *mpriv = machine->priv; + + if (!mpriv) { + mpriv = zalloc(sizeof(*mpriv)); + if (!mpriv) + return -ENOMEM; + machine->priv = mpriv; + } + + if (!mpriv->kernel_maps_loaded) { + struct maps *kmaps = machine__kernel_maps(machine); + + if (kmaps) { + int err = maps__load_maps(kmaps); + + if (err < 0) { + pr_err("ASLR: Failed to preload kernel maps for machine pid %d\n", + machine->pid); + return err; + } + } + mpriv->kernel_maps_loaded = true; + } + return 0; +} + +static void aslr_tool__free_machine_priv(struct machine *machine) +{ + free(machine->priv); + machine->priv = NULL; +} + +static void aslr_tool__destroy_machines_priv(struct machines *machines) +{ + struct rb_node *nd; + + aslr_tool__free_machine_priv(&machines->host); + for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) { + struct machine *machine = rb_entry(nd, struct machine, rb_node); + + aslr_tool__free_machine_priv(machine); + } +} + +static u64 aslr_tool__findnew_mapping(struct aslr_tool *aslr, + struct thread *aslr_thread, + u8 cpumode, u64 start, + u64 len, u64 pgoff) +{ + /* Address location for dso lookup. */ + struct addr_location al; + /* Original ASLR address based key for the remap table. */ + struct remap_addresses_key remap_key; + /* The address in the ASLR sanitized address space less pg_off. */ + u64 *remapped_invariant_ptr; + /* Key for the maximum address in a process. */ + struct top_addresses_key top_addr_key; + /* Value in top address table. */ + struct process_top_address *top = NULL; + /* Address in ASLR sanitized address space. */ + u64 remap_addr; + /* Potentially allocated remap table key. */ + struct remap_addresses_key *new_remap_key = NULL; + /* + * Potentially allocated remap table value. + * TODO: Avoid allocation necessary for perf 32-bit binary support. 
+ */ + u64 *new_remap_val = NULL; + int err; + + if (!aslr_thread) + return 0; + + /* The key to look up an incoming address to the outgoing value. */ + addr_location__init(&al); + remap_key.machine = maps__machine(thread__maps(aslr_thread)); + remap_key.pid = (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? + kernel_pid : thread__pid(aslr_thread); + if (thread__find_map(aslr_thread, cpumode, start, &al)) { + struct dso *dso = map__dso(al.map); + const char *dso_name = dso ? dso__long_name(dso) : NULL; + + remap_key.dso = dso; + if (dso && !is_anon_memory(dso_name) && !is_no_dso_memory(dso_name)) + remap_key.invariant = map__start(al.map) - map__pgoff(al.map); + else + remap_key.invariant = map__start(al.map); + } else { + remap_key.dso = NULL; + remap_key.invariant = start; + } + + /* The key to look up top allocated address. */ + top_addr_key.machine = remap_key.machine; + top_addr_key.pid = remap_key.pid; + + if (hashmap__find(&aslr->remap_addresses, &remap_key, &remapped_invariant_ptr)) { + /* Mmap already exists. */ + u64 calculated_max; + + if (al.map) { + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + + (start - map__start(al.map)); + } else { + remap_addr = *remapped_invariant_ptr + pgoff; + } + + calculated_max = remap_addr + len; + + /* See if top mapping was expanded. */ + if (hashmap__find(&aslr->top_addresses, &top_addr_key, &top)) { + if (calculated_max > top->remapped_max) + top->remapped_max = calculated_max; + } + addr_location__exit(&al); + return remap_addr; + } + /* No mmap, create an entry from the top address. */ + if (hashmap__find(&aslr->top_addresses, &top_addr_key, &top)) { + struct addr_location prev_al; + bool is_contiguous = false; + + /* Current max allocated mmap address within the process. 
*/ + remap_addr = top->remapped_max; + + addr_location__init(&prev_al); + if (thread__find_map(aslr_thread, cpumode, start - 1, &prev_al)) { + if (map__end(prev_al.map) == start) + is_contiguous = true; + } + addr_location__exit(&prev_al); + + if (is_contiguous) { + /* Contiguous mapping, do not add 1 page gap! */ + remap_addr = round_up_to_page_size(remap_addr); + } else { + /* Give 1 page gap from current max page. */ + remap_addr = round_up_to_page_size(remap_addr); + remap_addr += page_size; + } + if (remap_addr + len > top->remapped_max) + top->remapped_max = remap_addr + len; + } else { + /* First address of the process, allocate key and first top address. */ + struct top_addresses_key *tk; + struct process_top_address *top_val; + struct machine *machine = remap_key.machine; + struct perf_env *env = machine ? machine->env : NULL; + bool is_64 = env ? perf_env__kernel_is_64_bit(env) : (sizeof(void *) == 8); + u64 kernel_start_addr = is_64 ? kernel_space_start_64 : kernel_space_start_32; + + remap_addr = (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? + kernel_start_addr : user_space_start; + remap_addr = round_up_to_page_size(remap_addr); + + tk = malloc(sizeof(*tk)); + top_val = malloc(sizeof(*top_val)); + if (!tk || !top_val) { + err = -ENOMEM; + } else { + *tk = top_addr_key; + top_val->remapped_max = remap_addr + len; + err = hashmap__insert(&aslr->top_addresses, tk, top_val, + HASHMAP_ADD, NULL, NULL); + } + if (err) { + errno = -err; + pr_err("Failure to add ASLR process top address %m\n"); + free(tk); + free(top_val); + addr_location__exit(&al); + return 0; + } + } + /* Create remapping entry. 
*/ + new_remap_key = malloc(sizeof(*new_remap_key)); + new_remap_val = malloc(sizeof(u64)); + if (!new_remap_key || !new_remap_val) { + err = -ENOMEM; + } else { + *new_remap_key = remap_key; + new_remap_key->dso = dso__get(remap_key.dso); + if (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) { + if (al.map) { + *new_remap_val = remap_addr - + (start - map__start(al.map)) - + map__pgoff(al.map); + } else { + *new_remap_val = remap_addr; + } + } else { + *new_remap_val = remap_addr - (al.map ? map__pgoff(al.map) : pgoff); + } + err = hashmap__add(&aslr->remap_addresses, new_remap_key, new_remap_val); + if (err) + dso__put(new_remap_key->dso); + } + if (err) { + errno = -err; + pr_err("Failure to add ASLR remapping %m\n"); + free(new_remap_key); + free(new_remap_val); + addr_location__exit(&al); + return 0; + } + addr_location__exit(&al); + return remap_addr; +} + +static int aslr_tool__process_mmap(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. 
*/ + err = perf_event__process_mmap(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap.pid, event->mmap.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap, &event->mmap, event->mmap.header.size); + /* Remaps the mmap.start. */ + new_event->mmap.start = aslr_tool__findnew_mapping(aslr, thread, cpumode, + event->mmap.start, + event->mmap.len, + event->mmap.pgoff); + /* + * For anonymous memory (and kernel maps), the kernel populates the + * event's pgoff field with the original un-obfuscated virtual address + * in bytes (i.e. (addr >> PAGE_SHIFT) << PAGE_SHIFT). + * We must overwrite pgoff with the new remapped byte address to prevent + * leaking the original ASLR layout. + */ + if (cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL || + is_anon_memory(event->mmap.filename) || is_no_dso_memory(event->mmap.filename)) + new_event->mmap.pgoff = new_event->mmap.start; + err = delegate->mmap(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_mmap2(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. 
*/ + err = perf_event__process_mmap2(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap2.pid, event->mmap2.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap2, &event->mmap2, event->mmap2.header.size); + /* Remaps the mmap.start. */ + new_event->mmap2.start = aslr_tool__findnew_mapping(aslr, thread, cpumode, + event->mmap2.start, + event->mmap2.len, + event->mmap2.pgoff); + /* + * For anonymous memory (and kernel maps), the kernel populates the + * event's pgoff field with the original un-obfuscated virtual address + * in bytes (i.e. (addr >> PAGE_SHIFT) << PAGE_SHIFT). + * We must overwrite pgoff with the new remapped byte address to prevent + * leaking the original ASLR layout. + */ + if (cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL || + is_anon_memory(event->mmap2.filename) || is_no_dso_memory(event->mmap2.filename)) + new_event->mmap2.pgoff = new_event->mmap2.start; + err = delegate->mmap2(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_comm(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. 
*/ + err = perf_event__process_comm(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->comm(delegate, event, sample, machine); +} + +static int aslr_tool__process_fork(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_fork(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->fork(delegate, event, sample, machine); +} + +static int aslr_tool__process_exit(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. 
*/ + err = perf_event__process_exit(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->exit(delegate, event, sample, machine); +} + +static int aslr_tool__process_text_poke(const struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + /* Drop in case the instruction encodes an ASLR revealing address. */ + return 0; +} + +static int aslr_tool__process_ksymbol(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + err = perf_event__process_ksymbol(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, kernel_pid, 0); + if (!thread) + return -ENOMEM; + memcpy(&new_event->ksymbol, &event->ksymbol, event->ksymbol.header.size); + /* Remaps the ksymbol.start */ + new_event->ksymbol.addr = aslr_tool__findnew_mapping(aslr, thread, + PERF_RECORD_MISC_KERNEL, + event->ksymbol.addr, + event->ksymbol.len, + /*pgoff=*/0); + + err = delegate->ksymbol(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_sample(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool = container_of(tool, struct 
delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct perf_tool *delegate = aslr->tool.delegate; + + return delegate->sample(delegate, event, sample, machine); +} + +static int skipn(int fd, off_t n) +{ + char buf[4096]; + ssize_t ret; + + while (n > 0) { + ret = read(fd, buf, min_t(off_t, n, (off_t)sizeof(buf))); + if (ret <= 0) + return ret; + n -= ret; + } + + return 0; +} + +static s64 aslr_tool__process_auxtrace(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, + union perf_event *event) +{ + pr_warning_once("ASLR: Dropping auxtrace data as it cannot be obfuscated.\n"); + if (perf_data__is_pipe(session->data)) { + /* Copy behavior of the stub by reading all pipe data. */ + int err = skipn(perf_data__fd(session->data), event->auxtrace.size); + + if (err < 0) + return err; + } + return event->auxtrace.size; +} + +static int aslr_tool__process_auxtrace_info(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + +static int aslr_tool__process_auxtrace_error(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + +static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) +{ + delegate_tool__init(&aslr->tool, delegate); + aslr->tool.tool.ordered_events = true; + + machines__init(&aslr->machines); + + hashmap__init(&aslr->remap_addresses, + remap_addresses__hash, remap_addresses__equal, + /*ctx=*/NULL); + hashmap__init(&aslr->top_addresses, + top_addresses__hash, top_addresses__equal, + /*ctx=*/NULL); + + aslr->tool.tool.sample = aslr_tool__process_sample; + /* read - reads a counter, okay to delegate. 
*/ + aslr->tool.tool.mmap = aslr_tool__process_mmap; + aslr->tool.tool.mmap2 = aslr_tool__process_mmap2; + aslr->tool.tool.comm = aslr_tool__process_comm; + aslr->tool.tool.fork = aslr_tool__process_fork; + aslr->tool.tool.exit = aslr_tool__process_exit; + /* namespaces, cgroup, lost, lost_sample, aux, */ + /* itrace_start, aux_output_hw_id, context_switch, throttle, unthrottle */ + /* - no virtual addresses. */ + aslr->tool.tool.ksymbol = aslr_tool__process_ksymbol; + /* bpf - no virtual address. */ + aslr->tool.tool.text_poke = aslr_tool__process_text_poke; + /* + * event_update, tracing_data, finished_round, build_id, id_index, + * auxtrace_info, auxtrace_error, time_conv, thread_map, cpu_map, + * stat_config, stat, feature, finished_init, bpf_metadata, compressed, + * auxtrace - no virtual addresses. + */ + aslr->tool.tool.auxtrace = aslr_tool__process_auxtrace; + aslr->tool.tool.auxtrace_info = aslr_tool__process_auxtrace_info; + aslr->tool.tool.auxtrace_error = aslr_tool__process_auxtrace_error; +} + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate) +{ + struct aslr_tool *aslr = zalloc(sizeof(*aslr)); + + if (!aslr) + return NULL; + + aslr_tool__init(aslr, delegate); + return &aslr->tool.tool; +} + +void aslr_tool__delete(struct perf_tool *tool) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct hashmap_entry *cur; + size_t bkt; + + if (!tool) + return; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + + hashmap__for_each_entry(&aslr->remap_addresses, cur, bkt) { + struct remap_addresses_key *key = (struct remap_addresses_key *)cur->pkey; + + if (key) + dso__put(key->dso); + zfree(&cur->pkey); + zfree(&cur->pvalue); + } + hashmap__for_each_entry(&aslr->top_addresses, cur, bkt) { + zfree(&cur->pkey); + zfree(&cur->pvalue); + } + + hashmap__clear(&aslr->remap_addresses); + hashmap__clear(&aslr->top_addresses); + 
aslr_tool__destroy_machines_priv(&aslr->machines); + machines__destroy_kernel_maps(&aslr->machines); + machines__exit(&aslr->machines); + free(aslr); +} diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h new file mode 100644 index 000000000000..a9b90bf29540 --- /dev/null +++ b/tools/perf/util/aslr.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_ASLR_H +#define __PERF_ASLR_H + +#include <linux/perf_event.h> + +#define ASLR_SUPPORTED_SAMPLE_TYPE ( \ + PERF_SAMPLE_IDENTIFIER | \ + PERF_SAMPLE_IP | \ + PERF_SAMPLE_TID | \ + PERF_SAMPLE_TIME | \ + PERF_SAMPLE_ADDR | \ + PERF_SAMPLE_ID | \ + PERF_SAMPLE_STREAM_ID | \ + PERF_SAMPLE_CPU | \ + PERF_SAMPLE_PERIOD | \ + PERF_SAMPLE_READ | \ + PERF_SAMPLE_CALLCHAIN | \ + PERF_SAMPLE_RAW | \ + PERF_SAMPLE_BRANCH_STACK | \ + PERF_SAMPLE_STACK_USER | \ + PERF_SAMPLE_WEIGHT_TYPE | \ + PERF_SAMPLE_DATA_SRC | \ + PERF_SAMPLE_TRANSACTION | \ + PERF_SAMPLE_PHYS_ADDR | \ + PERF_SAMPLE_CGROUP | \ + PERF_SAMPLE_DATA_PAGE_SIZE | \ + PERF_SAMPLE_CODE_PAGE_SIZE | \ + PERF_SAMPLE_AUX) + +struct perf_tool; +struct evsel; + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate); +void aslr_tool__delete(struct perf_tool *aslr); + +#endif /* __PERF_ASLR_H */ -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v15 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking 2026-06-06 7:21 ` [PATCH v15 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers @ 2026-06-06 7:38 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-06-06 7:38 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 2 potential issue(s) to consider: - [High] Unvalidated writes to `config1` and `config2` in `perf_event__repipe_attr` cause a heap buffer overflow for older ABI events. - [High] Asymmetric computation for kernel mappings without an associated DSO causes massive address overflows on lookup. -- commit 493cc968b1147b3f20e3102e4ae64844207831a1 Author: Ian Rogers <irogers@google.com> perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking This patch adds an ASLR tool to perf inject, allowing it to remap virtual addresses or drop sensitive data to prevent leaking a machine's ASLR layout when sharing perf.data files. It implements core ASLR remapping and tracking for metadata events like MMAP, FORK, and KSYMBOL. Sample events are currently delegated without remapping. > diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c > index 75ffe31d03fea..6ac6e6fb3b47e 100644 > --- a/tools/perf/builtin-inject.c > +++ b/tools/perf/builtin-inject.c [ ... 
] > @@ -242,8 +245,35 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, > if (!inject->output.is_pipe) > return 0; > > - if (!inject->itrace_synth_opts.set) > + if (!inject->itrace_synth_opts.set) { > + if (inject->aslr) { > + union perf_event *stripped_event = malloc(event->header.size); > + int err; > + > + if (!stripped_event) > + return -ENOMEM; > + memcpy(stripped_event, event, event->header.size); > + stripped_event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; > + > + if (stripped_event->attr.attr.type == PERF_TYPE_BREAKPOINT) { > + stripped_event->attr.attr.bp_addr = 0; > + } else if (stripped_event->attr.attr.type >= PERF_TYPE_MAX) { > + struct perf_pmu *pmu; > + > + pmu = perf_pmus__find_by_type(stripped_event->attr.attr.type); > + if (pmu && (!strcmp(pmu->name, "kprobe") || > + !strcmp(pmu->name, "uprobe"))) { > + stripped_event->attr.attr.config1 = 0; > + stripped_event->attr.attr.config2 = 0; > + } > + } > + > + err = perf_event__repipe_synth(tool, stripped_event); > + free(stripped_event); > + return err; > + } [Severity: High] Does this code risk a heap buffer overflow for older ABI events? In perf_event__repipe_attr(), the stripped_event buffer is allocated with event->header.size. For older ABI0 perf.data files, this size can be as small as 72 bytes. The code here unconditionally writes to bp_addr (at offset 88), config1 (offset 88), and config2 (offset 96) for relevant events, without verifying if event->header.size is large enough to contain these fields. Could this write up to 32 bytes past the end of the stripped_event heap allocation? [ ... ] > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > new file mode 100644 > index 0000000000000..16537e0e1bbb4 > --- /dev/null > +++ b/tools/perf/util/aslr.c [ ... ] > @@ -0,0 +1,718 @@ [ ... ] > +static u64 aslr_tool__findnew_mapping(struct aslr_tool *aslr, > + struct thread *aslr_thread, > + u8 cpumode, u64 start, > + u64 len, u64 pgoff) > +{ [ ... 
] > + if (hashmap__find(&aslr->remap_addresses, &remap_key, &remapped_invariant_ptr)) { > + /* Mmap already exists. */ > + u64 calculated_max; > + > + if (al.map) { > + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + > + (start - map__start(al.map)); > + } else { > + remap_addr = *remapped_invariant_ptr + pgoff; > + } [Severity: High] Does this lookup path correctly handle the invariant when al.map is NULL? If al.map is NULL, remap_addr is computed as *remapped_invariant_ptr + pgoff. [ ... ] > + /* Create rmeapping entry. */ > + new_remap_key = malloc(sizeof(*new_remap_key)); > + new_remap_val = malloc(sizeof(u64)); > + if (!new_remap_key || !new_remap_val) { > + err = -ENOMEM; > + } else { > + *new_remap_key = remap_key; > + new_remap_key->dso = dso__get(remap_key.dso); > + if (cpumode == PERF_RECORD_MISC_KERNEL || > + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) { > + if (al.map) { > + *new_remap_val = remap_addr - > + (start - map__start(al.map)) - > + map__pgoff(al.map); > + } else { > + *new_remap_val = remap_addr; > + } [Severity: High] Is it possible this logic creates an asymmetric computation for kernel mappings without an associated DSO? When al.map is NULL during insert, this stores remap_addr directly into *new_remap_val without subtracting pgoff. However, as seen in the lookup path above, it unconditionally adds pgoff to the cached value when al.map is NULL. For anonymous kernel maps, the kernel sets pgoff to the original virtual address in bytes. This asymmetric addition seems like it would cause a massive overflow by adding a 64-bit kernel address to the already high remap_addr, drastically corrupting the returned remapped address on lookups. -- Sashiko AI review · https://sashiko.dev/#/patchset/20260606072125.2786845-1-irogers@google.com?part=2 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v15 3/5] perf inject/aslr: Implement sample address remapping 2026-06-06 7:21 ` [PATCH v15 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-06 7:21 ` [PATCH v15 1/5] perf maps: Add maps__mutate_mapping Ian Rogers 2026-06-06 7:21 ` [PATCH v15 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers @ 2026-06-06 7:21 ` Ian Rogers 2026-06-06 7:36 ` sashiko-bot 2026-06-06 7:21 ` [PATCH v15 4/5] perf test: Add inject ASLR test Ian Rogers ` (2 subsequent siblings) 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-06 7:21 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz Add the sample address remapping logic to the ASLR tool. This patch implements aslr_tool__process_sample, which parses sample events, remaps IPs, ADDRs, callchains, and branch stacks using the mappings collected from metadata events, and drops potentially leaking raw, register, stack, physical address, and aux samples. Also adds the aslr_tool__remap_address helper function. 
Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/util/aslr.c | 454 +++++++++++++++++++++++++++++++++++++++- tools/perf/util/evsel.c | 6 +- tools/perf/util/evsel.h | 10 +- 3 files changed, 461 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c index 16537e0e1bbb..7bd646a5c2fb 100644 --- a/tools/perf/util/aslr.c +++ b/tools/perf/util/aslr.c @@ -110,6 +110,60 @@ static u64 round_up_to_page_size(u64 addr) return (addr + page_size - 1) & ~((u64)page_size - 1); } +static u64 aslr_tool__remap_address(struct aslr_tool *aslr, + struct thread *aslr_thread, + u8 cpumode, + u64 addr) +{ + struct addr_location al; + struct remap_addresses_key key; + u64 *remapped_invariant_ptr = NULL; + u64 remap_addr = 0; + u8 effective_cpumode = cpumode; + + if (!aslr_thread) + return 0; /* No thread. */ + + addr_location__init(&al); + if (!thread__find_map(aslr_thread, cpumode, addr, &al)) { + /* + * If lookup fails with specified cpumode, try fallback to the other space + * to be robust against bad cpumode in samples. + */ + if (cpumode == PERF_RECORD_MISC_KERNEL) + effective_cpumode = PERF_RECORD_MISC_USER; + else if (cpumode == PERF_RECORD_MISC_USER) + effective_cpumode = PERF_RECORD_MISC_KERNEL; + else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + effective_cpumode = PERF_RECORD_MISC_GUEST_USER; + else if (cpumode == PERF_RECORD_MISC_GUEST_USER) + effective_cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + + if (!thread__find_map(aslr_thread, effective_cpumode, addr, &al)) { + addr_location__exit(&al); + return 0; /* No mmap. */ + } + } + + key.machine = maps__machine(thread__maps(aslr_thread)); + key.dso = map__dso(al.map); + key.invariant = map__start(al.map) - map__pgoff(al.map); + key.pid = (effective_cpumode == PERF_RECORD_MISC_KERNEL || + effective_cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? 
+ kernel_pid : thread__pid(aslr_thread); + + if (hashmap__find(&aslr->remap_addresses, &key, &remapped_invariant_ptr)) { + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + + (addr - map__start(al.map)); + } else { + pr_debug("Cannot find a remapped entry for address %lx in mapping %lx(%lx) for pid=%d\n", + addr, map__start(al.map), map__size(al.map), key.pid); + } + + addr_location__exit(&al); + return remap_addr; +} + struct aslr_machine_priv { bool kernel_maps_loaded; }; @@ -583,13 +637,405 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, struct perf_sample *sample, struct machine *machine) { - struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); - struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); - struct perf_tool *delegate = aslr->tool.delegate; + struct evsel *evsel = sample->evsel; + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + int ret; + u64 sample_type; + struct thread *thread; + struct machine *aslr_machine; + __u64 max_i; + __u64 max_j; + union perf_event *new_event; + struct perf_sample new_sample; + __u64 *in_array, *out_array; + u8 cpumode; + u64 addr; + size_t i; + size_t j; + bool orig_needs_swap; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + if (evsel__is_dummy_event(evsel)) + return delegate->sample(delegate, event, sample, machine); + + ret = -EFAULT; + sample_type = evsel->core.attr.sample_type; + max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); + max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); + new_event = (union perf_event *)aslr->event_copy; + cpumode = sample->cpumode; + i = 0; + j = 0; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if 
(aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + thread = machine__findnew_thread(aslr_machine, sample->pid, sample->tid); + + if (!thread) + return -ENOMEM; + + if (max_i > PERF_SAMPLE_MAX_SIZE / sizeof(u64)) + goto out_put; + + new_event->sample.header = event->sample.header; + + in_array = &event->sample.array[0]; + out_array = &new_event->sample.array[0]; + +#define CHECK_BOUNDS(required_i, required_j) \ + (i + (required_i) > max_i || j + (required_j) > max_j) + +#define COPY_U64() \ + do { \ + if (CHECK_BOUNDS(1, 1)) { \ + ret = -EFAULT; \ + goto out_put; \ + } \ + out_array[j++] = in_array[i++]; \ + } while (0) + +#define REMAP_U64(addr_field) \ + do { \ + if (CHECK_BOUNDS(1, 1)) { \ + ret = -EFAULT; \ + goto out_put; \ + } \ + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr_field); \ + i++; \ + } while (0) + + if (sample_type & PERF_SAMPLE_IDENTIFIER) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_IP) + REMAP_U64(sample->ip); + if (sample_type & PERF_SAMPLE_TID) + COPY_U64(); /* pid, tid */ + if (sample_type & PERF_SAMPLE_TIME) + COPY_U64(); /* time */ + if (sample_type & PERF_SAMPLE_ADDR) + REMAP_U64(sample->addr); + if (sample_type & PERF_SAMPLE_ID) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_STREAM_ID) + COPY_U64(); /* stream_id */ + if (sample_type & PERF_SAMPLE_CPU) + COPY_U64(); /* cpu, res */ + if (sample_type & PERF_SAMPLE_PERIOD) + COPY_U64(); /* period */ + if (sample_type & PERF_SAMPLE_READ) { + if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } else { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) 
{ + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + nr = out_array[j++]; + i++; + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + for (u64 cntr = 0; cntr < nr; cntr++) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } + } + } + if (sample_type & PERF_SAMPLE_CALLCHAIN) { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + nr = out_array[j++]; + i++; + + for (u64 cntr = 0; cntr < nr; cntr++) { + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + addr = in_array[i++]; + if (addr >= PERF_CONTEXT_MAX) { + out_array[j++] = addr; + switch (addr) { + case PERF_CONTEXT_HV: + cpumode = PERF_RECORD_MISC_HYPERVISOR; + break; + case PERF_CONTEXT_KERNEL: + cpumode = PERF_RECORD_MISC_KERNEL; + break; + case PERF_CONTEXT_USER: + cpumode = PERF_RECORD_MISC_USER; + break; + case PERF_CONTEXT_GUEST: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_KERNEL: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_USER: + cpumode = PERF_RECORD_MISC_GUEST_USER; + break; + case PERF_CONTEXT_USER_DEFERRED: + if (cntr + 1 >= nr) { + pr_debug("Truncated callchain deferred cookie context\n"); + ret = 0; + goto out_put; + } + /* + * Immediately followed by a 64-bit + * stitching cookie. Skip/Copy it! 
+ */ + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j++] = in_array[i++]; + cntr++; + cpumode = PERF_RECORD_MISC_USER; + break; + default: + pr_debug("invalid callchain context: %"PRIx64"\n", addr); + ret = 0; + goto out_put; + } + continue; + } + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr); + } + } + if (sample_type & PERF_SAMPLE_RAW) { + size_t bytes = sizeof(u32) + sample->raw_size; + size_t u64_words = (bytes + 7) / 8; + + if (i + u64_words > max_i || j + u64_words > max_j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], bytes); + i += u64_words; + j += u64_words; + /* + * TODO: certain raw samples can be remapped, such as + * tracepoints by examining their fields. + */ + pr_debug("Dropping raw samples as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + nr = out_array[j++]; + i++; + + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) + COPY_U64(); /* hw_idx */ + + if (nr > (ULLONG_MAX / 3)) { + ret = -EFAULT; + goto out_put; + } + if (nr * 3 > max_i - i || nr * 3 > max_j - j) { + ret = -EFAULT; + goto out_put; + } + for (u64 cntr = 0; cntr < nr; cntr++) { + out_array[j++] = aslr_tool__remap_address(aslr, thread, + sample->cpumode, + in_array[i++]); /* from */ + out_array[j++] = aslr_tool__remap_address(aslr, thread, + sample->cpumode, + in_array[i++]); /* to */ + out_array[j++] = in_array[i++]; /* flags */ + } + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) { + if (nr > max_i - i || nr > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); + i += nr; + j += nr; + /* TODO: confirm branch counters don't leak ASLR information. 
*/ + pr_debug("Dropping sample branch counters as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + } + if (sample_type & PERF_SAMPLE_REGS_USER) { + if (CHECK_BOUNDS(1, 0)) { + ret = -EFAULT; + goto out_put; + } + /* abi */ + COPY_U64(); + /* TODO: can this be less conservative? */ + pr_debug("Dropping regs user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_STACK_USER) { + u64 size; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + size = out_array[j++]; + i++; + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + if (size % 8) { + size_t pad = 8 - (size % 8); + + memset(((char *)&out_array[j]) + size, 0, pad); + } + i += u64_words; + j += u64_words; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping stack user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + COPY_U64(); /* perf_sample_weight */ + if (sample_type & PERF_SAMPLE_DATA_SRC) + COPY_U64(); /* data_src */ + if (sample_type & PERF_SAMPLE_TRANSACTION) + COPY_U64(); /* transaction */ + if (sample_type & PERF_SAMPLE_REGS_INTR) { + if (CHECK_BOUNDS(1, 0)) { + ret = -EFAULT; + goto out_put; + } + /* abi */ + COPY_U64(); + /* TODO: can this be less conservative? */ + pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_PHYS_ADDR) { + COPY_U64(); /* phys_addr */ + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping physical address sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_CGROUP) + COPY_U64(); /* cgroup */ + if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + COPY_U64(); /* data_page_size */ + if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + COPY_U64(); /* code_page_size */ + + if (sample_type & PERF_SAMPLE_AUX) { + u64 size; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + size = out_array[j++]; + i++; + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + if (size % 8) { + size_t pad = 8 - (size % 8); + + memset(((char *)&out_array[j]) + size, 0, pad); + } + i += u64_words; + j += u64_words; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping aux sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + if (evsel__is_offcpu_event(evsel)) { + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping off-CPU sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); + + orig_needs_swap = evsel->needs_swap; - return delegate->sample(delegate, event, sample, machine); + evsel->needs_swap = false; + perf_sample__init(&new_sample, /*all=*/ true); + ret = evsel__parse_sample(evsel, new_event, &new_sample); + evsel->needs_swap = orig_needs_swap; + + if (ret) { + perf_sample__exit(&new_sample); + goto out_put; + } + + new_sample.evsel = evsel; + ret = delegate->sample(delegate, new_event, &new_sample, machine); + perf_sample__exit(&new_sample); + +out_put: + thread__put(thread); + return ret; } +#undef CHECK_BOUNDS +#undef COPY_U64 +#undef REMAP_U64 + static int skipn(int fd, off_t n) { char buf[4096]; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 34c03f47a913..05fa0010c858 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -3337,11 +3337,11 @@ static int __set_offcpu_sample(struct perf_sample *data) return -EFAULT; } -int evsel__parse_sample(struct evsel *evsel, union perf_event *event, - struct perf_sample *data) +int __evsel__parse_sample(struct evsel *evsel, union perf_event *event, + struct perf_sample *data, bool needs_swap) { u64 type = evsel->core.attr.sample_type; - bool swapped = evsel->needs_swap; + bool swapped = needs_swap; const __u64 *array; u16 max_size = event->header.size; const void *endp = (void *)event + max_size; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 8178858d168a..8009be22cc3f 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -432,8 +432,14 @@ static inline int evsel__read_on_cpu_scaled(struct evsel *evsel, int cpu_map_idx return __evsel__read_on_cpu(evsel, cpu_map_idx, thread, true); } -int evsel__parse_sample(struct evsel *evsel, union perf_event *event, - struct perf_sample *sample); +int __evsel__parse_sample(struct evsel *evsel, union 
perf_event *event, + struct perf_sample *data, bool needs_swap); + +static inline int evsel__parse_sample(struct evsel *evsel, union perf_event *event, + struct perf_sample *data) +{ + return __evsel__parse_sample(evsel, event, data, evsel->needs_swap); +} int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event, u64 *timestamp); -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v15 3/5] perf inject/aslr: Implement sample address remapping 2026-06-06 7:21 ` [PATCH v15 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers @ 2026-06-06 7:36 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-06-06 7:36 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider: - [High] Silent Drop of Entire Sample for recordings with branch counters -- commit 626067d9ffedea6b6e512a71e2a2cb18442024b3 Author: Ian Rogers <irogers@google.com> perf inject/aslr: Implement sample address remapping This commit adds sample address remapping logic to the ASLR tool. It parses sample events and remaps IPs, addresses, callchains, and branch stacks while dropping potentially leaking information like raw and stack samples. > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > --- a/tools/perf/util/aslr.c > +++ b/tools/perf/util/aslr.c [ ... ] > @@ -583,13 +637,405 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, [ ... ] > + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) { > + if (nr > max_i - i || nr > max_j - j) { > + ret = -EFAULT; > + goto out_put; > + } > + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); > + i += nr; > + j += nr; > + /* TODO: confirm branch counters don't leak ASLR information. */ > + pr_debug("Dropping sample branch counters as possible ASLR leak\n"); > + ret = 0; > + goto out_put; > + } [Severity: High] Does this code unintentionally drop the entire sample? The code copies the branch counters into the new array but then immediately executes ret = 0; goto out_put; within aslr_tool__process_sample(). This aborts processing of the sample entirely and skips delegating the sample to the output, dropping it completely. 
Since branch_sample_type is an attribute of the event, won't this result in all samples being silently dropped, leading to an empty output file for any recording that includes branch counters? -- Sashiko AI review · https://sashiko.dev/#/patchset/20260606072125.2786845-1-irogers@google.com?part=3 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v15 4/5] perf test: Add inject ASLR test 2026-06-06 7:21 ` [PATCH v15 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (2 preceding siblings ...) 2026-06-06 7:21 ` [PATCH v15 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers @ 2026-06-06 7:21 ` Ian Rogers 2026-06-06 7:31 ` sashiko-bot 2026-06-06 7:21 ` [PATCH v15 5/5] perf aslr: Strip sample registers Ian Rogers 2026-06-06 15:14 ` [PATCH v16 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-06 7:21 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz Add a new shell test to verify the 'perf inject --aslr' feature. The test covers: - Basic address remapping for user space samples. - Pipe mode coverage for 'perf record' piped into 'perf inject'. - Callchain address remapping. - Consistency of 'perf report' output before and after injection. - Pipe mode report consistency. - Dropping of samples that leak ASLR info (physical addresses). - Kernel address remapping (utilizing a dedicated kernel-intensive VFS dd workload to guarantee continuous timer interrupts sampling flow inside kernel privilege states). - Kernel report consistency with address normalization. The test suite is hardened with global 'set -o pipefail' assertions to catch pipeline failures, stream-consuming awk processors to handle SIGPIPE signals, and a dedicated pipe output scenario validating raw 'perf inject -o -' stdout streams. 
Assisted-by: Antigravity:gemini-3.5-flash Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/tests/shell/inject_aslr.sh | 464 ++++++++++++++++++++++++++ 1 file changed, 464 insertions(+) create mode 100755 tools/perf/tests/shell/inject_aslr.sh diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh new file mode 100755 index 000000000000..d8ded16ba905 --- /dev/null +++ b/tools/perf/tests/shell/inject_aslr.sh @@ -0,0 +1,464 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# perf inject --aslr test + +set -e +set -o pipefail + +shelldir=$(dirname "$0") +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + +sym="noploop" + +skip_test_missing_symbol ${sym} + +# Create global temp directory +temp_dir=$(mktemp -d /tmp/perf-test-aslr.XXXXXXXXXX) + +prog="perf test -w noploop" +[ "$(uname -m)" = "s390x" ] && prog="$prog 3" +err=0 +kprog="dd if=/dev/zero of=/dev/null bs=1M count=500" + +cleanup() { + local exit_code=${1:-$?} + trap - EXIT TERM INT + if [ "${exit_code}" -ne 0 ] || [ "${err}" -ne 0 ]; then + echo "Test failed! Preserving temp directory: ${temp_dir}" + return + fi + # Check if temp_dir is set and looks sane before removing + if [[ "${temp_dir}" =~ ^/tmp/perf-test-aslr\. ]]; then + rm -rf "${temp_dir}" + fi +} + +trap_cleanup() { + local exit_code=$? 
+ echo "Unexpected signal in ${FUNCNAME[1]}" + cleanup ${exit_code} + exit ${exit_code} +} +trap trap_cleanup EXIT TERM INT + +get_noploop_addr() { + local file=$1 + perf script -i "$file" | awk ' + BEGIN { found=0 } + { + for (i=1; i<=NF; i++) { + if ($i ~ /noploop\+/) { + if (!found) { + print $(i-1) + found=1 + } + } + } + }' +} + +test_basic_aslr() { + echo "Test basic ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.basic.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.basic.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -v --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Basic ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Basic ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Basic ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Basic ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Basic ASLR test [Success]" + fi +} + +test_pipe_aslr() { + echo "Test pipe mode ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe.XXXXXX") + + # Use tee to save the original pipe data for comparison + perf record -e task-clock:u -o - ${prog} | tee "${data}" | perf inject --aslr -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Pipe ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Pipe ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Pipe ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Pipe ASLR test [Failed - addresses are not remapped]" + 
err=1 + else + echo "Pipe ASLR test [Success]" + fi +} + +test_callchain_aslr() { + echo "Test Callchain ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.callchain.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.callchain.XXXXXX") + + perf record -g -e task-clock:u -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Callchain ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Callchain ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Callchain ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Callchain ASLR test [Failed - addresses are not remapped]" + err=1 + else + # Extract callchain addresses (indented lines starting with hex addresses) + orig_callchain=$(perf script -i "${data}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + new_callchain=$(perf script -i "${data2}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + + if [ -z "$orig_callchain" ]; then + echo "Callchain ASLR test [Failed - no callchain samples in original file]" + err=1 + elif [ -z "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain data was dropped]" + err=1 + elif [ "$orig_callchain" = "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain addresses were not remapped]" + err=1 + else + echo "Callchain ASLR test [Success]" + fi + fi +} + +test_report_aslr() { + echo "Test perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i 
"${data}" -o "${data_clean}" + perf inject -v -b --aslr -i "${data}" -o "${data2}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Report ASLR test [Success]" + fi +} + +test_pipe_report_aslr() { + echo "Test pipe mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + # Use tee to save the original pipe data, then process it with inject -b + perf record -e task-clock:u -o - ${prog} | \ + tee "${data}" | \ + perf inject -b --aslr -o "${data2}" + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || 
true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Pipe Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Report ASLR test [Success]" + fi +} + +test_pipe_out_report_aslr() { + echo "Test pipe output mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_out_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf inject -b --aslr -i "${data}" -o - | perf report -i - --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! 
-s "${report1_clean}" ]; then + echo "Pipe Output Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Output Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Output Report ASLR test [Success]" + fi +} + +test_dropped_samples() { + echo "Test dropped samples (phys-data)" + local data + data=$(mktemp "${temp_dir}/perf.data.dropped.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.dropped.XXXXXX") + + # Check if --phys-data is supported by recording a short run + if ! perf record -e task-clock:u --phys-data -o "${data}" -- sleep 0.1 > /dev/null 2>&1; then + echo "Skipping dropped samples test as --phys-data is not supported" + return + fi + + perf record -e task-clock:u --phys-data -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + # Verify that the original file actually contained samples! + orig_samples=$(perf script -i "${data}" | wc -l) + if [ "$orig_samples" -eq 0 ]; then + echo "Dropped samples test [Failed - no samples in original file]" + err=1 + else + # Verify that samples are dropped. + samples_count=$(perf script -i "${data2}" | wc -l) + + if [ "$samples_count" -gt 0 ]; then + echo "Dropped samples test [Failed - samples were not dropped]" + err=1 + else + echo "Dropped samples test [Success]" + fi + fi +} + +test_kernel_aslr() { + echo "Test kernel ASLR remapping" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then + echo "Skipping kernel ASLR test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel ASLR test as kernel map could not be recorded (permissions restricted)" + return + fi + + perf inject -v --aslr -i "${kdata}" -o "${kdata2}" + + # Check if kernel addresses are remapped. + # Find the field that ends with :k: (the event name) and take the next field! + orig_addr=$(perf script -i "${kdata}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + new_addr=$(perf script -i "${kdata2}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + + echo "Kernel ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Kernel ASLR test [Failed - no kernel samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Kernel ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Kernel ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Kernel ASLR test [Success]" + fi +} + +test_kernel_report_aslr() { + echo "Test kernel perf report consistency" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel_report.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_report_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then + echo "Skipping kernel report test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel report test as kernel map could not be recorded (permissions restricted)" + return + fi + + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i "${kdata}" -o "${data_clean}" + perf inject -v -b --aslr -i "${kdata}" -o "${kdata2}" + + local report1="${temp_dir}/report_kernel1" + local report2="${temp_dir}/report_kernel2" + local report1_clean="${temp_dir}/report_kernel1.clean" + local report2_clean="${temp_dir}/report_kernel2.clean" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${kdata2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' > "${report2_clean}" || true + + # Normalize kernel DSOs and addresses in clean reports + # This allows kernel modules to be either a module or kernel.kallsyms + local report1_norm="${temp_dir}/report_kernel1.norm" + local report2_norm="${temp_dir}/report_kernel2.norm" + local diff_file="${temp_dir}/diff_kernel" + + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report1_clean}" | \ + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ + sort > "${report1_norm}" || true + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report2_clean}" | \ + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ + sort > "${report2_norm}" || true + + diff -u -w "${report1_norm}" "${report2_norm}" > "${diff_file}" || true + + if [ ! 
-s "${report1_norm}" ]; then + echo "Kernel Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Kernel Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Kernel Report ASLR test [Success]" + fi +} + +test_basic_aslr +test_pipe_aslr +test_callchain_aslr +test_report_aslr +test_pipe_report_aslr +test_pipe_out_report_aslr +test_dropped_samples +test_kernel_aslr +test_kernel_report_aslr + +cleanup ${err} +exit $err -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v15 4/5] perf test: Add inject ASLR test 2026-06-06 7:21 ` [PATCH v15 4/5] perf test: Add inject ASLR test Ian Rogers @ 2026-06-06 7:31 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-06-06 7:31 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider: - [Medium] Missing unresolved symbol address filtering in user-space ASLR tests causes spurious test failures. -- commit 0ac54fddb12723dd235fe2bd67c473f8ec3fe19a Author: Ian Rogers <irogers@google.com> perf test: Add inject ASLR test Adds a new shell test to verify ASLR injection, covering basic address remapping, pipe mode, callchains, and kernel address remapping. > diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh > new file mode 100755 > index 0000000000000..d8ded16ba9052 > --- /dev/null > +++ b/tools/perf/tests/shell/inject_aslr.sh [ ... ] > +test_report_aslr() { > + echo "Test perf report consistency" [ ... ] > + perf report -i "${data_clean}" --stdio > "${report1}" > + perf report -i "${data2}" --stdio > "${report2}" > + > + # Strip headers and compare lines with percentages > + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true > + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true [Severity: Medium] Does this code need to filter out unresolved symbol addresses? Because --aslr intentionally shifts mappings, unresolved symbols (which perf report displays as raw hexadecimal addresses) will differ between the two reports. Can this cause the diff to fail and the test to flake if perf record catches samples in the dynamic loader or other unmapped regions? I noticed test_kernel_report_aslr() avoids this by filtering out hex addresses with grep -v -E '0x[0-9a-f]{8,}'. > + > + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true [ ... 
] > +test_pipe_report_aslr() { > + echo "Test pipe mode perf report consistency" [ ... ] > + perf report -i "${data_clean}" --stdio > "${report1}" > + perf report -i "${data2}" --stdio > "${report2}" > + > + # Strip headers and compare lines with percentages > + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true > + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true [Severity: Medium] Will this comparison also suffer from the same unresolved symbol flakiness described in test_report_aslr() above? > + > + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true [ ... ] > +test_pipe_out_report_aslr() { > + echo "Test pipe output mode perf report consistency" [ ... ] > + perf report -i "${data_clean}" --stdio > "${report1}" > + perf inject -b --aslr -i "${data}" -o - | perf report -i - --stdio > "${report2}" > + > + # Strip headers and compare lines with percentages > + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true > + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true [Severity: Medium] Could this filter also fail to strip raw hex addresses for unresolved symbols and cause similar test flakes? > + > + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true -- Sashiko AI review · https://sashiko.dev/#/patchset/20260606072125.2786845-1-irogers@google.com?part=4 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v15 5/5] perf aslr: Strip sample registers 2026-06-06 7:21 ` [PATCH v15 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (3 preceding siblings ...) 2026-06-06 7:21 ` [PATCH v15 4/5] perf test: Add inject ASLR test Ian Rogers @ 2026-06-06 7:21 ` Ian Rogers 2026-06-06 7:38 ` sashiko-bot 2026-06-06 15:14 ` [PATCH v16 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-06 7:21 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz Refactor the ASLR tool to strip out only the register dump payload by masking out the relevant perf_event_attr fields when the delegated tool is handling the data. struct aslr_evsel_priv maintains the original perf_event_attr values and is looked up via the evsel_orig_attrs hashmap. This allows us to keep samples that would otherwise be dropped because they contain registers, while still obfuscating the registers. 
Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/builtin-inject.c | 46 ++++-- tools/perf/tests/shell/inject_aslr.sh | 55 +++++++ tools/perf/util/aslr.c | 213 +++++++++++++++++++++----- tools/perf/util/aslr.h | 4 + 4 files changed, 267 insertions(+), 51 deletions(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 6ac6e6fb3b47..96b90af8264c 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -254,6 +254,12 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, return -ENOMEM; memcpy(stripped_event, event, event->header.size); stripped_event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + if (stripped_event->attr.attr.size >= + (offsetof(struct perf_event_attr, sample_regs_user) + sizeof(u64))) + stripped_event->attr.attr.sample_regs_user = 0; + if (stripped_event->attr.attr.size >= + (offsetof(struct perf_event_attr, sample_regs_intr) + sizeof(u64))) + stripped_event->attr.attr.sample_regs_intr = 0; if (stripped_event->attr.attr.type == PERF_TYPE_BREAKPOINT) { stripped_event->attr.attr.bp_addr = 0; @@ -308,7 +314,9 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, attr.sample_type &= ~PERF_SAMPLE_AUX; if (inject->aslr) { attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; - if (attr.type >= PERF_TYPE_MAX) { + if (attr.type == PERF_TYPE_BREAKPOINT) { + attr.bp_addr = 0; + } else if (attr.type >= PERF_TYPE_MAX) { struct perf_pmu *pmu = perf_pmus__find_by_type(attr.type); if (pmu && (!strcmp(pmu->name, "kprobe") || !strcmp(pmu->name, "uprobe"))) { @@ -316,6 +324,8 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, attr.config2 = 0; } } + attr.sample_regs_user = 0; + attr.sample_regs_intr = 0; } if (inject->itrace_synth_opts.add_last_branch) { @@ -2635,6 +2645,10 @@ static int __cmd_inject(struct perf_inject *inject) evsel->core.attr.exclude_callchain_user = 0; 
} } + + if (inject->aslr) + aslr_tool__strip_evlist(inject->session->tool, session->evlist); + session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc, @@ -2893,6 +2907,18 @@ int cmd_inject(int argc, const char **argv) if (zstd_init(&(inject.session->zstd_data), 0) < 0) pr_warning("Decompression initialization failed.\n"); + if (inject.aslr) { + struct evsel *evsel; + + evlist__for_each_entry(inject.session->evlist, evsel) { + ret = aslr_tool__cache_orig_attrs(tool, evsel); + if (ret) { + pr_err("Failed to cache original attributes: %d\n", ret); + goto out_delete; + } + } + } + /* Save original section info before feature bits change */ ret = save_section_info(&inject); if (ret) @@ -2911,10 +2937,17 @@ int cmd_inject(int argc, const char **argv) * the input. */ if (!data.is_pipe) { + if (inject.aslr) + aslr_tool__strip_evlist(tool, inject.session->evlist); + ret = perf_event__synthesize_for_pipe(&inject.tool, inject.session, &inject.output, perf_event__repipe); + + if (inject.aslr) + aslr_tool__restore_evlist(tool, inject.session->evlist); + if (ret < 0) goto out_delete; } @@ -2980,17 +3013,6 @@ int cmd_inject(int argc, const char **argv) ret = __cmd_inject(&inject); - if (inject.aslr) { - struct evsel *evsel; - - evlist__for_each_entry(inject.session->evlist, evsel) { - evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; - - if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) - evsel->core.attr.bp_addr = 0; - } - } - guest_session__exit(&inject.guest_session); out_delete: diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh index d8ded16ba905..21d306a0ff2f 100755 --- a/tools/perf/tests/shell/inject_aslr.sh +++ b/tools/perf/tests/shell/inject_aslr.sh @@ -450,6 +450,60 @@ test_kernel_report_aslr() { fi } +test_regs_stripping() { + echo "Test user register stripping" + local rdata="${temp_dir}/perf.data.regs" + 
local rdata2="${temp_dir}/perf.data.regs.injected" + local rdata_clean="${temp_dir}/perf.data.regs.clean" + + if ! perf record --user-regs -o "${rdata}" ${prog} > /dev/null 2>&1; then + echo "Skipping user registers test as recording failed (unsupported flag/platform)" + return + fi + + perf inject -b -i "${rdata}" -o "${rdata_clean}" + perf inject -v -b --aslr -i "${rdata}" -o "${rdata2}" + + local report1="${temp_dir}/report_regs1" + local report2="${temp_dir}/report_regs2" + local report1_clean="${temp_dir}/report_regs1.clean" + local report2_clean="${temp_dir}/report_regs2.clean" + local diff_file="${temp_dir}/diff_regs" + + perf report -i "${rdata_clean}" --stdio > "${report1}" 2>/dev/null || true + perf report -i "${rdata2}" --stdio > "${report2}" 2>/dev/null || true + + grep '%' "${report1}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | \ + sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | \ + sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! 
-s "${report1_clean}" ]; then + echo "User registers stripping test [Failed - profile trace starved/empty]" + err=1 + return + elif [ -s "${diff_file}" ]; then + echo "User registers stripping test [Failed - report parsing differs]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + return + fi + + local script_dump="${temp_dir}/script_regs_dump" + perf script -D -i "${rdata2}" > "${script_dump}" 2>/dev/null || true + if grep -q "PERF_SAMPLE_REGS_USER" "${script_dump}"; then + echo "User registers stripping test [Failed - register dumps still present]" + err=1 + else + echo "User registers stripping test [Success]" + fi +} + test_basic_aslr test_pipe_aslr test_callchain_aslr @@ -459,6 +513,7 @@ test_pipe_out_report_aslr test_dropped_samples test_kernel_aslr test_kernel_report_aslr +test_regs_stripping cleanup ${err} exit $err diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c index 7bd646a5c2fb..99a6d7318174 100644 --- a/tools/perf/util/aslr.c +++ b/tools/perf/util/aslr.c @@ -5,6 +5,7 @@ #include "debug.h" #include "event.h" #include "evsel.h" +#include "evlist.h" #include "machine.h" #include "map.h" #include "thread.h" @@ -16,6 +17,7 @@ #include <internal/lib.h> /* page_size */ #include <linux/compiler.h> #include <linux/zalloc.h> +#include <errno.h> #include <inttypes.h> #include <unistd.h> @@ -43,6 +45,23 @@ struct aslr_mapping { u64 remap_start; }; +struct aslr_evsel_priv { + u64 orig_sample_type; + u64 orig_sample_regs_user; + u64 orig_sample_regs_intr; + int orig_sample_size; +}; + +static size_t evsel_hash(long key, void *ctx __maybe_unused) +{ + return (size_t)key; +} + +static bool evsel_equal(long key1, long key2, void *ctx __maybe_unused) +{ + return key1 == key2; +} + struct process_top_address { u64 remapped_max; }; @@ -57,6 +76,11 @@ struct aslr_tool { struct hashmap remap_addresses; /** @top_addresses: mapping from process to max remapped address. 
*/ struct hashmap top_addresses; + /** + * @evsel_orig_attrs: mapping from evsel pointer to its original + * unstripped sample_type and registers bitmasks. + */ + struct hashmap evsel_orig_attrs; }; static const pid_t kernel_pid = -1; @@ -642,6 +666,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, struct aslr_tool *aslr; struct perf_tool *delegate; int ret; + int orig_sample_size; u64 sample_type; struct thread *thread; struct machine *aslr_machine; @@ -654,6 +679,10 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, u64 addr; size_t i; size_t j; + struct aslr_evsel_priv *priv = NULL; + u64 orig_sample_type; + u64 orig_regs_user; + u64 orig_regs_intr; bool orig_needs_swap; del_tool = container_of(tool, struct delegate_tool, tool); @@ -664,7 +693,23 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, return delegate->sample(delegate, event, sample, machine); ret = -EFAULT; - sample_type = evsel->core.attr.sample_type; + + if (hashmap__find(&aslr->evsel_orig_attrs, evsel, &priv)) { + orig_sample_type = priv->orig_sample_type; + orig_regs_user = priv->orig_sample_regs_user; + orig_regs_intr = priv->orig_sample_regs_intr; + } else { + orig_sample_type = evsel->core.attr.sample_type; + orig_regs_user = evsel->core.attr.sample_regs_user; + orig_regs_intr = evsel->core.attr.sample_regs_intr; + } + + orig_sample_size = evsel->sample_size; + + sample_type = orig_sample_type; + sample_type &= ~PERF_SAMPLE_REGS_USER; + sample_type &= ~PERF_SAMPLE_REGS_INTR; + max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); new_event = (union perf_event *)aslr->event_copy; @@ -713,25 +758,25 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, i++; \ } while (0) - if (sample_type & PERF_SAMPLE_IDENTIFIER) + if (orig_sample_type & PERF_SAMPLE_IDENTIFIER) COPY_U64(); /* id */ - if (sample_type & 
PERF_SAMPLE_IP) + if (orig_sample_type & PERF_SAMPLE_IP) REMAP_U64(sample->ip); - if (sample_type & PERF_SAMPLE_TID) + if (orig_sample_type & PERF_SAMPLE_TID) COPY_U64(); /* pid, tid */ - if (sample_type & PERF_SAMPLE_TIME) + if (orig_sample_type & PERF_SAMPLE_TIME) COPY_U64(); /* time */ - if (sample_type & PERF_SAMPLE_ADDR) + if (orig_sample_type & PERF_SAMPLE_ADDR) REMAP_U64(sample->addr); - if (sample_type & PERF_SAMPLE_ID) + if (orig_sample_type & PERF_SAMPLE_ID) COPY_U64(); /* id */ - if (sample_type & PERF_SAMPLE_STREAM_ID) + if (orig_sample_type & PERF_SAMPLE_STREAM_ID) COPY_U64(); /* stream_id */ - if (sample_type & PERF_SAMPLE_CPU) + if (orig_sample_type & PERF_SAMPLE_CPU) COPY_U64(); /* cpu, res */ - if (sample_type & PERF_SAMPLE_PERIOD) + if (orig_sample_type & PERF_SAMPLE_PERIOD) COPY_U64(); /* period */ - if (sample_type & PERF_SAMPLE_READ) { + if (orig_sample_type & PERF_SAMPLE_READ) { if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { COPY_U64(); /* value */ if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) @@ -765,7 +810,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, } } } - if (sample_type & PERF_SAMPLE_CALLCHAIN) { + if (orig_sample_type & PERF_SAMPLE_CALLCHAIN) { u64 nr; if (CHECK_BOUNDS(1, 1)) { @@ -831,7 +876,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr); } } - if (sample_type & PERF_SAMPLE_RAW) { + if (orig_sample_type & PERF_SAMPLE_RAW) { size_t bytes = sizeof(u32) + sample->raw_size; size_t u64_words = (bytes + 7) / 8; @@ -850,7 +895,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + if (orig_sample_type & PERF_SAMPLE_BRANCH_STACK) { u64 nr; if (CHECK_BOUNDS(1, 1)) { @@ -895,19 +940,25 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, goto out_put; } } - if (sample_type & 
PERF_SAMPLE_REGS_USER) { + if (orig_sample_type & PERF_SAMPLE_REGS_USER) { + u64 abi; + if (CHECK_BOUNDS(1, 0)) { ret = -EFAULT; goto out_put; } - /* abi */ - COPY_U64(); - /* TODO: can this be less conservative? */ - pr_debug("Dropping regs user sample as possible ASLR leak\n"); - ret = 0; - goto out_put; + abi = in_array[i++]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(orig_regs_user); + + if (nr > max_i - i) { + ret = -EFAULT; + goto out_put; + } + i += nr; + } } - if (sample_type & PERF_SAMPLE_STACK_USER) { + if (orig_sample_type & PERF_SAMPLE_STACK_USER) { u64 size; if (CHECK_BOUNDS(1, 1)) { @@ -938,39 +989,45 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + if (orig_sample_type & PERF_SAMPLE_WEIGHT_TYPE) COPY_U64(); /* perf_sample_weight */ - if (sample_type & PERF_SAMPLE_DATA_SRC) + if (orig_sample_type & PERF_SAMPLE_DATA_SRC) COPY_U64(); /* data_src */ - if (sample_type & PERF_SAMPLE_TRANSACTION) + if (orig_sample_type & PERF_SAMPLE_TRANSACTION) COPY_U64(); /* transaction */ - if (sample_type & PERF_SAMPLE_REGS_INTR) { + if (orig_sample_type & PERF_SAMPLE_REGS_INTR) { + u64 abi; + if (CHECK_BOUNDS(1, 0)) { ret = -EFAULT; goto out_put; } - /* abi */ - COPY_U64(); - /* TODO: can this be less conservative? */ - pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); - ret = 0; - goto out_put; + abi = in_array[i++]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(orig_regs_intr); + + if (nr > max_i - i) { + ret = -EFAULT; + goto out_put; + } + i += nr; + } } - if (sample_type & PERF_SAMPLE_PHYS_ADDR) { + if (orig_sample_type & PERF_SAMPLE_PHYS_ADDR) { COPY_U64(); /* phys_addr */ /* TODO: can this be less conservative? 
*/ pr_debug("Dropping physical address sample as possible ASLR leak\n"); ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_CGROUP) + if (orig_sample_type & PERF_SAMPLE_CGROUP) COPY_U64(); /* cgroup */ - if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + if (orig_sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) COPY_U64(); /* data_page_size */ - if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + if (orig_sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) COPY_U64(); /* code_page_size */ - if (sample_type & PERF_SAMPLE_AUX) { + if (orig_sample_type & PERF_SAMPLE_AUX) { u64 size; if (CHECK_BOUNDS(1, 1)) { @@ -1010,15 +1067,23 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, } new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); - + /* Temporarily override evsel attributes to match the stripped new_event format! */ + evsel->sample_size = __evsel__sample_size(sample_type); + evsel->core.attr.sample_type = sample_type; + evsel->core.attr.sample_regs_user = 0; + evsel->core.attr.sample_regs_intr = 0; orig_needs_swap = evsel->needs_swap; - evsel->needs_swap = false; perf_sample__init(&new_sample, /*all=*/ true); ret = evsel__parse_sample(evsel, new_event, &new_sample); evsel->needs_swap = orig_needs_swap; if (ret) { + /* Restore original attributes immediately if parsing fails */ + evsel->sample_size = orig_sample_size; + evsel->core.attr.sample_type = orig_sample_type; + evsel->core.attr.sample_regs_user = orig_regs_user; + evsel->core.attr.sample_regs_intr = orig_regs_intr; perf_sample__exit(&new_sample); goto out_put; } @@ -1027,6 +1092,12 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = delegate->sample(delegate, new_event, &new_sample, machine); perf_sample__exit(&new_sample); + /* Restore original attributes so trace ingestion never desynchronizes! 
*/ + evsel->sample_size = orig_sample_size; + evsel->core.attr.sample_type = orig_sample_type; + evsel->core.attr.sample_regs_user = orig_regs_user; + evsel->core.attr.sample_regs_intr = orig_regs_intr; + out_put: thread__put(thread); return ret; @@ -1093,6 +1164,9 @@ static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) hashmap__init(&aslr->top_addresses, top_addresses__hash, top_addresses__equal, /*ctx=*/NULL); + hashmap__init(&aslr->evsel_orig_attrs, + evsel_hash, evsel_equal, + /*ctx=*/NULL); aslr->tool.tool.sample = aslr_tool__process_sample; /* read - reads a counter, okay to delegate. */ @@ -1154,11 +1228,72 @@ void aslr_tool__delete(struct perf_tool *tool) zfree(&cur->pkey); zfree(&cur->pvalue); } + hashmap__for_each_entry(&aslr->evsel_orig_attrs, cur, bkt) { + zfree(&cur->pvalue); + } hashmap__clear(&aslr->remap_addresses); hashmap__clear(&aslr->top_addresses); + hashmap__clear(&aslr->evsel_orig_attrs); aslr_tool__destroy_machines_priv(&aslr->machines); machines__destroy_kernel_maps(&aslr->machines); machines__exit(&aslr->machines); free(aslr); } + +int aslr_tool__cache_orig_attrs(struct perf_tool *tool, struct evsel *evsel) +{ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct aslr_evsel_priv *priv = zalloc(sizeof(*priv)); + int err; + + if (!priv) + return -ENOMEM; + + priv->orig_sample_type = evsel->core.attr.sample_type; + priv->orig_sample_regs_user = evsel->core.attr.sample_regs_user; + priv->orig_sample_regs_intr = evsel->core.attr.sample_regs_intr; + priv->orig_sample_size = evsel->sample_size; + + err = hashmap__add(&aslr->evsel_orig_attrs, evsel, priv); + if (err) { + free(priv); + return err; + } + return 0; +} + +void aslr_tool__strip_evlist(const struct perf_tool *tool __maybe_unused, struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + 
evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + evsel->core.attr.sample_regs_user = 0; + evsel->core.attr.sample_regs_intr = 0; + evsel->sample_size = __evsel__sample_size(evsel->core.attr.sample_type); + evsel__calc_id_pos(evsel); + + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) + evsel->core.attr.bp_addr = 0; + } +} + +void aslr_tool__restore_evlist(const struct perf_tool *tool, struct evlist *evlist) +{ + const struct delegate_tool *del_tool = container_of(tool, const struct delegate_tool, tool); + const struct aslr_tool *aslr = container_of(del_tool, const struct aslr_tool, tool); + struct evsel *evsel; + struct aslr_evsel_priv *priv; + + evlist__for_each_entry(evlist, evsel) { + if (hashmap__find(&aslr->evsel_orig_attrs, evsel, &priv)) { + evsel->core.attr.sample_type = priv->orig_sample_type; + evsel->core.attr.sample_regs_user = priv->orig_sample_regs_user; + evsel->core.attr.sample_regs_intr = priv->orig_sample_regs_intr; + evsel->sample_size = priv->orig_sample_size; + evsel__calc_id_pos(evsel); + } + } +} diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h index a9b90bf29540..4c2cffc0e500 100644 --- a/tools/perf/util/aslr.h +++ b/tools/perf/util/aslr.h @@ -30,8 +30,12 @@ struct perf_tool; struct evsel; +struct evlist; struct perf_tool *aslr_tool__new(struct perf_tool *delegate); void aslr_tool__delete(struct perf_tool *aslr); +int aslr_tool__cache_orig_attrs(struct perf_tool *tool, struct evsel *evsel); +void aslr_tool__strip_evlist(const struct perf_tool *tool, struct evlist *evlist); +void aslr_tool__restore_evlist(const struct perf_tool *tool, struct evlist *evlist); #endif /* __PERF_ASLR_H */ -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v15 5/5] perf aslr: Strip sample registers 2026-06-06 7:21 ` [PATCH v15 5/5] perf aslr: Strip sample registers Ian Rogers @ 2026-06-06 7:38 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-06-06 7:38 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 2 potential issue(s) to consider: Pre-existing issues: - [High] Pre-existing out-of-bounds heap write of 8 bytes when zeroing `config2` in `perf_event__repipe_attr` for small `perf_event_attr` sizes. - [High] Pre-existing ASLR leak via incomplete attribute stripping in `aslr_tool__strip_evlist`. -- commit 5ac431e4dd0a910e235e94ecc9652b8c0f318e54 Author: Ian Rogers <irogers@google.com> Subject: perf aslr: Strip sample registers This commit refactors the ASLR tool to strip out only the register dump payload by masking out the relevant perf_event_attr fields when the delegated tool is handling the data. It maintains the original perf_event_attr values in struct aslr_evsel_priv to keep samples that would otherwise be dropped. 
> diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c > index 6ac6e6fb3b47e..96b90af8264c6 100644 > --- a/tools/perf/builtin-inject.c > +++ b/tools/perf/builtin-inject.c > @@ -254,6 +254,12 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, > return -ENOMEM; > memcpy(stripped_event, event, event->header.size); > stripped_event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; > + if (stripped_event->attr.attr.size >= > + (offsetof(struct perf_event_attr, sample_regs_user) + sizeof(u64))) > + stripped_event->attr.attr.sample_regs_user = 0; > + if (stripped_event->attr.attr.size >= > + (offsetof(struct perf_event_attr, sample_regs_intr) + sizeof(u64))) > + stripped_event->attr.attr.sample_regs_intr = 0; > > if (stripped_event->attr.attr.type == PERF_TYPE_BREAKPOINT) { > stripped_event->attr.attr.bp_addr = 0; [Severity: High] This isn't a bug introduced by this patch, but can this lead to an out-of-bounds heap write? In pipe mode, stripped_event is dynamically allocated based on event->header.size. For older perf formats or dynamically sized events (like PERF_ATTR_SIZE_VER0 which is 64 bytes), the total event size may be just 72 bytes. Immediately following this block, the code does: } else if (stripped_event->attr.attr.type >= PERF_TYPE_MAX) { ... if (pmu && (!strcmp(pmu->name, "kprobe") || !strcmp(pmu->name, "uprobe"))) { stripped_event->attr.attr.config1 = 0; stripped_event->attr.attr.config2 = 0; } } If the event type matches a kprobe or uprobe PMU, the code zeroes config2 unconditionally. Because config2 starts at offset 64 within the attr structure (offset 72 in the perf_event), this results in an 8-byte out-of-bounds write of zeros on the heap. Could a size check be added similar to the ones just introduced for the sample registers? > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > index 7bd646a5c2fb1..99a6d73181746 100644 > --- a/tools/perf/util/aslr.c > +++ b/tools/perf/util/aslr.c [ ... 
] > +void aslr_tool__strip_evlist(const struct perf_tool *tool __maybe_unused, struct evlist *evlist) > +{ > + struct evsel *evsel; > + > + evlist__for_each_entry(evlist, evsel) { > + evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; > + evsel->core.attr.sample_regs_user = 0; > + evsel->core.attr.sample_regs_intr = 0; > + evsel->sample_size = __evsel__sample_size(evsel->core.attr.sample_type); > + evsel__calc_id_pos(evsel); > + > + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) > + evsel->core.attr.bp_addr = 0; [Severity: High] This is a pre-existing issue, but does this function miss zeroing config1 and config2 for kprobes and uprobes? While perf_event__repipe_attr() attempts to handle this for pipe mode to prevent ASLR leaks, it looks like aslr_tool__strip_evlist(), which is used for non-pipe mode, entirely omits stripping these attributes. Could this cause the unstripped config1 and config2 attributes to be written into the file header, potentially leaking precise probe addresses and defeating the ASLR mitigation? > + } > +} -- Sashiko AI review · https://sashiko.dev/#/patchset/20260606072125.2786845-1-irogers@google.com?part=5 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v16 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes 2026-06-06 7:21 ` [PATCH v15 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (4 preceding siblings ...) 2026-06-06 7:21 ` [PATCH v15 5/5] perf aslr: Strip sample registers Ian Rogers @ 2026-06-06 15:14 ` Ian Rogers 2026-06-06 15:14 ` [PATCH v16 1/5] perf maps: Add maps__mutate_mapping Ian Rogers ` (5 more replies) 5 siblings, 6 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-06 15:14 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz This patch series introduces the new 'perf inject --aslr' feature to remap virtual memory addresses, or drop events that would leak physical memory addresses, when profile record data is shared between machines. Bundled with this feature is a bug fix inside the core map tracking tool that hardens perf session analysis against concurrent lookup data races. Detailed Mechanism of MMAP Mapping and ASLR Virtual Address Allocation: The ASLR tool virtualizes the address space of the recorded processes by intercepting MMAP and MMAP2 events to build a consistent translation database, which is subsequently used to rewrite sample addresses. It maintains two primary lookup databases using hash maps: 1. 'remap_addresses': Maps an original mapping key to its new remapped base address. The key uses topological invariant coordinates: (machine, dso, invariant). The invariant is computed as (start - pgoff) for DSO-backed mappings. This invariant remains constant even when perf's internal overlap-resolution splits a VMA into fragmented pieces, ensuring split maps resolve consistently back to the same remapped base. 2. 'top_addresses': Tracks the allocation state per process (machine, pid). It maintains 'remapped_max' (the highest allocated address in the virtualized space). 
For each MMAP/MMAP2 event: - We look up the DSO and invariant key in 'remap_addresses'. If found, we reuse the translation, preserving the offset within the mapping. - If not found, we allocate a new remapped address space: - We use thread__find_map to look up the mapping immediately preceding the new one in the original address space (at start - 1). If the preceding mapping was also remapped, we place the new mapping contiguously after it in the remapped space. This preserves contiguity of split mappings (e.g., symbols split by HugeTLB, or anonymous .bss segments adjacent to initialized data). - If no contiguous mapping is found, we insert a 1-page gap from the highest allocated address (remapped_max) to prevent accidental merging of unrelated VMAs. - The event's start address (and pgoff for kernel maps) is rewritten, and the event is delegated to the output writer. To remain strictly conservative and guarantee security, the tool scrubs breakpoint addresses (bp_addr) from all synthesized stream headers, completely drops PERF_RECORD_TEXT_POKE events to prevent leaks of absolute immediate pointer operands, and drops unsupported complex payloads (such as user register stacks, raw tracepoints, and hardware AUX tracing frames). Verification is reinforced with a shell test ('inject_aslr.sh'). Prerequisite Bug Fix (Patch 1): During development, a core map indexing issue was identified and resolved to prevent concurrent lookup data races during session analysis. Changes since v15: - Patch 2: Added bounds checking for event->header.size before writing to breakpoint fields to avoid heap buffer overflow on older ABI events. - Patch 2: Fixed asymmetric calculation bug in aslr_tool__findnew_mapping() where pgoff for anonymous kernel memory was not properly subtracted upon insertion, causing the lookup addition to overflow. - Patch 2: Added detailed comments documenting the symmetric lookup and insertion math for unmapped and mapped memory blocks. 
- Patch 5: Add missing kprobe and uprobe scrubbing of config1 and config2 during aslr_tool__strip_evlist() to strictly conform with repipe constraints. Changes since v14: - Patch 2: Removed unnecessary vertical whitespace in builtin-inject.c. - Patch 2: Added comments explaining why pgoff is assigned for anonymous memory maps to prevent ASLR leaks. - Patch 2: Removed orig_last_end tracking and refactored contiguous mapping detection to use thread__find_map(..., start - 1, ...) based on Gabriel's feedback. - Patch 2: Scrub kprobe/uprobe event config1 and config2 fields to prevent address leaks. - Patch 2: Overwrite pgoff with the remapped start address for anonymous mappings (detected via is_anon_memory and is_no_dso_memory). - Patch 3: Fix C90 mixed declaration error for orig_needs_swap. - Patch 3: Temporarily disable evsel->needs_swap during the secondary evsel__parse_sample() call to prevent branch stack double-swapping bugs. Changes since v13: - Patch 2: Added a NULL check for env before calling perf_env__kernel_is_64_bit(env) to prevent potential segfaults if the recorded environment has no headers. - Patch 5: Fixed sample_size and id_pos going out of sync during aslr_tool__strip_evlist() and aslr_tool__restore_evlist(). Instead of using evsel__reset_sample_bit(), which was acting as a no-op due to early bit clearing and corrupted sample_size, the tool now directly updates sample_type and recomputes sample_size/id_pos dynamically. Added orig_sample_size to aslr_evsel_priv to correctly restore the state. Changes since v12: - Patch 2: Fixed potential NULL pointer dereference in remap_addresses__hash() when handling unmapped memory events (key->dso is NULL) under REFCNT_CHECKING. - Patch 2: Dynamically detect machine architecture bitness via perf_env__kernel_is_64_bit() to select appropriate kernel_space_start boundaries, avoiding 64-bit address injection on 32-bit platforms. 
Changes since v11: - Patch 1: Fixed struct dso name accessor in maps.c by using dso__name() instead of ->name. - Patch 2: Fixed hash function in aslr.c to hash the underlying dso pointer using RC_CHK_ACCESS to support reference count checking. Changes since v10: - Patch 1: Added explicit tracking array logic in maps__load_maps() to correctly accumulate valid maps (skipping NULL entries after failures) and safely return the exact populated count, resolving out-of-bounds pointer iteration panics. - Patch 3: Fixed endianness bug during cross-endian sample parsing by passing evsel->needs_swap instead of false to __evsel__parse_sample in aslr.c, ensuring correct 32-bit field byte unswapping for packed fields. Refactored evsel__parse_sample to take a needs_swap argument via __evsel__parse_sample. - Patch 4: Fixed inject_aslr.sh exit code handling in trap functions to capture and propagate the correct pipeline failure status code instead of unconditionally returning success or failing the test. Changes since v9: - Patch 1: Added `-ENOMEM` error check inside `maps__find_symbol_by_name()` and return `NULL` early. Added map sorting state invalidation on early return in `maps__load_maps()`. - Patch 2: Fixed encapsulation by using `thread__maps()` and `thread__pid()` accessors in `aslr_tool__findnew_mapping()`. Added `pr_warning_once` warning when raw auxtrace data is dropped. - Patch 3: Fixed encapsulation by using `thread__maps()` and `thread__pid()` accessors in `aslr_tool__remap_address()`. Wrapped `evsel__parse_sample()` to temporarily disable `needs_swap` to avoid branch stack endianness corruption on cross-endian files. Fixed ISO C90 warning for declaration-after-statement for `orig_needs_swap`. - Patch 4: Fixed duplicate cleanup by explicitly removing trap handlers (`trap - EXIT TERM INT`) inside the `cleanup()` function. - Patch 5: Fixed heap corruption by adding size bounds checking before writing to `sample_regs_user` and `sample_regs_intr` fields. 
Added missing register mask clearing logic for the `itrace` synthesis path of `perf_event__repipe_attr()`. Ian Rogers (5): perf maps: Add maps__mutate_mapping perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking perf inject/aslr: Implement sample address remapping perf test: Add inject ASLR test perf aslr: Strip sample registers tools/perf/builtin-inject.c | 104 +- tools/perf/tests/shell/inject_aslr.sh | 519 ++++++++++ tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 1322 +++++++++++++++++++++++++ tools/perf/util/aslr.h | 41 + tools/perf/util/evsel.c | 6 +- tools/perf/util/evsel.h | 10 +- tools/perf/util/machine.c | 32 +- tools/perf/util/maps.c | 149 ++- tools/perf/util/maps.h | 3 + tools/perf/util/symbol-elf.c | 41 +- tools/perf/util/symbol.c | 17 +- 12 files changed, 2180 insertions(+), 65 deletions(-) create mode 100755 tools/perf/tests/shell/inject_aslr.sh create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v16 1/5] perf maps: Add maps__mutate_mapping 2026-06-06 15:14 ` [PATCH v16 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers @ 2026-06-06 15:14 ` Ian Rogers 2026-06-06 15:14 ` [PATCH v16 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers ` (4 subsequent siblings) 5 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-06 15:14 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz During kernel ELF symbol parsing (dso__process_kernel_symbol), proc kallsyms image loading (dso__load_kernel_sym, dso__load_guest_kernel_sym), and dynamic kernel memory map alignment updates (machine__update_kernel_mmap), the loader directly modifies live virtual address boundary key fields on map objects. If these boundaries are mutated while the map pointer actively resides inside the parent maps cache array list (kmaps) outside of any lock closure, an unsafe concurrent window is exposed where parallel worker lookup threads (e.g., inside perf top) can mistakenly assume the cache remains sorted based on stale parameters, executing binary search queries (bsearch) across an unsorted range and triggering lookup failures. Fix this by introducing maps__mutate_mapping() that explicitly acquires the parent maps write semaphore lock, executes an incoming mutation callback block to perform the field updates under lock protection, and invalidates the sorted tracking flags prior to releasing the write lock. This guarantees synchronization invariants, closing the concurrent lookup race window. The adjacent module alignment pass inside machine__create_kernel_maps() is safely preserved as a high-performance lockless pass, as its invocation lifecycle bounds remain strictly single-threaded by contract during session initialization construction. 
To safely support this unconditional down_write write lock mutator without recursive read-to-write self-deadlock upgrades during lazy symbol loading, we introduce a public maps__load_maps() API. It copies map pointers under a brief read lock and force-loads all modules locklessly outside the lock. Callers (such as perf inject) must pre-load all kernel symbol maps up front at startup using maps__load_maps(), completely bypassing dynamic runtime mutations. Fixes: 39b12f781271 ("perf tools: Make it possible to read object code from vmlinux") Assisted-by: Antigravity:gemini-3.5-flash Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/util/machine.c | 32 +++++--- tools/perf/util/maps.c | 149 ++++++++++++++++++++++++++++------- tools/perf/util/maps.h | 3 + tools/perf/util/symbol-elf.c | 41 ++++++---- tools/perf/util/symbol.c | 17 +++- 5 files changed, 184 insertions(+), 58 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index da1ad58758af..1ea06fde14e0 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1539,22 +1539,30 @@ static void machine__set_kernel_mmap(struct machine *machine, map__set_end(machine->vmlinux_map, ~0ULL); } -static int machine__update_kernel_mmap(struct machine *machine, - u64 start, u64 end) +struct kernel_mmap_mutation_ctx { + u64 start; + u64 end; +}; + +static int kernel_mmap_mutate_cb(struct map *map, void *data) { - struct map *orig, *updated; - int err; + struct kernel_mmap_mutation_ctx *ctx = data; - orig = machine->vmlinux_map; - updated = map__get(orig); + map__set_start(map, ctx->start); + map__set_end(map, ctx->end); + if (ctx->start == 0 && ctx->end == 0) + map__set_end(map, ~0ULL); + return 0; +} - machine->vmlinux_map = updated; - maps__remove(machine__kernel_maps(machine), orig); - machine__set_kernel_mmap(machine, start, end); - err = maps__insert(machine__kernel_maps(machine), updated); - map__put(orig); +static int machine__update_kernel_mmap(struct machine *machine, 
+ u64 start, u64 end) +{ + struct kernel_mmap_mutation_ctx ctx = { .start = start, .end = end }; - return err; + return maps__mutate_mapping(machine__kernel_maps(machine), + machine->vmlinux_map, + kernel_mmap_mutate_cb, &ctx); } int machine__create_kernel_maps(struct machine *machine) diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index 923935ee21b6..b1b8efe42149 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -576,6 +576,49 @@ void maps__remove(struct maps *maps, struct map *map) #endif } +/** + * maps__mutate_mapping - Apply write-protected mutations to a map. + * @maps: The maps collection containing the map. + * @map: The map to mutate. + * @mutate_cb: Callback function that performs the actual mutations. + * @data: Private data passed to the callback. + * + * This acquires the write lock on the maps semaphore to safely protect + * concurrent readers from seeing partially mutated or unsorted map boundaries. + * + * WARNING: Acquiring down_write() here can trigger a recursive self-deadlock if + * the caller already holds the read lock (e.g., during maps__for_each_map() or + * maps__find() iteration paths that trigger lazy symbol loading). To completely + * avoid this deadlock, all kernel/module maps must be pre-loaded up-front (via + * maps__load_maps()) under a clean, single-threaded context before entering + * multi-threaded event processing loops. 
+ */ +int maps__mutate_mapping(struct maps *maps, struct map *map, + int (*mutate_cb)(struct map *map, void *data), void *data) +{ + int err = 0; + + if (maps) + down_write(maps__lock(maps)); + + err = mutate_cb(map, data); + + if (maps) { + RC_CHK_ACCESS(maps)->maps_by_address_sorted = false; + RC_CHK_ACCESS(maps)->maps_by_name_sorted = false; + } + + if (maps) + up_write(maps__lock(maps)); + +#ifdef HAVE_LIBDW_SUPPORT + if (maps) + libdw__invalidate_dwfl(maps, maps__libdw_addr_space_dwfl(maps)); +#endif + + return err; +} + bool maps__empty(struct maps *maps) { bool res; @@ -626,6 +669,41 @@ int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data) return ret; } +int maps__load_maps(struct maps *maps) +{ + struct map **maps_copy; + unsigned int nr_maps; + int err = 0; + + if (!maps) + return 0; + + down_read(maps__lock(maps)); + nr_maps = maps__nr_maps(maps); + if (nr_maps == 0) { + up_read(maps__lock(maps)); + return 0; + } + maps_copy = calloc(nr_maps, sizeof(*maps_copy)); + if (!maps_copy) { + up_read(maps__lock(maps)); + return -ENOMEM; + } + for (unsigned int i = 0; i < nr_maps; i++) + maps_copy[i] = map__get(maps__maps_by_address(maps)[i]); + up_read(maps__lock(maps)); + + for (unsigned int i = 0; i < nr_maps; i++) { + if (map__load(maps_copy[i]) < 0) { + pr_warning("Failed to load map %s\n", dso__name(map__dso(maps_copy[i]))); + err = -1; + } + map__put(maps_copy[i]); + } + free(maps_copy); + return err; +} + void maps__remove_maps(struct maps *maps, bool (*cb)(struct map *map, void *data), void *data) { struct map **maps_by_address; @@ -668,40 +746,57 @@ struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp) return result; } -struct maps__find_symbol_by_name_args { - struct map **mapp; - const char *name; - struct symbol *sym; -}; - -static int maps__find_symbol_by_name_cb(struct map *map, void *data) +struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) { - 
struct maps__find_symbol_by_name_args *args = data; + struct map **maps_copy; + unsigned int nr_maps; + struct symbol *sym = NULL; - args->sym = map__find_symbol_by_name(map, args->name); - if (!args->sym) - return 0; + if (!maps) + return NULL; - if (!map__contains_symbol(map, args->sym)) { - args->sym = NULL; - return 0; + /* + * First, ensure all maps are loaded. We pre-load them outside of any + * read-to-write locks to avoid deadlocks. Even if some fail, we proceed. + */ + maps__load_maps(maps); + + /* + * Create a local snapshot of the maps while holding the read lock. + * This prevents deadlocking if iteration triggers further map insertions. + */ + down_read(maps__lock(maps)); + nr_maps = maps__nr_maps(maps); + maps_copy = calloc(nr_maps, sizeof(*maps_copy)); + if (maps_copy) { + for (unsigned int i = 0; i < nr_maps; i++) { + struct map *map = maps__maps_by_address(maps)[i]; + + maps_copy[i] = map__get(map); + } } + up_read(maps__lock(maps)); - if (args->mapp != NULL) - *args->mapp = map__get(map); - return 1; -} + if (!maps_copy) + return NULL; -struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) -{ - struct maps__find_symbol_by_name_args args = { - .mapp = mapp, - .name = name, - .sym = NULL, - }; + for (unsigned int i = 0; i < nr_maps; i++) { + struct map *map = maps_copy[i]; + + sym = map__find_symbol_by_name(map, name); + if (sym && map__contains_symbol(map, sym)) { + if (mapp) + *mapp = map__get(map); + break; + } + sym = NULL; + } + + for (unsigned int i = 0; i < nr_maps; i++) + map__put(maps_copy[i]); - maps__for_each_map(maps, maps__find_symbol_by_name_cb, &args); - return args.sym; + free(maps_copy); + return sym; } int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams) diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h index 5b80b199685e..4ec9b7453a3b 100644 --- a/tools/perf/util/maps.h +++ b/tools/perf/util/maps.h @@ -59,8 +59,11 @@ void maps__set_libdw_addr_space_dwfl(struct 
maps *maps, void *dwfl); size_t maps__fprintf(struct maps *maps, FILE *fp); +int maps__load_maps(struct maps *maps); int maps__insert(struct maps *maps, struct map *map); void maps__remove(struct maps *maps, struct map *map); +int maps__mutate_mapping(struct maps *maps, struct map *map, + int (*mutate_cb)(struct map *map, void *data), void *data); struct map *maps__find(struct maps *maps, u64 addr); struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 186e6d92ac3d..d1e93c0556dd 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1342,6 +1342,24 @@ static u64 ref_reloc(struct kmap *kmap) void __weak arch__sym_update(struct symbol *s __maybe_unused, GElf_Sym *sym __maybe_unused) { } +struct remap_kernel_ctx { + u64 sh_addr; + u64 sh_size; + u64 sh_offset; + struct kmap *kmap; +}; + +static int remap_kernel_cb(struct map *map, void *data) +{ + struct remap_kernel_ctx *ctx = data; + + map__set_start(map, ctx->sh_addr + ref_reloc(ctx->kmap)); + map__set_end(map, map__start(map) + ctx->sh_size); + map__set_pgoff(map, ctx->sh_offset); + map__set_mapping_type(map, MAPPING_TYPE__DSO); + return 0; +} + static int dso__process_kernel_symbol(struct dso *dso, struct map *map, GElf_Sym *sym, GElf_Shdr *shdr, struct maps *kmaps, struct kmap *kmap, @@ -1372,22 +1390,15 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, * map to the kernel dso. 
*/ if (*remap_kernel && dso__kernel(dso) && !kmodule) { + struct remap_kernel_ctx ctx = { + .sh_addr = shdr->sh_addr, + .sh_size = shdr->sh_size, + .sh_offset = shdr->sh_offset, + .kmap = kmap + }; + *remap_kernel = false; - map__set_start(map, shdr->sh_addr + ref_reloc(kmap)); - map__set_end(map, map__start(map) + shdr->sh_size); - map__set_pgoff(map, shdr->sh_offset); - map__set_mapping_type(map, MAPPING_TYPE__DSO); - /* Ensure maps are correctly ordered */ - if (kmaps) { - int err; - struct map *tmp = map__get(map); - - maps__remove(kmaps, map); - err = maps__insert(kmaps, map); - map__put(tmp); - if (err) - return err; - } + maps__mutate_mapping(kmaps, map, remap_kernel_cb, &ctx); } /* diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 0c46b24ee098..2cc911af8c81 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -48,6 +48,13 @@ #include <symbol/kallsyms.h> #include <sys/utsname.h> +static int map_fixup_cb(struct map *map, void *data __maybe_unused) +{ + map__fixup_start(map); + map__fixup_end(map); + return 0; +} + static int dso__load_kernel_sym(struct dso *dso, struct map *map); static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map); @@ -2240,10 +2247,11 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map) free(kallsyms_allocated_filename); if (err > 0 && !dso__is_kcore(dso)) { + struct maps *kmaps = map__kmaps(map); + dso__set_binary_type(dso, DSO_BINARY_TYPE__KALLSYMS); dso__set_long_name(dso, DSO__NAME_KALLSYMS, false); - map__fixup_start(map); - map__fixup_end(map); + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); } return err; @@ -2283,10 +2291,11 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map) if (err > 0) pr_debug("Using %s for symbols\n", kallsyms_filename); if (err > 0 && !dso__is_kcore(dso)) { + struct maps *kmaps = map__kmaps(map); + dso__set_binary_type(dso, DSO_BINARY_TYPE__GUEST_KALLSYMS); dso__set_long_name(dso, machine->mmap_name, 
false); - map__fixup_start(map); - map__fixup_end(map); + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); } return err; -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* [PATCH v16 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking 2026-06-06 15:14 ` [PATCH v16 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-06 15:14 ` [PATCH v16 1/5] perf maps: Add maps__mutate_mapping Ian Rogers @ 2026-06-06 15:14 ` Ian Rogers 2026-06-06 15:31 ` sashiko-bot 2026-06-06 15:14 ` [PATCH v16 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers ` (3 subsequent siblings) 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-06 15:14 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz If perf.data files are taken from one machine to another they may leak virtual addresses and so weaken ASLR on the machine they are coming from. Add an aslr option for perf inject that remaps all virtual addresses, or drops data/events, so that the virtual address information isn't leaked. This patch introduces the core ASLR remapping tool infrastructure and implements remapping/tracking for metadata events (MMAP, MMAP2, COMM, FORK, EXIT, KSYMBOL, TEXT_POKE). Sample events are delegated without remapping for now. 
Assisted-by: Antigravity:gemini-3.5-flash Signed-off-by: Ian Rogers <irogers@google.com> Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> --- tools/perf/builtin-inject.c | 84 ++++- tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 732 ++++++++++++++++++++++++++++++++++++ tools/perf/util/aslr.h | 37 ++ 4 files changed, 851 insertions(+), 3 deletions(-) create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 75ffe31d03fe..00a54d1c7e41 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -8,6 +8,7 @@ */ #include "builtin.h" +#include "util/aslr.h" #include "util/color.h" #include "util/dso.h" #include "util/vdso.h" @@ -24,6 +25,7 @@ #include "util/string2.h" #include "util/symbol.h" #include "util/synthetic-events.h" +#include "util/pmus.h" #include "util/thread.h" #include "util/namespaces.h" #include "util/unwind.h" @@ -124,6 +126,7 @@ struct perf_inject { bool in_place_update_dry_run; bool copy_kcore_dir; bool convert_callchain; + bool aslr; const char *input_name; struct perf_data output; u64 bytes_written; @@ -242,8 +245,43 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, if (!inject->output.is_pipe) return 0; - if (!inject->itrace_synth_opts.set) + if (!inject->itrace_synth_opts.set) { + if (inject->aslr) { + union perf_event *stripped_event = malloc(event->header.size); + int err; + + if (!stripped_event) + return -ENOMEM; + memcpy(stripped_event, event, event->header.size); + stripped_event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + + if (stripped_event->attr.attr.type == PERF_TYPE_BREAKPOINT && + event->header.size >= (offsetof(struct perf_record_header_attr, + attr.bp_addr) + sizeof(u64))) { + stripped_event->attr.attr.bp_addr = 0; + } else if (stripped_event->attr.attr.type >= PERF_TYPE_MAX) { + struct perf_pmu *pmu; + + pmu = 
perf_pmus__find_by_type(stripped_event->attr.attr.type); + if (pmu && (!strcmp(pmu->name, "kprobe") || + !strcmp(pmu->name, "uprobe"))) { + if (event->header.size >= + (offsetof(struct perf_record_header_attr, + attr.config1) + sizeof(u64))) + stripped_event->attr.attr.config1 = 0; + if (event->header.size >= + (offsetof(struct perf_record_header_attr, + attr.config2) + sizeof(u64))) + stripped_event->attr.attr.config2 = 0; + } + } + + err = perf_event__repipe_synth(tool, stripped_event); + free(stripped_event); + return err; + } return perf_event__repipe_synth(tool, event); + } if (event->header.size < sizeof(struct perf_event_header) + PERF_ATTR_SIZE_VER0) { pr_err("Attribute event size %u is too small\n", event->header.size); @@ -276,6 +314,17 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, attr.size = sizeof(struct perf_event_attr); attr.sample_type &= ~PERF_SAMPLE_AUX; + if (inject->aslr) { + attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + if (attr.type >= PERF_TYPE_MAX) { + struct perf_pmu *pmu = perf_pmus__find_by_type(attr.type); + + if (pmu && (!strcmp(pmu->name, "kprobe") || !strcmp(pmu->name, "uprobe"))) { + attr.config1 = 0; + attr.config2 = 0; + } + } + } if (inject->itrace_synth_opts.add_last_branch) { attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; @@ -2594,7 +2643,6 @@ static int __cmd_inject(struct perf_inject *inject) evsel->core.attr.exclude_callchain_user = 0; } } - session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc, @@ -2704,6 +2752,8 @@ int cmd_inject(int argc, const char **argv) unwind__option), OPT_BOOLEAN(0, "convert-callchain", &inject.convert_callchain, "Generate callchains using DWARF and drop register/stack data"), + OPT_BOOLEAN(0, "aslr", &inject.aslr, + "Remap virtual memory addresses similar to ASLR"), OPT_END() }; const char * const inject_usage[] = { @@ -2711,6 +2761,7 @@ int cmd_inject(int 
argc, const char **argv) NULL }; bool ordered_events; + struct perf_tool *tool = &inject.tool; if (!inject.itrace_synth_opts.set) { /* Disable eager loading of kernel symbols that adds overhead to perf inject. */ @@ -2731,6 +2782,11 @@ int cmd_inject(int argc, const char **argv) if (argc) usage_with_options(inject_usage, options); + if (inject.aslr && inject.convert_callchain) { + pr_err("Error: --aslr and --convert-callchain are mutually exclusive features.\n"); + return -EINVAL; + } + if (inject.strip && !inject.itrace_synth_opts.set) { pr_err("--strip option requires --itrace option\n"); return -1; @@ -2824,12 +2880,21 @@ int cmd_inject(int argc, const char **argv) inject.tool.schedstat_domain = perf_event__repipe_op2_synth; inject.tool.dont_split_sample_group = true; inject.tool.merge_deferred_callchains = false; - inject.session = __perf_session__new(&data, &inject.tool, + if (inject.aslr) { + tool = aslr_tool__new(&inject.tool); + if (!tool) { + ret = -ENOMEM; + goto out_close_output; + } + } + inject.session = __perf_session__new(&data, tool, /*trace_event_repipe=*/inject.output.is_pipe, /*host_env=*/NULL); if (IS_ERR(inject.session)) { ret = PTR_ERR(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); goto out_close_output; } @@ -2923,12 +2988,25 @@ int cmd_inject(int argc, const char **argv) ret = __cmd_inject(&inject); + if (inject.aslr) { + struct evsel *evsel; + + evlist__for_each_entry(inject.session->evlist, evsel) { + evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) + evsel->core.attr.bp_addr = 0; + } + } + guest_session__exit(&inject.guest_session); out_delete: strlist__delete(inject.known_build_ids); zstd_fini(&(inject.session->zstd_data)); perf_session__delete(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); out_close_output: if (!inject.in_place_update) perf_data__close(&inject.output); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 
4bbc78b1f741..19994e026ae5 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -6,6 +6,7 @@ perf-util-y += arm64-frame-pointer-unwind-support.o perf-util-y += addr2line.o perf-util-y += addr_location.o perf-util-y += annotate.o +perf-util-y += aslr.o perf-util-y += blake2s.o perf-util-y += block-info.o perf-util-y += block-range.o diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c new file mode 100644 index 000000000000..084158014dc7 --- /dev/null +++ b/tools/perf/util/aslr.c @@ -0,0 +1,732 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "aslr.h" + +#include "addr_location.h" +#include "debug.h" +#include "event.h" +#include "evsel.h" +#include "machine.h" +#include "map.h" +#include "thread.h" +#include "tool.h" +#include "session.h" +#include "data.h" +#include "dso.h" + +#include <internal/lib.h> /* page_size */ +#include <linux/compiler.h> +#include <linux/zalloc.h> +#include <inttypes.h> +#include <unistd.h> + +/** + * struct remap_addresses_key - Key for mapping original addresses to remapped ones. + * @dso: Pointer to the DSO (Dynamic Shared Object) associated with the mapping. + * @invariant: Unique offset invariant within the VMA (Virtual Memory Area). + * Calculated as `start - pgoff`. This value remains constant when + * perf's internal `maps__fixup_overlap_and_insert` splits a map into + * fragmented VMA pieces due to overlapping events, allowing us to + * resolve split maps consistently back to the original VMA. + * @pid: Process ID associated with the mapping. + */ +struct remap_addresses_key { + struct machine *machine; + struct dso *dso; + u64 invariant; + pid_t pid; +}; + +struct aslr_mapping { + struct list_head node; + u64 orig_start; + u64 len; + u64 remap_start; +}; + +struct process_top_address { + u64 remapped_max; +}; +struct aslr_tool { + /** @tool: The tool implemented here and a pointer to a delegate to process the data. 
*/ + struct delegate_tool tool; + /** @machines: The machines with the input, not remapped, virtual address layout. */ + struct machines machines; + /** @event_copy: Buffer used to create an event to pass to the delegate. */ + char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); + /** @remap_addresses: mapping from remap_addresses_key to remapped address. */ + struct hashmap remap_addresses; + /** @top_addresses: mapping from process to max remapped address. */ + struct hashmap top_addresses; +}; + +static const pid_t kernel_pid = -1; + +/* Start remapping user processes from a small non-zero offset. */ +static const u64 user_space_start = 0x200000; +static const u64 kernel_space_start_64 = 0xffff800010000000ULL; +static const u64 kernel_space_start_32 = 0x80000000ULL; + +static size_t remap_addresses__hash(long _key, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key = (struct remap_addresses_key *)_key; + void *dso_ptr = key->dso ? RC_CHK_ACCESS(key->dso) : NULL; + + return (size_t)key->machine ^ (size_t)dso_ptr ^ key->invariant ^ key->pid; +} + +static bool remap_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key1 = (struct remap_addresses_key *)_key1; + struct remap_addresses_key *key2 = (struct remap_addresses_key *)_key2; + + return key1->machine == key2->machine && + RC_CHK_EQUAL(key1->dso, key2->dso) && + key1->invariant == key2->invariant && + key1->pid == key2->pid; +} + +struct top_addresses_key { + struct machine *machine; + pid_t pid; +}; + +static size_t top_addresses__hash(long _key, void *ctx __maybe_unused) +{ + struct top_addresses_key *key = (struct top_addresses_key *)_key; + + return (size_t)key->machine ^ key->pid; +} + +static bool top_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) +{ + struct top_addresses_key *key1 = (struct top_addresses_key *)_key1; + struct top_addresses_key *key2 = (struct top_addresses_key *)_key2; + + return key1->machine == 
key2->machine && key1->pid == key2->pid; +} + +static u64 round_up_to_page_size(u64 addr) +{ + return (addr + page_size - 1) & ~((u64)page_size - 1); +} + +struct aslr_machine_priv { + bool kernel_maps_loaded; +}; + +static int aslr_tool__preload_kernel_maps(struct machine *machine) +{ + struct aslr_machine_priv *mpriv = machine->priv; + + if (!mpriv) { + mpriv = zalloc(sizeof(*mpriv)); + if (!mpriv) + return -ENOMEM; + machine->priv = mpriv; + } + + if (!mpriv->kernel_maps_loaded) { + struct maps *kmaps = machine__kernel_maps(machine); + + if (kmaps) { + int err = maps__load_maps(kmaps); + + if (err < 0) { + pr_err("ASLR: Failed to preload kernel maps for machine pid %d\n", + machine->pid); + return err; + } + } + mpriv->kernel_maps_loaded = true; + } + return 0; +} + +static void aslr_tool__free_machine_priv(struct machine *machine) +{ + free(machine->priv); + machine->priv = NULL; +} + +static void aslr_tool__destroy_machines_priv(struct machines *machines) +{ + struct rb_node *nd; + + aslr_tool__free_machine_priv(&machines->host); + for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) { + struct machine *machine = rb_entry(nd, struct machine, rb_node); + + aslr_tool__free_machine_priv(machine); + } +} + +static u64 aslr_tool__findnew_mapping(struct aslr_tool *aslr, + struct thread *aslr_thread, + u8 cpumode, u64 start, + u64 len, u64 pgoff) +{ + /* Address location for dso lookup. */ + struct addr_location al; + /* Original ASLR address based key for the remap table. */ + struct remap_addresses_key remap_key; + /* The address in the ASLR sanitized address space less pg_off. */ + u64 *remapped_invariant_ptr; + /* Key for the maximum address in a process. */ + struct top_addresses_key top_addr_key; + /* Value in top address table. */ + struct process_top_address *top = NULL; + /* Address in ASLR sanitized address space. */ + u64 remap_addr; + /* Potentially allocated remap table key. 
*/ + struct remap_addresses_key *new_remap_key = NULL; + /* + * Potentially allocated remap table value. + * TODO: Avoid allocation necessary for perf 32-bit binary support. + */ + u64 *new_remap_val = NULL; + int err; + + if (!aslr_thread) + return 0; + + /* The key to look up an incoming address to the outgoing value. */ + addr_location__init(&al); + remap_key.machine = maps__machine(thread__maps(aslr_thread)); + remap_key.pid = (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? + kernel_pid : thread__pid(aslr_thread); + if (thread__find_map(aslr_thread, cpumode, start, &al)) { + struct dso *dso = map__dso(al.map); + const char *dso_name = dso ? dso__long_name(dso) : NULL; + + remap_key.dso = dso; + if (dso && !is_anon_memory(dso_name) && !is_no_dso_memory(dso_name)) + remap_key.invariant = map__start(al.map) - map__pgoff(al.map); + else + remap_key.invariant = map__start(al.map); + } else { + remap_key.dso = NULL; + remap_key.invariant = start; + } + + /* The key to look up top allocated address. */ + top_addr_key.machine = remap_key.machine; + top_addr_key.pid = remap_key.pid; + + if (hashmap__find(&aslr->remap_addresses, &remap_key, &remapped_invariant_ptr)) { + /* Mmap already exists. */ + u64 calculated_max; + + if (al.map) { + /* + * The cached value is the base of the invariant. We add the + * offset into the VMA (start - map__start), plus the map's + * pgoff, to get the precise virtual address within this chunk. + */ + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + + (start - map__start(al.map)); + } else { + /* + * For unmapped memory (e.g. kernel anonymous), the cached value + * was stored offset by pgoff. Adding pgoff yields the true remap_addr. + */ + remap_addr = *remapped_invariant_ptr + pgoff; + } + + calculated_max = remap_addr + len; + + /* See if top mapping was expanded. 
*/ + if (hashmap__find(&aslr->top_addresses, &top_addr_key, &top)) { + if (calculated_max > top->remapped_max) + top->remapped_max = calculated_max; + } + addr_location__exit(&al); + return remap_addr; + } + /* No mmap, create an entry from the top address. */ + if (hashmap__find(&aslr->top_addresses, &top_addr_key, &top)) { + struct addr_location prev_al; + bool is_contiguous = false; + + /* Current max allocated mmap address within the process. */ + remap_addr = top->remapped_max; + + addr_location__init(&prev_al); + if (thread__find_map(aslr_thread, cpumode, start - 1, &prev_al)) { + if (map__end(prev_al.map) == start) + is_contiguous = true; + } + addr_location__exit(&prev_al); + + if (is_contiguous) { + /* Contiguous mapping, do not add 1 page gap! */ + remap_addr = round_up_to_page_size(remap_addr); + } else { + /* Give 1 page gap from current max page. */ + remap_addr = round_up_to_page_size(remap_addr); + remap_addr += page_size; + } + if (remap_addr + len > top->remapped_max) + top->remapped_max = remap_addr + len; + } else { + /* First address of the process, allocate key and first top address. */ + struct top_addresses_key *tk; + struct process_top_address *top_val; + struct machine *machine = remap_key.machine; + struct perf_env *env = machine ? machine->env : NULL; + bool is_64 = env ? perf_env__kernel_is_64_bit(env) : (sizeof(void *) == 8); + u64 kernel_start_addr = is_64 ? kernel_space_start_64 : kernel_space_start_32; + + remap_addr = (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? 
+ kernel_start_addr : user_space_start; + remap_addr = round_up_to_page_size(remap_addr); + + tk = malloc(sizeof(*tk)); + top_val = malloc(sizeof(*top_val)); + if (!tk || !top_val) { + err = -ENOMEM; + } else { + *tk = top_addr_key; + top_val->remapped_max = remap_addr + len; + err = hashmap__insert(&aslr->top_addresses, tk, top_val, + HASHMAP_ADD, NULL, NULL); + } + if (err) { + errno = -err; + pr_err("Failure to add ASLR process top address %m\n"); + free(tk); + free(top_val); + addr_location__exit(&al); + return 0; + } + } + /* Create remapping entry. */ + new_remap_key = malloc(sizeof(*new_remap_key)); + new_remap_val = malloc(sizeof(u64)); + if (!new_remap_key || !new_remap_val) { + err = -ENOMEM; + } else { + *new_remap_key = remap_key; + new_remap_key->dso = dso__get(remap_key.dso); + if (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) { + if (al.map) { + *new_remap_val = remap_addr - + (start - map__start(al.map)) - + map__pgoff(al.map); + } else { + /* + * Subtract pgoff from the base virtual address so that + * when the lookup path adds pgoff back, it perfectly + * cancels out and returns remap_addr. + */ + *new_remap_val = remap_addr - pgoff; + } + } else { + *new_remap_val = remap_addr - (al.map ? 
map__pgoff(al.map) : pgoff); + } + err = hashmap__add(&aslr->remap_addresses, new_remap_key, new_remap_val); + if (err) + dso__put(new_remap_key->dso); + } + if (err) { + errno = -err; + pr_err("Failure to add ASLR remapping %m\n"); + free(new_remap_key); + free(new_remap_val); + addr_location__exit(&al); + return 0; + } + addr_location__exit(&al); + return remap_addr; +} + +static int aslr_tool__process_mmap(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_mmap(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap.pid, event->mmap.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap, &event->mmap, event->mmap.header.size); + /* Remaps the mmap.start. */ + new_event->mmap.start = aslr_tool__findnew_mapping(aslr, thread, cpumode, + event->mmap.start, + event->mmap.len, + event->mmap.pgoff); + /* + * For anonymous memory (and kernel maps), the kernel populates the + * event's pgoff field with the original un-obfuscated virtual address + * in bytes (i.e. (addr >> PAGE_SHIFT) << PAGE_SHIFT). 
+ * We must overwrite pgoff with the new remapped byte address to prevent + * leaking the original ASLR layout. + */ + if (cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL || + is_anon_memory(event->mmap.filename) || is_no_dso_memory(event->mmap.filename)) + new_event->mmap.pgoff = new_event->mmap.start; + err = delegate->mmap(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_mmap2(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_mmap2(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap2.pid, event->mmap2.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap2, &event->mmap2, event->mmap2.header.size); + /* Remaps the mmap2.start. */ + new_event->mmap2.start = aslr_tool__findnew_mapping(aslr, thread, cpumode, + event->mmap2.start, + event->mmap2.len, + event->mmap2.pgoff); + /* + * For anonymous memory (and kernel maps), the kernel populates the + * event's pgoff field with the original un-obfuscated virtual address + * in bytes (i.e. (addr >> PAGE_SHIFT) << PAGE_SHIFT). 
+ * We must overwrite pgoff with the new remapped byte address to prevent + * leaking the original ASLR layout. + */ + if (cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL || + is_anon_memory(event->mmap2.filename) || is_no_dso_memory(event->mmap2.filename)) + new_event->mmap2.pgoff = new_event->mmap2.start; + err = delegate->mmap2(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_comm(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_comm(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->comm(delegate, event, sample, machine); +} + +static int aslr_tool__process_fork(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. 
in the ASLR before virtual address space. */ + err = perf_event__process_fork(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->fork(delegate, event, sample, machine); +} + +static int aslr_tool__process_exit(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_exit(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->exit(delegate, event, sample, machine); +} + +static int aslr_tool__process_text_poke(const struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + /* Drop in case the instruction encodes an ASLR revealing address. 
*/ + return 0; +} + +static int aslr_tool__process_ksymbol(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + err = perf_event__process_ksymbol(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, kernel_pid, 0); + if (!thread) + return -ENOMEM; + memcpy(&new_event->ksymbol, &event->ksymbol, event->ksymbol.header.size); + /* Remaps the ksymbol.addr. */ + new_event->ksymbol.addr = aslr_tool__findnew_mapping(aslr, thread, + PERF_RECORD_MISC_KERNEL, + event->ksymbol.addr, + event->ksymbol.len, + /*pgoff=*/0); + + err = delegate->ksymbol(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_sample(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct perf_tool *delegate = aslr->tool.delegate; + + return delegate->sample(delegate, event, sample, machine); +} + +static int skipn(int fd, off_t n) +{ + char buf[4096]; + ssize_t ret; + + while (n > 0) { + ret = read(fd, buf, min_t(off_t, n, (off_t)sizeof(buf))); + if (ret <= 0) + return ret; + n -= ret; + } + + return 0; +} + +static s64 
aslr_tool__process_auxtrace(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, + union perf_event *event) +{ + pr_warning_once("ASLR: Dropping auxtrace data as it cannot be obfuscated.\n"); + if (perf_data__is_pipe(session->data)) { + /* Copy behavior of the stub by reading all pipe data. */ + int err = skipn(perf_data__fd(session->data), event->auxtrace.size); + + if (err < 0) + return err; + } + return event->auxtrace.size; +} + +static int aslr_tool__process_auxtrace_info(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + +static int aslr_tool__process_auxtrace_error(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + +static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) +{ + delegate_tool__init(&aslr->tool, delegate); + aslr->tool.tool.ordered_events = true; + + machines__init(&aslr->machines); + + hashmap__init(&aslr->remap_addresses, + remap_addresses__hash, remap_addresses__equal, + /*ctx=*/NULL); + hashmap__init(&aslr->top_addresses, + top_addresses__hash, top_addresses__equal, + /*ctx=*/NULL); + + aslr->tool.tool.sample = aslr_tool__process_sample; + /* read - reads a counter, okay to delegate. */ + aslr->tool.tool.mmap = aslr_tool__process_mmap; + aslr->tool.tool.mmap2 = aslr_tool__process_mmap2; + aslr->tool.tool.comm = aslr_tool__process_comm; + aslr->tool.tool.fork = aslr_tool__process_fork; + aslr->tool.tool.exit = aslr_tool__process_exit; + /* namespaces, cgroup, lost, lost_sample, aux, */ + /* itrace_start, aux_output_hw_id, context_switch, throttle, unthrottle */ + /* - no virtual addresses. */ + aslr->tool.tool.ksymbol = aslr_tool__process_ksymbol; + /* bpf - no virtual address. 
*/ + aslr->tool.tool.text_poke = aslr_tool__process_text_poke; + /* + * event_update, tracing_data, finished_round, build_id, id_index, + * auxtrace_info, auxtrace_error, time_conv, thread_map, cpu_map, + * stat_config, stat, feature, finished_init, bpf_metadata, compressed, + * auxtrace - no virtual addresses. + */ + aslr->tool.tool.auxtrace = aslr_tool__process_auxtrace; + aslr->tool.tool.auxtrace_info = aslr_tool__process_auxtrace_info; + aslr->tool.tool.auxtrace_error = aslr_tool__process_auxtrace_error; +} + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate) +{ + struct aslr_tool *aslr = zalloc(sizeof(*aslr)); + + if (!aslr) + return NULL; + + aslr_tool__init(aslr, delegate); + return &aslr->tool.tool; +} + +void aslr_tool__delete(struct perf_tool *tool) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct hashmap_entry *cur; + size_t bkt; + + if (!tool) + return; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + + hashmap__for_each_entry(&aslr->remap_addresses, cur, bkt) { + struct remap_addresses_key *key = (struct remap_addresses_key *)cur->pkey; + + if (key) + dso__put(key->dso); + zfree(&cur->pkey); + zfree(&cur->pvalue); + } + hashmap__for_each_entry(&aslr->top_addresses, cur, bkt) { + zfree(&cur->pkey); + zfree(&cur->pvalue); + } + + hashmap__clear(&aslr->remap_addresses); + hashmap__clear(&aslr->top_addresses); + aslr_tool__destroy_machines_priv(&aslr->machines); + machines__destroy_kernel_maps(&aslr->machines); + machines__exit(&aslr->machines); + free(aslr); +} diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h new file mode 100644 index 000000000000..a9b90bf29540 --- /dev/null +++ b/tools/perf/util/aslr.h @@ -0,0 +1,37 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_ASLR_H +#define __PERF_ASLR_H + +#include <linux/perf_event.h> + +#define ASLR_SUPPORTED_SAMPLE_TYPE ( \ + PERF_SAMPLE_IDENTIFIER | \ + PERF_SAMPLE_IP | \ + 
PERF_SAMPLE_TID | \ + PERF_SAMPLE_TIME | \ + PERF_SAMPLE_ADDR | \ + PERF_SAMPLE_ID | \ + PERF_SAMPLE_STREAM_ID | \ + PERF_SAMPLE_CPU | \ + PERF_SAMPLE_PERIOD | \ + PERF_SAMPLE_READ | \ + PERF_SAMPLE_CALLCHAIN | \ + PERF_SAMPLE_RAW | \ + PERF_SAMPLE_BRANCH_STACK | \ + PERF_SAMPLE_STACK_USER | \ + PERF_SAMPLE_WEIGHT_TYPE | \ + PERF_SAMPLE_DATA_SRC | \ + PERF_SAMPLE_TRANSACTION | \ + PERF_SAMPLE_PHYS_ADDR | \ + PERF_SAMPLE_CGROUP | \ + PERF_SAMPLE_DATA_PAGE_SIZE | \ + PERF_SAMPLE_CODE_PAGE_SIZE | \ + PERF_SAMPLE_AUX) + +struct perf_tool; +struct evsel; + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate); +void aslr_tool__delete(struct perf_tool *aslr); + +#endif /* __PERF_ASLR_H */ -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v16 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking 2026-06-06 15:14 ` [PATCH v16 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers @ 2026-06-06 15:31 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-06-06 15:31 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 2 potential issue(s) to consider: - [High] The attribute size check incorrectly uses `event->header.size` instead of `attr.size`, potentially overwriting the `id` array. - [Medium] The ASLR tool's isolated `machines` collection lacks a `perf_env` pointer, forcing kernel space mapping to silently fall back to the host's compilation architecture bitness. -- commit 77ff517ae16085496a64df04678110167f79e9c4 Author: Ian Rogers <irogers@google.com> Subject: perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking This patch introduces the core ASLR remapping tool infrastructure and implements remapping/tracking for metadata events. It adds an aslr option for perf inject that remaps all virtual addresses to prevent virtual address information leakage when sharing perf.data files. > diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c > --- a/tools/perf/builtin-inject.c > +++ b/tools/perf/builtin-inject.c [ ... ] > @@ -242,8 +245,43 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, [ ... 
] > + if (stripped_event->attr.attr.type == PERF_TYPE_BREAKPOINT && > + event->header.size >= (offsetof(struct perf_record_header_attr, > + attr.bp_addr) + sizeof(u64))) { > + stripped_event->attr.attr.bp_addr = 0; > + } else if (stripped_event->attr.attr.type >= PERF_TYPE_MAX) { > + struct perf_pmu *pmu; > + > + pmu = perf_pmus__find_by_type(stripped_event->attr.attr.type); > + if (pmu && (!strcmp(pmu->name, "kprobe") || > + !strcmp(pmu->name, "uprobe"))) { > + if (event->header.size >= > + (offsetof(struct perf_record_header_attr, > + attr.config1) + sizeof(u64))) > + stripped_event->attr.attr.config1 = 0; > + if (event->header.size >= > + (offsetof(struct perf_record_header_attr, > + attr.config2) + sizeof(u64))) > + stripped_event->attr.attr.config2 = 0; > + } > + } [Severity: High] Does this incorrectly check event->header.size instead of the actual attr.size? If an older perf.data file is processed with a shorter ABI struct (such as the 64-byte PERF_ATTR_SIZE_VER0) and contains appended IDs, this boundary check would pass because the total event size exceeds the offset. Could this cause the assignment to config1 or config2 to overwrite the leading elements of the appended id[] array with zeroes, permanently corrupting the event ID stream? > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > --- /dev/null > +++ b/tools/perf/util/aslr.c [ ... ] > +static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) > +{ > + delegate_tool__init(&aslr->tool, delegate); > + aslr->tool.tool.ordered_events = true; > + > + machines__init(&aslr->machines); [ ... ] > +static u64 aslr_tool__findnew_mapping(struct aslr_tool *aslr, [ ... ] > + } else { > + /* First address of the process, allocate key and first top address. */ > + struct top_addresses_key *tk; > + struct process_top_address *top_val; > + struct machine *machine = remap_key.machine; > + struct perf_env *env = machine ? machine->env : NULL; > + bool is_64 = env ? 
perf_env__kernel_is_64_bit(env) : (sizeof(void *) == 8); [Severity: Medium] Because machines__init() creates an isolated collection of machines without linking its host environment to the primary session's environment, will env always resolve to NULL here? If env is NULL, this falls back to the executing host's pointer size (sizeof(void *) == 8) rather than the recorded target architecture. Will this break cross-platform analysis when analyzing a 32-bit perf.data file on a 64-bit host? -- Sashiko AI review · https://sashiko.dev/#/patchset/20260606151451.2913860-1-irogers@google.com?part=2 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v16 3/5] perf inject/aslr: Implement sample address remapping 2026-06-06 15:14 ` [PATCH v16 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-06 15:14 ` [PATCH v16 1/5] perf maps: Add maps__mutate_mapping Ian Rogers 2026-06-06 15:14 ` [PATCH v16 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers @ 2026-06-06 15:14 ` Ian Rogers 2026-06-06 15:30 ` sashiko-bot 2026-06-06 15:14 ` [PATCH v16 4/5] perf test: Add inject ASLR test Ian Rogers ` (2 subsequent siblings) 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-06 15:14 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz Add the sample address remapping logic to the ASLR tool. This patch implements aslr_tool__process_sample, which parses sample events, remaps IPs, ADDRs, callchains, and branch stacks using the mappings collected from metadata events, and drops potentially leaking raw, register, stack, physical address, and aux samples. Also adds the aslr_tool__remap_address helper function. 
Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/util/aslr.c | 454 +++++++++++++++++++++++++++++++++++++++- tools/perf/util/evsel.c | 6 +- tools/perf/util/evsel.h | 10 +- 3 files changed, 461 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c index 084158014dc7..7afa5a0dac2f 100644 --- a/tools/perf/util/aslr.c +++ b/tools/perf/util/aslr.c @@ -110,6 +110,60 @@ static u64 round_up_to_page_size(u64 addr) return (addr + page_size - 1) & ~((u64)page_size - 1); } +static u64 aslr_tool__remap_address(struct aslr_tool *aslr, + struct thread *aslr_thread, + u8 cpumode, + u64 addr) +{ + struct addr_location al; + struct remap_addresses_key key; + u64 *remapped_invariant_ptr = NULL; + u64 remap_addr = 0; + u8 effective_cpumode = cpumode; + + if (!aslr_thread) + return 0; /* No thread. */ + + addr_location__init(&al); + if (!thread__find_map(aslr_thread, cpumode, addr, &al)) { + /* + * If lookup fails with specified cpumode, try fallback to the other space + * to be robust against bad cpumode in samples. + */ + if (cpumode == PERF_RECORD_MISC_KERNEL) + effective_cpumode = PERF_RECORD_MISC_USER; + else if (cpumode == PERF_RECORD_MISC_USER) + effective_cpumode = PERF_RECORD_MISC_KERNEL; + else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + effective_cpumode = PERF_RECORD_MISC_GUEST_USER; + else if (cpumode == PERF_RECORD_MISC_GUEST_USER) + effective_cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + + if (!thread__find_map(aslr_thread, effective_cpumode, addr, &al)) { + addr_location__exit(&al); + return 0; /* No mmap. */ + } + } + + key.machine = maps__machine(thread__maps(aslr_thread)); + key.dso = map__dso(al.map); + key.invariant = map__start(al.map) - map__pgoff(al.map); + key.pid = (effective_cpumode == PERF_RECORD_MISC_KERNEL || + effective_cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? 
+ kernel_pid : thread__pid(aslr_thread); + + if (hashmap__find(&aslr->remap_addresses, &key, &remapped_invariant_ptr)) { + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + + (addr - map__start(al.map)); + } else { + pr_debug("Cannot find a remapped entry for address %lx in mapping %lx(%lx) for pid=%d\n", + addr, map__start(al.map), map__size(al.map), key.pid); + } + + addr_location__exit(&al); + return remap_addr; +} + struct aslr_machine_priv { bool kernel_maps_loaded; }; @@ -597,13 +651,405 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, struct perf_sample *sample, struct machine *machine) { - struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); - struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); - struct perf_tool *delegate = aslr->tool.delegate; + struct evsel *evsel = sample->evsel; + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + int ret; + u64 sample_type; + struct thread *thread; + struct machine *aslr_machine; + __u64 max_i; + __u64 max_j; + union perf_event *new_event; + struct perf_sample new_sample; + __u64 *in_array, *out_array; + u8 cpumode; + u64 addr; + size_t i; + size_t j; + bool orig_needs_swap; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + if (evsel__is_dummy_event(evsel)) + return delegate->sample(delegate, event, sample, machine); + + ret = -EFAULT; + sample_type = evsel->core.attr.sample_type; + max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); + max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); + new_event = (union perf_event *)aslr->event_copy; + cpumode = sample->cpumode; + i = 0; + j = 0; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if 
(aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + thread = machine__findnew_thread(aslr_machine, sample->pid, sample->tid); + + if (!thread) + return -ENOMEM; + + if (max_i > PERF_SAMPLE_MAX_SIZE / sizeof(u64)) + goto out_put; + + new_event->sample.header = event->sample.header; + + in_array = &event->sample.array[0]; + out_array = &new_event->sample.array[0]; + +#define CHECK_BOUNDS(required_i, required_j) \ + (i + (required_i) > max_i || j + (required_j) > max_j) + +#define COPY_U64() \ + do { \ + if (CHECK_BOUNDS(1, 1)) { \ + ret = -EFAULT; \ + goto out_put; \ + } \ + out_array[j++] = in_array[i++]; \ + } while (0) + +#define REMAP_U64(addr_field) \ + do { \ + if (CHECK_BOUNDS(1, 1)) { \ + ret = -EFAULT; \ + goto out_put; \ + } \ + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr_field); \ + i++; \ + } while (0) + + if (sample_type & PERF_SAMPLE_IDENTIFIER) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_IP) + REMAP_U64(sample->ip); + if (sample_type & PERF_SAMPLE_TID) + COPY_U64(); /* pid, tid */ + if (sample_type & PERF_SAMPLE_TIME) + COPY_U64(); /* time */ + if (sample_type & PERF_SAMPLE_ADDR) + REMAP_U64(sample->addr); + if (sample_type & PERF_SAMPLE_ID) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_STREAM_ID) + COPY_U64(); /* stream_id */ + if (sample_type & PERF_SAMPLE_CPU) + COPY_U64(); /* cpu, res */ + if (sample_type & PERF_SAMPLE_PERIOD) + COPY_U64(); /* period */ + if (sample_type & PERF_SAMPLE_READ) { + if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } else { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) 
{ + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + nr = out_array[j++]; + i++; + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + for (u64 cntr = 0; cntr < nr; cntr++) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } + } + } + if (sample_type & PERF_SAMPLE_CALLCHAIN) { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + nr = out_array[j++]; + i++; + + for (u64 cntr = 0; cntr < nr; cntr++) { + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + addr = in_array[i++]; + if (addr >= PERF_CONTEXT_MAX) { + out_array[j++] = addr; + switch (addr) { + case PERF_CONTEXT_HV: + cpumode = PERF_RECORD_MISC_HYPERVISOR; + break; + case PERF_CONTEXT_KERNEL: + cpumode = PERF_RECORD_MISC_KERNEL; + break; + case PERF_CONTEXT_USER: + cpumode = PERF_RECORD_MISC_USER; + break; + case PERF_CONTEXT_GUEST: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_KERNEL: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_USER: + cpumode = PERF_RECORD_MISC_GUEST_USER; + break; + case PERF_CONTEXT_USER_DEFERRED: + if (cntr + 1 >= nr) { + pr_debug("Truncated callchain deferred cookie context\n"); + ret = 0; + goto out_put; + } + /* + * Immediately followed by a 64-bit + * stitching cookie. Skip/Copy it! 
+ */ + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j++] = in_array[i++]; + cntr++; + cpumode = PERF_RECORD_MISC_USER; + break; + default: + pr_debug("invalid callchain context: %"PRIx64"\n", addr); + ret = 0; + goto out_put; + } + continue; + } + out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr); + } + } + if (sample_type & PERF_SAMPLE_RAW) { + size_t bytes = sizeof(u32) + sample->raw_size; + size_t u64_words = (bytes + 7) / 8; + + if (i + u64_words > max_i || j + u64_words > max_j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], bytes); + i += u64_words; + j += u64_words; + /* + * TODO: certain raw samples can be remapped, such as + * tracepoints by examining their fields. + */ + pr_debug("Dropping raw samples as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + nr = out_array[j++]; + i++; + + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) + COPY_U64(); /* hw_idx */ + + if (nr > (ULLONG_MAX / 3)) { + ret = -EFAULT; + goto out_put; + } + if (nr * 3 > max_i - i || nr * 3 > max_j - j) { + ret = -EFAULT; + goto out_put; + } + for (u64 cntr = 0; cntr < nr; cntr++) { + out_array[j++] = aslr_tool__remap_address(aslr, thread, + sample->cpumode, + in_array[i++]); /* from */ + out_array[j++] = aslr_tool__remap_address(aslr, thread, + sample->cpumode, + in_array[i++]); /* to */ + out_array[j++] = in_array[i++]; /* flags */ + } + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) { + if (nr > max_i - i || nr > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); + i += nr; + j += nr; + /* TODO: confirm branch counters don't leak ASLR information. 
*/ + pr_debug("Dropping sample branch counters as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + } + if (sample_type & PERF_SAMPLE_REGS_USER) { + if (CHECK_BOUNDS(1, 0)) { + ret = -EFAULT; + goto out_put; + } + /* abi */ + COPY_U64(); + /* TODO: can this be less conservative? */ + pr_debug("Dropping regs user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_STACK_USER) { + u64 size; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + size = out_array[j++]; + i++; + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + if (size % 8) { + size_t pad = 8 - (size % 8); + + memset(((char *)&out_array[j]) + size, 0, pad); + } + i += u64_words; + j += u64_words; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping stack user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + COPY_U64(); /* perf_sample_weight */ + if (sample_type & PERF_SAMPLE_DATA_SRC) + COPY_U64(); /* data_src */ + if (sample_type & PERF_SAMPLE_TRANSACTION) + COPY_U64(); /* transaction */ + if (sample_type & PERF_SAMPLE_REGS_INTR) { + if (CHECK_BOUNDS(1, 0)) { + ret = -EFAULT; + goto out_put; + } + /* abi */ + COPY_U64(); + /* TODO: can this be less conservative? */ + pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_PHYS_ADDR) { + COPY_U64(); /* phys_addr */ + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping physical address sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_CGROUP) + COPY_U64(); /* cgroup */ + if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + COPY_U64(); /* data_page_size */ + if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + COPY_U64(); /* code_page_size */ + + if (sample_type & PERF_SAMPLE_AUX) { + u64 size; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + size = out_array[j++]; + i++; + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + if (size % 8) { + size_t pad = 8 - (size % 8); + + memset(((char *)&out_array[j]) + size, 0, pad); + } + i += u64_words; + j += u64_words; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping aux sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + if (evsel__is_offcpu_event(evsel)) { + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping off-CPU sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); + + orig_needs_swap = evsel->needs_swap; - return delegate->sample(delegate, event, sample, machine); + evsel->needs_swap = false; + perf_sample__init(&new_sample, /*all=*/ true); + ret = evsel__parse_sample(evsel, new_event, &new_sample); + evsel->needs_swap = orig_needs_swap; + + if (ret) { + perf_sample__exit(&new_sample); + goto out_put; + } + + new_sample.evsel = evsel; + ret = delegate->sample(delegate, new_event, &new_sample, machine); + perf_sample__exit(&new_sample); + +out_put: + thread__put(thread); + return ret; } +#undef CHECK_BOUNDS +#undef COPY_U64 +#undef REMAP_U64 + static int skipn(int fd, off_t n) { char buf[4096]; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 34c03f47a913..05fa0010c858 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -3337,11 +3337,11 @@ static int __set_offcpu_sample(struct perf_sample *data) return -EFAULT; } -int evsel__parse_sample(struct evsel *evsel, union perf_event *event, - struct perf_sample *data) +int __evsel__parse_sample(struct evsel *evsel, union perf_event *event, + struct perf_sample *data, bool needs_swap) { u64 type = evsel->core.attr.sample_type; - bool swapped = evsel->needs_swap; + bool swapped = needs_swap; const __u64 *array; u16 max_size = event->header.size; const void *endp = (void *)event + max_size; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 8178858d168a..8009be22cc3f 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -432,8 +432,14 @@ static inline int evsel__read_on_cpu_scaled(struct evsel *evsel, int cpu_map_idx return __evsel__read_on_cpu(evsel, cpu_map_idx, thread, true); } -int evsel__parse_sample(struct evsel *evsel, union perf_event *event, - struct perf_sample *sample); +int __evsel__parse_sample(struct evsel *evsel, union 
perf_event *event, + struct perf_sample *data, bool needs_swap); + +static inline int evsel__parse_sample(struct evsel *evsel, union perf_event *event, + struct perf_sample *data) +{ + return __evsel__parse_sample(evsel, event, data, evsel->needs_swap); +} int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event, u64 *timestamp); -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v16 3/5] perf inject/aslr: Implement sample address remapping 2026-06-06 15:14 ` [PATCH v16 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers @ 2026-06-06 15:30 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-06-06 15:30 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider: - [High] The tool modifies the shared `evsel->needs_swap` flag without locking and bypasses necessary 32-bit field unswapping, corrupting cross-endian sample parsing. -- commit 3f98dbfbab405b30a77c04881137ca3c9d4c8fa6 Author: Ian Rogers <irogers@google.com> perf inject/aslr: Implement sample address remapping This commit adds the sample address remapping logic to the ASLR tool. It parses sample events, remaps addresses and IPs, and filters out potentially leaking raw samples. > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > index 084158014dc73..7afa5a0dac2f3 100644 > --- a/tools/perf/util/aslr.c > +++ b/tools/perf/util/aslr.c [ ... ] > @@ -597,13 +651,405 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, [ ... ] > +#define COPY_U64() \ > + do { \ > + if (CHECK_BOUNDS(1, 1)) { \ > + ret = -EFAULT; \ > + goto out_put; \ > + } \ > + out_array[j++] = in_array[i++]; \ > + } while (0) [ ... ] > + new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); > + > + orig_needs_swap = evsel->needs_swap; > > - return delegate->sample(delegate, event, sample, machine); > + evsel->needs_swap = false; > + perf_sample__init(&new_sample, /*all=*/ true); > + ret = evsel__parse_sample(evsel, new_event, &new_sample); > + evsel->needs_swap = orig_needs_swap; [Severity: High] Does temporarily setting evsel->needs_swap to false here break cross-endian parsing and introduce a data race? 
Since new_event is built using COPY_U64(), it directly copies the 64-bit swapped words from the original event. If we force needs_swap to false when parsing new_event, won't that bypass the necessary unswapping of packed 32-bit fields like pid and tid, resulting in corrupted data for cross-endian files? Also, mutating the shared evsel->needs_swap flag without locking could cause data races with other threads processing samples. Since this patch introduces __evsel__parse_sample() specifically to take a needs_swap parameter, should we be using that helper here instead of mutating the evsel? And should we be passing the original orig_needs_swap value to it so that the packed fields are unswapped correctly? > + > + if (ret) { > + perf_sample__exit(&new_sample); > + goto out_put; > + } -- Sashiko AI review · https://sashiko.dev/#/patchset/20260606151451.2913860-1-irogers@google.com?part=3 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v16 4/5] perf test: Add inject ASLR test 2026-06-06 15:14 ` [PATCH v16 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (2 preceding siblings ...) 2026-06-06 15:14 ` [PATCH v16 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers @ 2026-06-06 15:14 ` Ian Rogers 2026-06-06 15:14 ` [PATCH v16 5/5] perf aslr: Strip sample registers Ian Rogers 2026-06-07 6:09 ` [PATCH v17 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 5 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-06 15:14 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz Add a new shell test to verify the 'perf inject --aslr' feature. The test covers: - Basic address remapping for user space samples. - Pipe mode coverage for 'perf record' piped into 'perf inject --aslr'. - Callchain address remapping. - Consistency of 'perf report' output before and after injection. - Pipe mode report consistency. - Dropping of samples that leak ASLR info (physical addresses). - Kernel address remapping (utilizing a dedicated kernel-intensive VFS dd workload to guarantee continuous timer interrupts sampling flow inside kernel privilege states). - Kernel report consistency with address normalization. The test suite is hardened with global 'set -o pipefail' assertions to catch pipeline failures, stream-consuming awk processors to handle SIGPIPE signals, and a dedicated pipe output scenario validating raw 'perf inject -o -' stdout streams. 
Assisted-by: Antigravity:gemini-3.5-flash Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/tests/shell/inject_aslr.sh | 464 ++++++++++++++++++++++++++ 1 file changed, 464 insertions(+) create mode 100755 tools/perf/tests/shell/inject_aslr.sh diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh new file mode 100755 index 000000000000..d8ded16ba905 --- /dev/null +++ b/tools/perf/tests/shell/inject_aslr.sh @@ -0,0 +1,464 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# perf inject --aslr test + +set -e +set -o pipefail + +shelldir=$(dirname "$0") +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + +sym="noploop" + +skip_test_missing_symbol ${sym} + +# Create global temp directory +temp_dir=$(mktemp -d /tmp/perf-test-aslr.XXXXXXXXXX) + +prog="perf test -w noploop" +[ "$(uname -m)" = "s390x" ] && prog="$prog 3" +err=0 +kprog="dd if=/dev/zero of=/dev/null bs=1M count=500" + +cleanup() { + local exit_code=${1:-$?} + trap - EXIT TERM INT + if [ "${exit_code}" -ne 0 ] || [ "${err}" -ne 0 ]; then + echo "Test failed! Preserving temp directory: ${temp_dir}" + return + fi + # Check if temp_dir is set and looks sane before removing + if [[ "${temp_dir}" =~ ^/tmp/perf-test-aslr\. ]]; then + rm -rf "${temp_dir}" + fi +} + +trap_cleanup() { + local exit_code=$? 
+ echo "Unexpected signal in ${FUNCNAME[1]}" + cleanup ${exit_code} + exit ${exit_code} +} +trap trap_cleanup EXIT TERM INT + +get_noploop_addr() { + local file=$1 + perf script -i "$file" | awk ' + BEGIN { found=0 } + { + for (i=1; i<=NF; i++) { + if ($i ~ /noploop\+/) { + if (!found) { + print $(i-1) + found=1 + } + } + } + }' +} + +test_basic_aslr() { + echo "Test basic ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.basic.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.basic.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -v --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Basic ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Basic ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Basic ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Basic ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Basic ASLR test [Success]" + fi +} + +test_pipe_aslr() { + echo "Test pipe mode ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe.XXXXXX") + + # Use tee to save the original pipe data for comparison + perf record -e task-clock:u -o - ${prog} | tee "${data}" | perf inject --aslr -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Pipe ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Pipe ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Pipe ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Pipe ASLR test [Failed - addresses are not remapped]" + 
err=1 + else + echo "Pipe ASLR test [Success]" + fi +} + +test_callchain_aslr() { + echo "Test Callchain ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.callchain.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.callchain.XXXXXX") + + perf record -g -e task-clock:u -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Callchain ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Callchain ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Callchain ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Callchain ASLR test [Failed - addresses are not remapped]" + err=1 + else + # Extract callchain addresses (indented lines starting with hex addresses) + orig_callchain=$(perf script -i "${data}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + new_callchain=$(perf script -i "${data2}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + + if [ -z "$orig_callchain" ]; then + echo "Callchain ASLR test [Failed - no callchain samples in original file]" + err=1 + elif [ -z "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain data was dropped]" + err=1 + elif [ "$orig_callchain" = "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain addresses were not remapped]" + err=1 + else + echo "Callchain ASLR test [Success]" + fi + fi +} + +test_report_aslr() { + echo "Test perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i 
"${data}" -o "${data_clean}" + perf inject -v -b --aslr -i "${data}" -o "${data2}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Report ASLR test [Success]" + fi +} + +test_pipe_report_aslr() { + echo "Test pipe mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + # Use tee to save the original pipe data, then process it with inject -b + perf record -e task-clock:u -o - ${prog} | \ + tee "${data}" | \ + perf inject -b --aslr -o "${data2}" + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || 
true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Pipe Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Report ASLR test [Success]" + fi +} + +test_pipe_out_report_aslr() { + echo "Test pipe output mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_out_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf inject -b --aslr -i "${data}" -o - | perf report -i - --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! 
-s "${report1_clean}" ]; then + echo "Pipe Output Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Output Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Output Report ASLR test [Success]" + fi +} + +test_dropped_samples() { + echo "Test dropped samples (phys-data)" + local data + data=$(mktemp "${temp_dir}/perf.data.dropped.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.dropped.XXXXXX") + + # Check if --phys-data is supported by recording a short run + if ! perf record -e task-clock:u --phys-data -o "${data}" -- sleep 0.1 > /dev/null 2>&1; then + echo "Skipping dropped samples test as --phys-data is not supported" + return + fi + + perf record -e task-clock:u --phys-data -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + # Verify that the original file actually contained samples! + orig_samples=$(perf script -i "${data}" | wc -l) + if [ "$orig_samples" -eq 0 ]; then + echo "Dropped samples test [Failed - no samples in original file]" + err=1 + else + # Verify that samples are dropped. + samples_count=$(perf script -i "${data2}" | wc -l) + + if [ "$samples_count" -gt 0 ]; then + echo "Dropped samples test [Failed - samples were not dropped]" + err=1 + else + echo "Dropped samples test [Success]" + fi + fi +} + +test_kernel_aslr() { + echo "Test kernel ASLR remapping" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then + echo "Skipping kernel ASLR test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel ASLR test as kernel map could not be recorded (permissions restricted)" + return + fi + + perf inject -v --aslr -i "${kdata}" -o "${kdata2}" + + # Check if kernel addresses are remapped. + # Find the field that ends with :k: (the event name) and take the next field! + orig_addr=$(perf script -i "${kdata}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + new_addr=$(perf script -i "${kdata2}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + + echo "Kernel ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Kernel ASLR test [Failed - no kernel samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Kernel ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Kernel ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Kernel ASLR test [Success]" + fi +} + +test_kernel_report_aslr() { + echo "Test kernel perf report consistency" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel_report.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_report_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then + echo "Skipping kernel report test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel report test as kernel map could not be recorded (permissions restricted)" + return + fi + + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i "${kdata}" -o "${data_clean}" + perf inject -v -b --aslr -i "${kdata}" -o "${kdata2}" + + local report1="${temp_dir}/report_kernel1" + local report2="${temp_dir}/report_kernel2" + local report1_clean="${temp_dir}/report_kernel1.clean" + local report2_clean="${temp_dir}/report_kernel2.clean" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${kdata2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' > "${report2_clean}" || true + + # Normalize kernel DSOs and addresses in clean reports + # This allows kernel modules to be either a module or kernel.kallsyms + local report1_norm="${temp_dir}/report_kernel1.norm" + local report2_norm="${temp_dir}/report_kernel2.norm" + local diff_file="${temp_dir}/diff_kernel" + + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report1_clean}" | \ + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ + sort > "${report1_norm}" || true + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report2_clean}" | \ + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ + sort > "${report2_norm}" || true + + diff -u -w "${report1_norm}" "${report2_norm}" > "${diff_file}" || true + + if [ ! 
-s "${report1_norm}" ]; then + echo "Kernel Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Kernel Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Kernel Report ASLR test [Success]" + fi +} + +test_basic_aslr +test_pipe_aslr +test_callchain_aslr +test_report_aslr +test_pipe_report_aslr +test_pipe_out_report_aslr +test_dropped_samples +test_kernel_aslr +test_kernel_report_aslr + +cleanup ${err} +exit $err -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* [PATCH v16 5/5] perf aslr: Strip sample registers 2026-06-06 15:14 ` [PATCH v16 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (3 preceding siblings ...) 2026-06-06 15:14 ` [PATCH v16 4/5] perf test: Add inject ASLR test Ian Rogers @ 2026-06-06 15:14 ` Ian Rogers 2026-06-06 15:33 ` sashiko-bot 2026-06-07 6:09 ` [PATCH v17 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-06 15:14 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz Refactor the ASLR tool to strip out only the register dump payload by masking out the relevant perf_event_attr fields when the delegated tool is handling the data. struct aslr_evsel_priv maintains the original perf_event_attr values and is looked up via the evsel_orig_attrs hashmap. This allows us to keep samples that would otherwise be dropped because they contain registers, while still obfuscating the registers. 
Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> Signed-off-by: Ian Rogers <irogers@google.com> --- tools/perf/builtin-inject.c | 46 ++++-- tools/perf/tests/shell/inject_aslr.sh | 55 +++++++ tools/perf/util/aslr.c | 224 +++++++++++++++++++++----- tools/perf/util/aslr.h | 4 + 4 files changed, 277 insertions(+), 52 deletions(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 00a54d1c7e41..c852ade3c4e3 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -254,6 +254,12 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, return -ENOMEM; memcpy(stripped_event, event, event->header.size); stripped_event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + if (stripped_event->attr.attr.size >= + (offsetof(struct perf_event_attr, sample_regs_user) + sizeof(u64))) + stripped_event->attr.attr.sample_regs_user = 0; + if (stripped_event->attr.attr.size >= + (offsetof(struct perf_event_attr, sample_regs_intr) + sizeof(u64))) + stripped_event->attr.attr.sample_regs_intr = 0; if (stripped_event->attr.attr.type == PERF_TYPE_BREAKPOINT && event->header.size >= (offsetof(struct perf_record_header_attr, @@ -316,7 +322,9 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, attr.sample_type &= ~PERF_SAMPLE_AUX; if (inject->aslr) { attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; - if (attr.type >= PERF_TYPE_MAX) { + if (attr.type == PERF_TYPE_BREAKPOINT) { + attr.bp_addr = 0; + } else if (attr.type >= PERF_TYPE_MAX) { struct perf_pmu *pmu = perf_pmus__find_by_type(attr.type); if (pmu && (!strcmp(pmu->name, "kprobe") || !strcmp(pmu->name, "uprobe"))) { @@ -324,6 +332,8 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, attr.config2 = 0; } } + attr.sample_regs_user = 0; + attr.sample_regs_intr = 0; } if (inject->itrace_synth_opts.add_last_branch) { @@ -2643,6 +2653,10 @@ static int __cmd_inject(struct perf_inject *inject) 
evsel->core.attr.exclude_callchain_user = 0; } } + + if (inject->aslr) + aslr_tool__strip_evlist(inject->session->tool, session->evlist); + session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc, @@ -2901,6 +2915,18 @@ int cmd_inject(int argc, const char **argv) if (zstd_init(&(inject.session->zstd_data), 0) < 0) pr_warning("Decompression initialization failed.\n"); + if (inject.aslr) { + struct evsel *evsel; + + evlist__for_each_entry(inject.session->evlist, evsel) { + ret = aslr_tool__cache_orig_attrs(tool, evsel); + if (ret) { + pr_err("Failed to cache original attributes: %d\n", ret); + goto out_delete; + } + } + } + /* Save original section info before feature bits change */ ret = save_section_info(&inject); if (ret) @@ -2919,10 +2945,17 @@ int cmd_inject(int argc, const char **argv) * the input. */ if (!data.is_pipe) { + if (inject.aslr) + aslr_tool__strip_evlist(tool, inject.session->evlist); + ret = perf_event__synthesize_for_pipe(&inject.tool, inject.session, &inject.output, perf_event__repipe); + + if (inject.aslr) + aslr_tool__restore_evlist(tool, inject.session->evlist); + if (ret < 0) goto out_delete; } @@ -2988,17 +3021,6 @@ int cmd_inject(int argc, const char **argv) ret = __cmd_inject(&inject); - if (inject.aslr) { - struct evsel *evsel; - - evlist__for_each_entry(inject.session->evlist, evsel) { - evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; - - if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) - evsel->core.attr.bp_addr = 0; - } - } - guest_session__exit(&inject.guest_session); out_delete: diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh index d8ded16ba905..21d306a0ff2f 100755 --- a/tools/perf/tests/shell/inject_aslr.sh +++ b/tools/perf/tests/shell/inject_aslr.sh @@ -450,6 +450,60 @@ test_kernel_report_aslr() { fi } +test_regs_stripping() { + echo "Test user register stripping" 
+ local rdata="${temp_dir}/perf.data.regs" + local rdata2="${temp_dir}/perf.data.regs.injected" + local rdata_clean="${temp_dir}/perf.data.regs.clean" + + if ! perf record --user-regs -o "${rdata}" ${prog} > /dev/null 2>&1; then + echo "Skipping user registers test as recording failed (unsupported flag/platform)" + return + fi + + perf inject -b -i "${rdata}" -o "${rdata_clean}" + perf inject -v -b --aslr -i "${rdata}" -o "${rdata2}" + + local report1="${temp_dir}/report_regs1" + local report2="${temp_dir}/report_regs2" + local report1_clean="${temp_dir}/report_regs1.clean" + local report2_clean="${temp_dir}/report_regs2.clean" + local diff_file="${temp_dir}/diff_regs" + + perf report -i "${rdata_clean}" --stdio > "${report1}" 2>/dev/null || true + perf report -i "${rdata2}" --stdio > "${report2}" 2>/dev/null || true + + grep '%' "${report1}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | \ + sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | \ + sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! 
-s "${report1_clean}" ]; then + echo "User registers stripping test [Failed - profile trace starved/empty]" + err=1 + return + elif [ -s "${diff_file}" ]; then + echo "User registers stripping test [Failed - report parsing differs]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + return + fi + + local script_dump="${temp_dir}/script_regs_dump" + perf script -D -i "${rdata2}" > "${script_dump}" 2>/dev/null || true + if grep -q "PERF_SAMPLE_REGS_USER" "${script_dump}"; then + echo "User registers stripping test [Failed - register dumps still present]" + err=1 + else + echo "User registers stripping test [Success]" + fi +} + test_basic_aslr test_pipe_aslr test_callchain_aslr @@ -459,6 +513,7 @@ test_pipe_out_report_aslr test_dropped_samples test_kernel_aslr test_kernel_report_aslr +test_regs_stripping cleanup ${err} exit $err diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c index 7afa5a0dac2f..bac41bff1b2a 100644 --- a/tools/perf/util/aslr.c +++ b/tools/perf/util/aslr.c @@ -5,6 +5,7 @@ #include "debug.h" #include "event.h" #include "evsel.h" +#include "evlist.h" #include "machine.h" #include "map.h" #include "thread.h" @@ -12,10 +13,11 @@ #include "session.h" #include "data.h" #include "dso.h" - +#include "pmus.h" #include <internal/lib.h> /* page_size */ #include <linux/compiler.h> #include <linux/zalloc.h> +#include <errno.h> #include <inttypes.h> #include <unistd.h> @@ -43,6 +45,23 @@ struct aslr_mapping { u64 remap_start; }; +struct aslr_evsel_priv { + u64 orig_sample_type; + u64 orig_sample_regs_user; + u64 orig_sample_regs_intr; + int orig_sample_size; +}; + +static size_t evsel_hash(long key, void *ctx __maybe_unused) +{ + return (size_t)key; +} + +static bool evsel_equal(long key1, long key2, void *ctx __maybe_unused) +{ + return key1 == key2; +} + struct process_top_address { u64 remapped_max; }; @@ -57,6 +76,11 @@ struct aslr_tool { struct hashmap remap_addresses; /** @top_addresses: mapping from process to max 
remapped address. */ struct hashmap top_addresses; + /** + * @evsel_orig_attrs: mapping from evsel pointer to its original + * unstripped sample_type and registers bitmasks. + */ + struct hashmap evsel_orig_attrs; }; static const pid_t kernel_pid = -1; @@ -656,6 +680,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, struct aslr_tool *aslr; struct perf_tool *delegate; int ret; + int orig_sample_size; u64 sample_type; struct thread *thread; struct machine *aslr_machine; @@ -668,6 +693,10 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, u64 addr; size_t i; size_t j; + struct aslr_evsel_priv *priv = NULL; + u64 orig_sample_type; + u64 orig_regs_user; + u64 orig_regs_intr; bool orig_needs_swap; del_tool = container_of(tool, struct delegate_tool, tool); @@ -678,7 +707,23 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, return delegate->sample(delegate, event, sample, machine); ret = -EFAULT; - sample_type = evsel->core.attr.sample_type; + + if (hashmap__find(&aslr->evsel_orig_attrs, evsel, &priv)) { + orig_sample_type = priv->orig_sample_type; + orig_regs_user = priv->orig_sample_regs_user; + orig_regs_intr = priv->orig_sample_regs_intr; + } else { + orig_sample_type = evsel->core.attr.sample_type; + orig_regs_user = evsel->core.attr.sample_regs_user; + orig_regs_intr = evsel->core.attr.sample_regs_intr; + } + + orig_sample_size = evsel->sample_size; + + sample_type = orig_sample_type; + sample_type &= ~PERF_SAMPLE_REGS_USER; + sample_type &= ~PERF_SAMPLE_REGS_INTR; + max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); new_event = (union perf_event *)aslr->event_copy; @@ -727,25 +772,25 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, i++; \ } while (0) - if (sample_type & PERF_SAMPLE_IDENTIFIER) + if (orig_sample_type & PERF_SAMPLE_IDENTIFIER) COPY_U64(); /* id */ - if 
(sample_type & PERF_SAMPLE_IP) + if (orig_sample_type & PERF_SAMPLE_IP) REMAP_U64(sample->ip); - if (sample_type & PERF_SAMPLE_TID) + if (orig_sample_type & PERF_SAMPLE_TID) COPY_U64(); /* pid, tid */ - if (sample_type & PERF_SAMPLE_TIME) + if (orig_sample_type & PERF_SAMPLE_TIME) COPY_U64(); /* time */ - if (sample_type & PERF_SAMPLE_ADDR) + if (orig_sample_type & PERF_SAMPLE_ADDR) REMAP_U64(sample->addr); - if (sample_type & PERF_SAMPLE_ID) + if (orig_sample_type & PERF_SAMPLE_ID) COPY_U64(); /* id */ - if (sample_type & PERF_SAMPLE_STREAM_ID) + if (orig_sample_type & PERF_SAMPLE_STREAM_ID) COPY_U64(); /* stream_id */ - if (sample_type & PERF_SAMPLE_CPU) + if (orig_sample_type & PERF_SAMPLE_CPU) COPY_U64(); /* cpu, res */ - if (sample_type & PERF_SAMPLE_PERIOD) + if (orig_sample_type & PERF_SAMPLE_PERIOD) COPY_U64(); /* period */ - if (sample_type & PERF_SAMPLE_READ) { + if (orig_sample_type & PERF_SAMPLE_READ) { if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { COPY_U64(); /* value */ if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) @@ -779,7 +824,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, } } } - if (sample_type & PERF_SAMPLE_CALLCHAIN) { + if (orig_sample_type & PERF_SAMPLE_CALLCHAIN) { u64 nr; if (CHECK_BOUNDS(1, 1)) { @@ -845,7 +890,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, out_array[j++] = aslr_tool__remap_address(aslr, thread, cpumode, addr); } } - if (sample_type & PERF_SAMPLE_RAW) { + if (orig_sample_type & PERF_SAMPLE_RAW) { size_t bytes = sizeof(u32) + sample->raw_size; size_t u64_words = (bytes + 7) / 8; @@ -864,7 +909,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + if (orig_sample_type & PERF_SAMPLE_BRANCH_STACK) { u64 nr; if (CHECK_BOUNDS(1, 1)) { @@ -909,19 +954,25 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, goto out_put; } } - if 
(sample_type & PERF_SAMPLE_REGS_USER) { + if (orig_sample_type & PERF_SAMPLE_REGS_USER) { + u64 abi; + if (CHECK_BOUNDS(1, 0)) { ret = -EFAULT; goto out_put; } - /* abi */ - COPY_U64(); - /* TODO: can this be less conservative? */ - pr_debug("Dropping regs user sample as possible ASLR leak\n"); - ret = 0; - goto out_put; + abi = in_array[i++]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(orig_regs_user); + + if (nr > max_i - i) { + ret = -EFAULT; + goto out_put; + } + i += nr; + } } - if (sample_type & PERF_SAMPLE_STACK_USER) { + if (orig_sample_type & PERF_SAMPLE_STACK_USER) { u64 size; if (CHECK_BOUNDS(1, 1)) { @@ -952,39 +1003,45 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + if (orig_sample_type & PERF_SAMPLE_WEIGHT_TYPE) COPY_U64(); /* perf_sample_weight */ - if (sample_type & PERF_SAMPLE_DATA_SRC) + if (orig_sample_type & PERF_SAMPLE_DATA_SRC) COPY_U64(); /* data_src */ - if (sample_type & PERF_SAMPLE_TRANSACTION) + if (orig_sample_type & PERF_SAMPLE_TRANSACTION) COPY_U64(); /* transaction */ - if (sample_type & PERF_SAMPLE_REGS_INTR) { + if (orig_sample_type & PERF_SAMPLE_REGS_INTR) { + u64 abi; + if (CHECK_BOUNDS(1, 0)) { ret = -EFAULT; goto out_put; } - /* abi */ - COPY_U64(); - /* TODO: can this be less conservative? */ - pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); - ret = 0; - goto out_put; + abi = in_array[i++]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(orig_regs_intr); + + if (nr > max_i - i) { + ret = -EFAULT; + goto out_put; + } + i += nr; + } } - if (sample_type & PERF_SAMPLE_PHYS_ADDR) { + if (orig_sample_type & PERF_SAMPLE_PHYS_ADDR) { COPY_U64(); /* phys_addr */ /* TODO: can this be less conservative? 
*/ pr_debug("Dropping physical address sample as possible ASLR leak\n"); ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_CGROUP) + if (orig_sample_type & PERF_SAMPLE_CGROUP) COPY_U64(); /* cgroup */ - if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + if (orig_sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) COPY_U64(); /* data_page_size */ - if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + if (orig_sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) COPY_U64(); /* code_page_size */ - if (sample_type & PERF_SAMPLE_AUX) { + if (orig_sample_type & PERF_SAMPLE_AUX) { u64 size; if (CHECK_BOUNDS(1, 1)) { @@ -1024,15 +1081,23 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, } new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); - + /* Temporarily override evsel attributes to match the stripped new_event format! */ + evsel->sample_size = __evsel__sample_size(sample_type); + evsel->core.attr.sample_type = sample_type; + evsel->core.attr.sample_regs_user = 0; + evsel->core.attr.sample_regs_intr = 0; orig_needs_swap = evsel->needs_swap; - evsel->needs_swap = false; perf_sample__init(&new_sample, /*all=*/ true); ret = evsel__parse_sample(evsel, new_event, &new_sample); evsel->needs_swap = orig_needs_swap; if (ret) { + /* Restore original attributes immediately if parsing fails */ + evsel->sample_size = orig_sample_size; + evsel->core.attr.sample_type = orig_sample_type; + evsel->core.attr.sample_regs_user = orig_regs_user; + evsel->core.attr.sample_regs_intr = orig_regs_intr; perf_sample__exit(&new_sample); goto out_put; } @@ -1041,6 +1106,12 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = delegate->sample(delegate, new_event, &new_sample, machine); perf_sample__exit(&new_sample); + /* Restore original attributes so trace ingestion never desynchronizes! 
*/ + evsel->sample_size = orig_sample_size; + evsel->core.attr.sample_type = orig_sample_type; + evsel->core.attr.sample_regs_user = orig_regs_user; + evsel->core.attr.sample_regs_intr = orig_regs_intr; + out_put: thread__put(thread); return ret; @@ -1107,6 +1178,9 @@ static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) hashmap__init(&aslr->top_addresses, top_addresses__hash, top_addresses__equal, /*ctx=*/NULL); + hashmap__init(&aslr->evsel_orig_attrs, + evsel_hash, evsel_equal, + /*ctx=*/NULL); aslr->tool.tool.sample = aslr_tool__process_sample; /* read - reads a counter, okay to delegate. */ @@ -1168,11 +1242,81 @@ void aslr_tool__delete(struct perf_tool *tool) zfree(&cur->pkey); zfree(&cur->pvalue); } + hashmap__for_each_entry(&aslr->evsel_orig_attrs, cur, bkt) { + zfree(&cur->pvalue); + } hashmap__clear(&aslr->remap_addresses); hashmap__clear(&aslr->top_addresses); + hashmap__clear(&aslr->evsel_orig_attrs); aslr_tool__destroy_machines_priv(&aslr->machines); machines__destroy_kernel_maps(&aslr->machines); machines__exit(&aslr->machines); free(aslr); } + +int aslr_tool__cache_orig_attrs(struct perf_tool *tool, struct evsel *evsel) +{ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct aslr_evsel_priv *priv = zalloc(sizeof(*priv)); + int err; + + if (!priv) + return -ENOMEM; + + priv->orig_sample_type = evsel->core.attr.sample_type; + priv->orig_sample_regs_user = evsel->core.attr.sample_regs_user; + priv->orig_sample_regs_intr = evsel->core.attr.sample_regs_intr; + priv->orig_sample_size = evsel->sample_size; + + err = hashmap__add(&aslr->evsel_orig_attrs, evsel, priv); + if (err) { + free(priv); + return err; + } + return 0; +} + +void aslr_tool__strip_evlist(const struct perf_tool *tool __maybe_unused, struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + 
evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + evsel->core.attr.sample_regs_user = 0; + evsel->core.attr.sample_regs_intr = 0; + evsel->sample_size = __evsel__sample_size(evsel->core.attr.sample_type); + evsel__calc_id_pos(evsel); + + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) { + evsel->core.attr.bp_addr = 0; + } else if (evsel->core.attr.type >= PERF_TYPE_MAX) { + struct perf_pmu *pmu = perf_pmus__find_by_type(evsel->core.attr.type); + + if (pmu && (!strcmp(pmu->name, "kprobe") || + !strcmp(pmu->name, "uprobe"))) { + evsel->core.attr.config1 = 0; + evsel->core.attr.config2 = 0; + } + } + } +} + +void aslr_tool__restore_evlist(const struct perf_tool *tool, struct evlist *evlist) +{ + const struct delegate_tool *del_tool = container_of(tool, const struct delegate_tool, tool); + const struct aslr_tool *aslr = container_of(del_tool, const struct aslr_tool, tool); + struct evsel *evsel; + struct aslr_evsel_priv *priv; + + evlist__for_each_entry(evlist, evsel) { + if (hashmap__find(&aslr->evsel_orig_attrs, evsel, &priv)) { + evsel->core.attr.sample_type = priv->orig_sample_type; + evsel->core.attr.sample_regs_user = priv->orig_sample_regs_user; + evsel->core.attr.sample_regs_intr = priv->orig_sample_regs_intr; + evsel->sample_size = priv->orig_sample_size; + evsel__calc_id_pos(evsel); + } + } +} diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h index a9b90bf29540..4c2cffc0e500 100644 --- a/tools/perf/util/aslr.h +++ b/tools/perf/util/aslr.h @@ -30,8 +30,12 @@ struct perf_tool; struct evsel; +struct evlist; struct perf_tool *aslr_tool__new(struct perf_tool *delegate); void aslr_tool__delete(struct perf_tool *aslr); +int aslr_tool__cache_orig_attrs(struct perf_tool *tool, struct evsel *evsel); +void aslr_tool__strip_evlist(const struct perf_tool *tool, struct evlist *evlist); +void aslr_tool__restore_evlist(const struct perf_tool *tool, struct evlist *evlist); #endif /* __PERF_ASLR_H */ -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw 
reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v16 5/5] perf aslr: Strip sample registers 2026-06-06 15:14 ` [PATCH v16 5/5] perf aslr: Strip sample registers Ian Rogers @ 2026-06-06 15:33 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-06-06 15:33 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 3 potential issue(s) to consider: New issues: - [High] Incorrect size bounds check for config1 and config2 overwrites the trailing id[] array in older (ABI0/VER0) events. - [High] Failure to mask unsupported sample_type bits leads to parsing failures on newer perf.data formats. Pre-existing issues: - [High] Cross-endian `perf.data` files are corrupted during ASLR injection due to mixed endianness in the synthesized `new_event` payload. -- commit 7fad8822f4c998bf1af1ae27d27ca2449d9ecea7 Author: Ian Rogers <irogers@google.com> perf aslr: Strip sample registers This patch refactors the ASLR tool to strip out only the register dump payload by masking out the relevant perf_event_attr fields when the delegated tool is handling the data. It maintains the original perf_event_attr values via an aslr_evsel_priv struct looked up in the evsel_orig_attrs hashmap. 
> diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c > index 00a54d1c7e41f..c852ade3c4e37 100644 > --- a/tools/perf/builtin-inject.c > +++ b/tools/perf/builtin-inject.c > @@ -254,6 +254,12 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, > return -ENOMEM; > memcpy(stripped_event, event, event->header.size); > stripped_event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; > + if (stripped_event->attr.attr.size >= > + (offsetof(struct perf_event_attr, sample_regs_user) + sizeof(u64))) > + stripped_event->attr.attr.sample_regs_user = 0; > + if (stripped_event->attr.attr.size >= > + (offsetof(struct perf_event_attr, sample_regs_intr) + sizeof(u64))) > + stripped_event->attr.attr.sample_regs_intr = 0; > > if (stripped_event->attr.attr.type == PERF_TYPE_BREAKPOINT && > event->header.size >= (offsetof(struct perf_record_header_attr, [Severity: High] Does this bounds check for config1 and config2 overwrite the trailing id[] array in older (ABI0/VER0) events? In perf_event__repipe_attr(), when clearing config1 and config2 for kprobe/uprobe events, the size check validates against event->header.size rather than stripped_event->attr.attr.size. In older perf records, config2 is not part of the perf_event_attr payload, and the id[] array begins exactly at offset 64. However, event->header.size includes the size of the appended id[] array. If an event has a 64-byte attr.size and an appended id[] array, the validation will incorrectly pass and the code will write 0 to config2 (offset 72), overwriting id[0]. Can this corrupt the unique event ID and break event demuxing for downstream readers? [ ... ] > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > index 7afa5a0dac2f3..bac41bff1b2a3 100644 > --- a/tools/perf/util/aslr.c > +++ b/tools/perf/util/aslr.c [ ... 
] > @@ -668,6 +693,10 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, > u64 addr; > size_t i; > size_t j; > + struct aslr_evsel_priv *priv = NULL; > + u64 orig_sample_type; > + u64 orig_regs_user; > + u64 orig_regs_intr; > bool orig_needs_swap; > > del_tool = container_of(tool, struct delegate_tool, tool); > @@ -678,7 +707,23 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, > return delegate->sample(delegate, event, sample, machine); > > ret = -EFAULT; > - sample_type = evsel->core.attr.sample_type; > + > + if (hashmap__find(&aslr->evsel_orig_attrs, evsel, &priv)) { > + orig_sample_type = priv->orig_sample_type; > + orig_regs_user = priv->orig_sample_regs_user; > + orig_regs_intr = priv->orig_sample_regs_intr; > + } else { > + orig_sample_type = evsel->core.attr.sample_type; > + orig_regs_user = evsel->core.attr.sample_regs_user; > + orig_regs_intr = evsel->core.attr.sample_regs_intr; > + } > + > + orig_sample_size = evsel->sample_size; > + > + sample_type = orig_sample_type; > + sample_type &= ~PERF_SAMPLE_REGS_USER; > + sample_type &= ~PERF_SAMPLE_REGS_INTR; > + [Severity: High] Does failing to mask out unsupported sample_type bits lead to parsing failures on newer perf.data formats? In aslr_tool__process_sample(), the sample_type used to synthesize the new event is derived from orig_sample_type by masking out only PERF_SAMPLE_REGS_USER and PERF_SAMPLE_REGS_INTR. Because this fails to mask out unsupported bits (unlike the pre-patch logic which used &= ASLR_SUPPORTED_SAMPLE_TYPE), and the extraction loop (COPY_U64) lacks handlers for unknown/unsupported bits, they are dropped from the new_event payload. However, since evsel->core.attr.sample_type = sample_type advertises these bits as present, the subsequent call to evsel__parse_sample() will expect to find them in the array. Can this mismatch cause the parser to read misaligned or out-of-bounds data when processing traces from newer perf tools? 
> max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); > max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); > new_event = (union perf_event *)aslr->event_copy; > @@ -727,25 +772,25 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, > i++; \ > } while (0) > > - if (sample_type & PERF_SAMPLE_IDENTIFIER) > + if (orig_sample_type & PERF_SAMPLE_IDENTIFIER) > COPY_U64(); /* id */ [Severity: High] This isn't a bug introduced by this patch, but can cross-endian perf.data files be corrupted during ASLR injection due to mixed endianness in the synthesized new_event payload? In aslr_tool__process_sample(), new_event is synthesized using a combination of COPY_U64() and REMAP_U64(). COPY_U64() copies directly from the original event->sample.array, which is in the original file's endianness (perf core does not swap sample payloads in-place). However, REMAP_U64() computes remapped addresses and writes them in host endianness. For cross-endian perf.data files, this results in a corrupted, mixed-endian payload. Furthermore, evsel->needs_swap = false forces the subsequent parser and the delegated tool to treat the entire payload as host-endian, which permanently corrupts fields like TID, TIME, and IDENTIFIER in the injected output file. -- Sashiko AI review · https://sashiko.dev/#/patchset/20260606151451.2913860-1-irogers@google.com?part=5 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v17 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes 2026-06-06 15:14 ` [PATCH v16 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (4 preceding siblings ...) 2026-06-06 15:14 ` [PATCH v16 5/5] perf aslr: Strip sample registers Ian Rogers @ 2026-06-07 6:09 ` Ian Rogers 2026-06-07 6:09 ` [PATCH v17 1/5] perf maps: Add maps__mutate_mapping Ian Rogers ` (5 more replies) 5 siblings, 6 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-07 6:09 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz This patch series introduces the new 'perf inject --aslr' feature to remap virtual memory addresses or drop physical memory event leaks when profile record data is shared between machines. Bundled with this feature is a bug fix inside the core map tracking tool that hardens perf session analysis against concurrent lookup data races. Detailed Mechanism of MMAP Mapping and ASLR virtual Address Allocation: The ASLR tool virtualizes the address space of the recorded processes by intercepting MMAP and MMAP2 events to build a consistent translation database, which is subsequently used to rewrite sample addresses. It maintains two primary lookup databases using hash maps: 1. 'remap_addresses': Maps an original mapping key to its new remapped base address. The key uses topological invariant coordinates: (machine, dso, invariant). The invariant is computed as (start - pgoff) for DSO-backed mappings. This invariant remains constant even when perf's internal overlap-resolution splits a VMA into fragmented pieces, ensuring split maps resolve consistently back to the same remapped base. 2. 'top_addresses': Tracks the allocation state per process (machine, pid). It maintains 'remapped_max' (the highest allocated address in the virtualized space). 
For each MMAP/MMAP2 event: - We look up the DSO and invariant key in 'remap_addresses'. If found, we reuse the translation, preserving the offset within the mapping. - If not found, we allocate a new remapped address space: - We use thread__find_map to look up the mapping immediately preceding the new one in the original address space (at start - 1). If the preceding mapping was also remapped, we place the new mapping contiguously after it in the remapped space. This preserves contiguity of split mappings (e.g., symbols split by HugeTLB, or anonymous .bss segments adjacent to initialized data). - If no contiguous mapping is found, we insert a 1-page gap from the highest allocated address (remapped_max) to prevent accidental merging of unrelated VMAs. - The event's start address (and pgoff for kernel maps) is rewritten, and the event is delegated to the output writer. To remain strictly conservative and guarantee security, the tool scrubs breakpoint addresses (bp_addr) from all synthesized stream headers, completely drops PERF_RECORD_TEXT_POKE events to prevent leaks of absolute immediate pointer operands, and drops unsupported complex payloads (such as user register stacks, raw tracepoints, and hardware AUX tracing frames). Verification is reinforced with a shell test ('inject_aslr.sh'). Prerequisite Bug Fix (Patch 1): During development, a core map indexing issue was identified and resolved to prevent concurrent lookup data races during session analysis. Changes since v16: - Patch 2: Refactored inline ASLR stripping logic out of builtin-inject.c and into dedicated helpers (aslr_tool__strip_attr_event and aslr_tool__strip_evlist) in aslr.c to better separate concerns. - Patch 2: Fixed guest machine allocation memory leak in aslr_tool__delete() where machines__exit() explicitly skipped freeing the guest processes tree.
- Patch 3: Fixed bounds-check violations during cross-endian parsing inside aslr_tool__process_sample() by correctly applying bswap_64() to raw offsets, iteration counts, sizes, and addresses prior to logical evaluation when orig_needs_swap is active. - Patch 4: Fixed pipe mode parser misalignment bug by safely fetching needs_swap from the initialized evsel rather than blindly intercepting HEADER_ATTR events prior to session parsing. - Patch 4: Resolved checkpatch.pl line length warnings in the bswap_64 endianness swapping logic. - Patch Series: Reordered the final two patches. "perf aslr: Strip sample registers" is now Patch 4, and "perf test: Add inject ASLR test" is now Patch 5. This ensures the register stripping logic is fully introduced before the comprehensive shell tests validate it, preventing bisectability test failures and easing merge conflicts. - Patch 5: Fixed "User registers stripping test" starvation when run as root by explicitly using '-e cycles:u' during recording, preventing the ring buffer from overflowing with kernel samples. Changes since v15: - Patch 2: Added bounds checking for event->header.size before writing to breakpoint fields to avoid heap buffer overflow on older ABI events. - Patch 2: Fixed asymmetric calculation bug in aslr_tool__findnew_mapping() where pgoff for anonymous kernel memory was not properly subtracted upon insertion, causing the lookup addition to overflow. - Patch 2: Added detailed comments documenting the symmetric lookup and insertion math for unmapped and mapped memory blocks. - Patch 5: Added missing kprobe and uprobe scrubbing of config1 and config2 during aslr_tool__strip_evlist() to strictly conform with repipe constraints. Changes since v14: - Patch 2: Removed unnecessary vertical whitespace in builtin-inject.c. - Patch 2: Added comments explaining why pgoff is assigned for anonymous memory maps to prevent ASLR leaks.
- Patch 2: Removed orig_last_end tracking and refactored contiguous mapping detection to use thread__find_map(..., start - 1, ...) based on Gabriel's feedback. - Patch 2: Scrub kprobe/uprobe event config1 and config2 fields to prevent address leaks. - Patch 2: Overwrite pgoff with the remapped start address for anonymous mappings (detected via is_anon_memory and is_no_dso_memory). - Patch 3: Fix C90 mixed declaration error for orig_needs_swap. - Patch 3: Temporarily disable evsel->needs_swap during the secondary evsel__parse_sample() call to prevent branch stack double-swapping bugs. Changes since v13: - Patch 2: Added a NULL check for env before calling perf_env__kernel_is_64_bit(env) to prevent potential segfaults if the recorded environment has no headers. - Patch 5: Fixed sample_size and id_pos going out of sync during aslr_tool__strip_evlist() and aslr_tool__restore_evlist(). Instead of using evsel__reset_sample_bit(), which was acting as a no-op due to early bit clearing and corrupted sample_size, the tool now directly updates sample_type and recomputes sample_size/id_pos dynamically. Added orig_sample_size to aslr_evsel_priv to correctly restore the state. Changes since v12: - Patch 2: Fixed potential NULL pointer dereference in remap_addresses__hash() when handling unmapped memory events (key->dso is NULL) under REFCNT_CHECKING. - Patch 2: Dynamically detect machine architecture bitness via perf_env__kernel_is_64_bit() to select appropriate kernel_space_start boundaries, avoiding 64-bit address injection on 32-bit platforms. Changes since v11: - Patch 1: Fixed struct dso name accessor in maps.c by using dso__name() instead of ->name. - Patch 2: Fixed hash function in aslr.c to hash the underlying dso pointer using RC_CHK_ACCESS to support reference count checking. 
Changes since v10: - Patch 1: Added explicit tracking array logic in maps__load_maps() to correctly accumulate valid maps (skipping NULL entries after failures) and safely return the exact populated count, resolving out-of-bounds pointer iteration panics. - Patch 3: Fixed endianness bug during cross-endian sample parsing by passing evsel->needs_swap instead of false to __evsel__parse_sample in aslr.c, ensuring correct 32-bit field byte unswapping for packed fields. Refactored evsel__parse_sample to take a needs_swap argument via __evsel__parse_sample. - Patch 4: Fixed inject_aslr.sh exit code handling in trap functions to capture and propagate the correct pipeline failure status code instead of unconditionally returning success or failing the test. Changes since v9: - Patch 1: Added `-ENOMEM` error check inside `maps__find_symbol_by_name()` and return `NULL` early. Added map sorting state invalidation on early return in `maps__load_maps()`. - Patch 2: Fixed encapsulation by using `thread__maps()` and `thread__pid()` accessors in `aslr_tool__findnew_mapping()`. Added `pr_warning_once` warning when raw auxtrace data is dropped. - Patch 3: Fixed encapsulation by using `thread__maps()` and `thread__pid()` accessors in `aslr_tool__remap_address()`. Wrapped `evsel__parse_sample()` to temporarily disable `needs_swap` to avoid branch stack endianness corruption on cross-endian files. Fixed ISO C90 warning for declaration-after-statement for `orig_needs_swap`. - Patch 4: Fixed duplicate cleanup by explicitly removing trap handlers (`trap - EXIT TERM INT`) inside the `cleanup()` function. - Patch 5: Fixed heap corruption by adding size bounds checking before writing to `sample_regs_user` and `sample_regs_intr` fields. Added missing register mask clearing logic for the `itrace` synthesis path of `perf_event__repipe_attr()`. 
Ian Rogers (5): perf maps: Add maps__mutate_mapping perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking perf inject/aslr: Implement sample address remapping perf aslr: Strip sample registers perf test: Add inject ASLR test tools/perf/builtin-inject.c | 50 +- tools/perf/tests/shell/inject_aslr.sh | 519 +++++++++ tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 1394 +++++++++++++++++++++++++ tools/perf/util/aslr.h | 44 + tools/perf/util/evsel.c | 6 +- tools/perf/util/evsel.h | 10 +- tools/perf/util/machine.c | 32 +- tools/perf/util/maps.c | 149 ++- tools/perf/util/maps.h | 3 + tools/perf/util/symbol-elf.c | 41 +- tools/perf/util/symbol.c | 17 +- 12 files changed, 2202 insertions(+), 64 deletions(-) create mode 100755 tools/perf/tests/shell/inject_aslr.sh create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v17 1/5] perf maps: Add maps__mutate_mapping 2026-06-07 6:09 ` [PATCH v17 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers @ 2026-06-07 6:09 ` Ian Rogers 2026-06-07 6:09 ` [PATCH v17 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers ` (4 subsequent siblings) 5 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-07 6:09 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz During kernel ELF symbol parsing (dso__process_kernel_symbol), proc kallsyms image loading (dso__load_kernel_sym, dso__load_guest_kernel_sym), and dynamic kernel memory map alignment updates (machine__update_kernel_mmap), the loader directly modifies live virtual address boundary keys fields on map objects. If these boundaries are mutated while the map pointer actively resides inside the parent maps cache array list (kmaps) outside of any lock closure, an unsafe concurrent window is exposed where parallel worker lookup threads (e.g., inside perf top) can mistakenly assume the cache remains sorted based on stale parameters, executing binary search queries (bsearch) across an unsorted range and triggering lookup failures. Fix this by introducing maps__mutate_mapping() that explicitly acquires the parent maps write semaphore lock, executes an incoming mutation callback block to perform the field updates under lock protection, and invalidates the sorted tracking flags prior to releasing the write lock. This guarantees synchronization invariants, closing the concurrent lookup race window. The adjacent module alignment pass inside machine__create_kernel_maps() is safely preserved as a high-performance lockless pass, as its invocation lifecycle bounds remain strictly single-threaded by contract during session initialization construction. 
To safely support this unconditional down_write write lock mutator without recursive read-to-write self-deadlock upgrades during lazy symbol loading, we introduce a public maps__load_maps() API. It copies map pointers under a brief read lock and force-loads all modules locklessly outside the lock. Callers (such as perf inject) must pre-load all kernel symbol maps up front at startup using maps__load_maps(), completely bypassing dynamic runtime mutations. Fixes: 39b12f781271 ("perf tools: Make it possible to read object code from vmlinux") Signed-off-by: Ian Rogers <irogers@google.com> Assisted-by: Antigravity:gemini-3.1-pro --- tools/perf/util/machine.c | 32 +++++--- tools/perf/util/maps.c | 149 ++++++++++++++++++++++++++++------- tools/perf/util/maps.h | 3 + tools/perf/util/symbol-elf.c | 41 ++++++---- tools/perf/util/symbol.c | 17 +++- 5 files changed, 184 insertions(+), 58 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index da1ad58758af..1ea06fde14e0 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1539,22 +1539,30 @@ static void machine__set_kernel_mmap(struct machine *machine, map__set_end(machine->vmlinux_map, ~0ULL); } -static int machine__update_kernel_mmap(struct machine *machine, - u64 start, u64 end) +struct kernel_mmap_mutation_ctx { + u64 start; + u64 end; +}; + +static int kernel_mmap_mutate_cb(struct map *map, void *data) { - struct map *orig, *updated; - int err; + struct kernel_mmap_mutation_ctx *ctx = data; - orig = machine->vmlinux_map; - updated = map__get(orig); + map__set_start(map, ctx->start); + map__set_end(map, ctx->end); + if (ctx->start == 0 && ctx->end == 0) + map__set_end(map, ~0ULL); + return 0; +} - machine->vmlinux_map = updated; - maps__remove(machine__kernel_maps(machine), orig); - machine__set_kernel_mmap(machine, start, end); - err = maps__insert(machine__kernel_maps(machine), updated); - map__put(orig); +static int machine__update_kernel_mmap(struct machine *machine, + 
u64 start, u64 end) +{ + struct kernel_mmap_mutation_ctx ctx = { .start = start, .end = end }; - return err; + return maps__mutate_mapping(machine__kernel_maps(machine), + machine->vmlinux_map, + kernel_mmap_mutate_cb, &ctx); } int machine__create_kernel_maps(struct machine *machine) diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index 923935ee21b6..b1b8efe42149 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -576,6 +576,49 @@ void maps__remove(struct maps *maps, struct map *map) #endif } +/** + * maps__mutate_mapping - Apply write-protected mutations to a map. + * @maps: The maps collection containing the map. + * @map: The map to mutate. + * @mutate_cb: Callback function that performs the actual mutations. + * @data: Private data passed to the callback. + * + * This acquires the write lock on the maps semaphore to safely protect + * concurrent readers from seeing partially mutated or unsorted map boundaries. + * + * WARNING: Acquiring down_write() here can trigger a recursive self-deadlock if + * the caller already holds the read lock (e.g., during maps__for_each_map() or + * maps__find() iteration paths that trigger lazy symbol loading). To completely + * avoid this deadlock, all kernel/module maps must be pre-loaded up-front (via + * maps__load_maps()) under a clean, single-threaded context before entering + * multi-threaded event processing loops. 
+ */ +int maps__mutate_mapping(struct maps *maps, struct map *map, + int (*mutate_cb)(struct map *map, void *data), void *data) +{ + int err = 0; + + if (maps) + down_write(maps__lock(maps)); + + err = mutate_cb(map, data); + + if (maps) { + RC_CHK_ACCESS(maps)->maps_by_address_sorted = false; + RC_CHK_ACCESS(maps)->maps_by_name_sorted = false; + } + + if (maps) + up_write(maps__lock(maps)); + +#ifdef HAVE_LIBDW_SUPPORT + if (maps) + libdw__invalidate_dwfl(maps, maps__libdw_addr_space_dwfl(maps)); +#endif + + return err; +} + bool maps__empty(struct maps *maps) { bool res; @@ -626,6 +669,41 @@ int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data) return ret; } +int maps__load_maps(struct maps *maps) +{ + struct map **maps_copy; + unsigned int nr_maps; + int err = 0; + + if (!maps) + return 0; + + down_read(maps__lock(maps)); + nr_maps = maps__nr_maps(maps); + if (nr_maps == 0) { + up_read(maps__lock(maps)); + return 0; + } + maps_copy = calloc(nr_maps, sizeof(*maps_copy)); + if (!maps_copy) { + up_read(maps__lock(maps)); + return -ENOMEM; + } + for (unsigned int i = 0; i < nr_maps; i++) + maps_copy[i] = map__get(maps__maps_by_address(maps)[i]); + up_read(maps__lock(maps)); + + for (unsigned int i = 0; i < nr_maps; i++) { + if (map__load(maps_copy[i]) < 0) { + pr_warning("Failed to load map %s\n", dso__name(map__dso(maps_copy[i]))); + err = -1; + } + map__put(maps_copy[i]); + } + free(maps_copy); + return err; +} + void maps__remove_maps(struct maps *maps, bool (*cb)(struct map *map, void *data), void *data) { struct map **maps_by_address; @@ -668,40 +746,57 @@ struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp) return result; } -struct maps__find_symbol_by_name_args { - struct map **mapp; - const char *name; - struct symbol *sym; -}; - -static int maps__find_symbol_by_name_cb(struct map *map, void *data) +struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) { - 
struct maps__find_symbol_by_name_args *args = data; + struct map **maps_copy; + unsigned int nr_maps; + struct symbol *sym = NULL; - args->sym = map__find_symbol_by_name(map, args->name); - if (!args->sym) - return 0; + if (!maps) + return NULL; - if (!map__contains_symbol(map, args->sym)) { - args->sym = NULL; - return 0; + /* + * First, ensure all maps are loaded. We pre-load them outside of any + * read-to-write locks to avoid deadlocks. Even if some fail, we proceed. + */ + maps__load_maps(maps); + + /* + * Create a local snapshot of the maps while holding the read lock. + * This prevents deadlocking if iteration triggers further map insertions. + */ + down_read(maps__lock(maps)); + nr_maps = maps__nr_maps(maps); + maps_copy = calloc(nr_maps, sizeof(*maps_copy)); + if (maps_copy) { + for (unsigned int i = 0; i < nr_maps; i++) { + struct map *map = maps__maps_by_address(maps)[i]; + + maps_copy[i] = map__get(map); + } } + up_read(maps__lock(maps)); - if (args->mapp != NULL) - *args->mapp = map__get(map); - return 1; -} + if (!maps_copy) + return NULL; -struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) -{ - struct maps__find_symbol_by_name_args args = { - .mapp = mapp, - .name = name, - .sym = NULL, - }; + for (unsigned int i = 0; i < nr_maps; i++) { + struct map *map = maps_copy[i]; + + sym = map__find_symbol_by_name(map, name); + if (sym && map__contains_symbol(map, sym)) { + if (mapp) + *mapp = map__get(map); + break; + } + sym = NULL; + } + + for (unsigned int i = 0; i < nr_maps; i++) + map__put(maps_copy[i]); - maps__for_each_map(maps, maps__find_symbol_by_name_cb, &args); - return args.sym; + free(maps_copy); + return sym; } int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams) diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h index 5b80b199685e..4ec9b7453a3b 100644 --- a/tools/perf/util/maps.h +++ b/tools/perf/util/maps.h @@ -59,8 +59,11 @@ void maps__set_libdw_addr_space_dwfl(struct 
maps *maps, void *dwfl); size_t maps__fprintf(struct maps *maps, FILE *fp); +int maps__load_maps(struct maps *maps); int maps__insert(struct maps *maps, struct map *map); void maps__remove(struct maps *maps, struct map *map); +int maps__mutate_mapping(struct maps *maps, struct map *map, + int (*mutate_cb)(struct map *map, void *data), void *data); struct map *maps__find(struct maps *maps, u64 addr); struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 186e6d92ac3d..d1e93c0556dd 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1342,6 +1342,24 @@ static u64 ref_reloc(struct kmap *kmap) void __weak arch__sym_update(struct symbol *s __maybe_unused, GElf_Sym *sym __maybe_unused) { } +struct remap_kernel_ctx { + u64 sh_addr; + u64 sh_size; + u64 sh_offset; + struct kmap *kmap; +}; + +static int remap_kernel_cb(struct map *map, void *data) +{ + struct remap_kernel_ctx *ctx = data; + + map__set_start(map, ctx->sh_addr + ref_reloc(ctx->kmap)); + map__set_end(map, map__start(map) + ctx->sh_size); + map__set_pgoff(map, ctx->sh_offset); + map__set_mapping_type(map, MAPPING_TYPE__DSO); + return 0; +} + static int dso__process_kernel_symbol(struct dso *dso, struct map *map, GElf_Sym *sym, GElf_Shdr *shdr, struct maps *kmaps, struct kmap *kmap, @@ -1372,22 +1390,15 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, * map to the kernel dso. 
*/ if (*remap_kernel && dso__kernel(dso) && !kmodule) { + struct remap_kernel_ctx ctx = { + .sh_addr = shdr->sh_addr, + .sh_size = shdr->sh_size, + .sh_offset = shdr->sh_offset, + .kmap = kmap + }; + *remap_kernel = false; - map__set_start(map, shdr->sh_addr + ref_reloc(kmap)); - map__set_end(map, map__start(map) + shdr->sh_size); - map__set_pgoff(map, shdr->sh_offset); - map__set_mapping_type(map, MAPPING_TYPE__DSO); - /* Ensure maps are correctly ordered */ - if (kmaps) { - int err; - struct map *tmp = map__get(map); - - maps__remove(kmaps, map); - err = maps__insert(kmaps, map); - map__put(tmp); - if (err) - return err; - } + maps__mutate_mapping(kmaps, map, remap_kernel_cb, &ctx); } /* diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 0c46b24ee098..2cc911af8c81 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -48,6 +48,13 @@ #include <symbol/kallsyms.h> #include <sys/utsname.h> +static int map_fixup_cb(struct map *map, void *data __maybe_unused) +{ + map__fixup_start(map); + map__fixup_end(map); + return 0; +} + static int dso__load_kernel_sym(struct dso *dso, struct map *map); static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map); @@ -2240,10 +2247,11 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map) free(kallsyms_allocated_filename); if (err > 0 && !dso__is_kcore(dso)) { + struct maps *kmaps = map__kmaps(map); + dso__set_binary_type(dso, DSO_BINARY_TYPE__KALLSYMS); dso__set_long_name(dso, DSO__NAME_KALLSYMS, false); - map__fixup_start(map); - map__fixup_end(map); + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); } return err; @@ -2283,10 +2291,11 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map) if (err > 0) pr_debug("Using %s for symbols\n", kallsyms_filename); if (err > 0 && !dso__is_kcore(dso)) { + struct maps *kmaps = map__kmaps(map); + dso__set_binary_type(dso, DSO_BINARY_TYPE__GUEST_KALLSYMS); dso__set_long_name(dso, machine->mmap_name, 
false); - map__fixup_start(map); - map__fixup_end(map); + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); } return err; -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* [PATCH v17 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking 2026-06-07 6:09 ` [PATCH v17 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-07 6:09 ` [PATCH v17 1/5] perf maps: Add maps__mutate_mapping Ian Rogers @ 2026-06-07 6:09 ` Ian Rogers 2026-06-07 8:27 ` sashiko-bot 2026-06-07 6:09 ` [PATCH v17 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers ` (3 subsequent siblings) 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-07 6:09 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz If perf.data files are taken from one machine to another they may leak virtual addresses and so weaken ASLR on the machine they are coming from. Add an aslr option for perf inject that remaps all virtual addresses, or drops data/events, so that the virtual address information isn't leaked. This patch introduces the core ASLR remapping tool infrastructure and implements remapping/tracking for metadata events (MMAP, MMAP2, COMM, FORK, EXIT, KSYMBOL, TEXT_POKE). Sample events are delegated without remapping for now. 
Signed-off-by: Ian Rogers <irogers@google.com> Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> Assisted-by: Antigravity:gemini-3.1-pro --- tools/perf/builtin-inject.c | 29 +- tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 822 ++++++++++++++++++++++++++++++++++++ tools/perf/util/aslr.h | 41 ++ 4 files changed, 891 insertions(+), 2 deletions(-) create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 75ffe31d03fe..6cc9c6dbf608 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -8,6 +8,7 @@ */ #include "builtin.h" +#include "util/aslr.h" #include "util/color.h" #include "util/dso.h" #include "util/vdso.h" @@ -24,6 +25,7 @@ #include "util/string2.h" #include "util/symbol.h" #include "util/synthetic-events.h" +#include "util/pmus.h" #include "util/thread.h" #include "util/namespaces.h" #include "util/unwind.h" @@ -124,6 +126,7 @@ struct perf_inject { bool in_place_update_dry_run; bool copy_kcore_dir; bool convert_callchain; + bool aslr; const char *input_name; struct perf_data output; u64 bytes_written; @@ -276,6 +279,8 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, attr.size = sizeof(struct perf_event_attr); attr.sample_type &= ~PERF_SAMPLE_AUX; + if (inject->aslr) + aslr_tool__strip_attr_event(event, pevlist); if (inject->itrace_synth_opts.add_last_branch) { attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; @@ -2594,7 +2599,6 @@ static int __cmd_inject(struct perf_inject *inject) evsel->core.attr.exclude_callchain_user = 0; } } - session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc, @@ -2704,6 +2708,8 @@ int cmd_inject(int argc, const char **argv) unwind__option), OPT_BOOLEAN(0, "convert-callchain", &inject.convert_callchain, "Generate callchains using 
DWARF and drop register/stack data"), + OPT_BOOLEAN(0, "aslr", &inject.aslr, + "Remap virtual memory addresses similar to ASLR"), OPT_END() }; const char * const inject_usage[] = { @@ -2711,6 +2717,7 @@ int cmd_inject(int argc, const char **argv) NULL }; bool ordered_events; + struct perf_tool *tool = &inject.tool; if (!inject.itrace_synth_opts.set) { /* Disable eager loading of kernel symbols that adds overhead to perf inject. */ @@ -2731,6 +2738,11 @@ int cmd_inject(int argc, const char **argv) if (argc) usage_with_options(inject_usage, options); + if (inject.aslr && inject.convert_callchain) { + pr_err("Error: --aslr and --convert-callchain are mutually exclusive features.\n"); + return -EINVAL; + } + if (inject.strip && !inject.itrace_synth_opts.set) { pr_err("--strip option requires --itrace option\n"); return -1; @@ -2824,12 +2836,21 @@ int cmd_inject(int argc, const char **argv) inject.tool.schedstat_domain = perf_event__repipe_op2_synth; inject.tool.dont_split_sample_group = true; inject.tool.merge_deferred_callchains = false; - inject.session = __perf_session__new(&data, &inject.tool, + if (inject.aslr) { + tool = aslr_tool__new(&inject.tool); + if (!tool) { + ret = -ENOMEM; + goto out_close_output; + } + } + inject.session = __perf_session__new(&data, tool, /*trace_event_repipe=*/inject.output.is_pipe, /*host_env=*/NULL); if (IS_ERR(inject.session)) { ret = PTR_ERR(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); goto out_close_output; } @@ -2922,6 +2943,8 @@ int cmd_inject(int argc, const char **argv) goto out_delete; ret = __cmd_inject(&inject); + if (inject.aslr) + aslr_tool__strip_evlist(tool, inject.session->evlist); guest_session__exit(&inject.guest_session); @@ -2929,6 +2952,8 @@ int cmd_inject(int argc, const char **argv) strlist__delete(inject.known_build_ids); zstd_fini(&(inject.session->zstd_data)); perf_session__delete(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); out_close_output: if (!inject.in_place_update) 
perf_data__close(&inject.output); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 4bbc78b1f741..19994e026ae5 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -6,6 +6,7 @@ perf-util-y += arm64-frame-pointer-unwind-support.o perf-util-y += addr2line.o perf-util-y += addr_location.o perf-util-y += annotate.o +perf-util-y += aslr.o perf-util-y += blake2s.o perf-util-y += block-info.o perf-util-y += block-range.o diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c new file mode 100644 index 000000000000..2c5fafbe5d84 --- /dev/null +++ b/tools/perf/util/aslr.c @@ -0,0 +1,822 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "aslr.h" + +#include "addr_location.h" +#include "debug.h" +#include "event.h" +#include "evsel.h" +#include "evlist.h" +#include "machine.h" +#include "map.h" +#include "thread.h" +#include "tool.h" +#include "session.h" +#include "data.h" +#include "dso.h" +#include "pmus.h" + +#include <internal/lib.h> /* page_size */ +#include <linux/compiler.h> +#include <linux/zalloc.h> +#include <inttypes.h> +#include <unistd.h> + +/** + * struct remap_addresses_key - Key for mapping original addresses to remapped ones. + * @dso: Pointer to the DSO (Dynamic Shared Object) associated with the mapping. + * @invariant: Unique offset invariant within the VMA (Virtual Memory Area). + * Calculated as `start - pgoff`. This value remains constant when + * perf's internal `maps__fixup_overlap_and_insert` splits a map into + * fragmented VMA pieces due to overlapping events, allowing us to + * resolve split maps consistently back to the original VMA. + * @pid: Process ID associated with the mapping. 
+ */ +struct remap_addresses_key { + struct machine *machine; + struct dso *dso; + u64 invariant; + pid_t pid; +}; + +struct aslr_mapping { + struct list_head node; + u64 orig_start; + u64 len; + u64 remap_start; +}; + +struct process_top_address { + u64 remapped_max; +}; +struct aslr_tool { + /** @tool: The tool implemented here and a pointer to a delegate to process the data. */ + struct delegate_tool tool; + /** @machines: The machines with the input, not remapped, virtual address layout. */ + struct machines machines; + /** @event_copy: Buffer used to create an event to pass to the delegate. */ + char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); + /** @remap_addresses: mapping from remap_addresses_key to remapped address. */ + struct hashmap remap_addresses; + /** @top_addresses: mapping from process to max remapped address. */ + struct hashmap top_addresses; +}; + +static const pid_t kernel_pid = -1; + +/* Start remapping user processes from a small non-zero offset. */ +static const u64 user_space_start = 0x200000; +static const u64 kernel_space_start_64 = 0xffff800010000000ULL; +static const u64 kernel_space_start_32 = 0x80000000ULL; + +static size_t remap_addresses__hash(long _key, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key = (struct remap_addresses_key *)_key; + void *dso_ptr = key->dso ? 
RC_CHK_ACCESS(key->dso) : NULL; + + return (size_t)key->machine ^ (size_t)dso_ptr ^ key->invariant ^ key->pid; +} + +static bool remap_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key1 = (struct remap_addresses_key *)_key1; + struct remap_addresses_key *key2 = (struct remap_addresses_key *)_key2; + + return key1->machine == key2->machine && + RC_CHK_EQUAL(key1->dso, key2->dso) && + key1->invariant == key2->invariant && + key1->pid == key2->pid; +} + +struct top_addresses_key { + struct machine *machine; + pid_t pid; +}; + +static size_t top_addresses__hash(long _key, void *ctx __maybe_unused) +{ + struct top_addresses_key *key = (struct top_addresses_key *)_key; + + return (size_t)key->machine ^ key->pid; +} + +static bool top_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) +{ + struct top_addresses_key *key1 = (struct top_addresses_key *)_key1; + struct top_addresses_key *key2 = (struct top_addresses_key *)_key2; + + return key1->machine == key2->machine && key1->pid == key2->pid; +} + +static u64 round_up_to_page_size(u64 addr) +{ + return (addr + page_size - 1) & ~((u64)page_size - 1); +} + +struct aslr_machine_priv { + bool kernel_maps_loaded; +}; + +static int aslr_tool__preload_kernel_maps(struct machine *machine) +{ + struct aslr_machine_priv *mpriv = machine->priv; + + if (!mpriv) { + mpriv = zalloc(sizeof(*mpriv)); + if (!mpriv) + return -ENOMEM; + machine->priv = mpriv; + } + + if (!mpriv->kernel_maps_loaded) { + struct maps *kmaps = machine__kernel_maps(machine); + + if (kmaps) { + int err = maps__load_maps(kmaps); + + if (err < 0) { + pr_err("ASLR: Failed to preload kernel maps for machine pid %d\n", + machine->pid); + return err; + } + } + mpriv->kernel_maps_loaded = true; + } + return 0; +} + +static void aslr_tool__free_machine_priv(struct machine *machine) +{ + free(machine->priv); + machine->priv = NULL; +} + +static void aslr_tool__destroy_machines_priv(struct machines 
*machines) +{ + struct rb_node *nd; + + aslr_tool__free_machine_priv(&machines->host); + for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) { + struct machine *machine = rb_entry(nd, struct machine, rb_node); + + aslr_tool__free_machine_priv(machine); + } +} + +static u64 aslr_tool__findnew_mapping(struct aslr_tool *aslr, + struct machine *session_machine, + struct thread *aslr_thread, + u8 cpumode, u64 start, + u64 len, u64 pgoff) +{ + /* Address location for dso lookup. */ + struct addr_location al; + /* Original ASLR address based key for the remap table. */ + struct remap_addresses_key remap_key; + /* The address in the ASLR sanitized address space less pg_off. */ + u64 *remapped_invariant_ptr; + /* Key for the maximum address in a process. */ + struct top_addresses_key top_addr_key; + /* Value in top address table. */ + struct process_top_address *top = NULL; + /* Address in ASLR sanitized address space. */ + u64 remap_addr; + /* Potentially allocated remap table key. */ + struct remap_addresses_key *new_remap_key = NULL; + /* + * Potentially allocated remap table value. + * TODO: Avoid allocation necessary for perf 32-bit binary support. + */ + u64 *new_remap_val = NULL; + int err; + + if (!aslr_thread) + return 0; + + /* The key to look up an incoming address to the outgoing value. */ + addr_location__init(&al); + remap_key.machine = maps__machine(thread__maps(aslr_thread)); + remap_key.pid = (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? + kernel_pid : thread__pid(aslr_thread); + if (thread__find_map(aslr_thread, cpumode, start, &al)) { + struct dso *dso = map__dso(al.map); + const char *dso_name = dso ? 
dso__long_name(dso) : NULL; + + remap_key.dso = dso; + if (dso && !is_anon_memory(dso_name) && !is_no_dso_memory(dso_name)) + remap_key.invariant = map__start(al.map) - map__pgoff(al.map); + else + remap_key.invariant = map__start(al.map); + } else { + remap_key.dso = NULL; + remap_key.invariant = start; + } + + /* The key to look up top allocated address. */ + top_addr_key.machine = remap_key.machine; + top_addr_key.pid = remap_key.pid; + + if (hashmap__find(&aslr->remap_addresses, &remap_key, &remapped_invariant_ptr)) { + /* Mmap already exists. */ + u64 calculated_max; + + if (al.map) { + /* + * The cached value is the base of the invariant. We add the + * offset into the VMA (start - map__start), plus the map's + * pgoff, to get the precise virtual address within this chunk. + */ + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + + (start - map__start(al.map)); + } else { + /* + * For unmapped memory (e.g. kernel anonymous), the cached value + * was stored offset by pgoff. Adding pgoff yields the true remap_addr. + */ + remap_addr = *remapped_invariant_ptr + pgoff; + } + + calculated_max = remap_addr + len; + + /* See if top mapping was expanded. */ + if (hashmap__find(&aslr->top_addresses, &top_addr_key, &top)) { + if (calculated_max > top->remapped_max) + top->remapped_max = calculated_max; + } + addr_location__exit(&al); + return remap_addr; + } + /* No mmap, create an entry from the top address. */ + if (hashmap__find(&aslr->top_addresses, &top_addr_key, &top)) { + struct addr_location prev_al; + bool is_contiguous = false; + + /* Current max allocated mmap address within the process. */ + remap_addr = top->remapped_max; + + addr_location__init(&prev_al); + if (thread__find_map(aslr_thread, cpumode, start - 1, &prev_al)) { + if (map__end(prev_al.map) == start) + is_contiguous = true; + } + addr_location__exit(&prev_al); + + if (is_contiguous) { + /* Contiguous mapping, do not add 1 page gap! 
*/ + remap_addr = round_up_to_page_size(remap_addr); + } else { + /* Give 1 page gap from current max page. */ + remap_addr = round_up_to_page_size(remap_addr); + remap_addr += page_size; + } + if (remap_addr + len > top->remapped_max) + top->remapped_max = remap_addr + len; + } else { + /* First address of the process, allocate key and first top address. */ + struct top_addresses_key *tk; + struct process_top_address *top_val; + struct perf_env *env = session_machine ? session_machine->env : NULL; + bool is_64 = env ? perf_env__kernel_is_64_bit(env) : (sizeof(void *) == 8); + u64 kernel_start_addr = is_64 ? kernel_space_start_64 : kernel_space_start_32; + + remap_addr = (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? + kernel_start_addr : user_space_start; + remap_addr = round_up_to_page_size(remap_addr); + + tk = malloc(sizeof(*tk)); + top_val = malloc(sizeof(*top_val)); + if (!tk || !top_val) { + err = -ENOMEM; + } else { + *tk = top_addr_key; + top_val->remapped_max = remap_addr + len; + err = hashmap__insert(&aslr->top_addresses, tk, top_val, + HASHMAP_ADD, NULL, NULL); + } + if (err) { + errno = -err; + pr_err("Failure to add ASLR process top address %m\n"); + free(tk); + free(top_val); + addr_location__exit(&al); + return 0; + } + } + /* Create remapping entry. */ + new_remap_key = malloc(sizeof(*new_remap_key)); + new_remap_val = malloc(sizeof(u64)); + if (!new_remap_key || !new_remap_val) { + err = -ENOMEM; + } else { + *new_remap_key = remap_key; + new_remap_key->dso = dso__get(remap_key.dso); + if (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) { + if (al.map) { + *new_remap_val = remap_addr - + (start - map__start(al.map)) - + map__pgoff(al.map); + } else { + /* + * Subtract pgoff from the base virtual address so that + * when the lookup path adds pgoff back, it perfectly + * cancels out and returns remap_addr. 
+ */ + *new_remap_val = remap_addr - pgoff; + } + } else { + *new_remap_val = remap_addr - (al.map ? map__pgoff(al.map) : pgoff); + } + err = hashmap__add(&aslr->remap_addresses, new_remap_key, new_remap_val); + if (err) + dso__put(new_remap_key->dso); + } + if (err) { + errno = -err; + pr_err("Failure to add ASLR remapping %m\n"); + free(new_remap_key); + free(new_remap_val); + addr_location__exit(&al); + return 0; + } + addr_location__exit(&al); + return remap_addr; +} + +static int aslr_tool__process_mmap(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_mmap(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap.pid, event->mmap.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap, &event->mmap, event->mmap.header.size); + /* Remaps the mmap.start. */ + new_event->mmap.start = aslr_tool__findnew_mapping(aslr, machine, thread, cpumode, + event->mmap.start, + event->mmap.len, + event->mmap.pgoff); + /* + * For anonymous memory (and kernel maps), the kernel populates the + * event's pgoff field with the original un-obfuscated virtual address + * in bytes (i.e. 
(addr >> PAGE_SHIFT) << PAGE_SHIFT). + * We must overwrite pgoff with the new remapped byte address to prevent + * leaking the original ASLR layout. + */ + if (cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL || + is_anon_memory(event->mmap.filename) || is_no_dso_memory(event->mmap.filename)) + new_event->mmap.pgoff = new_event->mmap.start; + err = delegate->mmap(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_mmap2(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_mmap2(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap2.pid, event->mmap2.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap2, &event->mmap2, event->mmap2.header.size); + /* Remaps the mmap.start. */ + new_event->mmap2.start = aslr_tool__findnew_mapping(aslr, machine, thread, cpumode, + event->mmap2.start, + event->mmap2.len, + event->mmap2.pgoff); + /* + * For anonymous memory (and kernel maps), the kernel populates the + * event's pgoff field with the original un-obfuscated virtual address + * in bytes (i.e. 
(addr >> PAGE_SHIFT) << PAGE_SHIFT). + * We must overwrite pgoff with the new remapped byte address to prevent + * leaking the original ASLR layout. + */ + if (cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL || + is_anon_memory(event->mmap2.filename) || is_no_dso_memory(event->mmap2.filename)) + new_event->mmap2.pgoff = new_event->mmap2.start; + err = delegate->mmap2(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_comm(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. 
*/ + err = perf_event__process_comm(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->comm(delegate, event, sample, machine); +} + +static int aslr_tool__process_fork(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_fork(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->fork(delegate, event, sample, machine); +} + +static int aslr_tool__process_exit(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. 
*/ + err = perf_event__process_exit(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->exit(delegate, event, sample, machine); +} + +static int aslr_tool__process_text_poke(const struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + /* Drop in case the instruction encodes an ASLR revealing address. */ + return 0; +} + +static int aslr_tool__process_ksymbol(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + err = perf_event__process_ksymbol(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, kernel_pid, 0); + if (!thread) + return -ENOMEM; + memcpy(&new_event->ksymbol, &event->ksymbol, event->ksymbol.header.size); + /* Remaps the ksymbol.addr. */ + new_event->ksymbol.addr = aslr_tool__findnew_mapping(aslr, machine, thread, + PERF_RECORD_MISC_KERNEL, + event->ksymbol.addr, + event->ksymbol.len, + /*pgoff=*/0); + + err = delegate->ksymbol(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_sample(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool = container_of(tool, 
struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct perf_tool *delegate = aslr->tool.delegate; + + return delegate->sample(delegate, event, sample, machine); +} + +static int skipn(int fd, off_t n) +{ + char buf[4096]; + ssize_t ret; + + while (n > 0) { + ret = read(fd, buf, min_t(off_t, n, (off_t)sizeof(buf))); + if (ret <= 0) + return ret; + n -= ret; + } + + return 0; +} + +static s64 aslr_tool__process_auxtrace(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, + union perf_event *event) +{ + pr_warning_once("ASLR: Dropping auxtrace data as it cannot be obfuscated.\n"); + if (perf_data__is_pipe(session->data)) { + /* Copy behavior of the stub by reading all pipe data. */ + int err = skipn(perf_data__fd(session->data), event->auxtrace.size); + + if (err < 0) + return err; + } + return event->auxtrace.size; +} + +static int aslr_tool__process_auxtrace_info(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + +static int aslr_tool__process_auxtrace_error(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + + +void aslr_tool__strip_attr_event(union perf_event *event, struct evlist **pevlist) +{ + struct evsel *evsel; + bool needs_swap = false; + + if (pevlist && *pevlist) { + evsel = evlist__last(*pevlist); + if (evsel) + needs_swap = evsel->needs_swap; + } + + if (event->header.size >= (offsetof(struct perf_record_header_attr, + attr.sample_type) + sizeof(u64))) { + u64 st = event->attr.attr.sample_type; + + if (needs_swap) + st = bswap_64(st); + + st &= ASLR_SUPPORTED_SAMPLE_TYPE; + + if (needs_swap) + st = bswap_64(st); + + event->attr.attr.sample_type = st; + } + + if (event->header.size >= (offsetof(struct perf_record_header_attr, + attr.type) + sizeof(u32))) { + u32 type = 
event->attr.attr.type; + + if (needs_swap) + type = bswap_32(type); + + if (type == PERF_TYPE_BREAKPOINT && + event->header.size >= (offsetof(struct perf_record_header_attr, + attr.bp_addr) + sizeof(u64))) { + event->attr.attr.bp_addr = 0; + } else if (type >= PERF_TYPE_MAX) { + struct perf_pmu *pmu; + + pmu = perf_pmus__find_by_type(type); + if (pmu && (!strcmp(pmu->name, "kprobe") || + !strcmp(pmu->name, "uprobe"))) { + if (event->header.size >= + (offsetof(struct perf_record_header_attr, + attr.config1) + sizeof(u64))) + event->attr.attr.config1 = 0; + if (event->header.size >= + (offsetof(struct perf_record_header_attr, + attr.config2) + sizeof(u64))) + event->attr.attr.config2 = 0; + } + } + } +} + +void aslr_tool__strip_evlist(struct perf_tool *tool __maybe_unused, + struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) + evsel->core.attr.bp_addr = 0; + else if (evsel->core.attr.type >= PERF_TYPE_MAX) { + struct perf_pmu *pmu = perf_pmus__find_by_type(evsel->core.attr.type); + + if (pmu && (!strcmp(pmu->name, "kprobe") || + !strcmp(pmu->name, "uprobe"))) { + evsel->core.attr.config1 = 0; + evsel->core.attr.config2 = 0; + } + } + } +} + +static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) +{ + delegate_tool__init(&aslr->tool, delegate); + aslr->tool.tool.ordered_events = true; + + machines__init(&aslr->machines); + + hashmap__init(&aslr->remap_addresses, + remap_addresses__hash, remap_addresses__equal, + /*ctx=*/NULL); + hashmap__init(&aslr->top_addresses, + top_addresses__hash, top_addresses__equal, + /*ctx=*/NULL); + + aslr->tool.tool.sample = aslr_tool__process_sample; + /* read - reads a counter, okay to delegate. 
*/ + aslr->tool.tool.mmap = aslr_tool__process_mmap; + aslr->tool.tool.mmap2 = aslr_tool__process_mmap2; + aslr->tool.tool.comm = aslr_tool__process_comm; + aslr->tool.tool.fork = aslr_tool__process_fork; + aslr->tool.tool.exit = aslr_tool__process_exit; + /* namespaces, cgroup, lost, lost_sample, aux, */ + /* itrace_start, aux_output_hw_id, context_switch, throttle, unthrottle */ + /* - no virtual addresses. */ + aslr->tool.tool.ksymbol = aslr_tool__process_ksymbol; + /* bpf - no virtual address. */ + aslr->tool.tool.text_poke = aslr_tool__process_text_poke; + /* + * event_update, tracing_data, finished_round, build_id, id_index, + * auxtrace_info, auxtrace_error, time_conv, thread_map, cpu_map, + * stat_config, stat, feature, finished_init, bpf_metadata, compressed, + * auxtrace - no virtual addresses. + */ + aslr->tool.tool.auxtrace = aslr_tool__process_auxtrace; + aslr->tool.tool.auxtrace_info = aslr_tool__process_auxtrace_info; + aslr->tool.tool.auxtrace_error = aslr_tool__process_auxtrace_error; +} + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate) +{ + struct aslr_tool *aslr = zalloc(sizeof(*aslr)); + + if (!aslr) + return NULL; + + aslr_tool__init(aslr, delegate); + return &aslr->tool.tool; +} + +void aslr_tool__delete(struct perf_tool *tool) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct hashmap_entry *cur; + size_t bkt; + struct rb_node *nd; + + if (!tool) + return; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + + hashmap__for_each_entry(&aslr->remap_addresses, cur, bkt) { + struct remap_addresses_key *key = (struct remap_addresses_key *)cur->pkey; + + if (key) + dso__put(key->dso); + zfree(&cur->pkey); + zfree(&cur->pvalue); + } + hashmap__for_each_entry(&aslr->top_addresses, cur, bkt) { + zfree(&cur->pkey); + zfree(&cur->pvalue); + } + + hashmap__clear(&aslr->remap_addresses); + hashmap__clear(&aslr->top_addresses); + 
aslr_tool__destroy_machines_priv(&aslr->machines); + machines__destroy_kernel_maps(&aslr->machines); + + while ((nd = rb_first_cached(&aslr->machines.guests)) != NULL) { + struct machine *machine = rb_entry(nd, struct machine, rb_node); + + rb_erase_cached(nd, &aslr->machines.guests); + machine__delete(machine); + } + + machines__exit(&aslr->machines); + free(aslr); +} diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h new file mode 100644 index 000000000000..2b82f711bc67 --- /dev/null +++ b/tools/perf/util/aslr.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_ASLR_H +#define __PERF_ASLR_H + +#include <linux/perf_event.h> + +#define ASLR_SUPPORTED_SAMPLE_TYPE ( \ + PERF_SAMPLE_IDENTIFIER | \ + PERF_SAMPLE_IP | \ + PERF_SAMPLE_TID | \ + PERF_SAMPLE_TIME | \ + PERF_SAMPLE_ADDR | \ + PERF_SAMPLE_ID | \ + PERF_SAMPLE_STREAM_ID | \ + PERF_SAMPLE_CPU | \ + PERF_SAMPLE_PERIOD | \ + PERF_SAMPLE_READ | \ + PERF_SAMPLE_CALLCHAIN | \ + PERF_SAMPLE_RAW | \ + PERF_SAMPLE_BRANCH_STACK | \ + PERF_SAMPLE_STACK_USER | \ + PERF_SAMPLE_WEIGHT_TYPE | \ + PERF_SAMPLE_DATA_SRC | \ + PERF_SAMPLE_TRANSACTION | \ + PERF_SAMPLE_PHYS_ADDR | \ + PERF_SAMPLE_CGROUP | \ + PERF_SAMPLE_DATA_PAGE_SIZE | \ + PERF_SAMPLE_CODE_PAGE_SIZE | \ + PERF_SAMPLE_AUX) + +struct perf_tool; +struct evsel; +struct evlist; +union perf_event; + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate); +void aslr_tool__delete(struct perf_tool *aslr); +void aslr_tool__strip_attr_event(union perf_event *event, struct evlist **pevlist); +void aslr_tool__strip_evlist(struct perf_tool *tool, struct evlist *evlist); + +#endif /* __PERF_ASLR_H */ -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v17 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking 2026-06-07 6:09 ` [PATCH v17 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers @ 2026-06-07 8:27 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-06-07 8:27 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 3 potential issue(s) to consider: - [High] Modifying memory-mapped read-only event buffers in place causes a segmentation fault. - [High] Inconsistent handling of `(start - map__start(al.map))` for user space mappings leads to corrupted virtual address calculations. - [High] KSYMBOL unregister events are emitted with an incorrect, newly allocated address instead of the original remapped address. -- commit c190853279a63eb364ce133684668cb85c64d5e1 Author: Ian Rogers <irogers@google.com> perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking If perf.data files are taken from one machine to another they may leak virtual addresses and so weaken ASLR on the machine they are coming from. Add an aslr option for perf inject that remaps all virtual addresses, or drops data/events, so that the virtual address information isn't leaked. > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > new file mode 100644 > index 0000000000000..2c5fafbe5d846 > --- /dev/null > +++ b/tools/perf/util/aslr.c [ ... ] > +static u64 aslr_tool__findnew_mapping(struct aslr_tool *aslr, > + struct machine *session_machine, > + struct thread *aslr_thread, > + u8 cpumode, u64 start, > + u64 len, u64 pgoff) > +{ [ ... 
] > + if (cpumode == PERF_RECORD_MISC_KERNEL || > + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) { > + if (al.map) { > + *new_remap_val = remap_addr - > + (start - map__start(al.map)) - > + map__pgoff(al.map); > + } else { > + /* > + * Subtract pgoff from the base virtual address so that > + * when the lookup path adds pgoff back, it perfectly > + * cancels out and returns remap_addr. > + */ > + *new_remap_val = remap_addr - pgoff; > + } > + } else { > + *new_remap_val = remap_addr - (al.map ? map__pgoff(al.map) : pgoff); [Severity: High] Is there a missing subtraction of (start - map__start(al.map)) for the user space mapping here? The kernel space path just above subtracts it. If perf merges adjacent VMAs, start might not equal map__start(al.map), which could result in returning an address inflated by that difference on subsequent lookups and corrupting the virtual address calculations. > + } > + err = hashmap__add(&aslr->remap_addresses, new_remap_key, new_remap_val); [ ... ] > +static int aslr_tool__process_ksymbol(const struct perf_tool *tool, > + union perf_event *event, > + struct perf_sample *sample, > + struct machine *machine) > +{ [ ... ] > + err = perf_event__process_ksymbol(tool, event, sample, aslr_machine); > + if (err) > + return err; > + > + thread = machine__findnew_thread(aslr_machine, kernel_pid, 0); > + if (!thread) > + return -ENOMEM; > + memcpy(&new_event->ksymbol, &event->ksymbol, event->ksymbol.header.size); > + /* Remaps the ksymbol.start */ > + new_event->ksymbol.addr = aslr_tool__findnew_mapping(aslr, machine, thread, [Severity: High] For KSYMBOL unregister events, does calling perf_event__process_ksymbol() earlier in this function delete the map from aslr_machine->kmaps before we can look it up here? If the map is deleted beforehand, aslr_tool__findnew_mapping() might fail to find the original map, fall back to generating a new unmapped key, and emit a mismatched remapped address instead of the original one. 
> + PERF_RECORD_MISC_KERNEL, > + event->ksymbol.addr, > + event->ksymbol.len, > + /*pgoff=*/0); [ ... ] > +void aslr_tool__strip_attr_event(union perf_event *event, struct evlist **pevlist) > +{ [ ... ] > + if (event->header.size >= (offsetof(struct perf_record_header_attr, > + attr.sample_type) + sizeof(u64))) { > + u64 st = event->attr.attr.sample_type; > + > + if (needs_swap) > + st = bswap_64(st); > + > + st &= ASLR_SUPPORTED_SAMPLE_TYPE; > + > + if (needs_swap) > + st = bswap_64(st); > + > + event->attr.attr.sample_type = st; [Severity: High] Does this code modify the event payload in place? When processing a native-endian perf.data file, perf memory-maps the file's data section with PROT_READ. If a PERF_RECORD_HEADER_ATTR event is located in this read-only mapped data section, could writing directly to event->attr.attr.sample_type trigger a segmentation fault? > + } -- Sashiko AI review · https://sashiko.dev/#/patchset/20260607060933.3274263-1-irogers@google.com?part=2 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v17 3/5] perf inject/aslr: Implement sample address remapping 2026-06-07 6:09 ` [PATCH v17 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-07 6:09 ` [PATCH v17 1/5] perf maps: Add maps__mutate_mapping Ian Rogers 2026-06-07 6:09 ` [PATCH v17 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers @ 2026-06-07 6:09 ` Ian Rogers 2026-06-07 6:27 ` sashiko-bot 2026-06-07 6:09 ` [PATCH v17 4/5] perf aslr: Strip sample registers Ian Rogers ` (2 subsequent siblings) 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-07 6:09 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz Add the sample address remapping logic to the ASLR tool. This patch implements aslr_tool__process_sample, which parses sample events, remaps IPs, ADDRs, callchains, and branch stacks using the mappings collected from metadata events, and drops potentially leaking raw, register, stack, physical address, and aux samples. Also adds the aslr_tool__remap_address helper function. Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> Signed-off-by: Ian Rogers <irogers@google.com> Assisted-by: Antigravity:gemini-3.1-pro --- tools/perf/util/aslr.c | 478 +++++++++++++++++++++++++++++++++++++++- tools/perf/util/evsel.c | 6 +- tools/perf/util/evsel.h | 10 +- 3 files changed, 485 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c index 2c5fafbe5d84..8980599b0158 100644 --- a/tools/perf/util/aslr.c +++ b/tools/perf/util/aslr.c @@ -20,6 +20,7 @@ #include <linux/zalloc.h> #include <inttypes.h> #include <unistd.h> +#include <byteswap.h> /** * struct remap_addresses_key - Key for mapping original addresses to remapped ones. 
@@ -112,6 +113,60 @@ static u64 round_up_to_page_size(u64 addr) return (addr + page_size - 1) & ~((u64)page_size - 1); } +static u64 aslr_tool__remap_address(struct aslr_tool *aslr, + struct thread *aslr_thread, + u8 cpumode, + u64 addr) +{ + struct addr_location al; + struct remap_addresses_key key; + u64 *remapped_invariant_ptr = NULL; + u64 remap_addr = 0; + u8 effective_cpumode = cpumode; + + if (!aslr_thread) + return 0; /* No thread. */ + + addr_location__init(&al); + if (!thread__find_map(aslr_thread, cpumode, addr, &al)) { + /* + * If lookup fails with specified cpumode, try fallback to the other space + * to be robust against bad cpumode in samples. + */ + if (cpumode == PERF_RECORD_MISC_KERNEL) + effective_cpumode = PERF_RECORD_MISC_USER; + else if (cpumode == PERF_RECORD_MISC_USER) + effective_cpumode = PERF_RECORD_MISC_KERNEL; + else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + effective_cpumode = PERF_RECORD_MISC_GUEST_USER; + else if (cpumode == PERF_RECORD_MISC_GUEST_USER) + effective_cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + + if (!thread__find_map(aslr_thread, effective_cpumode, addr, &al)) { + addr_location__exit(&al); + return 0; /* No mmap. */ + } + } + + key.machine = maps__machine(thread__maps(aslr_thread)); + key.dso = map__dso(al.map); + key.invariant = map__start(al.map) - map__pgoff(al.map); + key.pid = (effective_cpumode == PERF_RECORD_MISC_KERNEL || + effective_cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? 
+ kernel_pid : thread__pid(aslr_thread); + + if (hashmap__find(&aslr->remap_addresses, &key, &remapped_invariant_ptr)) { + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + + (addr - map__start(al.map)); + } else { + pr_debug("Cannot find a remapped entry for address %lx in mapping %lx(%lx) for pid=%d\n", + addr, map__start(al.map), map__size(al.map), key.pid); + } + + addr_location__exit(&al); + return remap_addr; +} + struct aslr_machine_priv { bool kernel_maps_loaded; }; @@ -599,13 +654,428 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, struct perf_sample *sample, struct machine *machine) { - struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); - struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); - struct perf_tool *delegate = aslr->tool.delegate; + struct evsel *evsel = sample->evsel; + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + int ret; + u64 sample_type; + struct thread *thread; + struct machine *aslr_machine; + __u64 max_i; + __u64 max_j; + union perf_event *new_event; + struct perf_sample new_sample; + __u64 *in_array, *out_array; + u8 cpumode; + u64 addr; + size_t i; + size_t j; + bool orig_needs_swap; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + orig_needs_swap = evsel->needs_swap; + + if (evsel__is_dummy_event(evsel)) + return delegate->sample(delegate, event, sample, machine); + + ret = -EFAULT; + sample_type = evsel->core.attr.sample_type; + max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); + max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); + new_event = (union perf_event *)aslr->event_copy; + cpumode = sample->cpumode; + i = 0; + j = 0; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if 
(aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + thread = machine__findnew_thread(aslr_machine, sample->pid, sample->tid); + + if (!thread) + return -ENOMEM; + + if (max_i > PERF_SAMPLE_MAX_SIZE / sizeof(u64)) + goto out_put; + + new_event->sample.header = event->sample.header; + + in_array = &event->sample.array[0]; + out_array = &new_event->sample.array[0]; + +#define CHECK_BOUNDS(required_i, required_j) \ + (i + (required_i) > max_i || j + (required_j) > max_j) + +#define COPY_U64() \ + do { \ + if (CHECK_BOUNDS(1, 1)) { \ + ret = -EFAULT; \ + goto out_put; \ + } \ + out_array[j++] = in_array[i++]; \ + } while (0) + +#define REMAP_U64(addr_field) \ + do { \ + u64 remapped; \ + if (CHECK_BOUNDS(1, 1)) { \ + ret = -EFAULT; \ + goto out_put; \ + } \ + remapped = aslr_tool__remap_address(aslr, thread, cpumode, addr_field); \ + if (orig_needs_swap) \ + remapped = bswap_64(remapped); \ + out_array[j++] = remapped; \ + i++; \ + } while (0) + + if (sample_type & PERF_SAMPLE_IDENTIFIER) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_IP) + REMAP_U64(sample->ip); + if (sample_type & PERF_SAMPLE_TID) + COPY_U64(); /* pid, tid */ + if (sample_type & PERF_SAMPLE_TIME) + COPY_U64(); /* time */ + if (sample_type & PERF_SAMPLE_ADDR) + REMAP_U64(sample->addr); + if (sample_type & PERF_SAMPLE_ID) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_STREAM_ID) + COPY_U64(); /* stream_id */ + if (sample_type & PERF_SAMPLE_CPU) + COPY_U64(); /* cpu, res */ + if (sample_type & PERF_SAMPLE_PERIOD) + COPY_U64(); /* period */ + if (sample_type & PERF_SAMPLE_READ) { + if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if 
(evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } else { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + nr = in_array[i]; + if (orig_needs_swap) + nr = bswap_64(nr); + out_array[j++] = in_array[i++]; + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + for (u64 cntr = 0; cntr < nr; cntr++) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } + } + } + if (sample_type & PERF_SAMPLE_CALLCHAIN) { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + nr = in_array[i]; + if (orig_needs_swap) + nr = bswap_64(nr); + out_array[j++] = in_array[i++]; + + for (u64 cntr = 0; cntr < nr; cntr++) { + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + addr = in_array[i++]; + if (orig_needs_swap) + addr = bswap_64(addr); + if (addr >= PERF_CONTEXT_MAX) { + out_array[j++] = orig_needs_swap ? bswap_64(addr) : addr; + switch (addr) { + case PERF_CONTEXT_HV: + cpumode = PERF_RECORD_MISC_HYPERVISOR; + break; + case PERF_CONTEXT_KERNEL: + cpumode = PERF_RECORD_MISC_KERNEL; + break; + case PERF_CONTEXT_USER: + cpumode = PERF_RECORD_MISC_USER; + break; + case PERF_CONTEXT_GUEST: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_KERNEL: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_USER: + cpumode = PERF_RECORD_MISC_GUEST_USER; + break; + case PERF_CONTEXT_USER_DEFERRED: + if (cntr + 1 >= nr) { + pr_debug("Truncated callchain deferred cookie context\n"); + ret = 0; + goto out_put; + } + /* + * Immediately followed by a 64-bit + * stitching cookie. Skip/Copy it! 
+ */ + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j++] = in_array[i++]; + cntr++; + cpumode = PERF_RECORD_MISC_USER; + break; + default: + pr_debug("invalid callchain context: %"PRIx64"\n", addr); + ret = 0; + goto out_put; + } + continue; + } + addr = aslr_tool__remap_address(aslr, thread, cpumode, addr); + if (orig_needs_swap) + addr = bswap_64(addr); + out_array[j++] = addr; + } + } + if (sample_type & PERF_SAMPLE_RAW) { + size_t bytes = sizeof(u32) + sample->raw_size; + size_t u64_words = (bytes + 7) / 8; + + if (i + u64_words > max_i || j + u64_words > max_j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], bytes); + i += u64_words; + j += u64_words; + /* + * TODO: certain raw samples can be remapped, such as + * tracepoints by examining their fields. + */ + pr_debug("Dropping raw samples as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + nr = in_array[i]; + if (orig_needs_swap) + nr = bswap_64(nr); + out_array[j++] = in_array[i++]; + + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) + COPY_U64(); /* hw_idx */ + + if (nr > (ULLONG_MAX / 3)) { + ret = -EFAULT; + goto out_put; + } + if (nr * 3 > max_i - i || nr * 3 > max_j - j) { + ret = -EFAULT; + goto out_put; + } + for (u64 cntr = 0; cntr < nr; cntr++) { + u64 from = in_array[i++]; + u64 to = in_array[i++]; + + if (orig_needs_swap) { + from = bswap_64(from); + to = bswap_64(to); + } + + from = aslr_tool__remap_address(aslr, thread, sample->cpumode, from); + to = aslr_tool__remap_address(aslr, thread, sample->cpumode, to); + + if (orig_needs_swap) { + from = bswap_64(from); + to = bswap_64(to); + } + + out_array[j++] = from; + out_array[j++] = to; + out_array[j++] = in_array[i++]; /* flags */ + } + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) { + if (nr > max_i - i || nr 
> max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); + i += nr; + j += nr; + /* TODO: confirm branch counters don't leak ASLR information. */ + pr_debug("Dropping sample branch counters as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + } + if (sample_type & PERF_SAMPLE_REGS_USER) { + if (CHECK_BOUNDS(1, 0)) { + ret = -EFAULT; + goto out_put; + } + /* abi */ + COPY_U64(); + /* TODO: can this be less conservative? */ + pr_debug("Dropping regs user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_STACK_USER) { + u64 size; - return delegate->sample(delegate, event, sample, machine); + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + size = in_array[i]; + if (orig_needs_swap) + size = bswap_64(size); + out_array[j++] = in_array[i++]; + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + if (size % 8) { + size_t pad = 8 - (size % 8); + + memset(((char *)&out_array[j]) + size, 0, pad); + } + i += u64_words; + j += u64_words; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping stack user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + COPY_U64(); /* perf_sample_weight */ + if (sample_type & PERF_SAMPLE_DATA_SRC) + COPY_U64(); /* data_src */ + if (sample_type & PERF_SAMPLE_TRANSACTION) + COPY_U64(); /* transaction */ + if (sample_type & PERF_SAMPLE_REGS_INTR) { + if (CHECK_BOUNDS(1, 0)) { + ret = -EFAULT; + goto out_put; + } + /* abi */ + COPY_U64(); + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_PHYS_ADDR) { + COPY_U64(); /* phys_addr */ + /* TODO: can this be less conservative? */ + pr_debug("Dropping physical address sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_CGROUP) + COPY_U64(); /* cgroup */ + if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + COPY_U64(); /* data_page_size */ + if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + COPY_U64(); /* code_page_size */ + + if (sample_type & PERF_SAMPLE_AUX) { + u64 size; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + size = out_array[j++]; + i++; + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + if (size % 8) { + size_t pad = 8 - (size % 8); + + memset(((char *)&out_array[j]) + size, 0, pad); + } + i += u64_words; + j += u64_words; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping aux sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + if (evsel__is_offcpu_event(evsel)) { + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping off-CPU sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); + + perf_sample__init(&new_sample, /*all=*/ true); + ret = __evsel__parse_sample(evsel, new_event, &new_sample, orig_needs_swap); + + if (ret) { + perf_sample__exit(&new_sample); + goto out_put; + } + + new_sample.evsel = evsel; + ret = delegate->sample(delegate, new_event, &new_sample, machine); + perf_sample__exit(&new_sample); + +out_put: + thread__put(thread); + return ret; } +#undef CHECK_BOUNDS +#undef COPY_U64 +#undef REMAP_U64 + static int skipn(int fd, off_t n) { char buf[4096]; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 34c03f47a913..05fa0010c858 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -3337,11 +3337,11 @@ static int __set_offcpu_sample(struct perf_sample *data) return -EFAULT; } -int evsel__parse_sample(struct evsel *evsel, union perf_event *event, - struct perf_sample *data) +int __evsel__parse_sample(struct evsel *evsel, union perf_event *event, + struct perf_sample *data, bool needs_swap) { u64 type = evsel->core.attr.sample_type; - bool swapped = evsel->needs_swap; + bool swapped = needs_swap; const __u64 *array; u16 max_size = event->header.size; const void *endp = (void *)event + max_size; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 8178858d168a..8009be22cc3f 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -432,8 +432,14 @@ static inline int evsel__read_on_cpu_scaled(struct evsel *evsel, int cpu_map_idx return __evsel__read_on_cpu(evsel, cpu_map_idx, thread, true); } -int evsel__parse_sample(struct evsel *evsel, union perf_event *event, - struct perf_sample *sample); +int __evsel__parse_sample(struct evsel *evsel, union perf_event *event, + struct perf_sample *data, bool needs_swap); + +static inline int evsel__parse_sample(struct evsel *evsel, union perf_event 
*event, + struct perf_sample *data) +{ + return __evsel__parse_sample(evsel, event, data, evsel->needs_swap); +} int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event, u64 *timestamp); -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v17 3/5] perf inject/aslr: Implement sample address remapping 2026-06-07 6:09 ` [PATCH v17 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers @ 2026-06-07 6:27 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-06-07 6:27 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 2 potential issue(s) to consider: - [High] Incorrect double byte-swapping of already natively-swapped 64-bit array fields corrupts cross-endian parsing and address remapping. - [High] The entire sample event is dropped if it includes branch stack counters, silently breaking profiles collected with LBR counters. -- commit 99425ccaa2ec1a33c2b9f564c32b12db661696fe Author: Ian Rogers <irogers@google.com> perf inject/aslr: Implement sample address remapping Add the sample address remapping logic to the ASLR tool. This patch implements aslr_tool__process_sample, which parses sample events, remaps IPs, ADDRs, callchains, and branch stacks using the mappings collected from metadata events. > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > index 2c5fafbe5d846..8980599b0158f 100644 > --- a/tools/perf/util/aslr.c > +++ b/tools/perf/util/aslr.c [ ... ] > @@ -599,13 +654,428 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, > { [ ... ] > +#define REMAP_U64(addr_field) \ > + do { \ > + u64 remapped; \ > + if (CHECK_BOUNDS(1, 1)) { \ > + ret = -EFAULT; \ > + goto out_put; \ > + } \ > + remapped = aslr_tool__remap_address(aslr, thread, cpumode, addr_field); \ > + if (orig_needs_swap) \ > + remapped = bswap_64(remapped); \ [Severity: High] Does this code incorrectly double byte-swap the remapped addresses? The perf core framework, perf_event__all64_swap(), already byte-swaps the entire PERF_RECORD_SAMPLE array payload to host endianness before passing it to the tool callbacks. 
By calling bswap_64() again in aslr_tool__process_sample when orig_needs_swap is true, won't this mix host and file endianness in the out_array, corrupting the output perf.data layout and breaking cross-endian parsing downstream? > + out_array[j++] = remapped; \ > + i++; \ > + } while (0) [ ... ] > + if (sample_type & PERF_SAMPLE_CALLCHAIN) { > + u64 nr; > + > + if (CHECK_BOUNDS(1, 1)) { > + ret = -EFAULT; > + goto out_put; > + } > + nr = in_array[i]; > + if (orig_needs_swap) > + nr = bswap_64(nr); [Severity: High] Can this double-swapping of the nr counter cause parsing loops to fail? Since in_array[i] is already in host endianness, wouldn't swapping it back into file endianness produce a massive integer value for nr? > + out_array[j++] = in_array[i++]; [ ... ] > + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { > + u64 nr; > + > + if (CHECK_BOUNDS(1, 1)) { > + ret = -EFAULT; > + goto out_put; > + } > + nr = in_array[i]; > + if (orig_needs_swap) > + nr = bswap_64(nr); [Severity: High] Similarly, could swapping nr here cause the branch stack iteration bounds checks to unconditionally return -EFAULT and drop the event? > + out_array[j++] = in_array[i++]; [ ... ] > + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) { > + if (nr > max_i - i || nr > max_j - j) { > + ret = -EFAULT; > + goto out_put; > + } > + memcpy(&out_array[j], &in_array[i], nr * sizeof(u64)); > + i += nr; > + j += nr; > + /* TODO: confirm branch counters don't leak ASLR information. */ > + pr_debug("Dropping sample branch counters as possible ASLR leak\n"); > + ret = 0; > + goto out_put; [Severity: High] Does this logic silently discard the entire sample event when branch counters are present? By executing ret = 0 and jumping to out_put in aslr_tool__process_sample, the delegate->sample invocation is bypassed. Won't this break profiles collected with LBR counters (e.g. 
perf record -b) by completely dropping all branch stack counter samples instead of just omitting the specific fields? > + } > + } [ ... ] > + if (sample_type & PERF_SAMPLE_STACK_USER) { > + u64 size; > > - return delegate->sample(delegate, event, sample, machine); > + if (CHECK_BOUNDS(1, 1)) { > + ret = -EFAULT; > + goto out_put; > + } > + size = in_array[i]; > + if (orig_needs_swap) > + size = bswap_64(size); [Severity: High] Could double-swapping the PERF_SAMPLE_STACK_USER size corrupt the payload boundaries? Like the nr values above, swapping the already host-endian size might trigger a bounds failure in the subsequent u64_words bounds check. > + out_array[j++] = in_array[i++]; > + if (size > 0) { -- Sashiko AI review · https://sashiko.dev/#/patchset/20260607060933.3274263-1-irogers@google.com?part=3 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v17 4/5] perf aslr: Strip sample registers 2026-06-07 6:09 ` [PATCH v17 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (2 preceding siblings ...) 2026-06-07 6:09 ` [PATCH v17 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers @ 2026-06-07 6:09 ` Ian Rogers 2026-06-07 6:27 ` sashiko-bot 2026-06-07 6:09 ` [PATCH v17 5/5] perf test: Add inject ASLR test Ian Rogers 2026-06-07 21:36 ` [PATCH v18 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-07 6:09 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz Extend the ASLR tool stripping helpers to drop register dump payloads by masking out the relevant perf_event_attr fields (sample_regs_user, sample_regs_intr) when the delegated tool is handling the data. struct aslr_evsel_priv maintains the original perf_event_attr values and is looked up via the evsel_orig_attrs hashmap so that sample sizes can be properly parsed even when bits are stripped from the pipeline. This allows us to keep samples that would otherwise be dropped because they contain registers, while still obfuscating the registers. 
Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> Signed-off-by: Ian Rogers <irogers@google.com> Assisted-by: Antigravity:gemini-3.1-pro --- tools/perf/builtin-inject.c | 31 +++- tools/perf/util/aslr.c | 298 ++++++++++++++++++++++++++---------- tools/perf/util/aslr.h | 9 +- 3 files changed, 248 insertions(+), 90 deletions(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 6cc9c6dbf608..abb2228bc5bd 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -241,6 +241,9 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, if (ret) return ret; + if (inject->aslr) + aslr_tool__strip_attr_event(event, *pevlist); + /* If the output isn't a pipe then the attributes will be written as part of the header. */ if (!inject->output.is_pipe) return 0; @@ -279,8 +282,7 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, attr.size = sizeof(struct perf_event_attr); attr.sample_type &= ~PERF_SAMPLE_AUX; - if (inject->aslr) - aslr_tool__strip_attr_event(event, pevlist); + if (inject->itrace_synth_opts.add_last_branch) { attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; @@ -2599,6 +2601,10 @@ static int __cmd_inject(struct perf_inject *inject) evsel->core.attr.exclude_callchain_user = 0; } } + + if (inject->aslr) + aslr_tool__strip_evlist(inject->session->tool, session->evlist); + session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc, @@ -2857,6 +2863,18 @@ int cmd_inject(int argc, const char **argv) if (zstd_init(&(inject.session->zstd_data), 0) < 0) pr_warning("Decompression initialization failed.\n"); + if (inject.aslr) { + struct evsel *evsel; + + evlist__for_each_entry(inject.session->evlist, evsel) { + ret = aslr_tool__cache_orig_attrs(tool, evsel); + if (ret) { + pr_err("Failed to cache original attributes: %d\n", ret); + goto out_delete; + } + 
} + } + /* Save original section info before feature bits change */ ret = save_section_info(&inject); if (ret) @@ -2875,10 +2893,17 @@ int cmd_inject(int argc, const char **argv) * the input. */ if (!data.is_pipe) { + if (inject.aslr) + aslr_tool__strip_evlist(tool, inject.session->evlist); + ret = perf_event__synthesize_for_pipe(&inject.tool, inject.session, &inject.output, perf_event__repipe); + + if (inject.aslr) + aslr_tool__restore_evlist(tool, inject.session->evlist); + if (ret < 0) goto out_delete; } @@ -2943,8 +2968,6 @@ int cmd_inject(int argc, const char **argv) goto out_delete; ret = __cmd_inject(&inject); - if (inject.aslr) - aslr_tool__strip_evlist(tool, inject.session->evlist); guest_session__exit(&inject.guest_session); diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c index 8980599b0158..bf6bb2715357 100644 --- a/tools/perf/util/aslr.c +++ b/tools/perf/util/aslr.c @@ -18,6 +18,7 @@ #include <internal/lib.h> /* page_size */ #include <linux/compiler.h> #include <linux/zalloc.h> +#include <errno.h> #include <inttypes.h> #include <unistd.h> #include <byteswap.h> @@ -46,6 +47,23 @@ struct aslr_mapping { u64 remap_start; }; +struct aslr_evsel_priv { + u64 orig_sample_type; + u64 orig_sample_regs_user; + u64 orig_sample_regs_intr; + int orig_sample_size; +}; + +static size_t evsel_hash(long key, void *ctx __maybe_unused) +{ + return (size_t)key; +} + +static bool evsel_equal(long key1, long key2, void *ctx __maybe_unused) +{ + return key1 == key2; +} + struct process_top_address { u64 remapped_max; }; @@ -60,6 +78,11 @@ struct aslr_tool { struct hashmap remap_addresses; /** @top_addresses: mapping from process to max remapped address. */ struct hashmap top_addresses; + /** + * @evsel_orig_attrs: mapping from evsel pointer to its original + * unstripped sample_type and registers bitmasks. 
+ */ + struct hashmap evsel_orig_attrs; }; static const pid_t kernel_pid = -1; @@ -123,6 +146,8 @@ static u64 aslr_tool__remap_address(struct aslr_tool *aslr, u64 *remapped_invariant_ptr = NULL; u64 remap_addr = 0; u8 effective_cpumode = cpumode; + struct dso *dso; + const char *dso_name; if (!aslr_thread) return 0; /* No thread. */ @@ -148,9 +173,15 @@ static u64 aslr_tool__remap_address(struct aslr_tool *aslr, } } + dso = map__dso(al.map); + dso_name = dso ? dso__long_name(dso) : NULL; + key.machine = maps__machine(thread__maps(aslr_thread)); - key.dso = map__dso(al.map); - key.invariant = map__start(al.map) - map__pgoff(al.map); + key.dso = dso; + if (dso && !is_anon_memory(dso_name) && !is_no_dso_memory(dso_name)) + key.invariant = map__start(al.map) - map__pgoff(al.map); + else + key.invariant = map__start(al.map); key.pid = (effective_cpumode == PERF_RECORD_MISC_KERNEL || effective_cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? kernel_pid : thread__pid(aslr_thread); @@ -659,6 +690,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, struct aslr_tool *aslr; struct perf_tool *delegate; int ret; + int orig_sample_size; u64 sample_type; struct thread *thread; struct machine *aslr_machine; @@ -671,6 +703,10 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, u64 addr; size_t i; size_t j; + struct aslr_evsel_priv *priv = NULL; + u64 orig_sample_type; + u64 orig_regs_user; + u64 orig_regs_intr; bool orig_needs_swap; del_tool = container_of(tool, struct delegate_tool, tool); @@ -683,7 +719,24 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, return delegate->sample(delegate, event, sample, machine); ret = -EFAULT; - sample_type = evsel->core.attr.sample_type; + + if (hashmap__find(&aslr->evsel_orig_attrs, evsel, &priv)) { + orig_sample_type = priv->orig_sample_type; + orig_regs_user = priv->orig_sample_regs_user; + orig_regs_intr = priv->orig_sample_regs_intr; + } else { + orig_sample_type = 
evsel->core.attr.sample_type; + orig_regs_user = evsel->core.attr.sample_regs_user; + orig_regs_intr = evsel->core.attr.sample_regs_intr; + } + + orig_sample_size = evsel->sample_size; + + sample_type = orig_sample_type; + sample_type &= ~PERF_SAMPLE_REGS_USER; + sample_type &= ~PERF_SAMPLE_REGS_INTR; + sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); new_event = (union perf_event *)aslr->event_copy; @@ -736,25 +789,25 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, i++; \ } while (0) - if (sample_type & PERF_SAMPLE_IDENTIFIER) + if (orig_sample_type & PERF_SAMPLE_IDENTIFIER) COPY_U64(); /* id */ - if (sample_type & PERF_SAMPLE_IP) + if (orig_sample_type & PERF_SAMPLE_IP) REMAP_U64(sample->ip); - if (sample_type & PERF_SAMPLE_TID) + if (orig_sample_type & PERF_SAMPLE_TID) COPY_U64(); /* pid, tid */ - if (sample_type & PERF_SAMPLE_TIME) + if (orig_sample_type & PERF_SAMPLE_TIME) COPY_U64(); /* time */ - if (sample_type & PERF_SAMPLE_ADDR) + if (orig_sample_type & PERF_SAMPLE_ADDR) REMAP_U64(sample->addr); - if (sample_type & PERF_SAMPLE_ID) + if (orig_sample_type & PERF_SAMPLE_ID) COPY_U64(); /* id */ - if (sample_type & PERF_SAMPLE_STREAM_ID) + if (orig_sample_type & PERF_SAMPLE_STREAM_ID) COPY_U64(); /* stream_id */ - if (sample_type & PERF_SAMPLE_CPU) + if (orig_sample_type & PERF_SAMPLE_CPU) COPY_U64(); /* cpu, res */ - if (sample_type & PERF_SAMPLE_PERIOD) + if (orig_sample_type & PERF_SAMPLE_PERIOD) COPY_U64(); /* period */ - if (sample_type & PERF_SAMPLE_READ) { + if (orig_sample_type & PERF_SAMPLE_READ) { if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { COPY_U64(); /* value */ if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) @@ -789,7 +842,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, } } } - if (sample_type & 
PERF_SAMPLE_CALLCHAIN) { + if (orig_sample_type & PERF_SAMPLE_CALLCHAIN) { u64 nr; if (CHECK_BOUNDS(1, 1)) { @@ -861,7 +914,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, out_array[j++] = addr; } } - if (sample_type & PERF_SAMPLE_RAW) { + if (orig_sample_type & PERF_SAMPLE_RAW) { size_t bytes = sizeof(u32) + sample->raw_size; size_t u64_words = (bytes + 7) / 8; @@ -880,7 +933,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + if (orig_sample_type & PERF_SAMPLE_BRANCH_STACK) { u64 nr; if (CHECK_BOUNDS(1, 1)) { @@ -938,19 +991,25 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, goto out_put; } } - if (sample_type & PERF_SAMPLE_REGS_USER) { + if (orig_sample_type & PERF_SAMPLE_REGS_USER) { + u64 abi; + if (CHECK_BOUNDS(1, 0)) { ret = -EFAULT; goto out_put; } - /* abi */ - COPY_U64(); - /* TODO: can this be less conservative? */ - pr_debug("Dropping regs user sample as possible ASLR leak\n"); - ret = 0; - goto out_put; + abi = in_array[i++]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(orig_regs_user); + + if (nr > max_i - i) { + ret = -EFAULT; + goto out_put; + } + i += nr; + } } - if (sample_type & PERF_SAMPLE_STACK_USER) { + if (orig_sample_type & PERF_SAMPLE_STACK_USER) { u64 size; if (CHECK_BOUNDS(1, 1)) { @@ -982,39 +1041,45 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + if (orig_sample_type & PERF_SAMPLE_WEIGHT_TYPE) COPY_U64(); /* perf_sample_weight */ - if (sample_type & PERF_SAMPLE_DATA_SRC) + if (orig_sample_type & PERF_SAMPLE_DATA_SRC) COPY_U64(); /* data_src */ - if (sample_type & PERF_SAMPLE_TRANSACTION) + if (orig_sample_type & PERF_SAMPLE_TRANSACTION) COPY_U64(); /* transaction */ - if (sample_type & PERF_SAMPLE_REGS_INTR) { + if (orig_sample_type & PERF_SAMPLE_REGS_INTR) { + u64 abi; + if 
(CHECK_BOUNDS(1, 0)) { ret = -EFAULT; goto out_put; } - /* abi */ - COPY_U64(); - /* TODO: can this be less conservative? */ - pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); - ret = 0; - goto out_put; + abi = in_array[i++]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(orig_regs_intr); + + if (nr > max_i - i) { + ret = -EFAULT; + goto out_put; + } + i += nr; + } } - if (sample_type & PERF_SAMPLE_PHYS_ADDR) { + if (orig_sample_type & PERF_SAMPLE_PHYS_ADDR) { COPY_U64(); /* phys_addr */ /* TODO: can this be less conservative? */ pr_debug("Dropping physical address sample as possible ASLR leak\n"); ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_CGROUP) + if (orig_sample_type & PERF_SAMPLE_CGROUP) COPY_U64(); /* cgroup */ - if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + if (orig_sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) COPY_U64(); /* data_page_size */ - if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + if (orig_sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) COPY_U64(); /* code_page_size */ - if (sample_type & PERF_SAMPLE_AUX) { + if (orig_sample_type & PERF_SAMPLE_AUX) { u64 size; if (CHECK_BOUNDS(1, 1)) { @@ -1054,11 +1119,20 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, } new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); - + /* Temporarily override evsel attributes to match the stripped new_event format! 
*/ + evsel->sample_size = __evsel__sample_size(sample_type); + evsel->core.attr.sample_type = sample_type; + evsel->core.attr.sample_regs_user = 0; + evsel->core.attr.sample_regs_intr = 0; perf_sample__init(&new_sample, /*all=*/ true); ret = __evsel__parse_sample(evsel, new_event, &new_sample, orig_needs_swap); if (ret) { + /* Restore original attributes immediately if parsing fails */ + evsel->sample_size = orig_sample_size; + evsel->core.attr.sample_type = orig_sample_type; + evsel->core.attr.sample_regs_user = orig_regs_user; + evsel->core.attr.sample_regs_intr = orig_regs_intr; perf_sample__exit(&new_sample); goto out_put; } @@ -1067,6 +1141,12 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = delegate->sample(delegate, new_event, &new_sample, machine); perf_sample__exit(&new_sample); + /* Restore original attributes so trace ingestion never desynchronizes! */ + evsel->sample_size = orig_sample_size; + evsel->core.attr.sample_type = orig_sample_type; + evsel->core.attr.sample_regs_user = orig_regs_user; + evsel->core.attr.sample_regs_intr = orig_regs_intr; + out_put: thread__put(thread); return ret; @@ -1120,43 +1200,44 @@ static int aslr_tool__process_auxtrace_error(const struct perf_tool *tool __mayb return 0; } - -void aslr_tool__strip_attr_event(union perf_event *event, struct evlist **pevlist) +void aslr_tool__strip_attr_event(union perf_event *event, struct evlist *evlist) { struct evsel *evsel; bool needs_swap = false; - if (pevlist && *pevlist) { - evsel = evlist__last(*pevlist); - if (evsel) - needs_swap = evsel->needs_swap; - } + if (!evlist) + return; + + evsel = evlist__last(evlist); + if (evsel) + needs_swap = evsel->needs_swap; if (event->header.size >= (offsetof(struct perf_record_header_attr, attr.sample_type) + sizeof(u64))) { - u64 st = event->attr.attr.sample_type; - - if (needs_swap) - st = bswap_64(st); - - st &= ASLR_SUPPORTED_SAMPLE_TYPE; + if (needs_swap) { + u64 st = bswap_64(event->attr.attr.sample_type); - if 
(needs_swap) - st = bswap_64(st); + st &= ASLR_SUPPORTED_SAMPLE_TYPE; + event->attr.attr.sample_type = bswap_64(st); + } else { + event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + } - event->attr.attr.sample_type = st; + if (event->header.size >= + (offsetof(struct perf_record_header_attr, attr.sample_regs_user) + sizeof(u64))) + event->attr.attr.sample_regs_user = 0; + if (event->header.size >= + (offsetof(struct perf_record_header_attr, attr.sample_regs_intr) + sizeof(u64))) + event->attr.attr.sample_regs_intr = 0; } if (event->header.size >= (offsetof(struct perf_record_header_attr, attr.type) + sizeof(u32))) { - u32 type = event->attr.attr.type; - - if (needs_swap) - type = bswap_32(type); + u32 type = needs_swap ? bswap_32(event->attr.attr.type) : event->attr.attr.type; if (type == PERF_TYPE_BREAKPOINT && event->header.size >= (offsetof(struct perf_record_header_attr, - attr.bp_addr) + sizeof(u64))) { + attr.bp_addr) + sizeof(u64))) { event->attr.attr.bp_addr = 0; } else if (type >= PERF_TYPE_MAX) { struct perf_pmu *pmu; @@ -1177,28 +1258,6 @@ void aslr_tool__strip_attr_event(union perf_event *event, struct evlist **pevlis } } -void aslr_tool__strip_evlist(struct perf_tool *tool __maybe_unused, - struct evlist *evlist) -{ - struct evsel *evsel; - - evlist__for_each_entry(evlist, evsel) { - evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; - - if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) - evsel->core.attr.bp_addr = 0; - else if (evsel->core.attr.type >= PERF_TYPE_MAX) { - struct perf_pmu *pmu = perf_pmus__find_by_type(evsel->core.attr.type); - - if (pmu && (!strcmp(pmu->name, "kprobe") || - !strcmp(pmu->name, "uprobe"))) { - evsel->core.attr.config1 = 0; - evsel->core.attr.config2 = 0; - } - } - } -} - static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) { delegate_tool__init(&aslr->tool, delegate); @@ -1212,6 +1271,9 @@ static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) 
hashmap__init(&aslr->top_addresses, top_addresses__hash, top_addresses__equal, /*ctx=*/NULL); + hashmap__init(&aslr->evsel_orig_attrs, + evsel_hash, evsel_equal, + /*ctx=*/NULL); aslr->tool.tool.sample = aslr_tool__process_sample; /* read - reads a counter, okay to delegate. */ @@ -1274,9 +1336,13 @@ void aslr_tool__delete(struct perf_tool *tool) zfree(&cur->pkey); zfree(&cur->pvalue); } + hashmap__for_each_entry(&aslr->evsel_orig_attrs, cur, bkt) { + zfree(&cur->pvalue); + } hashmap__clear(&aslr->remap_addresses); hashmap__clear(&aslr->top_addresses); + hashmap__clear(&aslr->evsel_orig_attrs); aslr_tool__destroy_machines_priv(&aslr->machines); machines__destroy_kernel_maps(&aslr->machines); @@ -1290,3 +1356,69 @@ void aslr_tool__delete(struct perf_tool *tool) machines__exit(&aslr->machines); free(aslr); } + +int aslr_tool__cache_orig_attrs(struct perf_tool *tool, struct evsel *evsel) +{ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct aslr_evsel_priv *priv = zalloc(sizeof(*priv)); + int err; + + if (!priv) + return -ENOMEM; + + priv->orig_sample_type = evsel->core.attr.sample_type; + priv->orig_sample_regs_user = evsel->core.attr.sample_regs_user; + priv->orig_sample_regs_intr = evsel->core.attr.sample_regs_intr; + priv->orig_sample_size = evsel->sample_size; + + err = hashmap__add(&aslr->evsel_orig_attrs, evsel, priv); + if (err) { + free(priv); + return err; + } + return 0; +} + +void aslr_tool__strip_evlist(const struct perf_tool *tool __maybe_unused, struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + evsel->core.attr.sample_regs_user = 0; + evsel->core.attr.sample_regs_intr = 0; + evsel->sample_size = __evsel__sample_size(evsel->core.attr.sample_type); + evsel__calc_id_pos(evsel); + + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) { + 
evsel->core.attr.bp_addr = 0; + } else if (evsel->core.attr.type >= PERF_TYPE_MAX) { + struct perf_pmu *pmu = perf_pmus__find_by_type(evsel->core.attr.type); + + if (pmu && (!strcmp(pmu->name, "kprobe") || + !strcmp(pmu->name, "uprobe"))) { + evsel->core.attr.config1 = 0; + evsel->core.attr.config2 = 0; + } + } + } +} + +void aslr_tool__restore_evlist(const struct perf_tool *tool, struct evlist *evlist) +{ + const struct delegate_tool *del_tool = container_of(tool, const struct delegate_tool, tool); + const struct aslr_tool *aslr = container_of(del_tool, const struct aslr_tool, tool); + struct evsel *evsel; + struct aslr_evsel_priv *priv; + + evlist__for_each_entry(evlist, evsel) { + if (hashmap__find(&aslr->evsel_orig_attrs, evsel, &priv)) { + evsel->core.attr.sample_type = priv->orig_sample_type; + evsel->core.attr.sample_regs_user = priv->orig_sample_regs_user; + evsel->core.attr.sample_regs_intr = priv->orig_sample_regs_intr; + evsel->sample_size = priv->orig_sample_size; + evsel__calc_id_pos(evsel); + } + } +} diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h index 2b82f711bc67..522e31c8e2c0 100644 --- a/tools/perf/util/aslr.h +++ b/tools/perf/util/aslr.h @@ -34,8 +34,11 @@ struct evlist; union perf_event; struct perf_tool *aslr_tool__new(struct perf_tool *delegate); -void aslr_tool__delete(struct perf_tool *aslr); -void aslr_tool__strip_attr_event(union perf_event *event, struct evlist **pevlist); -void aslr_tool__strip_evlist(struct perf_tool *tool, struct evlist *evlist); +void aslr_tool__delete(struct perf_tool *tool); + +void aslr_tool__strip_attr_event(union perf_event *event, struct evlist *evlist); +int aslr_tool__cache_orig_attrs(struct perf_tool *tool, struct evsel *evsel); +void aslr_tool__strip_evlist(const struct perf_tool *tool, struct evlist *evlist); +void aslr_tool__restore_evlist(const struct perf_tool *tool, struct evlist *evlist); #endif /* __PERF_ASLR_H */ -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related 
[flat|nested] 183+ messages in thread
* Re: [PATCH v17 4/5] perf aslr: Strip sample registers 2026-06-07 6:09 ` [PATCH v17 4/5] perf aslr: Strip sample registers Ian Rogers @ 2026-06-07 6:27 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-06-07 6:27 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider: - [High] ASLR stripping of breakpoint and kprobe/uprobe fields is bypassed, and sample_type is corrupted, when processing cross-endian traces due to incorrect bswap logic. -- commit dd031a94e79bda6ae3f56773d70deec897c2f043 Author: Ian Rogers <irogers@google.com> perf aslr: Strip sample registers Extend the ASLR tool stripping helpers to drop register dump payloads by masking out the relevant perf_event_attr fields (sample_regs_user, sample_regs_intr) when the delegated tool is handling the data. struct aslr_evsel_priv maintains the original perf_event_attr values and is looked up via the evsel_orig_attrs hashmap so that sample sizes can be properly parsed even when bits are stripped from the pipeline. > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > index 8980599b0158f..bf6bb27153573 100644 > --- a/tools/perf/util/aslr.c > +++ b/tools/perf/util/aslr.c [ ... ] > @@ -1120,43 +1200,44 @@ static int aslr_tool__process_auxtrace_error(const struct perf_tool *tool __mayb > return 0; > } > > - > -void aslr_tool__strip_attr_event(union perf_event *event, struct evlist **pevlist) > +void aslr_tool__strip_attr_event(union perf_event *event, struct evlist *evlist) > { > struct evsel *evsel; > bool needs_swap = false; > [ ... 
] > if (event->header.size >= (offsetof(struct perf_record_header_attr, > attr.sample_type) + sizeof(u64))) { > - u64 st = event->attr.attr.sample_type; > - > - if (needs_swap) > - st = bswap_64(st); > - > - st &= ASLR_SUPPORTED_SAMPLE_TYPE; > + if (needs_swap) { > + u64 st = bswap_64(event->attr.attr.sample_type); > > - if (needs_swap) > - st = bswap_64(st); > + st &= ASLR_SUPPORTED_SAMPLE_TYPE; > + event->attr.attr.sample_type = bswap_64(st); [Severity: High] Does this code inadvertently corrupt the sample_type bitmask? Since perf_session__process_user_event() calls event_swap(), which delegates to perf_event__hdr_attr_swap(), the event->attr.attr structure is already byte-swapped in-place to native endianness before the tool->attr() callback is invoked. Swapping it again here when needs_swap is true would corrupt the value before applying the ASLR_SUPPORTED_SAMPLE_TYPE mask. > + } else { > + event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; > + } > > - event->attr.attr.sample_type = st; > + if (event->header.size >= > + (offsetof(struct perf_record_header_attr, attr.sample_regs_user) + sizeof(u64))) > + event->attr.attr.sample_regs_user = 0; > + if (event->header.size >= > + (offsetof(struct perf_record_header_attr, attr.sample_regs_intr) + sizeof(u64))) > + event->attr.attr.sample_regs_intr = 0; > } > > if (event->header.size >= (offsetof(struct perf_record_header_attr, > attr.type) + sizeof(u32))) { > - u32 type = event->attr.attr.type; > - > - if (needs_swap) > - type = bswap_32(type); > + u32 type = needs_swap ? bswap_32(event->attr.attr.type) : event->attr.attr.type; [Severity: High] Is it possible this logic bypasses ASLR scrubbing for cross-endian traces? Because event->attr.attr.type was already swapped to native endianness prior to this tool callback, calling bswap_32() here would cause the type == PERF_TYPE_BREAKPOINT check below to evaluate to false (e.g., native 5 becomes 0x05000000). 
This would bypass the scrubbing of bp_addr, config1, and config2, potentially leaking kernel ASLR addresses. > > if (type == PERF_TYPE_BREAKPOINT && > event->header.size >= (offsetof(struct perf_record_header_attr, > - attr.bp_addr) + sizeof(u64))) { > + attr.bp_addr) + sizeof(u64))) { > event->attr.attr.bp_addr = 0; > } else if (type >= PERF_TYPE_MAX) { > struct perf_pmu *pmu; -- Sashiko AI review · https://sashiko.dev/#/patchset/20260607060933.3274263-1-irogers@google.com?part=4 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v17 5/5] perf test: Add inject ASLR test 2026-06-07 6:09 ` [PATCH v17 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (3 preceding siblings ...) 2026-06-07 6:09 ` [PATCH v17 4/5] perf aslr: Strip sample registers Ian Rogers @ 2026-06-07 6:09 ` Ian Rogers 2026-06-07 6:18 ` sashiko-bot 2026-06-07 21:36 ` [PATCH v18 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-07 6:09 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz Add a new shell test to verify the feature. The test covers: - Basic address remapping for user space samples. - Pipe mode coverage for data piped into perf inject. - Callchain address remapping. - Consistency of output before and after injection. - Pipe mode report consistency. - Dropping of samples that leak ASLR info (physical addresses). - Kernel address remapping (using a dedicated kernel-intensive VFS dd workload so that timer-interrupt samples are reliably taken while executing in kernel mode). - Kernel report consistency with address normalization. The test suite is hardened with global 'set -o pipefail' assertions to catch pipeline failures, stream-consuming awk processors to handle SIGPIPE signals, and a dedicated pipe output scenario validating raw 'perf inject -o -' stdout streams. Note on kernel DSO normalization in the test script: The test script deliberately normalizes all kernel DSOs to a generic [kernel] tag before diffing, as obfuscating physical kernel addresses forces perf report to occasionally shift samples between individual modules and [kernel.kallsyms] due to the lack of valid host module boundary maps. 
Signed-off-by: Ian Rogers <irogers@google.com> Assisted-by: Antigravity:gemini-3.1-pro --- tools/perf/tests/shell/inject_aslr.sh | 519 ++++++++++++++++++++++++++ 1 file changed, 519 insertions(+) create mode 100755 tools/perf/tests/shell/inject_aslr.sh diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh new file mode 100755 index 000000000000..db5497508259 --- /dev/null +++ b/tools/perf/tests/shell/inject_aslr.sh @@ -0,0 +1,519 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# perf inject --aslr test + +set -e +set -o pipefail + +shelldir=$(dirname "$0") +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + +sym="noploop" + +skip_test_missing_symbol ${sym} + +# Create global temp directory +temp_dir=$(mktemp -d /tmp/perf-test-aslr.XXXXXXXXXX) + +prog="perf test -w noploop" +[ "$(uname -m)" = "s390x" ] && prog="$prog 3" +err=0 +kprog="dd if=/dev/zero of=/dev/null bs=1M count=500" + +cleanup() { + local exit_code=${1:-$?} + trap - EXIT TERM INT + if [ "${exit_code}" -ne 0 ] || [ "${err}" -ne 0 ]; then + echo "Test failed! Preserving temp directory: ${temp_dir}" + return + fi + # Check if temp_dir is set and looks sane before removing + if [[ "${temp_dir}" =~ ^/tmp/perf-test-aslr\. ]]; then + rm -rf "${temp_dir}" + fi +} + +trap_cleanup() { + local exit_code=$? 
+ echo "Unexpected signal in ${FUNCNAME[1]}" + cleanup ${exit_code} + exit ${exit_code} +} +trap trap_cleanup EXIT TERM INT + +get_noploop_addr() { + local file=$1 + perf script -i "$file" | awk ' + BEGIN { found=0 } + { + for (i=1; i<=NF; i++) { + if ($i ~ /noploop\+/) { + if (!found) { + print $(i-1) + found=1 + } + } + } + }' +} + +test_basic_aslr() { + echo "Test basic ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.basic.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.basic.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -v --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Basic ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Basic ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Basic ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Basic ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Basic ASLR test [Success]" + fi +} + +test_pipe_aslr() { + echo "Test pipe mode ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe.XXXXXX") + + # Use tee to save the original pipe data for comparison + perf record -e task-clock:u -o - ${prog} | tee "${data}" | perf inject --aslr -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Pipe ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Pipe ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Pipe ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Pipe ASLR test [Failed - addresses are not remapped]" + 
err=1 + else + echo "Pipe ASLR test [Success]" + fi +} + +test_callchain_aslr() { + echo "Test Callchain ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.callchain.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.callchain.XXXXXX") + + perf record -g -e task-clock:u -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Callchain ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Callchain ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Callchain ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Callchain ASLR test [Failed - addresses are not remapped]" + err=1 + else + # Extract callchain addresses (indented lines starting with hex addresses) + orig_callchain=$(perf script -i "${data}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + new_callchain=$(perf script -i "${data2}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + + if [ -z "$orig_callchain" ]; then + echo "Callchain ASLR test [Failed - no callchain samples in original file]" + err=1 + elif [ -z "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain data was dropped]" + err=1 + elif [ "$orig_callchain" = "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain addresses were not remapped]" + err=1 + else + echo "Callchain ASLR test [Success]" + fi + fi +} + +test_report_aslr() { + echo "Test perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i 
"${data}" -o "${data_clean}" + perf inject -v -b --aslr -i "${data}" -o "${data2}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Report ASLR test [Success]" + fi +} + +test_pipe_report_aslr() { + echo "Test pipe mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + # Use tee to save the original pipe data, then process it with inject -b + perf record -e task-clock:u -o - ${prog} | \ + tee "${data}" | \ + perf inject -b --aslr -o "${data2}" + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || 
true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Pipe Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Report ASLR test [Success]" + fi +} + +test_pipe_out_report_aslr() { + echo "Test pipe output mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_out_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1" + local report2="${temp_dir}/report2" + local report1_clean="${temp_dir}/report1.clean" + local report2_clean="${temp_dir}/report2.clean" + local diff_file="${temp_dir}/diff" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf inject -b --aslr -i "${data}" -o - | perf report -i - --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! 
-s "${report1_clean}" ]; then + echo "Pipe Output Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Output Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Output Report ASLR test [Success]" + fi +} + +test_dropped_samples() { + echo "Test dropped samples (phys-data)" + local data + data=$(mktemp "${temp_dir}/perf.data.dropped.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.dropped.XXXXXX") + + # Check if --phys-data is supported by recording a short run + if ! perf record -e task-clock:u --phys-data -o "${data}" -- sleep 0.1 > /dev/null 2>&1; then + echo "Skipping dropped samples test as --phys-data is not supported" + return + fi + + perf record -e task-clock:u --phys-data -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + # Verify that the original file actually contained samples! + orig_samples=$(perf script -i "${data}" | wc -l) + if [ "$orig_samples" -eq 0 ]; then + echo "Dropped samples test [Failed - no samples in original file]" + err=1 + else + # Verify that samples are dropped. + samples_count=$(perf script -i "${data2}" | wc -l) + + if [ "$samples_count" -gt 0 ]; then + echo "Dropped samples test [Failed - samples were not dropped]" + err=1 + else + echo "Dropped samples test [Success]" + fi + fi +} + +test_kernel_aslr() { + echo "Test kernel ASLR remapping" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then + echo "Skipping kernel ASLR test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel ASLR test as kernel map could not be recorded (permissions restricted)" + return + fi + + perf inject -v --aslr -i "${kdata}" -o "${kdata2}" + + # Check if kernel addresses are remapped. + # Find the field that ends with :k: (the event name) and take the next field! + orig_addr=$(perf script -i "${kdata}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + new_addr=$(perf script -i "${kdata2}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + + echo "Kernel ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Kernel ASLR test [Failed - no kernel samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Kernel ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Kernel ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Kernel ASLR test [Success]" + fi +} + +test_kernel_report_aslr() { + echo "Test kernel perf report consistency" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel_report.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_report_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then + echo "Skipping kernel report test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel report test as kernel map could not be recorded (permissions restricted)" + return + fi + + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i "${kdata}" -o "${data_clean}" + perf inject -v -b --aslr -i "${kdata}" -o "${kdata2}" + + local report1="${temp_dir}/report_kernel1" + local report2="${temp_dir}/report_kernel2" + local report1_clean="${temp_dir}/report_kernel1.clean" + local report2_clean="${temp_dir}/report_kernel2.clean" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${kdata2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' > "${report2_clean}" || true + + # Normalize kernel DSOs and addresses in clean reports + # This allows kernel modules to be either a module or kernel.kallsyms + local report1_norm="${temp_dir}/report_kernel1.norm" + local report2_norm="${temp_dir}/report_kernel2.norm" + local diff_file="${temp_dir}/diff_kernel" + + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report1_clean}" | \ + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ + sort > "${report1_norm}" || true + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report2_clean}" | \ + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ + sort > "${report2_norm}" || true + + diff -u -w "${report1_norm}" "${report2_norm}" > "${diff_file}" || true + + if [ ! 
-s "${report1_norm}" ]; then + echo "Kernel Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Kernel Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Kernel Report ASLR test [Success]" + fi +} + +test_regs_stripping() { + echo "Test user register stripping" + local rdata="${temp_dir}/perf.data.regs" + local rdata2="${temp_dir}/perf.data.regs.injected" + local rdata_clean="${temp_dir}/perf.data.regs.clean" + + if ! perf record -e cycles:u --user-regs -o "${rdata}" ${prog} > /dev/null 2>&1; then + echo "Skipping user registers test as recording failed (unsupported flag/platform)" + return + fi + + perf inject -b -i "${rdata}" -o "${rdata_clean}" + perf inject -v -b --aslr -i "${rdata}" -o "${rdata2}" + + local report1="${temp_dir}/report_regs1" + local report2="${temp_dir}/report_regs2" + local report1_clean="${temp_dir}/report_regs1.clean" + local report2_clean="${temp_dir}/report_regs2.clean" + local diff_file="${temp_dir}/diff_regs" + + perf report -i "${rdata_clean}" --stdio > "${report1}" 2>/dev/null || true + perf report -i "${rdata2}" --stdio > "${report2}" 2>/dev/null || true + + grep '%' "${report1}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | \ + sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | \ + sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! 
-s "${report1_clean}" ]; then + echo "User registers stripping test [Failed - profile trace starved/empty]" + err=1 + return + elif [ -s "${diff_file}" ]; then + echo "User registers stripping test [Failed - report parsing differs]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + return + fi + + local script_dump="${temp_dir}/script_regs_dump" + perf script -D -i "${rdata2}" > "${script_dump}" 2>/dev/null || true + if grep -q "PERF_SAMPLE_REGS_USER" "${script_dump}"; then + echo "User registers stripping test [Failed - register dumps still present]" + err=1 + else + echo "User registers stripping test [Success]" + fi +} + +test_basic_aslr +test_pipe_aslr +test_callchain_aslr +test_report_aslr +test_pipe_report_aslr +test_pipe_out_report_aslr +test_dropped_samples +test_kernel_aslr +test_kernel_report_aslr +test_regs_stripping + +cleanup ${err} +exit $err -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v17 5/5] perf test: Add inject ASLR test 2026-06-07 6:09 ` [PATCH v17 5/5] perf test: Add inject ASLR test Ian Rogers @ 2026-06-07 6:18 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-06-07 6:18 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 4 potential issue(s) to consider: - [Medium] The `test_regs_stripping` test contains broken logic that causes it to unconditionally pass, resulting in a false negative. - [Medium] User space ASLR report tests omit raw hex address filtering, causing flaky test failures. - [Medium] The kernel ASLR test is vulnerable to race conditions on high-performance systems due to an extremely brief workload, causing flaky test failures. - [Low] Test harness artifacts are overwritten by subsequent tests upon failure, impeding debugging. -- commit 130aed1f94b88ff586372e491e562e4121d94067 Author: Ian Rogers <irogers@google.com> perf test: Add inject ASLR test This commit adds a new shell test suite for the perf inject --aslr feature. It provides coverage for basic user space sample remapping, pipe mode, callchain remapping, dropped samples leaking physical addresses, and kernel address remapping tests. diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh new file mode 100755 index 0000000000000..db54975082599 --- /dev/null +++ b/tools/perf/tests/shell/inject_aslr.sh [ ... ] > +prog="perf test -w noploop" > +[ "$(uname -m)" = "s390x" ] && prog="$prog 3" > +err=0 > +kprog="dd if=/dev/zero of=/dev/null bs=1M count=500" [Severity: Medium] Will this workload complete too quickly on high-performance systems? On modern machines, a 500MB transfer might complete in a few milliseconds. At default sampling frequencies, this extremely short duration could yield zero kernel samples, causing test_kernel_aslr and test_kernel_report_aslr to spuriously fail since they assert kernel samples are present. 
Is it possible to increase the transfer size or use a different workload to ensure enough samples are gathered? [ ... ] > +test_report_aslr() { > + echo "Test perf report consistency" > + local data > + data=$(mktemp "${temp_dir}/perf.data.report.XXXXXX") > + local data2 > + data2=$(mktemp "${temp_dir}/perf.data2.report.XXXXXX") > + local data_clean > + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") > + > + perf record -e task-clock:u -o "${data}" ${prog} > + # Use -b to inject build-ids and force ordered events processing in both > + perf inject -b -i "${data}" -o "${data_clean}" > + perf inject -v -b --aslr -i "${data}" -o "${data2}" > + > + local report1="${temp_dir}/report1" > + local report2="${temp_dir}/report2" > + local report1_clean="${temp_dir}/report1.clean" > + local report2_clean="${temp_dir}/report2.clean" > + local diff_file="${temp_dir}/diff" [Severity: Low] Will these fixed filenames cause debugging issues if multiple tests fail? Since ${temp_dir} is shared globally across the test script, these paths evaluate to the same files in test_report_aslr, test_pipe_report_aslr, and test_pipe_out_report_aslr. Because the test script accumulates failures with err=1 and continues execution, passing tests will silently overwrite the artifacts of earlier failing tests. Should these names include the test function name to preserve artifacts properly for debugging? > + > + perf report -i "${data_clean}" --stdio > "${report1}" > + perf report -i "${data2}" --stdio > "${report2}" > + > + # Strip headers and compare lines with percentages > + grep '%' "${report1}" | grep -v '^#' | sort > "${report1_clean}" || true > + grep '%' "${report2}" | grep -v '^#' | sort > "${report2_clean}" || true > + > + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true [Severity: Medium] Does this output comparison need raw hex address filtering to prevent flaky test failures? 
If unresolved symbols are present in user space samples (such as [vdso] or libc), perf report will output their raw hex addresses. Because the --aslr injection shifts these addresses, the text output differs, and the diff will spuriously fail the test. Other tests like test_kernel_report_aslr use a filter to handle this: grep -v -E '0x[0-9a-f]{8,}|0000000000000000' Would it make sense to apply that same filtering here, and similarly in test_pipe_report_aslr and test_pipe_out_report_aslr? [ ... ] > + local script_dump="${temp_dir}/script_regs_dump" > + perf script -D -i "${rdata2}" > "${script_dump}" 2>/dev/null || true > + if grep -q "PERF_SAMPLE_REGS_USER" "${script_dump}"; then > + echo "User registers stripping test [Failed - register dumps still present]" > + err=1 > + else > + echo "User registers stripping test [Success]" > + fi > +} [Severity: Medium] Will this grep condition ever evaluate to true? The test checks for the literal string "PERF_SAMPLE_REGS_USER" in the perf script -D output. However, the tool formats this flag natively as "REGS_USER" and prints payloads as "... user regs: mask ...". Because "PERF_SAMPLE_REGS_USER" is never printed, the grep command will unconditionally fail. This causes the test to always report success, even if the --aslr register stripping logic fails. Could this be updated to match the actual output strings generated by perf? -- Sashiko AI review · https://sashiko.dev/#/patchset/20260607060933.3274263-1-irogers@google.com?part=5 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v18 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes 2026-06-07 6:09 ` [PATCH v17 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (4 preceding siblings ...) 2026-06-07 6:09 ` [PATCH v17 5/5] perf test: Add inject ASLR test Ian Rogers @ 2026-06-07 21:36 ` Ian Rogers 2026-06-07 21:36 ` [PATCH v18 1/5] perf maps: Add maps__mutate_mapping Ian Rogers ` (5 more replies) 5 siblings, 6 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-07 21:36 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz This patch series introduces the new 'perf inject --aslr' feature to remap virtual memory addresses, or drop events that would leak physical memory addresses, when recorded profile data is shared between machines. Bundled with this feature is a bug fix inside the core map tracking tool that hardens perf session analysis against concurrent lookup data races. Detailed Mechanism of MMAP Mapping and ASLR Virtual Address Allocation: The ASLR tool virtualizes the address space of the recorded processes by intercepting MMAP and MMAP2 events to build a consistent translation database, which is subsequently used to rewrite sample addresses. It maintains two primary lookup databases using hash maps: 1. 'remap_addresses': Maps an original mapping key to its new remapped base address. The key uses topological invariant coordinates: (machine, dso, invariant). The invariant is computed as (start - pgoff) for DSO-backed mappings. This invariant remains constant even when perf's internal overlap-resolution splits a VMA into fragmented pieces, ensuring split maps resolve consistently back to the same remapped base. 2. 'top_addresses': Tracks the allocation state per process (machine, pid). It maintains 'remapped_max' (the highest allocated address in the virtualized space).
For each MMAP/MMAP2 event: - We look up the DSO and invariant key in 'remap_addresses'. If found, we reuse the translation, preserving the offset within the mapping. - If not found, we allocate a new remapped address space: - We use thread__find_map to look up the mapping immediately preceding the new one in the original address space (at start - 1). If the preceding mapping was also remapped, we place the new mapping contiguously after it in the remapped space. This preserves contiguity of split mappings (e.g., symbols split by HugeTLB, or anonymous .bss segments adjacent to initialized data). - If no contiguous mapping is found, we insert a 1-page gap from the highest allocated address (remapped_max) to prevent accidental merging of unrelated VMAs. - The event's start address (and pgoff for kernel maps) is rewritten, and the event is delegated to the output writer. To remain strictly conservative and guarantee security, the tool scrubs breakpoint addresses (bp_addr) from all synthesized stream headers, completely drops PERF_RECORD_TEXT_POKE events to prevent leaks of absolute immediate pointer operands, and drops unsupported complex payloads (such as user register stacks, raw tracepoints, and hardware AUX tracing frames). Verification is reinforced with a shell test ('inject_aslr.sh'). Prerequisite Bug Fix (Patch 1): During development, a core map indexing issue was identified and resolved to prevent concurrent lookup data races during session analysis. Changes since v17: - Patch 2: Reordered ksymbol deletion logic to ensure `perf_event__process_ksymbol` deletes the map *after* `aslr_tool__findnew_mapping` translates the unregister offsets. - Patch 2: Changed `aslr_tool__delete` to cleanly handle guest machine deletion memory leaks. - Patch 2: Resolved read-only segfaults on memory-mapped perf.data headers during attribute stripping by using deep copies in `perf_event__repipe_attr`.
- Patch 2: Fixed user space remap invariant logic to include `(start - map__start(al.map))` preventing negative overflows on module offset boundaries. - Patch 3: Removed duplicate `bswap_64` payload byte-swapping inside the array logic, allowing the host endianness macros `COPY_U64()` to handle it dynamically. - Patch 3: Fixed LBR branch sample starvation by explicitly reading branch counters instead of dropping the entire sample. - Patch 5: Fixed test flakiness by grepping out physical hex addresses `0x[0-9a-f]{8,}` instead of matching exact address strings. - Patch 5: Parameterized temp reports and updated test to scale with `/dev/urandom` continuous random reads. - Patch Series: Added Signed-off-by tags uniformly and Assisted-by tags to track assistance. Changes since v16: - Patch 2: Refactored inline ASLR stripping logic out of builtin-inject.c and into dedicated helpers (aslr_tool__strip_attr_event and aslr_tool__strip_evlist) in aslr.c to better separate concerns. - Patch 2: Fixed guest machine allocation memory leak in aslr_tool__delete() where machines__exit() explicitly skipped freeing the guest processes tree. - Patch 3: Fixed bounds-check violations during cross-endian parsing inside aslr_tool__process_sample() by correctly applying bswap_64() to raw offsets, iteration counts, sizes, and addresses prior to logical evaluation when orig_needs_swap is active. - Patch 4: Fixed pipe mode parser misalignment bug by safely fetching needs_swap from the initialized evsel rather than blindly intercepting HEADER_ATTR events prior to session parsing. - Patch 4: Resolved checkpatch.pl line length warnings in the bswap_64 endianness swapping logic. - Patch Series: Reordered the final two patches. "perf aslr: Strip sample registers" is now Patch 4, and "perf test: Add inject ASLR test" is now Patch 5. 
This ensures the register stripping logic is fully introduced before the comprehensive shell tests validate it, preventing bisectability test failures and easing merge conflicts. - Patch 5: Fixed "User registers stripping test" starvation when run as root by explicitly using '-e cycles:u' during recording, preventing the ring buffer from overflowing with kernel samples. Changes since v15: - Patch 2: Added bounds checking for event->header.size before writing to breakpoint fields to avoid heap buffer overflow on older ABI events. - Patch 2: Fixed asymmetric calculation bug in aslr_tool__findnew_mapping() where pgoff for anonymous kernel memory was not properly subtracted upon insertion, causing the lookup addition to overflow. - Patch 2: Added detailed comments documenting the symmetric lookup and insertion math for unmapped and mapped memory blocks. - Patch 5: Add missing kprobe and uprobe scrubbing of config1 and config2 during aslr_tool__strip_evlist() to strictly conform with repipe constraints. Changes since v14: - Patch 2: Removed unnecessary vertical whitespace in builtin-inject.c. - Patch 2: Added comments explaining why pgoff is assigned for anonymous memory maps to prevent ASLR leaks. - Patch 2: Removed orig_last_end tracking and refactored contiguous mapping detection to use thread__find_map(..., start - 1, ...) based on Gabriel's feedback. - Patch 2: Scrub kprobe/uprobe event config1 and config2 fields to prevent address leaks. - Patch 2: Overwrite pgoff with the remapped start address for anonymous mappings (detected via is_anon_memory and is_no_dso_memory). - Patch 3: Fix C90 mixed declaration error for orig_needs_swap. - Patch 3: Temporarily disable evsel->needs_swap during the secondary evsel__parse_sample() call to prevent branch stack double-swapping bugs. Changes since v13: - Patch 2: Added a NULL check for env before calling perf_env__kernel_is_64_bit(env) to prevent potential segfaults if the recorded environment has no headers. 
- Patch 5: Fixed sample_size and id_pos going out of sync during aslr_tool__strip_evlist() and aslr_tool__restore_evlist(). Instead of using evsel__reset_sample_bit(), which was acting as a no-op due to early bit clearing and corrupted sample_size, the tool now directly updates sample_type and recomputes sample_size/id_pos dynamically. Added orig_sample_size to aslr_evsel_priv to correctly restore the state. Changes since v12: - Patch 2: Fixed potential NULL pointer dereference in remap_addresses__hash() when handling unmapped memory events (key->dso is NULL) under REFCNT_CHECKING. - Patch 2: Dynamically detect machine architecture bitness via perf_env__kernel_is_64_bit() to select appropriate kernel_space_start boundaries, avoiding 64-bit address injection on 32-bit platforms. Changes since v11: - Patch 1: Fixed struct dso name accessor in maps.c by using dso__name() instead of ->name. - Patch 2: Fixed hash function in aslr.c to hash the underlying dso pointer using RC_CHK_ACCESS to support reference count checking. Changes since v10: - Patch 1: Added explicit tracking array logic in maps__load_maps() to correctly accumulate valid maps (skipping NULL entries after failures) and safely return the exact populated count, resolving out-of-bounds pointer iteration panics. - Patch 3: Fixed endianness bug during cross-endian sample parsing by passing evsel->needs_swap instead of false to __evsel__parse_sample in aslr.c, ensuring correct 32-bit field byte unswapping for packed fields. Refactored evsel__parse_sample to take a needs_swap argument via __evsel__parse_sample. - Patch 4: Fixed inject_aslr.sh exit code handling in trap functions to capture and propagate the correct pipeline failure status code instead of unconditionally returning success or failing the test. Changes since v9: - Patch 1: Added `-ENOMEM` error check inside `maps__find_symbol_by_name()` and return `NULL` early. Added map sorting state invalidation on early return in `maps__load_maps()`. 
- Patch 2: Fixed encapsulation by using `thread__maps()` and `thread__pid()` accessors in `aslr_tool__findnew_mapping()`. Added `pr_warning_once` warning when raw auxtrace data is dropped. - Patch 3: Fixed encapsulation by using `thread__maps()` and `thread__pid()` accessors in `aslr_tool__remap_address()`. Wrapped `evsel__parse_sample()` to temporarily disable `needs_swap` to avoid branch stack endianness corruption on cross-endian files. Fixed ISO C90 warning for declaration-after-statement for `orig_needs_swap`. - Patch 4: Fixed duplicate cleanup by explicitly removing trap handlers (`trap - EXIT TERM INT`) inside the `cleanup()` function. - Patch 5: Fixed heap corruption by adding size bounds checking before writing to `sample_regs_user` and `sample_regs_intr` fields. Added missing register mask clearing logic for the `itrace` synthesis path of `perf_event__repipe_attr()`. Ian Rogers (5): perf maps: Add maps__mutate_mapping perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking perf inject/aslr: Implement sample address remapping perf aslr: Strip sample registers perf test: Add inject ASLR test tools/perf/builtin-inject.c | 81 +- tools/perf/tests/shell/inject_aslr.sh | 519 +++++++++ tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 1398 +++++++++++++++++++++++++ tools/perf/util/aslr.h | 44 + tools/perf/util/evsel.c | 6 +- tools/perf/util/evsel.h | 10 +- tools/perf/util/machine.c | 32 +- tools/perf/util/maps.c | 149 ++- tools/perf/util/maps.h | 3 + tools/perf/util/symbol-elf.c | 41 +- tools/perf/util/symbol.c | 17 +- 12 files changed, 2230 insertions(+), 71 deletions(-) create mode 100755 tools/perf/tests/shell/inject_aslr.sh create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v18 1/5] perf maps: Add maps__mutate_mapping 2026-06-07 21:36 ` [PATCH v18 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers @ 2026-06-07 21:36 ` Ian Rogers 2026-06-07 21:55 ` sashiko-bot 2026-06-07 21:36 ` [PATCH v18 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers ` (4 subsequent siblings) 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-07 21:36 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz During kernel ELF symbol parsing (dso__process_kernel_symbol), proc kallsyms image loading (dso__load_kernel_sym, dso__load_guest_kernel_sym), and dynamic kernel memory map alignment updates (machine__update_kernel_mmap), the loader directly modifies the live virtual address boundary key fields of map objects. If these boundaries are mutated while the map still resides inside the parent maps array (kmaps) and no lock is held, an unsafe concurrent window is exposed where parallel worker lookup threads (e.g., inside perf top) can mistakenly assume the cache remains sorted based on stale parameters, executing binary search queries (bsearch) across an unsorted range and triggering lookup failures. Fix this by introducing maps__mutate_mapping() that explicitly acquires the parent maps write semaphore lock, executes an incoming mutation callback block to perform the field updates under lock protection, and invalidates the sorted tracking flags prior to releasing the write lock. This guarantees synchronization invariants, closing the concurrent lookup race window. The adjacent module alignment pass inside machine__create_kernel_maps() is safely preserved as a high-performance lockless pass, as by contract it is only invoked single-threaded during session initialization.
To safely support this unconditional down_write write lock mutator without recursive read-to-write self-deadlock upgrades during lazy symbol loading, we introduce a public maps__load_maps() API. It copies map pointers under a brief read lock and force-loads all modules locklessly outside the lock. Callers (such as perf inject) must pre-load all kernel symbol maps up front at startup using maps__load_maps(), completely bypassing dynamic runtime mutations. Fixes: 39b12f781271 ("perf tools: Make it possible to read object code from vmlinux") Signed-off-by: Ian Rogers <irogers@google.com> Assisted-by: Antigravity:gemini-3.1-pro --- tools/perf/util/machine.c | 32 +++++--- tools/perf/util/maps.c | 149 ++++++++++++++++++++++++++++------- tools/perf/util/maps.h | 3 + tools/perf/util/symbol-elf.c | 41 ++++++---- tools/perf/util/symbol.c | 17 +++- 5 files changed, 184 insertions(+), 58 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index da1ad58758af..1ea06fde14e0 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1539,22 +1539,30 @@ static void machine__set_kernel_mmap(struct machine *machine, map__set_end(machine->vmlinux_map, ~0ULL); } -static int machine__update_kernel_mmap(struct machine *machine, - u64 start, u64 end) +struct kernel_mmap_mutation_ctx { + u64 start; + u64 end; +}; + +static int kernel_mmap_mutate_cb(struct map *map, void *data) { - struct map *orig, *updated; - int err; + struct kernel_mmap_mutation_ctx *ctx = data; - orig = machine->vmlinux_map; - updated = map__get(orig); + map__set_start(map, ctx->start); + map__set_end(map, ctx->end); + if (ctx->start == 0 && ctx->end == 0) + map__set_end(map, ~0ULL); + return 0; +} - machine->vmlinux_map = updated; - maps__remove(machine__kernel_maps(machine), orig); - machine__set_kernel_mmap(machine, start, end); - err = maps__insert(machine__kernel_maps(machine), updated); - map__put(orig); +static int machine__update_kernel_mmap(struct machine *machine, + 
u64 start, u64 end) +{ + struct kernel_mmap_mutation_ctx ctx = { .start = start, .end = end }; - return err; + return maps__mutate_mapping(machine__kernel_maps(machine), + machine->vmlinux_map, + kernel_mmap_mutate_cb, &ctx); } int machine__create_kernel_maps(struct machine *machine) diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index 923935ee21b6..b1b8efe42149 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -576,6 +576,49 @@ void maps__remove(struct maps *maps, struct map *map) #endif } +/** + * maps__mutate_mapping - Apply write-protected mutations to a map. + * @maps: The maps collection containing the map. + * @map: The map to mutate. + * @mutate_cb: Callback function that performs the actual mutations. + * @data: Private data passed to the callback. + * + * This acquires the write lock on the maps semaphore to safely protect + * concurrent readers from seeing partially mutated or unsorted map boundaries. + * + * WARNING: Acquiring down_write() here can trigger a recursive self-deadlock if + * the caller already holds the read lock (e.g., during maps__for_each_map() or + * maps__find() iteration paths that trigger lazy symbol loading). To completely + * avoid this deadlock, all kernel/module maps must be pre-loaded up-front (via + * maps__load_maps()) under a clean, single-threaded context before entering + * multi-threaded event processing loops. 
+ */ +int maps__mutate_mapping(struct maps *maps, struct map *map, + int (*mutate_cb)(struct map *map, void *data), void *data) +{ + int err = 0; + + if (maps) + down_write(maps__lock(maps)); + + err = mutate_cb(map, data); + + if (maps) { + RC_CHK_ACCESS(maps)->maps_by_address_sorted = false; + RC_CHK_ACCESS(maps)->maps_by_name_sorted = false; + } + + if (maps) + up_write(maps__lock(maps)); + +#ifdef HAVE_LIBDW_SUPPORT + if (maps) + libdw__invalidate_dwfl(maps, maps__libdw_addr_space_dwfl(maps)); +#endif + + return err; +} + bool maps__empty(struct maps *maps) { bool res; @@ -626,6 +669,41 @@ int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data) return ret; } +int maps__load_maps(struct maps *maps) +{ + struct map **maps_copy; + unsigned int nr_maps; + int err = 0; + + if (!maps) + return 0; + + down_read(maps__lock(maps)); + nr_maps = maps__nr_maps(maps); + if (nr_maps == 0) { + up_read(maps__lock(maps)); + return 0; + } + maps_copy = calloc(nr_maps, sizeof(*maps_copy)); + if (!maps_copy) { + up_read(maps__lock(maps)); + return -ENOMEM; + } + for (unsigned int i = 0; i < nr_maps; i++) + maps_copy[i] = map__get(maps__maps_by_address(maps)[i]); + up_read(maps__lock(maps)); + + for (unsigned int i = 0; i < nr_maps; i++) { + if (map__load(maps_copy[i]) < 0) { + pr_warning("Failed to load map %s\n", dso__name(map__dso(maps_copy[i]))); + err = -1; + } + map__put(maps_copy[i]); + } + free(maps_copy); + return err; +} + void maps__remove_maps(struct maps *maps, bool (*cb)(struct map *map, void *data), void *data) { struct map **maps_by_address; @@ -668,40 +746,57 @@ struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp) return result; } -struct maps__find_symbol_by_name_args { - struct map **mapp; - const char *name; - struct symbol *sym; -}; - -static int maps__find_symbol_by_name_cb(struct map *map, void *data) +struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) { - 
struct maps__find_symbol_by_name_args *args = data; + struct map **maps_copy; + unsigned int nr_maps; + struct symbol *sym = NULL; - args->sym = map__find_symbol_by_name(map, args->name); - if (!args->sym) - return 0; + if (!maps) + return NULL; - if (!map__contains_symbol(map, args->sym)) { - args->sym = NULL; - return 0; + /* + * First, ensure all maps are loaded. We pre-load them outside of any + * read-to-write locks to avoid deadlocks. Even if some fail, we proceed. + */ + maps__load_maps(maps); + + /* + * Create a local snapshot of the maps while holding the read lock. + * This prevents deadlocking if iteration triggers further map insertions. + */ + down_read(maps__lock(maps)); + nr_maps = maps__nr_maps(maps); + maps_copy = calloc(nr_maps, sizeof(*maps_copy)); + if (maps_copy) { + for (unsigned int i = 0; i < nr_maps; i++) { + struct map *map = maps__maps_by_address(maps)[i]; + + maps_copy[i] = map__get(map); + } } + up_read(maps__lock(maps)); - if (args->mapp != NULL) - *args->mapp = map__get(map); - return 1; -} + if (!maps_copy) + return NULL; -struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) -{ - struct maps__find_symbol_by_name_args args = { - .mapp = mapp, - .name = name, - .sym = NULL, - }; + for (unsigned int i = 0; i < nr_maps; i++) { + struct map *map = maps_copy[i]; + + sym = map__find_symbol_by_name(map, name); + if (sym && map__contains_symbol(map, sym)) { + if (mapp) + *mapp = map__get(map); + break; + } + sym = NULL; + } + + for (unsigned int i = 0; i < nr_maps; i++) + map__put(maps_copy[i]); - maps__for_each_map(maps, maps__find_symbol_by_name_cb, &args); - return args.sym; + free(maps_copy); + return sym; } int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams) diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h index 5b80b199685e..4ec9b7453a3b 100644 --- a/tools/perf/util/maps.h +++ b/tools/perf/util/maps.h @@ -59,8 +59,11 @@ void maps__set_libdw_addr_space_dwfl(struct 
maps *maps, void *dwfl); size_t maps__fprintf(struct maps *maps, FILE *fp); +int maps__load_maps(struct maps *maps); int maps__insert(struct maps *maps, struct map *map); void maps__remove(struct maps *maps, struct map *map); +int maps__mutate_mapping(struct maps *maps, struct map *map, + int (*mutate_cb)(struct map *map, void *data), void *data); struct map *maps__find(struct maps *maps, u64 addr); struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 186e6d92ac3d..d1e93c0556dd 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1342,6 +1342,24 @@ static u64 ref_reloc(struct kmap *kmap) void __weak arch__sym_update(struct symbol *s __maybe_unused, GElf_Sym *sym __maybe_unused) { } +struct remap_kernel_ctx { + u64 sh_addr; + u64 sh_size; + u64 sh_offset; + struct kmap *kmap; +}; + +static int remap_kernel_cb(struct map *map, void *data) +{ + struct remap_kernel_ctx *ctx = data; + + map__set_start(map, ctx->sh_addr + ref_reloc(ctx->kmap)); + map__set_end(map, map__start(map) + ctx->sh_size); + map__set_pgoff(map, ctx->sh_offset); + map__set_mapping_type(map, MAPPING_TYPE__DSO); + return 0; +} + static int dso__process_kernel_symbol(struct dso *dso, struct map *map, GElf_Sym *sym, GElf_Shdr *shdr, struct maps *kmaps, struct kmap *kmap, @@ -1372,22 +1390,15 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, * map to the kernel dso. 
*/ if (*remap_kernel && dso__kernel(dso) && !kmodule) { + struct remap_kernel_ctx ctx = { + .sh_addr = shdr->sh_addr, + .sh_size = shdr->sh_size, + .sh_offset = shdr->sh_offset, + .kmap = kmap + }; + *remap_kernel = false; - map__set_start(map, shdr->sh_addr + ref_reloc(kmap)); - map__set_end(map, map__start(map) + shdr->sh_size); - map__set_pgoff(map, shdr->sh_offset); - map__set_mapping_type(map, MAPPING_TYPE__DSO); - /* Ensure maps are correctly ordered */ - if (kmaps) { - int err; - struct map *tmp = map__get(map); - - maps__remove(kmaps, map); - err = maps__insert(kmaps, map); - map__put(tmp); - if (err) - return err; - } + maps__mutate_mapping(kmaps, map, remap_kernel_cb, &ctx); } /* diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 0c46b24ee098..2cc911af8c81 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -48,6 +48,13 @@ #include <symbol/kallsyms.h> #include <sys/utsname.h> +static int map_fixup_cb(struct map *map, void *data __maybe_unused) +{ + map__fixup_start(map); + map__fixup_end(map); + return 0; +} + static int dso__load_kernel_sym(struct dso *dso, struct map *map); static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map); @@ -2240,10 +2247,11 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map) free(kallsyms_allocated_filename); if (err > 0 && !dso__is_kcore(dso)) { + struct maps *kmaps = map__kmaps(map); + dso__set_binary_type(dso, DSO_BINARY_TYPE__KALLSYMS); dso__set_long_name(dso, DSO__NAME_KALLSYMS, false); - map__fixup_start(map); - map__fixup_end(map); + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); } return err; @@ -2283,10 +2291,11 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map) if (err > 0) pr_debug("Using %s for symbols\n", kallsyms_filename); if (err > 0 && !dso__is_kcore(dso)) { + struct maps *kmaps = map__kmaps(map); + dso__set_binary_type(dso, DSO_BINARY_TYPE__GUEST_KALLSYMS); dso__set_long_name(dso, machine->mmap_name, 
false); - map__fixup_start(map); - map__fixup_end(map); + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); } return err; -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v18 1/5] perf maps: Add maps__mutate_mapping 2026-06-07 21:36 ` [PATCH v18 1/5] perf maps: Add maps__mutate_mapping Ian Rogers @ 2026-06-07 21:55 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-06-07 21:55 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 3 potential issue(s) to consider: - [High] The patch fails to add the mandatory `maps__load_maps()` pre-loading call to `perf inject`, violating its own architectural contract and leaving the tool vulnerable to deadlocks. - [High] `maps__load_first` triggers a guaranteed read-to-write self-deadlock due to the new locking rules. - [High] Lockless reads of map boundary fields (`start`, `end`, `pgoff`) in `maps__find_symbol_by_name` and `maps__find_symbol` cause TOCTOU/inconsistent reads. -- commit b899514520f2642fd2ff0bdc70f3cfeac8916515 Author: Ian Rogers <irogers@google.com> Subject: perf maps: Add maps__mutate_mapping This patch introduces maps__mutate_mapping() to safely update map address boundaries under a write lock, fixing a race condition with concurrent map lookups. It also introduces maps__load_maps() to pre-load kernel maps and updates symbol parsing paths to use the new synchronized mutator. > lock. Callers (such as perf inject) must pre-load all kernel symbol > maps up front at startup using maps__load_maps(), completely bypassing > dynamic runtime mutations. > > Fixes: 39b12f781271 ("perf tools: Make it possible to read object code from vmlinux") > > Signed-off-by: Ian Rogers <irogers@google.com> > Assisted-by: Antigravity:gemini-3.1-pro [Severity: High] The commit message explicitly mandates that perf inject must pre-load maps up front to avoid deadlocks. Does tools/perf/builtin-inject.c need to be updated in this patch to include the maps__load_maps() call? Without it, perf inject might remain exposed to the recursive read-to-write self-deadlock during lazy symbol loading. 
> diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c > index 923935ee21b66..b1b8efe421496 100644 > --- a/tools/perf/util/maps.c > +++ b/tools/perf/util/maps.c [ ... ] > @@ -576,6 +576,49 @@ void maps__remove(struct maps *maps, struct map *map) > #endif > } > > +/** > + * maps__mutate_mapping - Apply write-protected mutations to a map. > + * @maps: The maps collection containing the map. > + * @map: The map to mutate. > + * @mutate_cb: Callback function that performs the actual mutations. > + * @data: Private data passed to the callback. > + * > + * This acquires the write lock on the maps semaphore to safely protect > + * concurrent readers from seeing partially mutated or unsorted map boundaries. > + * > + * WARNING: Acquiring down_write() here can trigger a recursive self-deadlock if > + * the caller already holds the read lock (e.g., during maps__for_each_map() or > + * maps__find() iteration paths that trigger lazy symbol loading). To completely > + * avoid this deadlock, all kernel/module maps must be pre-loaded up-front (via > + * maps__load_maps()) under a clean, single-threaded context before entering > + * multi-threaded event processing loops. > + */ [Severity: High] Does the newly introduced locking contract cause a read-to-write self-deadlock in the existing maps__load_first() API? If another subsystem calls maps__load_first(), it acquires the read lock: maps__load_first() down_read(maps__lock(maps)); map__load(...) If map__load() triggers lazy symbol loading, won't it eventually call maps__mutate_mapping() which unconditionally takes the write lock via down_write(maps__lock(maps)), deadlocking the thread? [ ... ] > +struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) > { > + struct map **maps_copy; > + unsigned int nr_maps; > + struct symbol *sym = NULL; > > + if (!maps) > + return NULL; > > + /* > + * First, ensure all maps are loaded. 
We pre-load them outside of any > + * read-to-write locks to avoid deadlocks. Even if some fail, we proceed. > + */ > + maps__load_maps(maps); > + > + /* > + * Create a local snapshot of the maps while holding the read lock. > + * This prevents deadlocking if iteration triggers further map insertions. > + */ > + down_read(maps__lock(maps)); > + nr_maps = maps__nr_maps(maps); > + maps_copy = calloc(nr_maps, sizeof(*maps_copy)); > + if (maps_copy) { > + for (unsigned int i = 0; i < nr_maps; i++) { > + struct map *map = maps__maps_by_address(maps)[i]; > + > + maps_copy[i] = map__get(map); > + } > } > + up_read(maps__lock(maps)); > > + if (!maps_copy) > + return NULL; > > + for (unsigned int i = 0; i < nr_maps; i++) { > + struct map *map = maps_copy[i]; > + > + sym = map__find_symbol_by_name(map, name); > + if (sym && map__contains_symbol(map, sym)) { [Severity: High] Since the maps->lock read lock was dropped prior to this loop, is it safe to call map__contains_symbol() here? map__contains_symbol() accesses map->start and map->end. Concurrently, a thread processing an mmap event could invoke maps__mutate_mapping(), which mutates these exact boundary fields while holding the maps->lock write lock. Will this cause a data race resulting in torn boundary reads? The same lockless read pattern appears to exist in maps__find_symbol(): maps__find_symbol() ... if (map != NULL && map__load(map) >= 0) result = map__find_symbol(map, map__map_ip(map, addr)); where map__map_ip() accesses map->start and map->pgoff after the read lock was dropped by maps__find(). 
> + if (mapp) > + *mapp = map__get(map); > + break; > + } > + sym = NULL; > + } > + > + for (unsigned int i = 0; i < nr_maps; i++) > + map__put(maps_copy[i]); > > - maps__for_each_map(maps, maps__find_symbol_by_name_cb, &args); > - return args.sym; > + free(maps_copy); > + return sym; > } -- Sashiko AI review · https://sashiko.dev/#/patchset/20260607213700.3563842-1-irogers@google.com?part=1 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v18 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking 2026-06-07 21:36 ` [PATCH v18 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-07 21:36 ` [PATCH v18 1/5] perf maps: Add maps__mutate_mapping Ian Rogers @ 2026-06-07 21:36 ` Ian Rogers 2026-06-07 21:52 ` sashiko-bot 2026-06-07 21:36 ` [PATCH v18 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers ` (3 subsequent siblings) 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-07 21:36 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz If perf.data files are taken from one machine to another they may leak virtual addresses and so weaken ASLR on the machine they are coming from. Add an aslr option for perf inject that remaps all virtual addresses, or drops data/events, so that the virtual address information isn't leaked. This patch introduces the core ASLR remapping tool infrastructure and implements remapping/tracking for metadata events (MMAP, MMAP2, COMM, FORK, EXIT, KSYMBOL, TEXT_POKE). Sample events are delegated without remapping for now. 
Signed-off-by: Ian Rogers <irogers@google.com> Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> Assisted-by: Antigravity:gemini-3.1-pro --- tools/perf/builtin-inject.c | 61 ++- tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 825 ++++++++++++++++++++++++++++++++++++ tools/perf/util/aslr.h | 41 ++ 4 files changed, 919 insertions(+), 9 deletions(-) create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 75ffe31d03fe..8bb37095e2de 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -8,6 +8,7 @@ */ #include "builtin.h" +#include "util/aslr.h" #include "util/color.h" #include "util/dso.h" #include "util/vdso.h" @@ -24,6 +25,7 @@ #include "util/string2.h" #include "util/symbol.h" #include "util/synthetic-events.h" +#include "util/pmus.h" #include "util/thread.h" #include "util/namespaces.h" #include "util/unwind.h" @@ -124,6 +126,7 @@ struct perf_inject { bool in_place_update_dry_run; bool copy_kcore_dir; bool convert_callchain; + bool aslr; const char *input_name; struct perf_data output; u64 bytes_written; @@ -234,20 +237,36 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, u64 *ids; int ret; + union perf_event *aslr_event = NULL; + ret = perf_event__process_attr(tool, event, pevlist); if (ret) return ret; + if (inject->aslr) { + aslr_event = malloc(event->header.size); + if (!aslr_event) + return -ENOMEM; + memcpy(aslr_event, event, event->header.size); + aslr_tool__strip_attr_event(aslr_event, pevlist); + event = aslr_event; + } + /* If the output isn't a pipe then the attributes will be written as part of the header. 
*/ - if (!inject->output.is_pipe) - return 0; + if (!inject->output.is_pipe) { + ret = 0; + goto out; + } - if (!inject->itrace_synth_opts.set) - return perf_event__repipe_synth(tool, event); + if (!inject->itrace_synth_opts.set) { + ret = perf_event__repipe_synth(tool, event); + goto out; + } if (event->header.size < sizeof(struct perf_event_header) + PERF_ATTR_SIZE_VER0) { pr_err("Attribute event size %u is too small\n", event->header.size); - return -EINVAL; + ret = -EINVAL; + goto out; } /* @@ -263,7 +282,8 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, raw_attr_size > event->header.size - sizeof(event->header))) { pr_err("Attribute event size %u is too small for attr.size %u\n", event->header.size, raw_attr_size); - return -EINVAL; + ret = -EINVAL; + goto out; } memset(&attr, 0, sizeof(attr)); @@ -281,8 +301,11 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX; } - return perf_event__synthesize_attr(tool, &attr, (u32)n_ids, ids, + ret = perf_event__synthesize_attr(tool, &attr, (u32)n_ids, ids, perf_event__repipe_synth_cb); +out: + free(aslr_event); + return ret; } static int perf_event__repipe_event_update(const struct perf_tool *tool, @@ -2594,7 +2617,6 @@ static int __cmd_inject(struct perf_inject *inject) evsel->core.attr.exclude_callchain_user = 0; } } - session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc, @@ -2704,6 +2726,8 @@ int cmd_inject(int argc, const char **argv) unwind__option), OPT_BOOLEAN(0, "convert-callchain", &inject.convert_callchain, "Generate callchains using DWARF and drop register/stack data"), + OPT_BOOLEAN(0, "aslr", &inject.aslr, + "Remap virtual memory addresses similar to ASLR"), OPT_END() }; const char * const inject_usage[] = { @@ -2711,6 +2735,7 @@ int cmd_inject(int argc, 
const char **argv) NULL }; bool ordered_events; + struct perf_tool *tool = &inject.tool; if (!inject.itrace_synth_opts.set) { /* Disable eager loading of kernel symbols that adds overhead to perf inject. */ @@ -2731,6 +2756,11 @@ int cmd_inject(int argc, const char **argv) if (argc) usage_with_options(inject_usage, options); + if (inject.aslr && inject.convert_callchain) { + pr_err("Error: --aslr and --convert-callchain are mutually exclusive features.\n"); + return -EINVAL; + } + if (inject.strip && !inject.itrace_synth_opts.set) { pr_err("--strip option requires --itrace option\n"); return -1; @@ -2824,12 +2854,21 @@ int cmd_inject(int argc, const char **argv) inject.tool.schedstat_domain = perf_event__repipe_op2_synth; inject.tool.dont_split_sample_group = true; inject.tool.merge_deferred_callchains = false; - inject.session = __perf_session__new(&data, &inject.tool, + if (inject.aslr) { + tool = aslr_tool__new(&inject.tool); + if (!tool) { + ret = -ENOMEM; + goto out_close_output; + } + } + inject.session = __perf_session__new(&data, tool, /*trace_event_repipe=*/inject.output.is_pipe, /*host_env=*/NULL); if (IS_ERR(inject.session)) { ret = PTR_ERR(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); goto out_close_output; } @@ -2922,6 +2961,8 @@ int cmd_inject(int argc, const char **argv) goto out_delete; ret = __cmd_inject(&inject); + if (inject.aslr) + aslr_tool__strip_evlist(tool, inject.session->evlist); guest_session__exit(&inject.guest_session); @@ -2929,6 +2970,8 @@ int cmd_inject(int argc, const char **argv) strlist__delete(inject.known_build_ids); zstd_fini(&(inject.session->zstd_data)); perf_session__delete(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); out_close_output: if (!inject.in_place_update) perf_data__close(&inject.output); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 4bbc78b1f741..19994e026ae5 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -6,6 +6,7 @@ perf-util-y += 
arm64-frame-pointer-unwind-support.o perf-util-y += addr2line.o perf-util-y += addr_location.o perf-util-y += annotate.o +perf-util-y += aslr.o perf-util-y += blake2s.o perf-util-y += block-info.o perf-util-y += block-range.o diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c new file mode 100644 index 000000000000..e45f68c60493 --- /dev/null +++ b/tools/perf/util/aslr.c @@ -0,0 +1,825 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "aslr.h" + +#include "addr_location.h" +#include "debug.h" +#include "event.h" +#include "evsel.h" +#include "evlist.h" +#include "machine.h" +#include "map.h" +#include "thread.h" +#include "tool.h" +#include "session.h" +#include "data.h" +#include "dso.h" +#include "pmus.h" + +#include <internal/lib.h> /* page_size */ +#include <linux/compiler.h> +#include <linux/zalloc.h> +#include <inttypes.h> +#include <unistd.h> + +/** + * struct remap_addresses_key - Key for mapping original addresses to remapped ones. + * @dso: Pointer to the DSO (Dynamic Shared Object) associated with the mapping. + * @invariant: Unique offset invariant within the VMA (Virtual Memory Area). + * Calculated as `start - pgoff`. This value remains constant when + * perf's internal `maps__fixup_overlap_and_insert` splits a map into + * fragmented VMA pieces due to overlapping events, allowing us to + * resolve split maps consistently back to the original VMA. + * @pid: Process ID associated with the mapping. + */ +struct remap_addresses_key { + struct machine *machine; + struct dso *dso; + u64 invariant; + pid_t pid; +}; + +struct aslr_mapping { + struct list_head node; + u64 orig_start; + u64 len; + u64 remap_start; +}; + +struct process_top_address { + u64 remapped_max; +}; +struct aslr_tool { + /** @tool: The tool implemented here and a pointer to a delegate to process the data. */ + struct delegate_tool tool; + /** @machines: The machines with the input, not remapped, virtual address layout. 
*/ + struct machines machines; + /** @event_copy: Buffer used to create an event to pass to the delegate. */ + char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); + /** @remap_addresses: mapping from remap_addresses_key to remapped address. */ + struct hashmap remap_addresses; + /** @top_addresses: mapping from process to max remapped address. */ + struct hashmap top_addresses; +}; + +static const pid_t kernel_pid = -1; + +/* Start remapping user processes from a small non-zero offset. */ +static const u64 user_space_start = 0x200000; +static const u64 kernel_space_start_64 = 0xffff800010000000ULL; +static const u64 kernel_space_start_32 = 0x80000000ULL; + +static size_t remap_addresses__hash(long _key, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key = (struct remap_addresses_key *)_key; + void *dso_ptr = key->dso ? RC_CHK_ACCESS(key->dso) : NULL; + + return (size_t)key->machine ^ (size_t)dso_ptr ^ key->invariant ^ key->pid; +} + +static bool remap_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key1 = (struct remap_addresses_key *)_key1; + struct remap_addresses_key *key2 = (struct remap_addresses_key *)_key2; + + return key1->machine == key2->machine && + RC_CHK_EQUAL(key1->dso, key2->dso) && + key1->invariant == key2->invariant && + key1->pid == key2->pid; +} + +struct top_addresses_key { + struct machine *machine; + pid_t pid; +}; + +static size_t top_addresses__hash(long _key, void *ctx __maybe_unused) +{ + struct top_addresses_key *key = (struct top_addresses_key *)_key; + + return (size_t)key->machine ^ key->pid; +} + +static bool top_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) +{ + struct top_addresses_key *key1 = (struct top_addresses_key *)_key1; + struct top_addresses_key *key2 = (struct top_addresses_key *)_key2; + + return key1->machine == key2->machine && key1->pid == key2->pid; +} + +static u64 round_up_to_page_size(u64 addr) +{ + return (addr + page_size - 1) & 
~((u64)page_size - 1); +} + +struct aslr_machine_priv { + bool kernel_maps_loaded; +}; + +static int aslr_tool__preload_kernel_maps(struct machine *machine) +{ + struct aslr_machine_priv *mpriv = machine->priv; + + if (!mpriv) { + mpriv = zalloc(sizeof(*mpriv)); + if (!mpriv) + return -ENOMEM; + machine->priv = mpriv; + } + + if (!mpriv->kernel_maps_loaded) { + struct maps *kmaps = machine__kernel_maps(machine); + + if (kmaps) { + int err = maps__load_maps(kmaps); + + if (err < 0) { + pr_err("ASLR: Failed to preload kernel maps for machine pid %d\n", + machine->pid); + return err; + } + } + mpriv->kernel_maps_loaded = true; + } + return 0; +} + +static void aslr_tool__free_machine_priv(struct machine *machine) +{ + free(machine->priv); + machine->priv = NULL; +} + +static void aslr_tool__destroy_machines_priv(struct machines *machines) +{ + struct rb_node *nd; + + aslr_tool__free_machine_priv(&machines->host); + for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) { + struct machine *machine = rb_entry(nd, struct machine, rb_node); + + aslr_tool__free_machine_priv(machine); + } +} + +static u64 aslr_tool__findnew_mapping(struct aslr_tool *aslr, + struct machine *session_machine, + struct thread *aslr_thread, + u8 cpumode, u64 start, + u64 len, u64 pgoff) +{ + /* Address location for dso lookup. */ + struct addr_location al; + /* Original ASLR address based key for the remap table. */ + struct remap_addresses_key remap_key; + /* The address in the ASLR sanitized address space less pg_off. */ + u64 *remapped_invariant_ptr; + /* Key for the maximum address in a process. */ + struct top_addresses_key top_addr_key; + /* Value in top address table. */ + struct process_top_address *top = NULL; + /* Address in ASLR sanitized address space. */ + u64 remap_addr; + /* Potentially allocated remap table key. */ + struct remap_addresses_key *new_remap_key = NULL; + /* + * Potentially allocated remap table value. 
+ * TODO: Avoid allocation necessary for perf 32-bit binary support. + */ + u64 *new_remap_val = NULL; + int err; + + if (!aslr_thread) + return 0; + + /* The key to look up an incoming address to the outgoing value. */ + addr_location__init(&al); + remap_key.machine = maps__machine(thread__maps(aslr_thread)); + remap_key.pid = (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? + kernel_pid : thread__pid(aslr_thread); + if (thread__find_map(aslr_thread, cpumode, start, &al)) { + struct dso *dso = map__dso(al.map); + const char *dso_name = dso ? dso__long_name(dso) : NULL; + + remap_key.dso = dso; + if (dso && !is_anon_memory(dso_name) && !is_no_dso_memory(dso_name)) + remap_key.invariant = map__start(al.map) - map__pgoff(al.map); + else + remap_key.invariant = map__start(al.map); + } else { + remap_key.dso = NULL; + remap_key.invariant = start; + } + + /* The key to look up top allocated address. */ + top_addr_key.machine = remap_key.machine; + top_addr_key.pid = remap_key.pid; + + if (hashmap__find(&aslr->remap_addresses, &remap_key, &remapped_invariant_ptr)) { + /* Mmap already exists. */ + u64 calculated_max; + + if (al.map) { + /* + * The cached value is the base of the invariant. We add the + * offset into the VMA (start - map__start), plus the map's + * pgoff, to get the precise virtual address within this chunk. + */ + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + + (start - map__start(al.map)); + } else { + /* + * For unmapped memory (e.g. kernel anonymous), the cached value + * was stored offset by pgoff. Adding pgoff yields the true remap_addr. + */ + remap_addr = *remapped_invariant_ptr + pgoff; + } + + calculated_max = remap_addr + len; + + /* See if top mapping was expanded. 
*/ + if (hashmap__find(&aslr->top_addresses, &top_addr_key, &top)) { + if (calculated_max > top->remapped_max) + top->remapped_max = calculated_max; + } + addr_location__exit(&al); + return remap_addr; + } + /* No mmap, create an entry from the top address. */ + if (hashmap__find(&aslr->top_addresses, &top_addr_key, &top)) { + struct addr_location prev_al; + bool is_contiguous = false; + + /* Current max allocated mmap address within the process. */ + remap_addr = top->remapped_max; + + addr_location__init(&prev_al); + if (thread__find_map(aslr_thread, cpumode, start - 1, &prev_al)) { + if (map__end(prev_al.map) == start) + is_contiguous = true; + } + addr_location__exit(&prev_al); + + if (is_contiguous) { + /* Contiguous mapping, do not add 1 page gap! */ + remap_addr = round_up_to_page_size(remap_addr); + } else { + /* Give 1 page gap from current max page. */ + remap_addr = round_up_to_page_size(remap_addr); + remap_addr += page_size; + } + if (remap_addr + len > top->remapped_max) + top->remapped_max = remap_addr + len; + } else { + /* First address of the process, allocate key and first top address. */ + struct top_addresses_key *tk; + struct process_top_address *top_val; + struct perf_env *env = session_machine ? session_machine->env : NULL; + bool is_64 = env ? perf_env__kernel_is_64_bit(env) : (sizeof(void *) == 8); + u64 kernel_start_addr = is_64 ? kernel_space_start_64 : kernel_space_start_32; + + remap_addr = (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? 
+ kernel_start_addr : user_space_start; + remap_addr = round_up_to_page_size(remap_addr); + + tk = malloc(sizeof(*tk)); + top_val = malloc(sizeof(*top_val)); + if (!tk || !top_val) { + err = -ENOMEM; + } else { + *tk = top_addr_key; + top_val->remapped_max = remap_addr + len; + err = hashmap__insert(&aslr->top_addresses, tk, top_val, + HASHMAP_ADD, NULL, NULL); + } + if (err) { + errno = -err; + pr_err("Failure to add ASLR process top address %m\n"); + free(tk); + free(top_val); + addr_location__exit(&al); + return 0; + } + } + /* Create remapping entry. */ + new_remap_key = malloc(sizeof(*new_remap_key)); + new_remap_val = malloc(sizeof(u64)); + if (!new_remap_key || !new_remap_val) { + err = -ENOMEM; + } else { + *new_remap_key = remap_key; + new_remap_key->dso = dso__get(remap_key.dso); + if (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) { + if (al.map) { + *new_remap_val = remap_addr - + (start - map__start(al.map)) - + map__pgoff(al.map); + } else { + /* + * Subtract pgoff from the base virtual address so that + * when the lookup path adds pgoff back, it perfectly + * cancels out and returns remap_addr. + */ + *new_remap_val = remap_addr - pgoff; + } + } else { + *new_remap_val = remap_addr - (al.map ? 
(start - map__start(al.map)) + map__pgoff(al.map) : pgoff); + } + err = hashmap__add(&aslr->remap_addresses, new_remap_key, new_remap_val); + if (err) + dso__put(new_remap_key->dso); + } + if (err) { + errno = -err; + pr_err("Failure to add ASLR remapping %m\n"); + free(new_remap_key); + free(new_remap_val); + addr_location__exit(&al); + return 0; + } + addr_location__exit(&al); + return remap_addr; +} + +static int aslr_tool__process_mmap(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_mmap(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap.pid, event->mmap.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap, &event->mmap, event->mmap.header.size); + /* Remaps the mmap.start. */ + new_event->mmap.start = aslr_tool__findnew_mapping(aslr, machine, thread, cpumode, + event->mmap.start, + event->mmap.len, + event->mmap.pgoff); + /* + * For anonymous memory (and kernel maps), the kernel populates the + * event's pgoff field with the original un-obfuscated virtual address + * in bytes (i.e. (addr >> PAGE_SHIFT) << PAGE_SHIFT). 
+ * We must overwrite pgoff with the new remapped byte address to prevent + * leaking the original ASLR layout. + */ + if (cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL || + is_anon_memory(event->mmap.filename) || is_no_dso_memory(event->mmap.filename)) + new_event->mmap.pgoff = new_event->mmap.start; + err = delegate->mmap(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_mmap2(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_mmap2(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap2.pid, event->mmap2.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap2, &event->mmap2, event->mmap2.header.size); + /* Remaps the mmap.start. */ + new_event->mmap2.start = aslr_tool__findnew_mapping(aslr, machine, thread, cpumode, + event->mmap2.start, + event->mmap2.len, + event->mmap2.pgoff); + /* + * For anonymous memory (and kernel maps), the kernel populates the + * event's pgoff field with the original un-obfuscated virtual address + * in bytes (i.e. (addr >> PAGE_SHIFT) << PAGE_SHIFT). 
+ * We must overwrite pgoff with the new remapped byte address to prevent + * leaking the original ASLR layout. + */ + if (cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL || + is_anon_memory(event->mmap2.filename) || is_no_dso_memory(event->mmap2.filename)) + new_event->mmap2.pgoff = new_event->mmap2.start; + err = delegate->mmap2(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_comm(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_comm(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->comm(delegate, event, sample, machine); +} + +static int aslr_tool__process_fork(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. 
in the ASLR before virtual address space. */ + err = perf_event__process_fork(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->fork(delegate, event, sample, machine); +} + +static int aslr_tool__process_exit(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_exit(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->exit(delegate, event, sample, machine); +} + +static int aslr_tool__process_text_poke(const struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + /* Drop in case the instruction encodes an ASLR revealing address. 
*/ + return 0; +} + +static int aslr_tool__process_ksymbol(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + thread = machine__findnew_thread(aslr_machine, kernel_pid, 0); + if (!thread) + return -ENOMEM; + + memcpy(&new_event->ksymbol, &event->ksymbol, event->ksymbol.header.size); + /* Remaps the ksymbol.start before process_ksymbol potentially deletes the map */ + new_event->ksymbol.addr = aslr_tool__findnew_mapping(aslr, machine, thread, + PERF_RECORD_MISC_KERNEL, + event->ksymbol.addr, + event->ksymbol.len, + /*pgoff=*/0); + + err = perf_event__process_ksymbol(tool, event, sample, aslr_machine); + if (err) { + thread__put(thread); + return err; + } + + err = delegate->ksymbol(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_sample(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct perf_tool *delegate = aslr->tool.delegate; + + return delegate->sample(delegate, event, sample, machine); +} + +static int skipn(int fd, off_t n) +{ + char buf[4096]; + ssize_t ret; + + while (n > 0) { + ret = read(fd, buf, min_t(off_t, n, 
(off_t)sizeof(buf))); + if (ret <= 0) + return ret; + n -= ret; + } + + return 0; +} + +static s64 aslr_tool__process_auxtrace(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, + union perf_event *event) +{ + pr_warning_once("ASLR: Dropping auxtrace data as it cannot be obfuscated.\n"); + if (perf_data__is_pipe(session->data)) { + /* Copy behavior of the stub by reading all pipe data. */ + int err = skipn(perf_data__fd(session->data), event->auxtrace.size); + + if (err < 0) + return err; + } + return event->auxtrace.size; +} + +static int aslr_tool__process_auxtrace_info(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + +static int aslr_tool__process_auxtrace_error(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + + +void aslr_tool__strip_attr_event(union perf_event *event, struct evlist **pevlist) +{ + struct evsel *evsel; + bool needs_swap = false; + + if (pevlist && *pevlist) { + evsel = evlist__last(*pevlist); + if (evsel) + needs_swap = evsel->needs_swap; + } + + if (event->header.size >= (offsetof(struct perf_record_header_attr, + attr.sample_type) + sizeof(u64))) { + u64 st = event->attr.attr.sample_type; + + if (needs_swap) + st = bswap_64(st); + + st &= ASLR_SUPPORTED_SAMPLE_TYPE; + + if (needs_swap) + st = bswap_64(st); + + event->attr.attr.sample_type = st; + } + + if (event->header.size >= (offsetof(struct perf_record_header_attr, + attr.type) + sizeof(u32))) { + u32 type = event->attr.attr.type; + + if (needs_swap) + type = bswap_32(type); + + if (type == PERF_TYPE_BREAKPOINT && + event->header.size >= (offsetof(struct perf_record_header_attr, + attr.bp_addr) + sizeof(u64))) { + event->attr.attr.bp_addr = 0; + } else if (type >= PERF_TYPE_MAX) { + struct perf_pmu *pmu; + + pmu = perf_pmus__find_by_type(type); + if (pmu && 
(!strcmp(pmu->name, "kprobe") || + !strcmp(pmu->name, "uprobe"))) { + if (event->header.size >= + (offsetof(struct perf_record_header_attr, + attr.config1) + sizeof(u64))) + event->attr.attr.config1 = 0; + if (event->header.size >= + (offsetof(struct perf_record_header_attr, + attr.config2) + sizeof(u64))) + event->attr.attr.config2 = 0; + } + } + } +} + +void aslr_tool__strip_evlist(struct perf_tool *tool __maybe_unused, + struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) + evsel->core.attr.bp_addr = 0; + else if (evsel->core.attr.type >= PERF_TYPE_MAX) { + struct perf_pmu *pmu = perf_pmus__find_by_type(evsel->core.attr.type); + + if (pmu && (!strcmp(pmu->name, "kprobe") || + !strcmp(pmu->name, "uprobe"))) { + evsel->core.attr.config1 = 0; + evsel->core.attr.config2 = 0; + } + } + } +} + +static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) +{ + delegate_tool__init(&aslr->tool, delegate); + aslr->tool.tool.ordered_events = true; + + machines__init(&aslr->machines); + + hashmap__init(&aslr->remap_addresses, + remap_addresses__hash, remap_addresses__equal, + /*ctx=*/NULL); + hashmap__init(&aslr->top_addresses, + top_addresses__hash, top_addresses__equal, + /*ctx=*/NULL); + + aslr->tool.tool.sample = aslr_tool__process_sample; + /* read - reads a counter, okay to delegate. */ + aslr->tool.tool.mmap = aslr_tool__process_mmap; + aslr->tool.tool.mmap2 = aslr_tool__process_mmap2; + aslr->tool.tool.comm = aslr_tool__process_comm; + aslr->tool.tool.fork = aslr_tool__process_fork; + aslr->tool.tool.exit = aslr_tool__process_exit; + /* namespaces, cgroup, lost, lost_sample, aux, */ + /* itrace_start, aux_output_hw_id, context_switch, throttle, unthrottle */ + /* - no virtual addresses. */ + aslr->tool.tool.ksymbol = aslr_tool__process_ksymbol; + /* bpf - no virtual address. 
*/ + aslr->tool.tool.text_poke = aslr_tool__process_text_poke; + /* + * event_update, tracing_data, finished_round, build_id, id_index, + * auxtrace_info, auxtrace_error, time_conv, thread_map, cpu_map, + * stat_config, stat, feature, finished_init, bpf_metadata, compressed, + * auxtrace - no virtual addresses. + */ + aslr->tool.tool.auxtrace = aslr_tool__process_auxtrace; + aslr->tool.tool.auxtrace_info = aslr_tool__process_auxtrace_info; + aslr->tool.tool.auxtrace_error = aslr_tool__process_auxtrace_error; +} + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate) +{ + struct aslr_tool *aslr = zalloc(sizeof(*aslr)); + + if (!aslr) + return NULL; + + aslr_tool__init(aslr, delegate); + return &aslr->tool.tool; +} + +void aslr_tool__delete(struct perf_tool *tool) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct hashmap_entry *cur; + size_t bkt; + struct rb_node *nd; + + if (!tool) + return; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + + hashmap__for_each_entry(&aslr->remap_addresses, cur, bkt) { + struct remap_addresses_key *key = (struct remap_addresses_key *)cur->pkey; + + if (key) + dso__put(key->dso); + zfree(&cur->pkey); + zfree(&cur->pvalue); + } + hashmap__for_each_entry(&aslr->top_addresses, cur, bkt) { + zfree(&cur->pkey); + zfree(&cur->pvalue); + } + + hashmap__clear(&aslr->remap_addresses); + hashmap__clear(&aslr->top_addresses); + aslr_tool__destroy_machines_priv(&aslr->machines); + machines__destroy_kernel_maps(&aslr->machines); + + while ((nd = rb_first_cached(&aslr->machines.guests)) != NULL) { + struct machine *machine = rb_entry(nd, struct machine, rb_node); + + rb_erase_cached(nd, &aslr->machines.guests); + machine__delete(machine); + } + + machines__exit(&aslr->machines); + free(aslr); +} diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h new file mode 100644 index 000000000000..2b82f711bc67 --- /dev/null +++ 
b/tools/perf/util/aslr.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_ASLR_H +#define __PERF_ASLR_H + +#include <linux/perf_event.h> + +#define ASLR_SUPPORTED_SAMPLE_TYPE ( \ + PERF_SAMPLE_IDENTIFIER | \ + PERF_SAMPLE_IP | \ + PERF_SAMPLE_TID | \ + PERF_SAMPLE_TIME | \ + PERF_SAMPLE_ADDR | \ + PERF_SAMPLE_ID | \ + PERF_SAMPLE_STREAM_ID | \ + PERF_SAMPLE_CPU | \ + PERF_SAMPLE_PERIOD | \ + PERF_SAMPLE_READ | \ + PERF_SAMPLE_CALLCHAIN | \ + PERF_SAMPLE_RAW | \ + PERF_SAMPLE_BRANCH_STACK | \ + PERF_SAMPLE_STACK_USER | \ + PERF_SAMPLE_WEIGHT_TYPE | \ + PERF_SAMPLE_DATA_SRC | \ + PERF_SAMPLE_TRANSACTION | \ + PERF_SAMPLE_PHYS_ADDR | \ + PERF_SAMPLE_CGROUP | \ + PERF_SAMPLE_DATA_PAGE_SIZE | \ + PERF_SAMPLE_CODE_PAGE_SIZE | \ + PERF_SAMPLE_AUX) + +struct perf_tool; +struct evsel; +struct evlist; +union perf_event; + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate); +void aslr_tool__delete(struct perf_tool *aslr); +void aslr_tool__strip_attr_event(union perf_event *event, struct evlist **pevlist); +void aslr_tool__strip_evlist(struct perf_tool *tool, struct evlist *evlist); + +#endif /* __PERF_ASLR_H */ -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v18 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking 2026-06-07 21:36 ` [PATCH v18 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers @ 2026-06-07 21:52 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-06-07 21:52 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 2 potential issue(s) to consider: - [High] Incorrect `pgoff` overwrite for kernel modules breaks symbol resolution. - [Medium] KSYMBOL REGISTER and UNREGISTER events receive mismatched remapped addresses due to incorrect lookup ordering. -- commit bea273ddc97f1917658141d7ddbe3f4f2db6086b Author: Ian Rogers <irogers@google.com> perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking If perf.data files are taken from one machine to another they may leak virtual addresses and so weaken ASLR on the machine they are coming from. Add an aslr option for perf inject that remaps all virtual addresses, or drops data/events, so that the virtual address information isn't leaked. > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > new file mode 100644 > index 0000000000000..e45f68c604937 > --- /dev/null > +++ b/tools/perf/util/aslr.c [ ... ] > +static int aslr_tool__process_mmap(const struct perf_tool *tool, > + union perf_event *event, > + struct perf_sample *sample, > + struct machine *machine) > +{ [ ... ] > + /* Remaps the mmap.start. */ > + new_event->mmap.start = aslr_tool__findnew_mapping(aslr, machine, thread, cpumode, > + event->mmap.start, > + event->mmap.len, > + event->mmap.pgoff); > + /* > + * For anonymous memory (and kernel maps), the kernel populates the > + * event's pgoff field with the original un-obfuscated virtual address > + * in bytes (i.e. (addr >> PAGE_SHIFT) << PAGE_SHIFT). > + * We must overwrite pgoff with the new remapped byte address to prevent > + * leaking the original ASLR layout. 
> + */ > + if (cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL || > + is_anon_memory(event->mmap.filename) || is_no_dso_memory(event->mmap.filename)) > + new_event->mmap.pgoff = new_event->mmap.start; [Severity: High] Will unconditionally overwriting pgoff with the remapped start address for kernel maps break symbol resolution for kernel modules? For kernel modules synthesized by perf_event__synthesize_modules(), mmap events are emitted with pgoff = 0 or relative file offsets. By overwriting pgoff with the remapped start address here, the correct map offset is destroyed. When perf report calculates the map ip (ip - start + pgoff), would it yield the absolute remapped virtual address instead of the correct relative offset, causing symbol lookups in .ko files to fail? > + err = delegate->mmap(delegate, new_event, sample, machine); > + thread__put(thread); > + return err; > +} > + > +static int aslr_tool__process_mmap2(const struct perf_tool *tool, > + union perf_event *event, > + struct perf_sample *sample, > + struct machine *machine) > +{ [ ... ] > + /* Remaps the mmap.start. */ > + new_event->mmap2.start = aslr_tool__findnew_mapping(aslr, machine, thread, cpumode, > + event->mmap2.start, > + event->mmap2.len, > + event->mmap2.pgoff); > + /* > + * For anonymous memory (and kernel maps), the kernel populates the > + * event's pgoff field with the original un-obfuscated virtual address > + * in bytes (i.e. (addr >> PAGE_SHIFT) << PAGE_SHIFT). > + * We must overwrite pgoff with the new remapped byte address to prevent > + * leaking the original ASLR layout. 
> + */ > + if (cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL || > + is_anon_memory(event->mmap2.filename) || is_no_dso_memory(event->mmap2.filename)) > + new_event->mmap2.pgoff = new_event->mmap2.start; [Severity: High] Like the issue in aslr_tool__process_mmap() above, does overwriting pgoff here similarly corrupt map offsets for mmap2 events from kernel modules? > + err = delegate->mmap2(delegate, new_event, sample, machine); > + thread__put(thread); > + return err; > +} [ ... ] > +static int aslr_tool__process_ksymbol(const struct perf_tool *tool, > + union perf_event *event, > + struct perf_sample *sample, > + struct machine *machine) > +{ [ ... ] > + memcpy(&new_event->ksymbol, &event->ksymbol, event->ksymbol.header.size); > + /* Remaps the ksymbol.start before process_ksymbol potentially deletes the map */ > + new_event->ksymbol.addr = aslr_tool__findnew_mapping(aslr, machine, thread, > + PERF_RECORD_MISC_KERNEL, > + event->ksymbol.addr, > + event->ksymbol.len, > + /*pgoff=*/0); > + > + err = perf_event__process_ksymbol(tool, event, sample, aslr_machine); [Severity: Medium] Does calling aslr_tool__findnew_mapping() before perf_event__process_ksymbol() cause ksymbol unregister events to receive a mismatched remapped address? For a bpf register event, the map does not exist yet in aslr_machine, so aslr_tool__findnew_mapping() records the new remapping using a key with a NULL dso. When the corresponding unregister event arrives, aslr_tool__findnew_mapping() executes before perf_event__process_ksymbol() removes the map. It finds the existing map, extracts its dso, and looks up the remapping using a key with a non-NULL dso. Since this lookup fails, could the unregister event get assigned a completely different remapped address and break bpf symbol tracking? 
> + if (err) { > + thread__put(thread); > + return err; > + } > + > + err = delegate->ksymbol(delegate, new_event, sample, machine); > + thread__put(thread); > + return err; > +} -- Sashiko AI review · https://sashiko.dev/#/patchset/20260607213700.3563842-1-irogers@google.com?part=2 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v18 3/5] perf inject/aslr: Implement sample address remapping 2026-06-07 21:36 ` [PATCH v18 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-07 21:36 ` [PATCH v18 1/5] perf maps: Add maps__mutate_mapping Ian Rogers 2026-06-07 21:36 ` [PATCH v18 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers @ 2026-06-07 21:36 ` Ian Rogers 2026-06-07 21:56 ` sashiko-bot 2026-06-07 21:36 ` [PATCH v18 4/5] perf aslr: Strip sample registers Ian Rogers ` (2 subsequent siblings) 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-07 21:36 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz Add the sample address remapping logic to the ASLR tool. This patch implements aslr_tool__process_sample, which parses sample events, remaps IPs, ADDRs, callchains, and branch stacks using the mappings collected from metadata events, and drops potentially leaking raw, register, stack, physical address, and aux samples. Also adds the aslr_tool__remap_address helper function. Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> Signed-off-by: Ian Rogers <irogers@google.com> Assisted-by: Antigravity:gemini-3.1-pro --- tools/perf/util/aslr.c | 463 +++++++++++++++++++++++++++++++++++++++- tools/perf/util/evsel.c | 6 +- tools/perf/util/evsel.h | 10 +- 3 files changed, 470 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c index e45f68c60493..03944677ddfc 100644 --- a/tools/perf/util/aslr.c +++ b/tools/perf/util/aslr.c @@ -20,6 +20,7 @@ #include <linux/zalloc.h> #include <inttypes.h> #include <unistd.h> +#include <byteswap.h> /** * struct remap_addresses_key - Key for mapping original addresses to remapped ones. 
@@ -112,6 +113,60 @@ static u64 round_up_to_page_size(u64 addr) return (addr + page_size - 1) & ~((u64)page_size - 1); } +static u64 aslr_tool__remap_address(struct aslr_tool *aslr, + struct thread *aslr_thread, + u8 cpumode, + u64 addr) +{ + struct addr_location al; + struct remap_addresses_key key; + u64 *remapped_invariant_ptr = NULL; + u64 remap_addr = 0; + u8 effective_cpumode = cpumode; + + if (!aslr_thread) + return 0; /* No thread. */ + + addr_location__init(&al); + if (!thread__find_map(aslr_thread, cpumode, addr, &al)) { + /* + * If lookup fails with specified cpumode, try fallback to the other space + * to be robust against bad cpumode in samples. + */ + if (cpumode == PERF_RECORD_MISC_KERNEL) + effective_cpumode = PERF_RECORD_MISC_USER; + else if (cpumode == PERF_RECORD_MISC_USER) + effective_cpumode = PERF_RECORD_MISC_KERNEL; + else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + effective_cpumode = PERF_RECORD_MISC_GUEST_USER; + else if (cpumode == PERF_RECORD_MISC_GUEST_USER) + effective_cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + + if (!thread__find_map(aslr_thread, effective_cpumode, addr, &al)) { + addr_location__exit(&al); + return 0; /* No mmap. */ + } + } + + key.machine = maps__machine(thread__maps(aslr_thread)); + key.dso = map__dso(al.map); + key.invariant = map__start(al.map) - map__pgoff(al.map); + key.pid = (effective_cpumode == PERF_RECORD_MISC_KERNEL || + effective_cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? 
+ kernel_pid : thread__pid(aslr_thread); + + if (hashmap__find(&aslr->remap_addresses, &key, &remapped_invariant_ptr)) { + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + + (addr - map__start(al.map)); + } else { + pr_debug("Cannot find a remapped entry for address %lx in mapping %lx(%lx) for pid=%d\n", + addr, map__start(al.map), map__size(al.map), key.pid); + } + + addr_location__exit(&al); + return remap_addr; +} + struct aslr_machine_priv { bool kernel_maps_loaded; }; @@ -602,13 +657,413 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, struct perf_sample *sample, struct machine *machine) { - struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); - struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); - struct perf_tool *delegate = aslr->tool.delegate; + struct evsel *evsel = sample->evsel; + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + int ret; + u64 sample_type; + struct thread *thread; + struct machine *aslr_machine; + __u64 max_i; + __u64 max_j; + union perf_event *new_event; + struct perf_sample new_sample; + __u64 *in_array, *out_array; + u8 cpumode; + u64 addr; + size_t i; + size_t j; + bool orig_needs_swap; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + orig_needs_swap = evsel->needs_swap; + + if (evsel__is_dummy_event(evsel)) + return delegate->sample(delegate, event, sample, machine); + + ret = -EFAULT; + sample_type = evsel->core.attr.sample_type; + max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); + max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); + new_event = (union perf_event *)aslr->event_copy; + cpumode = sample->cpumode; + i = 0; + j = 0; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if 
(aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + thread = machine__findnew_thread(aslr_machine, sample->pid, sample->tid); + + if (!thread) + return -ENOMEM; + + if (max_i > PERF_SAMPLE_MAX_SIZE / sizeof(u64)) + goto out_put; + + new_event->sample.header = event->sample.header; + + in_array = &event->sample.array[0]; + out_array = &new_event->sample.array[0]; + +#define CHECK_BOUNDS(required_i, required_j) \ + (i + (required_i) > max_i || j + (required_j) > max_j) + +#define COPY_U64() \ + do { \ + if (CHECK_BOUNDS(1, 1)) { \ + ret = -EFAULT; \ + goto out_put; \ + } \ + out_array[j++] = in_array[i++]; \ + } while (0) + +#define REMAP_U64(addr_field) \ + do { \ + u64 remapped; \ + if (CHECK_BOUNDS(1, 1)) { \ + ret = -EFAULT; \ + goto out_put; \ + } \ + remapped = aslr_tool__remap_address(aslr, thread, cpumode, addr_field); \ + if (orig_needs_swap) \ + remapped = bswap_64(remapped); \ + out_array[j++] = remapped; \ + i++; \ + } while (0) + + if (sample_type & PERF_SAMPLE_IDENTIFIER) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_IP) + REMAP_U64(sample->ip); + if (sample_type & PERF_SAMPLE_TID) + COPY_U64(); /* pid, tid */ + if (sample_type & PERF_SAMPLE_TIME) + COPY_U64(); /* time */ + if (sample_type & PERF_SAMPLE_ADDR) + REMAP_U64(sample->addr); + if (sample_type & PERF_SAMPLE_ID) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_STREAM_ID) + COPY_U64(); /* stream_id */ + if (sample_type & PERF_SAMPLE_CPU) + COPY_U64(); /* cpu, res */ + if (sample_type & PERF_SAMPLE_PERIOD) + COPY_U64(); /* period */ + if (sample_type & PERF_SAMPLE_READ) { + if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if 
(evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } else { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + nr = in_array[i]; + COPY_U64(); + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + for (u64 cntr = 0; cntr < nr; cntr++) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } + } + } + if (sample_type & PERF_SAMPLE_CALLCHAIN) { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + nr = in_array[i]; + COPY_U64(); - return delegate->sample(delegate, event, sample, machine); + for (u64 cntr = 0; cntr < nr; cntr++) { + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + addr = in_array[i++]; + if (addr >= PERF_CONTEXT_MAX) { + out_array[j++] = orig_needs_swap ? bswap_64(addr) : addr; + switch (addr) { + case PERF_CONTEXT_HV: + cpumode = PERF_RECORD_MISC_HYPERVISOR; + break; + case PERF_CONTEXT_KERNEL: + cpumode = PERF_RECORD_MISC_KERNEL; + break; + case PERF_CONTEXT_USER: + cpumode = PERF_RECORD_MISC_USER; + break; + case PERF_CONTEXT_GUEST: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_KERNEL: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_USER: + cpumode = PERF_RECORD_MISC_GUEST_USER; + break; + case PERF_CONTEXT_USER_DEFERRED: + if (cntr + 1 >= nr) { + pr_debug("Truncated callchain deferred cookie context\n"); + ret = 0; + goto out_put; + } + /* + * Immediately followed by a 64-bit + * stitching cookie. Skip/Copy it! 
+ */ + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j++] = in_array[i++]; + cntr++; + cpumode = PERF_RECORD_MISC_USER; + break; + default: + pr_debug("invalid callchain context: %"PRIx64"\n", addr); + ret = 0; + goto out_put; + } + continue; + } + addr = aslr_tool__remap_address(aslr, thread, cpumode, addr); + if (orig_needs_swap) + addr = bswap_64(addr); + out_array[j++] = addr; + } + } + if (sample_type & PERF_SAMPLE_RAW) { + size_t bytes = sizeof(u32) + sample->raw_size; + size_t u64_words = (bytes + 7) / 8; + + if (i + u64_words > max_i || j + u64_words > max_j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], bytes); + i += u64_words; + j += u64_words; + /* + * TODO: certain raw samples can be remapped, such as + * tracepoints by examining their fields. + */ + pr_debug("Dropping raw samples as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + nr = in_array[i]; + COPY_U64(); + + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) + COPY_U64(); /* hw_idx */ + + if (nr > (ULLONG_MAX / 3)) { + ret = -EFAULT; + goto out_put; + } + if (nr * 3 > max_i - i || nr * 3 > max_j - j) { + ret = -EFAULT; + goto out_put; + } + for (u64 cntr = 0; cntr < nr; cntr++) { + u64 from = in_array[i++]; + u64 to = in_array[i++]; + + if (orig_needs_swap) { + from = bswap_64(from); + to = bswap_64(to); + } + + from = aslr_tool__remap_address(aslr, thread, sample->cpumode, from); + to = aslr_tool__remap_address(aslr, thread, sample->cpumode, to); + + if (orig_needs_swap) { + from = bswap_64(from); + to = bswap_64(to); + } + + out_array[j++] = from; + out_array[j++] = to; + out_array[j++] = in_array[i++]; /* flags */ + } + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) { + if (nr > max_i - i || nr > max_j - j) { + ret = -EFAULT; + goto out_put; + } + for (u64 
cntr = 0; cntr < nr; cntr++) + COPY_U64(); + } + } + if (sample_type & PERF_SAMPLE_REGS_USER) { + if (CHECK_BOUNDS(1, 0)) { + ret = -EFAULT; + goto out_put; + } + /* abi */ + COPY_U64(); + /* TODO: can this be less conservative? */ + pr_debug("Dropping regs user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_STACK_USER) { + u64 size; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + size = in_array[i]; + COPY_U64(); + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + if (size % 8) { + size_t pad = 8 - (size % 8); + + memset(((char *)&out_array[j]) + size, 0, pad); + } + i += u64_words; + j += u64_words; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping stack user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + COPY_U64(); /* perf_sample_weight */ + if (sample_type & PERF_SAMPLE_DATA_SRC) + COPY_U64(); /* data_src */ + if (sample_type & PERF_SAMPLE_TRANSACTION) + COPY_U64(); /* transaction */ + if (sample_type & PERF_SAMPLE_REGS_INTR) { + if (CHECK_BOUNDS(1, 0)) { + ret = -EFAULT; + goto out_put; + } + /* abi */ + COPY_U64(); + /* TODO: can this be less conservative? */ + pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_PHYS_ADDR) { + COPY_U64(); /* phys_addr */ + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping physical address sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_CGROUP) + COPY_U64(); /* cgroup */ + if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + COPY_U64(); /* data_page_size */ + if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + COPY_U64(); /* code_page_size */ + + if (sample_type & PERF_SAMPLE_AUX) { + u64 size; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + size = out_array[j++]; + i++; + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + if (size % 8) { + size_t pad = 8 - (size % 8); + + memset(((char *)&out_array[j]) + size, 0, pad); + } + i += u64_words; + j += u64_words; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping aux sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + if (evsel__is_offcpu_event(evsel)) { + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping off-CPU sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); + + perf_sample__init(&new_sample, /*all=*/ true); + ret = __evsel__parse_sample(evsel, new_event, &new_sample, orig_needs_swap); + + if (ret) { + perf_sample__exit(&new_sample); + goto out_put; + } + + new_sample.evsel = evsel; + ret = delegate->sample(delegate, new_event, &new_sample, machine); + perf_sample__exit(&new_sample); + +out_put: + thread__put(thread); + return ret; } +#undef CHECK_BOUNDS +#undef COPY_U64 +#undef REMAP_U64 + static int skipn(int fd, off_t n) { char buf[4096]; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 34c03f47a913..05fa0010c858 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -3337,11 +3337,11 @@ static int __set_offcpu_sample(struct perf_sample *data) return -EFAULT; } -int evsel__parse_sample(struct evsel *evsel, union perf_event *event, - struct perf_sample *data) +int __evsel__parse_sample(struct evsel *evsel, union perf_event *event, + struct perf_sample *data, bool needs_swap) { u64 type = evsel->core.attr.sample_type; - bool swapped = evsel->needs_swap; + bool swapped = needs_swap; const __u64 *array; u16 max_size = event->header.size; const void *endp = (void *)event + max_size; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 8178858d168a..8009be22cc3f 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -432,8 +432,14 @@ static inline int evsel__read_on_cpu_scaled(struct evsel *evsel, int cpu_map_idx return __evsel__read_on_cpu(evsel, cpu_map_idx, thread, true); } -int evsel__parse_sample(struct evsel *evsel, union perf_event *event, - struct perf_sample *sample); +int __evsel__parse_sample(struct evsel *evsel, union perf_event *event, + struct perf_sample *data, bool needs_swap); + +static inline int evsel__parse_sample(struct evsel *evsel, union perf_event 
*event, + struct perf_sample *data) +{ + return __evsel__parse_sample(evsel, event, data, evsel->needs_swap); +} int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event, u64 *timestamp); -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v18 3/5] perf inject/aslr: Implement sample address remapping 2026-06-07 21:36 ` [PATCH v18 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers @ 2026-06-07 21:56 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-06-07 21:56 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider: - [High] Incorrect byte-swapping logic corrupts cross-endian ASLR injection output and map lookups. -- commit 6c75b55c8896e5addaf1feb2b5e93b62c2cc71dd Author: Ian Rogers <irogers@google.com> perf inject/aslr: Implement sample address remapping Add the sample address remapping logic to the ASLR tool. This patch implements aslr_tool__process_sample, which parses sample events, remaps IPs, ADDRs, callchains, and branch stacks using the mappings collected from metadata events. > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > index e45f68c604937..03944677ddfcc 100644 > --- a/tools/perf/util/aslr.c > +++ b/tools/perf/util/aslr.c [ ... ] > @@ -602,13 +657,413 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, [ ... ] > +#define REMAP_U64(addr_field) \ > + do { \ > + u64 remapped; \ > + if (CHECK_BOUNDS(1, 1)) { \ > + ret = -EFAULT; \ > + goto out_put; \ > + } \ > + remapped = aslr_tool__remap_address(aslr, thread, cpumode, addr_field); \ > + if (orig_needs_swap) \ > + remapped = bswap_64(remapped); \ > + out_array[j++] = remapped; \ > + i++; \ > + } while (0) [Severity: High] Since perf_session__process_event() applies perf_event__all64_swap() to PERF_RECORD_SAMPLE payloads, converting all 64-bit words in event->sample.array to native endianness before the tool processes them, will applying bswap_64() here write a corrupted foreign-endian address into the output buffer? The output perf.data file is emitted with a native header, so downstream readers wouldn't know to swap these values back. [ ... 
] > + for (u64 cntr = 0; cntr < nr; cntr++) { > + if (CHECK_BOUNDS(1, 1)) { > + ret = -EFAULT; > + goto out_put; > + } > + addr = in_array[i++]; > + if (addr >= PERF_CONTEXT_MAX) { > + out_array[j++] = orig_needs_swap ? bswap_64(addr) : addr; [Severity: High] Could this logic result in callchain context markers being incorrectly swapped to foreign endianness before they are written to the output event buffer? [ ... ] > + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { > + u64 nr; > + > + if (CHECK_BOUNDS(1, 1)) { > + ret = -EFAULT; > + goto out_put; > + } > + nr = in_array[i]; > + COPY_U64(); > + > + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) > + COPY_U64(); /* hw_idx */ > + > + if (nr > (ULLONG_MAX / 3)) { > + ret = -EFAULT; > + goto out_put; > + } > + if (nr * 3 > max_i - i || nr * 3 > max_j - j) { > + ret = -EFAULT; > + goto out_put; > + } > + for (u64 cntr = 0; cntr < nr; cntr++) { > + u64 from = in_array[i++]; > + u64 to = in_array[i++]; > + > + if (orig_needs_swap) { > + from = bswap_64(from); > + to = bswap_64(to); > + } > + > + from = aslr_tool__remap_address(aslr, thread, sample->cpumode, from); > + to = aslr_tool__remap_address(aslr, thread, sample->cpumode, to); [Severity: High] If the from and to addresses are swapped to foreign endianness before calling aslr_tool__remap_address(), wouldn't this pass invalid addresses to the thread map lookup, causing it to silently fail and bypass ASLR remapping for branch stacks? -- Sashiko AI review · https://sashiko.dev/#/patchset/20260607213700.3563842-1-irogers@google.com?part=3 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v18 4/5] perf aslr: Strip sample registers 2026-06-07 21:36 ` [PATCH v18 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (2 preceding siblings ...) 2026-06-07 21:36 ` [PATCH v18 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers @ 2026-06-07 21:36 ` Ian Rogers 2026-06-07 21:52 ` sashiko-bot 2026-06-07 21:37 ` [PATCH v18 5/5] perf test: Add inject ASLR test Ian Rogers 2026-06-08 5:48 ` [PATCH v19 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-07 21:36 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz Extend the ASLR tool stripping helpers to drop register dump payloads by masking out the relevant perf_event_attr fields (sample_regs_user, sample_regs_intr) when the delegated tool is handling the data. struct aslr_evsel_priv maintains the original perf_event_attr values and is looked up via the evsel_orig_attrs hashmap so that sample sizes can be properly parsed even when bits are stripped from the pipeline. This allows us to keep samples that would otherwise be dropped because they contain registers, while still obfuscating the registers. 
Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> Signed-off-by: Ian Rogers <irogers@google.com> Assisted-by: Antigravity:gemini-3.1-pro --- tools/perf/builtin-inject.c | 28 +++- tools/perf/util/aslr.c | 292 +++++++++++++++++++++++++----------- tools/perf/util/aslr.h | 9 +- 3 files changed, 236 insertions(+), 93 deletions(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 8bb37095e2de..6d6cce4765a7 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -248,7 +248,7 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, if (!aslr_event) return -ENOMEM; memcpy(aslr_event, event, event->header.size); - aslr_tool__strip_attr_event(aslr_event, pevlist); + aslr_tool__strip_attr_event(aslr_event, *pevlist); event = aslr_event; } @@ -297,6 +297,7 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, attr.size = sizeof(struct perf_event_attr); attr.sample_type &= ~PERF_SAMPLE_AUX; + if (inject->itrace_synth_opts.add_last_branch) { attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX; @@ -2617,6 +2618,10 @@ static int __cmd_inject(struct perf_inject *inject) evsel->core.attr.exclude_callchain_user = 0; } } + + if (inject->aslr) + aslr_tool__strip_evlist(inject->session->tool, session->evlist); + session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc, @@ -2875,6 +2880,18 @@ int cmd_inject(int argc, const char **argv) if (zstd_init(&(inject.session->zstd_data), 0) < 0) pr_warning("Decompression initialization failed.\n"); + if (inject.aslr) { + struct evsel *evsel; + + evlist__for_each_entry(inject.session->evlist, evsel) { + ret = aslr_tool__cache_orig_attrs(tool, evsel); + if (ret) { + pr_err("Failed to cache original attributes: %d\n", ret); + goto out_delete; + } + } + } + /* Save original 
section info before feature bits change */ ret = save_section_info(&inject); if (ret) @@ -2893,10 +2910,17 @@ int cmd_inject(int argc, const char **argv) * the input. */ if (!data.is_pipe) { + if (inject.aslr) + aslr_tool__strip_evlist(tool, inject.session->evlist); + ret = perf_event__synthesize_for_pipe(&inject.tool, inject.session, &inject.output, perf_event__repipe); + + if (inject.aslr) + aslr_tool__restore_evlist(tool, inject.session->evlist); + if (ret < 0) goto out_delete; } @@ -2961,8 +2985,6 @@ int cmd_inject(int argc, const char **argv) goto out_delete; ret = __cmd_inject(&inject); - if (inject.aslr) - aslr_tool__strip_evlist(tool, inject.session->evlist); guest_session__exit(&inject.guest_session); diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c index 03944677ddfc..912efd111bb3 100644 --- a/tools/perf/util/aslr.c +++ b/tools/perf/util/aslr.c @@ -18,6 +18,7 @@ #include <internal/lib.h> /* page_size */ #include <linux/compiler.h> #include <linux/zalloc.h> +#include <errno.h> #include <inttypes.h> #include <unistd.h> #include <byteswap.h> @@ -46,6 +47,23 @@ struct aslr_mapping { u64 remap_start; }; +struct aslr_evsel_priv { + u64 orig_sample_type; + u64 orig_sample_regs_user; + u64 orig_sample_regs_intr; + int orig_sample_size; +}; + +static size_t evsel_hash(long key, void *ctx __maybe_unused) +{ + return (size_t)key; +} + +static bool evsel_equal(long key1, long key2, void *ctx __maybe_unused) +{ + return key1 == key2; +} + struct process_top_address { u64 remapped_max; }; @@ -60,6 +78,11 @@ struct aslr_tool { struct hashmap remap_addresses; /** @top_addresses: mapping from process to max remapped address. */ struct hashmap top_addresses; + /** + * @evsel_orig_attrs: mapping from evsel pointer to its original + * unstripped sample_type and registers bitmasks. 
+ */ + struct hashmap evsel_orig_attrs; }; static const pid_t kernel_pid = -1; @@ -123,6 +146,8 @@ static u64 aslr_tool__remap_address(struct aslr_tool *aslr, u64 *remapped_invariant_ptr = NULL; u64 remap_addr = 0; u8 effective_cpumode = cpumode; + struct dso *dso; + const char *dso_name; if (!aslr_thread) return 0; /* No thread. */ @@ -148,9 +173,15 @@ static u64 aslr_tool__remap_address(struct aslr_tool *aslr, } } + dso = map__dso(al.map); + dso_name = dso ? dso__long_name(dso) : NULL; + key.machine = maps__machine(thread__maps(aslr_thread)); - key.dso = map__dso(al.map); - key.invariant = map__start(al.map) - map__pgoff(al.map); + key.dso = dso; + if (dso && !is_anon_memory(dso_name) && !is_no_dso_memory(dso_name)) + key.invariant = map__start(al.map) - map__pgoff(al.map); + else + key.invariant = map__start(al.map); key.pid = (effective_cpumode == PERF_RECORD_MISC_KERNEL || effective_cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? kernel_pid : thread__pid(aslr_thread); @@ -662,6 +693,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, struct aslr_tool *aslr; struct perf_tool *delegate; int ret; + int orig_sample_size; u64 sample_type; struct thread *thread; struct machine *aslr_machine; @@ -674,6 +706,10 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, u64 addr; size_t i; size_t j; + struct aslr_evsel_priv *priv = NULL; + u64 orig_sample_type; + u64 orig_regs_user; + u64 orig_regs_intr; bool orig_needs_swap; del_tool = container_of(tool, struct delegate_tool, tool); @@ -686,7 +722,24 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, return delegate->sample(delegate, event, sample, machine); ret = -EFAULT; - sample_type = evsel->core.attr.sample_type; + + if (hashmap__find(&aslr->evsel_orig_attrs, evsel, &priv)) { + orig_sample_type = priv->orig_sample_type; + orig_regs_user = priv->orig_sample_regs_user; + orig_regs_intr = priv->orig_sample_regs_intr; + } else { + orig_sample_type = 
evsel->core.attr.sample_type; + orig_regs_user = evsel->core.attr.sample_regs_user; + orig_regs_intr = evsel->core.attr.sample_regs_intr; + } + + orig_sample_size = evsel->sample_size; + + sample_type = orig_sample_type; + sample_type &= ~PERF_SAMPLE_REGS_USER; + sample_type &= ~PERF_SAMPLE_REGS_INTR; + sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); new_event = (union perf_event *)aslr->event_copy; @@ -739,25 +792,25 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, i++; \ } while (0) - if (sample_type & PERF_SAMPLE_IDENTIFIER) + if (orig_sample_type & PERF_SAMPLE_IDENTIFIER) COPY_U64(); /* id */ - if (sample_type & PERF_SAMPLE_IP) + if (orig_sample_type & PERF_SAMPLE_IP) REMAP_U64(sample->ip); - if (sample_type & PERF_SAMPLE_TID) + if (orig_sample_type & PERF_SAMPLE_TID) COPY_U64(); /* pid, tid */ - if (sample_type & PERF_SAMPLE_TIME) + if (orig_sample_type & PERF_SAMPLE_TIME) COPY_U64(); /* time */ - if (sample_type & PERF_SAMPLE_ADDR) + if (orig_sample_type & PERF_SAMPLE_ADDR) REMAP_U64(sample->addr); - if (sample_type & PERF_SAMPLE_ID) + if (orig_sample_type & PERF_SAMPLE_ID) COPY_U64(); /* id */ - if (sample_type & PERF_SAMPLE_STREAM_ID) + if (orig_sample_type & PERF_SAMPLE_STREAM_ID) COPY_U64(); /* stream_id */ - if (sample_type & PERF_SAMPLE_CPU) + if (orig_sample_type & PERF_SAMPLE_CPU) COPY_U64(); /* cpu, res */ - if (sample_type & PERF_SAMPLE_PERIOD) + if (orig_sample_type & PERF_SAMPLE_PERIOD) COPY_U64(); /* period */ - if (sample_type & PERF_SAMPLE_READ) { + if (orig_sample_type & PERF_SAMPLE_READ) { if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { COPY_U64(); /* value */ if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) @@ -790,7 +843,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, } } } - if (sample_type & 
PERF_SAMPLE_CALLCHAIN) { + if (orig_sample_type & PERF_SAMPLE_CALLCHAIN) { u64 nr; if (CHECK_BOUNDS(1, 1)) { @@ -858,7 +911,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, out_array[j++] = addr; } } - if (sample_type & PERF_SAMPLE_RAW) { + if (orig_sample_type & PERF_SAMPLE_RAW) { size_t bytes = sizeof(u32) + sample->raw_size; size_t u64_words = (bytes + 7) / 8; @@ -877,7 +930,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + if (orig_sample_type & PERF_SAMPLE_BRANCH_STACK) { u64 nr; if (CHECK_BOUNDS(1, 1)) { @@ -928,19 +981,25 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, COPY_U64(); } } - if (sample_type & PERF_SAMPLE_REGS_USER) { + if (orig_sample_type & PERF_SAMPLE_REGS_USER) { + u64 abi; + if (CHECK_BOUNDS(1, 0)) { ret = -EFAULT; goto out_put; } - /* abi */ - COPY_U64(); - /* TODO: can this be less conservative? */ - pr_debug("Dropping regs user sample as possible ASLR leak\n"); - ret = 0; - goto out_put; + abi = in_array[i++]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(orig_regs_user); + + if (nr > max_i - i) { + ret = -EFAULT; + goto out_put; + } + i += nr; + } } - if (sample_type & PERF_SAMPLE_STACK_USER) { + if (orig_sample_type & PERF_SAMPLE_STACK_USER) { u64 size; if (CHECK_BOUNDS(1, 1)) { @@ -970,39 +1029,45 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + if (orig_sample_type & PERF_SAMPLE_WEIGHT_TYPE) COPY_U64(); /* perf_sample_weight */ - if (sample_type & PERF_SAMPLE_DATA_SRC) + if (orig_sample_type & PERF_SAMPLE_DATA_SRC) COPY_U64(); /* data_src */ - if (sample_type & PERF_SAMPLE_TRANSACTION) + if (orig_sample_type & PERF_SAMPLE_TRANSACTION) COPY_U64(); /* transaction */ - if (sample_type & PERF_SAMPLE_REGS_INTR) { + if (orig_sample_type & PERF_SAMPLE_REGS_INTR) { + u64 abi; + if 
(CHECK_BOUNDS(1, 0)) { ret = -EFAULT; goto out_put; } - /* abi */ - COPY_U64(); - /* TODO: can this be less conservative? */ - pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); - ret = 0; - goto out_put; + abi = in_array[i++]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(orig_regs_intr); + + if (nr > max_i - i) { + ret = -EFAULT; + goto out_put; + } + i += nr; + } } - if (sample_type & PERF_SAMPLE_PHYS_ADDR) { + if (orig_sample_type & PERF_SAMPLE_PHYS_ADDR) { COPY_U64(); /* phys_addr */ /* TODO: can this be less conservative? */ pr_debug("Dropping physical address sample as possible ASLR leak\n"); ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_CGROUP) + if (orig_sample_type & PERF_SAMPLE_CGROUP) COPY_U64(); /* cgroup */ - if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + if (orig_sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) COPY_U64(); /* data_page_size */ - if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + if (orig_sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) COPY_U64(); /* code_page_size */ - if (sample_type & PERF_SAMPLE_AUX) { + if (orig_sample_type & PERF_SAMPLE_AUX) { u64 size; if (CHECK_BOUNDS(1, 1)) { @@ -1042,11 +1107,20 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, } new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); - + /* Temporarily override evsel attributes to match the stripped new_event format! 
*/ + evsel->sample_size = __evsel__sample_size(sample_type); + evsel->core.attr.sample_type = sample_type; + evsel->core.attr.sample_regs_user = 0; + evsel->core.attr.sample_regs_intr = 0; perf_sample__init(&new_sample, /*all=*/ true); ret = __evsel__parse_sample(evsel, new_event, &new_sample, orig_needs_swap); if (ret) { + /* Restore original attributes immediately if parsing fails */ + evsel->sample_size = orig_sample_size; + evsel->core.attr.sample_type = orig_sample_type; + evsel->core.attr.sample_regs_user = orig_regs_user; + evsel->core.attr.sample_regs_intr = orig_regs_intr; perf_sample__exit(&new_sample); goto out_put; } @@ -1055,6 +1129,12 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = delegate->sample(delegate, new_event, &new_sample, machine); perf_sample__exit(&new_sample); + /* Restore original attributes so trace ingestion never desynchronizes! */ + evsel->sample_size = orig_sample_size; + evsel->core.attr.sample_type = orig_sample_type; + evsel->core.attr.sample_regs_user = orig_regs_user; + evsel->core.attr.sample_regs_intr = orig_regs_intr; + out_put: thread__put(thread); return ret; @@ -1108,43 +1188,30 @@ static int aslr_tool__process_auxtrace_error(const struct perf_tool *tool __mayb return 0; } - -void aslr_tool__strip_attr_event(union perf_event *event, struct evlist **pevlist) +void aslr_tool__strip_attr_event(union perf_event *event, struct evlist *evlist) { - struct evsel *evsel; - bool needs_swap = false; - - if (pevlist && *pevlist) { - evsel = evlist__last(*pevlist); - if (evsel) - needs_swap = evsel->needs_swap; - } + if (!evlist) + return; if (event->header.size >= (offsetof(struct perf_record_header_attr, attr.sample_type) + sizeof(u64))) { - u64 st = event->attr.attr.sample_type; - - if (needs_swap) - st = bswap_64(st); - - st &= ASLR_SUPPORTED_SAMPLE_TYPE; - - if (needs_swap) - st = bswap_64(st); - - event->attr.attr.sample_type = st; + event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + + if 
(event->header.size >= + (offsetof(struct perf_record_header_attr, attr.sample_regs_user) + sizeof(u64))) + event->attr.attr.sample_regs_user = 0; + if (event->header.size >= + (offsetof(struct perf_record_header_attr, attr.sample_regs_intr) + sizeof(u64))) + event->attr.attr.sample_regs_intr = 0; } if (event->header.size >= (offsetof(struct perf_record_header_attr, attr.type) + sizeof(u32))) { u32 type = event->attr.attr.type; - if (needs_swap) - type = bswap_32(type); - if (type == PERF_TYPE_BREAKPOINT && event->header.size >= (offsetof(struct perf_record_header_attr, - attr.bp_addr) + sizeof(u64))) { + attr.bp_addr) + sizeof(u64))) { event->attr.attr.bp_addr = 0; } else if (type >= PERF_TYPE_MAX) { struct perf_pmu *pmu; @@ -1165,28 +1232,6 @@ void aslr_tool__strip_attr_event(union perf_event *event, struct evlist **pevlis } } -void aslr_tool__strip_evlist(struct perf_tool *tool __maybe_unused, - struct evlist *evlist) -{ - struct evsel *evsel; - - evlist__for_each_entry(evlist, evsel) { - evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; - - if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) - evsel->core.attr.bp_addr = 0; - else if (evsel->core.attr.type >= PERF_TYPE_MAX) { - struct perf_pmu *pmu = perf_pmus__find_by_type(evsel->core.attr.type); - - if (pmu && (!strcmp(pmu->name, "kprobe") || - !strcmp(pmu->name, "uprobe"))) { - evsel->core.attr.config1 = 0; - evsel->core.attr.config2 = 0; - } - } - } -} - static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) { delegate_tool__init(&aslr->tool, delegate); @@ -1200,6 +1245,9 @@ static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) hashmap__init(&aslr->top_addresses, top_addresses__hash, top_addresses__equal, /*ctx=*/NULL); + hashmap__init(&aslr->evsel_orig_attrs, + evsel_hash, evsel_equal, + /*ctx=*/NULL); aslr->tool.tool.sample = aslr_tool__process_sample; /* read - reads a counter, okay to delegate. 
*/ @@ -1262,9 +1310,13 @@ void aslr_tool__delete(struct perf_tool *tool) zfree(&cur->pkey); zfree(&cur->pvalue); } + hashmap__for_each_entry(&aslr->evsel_orig_attrs, cur, bkt) { + zfree(&cur->pvalue); + } hashmap__clear(&aslr->remap_addresses); hashmap__clear(&aslr->top_addresses); + hashmap__clear(&aslr->evsel_orig_attrs); aslr_tool__destroy_machines_priv(&aslr->machines); machines__destroy_kernel_maps(&aslr->machines); @@ -1278,3 +1330,69 @@ void aslr_tool__delete(struct perf_tool *tool) machines__exit(&aslr->machines); free(aslr); } + +int aslr_tool__cache_orig_attrs(struct perf_tool *tool, struct evsel *evsel) +{ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct aslr_evsel_priv *priv = zalloc(sizeof(*priv)); + int err; + + if (!priv) + return -ENOMEM; + + priv->orig_sample_type = evsel->core.attr.sample_type; + priv->orig_sample_regs_user = evsel->core.attr.sample_regs_user; + priv->orig_sample_regs_intr = evsel->core.attr.sample_regs_intr; + priv->orig_sample_size = evsel->sample_size; + + err = hashmap__add(&aslr->evsel_orig_attrs, evsel, priv); + if (err) { + free(priv); + return err; + } + return 0; +} + +void aslr_tool__strip_evlist(const struct perf_tool *tool __maybe_unused, struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + evsel->core.attr.sample_regs_user = 0; + evsel->core.attr.sample_regs_intr = 0; + evsel->sample_size = __evsel__sample_size(evsel->core.attr.sample_type); + evsel__calc_id_pos(evsel); + + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) { + evsel->core.attr.bp_addr = 0; + } else if (evsel->core.attr.type >= PERF_TYPE_MAX) { + struct perf_pmu *pmu = perf_pmus__find_by_type(evsel->core.attr.type); + + if (pmu && (!strcmp(pmu->name, "kprobe") || + !strcmp(pmu->name, "uprobe"))) { + evsel->core.attr.config1 = 0; + 
evsel->core.attr.config2 = 0; + } + } + } +} + +void aslr_tool__restore_evlist(const struct perf_tool *tool, struct evlist *evlist) +{ + const struct delegate_tool *del_tool = container_of(tool, const struct delegate_tool, tool); + const struct aslr_tool *aslr = container_of(del_tool, const struct aslr_tool, tool); + struct evsel *evsel; + struct aslr_evsel_priv *priv; + + evlist__for_each_entry(evlist, evsel) { + if (hashmap__find(&aslr->evsel_orig_attrs, evsel, &priv)) { + evsel->core.attr.sample_type = priv->orig_sample_type; + evsel->core.attr.sample_regs_user = priv->orig_sample_regs_user; + evsel->core.attr.sample_regs_intr = priv->orig_sample_regs_intr; + evsel->sample_size = priv->orig_sample_size; + evsel__calc_id_pos(evsel); + } + } +} diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h index 2b82f711bc67..522e31c8e2c0 100644 --- a/tools/perf/util/aslr.h +++ b/tools/perf/util/aslr.h @@ -34,8 +34,11 @@ struct evlist; union perf_event; struct perf_tool *aslr_tool__new(struct perf_tool *delegate); -void aslr_tool__delete(struct perf_tool *aslr); -void aslr_tool__strip_attr_event(union perf_event *event, struct evlist **pevlist); -void aslr_tool__strip_evlist(struct perf_tool *tool, struct evlist *evlist); +void aslr_tool__delete(struct perf_tool *tool); + +void aslr_tool__strip_attr_event(union perf_event *event, struct evlist *evlist); +int aslr_tool__cache_orig_attrs(struct perf_tool *tool, struct evsel *evsel); +void aslr_tool__strip_evlist(const struct perf_tool *tool, struct evlist *evlist); +void aslr_tool__restore_evlist(const struct perf_tool *tool, struct evlist *evlist); #endif /* __PERF_ASLR_H */ -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v18 4/5] perf aslr: Strip sample registers 2026-06-07 21:36 ` [PATCH v18 4/5] perf aslr: Strip sample registers Ian Rogers @ 2026-06-07 21:52 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-06-07 21:52 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider: - [High] aslr_tool__strip_attr_event corrupts the trailing ID array for events with smaller ABIs by improperly verifying bounds via event->header.size instead of event->attr.attr.size. -- commit e1654455a52059659d22e581a3b84d7f361f3820 Author: Ian Rogers <irogers@google.com> Subject: perf aslr: Strip sample registers Extend the ASLR tool stripping helpers to drop register dump payloads by masking out the relevant perf_event_attr fields (sample_regs_user, sample_regs_intr) when the delegated tool is handling the data. > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > index 03944677ddfcc..912efd111bb3f 100644 > --- a/tools/perf/util/aslr.c > +++ b/tools/perf/util/aslr.c [ ... 
] > @@ -1108,43 +1188,30 @@ static int aslr_tool__process_auxtrace_error(const struct perf_tool *tool __mayb > return 0; > } > > - > -void aslr_tool__strip_attr_event(union perf_event *event, struct evlist **pevlist) > +void aslr_tool__strip_attr_event(union perf_event *event, struct evlist *evlist) > { > - struct evsel *evsel; > - bool needs_swap = false; > - > - if (pevlist && *pevlist) { > - evsel = evlist__last(*pevlist); > - if (evsel) > - needs_swap = evsel->needs_swap; > - } > + if (!evlist) > + return; > > if (event->header.size >= (offsetof(struct perf_record_header_attr, > attr.sample_type) + sizeof(u64))) { > - u64 st = event->attr.attr.sample_type; > - > - if (needs_swap) > - st = bswap_64(st); > - > - st &= ASLR_SUPPORTED_SAMPLE_TYPE; > - > - if (needs_swap) > - st = bswap_64(st); > - > - event->attr.attr.sample_type = st; > + event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; > + > + if (event->header.size >= > + (offsetof(struct perf_record_header_attr, attr.sample_regs_user) + sizeof(u64))) > + event->attr.attr.sample_regs_user = 0; > + if (event->header.size >= > + (offsetof(struct perf_record_header_attr, attr.sample_regs_intr) + sizeof(u64))) > + event->attr.attr.sample_regs_intr = 0; [Severity: High] Can this corrupt the trailing ID array for events with smaller ABIs? Since event->header.size represents the total size of the record, it includes both the attr payload and the dynamically appended array of u64 IDs. If an older perf.data file uses an ABI where attr.size is smaller than the offset of sample_regs_user (such as ABI 0) but contains multiple appended IDs, event->header.size will still easily exceed this offset check. In this scenario, writing 0 to event->attr.attr.sample_regs_user would write past the end of the actual attr struct and silently overwrite the trailing ID array, causing downstream tools to fail to match events to their IDs. 
Should this bounds verification check against event->attr.attr.size (falling back to PERF_ATTR_SIZE_VER0 if zero) instead of event->header.size? > } > > if (event->header.size >= (offsetof(struct perf_record_header_attr, > attr.type) + sizeof(u32))) { > u32 type = event->attr.attr.type; > > - if (needs_swap) > - type = bswap_32(type); > - > if (type == PERF_TYPE_BREAKPOINT && -- Sashiko AI review · https://sashiko.dev/#/patchset/20260607213700.3563842-1-irogers@google.com?part=4 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v18 5/5] perf test: Add inject ASLR test 2026-06-07 21:36 ` [PATCH v18 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (3 preceding siblings ...) 2026-06-07 21:36 ` [PATCH v18 4/5] perf aslr: Strip sample registers Ian Rogers @ 2026-06-07 21:37 ` Ian Rogers 2026-06-08 5:48 ` [PATCH v19 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 5 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-07 21:37 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz Add a new shell test to verify the feature. The test covers: - Basic address remapping for user space samples. - Pipe mode coverage for piped input. - Callchain address remapping. - Consistency of output before and after injection. - Pipe mode report consistency. - Dropping of samples that leak ASLR info (physical addresses). - Kernel address remapping (utilizing a dedicated kernel-intensive VFS dd workload to guarantee a continuous flow of timer-interrupt samples taken in kernel mode). - Kernel report consistency with address normalization. The test suite is hardened with global 'set -o pipefail' assertions to catch pipeline failures, stream-consuming awk processors to handle SIGPIPE signals, and a dedicated pipe output scenario validating raw 'perf inject -o -' stdout streams. Note on kernel DSO normalization in the test script: The test script deliberately normalizes all kernel DSOs to a generic [kernel] tag before diffing, as obfuscating physical kernel addresses forces perf report to occasionally shift samples between individual modules and [kernel.kallsyms] due to the lack of valid host module boundary maps. 
Signed-off-by: Ian Rogers <irogers@google.com> Assisted-by: Antigravity:gemini-3.1-pro --- tools/perf/tests/shell/inject_aslr.sh | 519 ++++++++++++++++++++++++++ 1 file changed, 519 insertions(+) create mode 100755 tools/perf/tests/shell/inject_aslr.sh diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh new file mode 100755 index 000000000000..2e469f83675e --- /dev/null +++ b/tools/perf/tests/shell/inject_aslr.sh @@ -0,0 +1,519 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# perf inject --aslr test + +set -e +set -o pipefail + +shelldir=$(dirname "$0") +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + +sym="noploop" + +skip_test_missing_symbol ${sym} + +# Create global temp directory +temp_dir=$(mktemp -d /tmp/perf-test-aslr.XXXXXXXXXX) + +prog="perf test -w noploop" +[ "$(uname -m)" = "s390x" ] && prog="$prog 3" +err=0 +kprog="dd if=/dev/urandom of=/dev/null bs=1M count=50" + +cleanup() { + local exit_code=${1:-$?} + trap - EXIT TERM INT + if [ "${exit_code}" -ne 0 ] || [ "${err}" -ne 0 ]; then + echo "Test failed! Preserving temp directory: ${temp_dir}" + return + fi + # Check if temp_dir is set and looks sane before removing + if [[ "${temp_dir}" =~ ^/tmp/perf-test-aslr\. ]]; then + rm -rf "${temp_dir}" + fi +} + +trap_cleanup() { + local exit_code=$? 
+ echo "Unexpected signal in ${FUNCNAME[1]}" + cleanup ${exit_code} + exit ${exit_code} +} +trap trap_cleanup EXIT TERM INT + +get_noploop_addr() { + local file=$1 + perf script -i "$file" | awk ' + BEGIN { found=0 } + { + for (i=1; i<=NF; i++) { + if ($i ~ /noploop\+/) { + if (!found) { + print $(i-1) + found=1 + } + } + } + }' +} + +test_basic_aslr() { + echo "Test basic ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.basic.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.basic.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -v --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Basic ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Basic ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Basic ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Basic ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Basic ASLR test [Success]" + fi +} + +test_pipe_aslr() { + echo "Test pipe mode ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe.XXXXXX") + + # Use tee to save the original pipe data for comparison + perf record -e task-clock:u -o - ${prog} | tee "${data}" | perf inject --aslr -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Pipe ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Pipe ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Pipe ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Pipe ASLR test [Failed - addresses are not remapped]" + 
err=1 + else + echo "Pipe ASLR test [Success]" + fi +} + +test_callchain_aslr() { + echo "Test Callchain ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.callchain.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.callchain.XXXXXX") + + perf record -g -e task-clock:u -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Callchain ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Callchain ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Callchain ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Callchain ASLR test [Failed - addresses are not remapped]" + err=1 + else + # Extract callchain addresses (indented lines starting with hex addresses) + orig_callchain=$(perf script -i "${data}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + new_callchain=$(perf script -i "${data2}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + + if [ -z "$orig_callchain" ]; then + echo "Callchain ASLR test [Failed - no callchain samples in original file]" + err=1 + elif [ -z "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain data was dropped]" + err=1 + elif [ "$orig_callchain" = "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain addresses were not remapped]" + err=1 + else + echo "Callchain ASLR test [Success]" + fi + fi +} + +test_report_aslr() { + echo "Test perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i 
"${data}" -o "${data_clean}" + perf inject -v -b --aslr -i "${data}" -o "${data2}" + + local report1="${temp_dir}/report1_basic" + local report2="${temp_dir}/report2_basic" + local report1_clean="${temp_dir}/report1_basic.clean" + local report2_clean="${temp_dir}/report2_basic.clean" + local diff_file="${temp_dir}/diff_basic" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Report ASLR test [Success]" + fi +} + +test_pipe_report_aslr() { + echo "Test pipe mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + # Use tee to save the original pipe data, then process it with inject -b + perf record -e task-clock:u -o - ${prog} | \ + tee "${data}" | \ + perf inject -b --aslr -o "${data2}" + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1_pipe" + local report2="${temp_dir}/report2_pipe" + local report1_clean="${temp_dir}/report1_pipe.clean" + local report2_clean="${temp_dir}/report2_pipe.clean" + local diff_file="${temp_dir}/diff_pipe" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" 
--stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Pipe Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Report ASLR test [Success]" + fi +} + +test_pipe_out_report_aslr() { + echo "Test pipe output mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_out_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1_pipe_out" + local report2="${temp_dir}/report2_pipe_out" + local report1_clean="${temp_dir}/report1_pipe_out.clean" + local report2_clean="${temp_dir}/report2_pipe_out.clean" + local diff_file="${temp_dir}/diff_pipe_out" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf inject -b --aslr -i "${data}" -o - | perf report -i - --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! 
-s "${report1_clean}" ]; then + echo "Pipe Output Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Output Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Output Report ASLR test [Success]" + fi +} + +test_dropped_samples() { + echo "Test dropped samples (phys-data)" + local data + data=$(mktemp "${temp_dir}/perf.data.dropped.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.dropped.XXXXXX") + + # Check if --phys-data is supported by recording a short run + if ! perf record -e task-clock:u --phys-data -o "${data}" -- sleep 0.1 > /dev/null 2>&1; then + echo "Skipping dropped samples test as --phys-data is not supported" + return + fi + + perf record -e task-clock:u --phys-data -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + # Verify that the original file actually contained samples! + orig_samples=$(perf script -i "${data}" | wc -l) + if [ "$orig_samples" -eq 0 ]; then + echo "Dropped samples test [Failed - no samples in original file]" + err=1 + else + # Verify that samples are dropped. + samples_count=$(perf script -i "${data2}" | wc -l) + + if [ "$samples_count" -gt 0 ]; then + echo "Dropped samples test [Failed - samples were not dropped]" + err=1 + else + echo "Dropped samples test [Success]" + fi + fi +} + +test_kernel_aslr() { + echo "Test kernel ASLR remapping" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then + echo "Skipping kernel ASLR test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel ASLR test as kernel map could not be recorded (permissions restricted)" + return + fi + + perf inject -v --aslr -i "${kdata}" -o "${kdata2}" + + # Check if kernel addresses are remapped. + # Find the field that ends with :k: (the event name) and take the next field! + orig_addr=$(perf script -i "${kdata}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + new_addr=$(perf script -i "${kdata2}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + + echo "Kernel ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Kernel ASLR test [Failed - no kernel samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Kernel ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Kernel ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Kernel ASLR test [Success]" + fi +} + +test_kernel_report_aslr() { + echo "Test kernel perf report consistency" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel_report.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_report_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then + echo "Skipping kernel report test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel report test as kernel map could not be recorded (permissions restricted)" + return + fi + + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i "${kdata}" -o "${data_clean}" + perf inject -v -b --aslr -i "${kdata}" -o "${kdata2}" + + local report1="${temp_dir}/report_kernel1" + local report2="${temp_dir}/report_kernel2" + local report1_clean="${temp_dir}/report_kernel1.clean" + local report2_clean="${temp_dir}/report_kernel2.clean" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${kdata2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' > "${report2_clean}" || true + + # Normalize kernel DSOs and addresses in clean reports + # This allows kernel modules to be either a module or kernel.kallsyms + local report1_norm="${temp_dir}/report_kernel1.norm" + local report2_norm="${temp_dir}/report_kernel2.norm" + local diff_file="${temp_dir}/diff_kernel" + + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report1_clean}" | \ + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ + sort > "${report1_norm}" || true + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report2_clean}" | \ + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ + sort > "${report2_norm}" || true + + diff -u -w "${report1_norm}" "${report2_norm}" > "${diff_file}" || true + + if [ ! 
-s "${report1_norm}" ]; then + echo "Kernel Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Kernel Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Kernel Report ASLR test [Success]" + fi +} + +test_regs_stripping() { + echo "Test user register stripping" + local rdata="${temp_dir}/perf.data.regs" + local rdata2="${temp_dir}/perf.data.regs.injected" + local rdata_clean="${temp_dir}/perf.data.regs.clean" + + if ! perf record -e cycles:u --user-regs -o "${rdata}" ${prog} > /dev/null 2>&1; then + echo "Skipping user registers test as recording failed (unsupported flag/platform)" + return + fi + + perf inject -b -i "${rdata}" -o "${rdata_clean}" + perf inject -v -b --aslr -i "${rdata}" -o "${rdata2}" + + local report1="${temp_dir}/report_regs1" + local report2="${temp_dir}/report_regs2" + local report1_clean="${temp_dir}/report_regs1.clean" + local report2_clean="${temp_dir}/report_regs2.clean" + local diff_file="${temp_dir}/diff_regs" + + perf report -i "${rdata_clean}" --stdio > "${report1}" 2>/dev/null || true + perf report -i "${rdata2}" --stdio > "${report2}" 2>/dev/null || true + + grep '%' "${report1}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | \ + sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | \ + sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! 
-s "${report1_clean}" ]; then + echo "User registers stripping test [Failed - profile trace starved/empty]" + err=1 + return + elif [ -s "${diff_file}" ]; then + echo "User registers stripping test [Failed - report parsing differs]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + return + fi + + local script_dump="${temp_dir}/script_regs_dump" + perf script -D -i "${rdata2}" > "${script_dump}" 2>/dev/null || true + if grep -q "user regs:" "${script_dump}"; then + echo "User registers stripping test [Failed - register dumps still present]" + err=1 + else + echo "User registers stripping test [Success]" + fi +} + +test_basic_aslr +test_pipe_aslr +test_callchain_aslr +test_report_aslr +test_pipe_report_aslr +test_pipe_out_report_aslr +test_dropped_samples +test_kernel_aslr +test_kernel_report_aslr +test_regs_stripping + +cleanup ${err} +exit $err -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* [PATCH v19 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes 2026-06-07 21:36 ` [PATCH v18 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (4 preceding siblings ...) 2026-06-07 21:37 ` [PATCH v18 5/5] perf test: Add inject ASLR test Ian Rogers @ 2026-06-08 5:48 ` Ian Rogers 2026-06-08 5:48 ` [PATCH v19 1/5] perf maps: Add maps__mutate_mapping Ian Rogers ` (6 more replies) 5 siblings, 7 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-08 5:48 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz This patch series introduces the new 'perf inject --aslr' feature to remap virtual memory addresses, or drop events that would leak physical memory addresses, when recorded profile data is shared between machines. Bundled with this feature is a bug fix inside the core map tracking code that hardens perf session analysis against concurrent lookup data races. Detailed Mechanism of MMAP Mapping and ASLR Virtual Address Allocation: The ASLR tool virtualizes the address space of the recorded processes by intercepting MMAP and MMAP2 events to build a consistent translation database, which is subsequently used to rewrite sample addresses. It maintains two primary lookup databases using hash maps: 1. 'remap_addresses': Maps an original mapping key to its new remapped base address. The key uses topological invariant coordinates: (machine, dso, invariant). The invariant is computed as (start - pgoff) for DSO-backed mappings. This invariant remains constant even when perf's internal overlap-resolution splits a VMA into fragmented pieces, ensuring split maps resolve consistently back to the same remapped base. 2. 'top_addresses': Tracks the allocation state per process (machine, pid). It maintains 'remapped_max' (the highest allocated address in the virtualized space). 
For each MMAP/MMAP2 event: - We look up the DSO and invariant key in 'remap_addresses'. If found, we reuse the translation, preserving the offset within the mapping. - If not found, we allocate a new remapped address space: - We use thread__find_map to look up the mapping immediately preceding the new one in the original address space (at start - 1). If the preceding mapping was also remapped, we place the new mapping contiguously after it in the remapped space. This preserves contiguity of split mappings (e.g., symbols split by HugeTLB, or anonymous .bss segments adjacent to initialized data). - If no contiguous mapping is found, we insert a 1-page gap after the highest allocated address (remapped_max) to prevent accidental merging of unrelated VMAs. - The event's start address (and pgoff for kernel maps) is rewritten, and the event is delegated to the output writer. To remain strictly conservative and guarantee security, the tool scrubs breakpoint addresses (bp_addr) from all synthesized stream headers, completely drops PERF_RECORD_TEXT_POKE events to prevent leaks of absolute immediate pointer operands, and drops unsupported complex payloads (such as user register stacks, raw tracepoints, and hardware AUX tracing frames). Verification is reinforced with a shell test ('inject_aslr.sh'). Prerequisite Bug Fix (Patch 1): During development, a core map indexing issue was identified and resolved to prevent concurrent lookup data races during session analysis. Changes since v18: - Patch 2 & 3: Squashed the bounds-checking fixes into the "Strip sample registers" patch. The array bounds checking now correctly uses 'orig_sample_type' to traverse the event payload, preventing heap corruption when dealing with events that have had their registers stripped by the ASLR tool pipeline. - Patch 2 & 3: Rebased the commit series to properly isolate the sample address remapping logic from the register stripping logic. 
- Patch 2 & 3: Expanded commit messages to extensively document the cross-endian behavior of 'perf inject'. Because 'perf inject' effectively acts as an endianness converter (writing a host-endian PERF_MAGIC and flushing events exactly as they sit in memory after being byte-swapped by perf_event__all64_swap), all injected events must be perfectly constructed in the host's native endianness. Specifically, perf_event__all64_swap byte-swaps the raw 64-bit payloads, which causes 32-bit sequential fields like PERF_SAMPLE_TID (containing pid and tid) to have their ordering reversed in memory (e.g., [BE_pid][BE_tid] becomes [LE_tid][LE_pid]). The ASLR tool's sample construction logic was expanded to explicitly unpack these fields and repack them sequentially via unions to guarantee a strictly host-endian layout that resolves these inversion anomalies. Similarly, branch stack flags (which are modified in-place to host-endian bitfields by the parser) are copied directly to the newly synthesized event, and 'needs_swap=false' is explicitly used when re-parsing the synthesized event to prevent erroneous double swapping. - Series: Verified cross-endian robustness via the sashiko analyzer. Changes since v17: - Patch 2: Reordered ksymbol deletion logic to ensure `perf_event__process_ksymbol` deletes the map *after* the `aslr_tool__findnew_mapping` translates the unregister offsets. - Patch 2: Changed `aslr_tool__delete` to cleanly handle guest machine deletion memory leaks. - Patch 2: Resolved read-only segfaults on memory-mapped perf.data headers during attribute stripping by using deep copies in `perf_event__repipe_attr`. - Patch 2: Fixed user space remap invariant logic to include `(start - map__start(al.map))` preventing negative overflows on module offset boundaries. - Patch 3: Removed duplicate `bswap_64` payload byte-swapping inside the array logic, allowing the host endianness macros `COPY_U64()` to handle it dynamically. 
- Patch 3: Fixed LBR branch sample starvation by explicitly reading branch counters instead of dropping the entire sample. - Patch 5: Fixed test flakiness by grepping out physical hex addresses `0x[0-9a-f]{8,}` instead of matching exact address strings. - Patch 5: Parameterized temp reports and updated test to scale with `/dev/urandom` continuous random reads. - Patch Series: Added Signed-off-by tags uniformly and Assisted-by tags to track assistance. Changes since v16: - Patch 2: Refactored inline ASLR stripping logic out of builtin-inject.c and into dedicated helpers (aslr_tool__strip_attr_event and aslr_tool__strip_evlist) in aslr.c to better separate concerns. - Patch 2: Fixed guest machine allocation memory leak in aslr_tool__delete() where machines__exit() explicitly skipped freeing the guest processes tree. - Patch 3: Fixed bounds-check violations during cross-endian parsing inside aslr_tool__process_sample() by correctly applying bswap_64() to raw offsets, iteration counts, sizes, and addresses prior to logical evaluation when orig_needs_swap is active. - Patch 4: Fixed pipe mode parser misalignment bug by safely fetching needs_swap from the initialized evsel rather than blindly intercepting HEADER_ATTR events prior to session parsing. - Patch 4: Resolved checkpatch.pl line length warnings in the bswap_64 endianness swapping logic. - Patch Series: Reordered the final two patches. "perf aslr: Strip sample registers" is now Patch 4, and "perf test: Add inject ASLR test" is now Patch 5. This ensures the register stripping logic is fully introduced before the comprehensive shell tests validate it, preventing bisectability test failures and easing merge conflicts. - Patch 5: Fixed "User registers stripping test" starvation when run as root by explicitly using '-e cycles:u' during recording, preventing the ring buffer from overflowing with kernel samples. 
Changes since v15: - Patch 2: Added bounds checking for event->header.size before writing to breakpoint fields to avoid heap buffer overflow on older ABI events. - Patch 2: Fixed asymmetric calculation bug in aslr_tool__findnew_mapping() where pgoff for anonymous kernel memory was not properly subtracted upon insertion, causing the lookup addition to overflow. - Patch 2: Added detailed comments documenting the symmetric lookup and insertion math for unmapped and mapped memory blocks. - Patch 5: Add missing kprobe and uprobe scrubbing of config1 and config2 during aslr_tool__strip_evlist() to strictly conform with repipe constraints. Changes since v14: - Patch 2: Removed unnecessary vertical whitespace in builtin-inject.c. - Patch 2: Added comments explaining why pgoff is assigned for anonymous memory maps to prevent ASLR leaks. - Patch 2: Removed orig_last_end tracking and refactored contiguous mapping detection to use thread__find_map(..., start - 1, ...) based on Gabriel's feedback. - Patch 2: Scrub kprobe/uprobe event config1 and config2 fields to prevent address leaks. - Patch 2: Overwrite pgoff with the remapped start address for anonymous mappings (detected via is_anon_memory and is_no_dso_memory). - Patch 3: Fix C90 mixed declaration error for orig_needs_swap. - Patch 3: Temporarily disable evsel->needs_swap during the secondary evsel__parse_sample() call to prevent branch stack double-swapping bugs. Changes since v13: - Patch 2: Added a NULL check for env before calling perf_env__kernel_is_64_bit(env) to prevent potential segfaults if the recorded environment has no headers. - Patch 5: Fixed sample_size and id_pos going out of sync during aslr_tool__strip_evlist() and aslr_tool__restore_evlist(). Instead of using evsel__reset_sample_bit(), which was acting as a no-op due to early bit clearing and corrupted sample_size, the tool now directly updates sample_type and recomputes sample_size/id_pos dynamically. 
Added orig_sample_size to aslr_evsel_priv to correctly restore the state. Changes since v12: - Patch 2: Fixed potential NULL pointer dereference in remap_addresses__hash() when handling unmapped memory events (key->dso is NULL) under REFCNT_CHECKING. - Patch 2: Dynamically detect machine architecture bitness via perf_env__kernel_is_64_bit() to select appropriate kernel_space_start boundaries, avoiding 64-bit address injection on 32-bit platforms. Changes since v11: - Patch 1: Fixed struct dso name accessor in maps.c by using dso__name() instead of ->name. - Patch 2: Fixed hash function in aslr.c to hash the underlying dso pointer using RC_CHK_ACCESS to support reference count checking. Changes since v10: - Patch 1: Added explicit tracking array logic in maps__load_maps() to correctly accumulate valid maps (skipping NULL entries after failures) and safely return the exact populated count, resolving out-of-bounds pointer iteration panics. - Patch 3: Fixed endianness bug during cross-endian sample parsing by passing evsel->needs_swap instead of false to __evsel__parse_sample in aslr.c, ensuring correct 32-bit field byte unswapping for packed fields. Refactored evsel__parse_sample to take a needs_swap argument via __evsel__parse_sample. - Patch 4: Fixed inject_aslr.sh exit code handling in trap functions to capture and propagate the correct pipeline failure status code instead of unconditionally returning success or failing the test. Changes since v9: - Patch 1: Added `-ENOMEM` error check inside `maps__find_symbol_by_name()` and return `NULL` early. Added map sorting state invalidation on early return in `maps__load_maps()`. - Patch 2: Fixed encapsulation by using `thread__maps()` and `thread__pid()` accessors in `aslr_tool__findnew_mapping()`. Added `pr_warning_once` warning when raw auxtrace data is dropped. - Patch 3: Fixed encapsulation by using `thread__maps()` and `thread__pid()` accessors in `aslr_tool__remap_address()`. 
Wrapped `evsel__parse_sample()` to temporarily disable `needs_swap` to avoid branch stack endianness corruption on cross-endian files. Fixed ISO C90 warning for declaration-after-statement for `orig_needs_swap`. - Patch 4: Fixed duplicate cleanup by explicitly removing trap handlers (`trap - EXIT TERM INT`) inside the `cleanup()` function. - Patch 5: Fixed heap corruption by adding size bounds checking before writing to `sample_regs_user` and `sample_regs_intr` fields. Added missing register mask clearing logic for the `itrace` synthesis path of `perf_event__repipe_attr()`. Ian Rogers (5): perf maps: Add maps__mutate_mapping perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking perf inject/aslr: Implement sample address remapping perf aslr: Strip sample registers perf test: Add inject ASLR test tools/perf/builtin-inject.c | 81 +- tools/perf/tests/shell/inject_aslr.sh | 525 +++++++++ tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 1406 +++++++++++++++++++++++++ tools/perf/util/aslr.h | 44 + tools/perf/util/evsel.c | 6 +- tools/perf/util/evsel.h | 10 +- tools/perf/util/machine.c | 32 +- tools/perf/util/maps.c | 149 ++- tools/perf/util/maps.h | 3 + tools/perf/util/symbol-elf.c | 41 +- tools/perf/util/symbol.c | 17 +- 12 files changed, 2244 insertions(+), 71 deletions(-) create mode 100755 tools/perf/tests/shell/inject_aslr.sh create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v19 1/5] perf maps: Add maps__mutate_mapping 2026-06-08 5:48 ` [PATCH v19 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers @ 2026-06-08 5:48 ` Ian Rogers 2026-06-08 5:48 ` [PATCH v19 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers ` (5 subsequent siblings) 6 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-08 5:48 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz During kernel ELF symbol parsing (dso__process_kernel_symbol), proc kallsyms image loading (dso__load_kernel_sym, dso__load_guest_kernel_sym), and dynamic kernel memory map alignment updates (machine__update_kernel_mmap), the loader directly modifies the live virtual address boundary fields of map objects, which serve as the sort keys. If these boundaries are mutated while the map is still stored in the parent maps array (kmaps) and no lock is held, an unsafe concurrent window is exposed where parallel lookup threads (e.g., inside perf top) can mistakenly assume the array remains sorted based on stale boundaries, executing binary searches (bsearch) across an unsorted range and triggering lookup failures. Fix this by introducing maps__mutate_mapping() that explicitly acquires the parent maps write semaphore lock, executes a caller-supplied mutation callback to perform the field updates under lock protection, and invalidates the sorted tracking flags prior to releasing the write lock. This guarantees synchronization invariants, closing the concurrent lookup race window. The adjacent module alignment pass inside machine__create_kernel_maps() is safely preserved as a high-performance lockless pass, as it is only ever invoked, by contract, from single-threaded session initialization. 
To safely support this unconditional down_write write lock mutator without recursive read-to-write self-deadlock upgrades during lazy symbol loading, we introduce a public maps__load_maps() API. It copies map pointers under a brief read lock and force-loads all modules locklessly outside the lock. Callers (such as perf inject) must pre-load all kernel symbol maps up front at startup using maps__load_maps(), completely bypassing dynamic runtime mutations. Fixes: 39b12f781271 ("perf tools: Make it possible to read object code from vmlinux") Signed-off-by: Ian Rogers <irogers@google.com> Assisted-by: Antigravity:gemini-3.1-pro --- tools/perf/util/machine.c | 32 +++++--- tools/perf/util/maps.c | 149 ++++++++++++++++++++++++++++------- tools/perf/util/maps.h | 3 + tools/perf/util/symbol-elf.c | 41 ++++++---- tools/perf/util/symbol.c | 17 +++- 5 files changed, 184 insertions(+), 58 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index da1ad58758af..1ea06fde14e0 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1539,22 +1539,30 @@ static void machine__set_kernel_mmap(struct machine *machine, map__set_end(machine->vmlinux_map, ~0ULL); } -static int machine__update_kernel_mmap(struct machine *machine, - u64 start, u64 end) +struct kernel_mmap_mutation_ctx { + u64 start; + u64 end; +}; + +static int kernel_mmap_mutate_cb(struct map *map, void *data) { - struct map *orig, *updated; - int err; + struct kernel_mmap_mutation_ctx *ctx = data; - orig = machine->vmlinux_map; - updated = map__get(orig); + map__set_start(map, ctx->start); + map__set_end(map, ctx->end); + if (ctx->start == 0 && ctx->end == 0) + map__set_end(map, ~0ULL); + return 0; +} - machine->vmlinux_map = updated; - maps__remove(machine__kernel_maps(machine), orig); - machine__set_kernel_mmap(machine, start, end); - err = maps__insert(machine__kernel_maps(machine), updated); - map__put(orig); +static int machine__update_kernel_mmap(struct machine *machine, + 
u64 start, u64 end) +{ + struct kernel_mmap_mutation_ctx ctx = { .start = start, .end = end }; - return err; + return maps__mutate_mapping(machine__kernel_maps(machine), + machine->vmlinux_map, + kernel_mmap_mutate_cb, &ctx); } int machine__create_kernel_maps(struct machine *machine) diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index 923935ee21b6..b1b8efe42149 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -576,6 +576,49 @@ void maps__remove(struct maps *maps, struct map *map) #endif } +/** + * maps__mutate_mapping - Apply write-protected mutations to a map. + * @maps: The maps collection containing the map. + * @map: The map to mutate. + * @mutate_cb: Callback function that performs the actual mutations. + * @data: Private data passed to the callback. + * + * This acquires the write lock on the maps semaphore to safely protect + * concurrent readers from seeing partially mutated or unsorted map boundaries. + * + * WARNING: Acquiring down_write() here can trigger a recursive self-deadlock if + * the caller already holds the read lock (e.g., during maps__for_each_map() or + * maps__find() iteration paths that trigger lazy symbol loading). To completely + * avoid this deadlock, all kernel/module maps must be pre-loaded up-front (via + * maps__load_maps()) under a clean, single-threaded context before entering + * multi-threaded event processing loops. 
+ */ +int maps__mutate_mapping(struct maps *maps, struct map *map, + int (*mutate_cb)(struct map *map, void *data), void *data) +{ + int err = 0; + + if (maps) + down_write(maps__lock(maps)); + + err = mutate_cb(map, data); + + if (maps) { + RC_CHK_ACCESS(maps)->maps_by_address_sorted = false; + RC_CHK_ACCESS(maps)->maps_by_name_sorted = false; + } + + if (maps) + up_write(maps__lock(maps)); + +#ifdef HAVE_LIBDW_SUPPORT + if (maps) + libdw__invalidate_dwfl(maps, maps__libdw_addr_space_dwfl(maps)); +#endif + + return err; +} + bool maps__empty(struct maps *maps) { bool res; @@ -626,6 +669,41 @@ int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data) return ret; } +int maps__load_maps(struct maps *maps) +{ + struct map **maps_copy; + unsigned int nr_maps; + int err = 0; + + if (!maps) + return 0; + + down_read(maps__lock(maps)); + nr_maps = maps__nr_maps(maps); + if (nr_maps == 0) { + up_read(maps__lock(maps)); + return 0; + } + maps_copy = calloc(nr_maps, sizeof(*maps_copy)); + if (!maps_copy) { + up_read(maps__lock(maps)); + return -ENOMEM; + } + for (unsigned int i = 0; i < nr_maps; i++) + maps_copy[i] = map__get(maps__maps_by_address(maps)[i]); + up_read(maps__lock(maps)); + + for (unsigned int i = 0; i < nr_maps; i++) { + if (map__load(maps_copy[i]) < 0) { + pr_warning("Failed to load map %s\n", dso__name(map__dso(maps_copy[i]))); + err = -1; + } + map__put(maps_copy[i]); + } + free(maps_copy); + return err; +} + void maps__remove_maps(struct maps *maps, bool (*cb)(struct map *map, void *data), void *data) { struct map **maps_by_address; @@ -668,40 +746,57 @@ struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp) return result; } -struct maps__find_symbol_by_name_args { - struct map **mapp; - const char *name; - struct symbol *sym; -}; - -static int maps__find_symbol_by_name_cb(struct map *map, void *data) +struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) { - 
struct maps__find_symbol_by_name_args *args = data; + struct map **maps_copy; + unsigned int nr_maps; + struct symbol *sym = NULL; - args->sym = map__find_symbol_by_name(map, args->name); - if (!args->sym) - return 0; + if (!maps) + return NULL; - if (!map__contains_symbol(map, args->sym)) { - args->sym = NULL; - return 0; + /* + * First, ensure all maps are loaded. We pre-load them outside of any + * read-to-write locks to avoid deadlocks. Even if some fail, we proceed. + */ + maps__load_maps(maps); + + /* + * Create a local snapshot of the maps while holding the read lock. + * This prevents deadlocking if iteration triggers further map insertions. + */ + down_read(maps__lock(maps)); + nr_maps = maps__nr_maps(maps); + maps_copy = calloc(nr_maps, sizeof(*maps_copy)); + if (maps_copy) { + for (unsigned int i = 0; i < nr_maps; i++) { + struct map *map = maps__maps_by_address(maps)[i]; + + maps_copy[i] = map__get(map); + } } + up_read(maps__lock(maps)); - if (args->mapp != NULL) - *args->mapp = map__get(map); - return 1; -} + if (!maps_copy) + return NULL; -struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) -{ - struct maps__find_symbol_by_name_args args = { - .mapp = mapp, - .name = name, - .sym = NULL, - }; + for (unsigned int i = 0; i < nr_maps; i++) { + struct map *map = maps_copy[i]; + + sym = map__find_symbol_by_name(map, name); + if (sym && map__contains_symbol(map, sym)) { + if (mapp) + *mapp = map__get(map); + break; + } + sym = NULL; + } + + for (unsigned int i = 0; i < nr_maps; i++) + map__put(maps_copy[i]); - maps__for_each_map(maps, maps__find_symbol_by_name_cb, &args); - return args.sym; + free(maps_copy); + return sym; } int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams) diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h index 5b80b199685e..4ec9b7453a3b 100644 --- a/tools/perf/util/maps.h +++ b/tools/perf/util/maps.h @@ -59,8 +59,11 @@ void maps__set_libdw_addr_space_dwfl(struct 
maps *maps, void *dwfl); size_t maps__fprintf(struct maps *maps, FILE *fp); +int maps__load_maps(struct maps *maps); int maps__insert(struct maps *maps, struct map *map); void maps__remove(struct maps *maps, struct map *map); +int maps__mutate_mapping(struct maps *maps, struct map *map, + int (*mutate_cb)(struct map *map, void *data), void *data); struct map *maps__find(struct maps *maps, u64 addr); struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 186e6d92ac3d..d1e93c0556dd 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1342,6 +1342,24 @@ static u64 ref_reloc(struct kmap *kmap) void __weak arch__sym_update(struct symbol *s __maybe_unused, GElf_Sym *sym __maybe_unused) { } +struct remap_kernel_ctx { + u64 sh_addr; + u64 sh_size; + u64 sh_offset; + struct kmap *kmap; +}; + +static int remap_kernel_cb(struct map *map, void *data) +{ + struct remap_kernel_ctx *ctx = data; + + map__set_start(map, ctx->sh_addr + ref_reloc(ctx->kmap)); + map__set_end(map, map__start(map) + ctx->sh_size); + map__set_pgoff(map, ctx->sh_offset); + map__set_mapping_type(map, MAPPING_TYPE__DSO); + return 0; +} + static int dso__process_kernel_symbol(struct dso *dso, struct map *map, GElf_Sym *sym, GElf_Shdr *shdr, struct maps *kmaps, struct kmap *kmap, @@ -1372,22 +1390,15 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, * map to the kernel dso. 
*/ if (*remap_kernel && dso__kernel(dso) && !kmodule) { + struct remap_kernel_ctx ctx = { + .sh_addr = shdr->sh_addr, + .sh_size = shdr->sh_size, + .sh_offset = shdr->sh_offset, + .kmap = kmap + }; + *remap_kernel = false; - map__set_start(map, shdr->sh_addr + ref_reloc(kmap)); - map__set_end(map, map__start(map) + shdr->sh_size); - map__set_pgoff(map, shdr->sh_offset); - map__set_mapping_type(map, MAPPING_TYPE__DSO); - /* Ensure maps are correctly ordered */ - if (kmaps) { - int err; - struct map *tmp = map__get(map); - - maps__remove(kmaps, map); - err = maps__insert(kmaps, map); - map__put(tmp); - if (err) - return err; - } + maps__mutate_mapping(kmaps, map, remap_kernel_cb, &ctx); } /* diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 0c46b24ee098..2cc911af8c81 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -48,6 +48,13 @@ #include <symbol/kallsyms.h> #include <sys/utsname.h> +static int map_fixup_cb(struct map *map, void *data __maybe_unused) +{ + map__fixup_start(map); + map__fixup_end(map); + return 0; +} + static int dso__load_kernel_sym(struct dso *dso, struct map *map); static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map); @@ -2240,10 +2247,11 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map) free(kallsyms_allocated_filename); if (err > 0 && !dso__is_kcore(dso)) { + struct maps *kmaps = map__kmaps(map); + dso__set_binary_type(dso, DSO_BINARY_TYPE__KALLSYMS); dso__set_long_name(dso, DSO__NAME_KALLSYMS, false); - map__fixup_start(map); - map__fixup_end(map); + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); } return err; @@ -2283,10 +2291,11 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map) if (err > 0) pr_debug("Using %s for symbols\n", kallsyms_filename); if (err > 0 && !dso__is_kcore(dso)) { + struct maps *kmaps = map__kmaps(map); + dso__set_binary_type(dso, DSO_BINARY_TYPE__GUEST_KALLSYMS); dso__set_long_name(dso, machine->mmap_name, 
false); - map__fixup_start(map); - map__fixup_end(map); + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); } return err; -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* [PATCH v19 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking 2026-06-08 5:48 ` [PATCH v19 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-08 5:48 ` [PATCH v19 1/5] perf maps: Add maps__mutate_mapping Ian Rogers @ 2026-06-08 5:48 ` Ian Rogers 2026-06-08 5:48 ` [PATCH v19 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers ` (4 subsequent siblings) 6 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-08 5:48 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz If perf.data files are taken from one machine to another they may leak virtual addresses and so weaken ASLR on the machine they are coming from. Add an aslr option for perf inject that remaps all virtual addresses, or drops data/events, so that the virtual address information isn't leaked. This patch introduces the core ASLR remapping tool infrastructure and implements remapping/tracking for metadata events (MMAP, MMAP2, COMM, FORK, EXIT, KSYMBOL, TEXT_POKE). Sample events are delegated without remapping for now. 
Signed-off-by: Ian Rogers <irogers@google.com> Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> Assisted-by: Antigravity:gemini-3.1-pro --- tools/perf/builtin-inject.c | 61 ++- tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 814 ++++++++++++++++++++++++++++++++++++ tools/perf/util/aslr.h | 41 ++ 4 files changed, 908 insertions(+), 9 deletions(-) create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 75ffe31d03fe..8bb37095e2de 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -8,6 +8,7 @@ */ #include "builtin.h" +#include "util/aslr.h" #include "util/color.h" #include "util/dso.h" #include "util/vdso.h" @@ -24,6 +25,7 @@ #include "util/string2.h" #include "util/symbol.h" #include "util/synthetic-events.h" +#include "util/pmus.h" #include "util/thread.h" #include "util/namespaces.h" #include "util/unwind.h" @@ -124,6 +126,7 @@ struct perf_inject { bool in_place_update_dry_run; bool copy_kcore_dir; bool convert_callchain; + bool aslr; const char *input_name; struct perf_data output; u64 bytes_written; @@ -234,20 +237,36 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, u64 *ids; int ret; + union perf_event *aslr_event = NULL; + ret = perf_event__process_attr(tool, event, pevlist); if (ret) return ret; + if (inject->aslr) { + aslr_event = malloc(event->header.size); + if (!aslr_event) + return -ENOMEM; + memcpy(aslr_event, event, event->header.size); + aslr_tool__strip_attr_event(aslr_event, pevlist); + event = aslr_event; + } + /* If the output isn't a pipe then the attributes will be written as part of the header. 
*/ - if (!inject->output.is_pipe) - return 0; + if (!inject->output.is_pipe) { + ret = 0; + goto out; + } - if (!inject->itrace_synth_opts.set) - return perf_event__repipe_synth(tool, event); + if (!inject->itrace_synth_opts.set) { + ret = perf_event__repipe_synth(tool, event); + goto out; + } if (event->header.size < sizeof(struct perf_event_header) + PERF_ATTR_SIZE_VER0) { pr_err("Attribute event size %u is too small\n", event->header.size); - return -EINVAL; + ret = -EINVAL; + goto out; } /* @@ -263,7 +282,8 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, raw_attr_size > event->header.size - sizeof(event->header))) { pr_err("Attribute event size %u is too small for attr.size %u\n", event->header.size, raw_attr_size); - return -EINVAL; + ret = -EINVAL; + goto out; } memset(&attr, 0, sizeof(attr)); @@ -281,8 +301,11 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX; } - return perf_event__synthesize_attr(tool, &attr, (u32)n_ids, ids, + ret = perf_event__synthesize_attr(tool, &attr, (u32)n_ids, ids, perf_event__repipe_synth_cb); +out: + free(aslr_event); + return ret; } static int perf_event__repipe_event_update(const struct perf_tool *tool, @@ -2594,7 +2617,6 @@ static int __cmd_inject(struct perf_inject *inject) evsel->core.attr.exclude_callchain_user = 0; } } - session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc, @@ -2704,6 +2726,8 @@ int cmd_inject(int argc, const char **argv) unwind__option), OPT_BOOLEAN(0, "convert-callchain", &inject.convert_callchain, "Generate callchains using DWARF and drop register/stack data"), + OPT_BOOLEAN(0, "aslr", &inject.aslr, + "Remap virtual memory addresses similar to ASLR"), OPT_END() }; const char * const inject_usage[] = { @@ -2711,6 +2735,7 @@ int cmd_inject(int argc, 
const char **argv) NULL }; bool ordered_events; + struct perf_tool *tool = &inject.tool; if (!inject.itrace_synth_opts.set) { /* Disable eager loading of kernel symbols that adds overhead to perf inject. */ @@ -2731,6 +2756,11 @@ int cmd_inject(int argc, const char **argv) if (argc) usage_with_options(inject_usage, options); + if (inject.aslr && inject.convert_callchain) { + pr_err("Error: --aslr and --convert-callchain are mutually exclusive features.\n"); + return -EINVAL; + } + if (inject.strip && !inject.itrace_synth_opts.set) { pr_err("--strip option requires --itrace option\n"); return -1; @@ -2824,12 +2854,21 @@ int cmd_inject(int argc, const char **argv) inject.tool.schedstat_domain = perf_event__repipe_op2_synth; inject.tool.dont_split_sample_group = true; inject.tool.merge_deferred_callchains = false; - inject.session = __perf_session__new(&data, &inject.tool, + if (inject.aslr) { + tool = aslr_tool__new(&inject.tool); + if (!tool) { + ret = -ENOMEM; + goto out_close_output; + } + } + inject.session = __perf_session__new(&data, tool, /*trace_event_repipe=*/inject.output.is_pipe, /*host_env=*/NULL); if (IS_ERR(inject.session)) { ret = PTR_ERR(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); goto out_close_output; } @@ -2922,6 +2961,8 @@ int cmd_inject(int argc, const char **argv) goto out_delete; ret = __cmd_inject(&inject); + if (inject.aslr) + aslr_tool__strip_evlist(tool, inject.session->evlist); guest_session__exit(&inject.guest_session); @@ -2929,6 +2970,8 @@ int cmd_inject(int argc, const char **argv) strlist__delete(inject.known_build_ids); zstd_fini(&(inject.session->zstd_data)); perf_session__delete(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); out_close_output: if (!inject.in_place_update) perf_data__close(&inject.output); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 4bbc78b1f741..19994e026ae5 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -6,6 +6,7 @@ perf-util-y += 
arm64-frame-pointer-unwind-support.o perf-util-y += addr2line.o perf-util-y += addr_location.o perf-util-y += annotate.o +perf-util-y += aslr.o perf-util-y += blake2s.o perf-util-y += block-info.o perf-util-y += block-range.o diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c new file mode 100644 index 000000000000..56fc444fbf54 --- /dev/null +++ b/tools/perf/util/aslr.c @@ -0,0 +1,814 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "aslr.h" + +#include "addr_location.h" +#include "debug.h" +#include "event.h" +#include "evsel.h" +#include "evlist.h" +#include "machine.h" +#include "map.h" +#include "thread.h" +#include "tool.h" +#include "session.h" +#include "data.h" +#include "dso.h" +#include "pmus.h" + +#include <internal/lib.h> /* page_size */ +#include <linux/compiler.h> +#include <linux/zalloc.h> +#include <inttypes.h> +#include <unistd.h> + +/** + * struct remap_addresses_key - Key for mapping original addresses to remapped ones. + * @dso: Pointer to the DSO (Dynamic Shared Object) associated with the mapping. + * @invariant: Unique offset invariant within the VMA (Virtual Memory Area). + * Calculated as `start - pgoff`. This value remains constant when + * perf's internal `maps__fixup_overlap_and_insert` splits a map into + * fragmented VMA pieces due to overlapping events, allowing us to + * resolve split maps consistently back to the original VMA. + * @pid: Process ID associated with the mapping. + */ +struct remap_addresses_key { + struct machine *machine; + struct dso *dso; + u64 invariant; + pid_t pid; +}; + +struct aslr_mapping { + struct list_head node; + u64 orig_start; + u64 len; + u64 remap_start; +}; + +struct process_top_address { + u64 remapped_max; +}; +struct aslr_tool { + /** @tool: The tool implemented here and a pointer to a delegate to process the data. */ + struct delegate_tool tool; + /** @machines: The machines with the input, not remapped, virtual address layout. 
*/ + struct machines machines; + /** @event_copy: Buffer used to create an event to pass to the delegate. */ + char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); + /** @remap_addresses: mapping from remap_addresses_key to remapped address. */ + struct hashmap remap_addresses; + /** @top_addresses: mapping from process to max remapped address. */ + struct hashmap top_addresses; +}; + +static const pid_t kernel_pid = -1; + +/* Start remapping user processes from a small non-zero offset. */ +static const u64 user_space_start = 0x200000; +static const u64 kernel_space_start_64 = 0xffff800010000000ULL; +static const u64 kernel_space_start_32 = 0x80000000ULL; + +static size_t remap_addresses__hash(long _key, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key = (struct remap_addresses_key *)_key; + void *dso_ptr = key->dso ? RC_CHK_ACCESS(key->dso) : NULL; + + return (size_t)key->machine ^ (size_t)dso_ptr ^ key->invariant ^ key->pid; +} + +static bool remap_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key1 = (struct remap_addresses_key *)_key1; + struct remap_addresses_key *key2 = (struct remap_addresses_key *)_key2; + + return key1->machine == key2->machine && + RC_CHK_EQUAL(key1->dso, key2->dso) && + key1->invariant == key2->invariant && + key1->pid == key2->pid; +} + +struct top_addresses_key { + struct machine *machine; + pid_t pid; +}; + +static size_t top_addresses__hash(long _key, void *ctx __maybe_unused) +{ + struct top_addresses_key *key = (struct top_addresses_key *)_key; + + return (size_t)key->machine ^ key->pid; +} + +static bool top_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) +{ + struct top_addresses_key *key1 = (struct top_addresses_key *)_key1; + struct top_addresses_key *key2 = (struct top_addresses_key *)_key2; + + return key1->machine == key2->machine && key1->pid == key2->pid; +} + +static u64 round_up_to_page_size(u64 addr) +{ + return (addr + page_size - 1) & 
~((u64)page_size - 1); +} + +struct aslr_machine_priv { + bool kernel_maps_loaded; +}; + +static int aslr_tool__preload_kernel_maps(struct machine *machine) +{ + struct aslr_machine_priv *mpriv = machine->priv; + + if (!mpriv) { + mpriv = zalloc(sizeof(*mpriv)); + if (!mpriv) + return -ENOMEM; + machine->priv = mpriv; + } + + if (!mpriv->kernel_maps_loaded) { + struct maps *kmaps = machine__kernel_maps(machine); + + if (kmaps) { + int err = maps__load_maps(kmaps); + + if (err < 0) { + pr_err("ASLR: Failed to preload kernel maps for machine pid %d\n", + machine->pid); + return err; + } + } + mpriv->kernel_maps_loaded = true; + } + return 0; +} + +static void aslr_tool__free_machine_priv(struct machine *machine) +{ + free(machine->priv); + machine->priv = NULL; +} + +static void aslr_tool__destroy_machines_priv(struct machines *machines) +{ + struct rb_node *nd; + + aslr_tool__free_machine_priv(&machines->host); + for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) { + struct machine *machine = rb_entry(nd, struct machine, rb_node); + + aslr_tool__free_machine_priv(machine); + } +} + +static u64 aslr_tool__findnew_mapping(struct aslr_tool *aslr, + struct machine *session_machine, + struct thread *aslr_thread, + u8 cpumode, u64 start, + u64 len, u64 pgoff) +{ + /* Address location for dso lookup. */ + struct addr_location al; + /* Original ASLR address based key for the remap table. */ + struct remap_addresses_key remap_key; + /* The address in the ASLR sanitized address space less pg_off. */ + u64 *remapped_invariant_ptr; + /* Key for the maximum address in a process. */ + struct top_addresses_key top_addr_key; + /* Value in top address table. */ + struct process_top_address *top = NULL; + /* Address in ASLR sanitized address space. */ + u64 remap_addr; + /* Potentially allocated remap table key. */ + struct remap_addresses_key *new_remap_key = NULL; + /* + * Potentially allocated remap table value.
+ * TODO: Avoid allocation necessary for perf 32-bit binary support. + */ + u64 *new_remap_val = NULL; + int err; + + if (!aslr_thread) + return 0; + + /* The key to look up an incoming address to the outgoing value. */ + addr_location__init(&al); + remap_key.machine = maps__machine(thread__maps(aslr_thread)); + remap_key.pid = (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? + kernel_pid : thread__pid(aslr_thread); + if (thread__find_map(aslr_thread, cpumode, start, &al)) { + struct dso *dso = map__dso(al.map); + const char *dso_name = dso ? dso__long_name(dso) : NULL; + + remap_key.dso = dso; + if (dso && !is_anon_memory(dso_name) && !is_no_dso_memory(dso_name)) + remap_key.invariant = map__start(al.map) - map__pgoff(al.map); + else + remap_key.invariant = map__start(al.map); + } else { + remap_key.dso = NULL; + remap_key.invariant = start; + } + + /* The key to look up top allocated address. */ + top_addr_key.machine = remap_key.machine; + top_addr_key.pid = remap_key.pid; + + if (hashmap__find(&aslr->remap_addresses, &remap_key, &remapped_invariant_ptr)) { + /* Mmap already exists. */ + u64 calculated_max; + + if (al.map) { + /* + * The cached value is the base of the invariant. We add the + * offset into the VMA (start - map__start), plus the map's + * pgoff, to get the precise virtual address within this chunk. + */ + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + + (start - map__start(al.map)); + } else { + /* + * For unmapped memory (e.g. kernel anonymous), the cached value + * was stored offset by pgoff. Adding pgoff yields the true remap_addr. + */ + remap_addr = *remapped_invariant_ptr + pgoff; + } + + calculated_max = remap_addr + len; + + /* See if top mapping was expanded. 
*/ + if (hashmap__find(&aslr->top_addresses, &top_addr_key, &top)) { + if (calculated_max > top->remapped_max) + top->remapped_max = calculated_max; + } + addr_location__exit(&al); + return remap_addr; + } + /* No mmap, create an entry from the top address. */ + if (hashmap__find(&aslr->top_addresses, &top_addr_key, &top)) { + struct addr_location prev_al; + bool is_contiguous = false; + + /* Current max allocated mmap address within the process. */ + remap_addr = top->remapped_max; + + addr_location__init(&prev_al); + if (thread__find_map(aslr_thread, cpumode, start - 1, &prev_al)) { + if (map__end(prev_al.map) == start) + is_contiguous = true; + } + addr_location__exit(&prev_al); + + if (is_contiguous) { + /* Contiguous mapping, do not add 1 page gap! */ + remap_addr = round_up_to_page_size(remap_addr); + } else { + /* Give 1 page gap from current max page. */ + remap_addr = round_up_to_page_size(remap_addr); + remap_addr += page_size; + } + if (remap_addr + len > top->remapped_max) + top->remapped_max = remap_addr + len; + } else { + /* First address of the process, allocate key and first top address. */ + struct top_addresses_key *tk; + struct process_top_address *top_val; + struct perf_env *env = session_machine ? session_machine->env : NULL; + bool is_64 = env ? perf_env__kernel_is_64_bit(env) : (sizeof(void *) == 8); + u64 kernel_start_addr = is_64 ? kernel_space_start_64 : kernel_space_start_32; + + remap_addr = (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? 
+ kernel_start_addr : user_space_start; + remap_addr = round_up_to_page_size(remap_addr); + + tk = malloc(sizeof(*tk)); + top_val = malloc(sizeof(*top_val)); + if (!tk || !top_val) { + err = -ENOMEM; + } else { + *tk = top_addr_key; + top_val->remapped_max = remap_addr + len; + err = hashmap__insert(&aslr->top_addresses, tk, top_val, + HASHMAP_ADD, NULL, NULL); + } + if (err) { + errno = -err; + pr_err("Failure to add ASLR process top address %m\n"); + free(tk); + free(top_val); + addr_location__exit(&al); + return 0; + } + } + /* Create remapping entry. */ + new_remap_key = malloc(sizeof(*new_remap_key)); + new_remap_val = malloc(sizeof(u64)); + if (!new_remap_key || !new_remap_val) { + err = -ENOMEM; + } else { + *new_remap_key = remap_key; + new_remap_key->dso = dso__get(remap_key.dso); + if (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) { + if (al.map) { + *new_remap_val = remap_addr - + (start - map__start(al.map)) - + map__pgoff(al.map); + } else { + /* + * Subtract pgoff from the base virtual address so that + * when the lookup path adds pgoff back, it perfectly + * cancels out and returns remap_addr. + */ + *new_remap_val = remap_addr - pgoff; + } + } else { + *new_remap_val = remap_addr - (al.map ?
(start - map__start(al.map)) + + map__pgoff(al.map) : pgoff); + } + err = hashmap__add(&aslr->remap_addresses, new_remap_key, new_remap_val); + if (err) + dso__put(new_remap_key->dso); + } + if (err) { + errno = -err; + pr_err("Failure to add ASLR remapping %m\n"); + free(new_remap_key); + free(new_remap_val); + addr_location__exit(&al); + return 0; + } + addr_location__exit(&al); + return remap_addr; +} + +static int aslr_tool__process_mmap(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_mmap(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap.pid, event->mmap.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap, &event->mmap, event->mmap.header.size); + /* Remaps the mmap.start. */ + new_event->mmap.start = aslr_tool__findnew_mapping(aslr, machine, thread, cpumode, + event->mmap.start, + event->mmap.len, + event->mmap.pgoff); + /* + * For anonymous memory (and kernel maps), the kernel populates the + * event's pgoff field with the original un-obfuscated virtual address + * in bytes (i.e. (addr >> PAGE_SHIFT) << PAGE_SHIFT). 
+ * We must overwrite pgoff with the new remapped byte address to prevent + * leaking the original ASLR layout. + */ + if (is_anon_memory(event->mmap.filename) || is_no_dso_memory(event->mmap.filename) || + ((cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL) && + !is_kernel_module(event->mmap.filename, cpumode))) + new_event->mmap.pgoff = new_event->mmap.start; + err = delegate->mmap(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_mmap2(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_mmap2(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap2.pid, event->mmap2.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap2, &event->mmap2, event->mmap2.header.size); + /* Remaps the mmap.start. 
*/ + new_event->mmap2.start = aslr_tool__findnew_mapping(aslr, machine, thread, cpumode, + event->mmap2.start, + event->mmap2.len, + event->mmap2.pgoff); + /* + * For anonymous memory (and kernel maps), the kernel populates the + * event's pgoff field with the original un-obfuscated virtual address + * in bytes (i.e. (addr >> PAGE_SHIFT) << PAGE_SHIFT). + * We must overwrite pgoff with the new remapped byte address to prevent + * leaking the original ASLR layout. + */ + if (is_anon_memory(event->mmap2.filename) || is_no_dso_memory(event->mmap2.filename) || + ((cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL) && + !is_kernel_module(event->mmap2.filename, cpumode))) + new_event->mmap2.pgoff = new_event->mmap2.start; + err = delegate->mmap2(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_comm(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. 
*/ + err = perf_event__process_comm(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->comm(delegate, event, sample, machine); +} + +static int aslr_tool__process_fork(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_fork(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->fork(delegate, event, sample, machine); +} + +static int aslr_tool__process_exit(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. 
*/ + err = perf_event__process_exit(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->exit(delegate, event, sample, machine); +} + +static int aslr_tool__process_text_poke(const struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + /* Drop in case the instruction encodes an ASLR revealing address. */ + return 0; +} + +static int aslr_tool__process_ksymbol(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + struct thread *thread; + struct machine *aslr_machine; + bool is_unregister; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + thread = machine__findnew_thread(aslr_machine, kernel_pid, 0); + if (!thread) + return -ENOMEM; + + is_unregister = (event->ksymbol.flags & PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER); + + memcpy(&new_event->ksymbol, &event->ksymbol, event->ksymbol.header.size); + + if (is_unregister) { + new_event->ksymbol.addr = aslr_tool__findnew_mapping(aslr, machine, thread, + PERF_RECORD_MISC_KERNEL, + event->ksymbol.addr, + event->ksymbol.len, + /*pgoff=*/0); + err = perf_event__process_ksymbol(tool, event, sample, aslr_machine); + } else { + err = perf_event__process_ksymbol(tool, event, sample, aslr_machine); + new_event->ksymbol.addr = aslr_tool__findnew_mapping(aslr, machine, thread, + PERF_RECORD_MISC_KERNEL, + event->ksymbol.addr, + event->ksymbol.len, + /*pgoff=*/0); + 
} + if (err) { + thread__put(thread); + return err; + } + + err = delegate->ksymbol(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_sample(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct perf_tool *delegate = aslr->tool.delegate; + + return delegate->sample(delegate, event, sample, machine); +} + +static int skipn(int fd, off_t n) +{ + char buf[4096]; + ssize_t ret; + + while (n > 0) { + ret = read(fd, buf, min_t(off_t, n, (off_t)sizeof(buf))); + if (ret <= 0) + return ret; + n -= ret; + } + + return 0; +} + +static s64 aslr_tool__process_auxtrace(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, + union perf_event *event) +{ + pr_warning_once("ASLR: Dropping auxtrace data as it cannot be obfuscated.\n"); + if (perf_data__is_pipe(session->data)) { + /* Copy behavior of the stub by reading all pipe data. 
*/ + int err = skipn(perf_data__fd(session->data), event->auxtrace.size); + + if (err < 0) + return err; + } + return event->auxtrace.size; +} + +static int aslr_tool__process_auxtrace_info(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + +static int aslr_tool__process_auxtrace_error(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + + +void aslr_tool__strip_attr_event(union perf_event *event, struct evlist **pevlist __maybe_unused) +{ + u32 attr_size; + + attr_size = event->attr.attr.size ?: PERF_ATTR_SIZE_VER0; + + if (attr_size >= (offsetof(struct perf_event_attr, sample_type) + sizeof(u64))) { + event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + } + + if (attr_size >= (offsetof(struct perf_event_attr, type) + sizeof(u32))) { + u32 type = event->attr.attr.type; + + if (type == PERF_TYPE_BREAKPOINT && + attr_size >= (offsetof(struct perf_event_attr, bp_addr) + sizeof(u64))) { + event->attr.attr.bp_addr = 0; + } else if (type >= PERF_TYPE_MAX) { + struct perf_pmu *pmu; + + pmu = perf_pmus__find_by_type(type); + if (pmu && (!strcmp(pmu->name, "kprobe") || + !strcmp(pmu->name, "uprobe"))) { + if (attr_size >= (offsetof(struct perf_event_attr, config1) + sizeof(u64))) + event->attr.attr.config1 = 0; + if (attr_size >= (offsetof(struct perf_event_attr, config2) + sizeof(u64))) + event->attr.attr.config2 = 0; + } + } + } +} + +void aslr_tool__strip_evlist(struct perf_tool *tool __maybe_unused, + struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) + evsel->core.attr.bp_addr = 0; + else if (evsel->core.attr.type >= PERF_TYPE_MAX) { + struct perf_pmu *pmu = 
perf_pmus__find_by_type(evsel->core.attr.type); + + if (pmu && (!strcmp(pmu->name, "kprobe") || + !strcmp(pmu->name, "uprobe"))) { + evsel->core.attr.config1 = 0; + evsel->core.attr.config2 = 0; + } + } + } +} + +static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) +{ + delegate_tool__init(&aslr->tool, delegate); + aslr->tool.tool.ordered_events = true; + + machines__init(&aslr->machines); + + hashmap__init(&aslr->remap_addresses, + remap_addresses__hash, remap_addresses__equal, + /*ctx=*/NULL); + hashmap__init(&aslr->top_addresses, + top_addresses__hash, top_addresses__equal, + /*ctx=*/NULL); + + aslr->tool.tool.sample = aslr_tool__process_sample; + /* read - reads a counter, okay to delegate. */ + aslr->tool.tool.mmap = aslr_tool__process_mmap; + aslr->tool.tool.mmap2 = aslr_tool__process_mmap2; + aslr->tool.tool.comm = aslr_tool__process_comm; + aslr->tool.tool.fork = aslr_tool__process_fork; + aslr->tool.tool.exit = aslr_tool__process_exit; + /* namesspaces, cgroup, lost, lost_sample, aux, */ + /* itrace_start, aux_output_hw_id, context_switch, throttle, unthrottle */ + /* - no virtual addresses. */ + aslr->tool.tool.ksymbol = aslr_tool__process_ksymbol; + /* bpf - no virtual address. */ + aslr->tool.tool.text_poke = aslr_tool__process_text_poke; + /* + * event_update, tracing_data, finished_round, build_id, id_index, + * auxtrace_info, auxtrace_error, time_conv, thread_map, cpu_map, + * stat_config, stat, feature, finished_init, bpf_metadata, compressed, + * auxtrace - no virtual addresses. 
+ */ + aslr->tool.tool.auxtrace = aslr_tool__process_auxtrace; + aslr->tool.tool.auxtrace_info = aslr_tool__process_auxtrace_info; + aslr->tool.tool.auxtrace_error = aslr_tool__process_auxtrace_error; +} + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate) +{ + struct aslr_tool *aslr = zalloc(sizeof(*aslr)); + + if (!aslr) + return NULL; + + aslr_tool__init(aslr, delegate); + return &aslr->tool.tool; +} + +void aslr_tool__delete(struct perf_tool *tool) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct hashmap_entry *cur; + size_t bkt; + struct rb_node *nd; + + if (!tool) + return; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + + hashmap__for_each_entry(&aslr->remap_addresses, cur, bkt) { + struct remap_addresses_key *key = (struct remap_addresses_key *)cur->pkey; + + if (key) + dso__put(key->dso); + zfree(&cur->pkey); + zfree(&cur->pvalue); + } + hashmap__for_each_entry(&aslr->top_addresses, cur, bkt) { + zfree(&cur->pkey); + zfree(&cur->pvalue); + } + + hashmap__clear(&aslr->remap_addresses); + hashmap__clear(&aslr->top_addresses); + aslr_tool__destroy_machines_priv(&aslr->machines); + machines__destroy_kernel_maps(&aslr->machines); + + while ((nd = rb_first_cached(&aslr->machines.guests)) != NULL) { + struct machine *machine = rb_entry(nd, struct machine, rb_node); + + rb_erase_cached(nd, &aslr->machines.guests); + machine__delete(machine); + } + + machines__exit(&aslr->machines); + free(aslr); +} diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h new file mode 100644 index 000000000000..2b82f711bc67 --- /dev/null +++ b/tools/perf/util/aslr.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_ASLR_H +#define __PERF_ASLR_H + +#include <linux/perf_event.h> + +#define ASLR_SUPPORTED_SAMPLE_TYPE ( \ + PERF_SAMPLE_IDENTIFIER | \ + PERF_SAMPLE_IP | \ + PERF_SAMPLE_TID | \ + PERF_SAMPLE_TIME | \ + PERF_SAMPLE_ADDR | \ + 
PERF_SAMPLE_ID | \ + PERF_SAMPLE_STREAM_ID | \ + PERF_SAMPLE_CPU | \ + PERF_SAMPLE_PERIOD | \ + PERF_SAMPLE_READ | \ + PERF_SAMPLE_CALLCHAIN | \ + PERF_SAMPLE_RAW | \ + PERF_SAMPLE_BRANCH_STACK | \ + PERF_SAMPLE_STACK_USER | \ + PERF_SAMPLE_WEIGHT_TYPE | \ + PERF_SAMPLE_DATA_SRC | \ + PERF_SAMPLE_TRANSACTION | \ + PERF_SAMPLE_PHYS_ADDR | \ + PERF_SAMPLE_CGROUP | \ + PERF_SAMPLE_DATA_PAGE_SIZE | \ + PERF_SAMPLE_CODE_PAGE_SIZE | \ + PERF_SAMPLE_AUX) + +struct perf_tool; +struct evsel; +struct evlist; +union perf_event; + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate); +void aslr_tool__delete(struct perf_tool *aslr); +void aslr_tool__strip_attr_event(union perf_event *event, struct evlist **pevlist); +void aslr_tool__strip_evlist(struct perf_tool *tool, struct evlist *evlist); + +#endif /* __PERF_ASLR_H */ -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* [PATCH v19 3/5] perf inject/aslr: Implement sample address remapping 2026-06-08 5:48 ` [PATCH v19 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-08 5:48 ` [PATCH v19 1/5] perf maps: Add maps__mutate_mapping Ian Rogers 2026-06-08 5:48 ` [PATCH v19 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers @ 2026-06-08 5:48 ` Ian Rogers 2026-06-08 6:00 ` sashiko-bot 2026-06-08 5:48 ` [PATCH v19 4/5] perf aslr: Strip sample registers Ian Rogers ` (3 subsequent siblings) 6 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-08 5:48 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz Add the sample address remapping logic to the ASLR tool. This patch implements aslr_tool__process_sample, which parses sample events, remaps IPs, ADDRs, callchains, and branch stacks using the mappings collected from metadata events, and drops potentially leaking raw, register, stack, physical address, and aux samples. Also adds the aslr_tool__remap_address helper function. Note on cross-endian compatibility: 'perf inject' functions as an endianness converter. Input files are read, and their events are byte-swapped to host endianness in memory. When the tool emits its output, it writes a host-endian PERF_MAGIC in the file header, thereby marking the output file as host-endian natively. Because the output file is always written in host endianness, events and payloads must be constructed entirely using host-endian layouts. For this reason, this patch explicitly unpacks and repacks PERF_SAMPLE_TID (and PERF_SAMPLE_CPU) using unions to ensure that the sequential 32-bit layout is correctly aligned in host endianness. Similarly, branch stack flags (which are modified in place to host-endian bitfields by the parser) are copied directly to the newly synthesized event. 
When re-parsing the newly synthesized event, 'needs_swap=false' is explicitly used to prevent double swapping the already host-endian fields. Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> Signed-off-by: Ian Rogers <irogers@google.com> Assisted-by: Antigravity:gemini-3.1-pro --- tools/perf/util/aslr.c | 465 +++++++++++++++++++++++++++++++++++++++- tools/perf/util/evsel.c | 6 +- tools/perf/util/evsel.h | 10 +- 3 files changed, 472 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c index 56fc444fbf54..c4602a43e04f 100644 --- a/tools/perf/util/aslr.c +++ b/tools/perf/util/aslr.c @@ -20,6 +20,7 @@ #include <linux/zalloc.h> #include <inttypes.h> #include <unistd.h> +#include <byteswap.h> /** * struct remap_addresses_key - Key for mapping original addresses to remapped ones. @@ -112,6 +113,60 @@ static u64 round_up_to_page_size(u64 addr) return (addr + page_size - 1) & ~((u64)page_size - 1); } +static u64 aslr_tool__remap_address(struct aslr_tool *aslr, + struct thread *aslr_thread, + u8 cpumode, + u64 addr) +{ + struct addr_location al; + struct remap_addresses_key key; + u64 *remapped_invariant_ptr = NULL; + u64 remap_addr = 0; + u8 effective_cpumode = cpumode; + + if (!aslr_thread) + return 0; /* No thread. */ + + addr_location__init(&al); + if (!thread__find_map(aslr_thread, cpumode, addr, &al)) { + /* + * If lookup fails with specified cpumode, try fallback to the other space + * to be robust against bad cpumode in samples. 
+ */ + if (cpumode == PERF_RECORD_MISC_KERNEL) + effective_cpumode = PERF_RECORD_MISC_USER; + else if (cpumode == PERF_RECORD_MISC_USER) + effective_cpumode = PERF_RECORD_MISC_KERNEL; + else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + effective_cpumode = PERF_RECORD_MISC_GUEST_USER; + else if (cpumode == PERF_RECORD_MISC_GUEST_USER) + effective_cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + + if (!thread__find_map(aslr_thread, effective_cpumode, addr, &al)) { + addr_location__exit(&al); + return 0; /* No mmap. */ + } + } + + key.machine = maps__machine(thread__maps(aslr_thread)); + key.dso = map__dso(al.map); + key.invariant = map__start(al.map) - map__pgoff(al.map); + key.pid = (effective_cpumode == PERF_RECORD_MISC_KERNEL || + effective_cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? + kernel_pid : thread__pid(aslr_thread); + + if (hashmap__find(&aslr->remap_addresses, &key, &remapped_invariant_ptr)) { + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + + (addr - map__start(al.map)); + } else { + pr_debug("Cannot find a remapped entry for address %lx in mapping %lx(%lx) for pid=%d\n", + addr, map__start(al.map), map__size(al.map), key.pid); + } + + addr_location__exit(&al); + return remap_addr; +} + struct aslr_machine_priv { bool kernel_maps_loaded; }; @@ -616,13 +671,415 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, struct perf_sample *sample, struct machine *machine) { - struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); - struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); - struct perf_tool *delegate = aslr->tool.delegate; + struct evsel *evsel = sample->evsel; + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + int ret; + u64 sample_type; + struct thread *thread; + struct machine *aslr_machine; + __u64 max_i; + __u64 max_j; + union perf_event *new_event; + struct perf_sample new_sample; + __u64 *in_array, *out_array; + u8 cpumode; + u64 
addr; + size_t i; + size_t j; + struct aslr_evsel_priv *priv = NULL; + u64 orig_sample_type; + u64 orig_regs_user; + u64 orig_regs_intr; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + + + if (evsel__is_dummy_event(evsel)) + return delegate->sample(delegate, event, sample, machine); + + ret = -EFAULT; + sample_type = evsel->core.attr.sample_type; + max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); + max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); + new_event = (union perf_event *)aslr->event_copy; + cpumode = sample->cpumode; + i = 0; + j = 0; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + thread = machine__findnew_thread(aslr_machine, sample->pid, sample->tid); + + if (!thread) + return -ENOMEM; + + if (max_i > PERF_SAMPLE_MAX_SIZE / sizeof(u64)) + goto out_put; + + new_event->sample.header = event->sample.header; + + in_array = &event->sample.array[0]; + out_array = &new_event->sample.array[0]; + +#define CHECK_BOUNDS(required_i, required_j) \ + (i + (required_i) > max_i || j + (required_j) > max_j) + +#define COPY_U64() \ + do { \ + if (CHECK_BOUNDS(1, 1)) { \ + ret = -EFAULT; \ + goto out_put; \ + } \ + out_array[j++] = in_array[i++]; \ + } while (0) + +#define REMAP_U64(addr_field) \ + do { \ + u64 remapped; \ + if (CHECK_BOUNDS(1, 1)) { \ + ret = -EFAULT; \ + goto out_put; \ + } \ + remapped = aslr_tool__remap_address(aslr, thread, cpumode, addr_field); \ + out_array[j++] = remapped; \ + i++; \ + } while (0) + + if (sample_type & PERF_SAMPLE_IDENTIFIER) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_IP) + REMAP_U64(sample->ip); + if (sample_type & PERF_SAMPLE_TID) { + union { + u64 val64; + u32 val32[2]; + } u; + + if (CHECK_BOUNDS(1, 1)) { + 
ret = -EFAULT; + goto out_put; + } + u.val32[0] = sample->pid; + u.val32[1] = sample->tid; + out_array[j++] = u.val64; + i++; + } + if (sample_type & PERF_SAMPLE_TIME) + COPY_U64(); /* time */ + if (sample_type & PERF_SAMPLE_ADDR) + REMAP_U64(sample->addr); + if (sample_type & PERF_SAMPLE_ID) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_STREAM_ID) + COPY_U64(); /* stream_id */ + if (sample_type & PERF_SAMPLE_CPU) + COPY_U64(); /* cpu, res */ + if (sample_type & PERF_SAMPLE_PERIOD) + COPY_U64(); /* period */ + if (sample_type & PERF_SAMPLE_READ) { + if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } else { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + nr = in_array[i]; + COPY_U64(); + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + for (u64 cntr = 0; cntr < nr; cntr++) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } + } + } + if (sample_type & PERF_SAMPLE_CALLCHAIN) { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + nr = in_array[i]; + COPY_U64(); + + for (u64 cntr = 0; cntr < nr; cntr++) { + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + addr = in_array[i++]; + if (addr >= PERF_CONTEXT_MAX) { + out_array[j++] = addr; + switch (addr) { + case PERF_CONTEXT_HV: + cpumode = 
PERF_RECORD_MISC_HYPERVISOR; + break; + case PERF_CONTEXT_KERNEL: + cpumode = PERF_RECORD_MISC_KERNEL; + break; + case PERF_CONTEXT_USER: + cpumode = PERF_RECORD_MISC_USER; + break; + case PERF_CONTEXT_GUEST: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_KERNEL: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_USER: + cpumode = PERF_RECORD_MISC_GUEST_USER; + break; + case PERF_CONTEXT_USER_DEFERRED: + if (cntr + 1 >= nr) { + pr_debug("Truncated callchain deferred cookie context\n"); + ret = 0; + goto out_put; + } + /* + * Immediately followed by a 64-bit + * stitching cookie. Skip/Copy it! + */ + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j++] = in_array[i++]; + cntr++; + cpumode = PERF_RECORD_MISC_USER; + break; + default: + pr_debug("invalid callchain context: %"PRIx64"\n", addr); + ret = 0; + goto out_put; + } + continue; + } + addr = aslr_tool__remap_address(aslr, thread, cpumode, addr); + out_array[j++] = addr; + } + } + if (sample_type & PERF_SAMPLE_RAW) { + size_t bytes = sizeof(u32) + sample->raw_size; + size_t u64_words = (bytes + 7) / 8; + + if (i + u64_words > max_i || j + u64_words > max_j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], bytes); + i += u64_words; + j += u64_words; + /* + * TODO: certain raw samples can be remapped, such as + * tracepoints by examining their fields. 
+ */ + pr_debug("Dropping raw samples as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + nr = in_array[i]; + COPY_U64(); + + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) + COPY_U64(); /* hw_idx */ + + if (nr > (ULLONG_MAX / 3)) { + ret = -EFAULT; + goto out_put; + } + if (nr * 3 > max_i - i || nr * 3 > max_j - j) { + ret = -EFAULT; + goto out_put; + } + for (u64 cntr = 0; cntr < nr; cntr++) { + u64 from = in_array[i++]; + u64 to = in_array[i++]; + + from = aslr_tool__remap_address(aslr, thread, sample->cpumode, from); + to = aslr_tool__remap_address(aslr, thread, sample->cpumode, to); + + out_array[j++] = from; + out_array[j++] = to; + out_array[j++] = in_array[i++]; /* flags */ + } + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) { + if (nr > max_i - i || nr > max_j - j) { + ret = -EFAULT; + goto out_put; + } + for (u64 cntr = 0; cntr < nr; cntr++) + COPY_U64(); + } + } + if (sample_type & PERF_SAMPLE_REGS_USER) { + if (CHECK_BOUNDS(1, 0)) { + ret = -EFAULT; + goto out_put; + } + /* abi */ + COPY_U64(); + /* TODO: can this be less conservative? */ + pr_debug("Dropping regs user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_STACK_USER) { + u64 size; - return delegate->sample(delegate, event, sample, machine); + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + size = in_array[i]; + COPY_U64(); + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + if (size % 8) { + size_t pad = 8 - (size % 8); + + memset(((char *)&out_array[j]) + size, 0, pad); + } + i += u64_words; + j += u64_words; + } + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping stack user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + COPY_U64(); /* perf_sample_weight */ + if (sample_type & PERF_SAMPLE_DATA_SRC) + COPY_U64(); /* data_src */ + if (sample_type & PERF_SAMPLE_TRANSACTION) + COPY_U64(); /* transaction */ + if (sample_type & PERF_SAMPLE_REGS_INTR) { + if (CHECK_BOUNDS(1, 0)) { + ret = -EFAULT; + goto out_put; + } + /* abi */ + COPY_U64(); + /* TODO: can this be less conservative? */ + pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_PHYS_ADDR) { + COPY_U64(); /* phys_addr */ + /* TODO: can this be less conservative? */ + pr_debug("Dropping physical address sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_CGROUP) + COPY_U64(); /* cgroup */ + if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + COPY_U64(); /* data_page_size */ + if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + COPY_U64(); /* code_page_size */ + + if (sample_type & PERF_SAMPLE_AUX) { + u64 size; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + size = out_array[j++]; + i++; + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + if (size % 8) { + size_t pad = 8 - (size % 8); + + memset(((char *)&out_array[j]) + size, 0, pad); + } + i += u64_words; + j += u64_words; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping aux sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + if (evsel__is_offcpu_event(evsel)) { + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping off-CPU sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); + + perf_sample__init(&new_sample, /*all=*/ true); + ret = __evsel__parse_sample(evsel, new_event, &new_sample, /*needs_swap=*/false); + + if (ret) { + perf_sample__exit(&new_sample); + goto out_put; + } + + new_sample.evsel = evsel; + ret = delegate->sample(delegate, new_event, &new_sample, machine); + perf_sample__exit(&new_sample); + +out_put: + thread__put(thread); + return ret; } +#undef CHECK_BOUNDS +#undef COPY_U64 +#undef REMAP_U64 + static int skipn(int fd, off_t n) { char buf[4096]; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 34c03f47a913..05fa0010c858 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -3337,11 +3337,11 @@ static int __set_offcpu_sample(struct perf_sample *data) return -EFAULT; } -int evsel__parse_sample(struct evsel *evsel, union perf_event *event, - struct perf_sample *data) +int __evsel__parse_sample(struct evsel *evsel, union perf_event *event, + struct perf_sample *data, bool needs_swap) { u64 type = evsel->core.attr.sample_type; - bool swapped = evsel->needs_swap; + bool swapped = needs_swap; const __u64 *array; u16 max_size = event->header.size; const void *endp = (void *)event + max_size; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 8178858d168a..8009be22cc3f 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -432,8 +432,14 @@ static inline int evsel__read_on_cpu_scaled(struct evsel *evsel, int cpu_map_idx return __evsel__read_on_cpu(evsel, cpu_map_idx, thread, true); } -int evsel__parse_sample(struct evsel *evsel, union perf_event *event, - struct perf_sample *sample); +int __evsel__parse_sample(struct evsel *evsel, union perf_event *event, + struct perf_sample *data, bool needs_swap); + +static inline int evsel__parse_sample(struct evsel *evsel, union perf_event 
*event, + struct perf_sample *data) +{ + return __evsel__parse_sample(evsel, event, data, evsel->needs_swap); +} int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event, u64 *timestamp); -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v19 3/5] perf inject/aslr: Implement sample address remapping 2026-06-08 5:48 ` [PATCH v19 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers @ 2026-06-08 6:00 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-06-08 6:00 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider: - [High] `PERF_SAMPLE_CPU` payload is blindly copied with `COPY_U64()`, failing to unpack and repack the 32-bit `cpu` and `res` fields, leading to data corruption on cross-endian hosts. -- commit e3128f8fdbd9617d2687dd070d7cac54eeaa5333 Author: Ian Rogers <irogers@google.com> perf inject/aslr: Implement sample address remapping This commit adds sample address remapping logic to the ASLR tool. It handles parsing sample events, remapping IPs, ADDRs, callchains, and branch stacks. It also aims to preserve cross-endian compatibility by properly unpacking and repacking the sequential 32-bit layouts into host endianness. > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > --- a/tools/perf/util/aslr.c > +++ b/tools/perf/util/aslr.c [ ... ] > @@ -616,13 +671,415 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, [ ... ] > + if (sample_type & PERF_SAMPLE_TID) { > + union { > + u64 val64; > + u32 val32[2]; > + } u; > + > + if (CHECK_BOUNDS(1, 1)) { > + ret = -EFAULT; > + goto out_put; > + } > + u.val32[0] = sample->pid; > + u.val32[1] = sample->tid; > + out_array[j++] = u.val64; > + i++; > + } > + if (sample_type & PERF_SAMPLE_TIME) > + COPY_U64(); /* time */ > + if (sample_type & PERF_SAMPLE_ADDR) > + REMAP_U64(sample->addr); > + if (sample_type & PERF_SAMPLE_ID) > + COPY_U64(); /* id */ > + if (sample_type & PERF_SAMPLE_STREAM_ID) > + COPY_U64(); /* stream_id */ > + if (sample_type & PERF_SAMPLE_CPU) > + COPY_U64(); /* cpu, res */ [Severity: High] Will this corrupt the CPU ID on cross-endian hosts? 
When the perf core reads the input file, it byte-swaps all 64-bit payload fields into host endianness. For PERF_SAMPLE_CPU, which consists of two 32-bit fields (cpu and res), this 64-bit byte-swap incorrectly reverses the two fields. The code correctly unpacks and repacks PERF_SAMPLE_TID using a union to safely recover its two 32-bit fields, but uses a blind COPY_U64() here for PERF_SAMPLE_CPU in aslr_tool__process_sample(). When the injected output file is written natively in host endianness, this incorrectly swapped 64-bit value is permanently saved. Later, when the new profile is parsed, the parser will read the originally empty res field into data->cpu, corrupting the CPU ID. Should PERF_SAMPLE_CPU be unpacked and repacked similarly to PERF_SAMPLE_TID? -- Sashiko AI review · https://sashiko.dev/#/patchset/20260608054841.3856224-1-irogers@google.com?part=3 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v19 4/5] perf aslr: Strip sample registers 2026-06-08 5:48 ` [PATCH v19 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (2 preceding siblings ...) 2026-06-08 5:48 ` [PATCH v19 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers @ 2026-06-08 5:48 ` Ian Rogers 2026-06-08 5:48 ` [PATCH v19 5/5] perf test: Add inject ASLR test Ian Rogers ` (2 subsequent siblings) 6 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-08 5:48 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz Extend the ASLR tool stripping helpers to drop register dump payloads by masking out the relevant perf_event_attr fields (sample_regs_user, sample_regs_intr) when the delegated tool is handling the data. struct aslr_evsel_priv maintains the original perf_event_attr values and is looked up via the evsel_orig_attrs hashmap so that sample sizes can be properly parsed even when bits are stripped from the pipeline. This is critical for bounded array copying within aslr_tool__process_sample, which relies on orig_sample_type to determine exactly which fields were captured by the kernel before any stripping occurred. This allows us to keep samples that would otherwise be dropped because they contain registers, while still obfuscating the registers. 
Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> Signed-off-by: Ian Rogers <irogers@google.com> Assisted-by: Antigravity:gemini-3.1-pro --- tools/perf/builtin-inject.c | 28 +++- tools/perf/util/aslr.c | 263 +++++++++++++++++++++++++++--------- tools/perf/util/aslr.h | 9 +- 3 files changed, 230 insertions(+), 70 deletions(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 8bb37095e2de..6d6cce4765a7 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -248,7 +248,7 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, if (!aslr_event) return -ENOMEM; memcpy(aslr_event, event, event->header.size); - aslr_tool__strip_attr_event(aslr_event, pevlist); + aslr_tool__strip_attr_event(aslr_event, *pevlist); event = aslr_event; } @@ -297,6 +297,7 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, attr.size = sizeof(struct perf_event_attr); attr.sample_type &= ~PERF_SAMPLE_AUX; + if (inject->itrace_synth_opts.add_last_branch) { attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX; @@ -2617,6 +2618,10 @@ static int __cmd_inject(struct perf_inject *inject) evsel->core.attr.exclude_callchain_user = 0; } } + + if (inject->aslr) + aslr_tool__strip_evlist(inject->session->tool, session->evlist); + session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc, @@ -2875,6 +2880,18 @@ int cmd_inject(int argc, const char **argv) if (zstd_init(&(inject.session->zstd_data), 0) < 0) pr_warning("Decompression initialization failed.\n"); + if (inject.aslr) { + struct evsel *evsel; + + evlist__for_each_entry(inject.session->evlist, evsel) { + ret = aslr_tool__cache_orig_attrs(tool, evsel); + if (ret) { + pr_err("Failed to cache original attributes: %d\n", ret); + goto out_delete; + } + } + } + /* Save original 
section info before feature bits change */ ret = save_section_info(&inject); if (ret) @@ -2893,10 +2910,17 @@ int cmd_inject(int argc, const char **argv) * the input. */ if (!data.is_pipe) { + if (inject.aslr) + aslr_tool__strip_evlist(tool, inject.session->evlist); + ret = perf_event__synthesize_for_pipe(&inject.tool, inject.session, &inject.output, perf_event__repipe); + + if (inject.aslr) + aslr_tool__restore_evlist(tool, inject.session->evlist); + if (ret < 0) goto out_delete; } @@ -2961,8 +2985,6 @@ int cmd_inject(int argc, const char **argv) goto out_delete; ret = __cmd_inject(&inject); - if (inject.aslr) - aslr_tool__strip_evlist(tool, inject.session->evlist); guest_session__exit(&inject.guest_session); diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c index c4602a43e04f..64d447565a1f 100644 --- a/tools/perf/util/aslr.c +++ b/tools/perf/util/aslr.c @@ -18,6 +18,7 @@ #include <internal/lib.h> /* page_size */ #include <linux/compiler.h> #include <linux/zalloc.h> +#include <errno.h> #include <inttypes.h> #include <unistd.h> #include <byteswap.h> @@ -46,6 +47,23 @@ struct aslr_mapping { u64 remap_start; }; +struct aslr_evsel_priv { + u64 orig_sample_type; + u64 orig_sample_regs_user; + u64 orig_sample_regs_intr; + int orig_sample_size; +}; + +static size_t evsel_hash(long key, void *ctx __maybe_unused) +{ + return (size_t)key; +} + +static bool evsel_equal(long key1, long key2, void *ctx __maybe_unused) +{ + return key1 == key2; +} + struct process_top_address { u64 remapped_max; }; @@ -60,6 +78,11 @@ struct aslr_tool { struct hashmap remap_addresses; /** @top_addresses: mapping from process to max remapped address. */ struct hashmap top_addresses; + /** + * @evsel_orig_attrs: mapping from evsel pointer to its original + * unstripped sample_type and registers bitmasks. 
+ */ + struct hashmap evsel_orig_attrs; }; static const pid_t kernel_pid = -1; @@ -123,6 +146,8 @@ static u64 aslr_tool__remap_address(struct aslr_tool *aslr, u64 *remapped_invariant_ptr = NULL; u64 remap_addr = 0; u8 effective_cpumode = cpumode; + struct dso *dso; + const char *dso_name; if (!aslr_thread) return 0; /* No thread. */ @@ -148,9 +173,15 @@ static u64 aslr_tool__remap_address(struct aslr_tool *aslr, } } + dso = map__dso(al.map); + dso_name = dso ? dso__long_name(dso) : NULL; + key.machine = maps__machine(thread__maps(aslr_thread)); - key.dso = map__dso(al.map); - key.invariant = map__start(al.map) - map__pgoff(al.map); + key.dso = dso; + if (dso && !is_anon_memory(dso_name) && !is_no_dso_memory(dso_name)) + key.invariant = map__start(al.map) - map__pgoff(al.map); + else + key.invariant = map__start(al.map); key.pid = (effective_cpumode == PERF_RECORD_MISC_KERNEL || effective_cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? kernel_pid : thread__pid(aslr_thread); @@ -676,6 +707,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, struct aslr_tool *aslr; struct perf_tool *delegate; int ret; + int orig_sample_size; u64 sample_type; struct thread *thread; struct machine *aslr_machine; @@ -693,6 +725,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, u64 orig_regs_user; u64 orig_regs_intr; + del_tool = container_of(tool, struct delegate_tool, tool); aslr = container_of(del_tool, struct aslr_tool, tool); delegate = aslr->tool.delegate; @@ -703,7 +736,24 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, return delegate->sample(delegate, event, sample, machine); ret = -EFAULT; - sample_type = evsel->core.attr.sample_type; + + if (hashmap__find(&aslr->evsel_orig_attrs, evsel, &priv)) { + orig_sample_type = priv->orig_sample_type; + orig_regs_user = priv->orig_sample_regs_user; + orig_regs_intr = priv->orig_sample_regs_intr; + } else { + orig_sample_type = evsel->core.attr.sample_type; + orig_regs_user = 
evsel->core.attr.sample_regs_user; + orig_regs_intr = evsel->core.attr.sample_regs_intr; + } + + orig_sample_size = evsel->sample_size; + + sample_type = orig_sample_type; + sample_type &= ~PERF_SAMPLE_REGS_USER; + sample_type &= ~PERF_SAMPLE_REGS_INTR; + sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); new_event = (union perf_event *)aslr->event_copy; @@ -754,11 +804,11 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, i++; \ } while (0) - if (sample_type & PERF_SAMPLE_IDENTIFIER) + if (orig_sample_type & PERF_SAMPLE_IDENTIFIER) COPY_U64(); /* id */ - if (sample_type & PERF_SAMPLE_IP) + if (orig_sample_type & PERF_SAMPLE_IP) REMAP_U64(sample->ip); - if (sample_type & PERF_SAMPLE_TID) { + if (orig_sample_type & PERF_SAMPLE_TID) { union { u64 val64; u32 val32[2]; @@ -773,19 +823,19 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, out_array[j++] = u.val64; i++; } - if (sample_type & PERF_SAMPLE_TIME) + if (orig_sample_type & PERF_SAMPLE_TIME) COPY_U64(); /* time */ - if (sample_type & PERF_SAMPLE_ADDR) + if (orig_sample_type & PERF_SAMPLE_ADDR) REMAP_U64(sample->addr); - if (sample_type & PERF_SAMPLE_ID) + if (orig_sample_type & PERF_SAMPLE_ID) COPY_U64(); /* id */ - if (sample_type & PERF_SAMPLE_STREAM_ID) + if (orig_sample_type & PERF_SAMPLE_STREAM_ID) COPY_U64(); /* stream_id */ - if (sample_type & PERF_SAMPLE_CPU) + if (orig_sample_type & PERF_SAMPLE_CPU) COPY_U64(); /* cpu, res */ - if (sample_type & PERF_SAMPLE_PERIOD) + if (orig_sample_type & PERF_SAMPLE_PERIOD) COPY_U64(); /* period */ - if (sample_type & PERF_SAMPLE_READ) { + if (orig_sample_type & PERF_SAMPLE_READ) { if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { COPY_U64(); /* value */ if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) @@ -818,7 +868,7 @@ static int 
aslr_tool__process_sample(const struct perf_tool *tool, } } } - if (sample_type & PERF_SAMPLE_CALLCHAIN) { + if (orig_sample_type & PERF_SAMPLE_CALLCHAIN) { u64 nr; if (CHECK_BOUNDS(1, 1)) { @@ -884,7 +934,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, out_array[j++] = addr; } } - if (sample_type & PERF_SAMPLE_RAW) { + if (orig_sample_type & PERF_SAMPLE_RAW) { size_t bytes = sizeof(u32) + sample->raw_size; size_t u64_words = (bytes + 7) / 8; @@ -903,7 +953,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + if (orig_sample_type & PERF_SAMPLE_BRANCH_STACK) { u64 nr; if (CHECK_BOUNDS(1, 1)) { @@ -944,19 +994,25 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, COPY_U64(); } } - if (sample_type & PERF_SAMPLE_REGS_USER) { + if (orig_sample_type & PERF_SAMPLE_REGS_USER) { + u64 abi; + if (CHECK_BOUNDS(1, 0)) { ret = -EFAULT; goto out_put; } - /* abi */ - COPY_U64(); - /* TODO: can this be less conservative? 
*/ - pr_debug("Dropping regs user sample as possible ASLR leak\n"); - ret = 0; - goto out_put; + abi = in_array[i++]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(orig_regs_user); + + if (nr > max_i - i) { + ret = -EFAULT; + goto out_put; + } + i += nr; + } } - if (sample_type & PERF_SAMPLE_STACK_USER) { + if (orig_sample_type & PERF_SAMPLE_STACK_USER) { u64 size; if (CHECK_BOUNDS(1, 1)) { @@ -986,39 +1042,45 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + if (orig_sample_type & PERF_SAMPLE_WEIGHT_TYPE) COPY_U64(); /* perf_sample_weight */ - if (sample_type & PERF_SAMPLE_DATA_SRC) + if (orig_sample_type & PERF_SAMPLE_DATA_SRC) COPY_U64(); /* data_src */ - if (sample_type & PERF_SAMPLE_TRANSACTION) + if (orig_sample_type & PERF_SAMPLE_TRANSACTION) COPY_U64(); /* transaction */ - if (sample_type & PERF_SAMPLE_REGS_INTR) { + if (orig_sample_type & PERF_SAMPLE_REGS_INTR) { + u64 abi; + if (CHECK_BOUNDS(1, 0)) { ret = -EFAULT; goto out_put; } - /* abi */ - COPY_U64(); - /* TODO: can this be less conservative? */ - pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); - ret = 0; - goto out_put; + abi = in_array[i++]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(orig_regs_intr); + + if (nr > max_i - i) { + ret = -EFAULT; + goto out_put; + } + i += nr; + } } - if (sample_type & PERF_SAMPLE_PHYS_ADDR) { + if (orig_sample_type & PERF_SAMPLE_PHYS_ADDR) { COPY_U64(); /* phys_addr */ /* TODO: can this be less conservative? 
*/ pr_debug("Dropping physical address sample as possible ASLR leak\n"); ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_CGROUP) + if (orig_sample_type & PERF_SAMPLE_CGROUP) COPY_U64(); /* cgroup */ - if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + if (orig_sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) COPY_U64(); /* data_page_size */ - if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + if (orig_sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) COPY_U64(); /* code_page_size */ - if (sample_type & PERF_SAMPLE_AUX) { + if (orig_sample_type & PERF_SAMPLE_AUX) { u64 size; if (CHECK_BOUNDS(1, 1)) { @@ -1058,11 +1120,20 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, } new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); - + /* Temporarily override evsel attributes to match the stripped new_event format! */ + evsel->sample_size = __evsel__sample_size(sample_type); + evsel->core.attr.sample_type = sample_type; + evsel->core.attr.sample_regs_user = 0; + evsel->core.attr.sample_regs_intr = 0; perf_sample__init(&new_sample, /*all=*/ true); ret = __evsel__parse_sample(evsel, new_event, &new_sample, /*needs_swap=*/false); if (ret) { + /* Restore original attributes immediately if parsing fails */ + evsel->sample_size = orig_sample_size; + evsel->core.attr.sample_type = orig_sample_type; + evsel->core.attr.sample_regs_user = orig_regs_user; + evsel->core.attr.sample_regs_intr = orig_regs_intr; perf_sample__exit(&new_sample); goto out_put; } @@ -1071,6 +1142,12 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = delegate->sample(delegate, new_event, &new_sample, machine); perf_sample__exit(&new_sample); + /* Restore original attributes so trace ingestion never desynchronizes! 
*/ + evsel->sample_size = orig_sample_size; + evsel->core.attr.sample_type = orig_sample_type; + evsel->core.attr.sample_regs_user = orig_regs_user; + evsel->core.attr.sample_regs_intr = orig_regs_intr; + out_put: thread__put(thread); return ret; @@ -1124,15 +1201,22 @@ static int aslr_tool__process_auxtrace_error(const struct perf_tool *tool __mayb return 0; } - -void aslr_tool__strip_attr_event(union perf_event *event, struct evlist **pevlist __maybe_unused) +void aslr_tool__strip_attr_event(union perf_event *event, struct evlist *evlist) { u32 attr_size; + if (!evlist) + return; + attr_size = event->attr.attr.size ?: PERF_ATTR_SIZE_VER0; if (attr_size >= (offsetof(struct perf_event_attr, sample_type) + sizeof(u64))) { event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + + if (attr_size >= (offsetof(struct perf_event_attr, sample_regs_user) + sizeof(u64))) + event->attr.attr.sample_regs_user = 0; + if (attr_size >= (offsetof(struct perf_event_attr, sample_regs_intr) + sizeof(u64))) + event->attr.attr.sample_regs_intr = 0; } if (attr_size >= (offsetof(struct perf_event_attr, type) + sizeof(u32))) { @@ -1156,28 +1240,6 @@ void aslr_tool__strip_attr_event(union perf_event *event, struct evlist **pevlis } } -void aslr_tool__strip_evlist(struct perf_tool *tool __maybe_unused, - struct evlist *evlist) -{ - struct evsel *evsel; - - evlist__for_each_entry(evlist, evsel) { - evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; - - if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) - evsel->core.attr.bp_addr = 0; - else if (evsel->core.attr.type >= PERF_TYPE_MAX) { - struct perf_pmu *pmu = perf_pmus__find_by_type(evsel->core.attr.type); - - if (pmu && (!strcmp(pmu->name, "kprobe") || - !strcmp(pmu->name, "uprobe"))) { - evsel->core.attr.config1 = 0; - evsel->core.attr.config2 = 0; - } - } - } -} - static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) { delegate_tool__init(&aslr->tool, delegate); @@ -1191,6 +1253,9 @@ static void 
aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) hashmap__init(&aslr->top_addresses, top_addresses__hash, top_addresses__equal, /*ctx=*/NULL); + hashmap__init(&aslr->evsel_orig_attrs, + evsel_hash, evsel_equal, + /*ctx=*/NULL); aslr->tool.tool.sample = aslr_tool__process_sample; /* read - reads a counter, okay to delegate. */ @@ -1253,9 +1318,13 @@ void aslr_tool__delete(struct perf_tool *tool) zfree(&cur->pkey); zfree(&cur->pvalue); } + hashmap__for_each_entry(&aslr->evsel_orig_attrs, cur, bkt) { + zfree(&cur->pvalue); + } hashmap__clear(&aslr->remap_addresses); hashmap__clear(&aslr->top_addresses); + hashmap__clear(&aslr->evsel_orig_attrs); aslr_tool__destroy_machines_priv(&aslr->machines); machines__destroy_kernel_maps(&aslr->machines); @@ -1269,3 +1338,69 @@ void aslr_tool__delete(struct perf_tool *tool) machines__exit(&aslr->machines); free(aslr); } + +int aslr_tool__cache_orig_attrs(struct perf_tool *tool, struct evsel *evsel) +{ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct aslr_evsel_priv *priv = zalloc(sizeof(*priv)); + int err; + + if (!priv) + return -ENOMEM; + + priv->orig_sample_type = evsel->core.attr.sample_type; + priv->orig_sample_regs_user = evsel->core.attr.sample_regs_user; + priv->orig_sample_regs_intr = evsel->core.attr.sample_regs_intr; + priv->orig_sample_size = evsel->sample_size; + + err = hashmap__add(&aslr->evsel_orig_attrs, evsel, priv); + if (err) { + free(priv); + return err; + } + return 0; +} + +void aslr_tool__strip_evlist(const struct perf_tool *tool __maybe_unused, struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + evsel->core.attr.sample_regs_user = 0; + evsel->core.attr.sample_regs_intr = 0; + evsel->sample_size = __evsel__sample_size(evsel->core.attr.sample_type); + 
evsel__calc_id_pos(evsel); + + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) { + evsel->core.attr.bp_addr = 0; + } else if (evsel->core.attr.type >= PERF_TYPE_MAX) { + struct perf_pmu *pmu = perf_pmus__find_by_type(evsel->core.attr.type); + + if (pmu && (!strcmp(pmu->name, "kprobe") || + !strcmp(pmu->name, "uprobe"))) { + evsel->core.attr.config1 = 0; + evsel->core.attr.config2 = 0; + } + } + } +} + +void aslr_tool__restore_evlist(const struct perf_tool *tool, struct evlist *evlist) +{ + const struct delegate_tool *del_tool = container_of(tool, const struct delegate_tool, tool); + const struct aslr_tool *aslr = container_of(del_tool, const struct aslr_tool, tool); + struct evsel *evsel; + struct aslr_evsel_priv *priv; + + evlist__for_each_entry(evlist, evsel) { + if (hashmap__find(&aslr->evsel_orig_attrs, evsel, &priv)) { + evsel->core.attr.sample_type = priv->orig_sample_type; + evsel->core.attr.sample_regs_user = priv->orig_sample_regs_user; + evsel->core.attr.sample_regs_intr = priv->orig_sample_regs_intr; + evsel->sample_size = priv->orig_sample_size; + evsel__calc_id_pos(evsel); + } + } +} diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h index 2b82f711bc67..522e31c8e2c0 100644 --- a/tools/perf/util/aslr.h +++ b/tools/perf/util/aslr.h @@ -34,8 +34,11 @@ struct evlist; union perf_event; struct perf_tool *aslr_tool__new(struct perf_tool *delegate); -void aslr_tool__delete(struct perf_tool *aslr); -void aslr_tool__strip_attr_event(union perf_event *event, struct evlist **pevlist); -void aslr_tool__strip_evlist(struct perf_tool *tool, struct evlist *evlist); +void aslr_tool__delete(struct perf_tool *tool); + +void aslr_tool__strip_attr_event(union perf_event *event, struct evlist *evlist); +int aslr_tool__cache_orig_attrs(struct perf_tool *tool, struct evsel *evsel); +void aslr_tool__strip_evlist(const struct perf_tool *tool, struct evlist *evlist); +void aslr_tool__restore_evlist(const struct perf_tool *tool, struct evlist *evlist); #endif /* 
__PERF_ASLR_H */ -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* [PATCH v19 5/5] perf test: Add inject ASLR test 2026-06-08 5:48 ` [PATCH v19 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (3 preceding siblings ...) 2026-06-08 5:48 ` [PATCH v19 4/5] perf aslr: Strip sample registers Ian Rogers @ 2026-06-08 5:48 ` Ian Rogers 2026-06-10 13:26 ` James Clark 2026-06-08 15:08 ` [PATCH v19 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-11 16:41 ` [PATCH v20 0/5] perf tools: Add inject --aslr feature Ian Rogers 6 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-08 5:48 UTC (permalink / raw) To: irogers, acme, namhyung Cc: adrian.hunter, gmx, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz Add a new shell test to verify the feature. The test covers: - Basic address remapping for user space samples. - Pipe mode coverage for piped input. - Callchain address remapping. - Consistency of output before and after injection. - Pipe mode report consistency. - Dropping of samples that leak ASLR info (physical addresses). - Kernel address remapping (utilizing a dedicated kernel-intensive VFS dd workload to guarantee a steady flow of timer-interrupt samples taken in kernel mode). - Kernel report consistency with address normalization. - Stripping of user register dumps from samples. The test suite is hardened with global 'set -o pipefail' assertions to catch pipeline failures, stream-consuming awk processors to handle SIGPIPE signals, and a dedicated pipe output scenario validating raw 'perf inject -o -' stdout streams. Note on kernel DSO normalization in the test script: The test script deliberately normalizes all kernel DSOs to a generic [kernel] tag before diffing, as obfuscating kernel virtual addresses forces perf report to occasionally shift samples between individual modules and [kernel.kallsyms] due to the lack of valid host module boundary maps. 
Signed-off-by: Ian Rogers <irogers@google.com> Assisted-by: Antigravity:gemini-3.1-pro --- tools/perf/tests/shell/inject_aslr.sh | 525 ++++++++++++++++++++++++++ 1 file changed, 525 insertions(+) create mode 100755 tools/perf/tests/shell/inject_aslr.sh diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh new file mode 100755 index 000000000000..4af21ed2ce9a --- /dev/null +++ b/tools/perf/tests/shell/inject_aslr.sh @@ -0,0 +1,525 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# perf inject --aslr test + +set -e +set -o pipefail + +shelldir=$(dirname "$0") +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + +sym="noploop" + +skip_test_missing_symbol ${sym} + +# Create global temp directory +temp_dir=$(mktemp -d /tmp/perf-test-aslr.XXXXXXXXXX) + +prog="perf test -w noploop" +[ "$(uname -m)" = "s390x" ] && prog="$prog 3" +err=0 +kprog="dd if=/dev/urandom of=/dev/null bs=1M count=50" + +cleanup() { + local exit_code=${1:-$?} + trap - EXIT TERM INT + if [ "${exit_code}" -ne 0 ] || [ "${err}" -ne 0 ]; then + echo "Test failed! Preserving temp directory: ${temp_dir}" + return + fi + # Check if temp_dir is set and looks sane before removing + if [[ "${temp_dir}" =~ ^/tmp/perf-test-aslr\. ]]; then + rm -rf "${temp_dir}" + fi +} + +trap_cleanup() { + local exit_code=$? 
+ echo "Unexpected signal in ${FUNCNAME[1]}" + cleanup ${exit_code} + exit ${exit_code} +} +trap trap_cleanup EXIT TERM INT + +get_noploop_addr() { + local file=$1 + perf script -i "$file" | awk ' + BEGIN { found=0 } + { + for (i=1; i<=NF; i++) { + if ($i ~ /noploop\+/) { + if (!found) { + print $(i-1) + found=1 + } + } + } + }' +} + +test_basic_aslr() { + echo "Test basic ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.basic.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.basic.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -v --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Basic ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Basic ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Basic ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Basic ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Basic ASLR test [Success]" + fi +} + +test_pipe_aslr() { + echo "Test pipe mode ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe.XXXXXX") + + # Use tee to save the original pipe data for comparison + perf record -e task-clock:u -o - ${prog} | tee "${data}" | perf inject --aslr -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Pipe ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Pipe ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Pipe ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Pipe ASLR test [Failed - addresses are not remapped]" + 
err=1 + else + echo "Pipe ASLR test [Success]" + fi +} + +test_callchain_aslr() { + echo "Test Callchain ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.callchain.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.callchain.XXXXXX") + + perf record -g -e task-clock:u -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Callchain ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Callchain ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Callchain ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Callchain ASLR test [Failed - addresses are not remapped]" + err=1 + else + # Extract callchain addresses (indented lines starting with hex addresses) + orig_callchain=$(perf script -i "${data}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + new_callchain=$(perf script -i "${data2}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + + if [ -z "$orig_callchain" ]; then + echo "Callchain ASLR test [Failed - no callchain samples in original file]" + err=1 + elif [ -z "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain data was dropped]" + err=1 + elif [ "$orig_callchain" = "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain addresses were not remapped]" + err=1 + else + echo "Callchain ASLR test [Success]" + fi + fi +} + +test_report_aslr() { + echo "Test perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i 
"${data}" -o "${data_clean}" + perf inject -v -b --aslr -i "${data}" -o "${data2}" + + local report1="${temp_dir}/report1_basic" + local report2="${temp_dir}/report2_basic" + local report1_clean="${temp_dir}/report1_basic.clean" + local report2_clean="${temp_dir}/report2_basic.clean" + local diff_file="${temp_dir}/diff_basic" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Report ASLR test [Success]" + fi +} + +test_pipe_report_aslr() { + echo "Test pipe mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + # Use tee to save the original pipe data, then process it with inject -b + perf record -e task-clock:u -o - ${prog} | \ + tee "${data}" | \ + perf inject -b --aslr -o "${data2}" + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1_pipe" + local report2="${temp_dir}/report2_pipe" + local report1_clean="${temp_dir}/report1_pipe.clean" + local report2_clean="${temp_dir}/report2_pipe.clean" + local diff_file="${temp_dir}/diff_pipe" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i 
"${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Pipe Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Report ASLR test [Success]" + fi +} + +test_pipe_out_report_aslr() { + echo "Test pipe output mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_out_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1_pipe_out" + local report2="${temp_dir}/report2_pipe_out" + local report1_clean="${temp_dir}/report1_pipe_out.clean" + local report2_clean="${temp_dir}/report2_pipe_out.clean" + local diff_file="${temp_dir}/diff_pipe_out" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf inject -b --aslr -i "${data}" -o - | perf report -i - --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! 
-s "${report1_clean}" ]; then + echo "Pipe Output Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Output Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Output Report ASLR test [Success]" + fi +} + +test_dropped_samples() { + echo "Test dropped samples (phys-data)" + local data + data=$(mktemp "${temp_dir}/perf.data.dropped.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.dropped.XXXXXX") + + # Check if --phys-data is supported by recording a short run + if ! perf record -e task-clock:u --phys-data -o "${data}" -- sleep 0.1 > /dev/null 2>&1; then + echo "Skipping dropped samples test as --phys-data is not supported" + return + fi + + perf record -e task-clock:u --phys-data -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + # Verify that the original file actually contained samples! + orig_samples=$(perf script -i "${data}" | wc -l) + if [ "$orig_samples" -eq 0 ]; then + echo "Dropped samples test [Failed - no samples in original file]" + err=1 + else + # Verify that samples are dropped. + samples_count=$(perf script -i "${data2}" | wc -l) + + if [ "$samples_count" -gt 0 ]; then + echo "Dropped samples test [Failed - samples were not dropped]" + err=1 + else + echo "Dropped samples test [Success]" + fi + fi +} + +test_kernel_aslr() { + echo "Test kernel ASLR remapping" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then + echo "Skipping kernel ASLR test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel ASLR test as kernel map could not be recorded (permissions restricted)" + return + fi + + perf inject -v --aslr -i "${kdata}" -o "${kdata2}" + + # Check if kernel addresses are remapped. + # Find the field that ends with :k: (the event name) and take the next field! + orig_addr=$(perf script -i "${kdata}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + new_addr=$(perf script -i "${kdata2}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + + echo "Kernel ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Kernel ASLR test [Failed - no kernel samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Kernel ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Kernel ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Kernel ASLR test [Success]" + fi +} + +test_kernel_report_aslr() { + echo "Test kernel perf report consistency" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel_report.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_report_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then + echo "Skipping kernel report test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel report test as kernel map could not be recorded (permissions restricted)" + return + fi + + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i "${kdata}" -o "${data_clean}" + perf inject -v -b --aslr -i "${kdata}" -o "${kdata2}" + + local report1="${temp_dir}/report_kernel1" + local report2="${temp_dir}/report_kernel2" + local report1_clean="${temp_dir}/report_kernel1.clean" + local report2_clean="${temp_dir}/report_kernel2.clean" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${kdata2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' > "${report2_clean}" || true + + # Normalize kernel DSOs and addresses in clean reports + # This allows kernel modules to be either a module or kernel.kallsyms + local report1_norm="${temp_dir}/report_kernel1.norm" + local report2_norm="${temp_dir}/report_kernel2.norm" + local diff_file="${temp_dir}/diff_kernel" + + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report1_clean}" | \ + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ + sort > "${report1_norm}" || true + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report2_clean}" | \ + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ + sort > "${report2_norm}" || true + + diff -u -w "${report1_norm}" "${report2_norm}" > "${diff_file}" || true + + if [ ! 
-s "${report1_norm}" ]; then + echo "Kernel Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Kernel Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Kernel Report ASLR test [Success]" + fi +} + +test_regs_stripping() { + echo "Test user register stripping" + local rdata="${temp_dir}/perf.data.regs" + local rdata2="${temp_dir}/perf.data.regs.injected" + local rdata_clean="${temp_dir}/perf.data.regs.clean" + + if ! perf record -e cycles:u --user-regs -o "${rdata}" ${prog} > /dev/null 2>&1; then + echo "Skipping user registers test as recording failed (unsupported flag/platform)" + return + fi + + perf inject -b -i "${rdata}" -o "${rdata_clean}" + perf inject -v -b --aslr -i "${rdata}" -o "${rdata2}" + + local report1="${temp_dir}/report_regs1" + local report2="${temp_dir}/report_regs2" + local report1_clean="${temp_dir}/report_regs1.clean" + local report2_clean="${temp_dir}/report_regs2.clean" + local diff_file="${temp_dir}/diff_regs" + + perf report -i "${rdata_clean}" --stdio > "${report1}" 2>/dev/null || true + perf report -i "${rdata2}" --stdio > "${report2}" 2>/dev/null || true + + grep '%' "${report1}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | \ + sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | \ + sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! 
-s "${report1_clean}" ]; then + echo "User registers stripping test [Failed - profile trace starved/empty]" + err=1 + return + elif [ -s "${diff_file}" ]; then + echo "User registers stripping test [Failed - report parsing differs]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + return + fi + + local script_dump="${temp_dir}/script_regs_dump" + perf script -D -i "${rdata2}" > "${script_dump}" 2>/dev/null || true + if grep -q "user regs:" "${script_dump}"; then + echo "User registers stripping test [Failed - register dumps still present]" + err=1 + else + echo "User registers stripping test [Success]" + fi +} + +test_basic_aslr +test_pipe_aslr +test_callchain_aslr +test_report_aslr +test_pipe_report_aslr +test_pipe_out_report_aslr +test_dropped_samples +test_kernel_aslr +test_kernel_report_aslr +test_regs_stripping + +cleanup ${err} +exit $err -- 2.54.0.1032.g2f8565e1d1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v19 5/5] perf test: Add inject ASLR test 2026-06-08 5:48 ` [PATCH v19 5/5] perf test: Add inject ASLR test Ian Rogers @ 2026-06-10 13:26 ` James Clark 2026-06-10 16:15 ` Ian Rogers 0 siblings, 1 reply; 183+ messages in thread From: James Clark @ 2026-06-10 13:26 UTC (permalink / raw) To: Ian Rogers Cc: adrian.hunter, gmx, jolsa, linux-kernel, linux-perf-users, mingo, peterz, acme, namhyung On 08/06/2026 6:48 am, Ian Rogers wrote: > Add a new shell test to verify the feature. The test covers: > - Basic address remapping for user space samples. > - Pipe mode coverage for piped into. > - Callchain address remapping. > - Consistency of output before and after injection. > - Pipe mode report consistency. > - Dropping of samples that leak ASLR info (physical addresses). > - Kernel address remapping (utilizing a dedicated kernel-intensive VFS > dd workload to guarantee continuous timer interrupts sampling flow > inside kernel privilege states). > - Kernel report consistency with address normalization. > > The test suite is hardened with global 'set -o pipefail' assertions > to catch pipeline failures, stream-consuming awk processors to handle > SIGPIPE signals, and a dedicated pipe output scenario validating raw > 'perf inject -o -' stdout streams. > > Note on kernel DSO normalization in the test script: > The test script deliberately normalizes all kernel DSOs to a generic > [kernel] tag before diffing, as obfuscating physical kernel addresses > forces perf report to occasionally shift samples between individual > modules and [kernel.kallsyms] due to the lack of valid host module > boundary maps. > > Signed-off-by: Ian Rogers <irogers@google.com> > Assisted-by: Antigravity:gemini-3.1-pro Hi Ian, All the tests pass for me now on x86, and the previous Arm failures are fixed. But I still have the slowness issue on Arm (takes almost an hour to complete) and one new test failure. I narrowed the slowness issue down to check_invariants(). 
If I compile without DEBUG=1 then it's the same speed as x86. Here's the stack where it spends all of its time: maps__split_kallsyms() at util/symbol.c:1105 __dso__load_kallsyms() at util/symbol.c:1649 dso__load_kallsyms() at util/symbol.c:1655 dso__load_kernel_sym() at util/symbol.c:2244 dso__load() at util/symbol.c:1840 map__load() at util/map.c:351 thread__find_map() at util/event.c:744 machine__resolve() at util/event.c:818 process_sample_event() at builtin-script.c:2695 evlist__deliver_sample() at util/session.c:1807 machines__deliver_event() at util/session.c:1995 perf_session__deliver_event() at util/session.c:2226 ordered_events__deliver_event() at util/session.c:134 do_flush() at util/ordered-events.c:245 __ordered_events__flush() at util/ordered-events.c:324 ordered_events__flush() at util/ordered-events.c:342 __perf_session__process_events() at util/session.c:3578 perf_session__process_events() at util/session.c:3745 __cmd_script() at builtin-script.c:3255 cmd_script() at builtin-script.c:4603 run_builtin() at perf.c:348 handle_internal_command() at perf.c:398 run_argv() at perf.c:442 main() at perf.c:549 The other issue is that test_kernel_report_aslr() fails. 
report_kernel1.clean looks like: 66.72% dd [kernel.kallsyms] [k] chacha_permute 7.75% dd [kernel.kallsyms] [k] chacha_block_generic 6.14% dd [kernel.kallsyms] [k] _copy_to_iter 3.88% dd [kernel.kallsyms] [k] lock_acquire 2.91% dd [kernel.kallsyms] [k] lock_release 2.75% dd [kernel.kallsyms] [k] rcu_is_watching 2.58% dd [kernel.kallsyms] [k] __arch_copy_to_user 2.26% dd [kernel.kallsyms] [k] __might_resched 1.94% dd [kernel.kallsyms] [k] get_random_bytes_user 1.29% dd [kernel.kallsyms] [k] __might_fault 0.81% dd [kernel.kallsyms] [k] debug_lockdep_rcu_enabled 0.48% dd [kernel.kallsyms] [k] __might_sleep 0.16% dd [kernel.kallsyms] [k] kmem_cache_free 0.16% dd [kernel.kallsyms] [k] seqcount_lockdep_reader_access 0.16% dd [kernel.kallsyms] [k] set_pte_range But report_kernel2.clean is maybe 10x longer and doesn't have any symbols: 8.08% dd [unknown] [k] 0xffff8000108416ec 2.75% dd [unknown] [k] 0xffff80001084170c 1.94% dd [unknown] [k] 0xffff800010176360 1.94% dd [unknown] [k] 0xffff80001084156c 1.45% dd [unknown] [k] 0xffff8000101be4bc ... I think this could be related to why it gets stuck in maps__split_kallsyms() doing check_invariants(). I'm not sure if I can do anything to help debug, or if it's working for you on Arm so we can compare our setups? > --- > tools/perf/tests/shell/inject_aslr.sh | 525 ++++++++++++++++++++++++++ > 1 file changed, 525 insertions(+) > create mode 100755 tools/perf/tests/shell/inject_aslr.sh > > diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh > new file mode 100755 > index 000000000000..4af21ed2ce9a > --- /dev/null > +++ b/tools/perf/tests/shell/inject_aslr.sh > @@ -0,0 +1,525 @@ > +#!/bin/bash > +# SPDX-License-Identifier: GPL-2.0 > +# perf inject --aslr test > + > +set -e > +set -o pipefail > + > +shelldir=$(dirname "$0") > +# shellcheck source=lib/perf_has_symbol.sh > +. 
"${shelldir}"/lib/perf_has_symbol.sh > + > +sym="noploop" > + > +skip_test_missing_symbol ${sym} > + > +# Create global temp directory > +temp_dir=$(mktemp -d /tmp/perf-test-aslr.XXXXXXXXXX) > + > +prog="perf test -w noploop" > +[ "$(uname -m)" = "s390x" ] && prog="$prog 3" > +err=0 > +kprog="dd if=/dev/urandom of=/dev/null bs=1M count=50" > + > +cleanup() { > + local exit_code=${1:-$?} > + trap - EXIT TERM INT > + if [ "${exit_code}" -ne 0 ] || [ "${err}" -ne 0 ]; then > + echo "Test failed! Preserving temp directory: ${temp_dir}" > + return > + fi > + # Check if temp_dir is set and looks sane before removing > + if [[ "${temp_dir}" =~ ^/tmp/perf-test-aslr\. ]]; then > + rm -rf "${temp_dir}" > + fi > +} > + > +trap_cleanup() { > + local exit_code=$? > + echo "Unexpected signal in ${FUNCNAME[1]}" > + cleanup ${exit_code} > + exit ${exit_code} > +} > +trap trap_cleanup EXIT TERM INT > + > +get_noploop_addr() { > + local file=$1 > + perf script -i "$file" | awk ' > + BEGIN { found=0 } > + { > + for (i=1; i<=NF; i++) { > + if ($i ~ /noploop\+/) { > + if (!found) { > + print $(i-1) > + found=1 > + } > + } > + } > + }' > +} > + > +test_basic_aslr() { > + echo "Test basic ASLR remapping" > + local data > + data=$(mktemp "${temp_dir}/perf.data.basic.XXXXXX") > + local data2 > + data2=$(mktemp "${temp_dir}/perf.data2.basic.XXXXXX") > + > + perf record -e task-clock:u -o "${data}" ${prog} > + perf inject -v --aslr -i "${data}" -o "${data2}" > + > + orig_addr=$(get_noploop_addr "${data}") > + new_addr=$(get_noploop_addr "${data2}") > + > + echo "Basic ASLR: orig_addr=$orig_addr, new_addr=$new_addr" > + > + if [ -z "$orig_addr" ]; then > + echo "Basic ASLR test [Failed - no noploop samples in original file]" > + err=1 > + elif [ -z "$new_addr" ]; then > + echo "Basic ASLR test [Failed - could not find remapped address]" > + err=1 > + elif [ "$orig_addr" = "$new_addr" ]; then > + echo "Basic ASLR test [Failed - addresses are not remapped]" > + err=1 > + else > + echo "Basic 
ASLR test [Success]" > + fi > +} > + > +test_pipe_aslr() { > + echo "Test pipe mode ASLR remapping" > + local data > + data=$(mktemp "${temp_dir}/perf.data.pipe.XXXXXX") > + local data2 > + data2=$(mktemp "${temp_dir}/perf.data2.pipe.XXXXXX") > + > + # Use tee to save the original pipe data for comparison > + perf record -e task-clock:u -o - ${prog} | tee "${data}" | perf inject --aslr -o "${data2}" > + > + orig_addr=$(get_noploop_addr "${data}") > + new_addr=$(get_noploop_addr "${data2}") > + > + echo "Pipe ASLR: orig_addr=$orig_addr, new_addr=$new_addr" > + > + if [ -z "$orig_addr" ]; then > + echo "Pipe ASLR test [Failed - no noploop samples in original file]" > + err=1 > + elif [ -z "$new_addr" ]; then > + echo "Pipe ASLR test [Failed - could not find remapped address]" > + err=1 > + elif [ "$orig_addr" = "$new_addr" ]; then > + echo "Pipe ASLR test [Failed - addresses are not remapped]" > + err=1 > + else > + echo "Pipe ASLR test [Success]" > + fi > +} > + > +test_callchain_aslr() { > + echo "Test Callchain ASLR remapping" > + local data > + data=$(mktemp "${temp_dir}/perf.data.callchain.XXXXXX") > + local data2 > + data2=$(mktemp "${temp_dir}/perf.data2.callchain.XXXXXX") > + > + perf record -g -e task-clock:u -o "${data}" ${prog} > + perf inject --aslr -i "${data}" -o "${data2}" > + > + orig_addr=$(get_noploop_addr "${data}") > + new_addr=$(get_noploop_addr "${data2}") > + > + echo "Callchain ASLR: orig_addr=$orig_addr, new_addr=$new_addr" > + > + if [ -z "$orig_addr" ]; then > + echo "Callchain ASLR test [Failed - no noploop samples in original file]" > + err=1 > + elif [ -z "$new_addr" ]; then > + echo "Callchain ASLR test [Failed - could not find remapped address]" > + err=1 > + elif [ "$orig_addr" = "$new_addr" ]; then > + echo "Callchain ASLR test [Failed - addresses are not remapped]" > + err=1 > + else > + # Extract callchain addresses (indented lines starting with hex addresses) > + orig_callchain=$(perf script -i "${data}" | awk 
'/^[[:space:]]+[0-9a-f]+/ {print $1}') > + new_callchain=$(perf script -i "${data2}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') > + > + if [ -z "$orig_callchain" ]; then > + echo "Callchain ASLR test [Failed - no callchain samples in original file]" > + err=1 > + elif [ -z "$new_callchain" ]; then > + echo "Callchain ASLR test [Failed - callchain data was dropped]" > + err=1 > + elif [ "$orig_callchain" = "$new_callchain" ]; then > + echo "Callchain ASLR test [Failed - callchain addresses were not remapped]" > + err=1 > + else > + echo "Callchain ASLR test [Success]" > + fi > + fi > +} > + > +test_report_aslr() { > + echo "Test perf report consistency" > + local data > + data=$(mktemp "${temp_dir}/perf.data.report.XXXXXX") > + local data2 > + data2=$(mktemp "${temp_dir}/perf.data2.report.XXXXXX") > + local data_clean > + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") > + > + perf record -e task-clock:u -o "${data}" ${prog} > + # Use -b to inject build-ids and force ordered events processing in both > + perf inject -b -i "${data}" -o "${data_clean}" > + perf inject -v -b --aslr -i "${data}" -o "${data2}" > + > + local report1="${temp_dir}/report1_basic" > + local report2="${temp_dir}/report2_basic" > + local report1_clean="${temp_dir}/report1_basic.clean" > + local report2_clean="${temp_dir}/report2_basic.clean" > + local diff_file="${temp_dir}/diff_basic" > + > + perf report -i "${data_clean}" --stdio > "${report1}" > + perf report -i "${data2}" --stdio > "${report2}" > + > + # Strip headers and compare lines with percentages > + grep '%' "${report1}" | grep -v '^#' | \ > + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report1_clean}" || true > + grep '%' "${report2}" | grep -v '^#' | \ > + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report2_clean}" || true > + > + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true > + > + if [ ! 
-s "${report1_clean}" ]; then > + echo "Report ASLR test [Failed - no samples captured]" > + err=1 > + elif [ -s "${diff_file}" ]; then > + echo "Report ASLR test [Failed - reports differ]" > + echo "Showing first 20 lines of diff:" > + head -n 20 "${diff_file}" > + err=1 > + else > + echo "Report ASLR test [Success]" > + fi > +} > + > +test_pipe_report_aslr() { > + echo "Test pipe mode perf report consistency" > + local data > + data=$(mktemp "${temp_dir}/perf.data.pipe_report.XXXXXX") > + local data2 > + data2=$(mktemp "${temp_dir}/perf.data2.pipe_report.XXXXXX") > + local data_clean > + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") > + > + # Use tee to save the original pipe data, then process it with inject -b > + perf record -e task-clock:u -o - ${prog} | \ > + tee "${data}" | \ > + perf inject -b --aslr -o "${data2}" > + perf inject -b -i "${data}" -o "${data_clean}" > + > + local report1="${temp_dir}/report1_pipe" > + local report2="${temp_dir}/report2_pipe" > + local report1_clean="${temp_dir}/report1_pipe.clean" > + local report2_clean="${temp_dir}/report2_pipe.clean" > + local diff_file="${temp_dir}/diff_pipe" > + > + perf report -i "${data_clean}" --stdio > "${report1}" > + perf report -i "${data2}" --stdio > "${report2}" > + > + # Strip headers and compare lines with percentages > + grep '%' "${report1}" | grep -v '^#' | \ > + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report1_clean}" || true > + grep '%' "${report2}" | grep -v '^#' | \ > + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report2_clean}" || true > + > + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true > + > + if [ ! 
-s "${report1_clean}" ]; then > + echo "Pipe Report ASLR test [Failed - no samples captured]" > + err=1 > + elif [ -s "${diff_file}" ]; then > + echo "Pipe Report ASLR test [Failed - reports differ]" > + echo "Showing first 20 lines of diff:" > + head -n 20 "${diff_file}" > + err=1 > + else > + echo "Pipe Report ASLR test [Success]" > + fi > +} > + > +test_pipe_out_report_aslr() { > + echo "Test pipe output mode perf report consistency" > + local data > + data=$(mktemp "${temp_dir}/perf.data.pipe_out_report.XXXXXX") > + local data_clean > + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") > + > + perf record -e task-clock:u -o "${data}" ${prog} > + perf inject -b -i "${data}" -o "${data_clean}" > + > + local report1="${temp_dir}/report1_pipe_out" > + local report2="${temp_dir}/report2_pipe_out" > + local report1_clean="${temp_dir}/report1_pipe_out.clean" > + local report2_clean="${temp_dir}/report2_pipe_out.clean" > + local diff_file="${temp_dir}/diff_pipe_out" > + > + perf report -i "${data_clean}" --stdio > "${report1}" > + perf inject -b --aslr -i "${data}" -o - | perf report -i - --stdio > "${report2}" > + > + # Strip headers and compare lines with percentages > + grep '%' "${report1}" | grep -v '^#' | \ > + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report1_clean}" || true > + grep '%' "${report2}" | grep -v '^#' | \ > + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report2_clean}" || true > + > + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true > + > + if [ ! 
-s "${report1_clean}" ]; then > + echo "Pipe Output Report ASLR test [Failed - no samples captured]" > + err=1 > + elif [ -s "${diff_file}" ]; then > + echo "Pipe Output Report ASLR test [Failed - reports differ]" > + echo "Showing first 20 lines of diff:" > + head -n 20 "${diff_file}" > + err=1 > + else > + echo "Pipe Output Report ASLR test [Success]" > + fi > +} > + > +test_dropped_samples() { > + echo "Test dropped samples (phys-data)" > + local data > + data=$(mktemp "${temp_dir}/perf.data.dropped.XXXXXX") > + local data2 > + data2=$(mktemp "${temp_dir}/perf.data2.dropped.XXXXXX") > + > + # Check if --phys-data is supported by recording a short run > + if ! perf record -e task-clock:u --phys-data -o "${data}" -- sleep 0.1 > /dev/null 2>&1; then > + echo "Skipping dropped samples test as --phys-data is not supported" > + return > + fi > + > + perf record -e task-clock:u --phys-data -o "${data}" ${prog} > + perf inject --aslr -i "${data}" -o "${data2}" > + > + # Verify that the original file actually contained samples! > + orig_samples=$(perf script -i "${data}" | wc -l) > + if [ "$orig_samples" -eq 0 ]; then > + echo "Dropped samples test [Failed - no samples in original file]" > + err=1 > + else > + # Verify that samples are dropped. > + samples_count=$(perf script -i "${data2}" | wc -l) > + > + if [ "$samples_count" -gt 0 ]; then > + echo "Dropped samples test [Failed - samples were not dropped]" > + err=1 > + else > + echo "Dropped samples test [Success]" > + fi > + fi > +} > + > +test_kernel_aslr() { > + echo "Test kernel ASLR remapping" > + local kdata > + kdata=$(mktemp "${temp_dir}/perf.data.kernel.XXXXXX") > + local kdata2 > + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel.XXXXXX") > + local log_file > + log_file=$(mktemp "${temp_dir}/kernel_record.log.XXXXXX") > + > + # Try to record kernel samples > + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then > + echo "Skipping kernel ASLR test as recording failed (maybe no permissions)" > + return > + fi > + > + # Check for warning about kernel map restriction > + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then > + echo "Skipping kernel ASLR test as kernel map could not be recorded (permissions restricted)" > + return > + fi > + > + perf inject -v --aslr -i "${kdata}" -o "${kdata2}" > + > + # Check if kernel addresses are remapped. > + # Find the field that ends with :k: (the event name) and take the next field! > + orig_addr=$(perf script -i "${kdata}" | awk ' > + BEGIN { found=0 } > + { > + for (i=1; i<NF; i++) { > + if ($i ~ /:[k]+:?$/) { > + if (!found) { > + print $(i+1) > + found=1 > + } > + } > + } > + }') > + new_addr=$(perf script -i "${kdata2}" | awk ' > + BEGIN { found=0 } > + { > + for (i=1; i<NF; i++) { > + if ($i ~ /:[k]+:?$/) { > + if (!found) { > + print $(i+1) > + found=1 > + } > + } > + } > + }') > + > + echo "Kernel ASLR: orig_addr=$orig_addr, new_addr=$new_addr" > + > + if [ -z "$orig_addr" ]; then > + echo "Kernel ASLR test [Failed - no kernel samples in original file]" > + err=1 > + elif [ -z "$new_addr" ]; then > + echo "Kernel ASLR test [Failed - could not find remapped address]" > + err=1 > + elif [ "$orig_addr" = "$new_addr" ]; then > + echo "Kernel ASLR test [Failed - addresses are not remapped]" > + err=1 > + else > + echo "Kernel ASLR test [Success]" > + fi > +} > + > +test_kernel_report_aslr() { > + echo "Test kernel perf report consistency" > + local kdata > + kdata=$(mktemp "${temp_dir}/perf.data.kernel_report.XXXXXX") > + local kdata2 > + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel_report.XXXXXX") > + local data_clean > + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") > + local log_file > + log_file=$(mktemp "${temp_dir}/kernel_report_record.log.XXXXXX") > + > + # Try to record kernel samples > + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then > + echo "Skipping kernel report test as recording failed (maybe no permissions)" > + return > + fi > + > + # Check for warning about kernel map restriction > + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then > + echo "Skipping kernel report test as kernel map could not be recorded (permissions restricted)" > + return > + fi > + > + # Use -b to inject build-ids and force ordered events processing in both > + perf inject -b -i "${kdata}" -o "${data_clean}" > + perf inject -v -b --aslr -i "${kdata}" -o "${kdata2}" > + > + local report1="${temp_dir}/report_kernel1" > + local report2="${temp_dir}/report_kernel2" > + local report1_clean="${temp_dir}/report_kernel1.clean" > + local report2_clean="${temp_dir}/report_kernel2.clean" > + > + perf report -i "${data_clean}" --stdio > "${report1}" > + perf report -i "${kdata2}" --stdio > "${report2}" > + > + # Strip headers and compare lines with percentages > + grep '%' "${report1}" | grep -v '^#' > "${report1_clean}" || true > + grep '%' "${report2}" | grep -v '^#' > "${report2_clean}" || true > + > + # Normalize kernel DSOs and addresses in clean reports > + # This allows kernel modules to be either a module or kernel.kallsyms > + local report1_norm="${temp_dir}/report_kernel1.norm" > + local report2_norm="${temp_dir}/report_kernel2.norm" > + local diff_file="${temp_dir}/diff_kernel" > + > + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report1_clean}" | \ > + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ > + sort > "${report1_norm}" || true > + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report2_clean}" | \ > + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ > + sort > "${report2_norm}" || true > + > + diff -u -w "${report1_norm}" "${report2_norm}" > "${diff_file}" || true > + > + if [ ! 
-s "${report1_norm}" ]; then > + echo "Kernel Report ASLR test [Failed - no samples captured]" > + err=1 > + elif [ -s "${diff_file}" ]; then > + echo "Kernel Report ASLR test [Failed - reports differ]" > + echo "Showing first 20 lines of diff:" > + head -n 20 "${diff_file}" > + err=1 > + else > + echo "Kernel Report ASLR test [Success]" > + fi > +} > + > +test_regs_stripping() { > + echo "Test user register stripping" > + local rdata="${temp_dir}/perf.data.regs" > + local rdata2="${temp_dir}/perf.data.regs.injected" > + local rdata_clean="${temp_dir}/perf.data.regs.clean" > + > + if ! perf record -e cycles:u --user-regs -o "${rdata}" ${prog} > /dev/null 2>&1; then > + echo "Skipping user registers test as recording failed (unsupported flag/platform)" > + return > + fi > + > + perf inject -b -i "${rdata}" -o "${rdata_clean}" > + perf inject -v -b --aslr -i "${rdata}" -o "${rdata2}" > + > + local report1="${temp_dir}/report_regs1" > + local report2="${temp_dir}/report_regs2" > + local report1_clean="${temp_dir}/report_regs1.clean" > + local report2_clean="${temp_dir}/report_regs2.clean" > + local diff_file="${temp_dir}/diff_regs" > + > + perf report -i "${rdata_clean}" --stdio > "${report1}" 2>/dev/null || true > + perf report -i "${rdata2}" --stdio > "${report2}" 2>/dev/null || true > + > + grep '%' "${report1}" | grep -v '^#' | \ > + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | \ > + sort > "${report1_clean}" || true > + grep '%' "${report2}" | grep -v '^#' | \ > + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | \ > + sort > "${report2_clean}" || true > + > + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true > + > + if [ ! 
-s "${report1_clean}" ]; then > + echo "User registers stripping test [Failed - profile trace starved/empty]" > + err=1 > + return > + elif [ -s "${diff_file}" ]; then > + echo "User registers stripping test [Failed - report parsing differs]" > + echo "Showing first 20 lines of diff:" > + head -n 20 "${diff_file}" > + err=1 > + return > + fi > + > + local script_dump="${temp_dir}/script_regs_dump" > + perf script -D -i "${rdata2}" > "${script_dump}" 2>/dev/null || true > + if grep -q "user regs:" "${script_dump}"; then > + echo "User registers stripping test [Failed - register dumps still present]" > + err=1 > + else > + echo "User registers stripping test [Success]" > + fi > +} > + > +test_basic_aslr > +test_pipe_aslr > +test_callchain_aslr > +test_report_aslr > +test_pipe_report_aslr > +test_pipe_out_report_aslr > +test_dropped_samples > +test_kernel_aslr > +test_kernel_report_aslr > +test_regs_stripping > + > +cleanup ${err} > +exit $err ^ permalink raw reply [flat|nested] 183+ messages in thread
* Re: [PATCH v19 5/5] perf test: Add inject ASLR test 2026-06-10 13:26 ` James Clark @ 2026-06-10 16:15 ` Ian Rogers 2026-06-11 8:32 ` James Clark 0 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-10 16:15 UTC (permalink / raw) To: James Clark Cc: adrian.hunter, gmx, jolsa, linux-kernel, linux-perf-users, mingo, peterz, acme, namhyung On Wed, Jun 10, 2026 at 6:26 AM James Clark <james.clark@linaro.org> wrote: > > > > On 08/06/2026 6:48 am, Ian Rogers wrote: > > Add a new shell test to verify the feature. The test covers: > > - Basic address remapping for user space samples. > > - Pipe mode coverage for piped into. > > - Callchain address remapping. > > - Consistency of output before and after injection. > > - Pipe mode report consistency. > > - Dropping of samples that leak ASLR info (physical addresses). > > - Kernel address remapping (utilizing a dedicated kernel-intensive VFS > > dd workload to guarantee continuous timer interrupts sampling flow > > inside kernel privilege states). > > - Kernel report consistency with address normalization. > > > > The test suite is hardened with global 'set -o pipefail' assertions > > to catch pipeline failures, stream-consuming awk processors to handle > > SIGPIPE signals, and a dedicated pipe output scenario validating raw > > 'perf inject -o -' stdout streams. > > > > Note on kernel DSO normalization in the test script: > > The test script deliberately normalizes all kernel DSOs to a generic > > [kernel] tag before diffing, as obfuscating physical kernel addresses > > forces perf report to occasionally shift samples between individual > > modules and [kernel.kallsyms] due to the lack of valid host module > > boundary maps. > > > > Signed-off-by: Ian Rogers <irogers@google.com> > > Assisted-by: Antigravity:gemini-3.1-pro > > Hi Ian, > > All the tests pass for me now on x86, and the previous Arm failures are > fixed. 
But I still have the slowness issue on Arm (takes almost an hour > to complete) and one new test failure. > > I narrowed the slowness issue down to check_invariants(). If I compile > without DEBUG=1 then it's the same speed as x86. Here's the stack where > it spends all of its time: > > maps__split_kallsyms() at util/symbol.c:1105 > __dso__load_kallsyms() at util/symbol.c:1649 > dso__load_kallsyms() at util/symbol.c:1655 > dso__load_kernel_sym() at util/symbol.c:2244 > dso__load() at util/symbol.c:1840 > map__load() at util/map.c:351 > thread__find_map() at util/event.c:744 > machine__resolve() at util/event.c:818 > process_sample_event() at builtin-script.c:2695 > evlist__deliver_sample() at util/session.c:1807 > machines__deliver_event() at util/session.c:1995 > perf_session__deliver_event() at util/session.c:2226 > ordered_events__deliver_event() at util/session.c:134 > do_flush() at util/ordered-events.c:245 > __ordered_events__flush() at util/ordered-events.c:324 > ordered_events__flush() at util/ordered-events.c:342 > __perf_session__process_events() at util/session.c:3578 > perf_session__process_events() at util/session.c:3745 > __cmd_script() at builtin-script.c:3255 > cmd_script() at builtin-script.c:4603 > run_builtin() at perf.c:348 > handle_internal_command() at perf.c:398 > run_argv() at perf.c:442 > main() at perf.c:549 > > The other issue is that test_kernel_report_aslr() fails. 
> report_kernel1.clean looks like: > > 66.72% dd [kernel.kallsyms] [k] chacha_permute > 7.75% dd [kernel.kallsyms] [k] chacha_block_generic > 6.14% dd [kernel.kallsyms] [k] _copy_to_iter > 3.88% dd [kernel.kallsyms] [k] lock_acquire > 2.91% dd [kernel.kallsyms] [k] lock_release > 2.75% dd [kernel.kallsyms] [k] rcu_is_watching > 2.58% dd [kernel.kallsyms] [k] __arch_copy_to_user > 2.26% dd [kernel.kallsyms] [k] __might_resched > 1.94% dd [kernel.kallsyms] [k] get_random_bytes_user > 1.29% dd [kernel.kallsyms] [k] __might_fault > 0.81% dd [kernel.kallsyms] [k] debug_lockdep_rcu_enabled > 0.48% dd [kernel.kallsyms] [k] __might_sleep > 0.16% dd [kernel.kallsyms] [k] kmem_cache_free > 0.16% dd [kernel.kallsyms] [k] seqcount_lockdep_reader_access > 0.16% dd [kernel.kallsyms] [k] set_pte_range > > But report_kernel2.clean is maybe 10x longer and doesn't have any symbols: > > 8.08% dd [unknown] [k] 0xffff8000108416ec > 2.75% dd [unknown] [k] 0xffff80001084170c > 1.94% dd [unknown] [k] 0xffff800010176360 > 1.94% dd [unknown] [k] 0xffff80001084156c > 1.45% dd [unknown] [k] 0xffff8000101be4bc > ... > > I think this could be related to why it gets stuck in > maps__split_kallsyms() doing check_invariants(). I'm not sure if I can > do anything to help debug, or if it's working for you on Arm so we can > compare our setups? So my focus has been making this solid on x86 and I hadn't been testing on ARM. Kernel symbolization is always a pain. Perhaps for now we should just skip the ASLR testing on ARM, and fix it in follow-up patches. 
Thanks, Ian > > --- > > tools/perf/tests/shell/inject_aslr.sh | 525 ++++++++++++++++++++++++++ > > 1 file changed, 525 insertions(+) > > create mode 100755 tools/perf/tests/shell/inject_aslr.sh > > > > diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh > > new file mode 100755 > > index 000000000000..4af21ed2ce9a > > --- /dev/null > > +++ b/tools/perf/tests/shell/inject_aslr.sh > > @@ -0,0 +1,525 @@ > > +#!/bin/bash > > +# SPDX-License-Identifier: GPL-2.0 > > +# perf inject --aslr test > > + > > +set -e > > +set -o pipefail > > + > > +shelldir=$(dirname "$0") > > +# shellcheck source=lib/perf_has_symbol.sh > > +. "${shelldir}"/lib/perf_has_symbol.sh > > + > > +sym="noploop" > > + > > +skip_test_missing_symbol ${sym} > > + > > +# Create global temp directory > > +temp_dir=$(mktemp -d /tmp/perf-test-aslr.XXXXXXXXXX) > > + > > +prog="perf test -w noploop" > > +[ "$(uname -m)" = "s390x" ] && prog="$prog 3" > > +err=0 > > +kprog="dd if=/dev/urandom of=/dev/null bs=1M count=50" > > + > > +cleanup() { > > + local exit_code=${1:-$?} > > + trap - EXIT TERM INT > > + if [ "${exit_code}" -ne 0 ] || [ "${err}" -ne 0 ]; then > > + echo "Test failed! Preserving temp directory: ${temp_dir}" > > + return > > + fi > > + # Check if temp_dir is set and looks sane before removing > > + if [[ "${temp_dir}" =~ ^/tmp/perf-test-aslr\. ]]; then > > + rm -rf "${temp_dir}" > > + fi > > +} > > + > > +trap_cleanup() { > > + local exit_code=$? 
> > + echo "Unexpected signal in ${FUNCNAME[1]}" > > + cleanup ${exit_code} > > + exit ${exit_code} > > +} > > +trap trap_cleanup EXIT TERM INT > > + > > +get_noploop_addr() { > > + local file=$1 > > + perf script -i "$file" | awk ' > > + BEGIN { found=0 } > > + { > > + for (i=1; i<=NF; i++) { > > + if ($i ~ /noploop\+/) { > > + if (!found) { > > + print $(i-1) > > + found=1 > > + } > > + } > > + } > > + }' > > +} > > + > > +test_basic_aslr() { > > + echo "Test basic ASLR remapping" > > + local data > > + data=$(mktemp "${temp_dir}/perf.data.basic.XXXXXX") > > + local data2 > > + data2=$(mktemp "${temp_dir}/perf.data2.basic.XXXXXX") > > + > > + perf record -e task-clock:u -o "${data}" ${prog} > > + perf inject -v --aslr -i "${data}" -o "${data2}" > > + > > + orig_addr=$(get_noploop_addr "${data}") > > + new_addr=$(get_noploop_addr "${data2}") > > + > > + echo "Basic ASLR: orig_addr=$orig_addr, new_addr=$new_addr" > > + > > + if [ -z "$orig_addr" ]; then > > + echo "Basic ASLR test [Failed - no noploop samples in original file]" > > + err=1 > > + elif [ -z "$new_addr" ]; then > > + echo "Basic ASLR test [Failed - could not find remapped address]" > > + err=1 > > + elif [ "$orig_addr" = "$new_addr" ]; then > > + echo "Basic ASLR test [Failed - addresses are not remapped]" > > + err=1 > > + else > > + echo "Basic ASLR test [Success]" > > + fi > > +} > > + > > +test_pipe_aslr() { > > + echo "Test pipe mode ASLR remapping" > > + local data > > + data=$(mktemp "${temp_dir}/perf.data.pipe.XXXXXX") > > + local data2 > > + data2=$(mktemp "${temp_dir}/perf.data2.pipe.XXXXXX") > > + > > + # Use tee to save the original pipe data for comparison > > + perf record -e task-clock:u -o - ${prog} | tee "${data}" | perf inject --aslr -o "${data2}" > > + > > + orig_addr=$(get_noploop_addr "${data}") > > + new_addr=$(get_noploop_addr "${data2}") > > + > > + echo "Pipe ASLR: orig_addr=$orig_addr, new_addr=$new_addr" > > + > > + if [ -z "$orig_addr" ]; then > > + echo "Pipe ASLR test 
[Failed - no noploop samples in original file]" > > + err=1 > > + elif [ -z "$new_addr" ]; then > > + echo "Pipe ASLR test [Failed - could not find remapped address]" > > + err=1 > > + elif [ "$orig_addr" = "$new_addr" ]; then > > + echo "Pipe ASLR test [Failed - addresses are not remapped]" > > + err=1 > > + else > > + echo "Pipe ASLR test [Success]" > > + fi > > +} > > + > > +test_callchain_aslr() { > > + echo "Test Callchain ASLR remapping" > > + local data > > + data=$(mktemp "${temp_dir}/perf.data.callchain.XXXXXX") > > + local data2 > > + data2=$(mktemp "${temp_dir}/perf.data2.callchain.XXXXXX") > > + > > + perf record -g -e task-clock:u -o "${data}" ${prog} > > + perf inject --aslr -i "${data}" -o "${data2}" > > + > > + orig_addr=$(get_noploop_addr "${data}") > > + new_addr=$(get_noploop_addr "${data2}") > > + > > + echo "Callchain ASLR: orig_addr=$orig_addr, new_addr=$new_addr" > > + > > + if [ -z "$orig_addr" ]; then > > + echo "Callchain ASLR test [Failed - no noploop samples in original file]" > > + err=1 > > + elif [ -z "$new_addr" ]; then > > + echo "Callchain ASLR test [Failed - could not find remapped address]" > > + err=1 > > + elif [ "$orig_addr" = "$new_addr" ]; then > > + echo "Callchain ASLR test [Failed - addresses are not remapped]" > > + err=1 > > + else > > + # Extract callchain addresses (indented lines starting with hex addresses) > > + orig_callchain=$(perf script -i "${data}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') > > + new_callchain=$(perf script -i "${data2}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') > > + > > + if [ -z "$orig_callchain" ]; then > > + echo "Callchain ASLR test [Failed - no callchain samples in original file]" > > + err=1 > > + elif [ -z "$new_callchain" ]; then > > + echo "Callchain ASLR test [Failed - callchain data was dropped]" > > + err=1 > > + elif [ "$orig_callchain" = "$new_callchain" ]; then > > + echo "Callchain ASLR test [Failed - callchain addresses were not remapped]" > > + err=1 > > + else > > + 
echo "Callchain ASLR test [Success]" > > + fi > > + fi > > +} > > + > > +test_report_aslr() { > > + echo "Test perf report consistency" > > + local data > > + data=$(mktemp "${temp_dir}/perf.data.report.XXXXXX") > > + local data2 > > + data2=$(mktemp "${temp_dir}/perf.data2.report.XXXXXX") > > + local data_clean > > + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") > > + > > + perf record -e task-clock:u -o "${data}" ${prog} > > + # Use -b to inject build-ids and force ordered events processing in both > > + perf inject -b -i "${data}" -o "${data_clean}" > > + perf inject -v -b --aslr -i "${data}" -o "${data2}" > > + > > + local report1="${temp_dir}/report1_basic" > > + local report2="${temp_dir}/report2_basic" > > + local report1_clean="${temp_dir}/report1_basic.clean" > > + local report2_clean="${temp_dir}/report2_basic.clean" > > + local diff_file="${temp_dir}/diff_basic" > > + > > + perf report -i "${data_clean}" --stdio > "${report1}" > > + perf report -i "${data2}" --stdio > "${report2}" > > + > > + # Strip headers and compare lines with percentages > > + grep '%' "${report1}" | grep -v '^#' | \ > > + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report1_clean}" || true > > + grep '%' "${report2}" | grep -v '^#' | \ > > + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report2_clean}" || true > > + > > + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true > > + > > + if [ ! 
-s "${report1_clean}" ]; then > > + echo "Report ASLR test [Failed - no samples captured]" > > + err=1 > > + elif [ -s "${diff_file}" ]; then > > + echo "Report ASLR test [Failed - reports differ]" > > + echo "Showing first 20 lines of diff:" > > + head -n 20 "${diff_file}" > > + err=1 > > + else > > + echo "Report ASLR test [Success]" > > + fi > > +} > > + > > +test_pipe_report_aslr() { > > + echo "Test pipe mode perf report consistency" > > + local data > > + data=$(mktemp "${temp_dir}/perf.data.pipe_report.XXXXXX") > > + local data2 > > + data2=$(mktemp "${temp_dir}/perf.data2.pipe_report.XXXXXX") > > + local data_clean > > + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") > > + > > + # Use tee to save the original pipe data, then process it with inject -b > > + perf record -e task-clock:u -o - ${prog} | \ > > + tee "${data}" | \ > > + perf inject -b --aslr -o "${data2}" > > + perf inject -b -i "${data}" -o "${data_clean}" > > + > > + local report1="${temp_dir}/report1_pipe" > > + local report2="${temp_dir}/report2_pipe" > > + local report1_clean="${temp_dir}/report1_pipe.clean" > > + local report2_clean="${temp_dir}/report2_pipe.clean" > > + local diff_file="${temp_dir}/diff_pipe" > > + > > + perf report -i "${data_clean}" --stdio > "${report1}" > > + perf report -i "${data2}" --stdio > "${report2}" > > + > > + # Strip headers and compare lines with percentages > > + grep '%' "${report1}" | grep -v '^#' | \ > > + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report1_clean}" || true > > + grep '%' "${report2}" | grep -v '^#' | \ > > + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report2_clean}" || true > > + > > + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true > > + > > + if [ ! 
-s "${report1_clean}" ]; then > > + echo "Pipe Report ASLR test [Failed - no samples captured]" > > + err=1 > > + elif [ -s "${diff_file}" ]; then > > + echo "Pipe Report ASLR test [Failed - reports differ]" > > + echo "Showing first 20 lines of diff:" > > + head -n 20 "${diff_file}" > > + err=1 > > + else > > + echo "Pipe Report ASLR test [Success]" > > + fi > > +} > > + > > +test_pipe_out_report_aslr() { > > + echo "Test pipe output mode perf report consistency" > > + local data > > + data=$(mktemp "${temp_dir}/perf.data.pipe_out_report.XXXXXX") > > + local data_clean > > + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") > > + > > + perf record -e task-clock:u -o "${data}" ${prog} > > + perf inject -b -i "${data}" -o "${data_clean}" > > + > > + local report1="${temp_dir}/report1_pipe_out" > > + local report2="${temp_dir}/report2_pipe_out" > > + local report1_clean="${temp_dir}/report1_pipe_out.clean" > > + local report2_clean="${temp_dir}/report2_pipe_out.clean" > > + local diff_file="${temp_dir}/diff_pipe_out" > > + > > + perf report -i "${data_clean}" --stdio > "${report1}" > > + perf inject -b --aslr -i "${data}" -o - | perf report -i - --stdio > "${report2}" > > + > > + # Strip headers and compare lines with percentages > > + grep '%' "${report1}" | grep -v '^#' | \ > > + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report1_clean}" || true > > + grep '%' "${report2}" | grep -v '^#' | \ > > + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report2_clean}" || true > > + > > + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true > > + > > + if [ ! 
-s "${report1_clean}" ]; then > > + echo "Pipe Output Report ASLR test [Failed - no samples captured]" > > + err=1 > > + elif [ -s "${diff_file}" ]; then > > + echo "Pipe Output Report ASLR test [Failed - reports differ]" > > + echo "Showing first 20 lines of diff:" > > + head -n 20 "${diff_file}" > > + err=1 > > + else > > + echo "Pipe Output Report ASLR test [Success]" > > + fi > > +} > > + > > +test_dropped_samples() { > > + echo "Test dropped samples (phys-data)" > > + local data > > + data=$(mktemp "${temp_dir}/perf.data.dropped.XXXXXX") > > + local data2 > > + data2=$(mktemp "${temp_dir}/perf.data2.dropped.XXXXXX") > > + > > + # Check if --phys-data is supported by recording a short run > > + if ! perf record -e task-clock:u --phys-data -o "${data}" -- sleep 0.1 > /dev/null 2>&1; then > > + echo "Skipping dropped samples test as --phys-data is not supported" > > + return > > + fi > > + > > + perf record -e task-clock:u --phys-data -o "${data}" ${prog} > > + perf inject --aslr -i "${data}" -o "${data2}" > > + > > + # Verify that the original file actually contained samples! > > + orig_samples=$(perf script -i "${data}" | wc -l) > > + if [ "$orig_samples" -eq 0 ]; then > > + echo "Dropped samples test [Failed - no samples in original file]" > > + err=1 > > + else > > + # Verify that samples are dropped. > > + samples_count=$(perf script -i "${data2}" | wc -l) > > + > > + if [ "$samples_count" -gt 0 ]; then > > + echo "Dropped samples test [Failed - samples were not dropped]" > > + err=1 > > + else > > + echo "Dropped samples test [Success]" > > + fi > > + fi > > +} > > + > > +test_kernel_aslr() { > > + echo "Test kernel ASLR remapping" > > + local kdata > > + kdata=$(mktemp "${temp_dir}/perf.data.kernel.XXXXXX") > > + local kdata2 > > + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel.XXXXXX") > > + local log_file > > + log_file=$(mktemp "${temp_dir}/kernel_record.log.XXXXXX") > > + > > + # Try to record kernel samples > > + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then > > + echo "Skipping kernel ASLR test as recording failed (maybe no permissions)" > > + return > > + fi > > + > > + # Check for warning about kernel map restriction > > + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then > > + echo "Skipping kernel ASLR test as kernel map could not be recorded (permissions restricted)" > > + return > > + fi > > + > > + perf inject -v --aslr -i "${kdata}" -o "${kdata2}" > > + > > + # Check if kernel addresses are remapped. > > + # Find the field that ends with :k: (the event name) and take the next field! > > + orig_addr=$(perf script -i "${kdata}" | awk ' > > + BEGIN { found=0 } > > + { > > + for (i=1; i<NF; i++) { > > + if ($i ~ /:[k]+:?$/) { > > + if (!found) { > > + print $(i+1) > > + found=1 > > + } > > + } > > + } > > + }') > > + new_addr=$(perf script -i "${kdata2}" | awk ' > > + BEGIN { found=0 } > > + { > > + for (i=1; i<NF; i++) { > > + if ($i ~ /:[k]+:?$/) { > > + if (!found) { > > + print $(i+1) > > + found=1 > > + } > > + } > > + } > > + }') > > + > > + echo "Kernel ASLR: orig_addr=$orig_addr, new_addr=$new_addr" > > + > > + if [ -z "$orig_addr" ]; then > > + echo "Kernel ASLR test [Failed - no kernel samples in original file]" > > + err=1 > > + elif [ -z "$new_addr" ]; then > > + echo "Kernel ASLR test [Failed - could not find remapped address]" > > + err=1 > > + elif [ "$orig_addr" = "$new_addr" ]; then > > + echo "Kernel ASLR test [Failed - addresses are not remapped]" > > + err=1 > > + else > > + echo "Kernel ASLR test [Success]" > > + fi > > +} > > + > > +test_kernel_report_aslr() { > > + echo "Test kernel perf report consistency" > > + local kdata > > + kdata=$(mktemp "${temp_dir}/perf.data.kernel_report.XXXXXX") > > + local kdata2 > > + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel_report.XXXXXX") > > + local data_clean > > + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") > > + local 
log_file > > + log_file=$(mktemp "${temp_dir}/kernel_report_record.log.XXXXXX") > > + > > + # Try to record kernel samples > > + if ! perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then > > + echo "Skipping kernel report test as recording failed (maybe no permissions)" > > + return > > + fi > > + > > + # Check for warning about kernel map restriction > > + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then > > + echo "Skipping kernel report test as kernel map could not be recorded (permissions restricted)" > > + return > > + fi > > + > > + # Use -b to inject build-ids and force ordered events processing in both > > + perf inject -b -i "${kdata}" -o "${data_clean}" > > + perf inject -v -b --aslr -i "${kdata}" -o "${kdata2}" > > + > > + local report1="${temp_dir}/report_kernel1" > > + local report2="${temp_dir}/report_kernel2" > > + local report1_clean="${temp_dir}/report_kernel1.clean" > > + local report2_clean="${temp_dir}/report_kernel2.clean" > > + > > + perf report -i "${data_clean}" --stdio > "${report1}" > > + perf report -i "${kdata2}" --stdio > "${report2}" > > + > > + # Strip headers and compare lines with percentages > > + grep '%' "${report1}" | grep -v '^#' > "${report1_clean}" || true > > + grep '%' "${report2}" | grep -v '^#' > "${report2_clean}" || true > > + > > + # Normalize kernel DSOs and addresses in clean reports > > + # This allows kernel modules to be either a module or kernel.kallsyms > > + local report1_norm="${temp_dir}/report_kernel1.norm" > > + local report2_norm="${temp_dir}/report_kernel2.norm" > > + local diff_file="${temp_dir}/diff_kernel" > > + > > + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report1_clean}" | \ > > + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ > > + sort > "${report1_norm}" || true > > + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report2_clean}" | \ > > + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, 
"[kernel]", $0); print}' | \ > > + sort > "${report2_norm}" || true > > + > > + diff -u -w "${report1_norm}" "${report2_norm}" > "${diff_file}" || true > > + > > + if [ ! -s "${report1_norm}" ]; then > > + echo "Kernel Report ASLR test [Failed - no samples captured]" > > + err=1 > > + elif [ -s "${diff_file}" ]; then > > + echo "Kernel Report ASLR test [Failed - reports differ]" > > + echo "Showing first 20 lines of diff:" > > + head -n 20 "${diff_file}" > > + err=1 > > + else > > + echo "Kernel Report ASLR test [Success]" > > + fi > > +} > > + > > +test_regs_stripping() { > > + echo "Test user register stripping" > > + local rdata="${temp_dir}/perf.data.regs" > > + local rdata2="${temp_dir}/perf.data.regs.injected" > > + local rdata_clean="${temp_dir}/perf.data.regs.clean" > > + > > + if ! perf record -e cycles:u --user-regs -o "${rdata}" ${prog} > /dev/null 2>&1; then > > + echo "Skipping user registers test as recording failed (unsupported flag/platform)" > > + return > > + fi > > + > > + perf inject -b -i "${rdata}" -o "${rdata_clean}" > > + perf inject -v -b --aslr -i "${rdata}" -o "${rdata2}" > > + > > + local report1="${temp_dir}/report_regs1" > > + local report2="${temp_dir}/report_regs2" > > + local report1_clean="${temp_dir}/report_regs1.clean" > > + local report2_clean="${temp_dir}/report_regs2.clean" > > + local diff_file="${temp_dir}/diff_regs" > > + > > + perf report -i "${rdata_clean}" --stdio > "${report1}" 2>/dev/null || true > > + perf report -i "${rdata2}" --stdio > "${report2}" 2>/dev/null || true > > + > > + grep '%' "${report1}" | grep -v '^#' | \ > > + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | \ > > + sort > "${report1_clean}" || true > > + grep '%' "${report2}" | grep -v '^#' | \ > > + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | \ > > + sort > "${report2_clean}" || true > > + > > + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true > > + > > + if [ ! 
-s "${report1_clean}" ]; then > > + echo "User registers stripping test [Failed - profile trace starved/empty]" > > + err=1 > > + return > > + elif [ -s "${diff_file}" ]; then > > + echo "User registers stripping test [Failed - report parsing differs]" > > + echo "Showing first 20 lines of diff:" > > + head -n 20 "${diff_file}" > > + err=1 > > + return > > + fi > > + > > + local script_dump="${temp_dir}/script_regs_dump" > > + perf script -D -i "${rdata2}" > "${script_dump}" 2>/dev/null || true > > + if grep -q "user regs:" "${script_dump}"; then > > + echo "User registers stripping test [Failed - register dumps still present]" > > + err=1 > > + else > > + echo "User registers stripping test [Success]" > > + fi > > +} > > + > > +test_basic_aslr > > +test_pipe_aslr > > +test_callchain_aslr > > +test_report_aslr > > +test_pipe_report_aslr > > +test_pipe_out_report_aslr > > +test_dropped_samples > > +test_kernel_aslr > > +test_kernel_report_aslr > > +test_regs_stripping > > + > > +cleanup ${err} > > +exit $err > ^ permalink raw reply [flat|nested] 183+ messages in thread
* Re: [PATCH v19 5/5] perf test: Add inject ASLR test 2026-06-10 16:15 ` Ian Rogers @ 2026-06-11 8:32 ` James Clark 0 siblings, 0 replies; 183+ messages in thread From: James Clark @ 2026-06-11 8:32 UTC (permalink / raw) To: Ian Rogers Cc: adrian.hunter, gmx, jolsa, linux-kernel, linux-perf-users, mingo, peterz, acme, namhyung On 10/06/2026 5:15 pm, Ian Rogers wrote: > On Wed, Jun 10, 2026 at 6:26 AM James Clark <james.clark@linaro.org> wrote: >> >> >> >> On 08/06/2026 6:48 am, Ian Rogers wrote: >>> Add a new shell test to verify the feature. The test covers: >>> - Basic address remapping for user space samples. >>> - Pipe mode coverage for piped into. >>> - Callchain address remapping. >>> - Consistency of output before and after injection. >>> - Pipe mode report consistency. >>> - Dropping of samples that leak ASLR info (physical addresses). >>> - Kernel address remapping (utilizing a dedicated kernel-intensive VFS >>> dd workload to guarantee continuous timer interrupts sampling flow >>> inside kernel privilege states). >>> - Kernel report consistency with address normalization. >>> >>> The test suite is hardened with global 'set -o pipefail' assertions >>> to catch pipeline failures, stream-consuming awk processors to handle >>> SIGPIPE signals, and a dedicated pipe output scenario validating raw >>> 'perf inject -o -' stdout streams. >>> >>> Note on kernel DSO normalization in the test script: >>> The test script deliberately normalizes all kernel DSOs to a generic >>> [kernel] tag before diffing, as obfuscating physical kernel addresses >>> forces perf report to occasionally shift samples between individual >>> modules and [kernel.kallsyms] due to the lack of valid host module >>> boundary maps. >>> >>> Signed-off-by: Ian Rogers <irogers@google.com> >>> Assisted-by: Antigravity:gemini-3.1-pro >> >> Hi Ian, >> >> All the tests pass for me now on x86, and the previous Arm failures are >> fixed. 
But I still have the slowness issue on Arm (takes almost an hour >> to complete) and one new test failure. >> >> I narrowed the slowness issue down to check_invariants(). If I compile >> without DEBUG=1 then it's the same speed as x86. Here's the stack where >> it spends all of its time: >> >> maps__split_kallsyms() at util/symbol.c:1105 >> __dso__load_kallsyms() at util/symbol.c:1649 >> dso__load_kallsyms() at util/symbol.c:1655 >> dso__load_kernel_sym() at util/symbol.c:2244 >> dso__load() at util/symbol.c:1840 >> map__load() at util/map.c:351 >> thread__find_map() at util/event.c:744 >> machine__resolve() at util/event.c:818 >> process_sample_event() at builtin-script.c:2695 >> evlist__deliver_sample() at util/session.c:1807 >> machines__deliver_event() at util/session.c:1995 >> perf_session__deliver_event() at util/session.c:2226 >> ordered_events__deliver_event() at util/session.c:134 >> do_flush() at util/ordered-events.c:245 >> __ordered_events__flush() at util/ordered-events.c:324 >> ordered_events__flush() at util/ordered-events.c:342 >> __perf_session__process_events() at util/session.c:3578 >> perf_session__process_events() at util/session.c:3745 >> __cmd_script() at builtin-script.c:3255 >> cmd_script() at builtin-script.c:4603 >> run_builtin() at perf.c:348 >> handle_internal_command() at perf.c:398 >> run_argv() at perf.c:442 >> main() at perf.c:549 >> >> The other issue is that test_kernel_report_aslr() fails. 
>> report_kernel1.clean looks like: >> >> 66.72% dd [kernel.kallsyms] [k] chacha_permute >> 7.75% dd [kernel.kallsyms] [k] chacha_block_generic >> 6.14% dd [kernel.kallsyms] [k] _copy_to_iter >> 3.88% dd [kernel.kallsyms] [k] lock_acquire >> 2.91% dd [kernel.kallsyms] [k] lock_release >> 2.75% dd [kernel.kallsyms] [k] rcu_is_watching >> 2.58% dd [kernel.kallsyms] [k] __arch_copy_to_user >> 2.26% dd [kernel.kallsyms] [k] __might_resched >> 1.94% dd [kernel.kallsyms] [k] get_random_bytes_user >> 1.29% dd [kernel.kallsyms] [k] __might_fault >> 0.81% dd [kernel.kallsyms] [k] debug_lockdep_rcu_enabled >> 0.48% dd [kernel.kallsyms] [k] __might_sleep >> 0.16% dd [kernel.kallsyms] [k] kmem_cache_free >> 0.16% dd [kernel.kallsyms] [k] seqcount_lockdep_reader_access >> 0.16% dd [kernel.kallsyms] [k] set_pte_range >> >> But report_kernel2.clean is maybe 10x longer and doesn't have any symbols: >> >> 8.08% dd [unknown] [k] 0xffff8000108416ec >> 2.75% dd [unknown] [k] 0xffff80001084170c >> 1.94% dd [unknown] [k] 0xffff800010176360 >> 1.94% dd [unknown] [k] 0xffff80001084156c >> 1.45% dd [unknown] [k] 0xffff8000101be4bc >> ... >> >> I think this could be related to why it gets stuck in >> maps__split_kallsyms() doing check_invariants(). I'm not sure if I can >> do anything to help debug, or if it's working for you on Arm so we can >> compare our setups? > > So my focus has been making this solid on x86 and I hadn't been > testing on ARM. Kernel symbolization is always a pain. Perhaps for now > we should just skip the ASLR testing on ARM, and fix it in follow-up > patches. > > Thanks, > Ian > Makes sense, you'll probably have to skip both test_kernel_report_aslr() and test_kernel_aslr() even though the second one passes eventually because it takes so long. 
>>> --- >>> tools/perf/tests/shell/inject_aslr.sh | 525 ++++++++++++++++++++++++++ >>> 1 file changed, 525 insertions(+) >>> create mode 100755 tools/perf/tests/shell/inject_aslr.sh >>> >>> diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh >>> new file mode 100755 >>> index 000000000000..4af21ed2ce9a >>> --- /dev/null >>> +++ b/tools/perf/tests/shell/inject_aslr.sh >>> @@ -0,0 +1,525 @@ >>> +#!/bin/bash >>> +# SPDX-License-Identifier: GPL-2.0 >>> +# perf inject --aslr test >>> + >>> +set -e >>> +set -o pipefail >>> + >>> +shelldir=$(dirname "$0") >>> +# shellcheck source=lib/perf_has_symbol.sh >>> +. "${shelldir}"/lib/perf_has_symbol.sh >>> + >>> +sym="noploop" >>> + >>> +skip_test_missing_symbol ${sym} >>> + >>> +# Create global temp directory >>> +temp_dir=$(mktemp -d /tmp/perf-test-aslr.XXXXXXXXXX) >>> + >>> +prog="perf test -w noploop" >>> +[ "$(uname -m)" = "s390x" ] && prog="$prog 3" >>> +err=0 >>> +kprog="dd if=/dev/urandom of=/dev/null bs=1M count=50" >>> + >>> +cleanup() { >>> + local exit_code=${1:-$?} >>> + trap - EXIT TERM INT >>> + if [ "${exit_code}" -ne 0 ] || [ "${err}" -ne 0 ]; then >>> + echo "Test failed! Preserving temp directory: ${temp_dir}" >>> + return >>> + fi >>> + # Check if temp_dir is set and looks sane before removing >>> + if [[ "${temp_dir}" =~ ^/tmp/perf-test-aslr\. ]]; then >>> + rm -rf "${temp_dir}" >>> + fi >>> +} >>> + >>> +trap_cleanup() { >>> + local exit_code=$? 
>>> + echo "Unexpected signal in ${FUNCNAME[1]}" >>> + cleanup ${exit_code} >>> + exit ${exit_code} >>> +} >>> +trap trap_cleanup EXIT TERM INT >>> + >>> +get_noploop_addr() { >>> + local file=$1 >>> + perf script -i "$file" | awk ' >>> + BEGIN { found=0 } >>> + { >>> + for (i=1; i<=NF; i++) { >>> + if ($i ~ /noploop\+/) { >>> + if (!found) { >>> + print $(i-1) >>> + found=1 >>> + } >>> + } >>> + } >>> + }' >>> +} >>> + >>> +test_basic_aslr() { >>> + echo "Test basic ASLR remapping" >>> + local data >>> + data=$(mktemp "${temp_dir}/perf.data.basic.XXXXXX") >>> + local data2 >>> + data2=$(mktemp "${temp_dir}/perf.data2.basic.XXXXXX") >>> + >>> + perf record -e task-clock:u -o "${data}" ${prog} >>> + perf inject -v --aslr -i "${data}" -o "${data2}" >>> + >>> + orig_addr=$(get_noploop_addr "${data}") >>> + new_addr=$(get_noploop_addr "${data2}") >>> + >>> + echo "Basic ASLR: orig_addr=$orig_addr, new_addr=$new_addr" >>> + >>> + if [ -z "$orig_addr" ]; then >>> + echo "Basic ASLR test [Failed - no noploop samples in original file]" >>> + err=1 >>> + elif [ -z "$new_addr" ]; then >>> + echo "Basic ASLR test [Failed - could not find remapped address]" >>> + err=1 >>> + elif [ "$orig_addr" = "$new_addr" ]; then >>> + echo "Basic ASLR test [Failed - addresses are not remapped]" >>> + err=1 >>> + else >>> + echo "Basic ASLR test [Success]" >>> + fi >>> +} >>> + >>> +test_pipe_aslr() { >>> + echo "Test pipe mode ASLR remapping" >>> + local data >>> + data=$(mktemp "${temp_dir}/perf.data.pipe.XXXXXX") >>> + local data2 >>> + data2=$(mktemp "${temp_dir}/perf.data2.pipe.XXXXXX") >>> + >>> + # Use tee to save the original pipe data for comparison >>> + perf record -e task-clock:u -o - ${prog} | tee "${data}" | perf inject --aslr -o "${data2}" >>> + >>> + orig_addr=$(get_noploop_addr "${data}") >>> + new_addr=$(get_noploop_addr "${data2}") >>> + >>> + echo "Pipe ASLR: orig_addr=$orig_addr, new_addr=$new_addr" >>> + >>> + if [ -z "$orig_addr" ]; then >>> + echo "Pipe ASLR test 
[Failed - no noploop samples in original file]" >>> + err=1 >>> + elif [ -z "$new_addr" ]; then >>> + echo "Pipe ASLR test [Failed - could not find remapped address]" >>> + err=1 >>> + elif [ "$orig_addr" = "$new_addr" ]; then >>> + echo "Pipe ASLR test [Failed - addresses are not remapped]" >>> + err=1 >>> + else >>> + echo "Pipe ASLR test [Success]" >>> + fi >>> +} >>> + >>> +test_callchain_aslr() { >>> + echo "Test Callchain ASLR remapping" >>> + local data >>> + data=$(mktemp "${temp_dir}/perf.data.callchain.XXXXXX") >>> + local data2 >>> + data2=$(mktemp "${temp_dir}/perf.data2.callchain.XXXXXX") >>> + >>> + perf record -g -e task-clock:u -o "${data}" ${prog} >>> + perf inject --aslr -i "${data}" -o "${data2}" >>> + >>> + orig_addr=$(get_noploop_addr "${data}") >>> + new_addr=$(get_noploop_addr "${data2}") >>> + >>> + echo "Callchain ASLR: orig_addr=$orig_addr, new_addr=$new_addr" >>> + >>> + if [ -z "$orig_addr" ]; then >>> + echo "Callchain ASLR test [Failed - no noploop samples in original file]" >>> + err=1 >>> + elif [ -z "$new_addr" ]; then >>> + echo "Callchain ASLR test [Failed - could not find remapped address]" >>> + err=1 >>> + elif [ "$orig_addr" = "$new_addr" ]; then >>> + echo "Callchain ASLR test [Failed - addresses are not remapped]" >>> + err=1 >>> + else >>> + # Extract callchain addresses (indented lines starting with hex addresses) >>> + orig_callchain=$(perf script -i "${data}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') >>> + new_callchain=$(perf script -i "${data2}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') >>> + >>> + if [ -z "$orig_callchain" ]; then >>> + echo "Callchain ASLR test [Failed - no callchain samples in original file]" >>> + err=1 >>> + elif [ -z "$new_callchain" ]; then >>> + echo "Callchain ASLR test [Failed - callchain data was dropped]" >>> + err=1 >>> + elif [ "$orig_callchain" = "$new_callchain" ]; then >>> + echo "Callchain ASLR test [Failed - callchain addresses were not remapped]" >>> + err=1 >>> + else >>> + 
echo "Callchain ASLR test [Success]" >>> + fi >>> + fi >>> +} >>> + >>> +test_report_aslr() { >>> + echo "Test perf report consistency" >>> + local data >>> + data=$(mktemp "${temp_dir}/perf.data.report.XXXXXX") >>> + local data2 >>> + data2=$(mktemp "${temp_dir}/perf.data2.report.XXXXXX") >>> + local data_clean >>> + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") >>> + >>> + perf record -e task-clock:u -o "${data}" ${prog} >>> + # Use -b to inject build-ids and force ordered events processing in both >>> + perf inject -b -i "${data}" -o "${data_clean}" >>> + perf inject -v -b --aslr -i "${data}" -o "${data2}" >>> + >>> + local report1="${temp_dir}/report1_basic" >>> + local report2="${temp_dir}/report2_basic" >>> + local report1_clean="${temp_dir}/report1_basic.clean" >>> + local report2_clean="${temp_dir}/report2_basic.clean" >>> + local diff_file="${temp_dir}/diff_basic" >>> + >>> + perf report -i "${data_clean}" --stdio > "${report1}" >>> + perf report -i "${data2}" --stdio > "${report2}" >>> + >>> + # Strip headers and compare lines with percentages >>> + grep '%' "${report1}" | grep -v '^#' | \ >>> + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report1_clean}" || true >>> + grep '%' "${report2}" | grep -v '^#' | \ >>> + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report2_clean}" || true >>> + >>> + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true >>> + >>> + if [ ! 
-s "${report1_clean}" ]; then >>> + echo "Report ASLR test [Failed - no samples captured]" >>> + err=1 >>> + elif [ -s "${diff_file}" ]; then >>> + echo "Report ASLR test [Failed - reports differ]" >>> + echo "Showing first 20 lines of diff:" >>> + head -n 20 "${diff_file}" >>> + err=1 >>> + else >>> + echo "Report ASLR test [Success]" >>> + fi >>> +} >>> + >>> +test_pipe_report_aslr() { >>> + echo "Test pipe mode perf report consistency" >>> + local data >>> + data=$(mktemp "${temp_dir}/perf.data.pipe_report.XXXXXX") >>> + local data2 >>> + data2=$(mktemp "${temp_dir}/perf.data2.pipe_report.XXXXXX") >>> + local data_clean >>> + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") >>> + >>> + # Use tee to save the original pipe data, then process it with inject -b >>> + perf record -e task-clock:u -o - ${prog} | \ >>> + tee "${data}" | \ >>> + perf inject -b --aslr -o "${data2}" >>> + perf inject -b -i "${data}" -o "${data_clean}" >>> + >>> + local report1="${temp_dir}/report1_pipe" >>> + local report2="${temp_dir}/report2_pipe" >>> + local report1_clean="${temp_dir}/report1_pipe.clean" >>> + local report2_clean="${temp_dir}/report2_pipe.clean" >>> + local diff_file="${temp_dir}/diff_pipe" >>> + >>> + perf report -i "${data_clean}" --stdio > "${report1}" >>> + perf report -i "${data2}" --stdio > "${report2}" >>> + >>> + # Strip headers and compare lines with percentages >>> + grep '%' "${report1}" | grep -v '^#' | \ >>> + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report1_clean}" || true >>> + grep '%' "${report2}" | grep -v '^#' | \ >>> + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report2_clean}" || true >>> + >>> + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true >>> + >>> + if [ ! 
-s "${report1_clean}" ]; then >>> + echo "Pipe Report ASLR test [Failed - no samples captured]" >>> + err=1 >>> + elif [ -s "${diff_file}" ]; then >>> + echo "Pipe Report ASLR test [Failed - reports differ]" >>> + echo "Showing first 20 lines of diff:" >>> + head -n 20 "${diff_file}" >>> + err=1 >>> + else >>> + echo "Pipe Report ASLR test [Success]" >>> + fi >>> +} >>> + >>> +test_pipe_out_report_aslr() { >>> + echo "Test pipe output mode perf report consistency" >>> + local data >>> + data=$(mktemp "${temp_dir}/perf.data.pipe_out_report.XXXXXX") >>> + local data_clean >>> + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") >>> + >>> + perf record -e task-clock:u -o "${data}" ${prog} >>> + perf inject -b -i "${data}" -o "${data_clean}" >>> + >>> + local report1="${temp_dir}/report1_pipe_out" >>> + local report2="${temp_dir}/report2_pipe_out" >>> + local report1_clean="${temp_dir}/report1_pipe_out.clean" >>> + local report2_clean="${temp_dir}/report2_pipe_out.clean" >>> + local diff_file="${temp_dir}/diff_pipe_out" >>> + >>> + perf report -i "${data_clean}" --stdio > "${report1}" >>> + perf inject -b --aslr -i "${data}" -o - | perf report -i - --stdio > "${report2}" >>> + >>> + # Strip headers and compare lines with percentages >>> + grep '%' "${report1}" | grep -v '^#' | \ >>> + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report1_clean}" || true >>> + grep '%' "${report2}" | grep -v '^#' | \ >>> + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report2_clean}" || true >>> + >>> + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true >>> + >>> + if [ ! 
-s "${report1_clean}" ]; then >>> + echo "Pipe Output Report ASLR test [Failed - no samples captured]" >>> + err=1 >>> + elif [ -s "${diff_file}" ]; then >>> + echo "Pipe Output Report ASLR test [Failed - reports differ]" >>> + echo "Showing first 20 lines of diff:" >>> + head -n 20 "${diff_file}" >>> + err=1 >>> + else >>> + echo "Pipe Output Report ASLR test [Success]" >>> + fi >>> +} >>> + >>> +test_dropped_samples() { >>> + echo "Test dropped samples (phys-data)" >>> + local data >>> + data=$(mktemp "${temp_dir}/perf.data.dropped.XXXXXX") >>> + local data2 >>> + data2=$(mktemp "${temp_dir}/perf.data2.dropped.XXXXXX") >>> + >>> + # Check if --phys-data is supported by recording a short run >>> + if ! perf record -e task-clock:u --phys-data -o "${data}" -- sleep 0.1 > /dev/null 2>&1; then >>> + echo "Skipping dropped samples test as --phys-data is not supported" >>> + return >>> + fi >>> + >>> + perf record -e task-clock:u --phys-data -o "${data}" ${prog} >>> + perf inject --aslr -i "${data}" -o "${data2}" >>> + >>> + # Verify that the original file actually contained samples! >>> + orig_samples=$(perf script -i "${data}" | wc -l) >>> + if [ "$orig_samples" -eq 0 ]; then >>> + echo "Dropped samples test [Failed - no samples in original file]" >>> + err=1 >>> + else >>> + # Verify that samples are dropped. >>> + samples_count=$(perf script -i "${data2}" | wc -l) >>> + >>> + if [ "$samples_count" -gt 0 ]; then >>> + echo "Dropped samples test [Failed - samples were not dropped]" >>> + err=1 >>> + else >>> + echo "Dropped samples test [Success]" >>> + fi >>> + fi >>> +} >>> + >>> +test_kernel_aslr() { >>> + echo "Test kernel ASLR remapping" >>> + local kdata >>> + kdata=$(mktemp "${temp_dir}/perf.data.kernel.XXXXXX") >>> + local kdata2 >>> + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel.XXXXXX") >>> + local log_file >>> + log_file=$(mktemp "${temp_dir}/kernel_record.log.XXXXXX") >>> + >>> + # Try to record kernel samples >>> + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then >>> + echo "Skipping kernel ASLR test as recording failed (maybe no permissions)" >>> + return >>> + fi >>> + >>> + # Check for warning about kernel map restriction >>> + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then >>> + echo "Skipping kernel ASLR test as kernel map could not be recorded (permissions restricted)" >>> + return >>> + fi >>> + >>> + perf inject -v --aslr -i "${kdata}" -o "${kdata2}" >>> + >>> + # Check if kernel addresses are remapped. >>> + # Find the field that ends with :k: (the event name) and take the next field! >>> + orig_addr=$(perf script -i "${kdata}" | awk ' >>> + BEGIN { found=0 } >>> + { >>> + for (i=1; i<NF; i++) { >>> + if ($i ~ /:[k]+:?$/) { >>> + if (!found) { >>> + print $(i+1) >>> + found=1 >>> + } >>> + } >>> + } >>> + }') >>> + new_addr=$(perf script -i "${kdata2}" | awk ' >>> + BEGIN { found=0 } >>> + { >>> + for (i=1; i<NF; i++) { >>> + if ($i ~ /:[k]+:?$/) { >>> + if (!found) { >>> + print $(i+1) >>> + found=1 >>> + } >>> + } >>> + } >>> + }') >>> + >>> + echo "Kernel ASLR: orig_addr=$orig_addr, new_addr=$new_addr" >>> + >>> + if [ -z "$orig_addr" ]; then >>> + echo "Kernel ASLR test [Failed - no kernel samples in original file]" >>> + err=1 >>> + elif [ -z "$new_addr" ]; then >>> + echo "Kernel ASLR test [Failed - could not find remapped address]" >>> + err=1 >>> + elif [ "$orig_addr" = "$new_addr" ]; then >>> + echo "Kernel ASLR test [Failed - addresses are not remapped]" >>> + err=1 >>> + else >>> + echo "Kernel ASLR test [Success]" >>> + fi >>> +} >>> + >>> +test_kernel_report_aslr() { >>> + echo "Test kernel perf report consistency" >>> + local kdata >>> + kdata=$(mktemp "${temp_dir}/perf.data.kernel_report.XXXXXX") >>> + local kdata2 >>> + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel_report.XXXXXX") >>> + local data_clean >>> + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") >>> + local 
log_file >>> + log_file=$(mktemp "${temp_dir}/kernel_report_record.log.XXXXXX") >>> + >>> + # Try to record kernel samples >>> + if ! perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then >>> + echo "Skipping kernel report test as recording failed (maybe no permissions)" >>> + return >>> + fi >>> + >>> + # Check for warning about kernel map restriction >>> + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then >>> + echo "Skipping kernel report test as kernel map could not be recorded (permissions restricted)" >>> + return >>> + fi >>> + >>> + # Use -b to inject build-ids and force ordered events processing in both >>> + perf inject -b -i "${kdata}" -o "${data_clean}" >>> + perf inject -v -b --aslr -i "${kdata}" -o "${kdata2}" >>> + >>> + local report1="${temp_dir}/report_kernel1" >>> + local report2="${temp_dir}/report_kernel2" >>> + local report1_clean="${temp_dir}/report_kernel1.clean" >>> + local report2_clean="${temp_dir}/report_kernel2.clean" >>> + >>> + perf report -i "${data_clean}" --stdio > "${report1}" >>> + perf report -i "${kdata2}" --stdio > "${report2}" >>> + >>> + # Strip headers and compare lines with percentages >>> + grep '%' "${report1}" | grep -v '^#' > "${report1_clean}" || true >>> + grep '%' "${report2}" | grep -v '^#' > "${report2_clean}" || true >>> + >>> + # Normalize kernel DSOs and addresses in clean reports >>> + # This allows kernel modules to be either a module or kernel.kallsyms >>> + local report1_norm="${temp_dir}/report_kernel1.norm" >>> + local report2_norm="${temp_dir}/report_kernel2.norm" >>> + local diff_file="${temp_dir}/diff_kernel" >>> + >>> + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report1_clean}" | \ >>> + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ >>> + sort > "${report1_norm}" || true >>> + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report2_clean}" | \ >>> + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, 
"[kernel]", $0); print}' | \ >>> + sort > "${report2_norm}" || true >>> + >>> + diff -u -w "${report1_norm}" "${report2_norm}" > "${diff_file}" || true >>> + >>> + if [ ! -s "${report1_norm}" ]; then >>> + echo "Kernel Report ASLR test [Failed - no samples captured]" >>> + err=1 >>> + elif [ -s "${diff_file}" ]; then >>> + echo "Kernel Report ASLR test [Failed - reports differ]" >>> + echo "Showing first 20 lines of diff:" >>> + head -n 20 "${diff_file}" >>> + err=1 >>> + else >>> + echo "Kernel Report ASLR test [Success]" >>> + fi >>> +} >>> + >>> +test_regs_stripping() { >>> + echo "Test user register stripping" >>> + local rdata="${temp_dir}/perf.data.regs" >>> + local rdata2="${temp_dir}/perf.data.regs.injected" >>> + local rdata_clean="${temp_dir}/perf.data.regs.clean" >>> + >>> + if ! perf record -e cycles:u --user-regs -o "${rdata}" ${prog} > /dev/null 2>&1; then >>> + echo "Skipping user registers test as recording failed (unsupported flag/platform)" >>> + return >>> + fi >>> + >>> + perf inject -b -i "${rdata}" -o "${rdata_clean}" >>> + perf inject -v -b --aslr -i "${rdata}" -o "${rdata2}" >>> + >>> + local report1="${temp_dir}/report_regs1" >>> + local report2="${temp_dir}/report_regs2" >>> + local report1_clean="${temp_dir}/report_regs1.clean" >>> + local report2_clean="${temp_dir}/report_regs2.clean" >>> + local diff_file="${temp_dir}/diff_regs" >>> + >>> + perf report -i "${rdata_clean}" --stdio > "${report1}" 2>/dev/null || true >>> + perf report -i "${rdata2}" --stdio > "${report2}" 2>/dev/null || true >>> + >>> + grep '%' "${report1}" | grep -v '^#' | \ >>> + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | \ >>> + sort > "${report1_clean}" || true >>> + grep '%' "${report2}" | grep -v '^#' | \ >>> + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | \ >>> + sort > "${report2_clean}" || true >>> + >>> + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true >>> + >>> + if [ ! 
-s "${report1_clean}" ]; then >>> + echo "User registers stripping test [Failed - profile trace starved/empty]" >>> + err=1 >>> + return >>> + elif [ -s "${diff_file}" ]; then >>> + echo "User registers stripping test [Failed - report parsing differs]" >>> + echo "Showing first 20 lines of diff:" >>> + head -n 20 "${diff_file}" >>> + err=1 >>> + return >>> + fi >>> + >>> + local script_dump="${temp_dir}/script_regs_dump" >>> + perf script -D -i "${rdata2}" > "${script_dump}" 2>/dev/null || true >>> + if grep -q "user regs:" "${script_dump}"; then >>> + echo "User registers stripping test [Failed - register dumps still present]" >>> + err=1 >>> + else >>> + echo "User registers stripping test [Success]" >>> + fi >>> +} >>> + >>> +test_basic_aslr >>> +test_pipe_aslr >>> +test_callchain_aslr >>> +test_report_aslr >>> +test_pipe_report_aslr >>> +test_pipe_out_report_aslr >>> +test_dropped_samples >>> +test_kernel_aslr >>> +test_kernel_report_aslr >>> +test_regs_stripping >>> + >>> +cleanup ${err} >>> +exit $err >> ^ permalink raw reply [flat|nested] 183+ messages in thread
* Re: [PATCH v19 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes 2026-06-08 5:48 ` [PATCH v19 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (4 preceding siblings ...) 2026-06-08 5:48 ` [PATCH v19 5/5] perf test: Add inject ASLR test Ian Rogers @ 2026-06-08 15:08 ` Ian Rogers 2026-06-11 16:41 ` [PATCH v20 0/5] perf tools: Add inject --aslr feature Ian Rogers 6 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-08 15:08 UTC (permalink / raw) To: irogers, acme, namhyung, gmx Cc: adrian.hunter, james.clark, jolsa, linux-kernel, linux-perf-users, mingo, peterz On Sun, Jun 7, 2026 at 10:48 PM Ian Rogers <irogers@google.com> wrote: > > This patch series introduces the new 'perf inject --aslr' feature to > remap virtual memory addresses or drop physical memory event leaks > when profile record data is shared between machines. Bundled with this > feature is a bug fix inside the core map tracking tool that hardens > perf session analysis against concurrent lookup data races. So the sashiko review is down to just 1 "high" category issue on patch 3: """ Will this corrupt the CPU ID on cross-endian hosts? When the perf core reads the input file, it byte-swaps all 64-bit payload fields into host endianness. For PERF_SAMPLE_CPU, which consists of two 32-bit fields (cpu and res), this 64-bit byte-swap incorrectly reverses the two fields. The code correctly unpacks and repacks PERF_SAMPLE_TID using a union to safely recover its two 32-bit fields, but uses a blind COPY_U64() here for PERF_SAMPLE_CPU in aslr_tool__process_sample(). When the injected output file is written natively in host endianness, this incorrectly-swapped 64-bit value is permanently saved. Later, when the new profile is parsed, it will read the originally empty res field into data->cpu, corrupting the CPU ID. Should PERF_SAMPLE_CPU be unpacked and repacked similarly to PERF_SAMPLE_TID? 
""" So the problem is that cross-endian (perf.data is big-endian and output is little-endian, or vice versa) is broken all over the place is perf inject. The event swapping code doesn't unswap when writing the data out, for example. Nearly all of the last 10 workarounds attempted to resolve cross-endian issues, but this is a much larger job and belongs on the TODO list. Thanks, Ian > Detailed Mechanism of MMAP Mapping and ASLR virtual Address Allocation: > > The ASLR tool virtualizes the address space of the recorded processes by > intercepting MMAP and MMAP2 events to build a consistent translation > database, which is subsequently used to rewrite sample addresses. > > It maintains two primary lookup databases using hash maps: > 1. 'remap_addresses': Maps an original mapping key to its new remapped > base address. The key uses topological invariant coordinates: > (machine, dso, invariant). The invariant is computed as (start - pgoff) > for DSO-backed mappings. This invariant remains constant even when > perf's internal overlap-resolution splits a VMA into fragmented > pieces, ensuring split maps resolve consistently back to the same > remapped base. > 2. 'top_addresses': Tracks the allocation state per process (machine, pid). > It maintains 'remapped_max' (the highest allocated address in the > virtualized space). > > For each MMAP/MMAP2 event: > - We look up the DSO and invariant key in 'remap_addresses'. If found, we > reuse the translation, preserving the offset within the mapping. > - If not found, we allocate a new remapped address space: > - We use thread__find_map to look up the mapping immediately preceding > the new one in the original address space (at start - 1). If > the preceding > mapping was also remapped, we place the new mapping > contiguously after it in the remapped space. This preserves > contiguity of split mappings (e.g., symbols split by HugeTLB, > or anonymous .bss segments adjacent to initialized data). 
> - If no contiguous mapping is found, we insert a 1-page gap from > the highest allocated address (remapped_max) to prevent accidental > merging of unrelated VMAs. > - The event's start address (and pgoff for kernel maps) is rewritten, > and the event is delegated to the output writer. > > To remain strictly conservative and guarantee security, the tool > scrubs breakpoint addresses (bp_addr) from all synthesized stream > headers, completely drops PERF_RECORD_TEXT_POKE events to prevent > absolute immediate pointer operands leaks, and drops unsupported > complex payloads (such as user register stacks, raw tracepoints, and > hardware AUX tracing frames). > > Verification is reinforced with shell test ('inject_aslr.sh'). > > Prerequisite Bug Fix (Patch 1). During development, a core map > indexing issue was identified and resolved to prevent concurrent > lookup data races during session analysis. > > Changes since v18: > - Patch 2 & 3: Squashed the bounds checking boundary fixes into the "Strip > sample registers" patch. The array bounds checking now correctly uses > 'orig_sample_type' to traverse the event payload, preventing heap > corruption when dealing with events that have had their registers > stripped by the ASLR tool pipeline. > - Patch 2 & 3: Rebased the commit series to properly isolate the sample > address remapping logic from the register stripping logic. > - Patch 2 & 3: Expanded commit messages to extensively document the > cross-endian behavior of 'perf inject'. Because 'perf inject' effectively > acts as an endianness converter (writing a host-endian PERF_MAGIC and > flushing events exactly as they sit in memory after being byte-swapped > by perf_event__all64_swap), all injected events must be perfectly > constructed in the host's native endianness. 
Specifically, > perf_event__all64_swap byte-swaps the raw 64-bit payloads, which causes > 32-bit sequential fields like PERF_SAMPLE_TID (containing pid and tid) > to have their ordering reversed in memory (e.g., [BE_pid][BE_tid] becomes > [LE_tid][LE_pid]). The ASLR tool's sample construction logic was > expanded to explicitly unpack these fields and repack them sequentially > via unions to guarantee a strictly host-endian layout that resolves > these inversion anomalies. Similarly, branch stack flags (which are > modified in-place to host-endian bitfields by the parser) are copied > directly to the newly synthesized event, and 'needs_swap=false' is explicitly > used when re-parsing the synthesized event to prevent erroneous double > swapping. > - Series: Verified cross-endian robustness via the sashiko analyzer. > > Changes since v17: > - Patch 2: Reordered ksymbol deletion logic to ensure > `perf_event__process_ksymbol` deletes the map *after* the > `aslr_tool__findnew_mapping` translates the unregister offsets. > - Patch 2: Changed `aslr_tool__delete` to cleanly handle guest machine > deletion memory leaks. > - Patch 2: Resolved read-only segfaults on memory-mapped perf.data > headers during attribute stripping by using deep copies in > `perf_event__repipe_attr`. > - Patch 2: Fixed user space remap invariant logic to include > `(start - map__start(al.map))` preventing negative overflows on module > offset boundaries. > - Patch 3: Removed duplicate `bswap_64` payload byte-swapping inside the > array logic, allowing the host endianness macros `COPY_U64()` to > handle it dynamically. > - Patch 3: Fixed LBR branch sample starvation by explicitly reading branch > counters instead of dropping the entire sample. > - Patch 5: Fixed test flakiness by grepping out physical hex addresses > `0x[0-9a-f]{8,}` instead of matching exact address strings. > - Patch 5: Parameterized temp reports and updated test to scale with > `/dev/urandom` continuous random reads. 
> - Patch Series: Added Signed-off-by tags uniformly and Assisted-by tags to > track assistance. > > Changes since v16: > - Patch 2: Refactored inline ASLR stripping logic out of builtin-inject.c > and into dedicated helpers (aslr_tool__strip_attr_event and > aslr_tool__strip_evlist) in aslr.c to better separate concerns. > - Patch 2: Fixed guest machine allocation memory leak in > aslr_tool__delete() where machines__exit() explicitly skipped freeing > the guest processes tree. > - Patch 3: Fixed bounds-check violations during cross-endian parsing inside > aslr_tool__process_sample() by correctly applying bswap_64() to raw > offsets, iteration counts, sizes, and addresses prior to logical > evaluation when orig_needs_swap is active. > - Patch 4: Fixed pipe mode parser misalignment bug by safely fetching > needs_swap from the initialized evsel rather than blindly intercepting > HEADER_ATTR events prior to session parsing. > - Patch 4: Resolved checkpatch.pl line length warnings in the bswap_64 > endianness swapping logic. > - Patch Series: Reordered the final two patches. "perf aslr: Strip > sample registers" is now Patch 4, and "perf test: Add inject ASLR > test" is now Patch 5. This ensures the register stripping logic > is fully introduced before the comprehensive shell tests validate it, > preventing bisectability test failures and easing merge conflicts. > - Patch 5: Fixed "User registers stripping test" starvation when run as > root by explicitly using '-e cycles:u' during recording, preventing > the ring buffer from overflowing with kernel samples. > > Changes since v15: > - Patch 2: Added bounds checking for event->header.size before writing > to breakpoint fields to avoid heap buffer overflow on older ABI events. > - Patch 2: Fixed asymmetric calculation bug in aslr_tool__findnew_mapping() > where pgoff for anonymous kernel memory was not properly subtracted upon > insertion, causing the lookup addition to overflow. 
> - Patch 2: Added detailed comments documenting the symmetric lookup and > insertion math for unmapped and mapped memory blocks. > - Patch 5: Add missing kprobe and uprobe scrubbing of config1 and > config2 during aslr_tool__strip_evlist() to strictly conform with > repipe constraints. > > Changes since v14: > - Patch 2: Removed unnecessary vertical whitespace in builtin-inject.c. > - Patch 2: Added comments explaining why pgoff is assigned for > anonymous memory maps to prevent ASLR leaks. > - Patch 2: Removed orig_last_end tracking and refactored contiguous mapping > detection to use thread__find_map(..., start - 1, ...) based on Gabriel's > feedback. > - Patch 2: Scrub kprobe/uprobe event config1 and config2 fields to prevent > address leaks. > - Patch 2: Overwrite pgoff with the remapped start address for anonymous > mappings (detected via is_anon_memory and is_no_dso_memory). > - Patch 3: Fix C90 mixed declaration error for orig_needs_swap. > - Patch 3: Temporarily disable evsel->needs_swap during the secondary > evsel__parse_sample() call to prevent branch stack double-swapping bugs. > > Changes since v13: > - Patch 2: Added a NULL check for env before calling > perf_env__kernel_is_64_bit(env) to prevent potential segfaults if the > recorded environment has no headers. > - Patch 5: Fixed sample_size and id_pos going out of sync during > aslr_tool__strip_evlist() and aslr_tool__restore_evlist(). Instead of > using evsel__reset_sample_bit(), which was acting as a no-op due to > early bit clearing and corrupted sample_size, the tool now directly > updates sample_type and recomputes sample_size/id_pos dynamically. > Added orig_sample_size to aslr_evsel_priv to correctly restore the > state. > > Changes since v12: > - Patch 2: Fixed potential NULL pointer dereference in > remap_addresses__hash() when handling unmapped memory events (key->dso > is NULL) under REFCNT_CHECKING. 
> - Patch 2: Dynamically detect machine architecture bitness via > perf_env__kernel_is_64_bit() to select appropriate kernel_space_start > boundaries, avoiding 64-bit address injection on 32-bit platforms. > > Changes since v11: > - Patch 1: Fixed struct dso name accessor in maps.c by using > dso__name() instead of ->name. > - Patch 2: Fixed hash function in aslr.c to hash the underlying > dso pointer using RC_CHK_ACCESS to support reference count checking. > > Changes since v10: > - Patch 1: Added explicit tracking array logic in maps__load_maps() > to correctly accumulate valid maps (skipping NULL entries after > failures) and safely return the exact populated count, resolving > out-of-bounds pointer iteration panics. > - Patch 3: Fixed endianness bug during cross-endian sample parsing > by passing evsel->needs_swap instead of false to __evsel__parse_sample > in aslr.c, ensuring correct 32-bit field byte unswapping for packed > fields. Refactored evsel__parse_sample to take a needs_swap argument > via __evsel__parse_sample. > - Patch 4: Fixed inject_aslr.sh exit code handling in trap functions > to capture and propagate the correct pipeline failure status code > instead of unconditionally returning success or failing the test. > > Changes since v9: > - Patch 1: Added `-ENOMEM` error check inside > `maps__find_symbol_by_name()` and return `NULL` early. Added map > sorting state invalidation on early return in `maps__load_maps()`. > - Patch 2: Fixed encapsulation by using `thread__maps()` and > `thread__pid()` accessors in `aslr_tool__findnew_mapping()`. Added > `pr_warning_once` warning when raw auxtrace data is dropped. > - Patch 3: Fixed encapsulation by using `thread__maps()` and > `thread__pid()` accessors in `aslr_tool__remap_address()`. Wrapped > `evsel__parse_sample()` to temporarily disable `needs_swap` to avoid > branch stack endianness corruption on cross-endian files. Fixed ISO > C90 warning for declaration-after-statement for `orig_needs_swap`. 
> - Patch 4: Fixed duplicate cleanup by explicitly removing trap > handlers (`trap - EXIT TERM INT`) inside the `cleanup()` function. > - Patch 5: Fixed heap corruption by adding size bounds checking before > writing to `sample_regs_user` and `sample_regs_intr` fields. Added > missing register mask clearing logic for the `itrace` synthesis path > of `perf_event__repipe_attr()`. > > Ian Rogers (5): > perf maps: Add maps__mutate_mapping > perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking > perf inject/aslr: Implement sample address remapping > perf aslr: Strip sample registers > perf test: Add inject ASLR test > > tools/perf/builtin-inject.c | 81 +- > tools/perf/tests/shell/inject_aslr.sh | 525 +++++++++ > tools/perf/util/Build | 1 + > tools/perf/util/aslr.c | 1406 +++++++++++++++++++++++++ > tools/perf/util/aslr.h | 44 + > tools/perf/util/evsel.c | 6 +- > tools/perf/util/evsel.h | 10 +- > tools/perf/util/machine.c | 32 +- > tools/perf/util/maps.c | 149 ++- > tools/perf/util/maps.h | 3 + > tools/perf/util/symbol-elf.c | 41 +- > tools/perf/util/symbol.c | 17 +- > 12 files changed, 2244 insertions(+), 71 deletions(-) > create mode 100755 tools/perf/tests/shell/inject_aslr.sh > create mode 100644 tools/perf/util/aslr.c > create mode 100644 tools/perf/util/aslr.h > > -- > 2.54.0.1032.g2f8565e1d1-goog > ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v20 0/5] perf tools: Add inject --aslr feature 2026-06-08 5:48 ` [PATCH v19 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers ` (5 preceding siblings ...) 2026-06-08 15:08 ` [PATCH v19 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers @ 2026-06-11 16:41 ` Ian Rogers 2026-06-11 16:41 ` [PATCH v20 1/5] perf maps: Add maps__mutate_mapping Ian Rogers ` (5 more replies) 6 siblings, 6 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-11 16:41 UTC (permalink / raw) To: irogers, acme, james.clark, namhyung Cc: adrian.hunter, gmx, jolsa, linux-kernel, linux-perf-users, mingo, peterz This patch series introduces the new 'perf inject --aslr' feature to remap virtual memory addresses or drop physical memory event leaks when profile record data is shared between machines. Bundled with this feature is a bug fix inside the core map tracking tool that hardens perf session analysis against concurrent lookup data races. Detailed Mechanism of MMAP Mapping and ASLR virtual Address Allocation: The ASLR tool virtualizes the address space of the recorded processes by intercepting MMAP and MMAP2 events to build a consistent translation database, which is subsequently used to rewrite sample addresses. It maintains two primary lookup databases using hash maps: 1. 'remap_addresses': Maps an original mapping key to its new remapped base address. The key uses topological invariant coordinates: (machine, dso, invariant). The invariant is computed as (start - pgoff) for DSO-backed mappings. This invariant remains constant even when perf's internal overlap-resolution splits a VMA into fragmented pieces, ensuring split maps resolve consistently back to the same remapped base. 2. 'top_addresses': Tracks the allocation state per process (machine, pid). It maintains 'remapped_max' (the highest allocated address in the virtualized space). 
For each MMAP/MMAP2 event: - We look up the DSO and invariant key in 'remap_addresses'. If found, we reuse the translation, preserving the offset within the mapping. - If not found, we allocate a new remapped address space: - We use thread__find_map to look up the mapping immediately preceding the new one in the original address space (at start - 1). If the preceding mapping was also remapped, we place the new mapping contiguously after it in the remapped space. This preserves contiguity of split mappings (e.g., symbols split by HugeTLB, or anonymous .bss segments adjacent to initialized data). - If no contiguous mapping is found, we insert a 1-page gap from the highest allocated address (remapped_max) to prevent accidental merging of unrelated VMAs. - The event's start address (and pgoff for kernel maps) is rewritten, and the event is delegated to the output writer. To remain strictly conservative and guarantee security, the tool scrubs breakpoint addresses (bp_addr) from all synthesized stream headers, completely drops PERF_RECORD_TEXT_POKE events to prevent leaks of absolute immediate pointer operands, and drops unsupported complex payloads (such as user register stacks, raw tracepoints, and hardware AUX tracing frames). Verification is reinforced with a shell test ('inject_aslr.sh'). Prerequisite Bug Fix (Patch 1). During development, a core map indexing issue was identified and resolved to prevent concurrent lookup data races during session analysis. Changes since v19: - Patch 1: Group lock and unlock operations inside maps__mutate_mapping() into a single conditional block to resolve Clang 15 -Wthread-safety-analysis compilation errors. - Patch 5: Skip kernel-based ASLR test cases (test_kernel_aslr and test_kernel_report_aslr) on ARM architectures (aarch64 and arm*) to bypass high latency constraints and symbolization inconsistencies. Changes since v18: - Patch 2 & 3: Squashed the bounds checking boundary fixes into the "Strip sample registers" patch. 
The array bounds checking now correctly uses 'orig_sample_type' to traverse the event payload, preventing heap corruption when dealing with events that have had their registers stripped by the ASLR tool pipeline. - Patch 2 & 3: Rebased the commit series to properly isolate the sample address remapping logic from the register stripping logic. - Patch 2 & 3: Expanded commit messages to extensively document the cross-endian behavior of 'perf inject'. Because 'perf inject' effectively acts as an endianness converter (writing a host-endian PERF_MAGIC and flushing events exactly as they sit in memory after being byte-swapped by perf_event__all64_swap), all injected events must be perfectly constructed in the host's native endianness. Specifically, perf_event__all64_swap byte-swaps the raw 64-bit payloads, which causes 32-bit sequential fields like PERF_SAMPLE_TID (containing pid and tid) to have their ordering reversed in memory (e.g., [BE_pid][BE_tid] becomes [LE_tid][LE_pid]). The ASLR tool's sample construction logic was expanded to explicitly unpack these fields and repack them sequentially via unions to guarantee a strictly host-endian layout that resolves these inversion anomalies. Similarly, branch stack flags (which are modified in-place to host-endian bitfields by the parser) are copied directly to the newly synthesized event, and 'needs_swap=false' is explicitly used when re-parsing the synthesized event to prevent erroneous double swapping. - Series: Verified cross-endian robustness via the sashiko analyzer. Changes since v17: - Patch 2: Reordered ksymbol deletion logic to ensure `perf_event__process_ksymbol` deletes the map *after* the `aslr_tool__findnew_mapping` translates the unregister offsets. - Patch 2: Changed `aslr_tool__delete` to cleanly handle guest machine deletion memory leaks. - Patch 2: Resolved read-only segfaults on memory-mapped perf.data headers during attribute stripping by using deep copies in `perf_event__repipe_attr`. 
- Patch 2: Fixed user space remap invariant logic to include `(start - map__start(al.map))` preventing negative overflows on module offset boundaries. - Patch 3: Removed duplicate `bswap_64` payload byte-swapping inside the array logic, allowing the host endianness macros `COPY_U64()` to handle it dynamically. - Patch 3: Fixed LBR branch sample starvation by explicitly reading branch counters instead of dropping the entire sample. - Patch 5: Fixed test flakiness by grepping out physical hex addresses `0x[0-9a-f]{8,}` instead of matching exact address strings. - Patch 5: Parameterized temp reports and updated test to scale with `/dev/urandom` continuous random reads. - Patch Series: Added Signed-off-by tags uniformly and Assisted-by tags to track assistance. Changes since v16: - Patch 2: Refactored inline ASLR stripping logic out of builtin-inject.c and into dedicated helpers (aslr_tool__strip_attr_event and aslr_tool__strip_evlist) in aslr.c to better separate concerns. - Patch 2: Fixed guest machine allocation memory leak in aslr_tool__delete() where machines__exit() explicitly skipped freeing the guest processes tree. - Patch 3: Fixed bounds-check violations during cross-endian parsing inside aslr_tool__process_sample() by correctly applying bswap_64() to raw offsets, iteration counts, sizes, and addresses prior to logical evaluation when orig_needs_swap is active. - Patch 4: Fixed pipe mode parser misalignment bug by safely fetching needs_swap from the initialized evsel rather than blindly intercepting HEADER_ATTR events prior to session parsing. - Patch 4: Resolved checkpatch.pl line length warnings in the bswap_64 endianness swapping logic. - Patch Series: Reordered the final two patches. "perf aslr: Strip sample registers" is now Patch 4, and "perf test: Add inject ASLR test" is now Patch 5. 
This ensures the register stripping logic is fully introduced before the comprehensive shell tests validate it, preventing bisectability test failures and easing merge conflicts. - Patch 5: Fixed "User registers stripping test" starvation when run as root by explicitly using '-e cycles:u' during recording, preventing the ring buffer from overflowing with kernel samples. Changes since v15: - Patch 2: Added bounds checking for event->header.size before writing to breakpoint fields to avoid heap buffer overflow on older ABI events. - Patch 2: Fixed asymmetric calculation bug in aslr_tool__findnew_mapping() where pgoff for anonymous kernel memory was not properly subtracted upon insertion, causing the lookup addition to overflow. - Patch 2: Added detailed comments documenting the symmetric lookup and insertion math for unmapped and mapped memory blocks. - Patch 5: Add missing kprobe and uprobe scrubbing of config1 and config2 during aslr_tool__strip_evlist() to strictly conform with repipe constraints. Changes since v14: - Patch 2: Removed unnecessary vertical whitespace in builtin-inject.c. - Patch 2: Added comments explaining why pgoff is assigned for anonymous memory maps to prevent ASLR leaks. - Patch 2: Removed orig_last_end tracking and refactored contiguous mapping detection to use thread__find_map(..., start - 1, ...) based on Gabriel's feedback. - Patch 2: Scrub kprobe/uprobe event config1 and config2 fields to prevent address leaks. - Patch 2: Overwrite pgoff with the remapped start address for anonymous mappings (detected via is_anon_memory and is_no_dso_memory). - Patch 3: Fix C90 mixed declaration error for orig_needs_swap. - Patch 3: Temporarily disable evsel->needs_swap during the secondary evsel__parse_sample() call to prevent branch stack double-swapping bugs. Changes since v13: - Patch 2: Added a NULL check for env before calling perf_env__kernel_is_64_bit(env) to prevent potential segfaults if the recorded environment has no headers. 
- Patch 5: Fixed sample_size and id_pos going out of sync during aslr_tool__strip_evlist() and aslr_tool__restore_evlist(). Instead of using evsel__reset_sample_bit(), which was acting as a no-op due to early bit clearing and corrupted sample_size, the tool now directly updates sample_type and recomputes sample_size/id_pos dynamically. Added orig_sample_size to aslr_evsel_priv to correctly restore the state. Changes since v12: - Patch 2: Fixed potential NULL pointer dereference in remap_addresses__hash() when handling unmapped memory events (key->dso is NULL) under REFCNT_CHECKING. - Patch 2: Dynamically detect machine architecture bitness via perf_env__kernel_is_64_bit() to select appropriate kernel_space_start boundaries, avoiding 64-bit address injection on 32-bit platforms. Changes since v11: - Patch 1: Fixed struct dso name accessor in maps.c by using dso__name() instead of ->name. - Patch 2: Fixed hash function in aslr.c to hash the underlying dso pointer using RC_CHK_ACCESS to support reference count checking. Changes since v10: - Patch 1: Added explicit tracking array logic in maps__load_maps() to correctly accumulate valid maps (skipping NULL entries after failures) and safely return the exact populated count, resolving out-of-bounds pointer iteration panics. - Patch 3: Fixed endianness bug during cross-endian sample parsing by passing evsel->needs_swap instead of false to __evsel__parse_sample in aslr.c, ensuring correct 32-bit field byte unswapping for packed fields. Refactored evsel__parse_sample to take a needs_swap argument via __evsel__parse_sample. - Patch 4: Fixed inject_aslr.sh exit code handling in trap functions to capture and propagate the correct pipeline failure status code instead of unconditionally returning success or failing the test. Changes since v9: - Patch 1: Added `-ENOMEM` error check inside `maps__find_symbol_by_name()` and return `NULL` early. Added map sorting state invalidation on early return in `maps__load_maps()`. 
- Patch 2: Fixed encapsulation by using `thread__maps()` and `thread__pid()` accessors in `aslr_tool__findnew_mapping()`. Added `pr_warning_once` warning when raw auxtrace data is dropped. - Patch 3: Fixed encapsulation by using `thread__maps()` and `thread__pid()` accessors in `aslr_tool__remap_address()`. Wrapped `evsel__parse_sample()` to temporarily disable `needs_swap` to avoid branch stack endianness corruption on cross-endian files. Fixed ISO C90 warning for declaration-after-statement for `orig_needs_swap`. - Patch 4: Fixed duplicate cleanup by explicitly removing trap handlers (`trap - EXIT TERM INT`) inside the `cleanup()` function. - Patch 5: Fixed heap corruption by adding size bounds checking before writing to `sample_regs_user` and `sample_regs_intr` fields. Added missing register mask clearing logic for the `itrace` synthesis path of `perf_event__repipe_attr()`. Ian Rogers (5): perf maps: Add maps__mutate_mapping perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking perf inject/aslr: Implement sample address remapping perf aslr: Strip sample registers perf test: Add inject ASLR test tools/perf/builtin-inject.c | 81 +- tools/perf/tests/shell/inject_aslr.sh | 533 ++++++++++ tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 1406 +++++++++++++++++++++++++ tools/perf/util/aslr.h | 44 + tools/perf/util/evsel.c | 6 +- tools/perf/util/evsel.h | 10 +- tools/perf/util/machine.c | 32 +- tools/perf/util/maps.c | 148 ++- tools/perf/util/maps.h | 3 + tools/perf/util/symbol-elf.c | 41 +- tools/perf/util/symbol.c | 17 +- 12 files changed, 2251 insertions(+), 71 deletions(-) create mode 100755 tools/perf/tests/shell/inject_aslr.sh create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h -- 2.54.0.1099.g489fc7bff1-goog ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v20 1/5] perf maps: Add maps__mutate_mapping 2026-06-11 16:41 ` [PATCH v20 0/5] perf tools: Add inject --aslr feature Ian Rogers @ 2026-06-11 16:41 ` Ian Rogers 2026-06-11 16:41 ` [PATCH v20 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers ` (4 subsequent siblings) 5 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-11 16:41 UTC (permalink / raw) To: irogers, acme, james.clark, namhyung Cc: adrian.hunter, gmx, jolsa, linux-kernel, linux-perf-users, mingo, peterz During kernel ELF symbol parsing (dso__process_kernel_symbol), proc kallsyms image loading (dso__load_kernel_sym, dso__load_guest_kernel_sym), and dynamic kernel memory map alignment updates (machine__update_kernel_mmap), the loader directly modifies the start/end address fields of live map objects, which are the keys the parent maps array is sorted by. If these boundaries are mutated while the map is still held in the parent maps array (kmaps) and no lock is held, a race window opens in which parallel lookup threads (e.g., inside perf top) can wrongly assume the array is still sorted, run a binary search (bsearch) across an unsorted range, and fail the lookup. Fix this by introducing maps__mutate_mapping(), which acquires the parent maps write lock, runs a caller-supplied mutation callback to perform the field updates under the lock, and clears the sorted flags before releasing the write lock. This closes the concurrent lookup race window. The adjacent module alignment pass inside machine__create_kernel_maps() is deliberately left lockless, because it only runs single-threaded, by contract, during session initialization.
To safely support this unconditional down_write write lock mutator without recursive read-to-write self-deadlock upgrades during lazy symbol loading, we introduce a public maps__load_maps() API. It copies map pointers under a brief read lock and force-loads all modules locklessly outside the lock. Callers (such as perf inject) must pre-load all kernel symbol maps up front at startup using maps__load_maps(), completely bypassing dynamic runtime mutations. Fixes: 39b12f781271 ("perf tools: Make it possible to read object code from vmlinux") Signed-off-by: Ian Rogers <irogers@google.com> Assisted-by: Antigravity:gemini-3.1-pro --- tools/perf/util/machine.c | 32 +++++--- tools/perf/util/maps.c | 148 ++++++++++++++++++++++++++++------- tools/perf/util/maps.h | 3 + tools/perf/util/symbol-elf.c | 41 ++++++---- tools/perf/util/symbol.c | 17 +++- 5 files changed, 183 insertions(+), 58 deletions(-) diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index da1ad58758af..1ea06fde14e0 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1539,22 +1539,30 @@ static void machine__set_kernel_mmap(struct machine *machine, map__set_end(machine->vmlinux_map, ~0ULL); } -static int machine__update_kernel_mmap(struct machine *machine, - u64 start, u64 end) +struct kernel_mmap_mutation_ctx { + u64 start; + u64 end; +}; + +static int kernel_mmap_mutate_cb(struct map *map, void *data) { - struct map *orig, *updated; - int err; + struct kernel_mmap_mutation_ctx *ctx = data; - orig = machine->vmlinux_map; - updated = map__get(orig); + map__set_start(map, ctx->start); + map__set_end(map, ctx->end); + if (ctx->start == 0 && ctx->end == 0) + map__set_end(map, ~0ULL); + return 0; +} - machine->vmlinux_map = updated; - maps__remove(machine__kernel_maps(machine), orig); - machine__set_kernel_mmap(machine, start, end); - err = maps__insert(machine__kernel_maps(machine), updated); - map__put(orig); +static int machine__update_kernel_mmap(struct machine *machine, + 
u64 start, u64 end) +{ + struct kernel_mmap_mutation_ctx ctx = { .start = start, .end = end }; - return err; + return maps__mutate_mapping(machine__kernel_maps(machine), + machine->vmlinux_map, + kernel_mmap_mutate_cb, &ctx); } int machine__create_kernel_maps(struct machine *machine) diff --git a/tools/perf/util/maps.c b/tools/perf/util/maps.c index 923935ee21b6..f808df2fe77b 100644 --- a/tools/perf/util/maps.c +++ b/tools/perf/util/maps.c @@ -576,6 +576,48 @@ void maps__remove(struct maps *maps, struct map *map) #endif } +/** + * maps__mutate_mapping - Apply write-protected mutations to a map. + * @maps: The maps collection containing the map. + * @map: The map to mutate. + * @mutate_cb: Callback function that performs the actual mutations. + * @data: Private data passed to the callback. + * + * This acquires the write lock on the maps semaphore to safely protect + * concurrent readers from seeing partially mutated or unsorted map boundaries. + * + * WARNING: Acquiring down_write() here can trigger a recursive self-deadlock if + * the caller already holds the read lock (e.g., during maps__for_each_map() or + * maps__find() iteration paths that trigger lazy symbol loading). To completely + * avoid this deadlock, all kernel/module maps must be pre-loaded up-front (via + * maps__load_maps()) under a clean, single-threaded context before entering + * multi-threaded event processing loops. 
+ */ +int maps__mutate_mapping(struct maps *maps, struct map *map, + int (*mutate_cb)(struct map *map, void *data), void *data) +{ + int err = 0; + + if (maps) { + down_write(maps__lock(maps)); + + err = mutate_cb(map, data); + + RC_CHK_ACCESS(maps)->maps_by_address_sorted = false; + RC_CHK_ACCESS(maps)->maps_by_name_sorted = false; + + up_write(maps__lock(maps)); + +#ifdef HAVE_LIBDW_SUPPORT + libdw__invalidate_dwfl(maps, maps__libdw_addr_space_dwfl(maps)); +#endif + } else { + err = mutate_cb(map, data); + } + + return err; +} + bool maps__empty(struct maps *maps) { bool res; @@ -626,6 +668,41 @@ int maps__for_each_map(struct maps *maps, int (*cb)(struct map *map, void *data) return ret; } +int maps__load_maps(struct maps *maps) +{ + struct map **maps_copy; + unsigned int nr_maps; + int err = 0; + + if (!maps) + return 0; + + down_read(maps__lock(maps)); + nr_maps = maps__nr_maps(maps); + if (nr_maps == 0) { + up_read(maps__lock(maps)); + return 0; + } + maps_copy = calloc(nr_maps, sizeof(*maps_copy)); + if (!maps_copy) { + up_read(maps__lock(maps)); + return -ENOMEM; + } + for (unsigned int i = 0; i < nr_maps; i++) + maps_copy[i] = map__get(maps__maps_by_address(maps)[i]); + up_read(maps__lock(maps)); + + for (unsigned int i = 0; i < nr_maps; i++) { + if (map__load(maps_copy[i]) < 0) { + pr_warning("Failed to load map %s\n", dso__name(map__dso(maps_copy[i]))); + err = -1; + } + map__put(maps_copy[i]); + } + free(maps_copy); + return err; +} + void maps__remove_maps(struct maps *maps, bool (*cb)(struct map *map, void *data), void *data) { struct map **maps_by_address; @@ -668,40 +745,57 @@ struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp) return result; } -struct maps__find_symbol_by_name_args { - struct map **mapp; - const char *name; - struct symbol *sym; -}; - -static int maps__find_symbol_by_name_cb(struct map *map, void *data) +struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) { - 
struct maps__find_symbol_by_name_args *args = data; + struct map **maps_copy; + unsigned int nr_maps; + struct symbol *sym = NULL; - args->sym = map__find_symbol_by_name(map, args->name); - if (!args->sym) - return 0; + if (!maps) + return NULL; - if (!map__contains_symbol(map, args->sym)) { - args->sym = NULL; - return 0; + /* + * First, ensure all maps are loaded. We pre-load them outside of any + * read-to-write locks to avoid deadlocks. Even if some fail, we proceed. + */ + maps__load_maps(maps); + + /* + * Create a local snapshot of the maps while holding the read lock. + * This prevents deadlocking if iteration triggers further map insertions. + */ + down_read(maps__lock(maps)); + nr_maps = maps__nr_maps(maps); + maps_copy = calloc(nr_maps, sizeof(*maps_copy)); + if (maps_copy) { + for (unsigned int i = 0; i < nr_maps; i++) { + struct map *map = maps__maps_by_address(maps)[i]; + + maps_copy[i] = map__get(map); + } } + up_read(maps__lock(maps)); - if (args->mapp != NULL) - *args->mapp = map__get(map); - return 1; -} + if (!maps_copy) + return NULL; -struct symbol *maps__find_symbol_by_name(struct maps *maps, const char *name, struct map **mapp) -{ - struct maps__find_symbol_by_name_args args = { - .mapp = mapp, - .name = name, - .sym = NULL, - }; + for (unsigned int i = 0; i < nr_maps; i++) { + struct map *map = maps_copy[i]; + + sym = map__find_symbol_by_name(map, name); + if (sym && map__contains_symbol(map, sym)) { + if (mapp) + *mapp = map__get(map); + break; + } + sym = NULL; + } + + for (unsigned int i = 0; i < nr_maps; i++) + map__put(maps_copy[i]); - maps__for_each_map(maps, maps__find_symbol_by_name_cb, &args); - return args.sym; + free(maps_copy); + return sym; } int maps__find_ams(struct maps *maps, struct addr_map_symbol *ams) diff --git a/tools/perf/util/maps.h b/tools/perf/util/maps.h index 5b80b199685e..4ec9b7453a3b 100644 --- a/tools/perf/util/maps.h +++ b/tools/perf/util/maps.h @@ -59,8 +59,11 @@ void maps__set_libdw_addr_space_dwfl(struct 
maps *maps, void *dwfl); size_t maps__fprintf(struct maps *maps, FILE *fp); +int maps__load_maps(struct maps *maps); int maps__insert(struct maps *maps, struct map *map); void maps__remove(struct maps *maps, struct map *map); +int maps__mutate_mapping(struct maps *maps, struct map *map, + int (*mutate_cb)(struct map *map, void *data), void *data); struct map *maps__find(struct maps *maps, u64 addr); struct symbol *maps__find_symbol(struct maps *maps, u64 addr, struct map **mapp); diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index d84e2e031d43..c301c298ded9 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -1373,6 +1373,24 @@ static u64 ref_reloc(struct kmap *kmap) void __weak arch__sym_update(struct symbol *s __maybe_unused, GElf_Sym *sym __maybe_unused) { } +struct remap_kernel_ctx { + u64 sh_addr; + u64 sh_size; + u64 sh_offset; + struct kmap *kmap; +}; + +static int remap_kernel_cb(struct map *map, void *data) +{ + struct remap_kernel_ctx *ctx = data; + + map__set_start(map, ctx->sh_addr + ref_reloc(ctx->kmap)); + map__set_end(map, map__start(map) + ctx->sh_size); + map__set_pgoff(map, ctx->sh_offset); + map__set_mapping_type(map, MAPPING_TYPE__DSO); + return 0; +} + static int dso__process_kernel_symbol(struct dso *dso, struct map *map, GElf_Sym *sym, GElf_Shdr *shdr, struct maps *kmaps, struct kmap *kmap, @@ -1403,22 +1421,15 @@ static int dso__process_kernel_symbol(struct dso *dso, struct map *map, * map to the kernel dso. 
*/ if (*remap_kernel && dso__kernel(dso) && !kmodule) { + struct remap_kernel_ctx ctx = { + .sh_addr = shdr->sh_addr, + .sh_size = shdr->sh_size, + .sh_offset = shdr->sh_offset, + .kmap = kmap + }; + *remap_kernel = false; - map__set_start(map, shdr->sh_addr + ref_reloc(kmap)); - map__set_end(map, map__start(map) + shdr->sh_size); - map__set_pgoff(map, shdr->sh_offset); - map__set_mapping_type(map, MAPPING_TYPE__DSO); - /* Ensure maps are correctly ordered */ - if (kmaps) { - int err; - struct map *tmp = map__get(map); - - maps__remove(kmaps, map); - err = maps__insert(kmaps, map); - map__put(tmp); - if (err) - return err; - } + maps__mutate_mapping(kmaps, map, remap_kernel_cb, &ctx); } /* diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 0c46b24ee098..2cc911af8c81 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -48,6 +48,13 @@ #include <symbol/kallsyms.h> #include <sys/utsname.h> +static int map_fixup_cb(struct map *map, void *data __maybe_unused) +{ + map__fixup_start(map); + map__fixup_end(map); + return 0; +} + static int dso__load_kernel_sym(struct dso *dso, struct map *map); static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map); @@ -2240,10 +2247,11 @@ static int dso__load_kernel_sym(struct dso *dso, struct map *map) free(kallsyms_allocated_filename); if (err > 0 && !dso__is_kcore(dso)) { + struct maps *kmaps = map__kmaps(map); + dso__set_binary_type(dso, DSO_BINARY_TYPE__KALLSYMS); dso__set_long_name(dso, DSO__NAME_KALLSYMS, false); - map__fixup_start(map); - map__fixup_end(map); + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); } return err; @@ -2283,10 +2291,11 @@ static int dso__load_guest_kernel_sym(struct dso *dso, struct map *map) if (err > 0) pr_debug("Using %s for symbols\n", kallsyms_filename); if (err > 0 && !dso__is_kcore(dso)) { + struct maps *kmaps = map__kmaps(map); + dso__set_binary_type(dso, DSO_BINARY_TYPE__GUEST_KALLSYMS); dso__set_long_name(dso, machine->mmap_name, 
false); - map__fixup_start(map); - map__fixup_end(map); + maps__mutate_mapping(kmaps, map, map_fixup_cb, NULL); } return err; -- 2.54.0.1099.g489fc7bff1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* [PATCH v20 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking 2026-06-11 16:41 ` [PATCH v20 0/5] perf tools: Add inject --aslr feature Ian Rogers 2026-06-11 16:41 ` [PATCH v20 1/5] perf maps: Add maps__mutate_mapping Ian Rogers @ 2026-06-11 16:41 ` Ian Rogers 2026-06-11 17:28 ` sashiko-bot 2026-06-11 16:41 ` [PATCH v20 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers ` (3 subsequent siblings) 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-11 16:41 UTC (permalink / raw) To: irogers, acme, james.clark, namhyung Cc: adrian.hunter, gmx, jolsa, linux-kernel, linux-perf-users, mingo, peterz If perf.data files are taken from one machine to another they may leak virtual addresses and so weaken ASLR on the machine they came from. Add an '--aslr' option for perf inject that remaps all virtual addresses, or drops data/events, so that the virtual address information isn't leaked. This patch introduces the core ASLR remapping tool infrastructure and implements remapping/tracking for metadata events (MMAP, MMAP2, COMM, FORK, EXIT, KSYMBOL, TEXT_POKE). Sample events are delegated without remapping for now.
Signed-off-by: Ian Rogers <irogers@google.com> Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> Assisted-by: Antigravity:gemini-3.1-pro --- tools/perf/builtin-inject.c | 61 ++- tools/perf/util/Build | 1 + tools/perf/util/aslr.c | 814 ++++++++++++++++++++++++++++++++++++ tools/perf/util/aslr.h | 41 ++ 4 files changed, 908 insertions(+), 9 deletions(-) create mode 100644 tools/perf/util/aslr.c create mode 100644 tools/perf/util/aslr.h diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 75ffe31d03fe..8bb37095e2de 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -8,6 +8,7 @@ */ #include "builtin.h" +#include "util/aslr.h" #include "util/color.h" #include "util/dso.h" #include "util/vdso.h" @@ -24,6 +25,7 @@ #include "util/string2.h" #include "util/symbol.h" #include "util/synthetic-events.h" +#include "util/pmus.h" #include "util/thread.h" #include "util/namespaces.h" #include "util/unwind.h" @@ -124,6 +126,7 @@ struct perf_inject { bool in_place_update_dry_run; bool copy_kcore_dir; bool convert_callchain; + bool aslr; const char *input_name; struct perf_data output; u64 bytes_written; @@ -234,20 +237,36 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, u64 *ids; int ret; + union perf_event *aslr_event = NULL; + ret = perf_event__process_attr(tool, event, pevlist); if (ret) return ret; + if (inject->aslr) { + aslr_event = malloc(event->header.size); + if (!aslr_event) + return -ENOMEM; + memcpy(aslr_event, event, event->header.size); + aslr_tool__strip_attr_event(aslr_event, pevlist); + event = aslr_event; + } + /* If the output isn't a pipe then the attributes will be written as part of the header. 
*/ - if (!inject->output.is_pipe) - return 0; + if (!inject->output.is_pipe) { + ret = 0; + goto out; + } - if (!inject->itrace_synth_opts.set) - return perf_event__repipe_synth(tool, event); + if (!inject->itrace_synth_opts.set) { + ret = perf_event__repipe_synth(tool, event); + goto out; + } if (event->header.size < sizeof(struct perf_event_header) + PERF_ATTR_SIZE_VER0) { pr_err("Attribute event size %u is too small\n", event->header.size); - return -EINVAL; + ret = -EINVAL; + goto out; } /* @@ -263,7 +282,8 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, raw_attr_size > event->header.size - sizeof(event->header))) { pr_err("Attribute event size %u is too small for attr.size %u\n", event->header.size, raw_attr_size); - return -EINVAL; + ret = -EINVAL; + goto out; } memset(&attr, 0, sizeof(attr)); @@ -281,8 +301,11 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX; } - return perf_event__synthesize_attr(tool, &attr, (u32)n_ids, ids, + ret = perf_event__synthesize_attr(tool, &attr, (u32)n_ids, ids, perf_event__repipe_synth_cb); +out: + free(aslr_event); + return ret; } static int perf_event__repipe_event_update(const struct perf_tool *tool, @@ -2594,7 +2617,6 @@ static int __cmd_inject(struct perf_inject *inject) evsel->core.attr.exclude_callchain_user = 0; } } - session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc, @@ -2704,6 +2726,8 @@ int cmd_inject(int argc, const char **argv) unwind__option), OPT_BOOLEAN(0, "convert-callchain", &inject.convert_callchain, "Generate callchains using DWARF and drop register/stack data"), + OPT_BOOLEAN(0, "aslr", &inject.aslr, + "Remap virtual memory addresses similar to ASLR"), OPT_END() }; const char * const inject_usage[] = { @@ -2711,6 +2735,7 @@ int cmd_inject(int argc, 
const char **argv) NULL }; bool ordered_events; + struct perf_tool *tool = &inject.tool; if (!inject.itrace_synth_opts.set) { /* Disable eager loading of kernel symbols that adds overhead to perf inject. */ @@ -2731,6 +2756,11 @@ int cmd_inject(int argc, const char **argv) if (argc) usage_with_options(inject_usage, options); + if (inject.aslr && inject.convert_callchain) { + pr_err("Error: --aslr and --convert-callchain are mutually exclusive features.\n"); + return -EINVAL; + } + if (inject.strip && !inject.itrace_synth_opts.set) { pr_err("--strip option requires --itrace option\n"); return -1; @@ -2824,12 +2854,21 @@ int cmd_inject(int argc, const char **argv) inject.tool.schedstat_domain = perf_event__repipe_op2_synth; inject.tool.dont_split_sample_group = true; inject.tool.merge_deferred_callchains = false; - inject.session = __perf_session__new(&data, &inject.tool, + if (inject.aslr) { + tool = aslr_tool__new(&inject.tool); + if (!tool) { + ret = -ENOMEM; + goto out_close_output; + } + } + inject.session = __perf_session__new(&data, tool, /*trace_event_repipe=*/inject.output.is_pipe, /*host_env=*/NULL); if (IS_ERR(inject.session)) { ret = PTR_ERR(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); goto out_close_output; } @@ -2922,6 +2961,8 @@ int cmd_inject(int argc, const char **argv) goto out_delete; ret = __cmd_inject(&inject); + if (inject.aslr) + aslr_tool__strip_evlist(tool, inject.session->evlist); guest_session__exit(&inject.guest_session); @@ -2929,6 +2970,8 @@ int cmd_inject(int argc, const char **argv) strlist__delete(inject.known_build_ids); zstd_fini(&(inject.session->zstd_data)); perf_session__delete(inject.session); + if (inject.aslr) + aslr_tool__delete(tool); out_close_output: if (!inject.in_place_update) perf_data__close(&inject.output); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index b22cdc24082a..5e2265018826 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -6,6 +6,7 @@ perf-util-y += 
arm64-frame-pointer-unwind-support.o perf-util-y += addr2line.o perf-util-y += addr_location.o perf-util-y += annotate.o +perf-util-y += aslr.o perf-util-y += blake2s.o perf-util-y += block-info.o perf-util-y += block-range.o diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c new file mode 100644 index 000000000000..56fc444fbf54 --- /dev/null +++ b/tools/perf/util/aslr.c @@ -0,0 +1,814 @@ +// SPDX-License-Identifier: GPL-2.0 +#include "aslr.h" + +#include "addr_location.h" +#include "debug.h" +#include "event.h" +#include "evsel.h" +#include "evlist.h" +#include "machine.h" +#include "map.h" +#include "thread.h" +#include "tool.h" +#include "session.h" +#include "data.h" +#include "dso.h" +#include "pmus.h" + +#include <internal/lib.h> /* page_size */ +#include <linux/compiler.h> +#include <linux/zalloc.h> +#include <inttypes.h> +#include <unistd.h> + +/** + * struct remap_addresses_key - Key for mapping original addresses to remapped ones. + * @dso: Pointer to the DSO (Dynamic Shared Object) associated with the mapping. + * @invariant: Unique offset invariant within the VMA (Virtual Memory Area). + * Calculated as `start - pgoff`. This value remains constant when + * perf's internal `maps__fixup_overlap_and_insert` splits a map into + * fragmented VMA pieces due to overlapping events, allowing us to + * resolve split maps consistently back to the original VMA. + * @pid: Process ID associated with the mapping. + */ +struct remap_addresses_key { + struct machine *machine; + struct dso *dso; + u64 invariant; + pid_t pid; +}; + +struct aslr_mapping { + struct list_head node; + u64 orig_start; + u64 len; + u64 remap_start; +}; + +struct process_top_address { + u64 remapped_max; +}; +struct aslr_tool { + /** @tool: The tool implemented here and a pointer to a delegate to process the data. */ + struct delegate_tool tool; + /** @machines: The machines with the input, not remapped, virtual address layout. 
*/ + struct machines machines; + /** @event_copy: Buffer used to create an event to pass to the delegate. */ + char event_copy[PERF_SAMPLE_MAX_SIZE] __aligned(8); + /** @remap_addresses: mapping from remap_addresses_key to remapped address. */ + struct hashmap remap_addresses; + /** @top_addresses: mapping from process to max remapped address. */ + struct hashmap top_addresses; +}; + +static const pid_t kernel_pid = -1; + +/* Start remapping user processes from a small non-zero offset. */ +static const u64 user_space_start = 0x200000; +static const u64 kernel_space_start_64 = 0xffff800010000000ULL; +static const u64 kernel_space_start_32 = 0x80000000ULL; + +static size_t remap_addresses__hash(long _key, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key = (struct remap_addresses_key *)_key; + void *dso_ptr = key->dso ? RC_CHK_ACCESS(key->dso) : NULL; + + return (size_t)key->machine ^ (size_t)dso_ptr ^ key->invariant ^ key->pid; +} + +static bool remap_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) +{ + struct remap_addresses_key *key1 = (struct remap_addresses_key *)_key1; + struct remap_addresses_key *key2 = (struct remap_addresses_key *)_key2; + + return key1->machine == key2->machine && + RC_CHK_EQUAL(key1->dso, key2->dso) && + key1->invariant == key2->invariant && + key1->pid == key2->pid; +} + +struct top_addresses_key { + struct machine *machine; + pid_t pid; +}; + +static size_t top_addresses__hash(long _key, void *ctx __maybe_unused) +{ + struct top_addresses_key *key = (struct top_addresses_key *)_key; + + return (size_t)key->machine ^ key->pid; +} + +static bool top_addresses__equal(long _key1, long _key2, void *ctx __maybe_unused) +{ + struct top_addresses_key *key1 = (struct top_addresses_key *)_key1; + struct top_addresses_key *key2 = (struct top_addresses_key *)_key2; + + return key1->machine == key2->machine && key1->pid == key2->pid; +} + +static u64 round_up_to_page_size(u64 addr) +{ + return (addr + page_size - 1) & 
~((u64)page_size - 1); +} + +struct aslr_machine_priv { + bool kernel_maps_loaded; +}; + +static int aslr_tool__preload_kernel_maps(struct machine *machine) +{ + struct aslr_machine_priv *mpriv = machine->priv; + + if (!mpriv) { + mpriv = zalloc(sizeof(*mpriv)); + if (!mpriv) + return -ENOMEM; + machine->priv = mpriv; + } + + if (!mpriv->kernel_maps_loaded) { + struct maps *kmaps = machine__kernel_maps(machine); + + if (kmaps) { + int err = maps__load_maps(kmaps); + + if (err < 0) { + pr_err("ASLR: Failed to preload kernel maps for machine pid %d\n", + machine->pid); + return err; + } + } + mpriv->kernel_maps_loaded = true; + } + return 0; +} + +static void aslr_tool__free_machine_priv(struct machine *machine) +{ + free(machine->priv); + machine->priv = NULL; +} + +static void aslr_tool__destroy_machines_priv(struct machines *machines) +{ + struct rb_node *nd; + + aslr_tool__free_machine_priv(&machines->host); + for (nd = rb_first_cached(&machines->guests); nd; nd = rb_next(nd)) { + struct machine *machine = rb_entry(nd, struct machine, rb_node); + + aslr_tool__free_machine_priv(machine); + } +} + +static u64 aslr_tool__findnew_mapping(struct aslr_tool *aslr, + struct machine *session_machine, + struct thread *aslr_thread, + u8 cpumode, u64 start, + u64 len, u64 pgoff) +{ + /* Address location for dso lookup. */ + struct addr_location al; + /* Original ASLR address based key for the remap table. */ + struct remap_addresses_key remap_key; + /* The address in the ASLR sanitized address space less pg_off. */ + u64 *remapped_invariant_ptr; + /* Key for the maximum address in a process. */ + struct top_addresses_key top_addr_key; + /* Value in top address table. */ + struct process_top_address *top = NULL; + /* Address in ASLR sanitized address space. */ + u64 remap_addr; + /* Potentially allocated remap table key. */ + struct remap_addresses_key *new_remap_key = NULL; + /* + * Potentially allocated remap table key. 
+ * TODO: Avoid allocation necessary for perf 32-bit binary support. + */ + u64 *new_remap_val = NULL; + int err; + + if (!aslr_thread) + return 0; + + /* The key to look up an incoming address to the outgoing value. */ + addr_location__init(&al); + remap_key.machine = maps__machine(thread__maps(aslr_thread)); + remap_key.pid = (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? + kernel_pid : thread__pid(aslr_thread); + if (thread__find_map(aslr_thread, cpumode, start, &al)) { + struct dso *dso = map__dso(al.map); + const char *dso_name = dso ? dso__long_name(dso) : NULL; + + remap_key.dso = dso; + if (dso && !is_anon_memory(dso_name) && !is_no_dso_memory(dso_name)) + remap_key.invariant = map__start(al.map) - map__pgoff(al.map); + else + remap_key.invariant = map__start(al.map); + } else { + remap_key.dso = NULL; + remap_key.invariant = start; + } + + /* The key to look up top allocated address. */ + top_addr_key.machine = remap_key.machine; + top_addr_key.pid = remap_key.pid; + + if (hashmap__find(&aslr->remap_addresses, &remap_key, &remapped_invariant_ptr)) { + /* Mmap already exists. */ + u64 calculated_max; + + if (al.map) { + /* + * The cached value is the base of the invariant. We add the + * offset into the VMA (start - map__start), plus the map's + * pgoff, to get the precise virtual address within this chunk. + */ + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + + (start - map__start(al.map)); + } else { + /* + * For unmapped memory (e.g. kernel anonymous), the cached value + * was stored offset by pgoff. Adding pgoff yields the true remap_addr. + */ + remap_addr = *remapped_invariant_ptr + pgoff; + } + + calculated_max = remap_addr + len; + + /* See if top mapping was expanded. 
*/ + if (hashmap__find(&aslr->top_addresses, &top_addr_key, &top)) { + if (calculated_max > top->remapped_max) + top->remapped_max = calculated_max; + } + addr_location__exit(&al); + return remap_addr; + } + /* No mmap, create an entry from the top address. */ + if (hashmap__find(&aslr->top_addresses, &top_addr_key, &top)) { + struct addr_location prev_al; + bool is_contiguous = false; + + /* Current max allocated mmap address within the process. */ + remap_addr = top->remapped_max; + + addr_location__init(&prev_al); + if (thread__find_map(aslr_thread, cpumode, start - 1, &prev_al)) { + if (map__end(prev_al.map) == start) + is_contiguous = true; + } + addr_location__exit(&prev_al); + + if (is_contiguous) { + /* Contiguous mapping, do not add 1 page gap! */ + remap_addr = round_up_to_page_size(remap_addr); + } else { + /* Give 1 page gap from current max page. */ + remap_addr = round_up_to_page_size(remap_addr); + remap_addr += page_size; + } + if (remap_addr + len > top->remapped_max) + top->remapped_max = remap_addr + len; + } else { + /* First address of the process, allocate key and first top address. */ + struct top_addresses_key *tk; + struct process_top_address *top_val; + struct perf_env *env = session_machine ? session_machine->env : NULL; + bool is_64 = env ? perf_env__kernel_is_64_bit(env) : (sizeof(void *) == 8); + u64 kernel_start_addr = is_64 ? kernel_space_start_64 : kernel_space_start_32; + + remap_addr = (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? 
+ kernel_start_addr : user_space_start; + remap_addr = round_up_to_page_size(remap_addr); + + tk = malloc(sizeof(*tk)); + top_val = malloc(sizeof(*top_val)); + if (!tk || !top_val) { + err = -ENOMEM; + } else { + *tk = top_addr_key; + top_val->remapped_max = remap_addr + len; + err = hashmap__insert(&aslr->top_addresses, tk, top_val, + HASHMAP_ADD, NULL, NULL); + } + if (err) { + errno = -err; + pr_err("Failure to add ASLR process top address %m\n"); + free(tk); + free(top_val); + addr_location__exit(&al); + return 0; + } + } + /* Create rmeapping entry. */ + new_remap_key = malloc(sizeof(*new_remap_key)); + new_remap_val = malloc(sizeof(u64)); + if (!new_remap_key || !new_remap_val) { + err = -ENOMEM; + } else { + *new_remap_key = remap_key; + new_remap_key->dso = dso__get(remap_key.dso); + if (cpumode == PERF_RECORD_MISC_KERNEL || + cpumode == PERF_RECORD_MISC_GUEST_KERNEL) { + if (al.map) { + *new_remap_val = remap_addr - + (start - map__start(al.map)) - + map__pgoff(al.map); + } else { + /* + * Subtract pgoff from the base virtual address so that + * when the lookup path adds pgoff back, it perfectly + * cancels out and returns remap_addr. + */ + *new_remap_val = remap_addr - pgoff; + } + } else { + *new_remap_val = remap_addr - (al.map ? 
(start - map__start(al.map)) + + map__pgoff(al.map) : pgoff); + } + err = hashmap__add(&aslr->remap_addresses, new_remap_key, new_remap_val); + if (err) + dso__put(new_remap_key->dso); + } + if (err) { + errno = -err; + pr_err("Failure to add ASLR remapping %m\n"); + free(new_remap_key); + free(new_remap_val); + addr_location__exit(&al); + return 0; + } + addr_location__exit(&al); + return remap_addr; +} + +static int aslr_tool__process_mmap(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_mmap(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap.pid, event->mmap.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap, &event->mmap, event->mmap.header.size); + /* Remaps the mmap.start. */ + new_event->mmap.start = aslr_tool__findnew_mapping(aslr, machine, thread, cpumode, + event->mmap.start, + event->mmap.len, + event->mmap.pgoff); + /* + * For anonymous memory (and kernel maps), the kernel populates the + * event's pgoff field with the original un-obfuscated virtual address + * in bytes (i.e. (addr >> PAGE_SHIFT) << PAGE_SHIFT). 
+ * We must overwrite pgoff with the new remapped byte address to prevent + * leaking the original ASLR layout. + */ + if (is_anon_memory(event->mmap.filename) || is_no_dso_memory(event->mmap.filename) || + ((cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL) && + !is_kernel_module(event->mmap.filename, cpumode))) + new_event->mmap.pgoff = new_event->mmap.start; + err = delegate->mmap(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_mmap2(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + u8 cpumode; + struct thread *thread; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_mmap2(tool, event, sample, aslr_machine); + if (err) + return err; + + thread = machine__findnew_thread(aslr_machine, event->mmap2.pid, event->mmap2.tid); + if (!thread) + return -ENOMEM; + memcpy(&new_event->mmap2, &event->mmap2, event->mmap2.header.size); + /* Remaps the mmap2.start. 
*/ + new_event->mmap2.start = aslr_tool__findnew_mapping(aslr, machine, thread, cpumode, + event->mmap2.start, + event->mmap2.len, + event->mmap2.pgoff); + /* + * For anonymous memory (and kernel maps), the kernel populates the + * event's pgoff field with the original un-obfuscated virtual address + * in bytes (i.e. (addr >> PAGE_SHIFT) << PAGE_SHIFT). + * We must overwrite pgoff with the new remapped byte address to prevent + * leaking the original ASLR layout. + */ + if (is_anon_memory(event->mmap2.filename) || is_no_dso_memory(event->mmap2.filename) || + ((cpumode == PERF_RECORD_MISC_KERNEL || cpumode == PERF_RECORD_MISC_GUEST_KERNEL) && + !is_kernel_module(event->mmap2.filename, cpumode))) + new_event->mmap2.pgoff = new_event->mmap2.start; + err = delegate->mmap2(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_comm(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. 
*/ + err = perf_event__process_comm(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->comm(delegate, event, sample, machine); +} + +static int aslr_tool__process_fork(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. */ + err = perf_event__process_fork(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->fork(delegate, event, sample, machine); +} + +static int aslr_tool__process_exit(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + struct machine *aslr_machine; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + /* Create the thread, map, etc. in the ASLR before virtual address space. 
*/ + err = perf_event__process_exit(tool, event, sample, aslr_machine); + if (err) + return err; + + return delegate->exit(delegate, event, sample, machine); +} + +static int aslr_tool__process_text_poke(const struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + /* Drop in case the instruction encodes an ASLR revealing address. */ + return 0; +} + +static int aslr_tool__process_ksymbol(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + union perf_event *new_event; + struct thread *thread; + struct machine *aslr_machine; + bool is_unregister; + int err; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + new_event = (union perf_event *)aslr->event_copy; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + thread = machine__findnew_thread(aslr_machine, kernel_pid, 0); + if (!thread) + return -ENOMEM; + + is_unregister = (event->ksymbol.flags & PERF_RECORD_KSYMBOL_FLAGS_UNREGISTER); + + memcpy(&new_event->ksymbol, &event->ksymbol, event->ksymbol.header.size); + + if (is_unregister) { + new_event->ksymbol.addr = aslr_tool__findnew_mapping(aslr, machine, thread, + PERF_RECORD_MISC_KERNEL, + event->ksymbol.addr, + event->ksymbol.len, + /*pgoff=*/0); + err = perf_event__process_ksymbol(tool, event, sample, aslr_machine); + } else { + err = perf_event__process_ksymbol(tool, event, sample, aslr_machine); + new_event->ksymbol.addr = aslr_tool__findnew_mapping(aslr, machine, thread, + PERF_RECORD_MISC_KERNEL, + event->ksymbol.addr, + event->ksymbol.len, + /*pgoff=*/0); + 
} + if (err) { + thread__put(thread); + return err; + } + + err = delegate->ksymbol(delegate, new_event, sample, machine); + thread__put(thread); + return err; +} + +static int aslr_tool__process_sample(const struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct perf_tool *delegate = aslr->tool.delegate; + + return delegate->sample(delegate, event, sample, machine); +} + +static int skipn(int fd, off_t n) +{ + char buf[4096]; + ssize_t ret; + + while (n > 0) { + ret = read(fd, buf, min_t(off_t, n, (off_t)sizeof(buf))); + if (ret <= 0) + return ret; + n -= ret; + } + + return 0; +} + +static s64 aslr_tool__process_auxtrace(const struct perf_tool *tool __maybe_unused, + struct perf_session *session, + union perf_event *event) +{ + pr_warning_once("ASLR: Dropping auxtrace data as it cannot be obfuscated.\n"); + if (perf_data__is_pipe(session->data)) { + /* Copy behavior of the stub by reading all pipe data. 
*/ + int err = skipn(perf_data__fd(session->data), event->auxtrace.size); + + if (err < 0) + return err; + } + return event->auxtrace.size; +} + +static int aslr_tool__process_auxtrace_info(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + +static int aslr_tool__process_auxtrace_error(const struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused) +{ + return 0; +} + + +void aslr_tool__strip_attr_event(union perf_event *event, struct evlist **pevlist __maybe_unused) +{ + u32 attr_size; + + attr_size = event->attr.attr.size ?: PERF_ATTR_SIZE_VER0; + + if (attr_size >= (offsetof(struct perf_event_attr, sample_type) + sizeof(u64))) { + event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + } + + if (attr_size >= (offsetof(struct perf_event_attr, type) + sizeof(u32))) { + u32 type = event->attr.attr.type; + + if (type == PERF_TYPE_BREAKPOINT && + attr_size >= (offsetof(struct perf_event_attr, bp_addr) + sizeof(u64))) { + event->attr.attr.bp_addr = 0; + } else if (type >= PERF_TYPE_MAX) { + struct perf_pmu *pmu; + + pmu = perf_pmus__find_by_type(type); + if (pmu && (!strcmp(pmu->name, "kprobe") || + !strcmp(pmu->name, "uprobe"))) { + if (attr_size >= (offsetof(struct perf_event_attr, config1) + sizeof(u64))) + event->attr.attr.config1 = 0; + if (attr_size >= (offsetof(struct perf_event_attr, config2) + sizeof(u64))) + event->attr.attr.config2 = 0; + } + } + } +} + +void aslr_tool__strip_evlist(struct perf_tool *tool __maybe_unused, + struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) + evsel->core.attr.bp_addr = 0; + else if (evsel->core.attr.type >= PERF_TYPE_MAX) { + struct perf_pmu *pmu = 
perf_pmus__find_by_type(evsel->core.attr.type); + + if (pmu && (!strcmp(pmu->name, "kprobe") || + !strcmp(pmu->name, "uprobe"))) { + evsel->core.attr.config1 = 0; + evsel->core.attr.config2 = 0; + } + } + } +} + +static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) +{ + delegate_tool__init(&aslr->tool, delegate); + aslr->tool.tool.ordered_events = true; + + machines__init(&aslr->machines); + + hashmap__init(&aslr->remap_addresses, + remap_addresses__hash, remap_addresses__equal, + /*ctx=*/NULL); + hashmap__init(&aslr->top_addresses, + top_addresses__hash, top_addresses__equal, + /*ctx=*/NULL); + + aslr->tool.tool.sample = aslr_tool__process_sample; + /* read - reads a counter, okay to delegate. */ + aslr->tool.tool.mmap = aslr_tool__process_mmap; + aslr->tool.tool.mmap2 = aslr_tool__process_mmap2; + aslr->tool.tool.comm = aslr_tool__process_comm; + aslr->tool.tool.fork = aslr_tool__process_fork; + aslr->tool.tool.exit = aslr_tool__process_exit; + /* namespaces, cgroup, lost, lost_sample, aux, */ + /* itrace_start, aux_output_hw_id, context_switch, throttle, unthrottle */ + /* - no virtual addresses. */ + aslr->tool.tool.ksymbol = aslr_tool__process_ksymbol; + /* bpf - no virtual address. */ + aslr->tool.tool.text_poke = aslr_tool__process_text_poke; + /* + * event_update, tracing_data, finished_round, build_id, id_index, + * auxtrace_info, auxtrace_error, time_conv, thread_map, cpu_map, + * stat_config, stat, feature, finished_init, bpf_metadata, compressed, + * auxtrace - no virtual addresses. 
+ */ + aslr->tool.tool.auxtrace = aslr_tool__process_auxtrace; + aslr->tool.tool.auxtrace_info = aslr_tool__process_auxtrace_info; + aslr->tool.tool.auxtrace_error = aslr_tool__process_auxtrace_error; +} + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate) +{ + struct aslr_tool *aslr = zalloc(sizeof(*aslr)); + + if (!aslr) + return NULL; + + aslr_tool__init(aslr, delegate); + return &aslr->tool.tool; +} + +void aslr_tool__delete(struct perf_tool *tool) +{ + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct hashmap_entry *cur; + size_t bkt; + struct rb_node *nd; + + if (!tool) + return; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + + hashmap__for_each_entry(&aslr->remap_addresses, cur, bkt) { + struct remap_addresses_key *key = (struct remap_addresses_key *)cur->pkey; + + if (key) + dso__put(key->dso); + zfree(&cur->pkey); + zfree(&cur->pvalue); + } + hashmap__for_each_entry(&aslr->top_addresses, cur, bkt) { + zfree(&cur->pkey); + zfree(&cur->pvalue); + } + + hashmap__clear(&aslr->remap_addresses); + hashmap__clear(&aslr->top_addresses); + aslr_tool__destroy_machines_priv(&aslr->machines); + machines__destroy_kernel_maps(&aslr->machines); + + while ((nd = rb_first_cached(&aslr->machines.guests)) != NULL) { + struct machine *machine = rb_entry(nd, struct machine, rb_node); + + rb_erase_cached(nd, &aslr->machines.guests); + machine__delete(machine); + } + + machines__exit(&aslr->machines); + free(aslr); +} diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h new file mode 100644 index 000000000000..2b82f711bc67 --- /dev/null +++ b/tools/perf/util/aslr.h @@ -0,0 +1,41 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_ASLR_H +#define __PERF_ASLR_H + +#include <linux/perf_event.h> + +#define ASLR_SUPPORTED_SAMPLE_TYPE ( \ + PERF_SAMPLE_IDENTIFIER | \ + PERF_SAMPLE_IP | \ + PERF_SAMPLE_TID | \ + PERF_SAMPLE_TIME | \ + PERF_SAMPLE_ADDR | \ + 
PERF_SAMPLE_ID | \ + PERF_SAMPLE_STREAM_ID | \ + PERF_SAMPLE_CPU | \ + PERF_SAMPLE_PERIOD | \ + PERF_SAMPLE_READ | \ + PERF_SAMPLE_CALLCHAIN | \ + PERF_SAMPLE_RAW | \ + PERF_SAMPLE_BRANCH_STACK | \ + PERF_SAMPLE_STACK_USER | \ + PERF_SAMPLE_WEIGHT_TYPE | \ + PERF_SAMPLE_DATA_SRC | \ + PERF_SAMPLE_TRANSACTION | \ + PERF_SAMPLE_PHYS_ADDR | \ + PERF_SAMPLE_CGROUP | \ + PERF_SAMPLE_DATA_PAGE_SIZE | \ + PERF_SAMPLE_CODE_PAGE_SIZE | \ + PERF_SAMPLE_AUX) + +struct perf_tool; +struct evsel; +struct evlist; +union perf_event; + +struct perf_tool *aslr_tool__new(struct perf_tool *delegate); +void aslr_tool__delete(struct perf_tool *aslr); +void aslr_tool__strip_attr_event(union perf_event *event, struct evlist **pevlist); +void aslr_tool__strip_evlist(struct perf_tool *tool, struct evlist *evlist); + +#endif /* __PERF_ASLR_H */ -- 2.54.0.1099.g489fc7bff1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v20 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking 2026-06-11 16:41 ` [PATCH v20 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers @ 2026-06-11 17:28 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-06-11 17:28 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 1 potential issue(s) to consider: - [Medium] Heap out-of-bounds read/write in `perf_event__repipe_attr` via TOCTOU race condition -- commit 0ecff6d57b51709063f6cde26a7febea45ab1d6a Author: Ian Rogers <irogers@google.com> perf inject: Add ASLR virtual address remapping This patch introduces the core ASLR remapping tool infrastructure and implements remapping/tracking for metadata events. Sample events are delegated without remapping for now. > diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c > index 75ffe31d03fe..8bb37095e2de 100644 > --- a/tools/perf/builtin-inject.c > +++ b/tools/perf/builtin-inject.c > @@ -234,20 +237,36 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, > u64 *ids; > int ret; > > + union perf_event *aslr_event = NULL; > + > ret = perf_event__process_attr(tool, event, pevlist); > if (ret) > return ret; > > + if (inject->aslr) { > + aslr_event = malloc(event->header.size); > + if (!aslr_event) > + return -ENOMEM; > + memcpy(aslr_event, event, event->header.size); > + aslr_tool__strip_attr_event(aslr_event, pevlist); > + event = aslr_event; > + } [Severity: Medium] Could this introduce a TOCTOU (Time-Of-Check to Time-Of-Use) race condition? Since event points directly to the memory-mapped perf.data file contents, reading event->header.size multiple times without READ_ONCE allows a concurrent modification to change the size between the malloc() and memcpy() calls. 
This could result in a heap buffer overflow during memcpy(), or bypass the earlier size validation done in perf_event__process_attr(). This would leave malicious sizes in the copied event->attr.attr.size for aslr_tool__strip_attr_event() to blindly trust. > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > new file mode 100644 > index 000000000000..56fc444fbf54 > --- /dev/null > +++ b/tools/perf/util/aslr.c [ ... ] > +void aslr_tool__strip_attr_event(union perf_event *event, struct evlist **pevlist __maybe_unused) > +{ > + u32 attr_size; > + > + attr_size = event->attr.attr.size ?: PERF_ATTR_SIZE_VER0; > + > + if (attr_size >= (offsetof(struct perf_event_attr, sample_type) + sizeof(u64))) { > + event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; > + } > + > + if (attr_size >= (offsetof(struct perf_event_attr, type) + sizeof(u32))) { > + u32 type = event->attr.attr.type; > + > + if (type == PERF_TYPE_BREAKPOINT && > + attr_size >= (offsetof(struct perf_event_attr, bp_addr) + sizeof(u64))) { > + event->attr.attr.bp_addr = 0; > + } else if (type >= PERF_TYPE_MAX) { > + struct perf_pmu *pmu; > + > + pmu = perf_pmus__find_by_type(type); > + if (pmu && (!strcmp(pmu->name, "kprobe") || > + !strcmp(pmu->name, "uprobe"))) { > + if (attr_size >= (offsetof(struct perf_event_attr, config1) + sizeof(u64))) > + event->attr.attr.config1 = 0; > + if (attr_size >= (offsetof(struct perf_event_attr, config2) + sizeof(u64))) > + event->attr.attr.config2 = 0; [Severity: Medium] If a malicious size is copied into the heap buffer due to the TOCTOU race in perf_event__repipe_attr(), does this code perform an out-of-bounds write? Since aslr_tool__strip_attr_event() implicitly trusts the attr_size read from the heap copy, clearing config1 and config2 could overwrite adjacent heap memory if attr_size is large, but the actual allocated event->header.size was small. 
> + } > + } > + } > +} -- Sashiko AI review · https://sashiko.dev/#/patchset/20260611164122.3974068-1-irogers@google.com?part=2 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v20 3/5] perf inject/aslr: Implement sample address remapping 2026-06-11 16:41 ` [PATCH v20 0/5] perf tools: Add inject --aslr feature Ian Rogers 2026-06-11 16:41 ` [PATCH v20 1/5] perf maps: Add maps__mutate_mapping Ian Rogers 2026-06-11 16:41 ` [PATCH v20 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers @ 2026-06-11 16:41 ` Ian Rogers 2026-06-11 17:41 ` sashiko-bot 2026-06-11 16:41 ` [PATCH v20 4/5] perf aslr: Strip sample registers Ian Rogers ` (2 subsequent siblings) 5 siblings, 1 reply; 183+ messages in thread From: Ian Rogers @ 2026-06-11 16:41 UTC (permalink / raw) To: irogers, acme, james.clark, namhyung Cc: adrian.hunter, gmx, jolsa, linux-kernel, linux-perf-users, mingo, peterz Add the sample address remapping logic to the ASLR tool. This patch implements aslr_tool__process_sample, which parses sample events, remaps IPs, ADDRs, callchains, and branch stacks using the mappings collected from metadata events, and drops potentially leaking raw, register, stack, physical address, and aux samples. Also adds the aslr_tool__remap_address helper function. Note on cross-endian compatibility: 'perf inject' functions as an endianness converter. Input files are read, and their events are byte-swapped to host endianness in memory. When the tool emits its output, it writes a host-endian PERF_MAGIC in the file header, thereby marking the output file as host-endian natively. Because the output file is always written in host endianness, events and payloads must be constructed entirely using host-endian layouts. For this reason, this patch explicitly un-packs and repacks PERF_SAMPLE_TID (and PERF_SAMPLE_CPU) using unions to ensure that the sequential 32-bit layout is correctly aligned in host endianness. Similarly, branch stack flags (which are modified in-place to host-endian bitfields by the parser) are copied directly to the newly synthesized event. 
When re-parsing the newly synthesized event, 'needs_swap=false' is explicitly used to prevent double swapping the already host-endian fields. Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> Signed-off-by: Ian Rogers <irogers@google.com> Assisted-by: Antigravity:gemini-3.1-pro --- tools/perf/util/aslr.c | 465 +++++++++++++++++++++++++++++++++++++++- tools/perf/util/evsel.c | 6 +- tools/perf/util/evsel.h | 10 +- 3 files changed, 472 insertions(+), 9 deletions(-) diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c index 56fc444fbf54..c4602a43e04f 100644 --- a/tools/perf/util/aslr.c +++ b/tools/perf/util/aslr.c @@ -20,6 +20,7 @@ #include <linux/zalloc.h> #include <inttypes.h> #include <unistd.h> +#include <byteswap.h> /** * struct remap_addresses_key - Key for mapping original addresses to remapped ones. @@ -112,6 +113,60 @@ static u64 round_up_to_page_size(u64 addr) return (addr + page_size - 1) & ~((u64)page_size - 1); } +static u64 aslr_tool__remap_address(struct aslr_tool *aslr, + struct thread *aslr_thread, + u8 cpumode, + u64 addr) +{ + struct addr_location al; + struct remap_addresses_key key; + u64 *remapped_invariant_ptr = NULL; + u64 remap_addr = 0; + u8 effective_cpumode = cpumode; + + if (!aslr_thread) + return 0; /* No thread. */ + + addr_location__init(&al); + if (!thread__find_map(aslr_thread, cpumode, addr, &al)) { + /* + * If lookup fails with specified cpumode, try fallback to the other space + * to be robust against bad cpumode in samples. 
+ */ + if (cpumode == PERF_RECORD_MISC_KERNEL) + effective_cpumode = PERF_RECORD_MISC_USER; + else if (cpumode == PERF_RECORD_MISC_USER) + effective_cpumode = PERF_RECORD_MISC_KERNEL; + else if (cpumode == PERF_RECORD_MISC_GUEST_KERNEL) + effective_cpumode = PERF_RECORD_MISC_GUEST_USER; + else if (cpumode == PERF_RECORD_MISC_GUEST_USER) + effective_cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + + if (!thread__find_map(aslr_thread, effective_cpumode, addr, &al)) { + addr_location__exit(&al); + return 0; /* No mmap. */ + } + } + + key.machine = maps__machine(thread__maps(aslr_thread)); + key.dso = map__dso(al.map); + key.invariant = map__start(al.map) - map__pgoff(al.map); + key.pid = (effective_cpumode == PERF_RECORD_MISC_KERNEL || + effective_cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? + kernel_pid : thread__pid(aslr_thread); + + if (hashmap__find(&aslr->remap_addresses, &key, &remapped_invariant_ptr)) { + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + + (addr - map__start(al.map)); + } else { + pr_debug("Cannot find a remapped entry for address %lx in mapping %lx(%lx) for pid=%d\n", + addr, map__start(al.map), map__size(al.map), key.pid); + } + + addr_location__exit(&al); + return remap_addr; +} + struct aslr_machine_priv { bool kernel_maps_loaded; }; @@ -616,13 +671,415 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, struct perf_sample *sample, struct machine *machine) { - struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); - struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); - struct perf_tool *delegate = aslr->tool.delegate; + struct evsel *evsel = sample->evsel; + struct delegate_tool *del_tool; + struct aslr_tool *aslr; + struct perf_tool *delegate; + int ret; + u64 sample_type; + struct thread *thread; + struct machine *aslr_machine; + __u64 max_i; + __u64 max_j; + union perf_event *new_event; + struct perf_sample new_sample; + __u64 *in_array, *out_array; + u8 cpumode; + u64 
addr; + size_t i; + size_t j; + struct aslr_evsel_priv *priv = NULL; + u64 orig_sample_type; + u64 orig_regs_user; + u64 orig_regs_intr; + + del_tool = container_of(tool, struct delegate_tool, tool); + aslr = container_of(del_tool, struct aslr_tool, tool); + delegate = aslr->tool.delegate; + + + + if (evsel__is_dummy_event(evsel)) + return delegate->sample(delegate, event, sample, machine); + + ret = -EFAULT; + sample_type = evsel->core.attr.sample_type; + max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); + max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); + new_event = (union perf_event *)aslr->event_copy; + cpumode = sample->cpumode; + i = 0; + j = 0; + + aslr_machine = machines__findnew(&aslr->machines, machine->pid); + if (!aslr_machine) + return -ENOMEM; + if (aslr_tool__preload_kernel_maps(aslr_machine) < 0) + return -ENOMEM; + + thread = machine__findnew_thread(aslr_machine, sample->pid, sample->tid); + + if (!thread) + return -ENOMEM; + + if (max_i > PERF_SAMPLE_MAX_SIZE / sizeof(u64)) + goto out_put; + + new_event->sample.header = event->sample.header; + + in_array = &event->sample.array[0]; + out_array = &new_event->sample.array[0]; + +#define CHECK_BOUNDS(required_i, required_j) \ + (i + (required_i) > max_i || j + (required_j) > max_j) + +#define COPY_U64() \ + do { \ + if (CHECK_BOUNDS(1, 1)) { \ + ret = -EFAULT; \ + goto out_put; \ + } \ + out_array[j++] = in_array[i++]; \ + } while (0) + +#define REMAP_U64(addr_field) \ + do { \ + u64 remapped; \ + if (CHECK_BOUNDS(1, 1)) { \ + ret = -EFAULT; \ + goto out_put; \ + } \ + remapped = aslr_tool__remap_address(aslr, thread, cpumode, addr_field); \ + out_array[j++] = remapped; \ + i++; \ + } while (0) + + if (sample_type & PERF_SAMPLE_IDENTIFIER) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_IP) + REMAP_U64(sample->ip); + if (sample_type & PERF_SAMPLE_TID) { + union { + u64 val64; + u32 val32[2]; + } u; + + if (CHECK_BOUNDS(1, 1)) { + 
ret = -EFAULT; + goto out_put; + } + u.val32[0] = sample->pid; + u.val32[1] = sample->tid; + out_array[j++] = u.val64; + i++; + } + if (sample_type & PERF_SAMPLE_TIME) + COPY_U64(); /* time */ + if (sample_type & PERF_SAMPLE_ADDR) + REMAP_U64(sample->addr); + if (sample_type & PERF_SAMPLE_ID) + COPY_U64(); /* id */ + if (sample_type & PERF_SAMPLE_STREAM_ID) + COPY_U64(); /* stream_id */ + if (sample_type & PERF_SAMPLE_CPU) + COPY_U64(); /* cpu, res */ + if (sample_type & PERF_SAMPLE_PERIOD) + COPY_U64(); /* period */ + if (sample_type & PERF_SAMPLE_READ) { + if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } else { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + nr = in_array[i]; + COPY_U64(); + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) + COPY_U64(); /* time_enabled */ + if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) + COPY_U64(); /* time_running */ + for (u64 cntr = 0; cntr < nr; cntr++) { + COPY_U64(); /* value */ + if (evsel->core.attr.read_format & PERF_FORMAT_ID) + COPY_U64(); /* id */ + if (evsel->core.attr.read_format & PERF_FORMAT_LOST) + COPY_U64(); /* lost */ + } + } + } + if (sample_type & PERF_SAMPLE_CALLCHAIN) { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + nr = in_array[i]; + COPY_U64(); + + for (u64 cntr = 0; cntr < nr; cntr++) { + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + addr = in_array[i++]; + if (addr >= PERF_CONTEXT_MAX) { + out_array[j++] = addr; + switch (addr) { + case PERF_CONTEXT_HV: + cpumode = 
PERF_RECORD_MISC_HYPERVISOR; + break; + case PERF_CONTEXT_KERNEL: + cpumode = PERF_RECORD_MISC_KERNEL; + break; + case PERF_CONTEXT_USER: + cpumode = PERF_RECORD_MISC_USER; + break; + case PERF_CONTEXT_GUEST: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_KERNEL: + cpumode = PERF_RECORD_MISC_GUEST_KERNEL; + break; + case PERF_CONTEXT_GUEST_USER: + cpumode = PERF_RECORD_MISC_GUEST_USER; + break; + case PERF_CONTEXT_USER_DEFERRED: + if (cntr + 1 >= nr) { + pr_debug("Truncated callchain deferred cookie context\n"); + ret = 0; + goto out_put; + } + /* + * Immediately followed by a 64-bit + * stitching cookie. Skip/Copy it! + */ + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j++] = in_array[i++]; + cntr++; + cpumode = PERF_RECORD_MISC_USER; + break; + default: + pr_debug("invalid callchain context: %"PRIx64"\n", addr); + ret = 0; + goto out_put; + } + continue; + } + addr = aslr_tool__remap_address(aslr, thread, cpumode, addr); + out_array[j++] = addr; + } + } + if (sample_type & PERF_SAMPLE_RAW) { + size_t bytes = sizeof(u32) + sample->raw_size; + size_t u64_words = (bytes + 7) / 8; + + if (i + u64_words > max_i || j + u64_words > max_j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], bytes); + i += u64_words; + j += u64_words; + /* + * TODO: certain raw samples can be remapped, such as + * tracepoints by examining their fields. 
+ */ + pr_debug("Dropping raw samples as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + u64 nr; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + nr = in_array[i]; + COPY_U64(); + + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_HW_INDEX) + COPY_U64(); /* hw_idx */ + + if (nr > (ULLONG_MAX / 3)) { + ret = -EFAULT; + goto out_put; + } + if (nr * 3 > max_i - i || nr * 3 > max_j - j) { + ret = -EFAULT; + goto out_put; + } + for (u64 cntr = 0; cntr < nr; cntr++) { + u64 from = in_array[i++]; + u64 to = in_array[i++]; + + from = aslr_tool__remap_address(aslr, thread, sample->cpumode, from); + to = aslr_tool__remap_address(aslr, thread, sample->cpumode, to); + + out_array[j++] = from; + out_array[j++] = to; + out_array[j++] = in_array[i++]; /* flags */ + } + if (evsel->core.attr.branch_sample_type & PERF_SAMPLE_BRANCH_COUNTERS) { + if (nr > max_i - i || nr > max_j - j) { + ret = -EFAULT; + goto out_put; + } + for (u64 cntr = 0; cntr < nr; cntr++) + COPY_U64(); + } + } + if (sample_type & PERF_SAMPLE_REGS_USER) { + if (CHECK_BOUNDS(1, 0)) { + ret = -EFAULT; + goto out_put; + } + /* abi */ + COPY_U64(); + /* TODO: can this be less conservative? */ + pr_debug("Dropping regs user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_STACK_USER) { + u64 size; - return delegate->sample(delegate, event, sample, machine); + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + size = in_array[i]; + COPY_U64(); + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + if (size % 8) { + size_t pad = 8 - (size % 8); + + memset(((char *)&out_array[j]) + size, 0, pad); + } + i += u64_words; + j += u64_words; + } + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping stack user sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + COPY_U64(); /* perf_sample_weight */ + if (sample_type & PERF_SAMPLE_DATA_SRC) + COPY_U64(); /* data_src */ + if (sample_type & PERF_SAMPLE_TRANSACTION) + COPY_U64(); /* transaction */ + if (sample_type & PERF_SAMPLE_REGS_INTR) { + if (CHECK_BOUNDS(1, 0)) { + ret = -EFAULT; + goto out_put; + } + /* abi */ + COPY_U64(); + /* TODO: can this be less conservative? */ + pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_PHYS_ADDR) { + COPY_U64(); /* phys_addr */ + /* TODO: can this be less conservative? */ + pr_debug("Dropping physical address sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + if (sample_type & PERF_SAMPLE_CGROUP) + COPY_U64(); /* cgroup */ + if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + COPY_U64(); /* data_page_size */ + if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + COPY_U64(); /* code_page_size */ + + if (sample_type & PERF_SAMPLE_AUX) { + u64 size; + + if (CHECK_BOUNDS(1, 1)) { + ret = -EFAULT; + goto out_put; + } + out_array[j] = in_array[i]; + size = out_array[j++]; + i++; + if (size > 0) { + size_t u64_words = size / 8 + (size % 8 ? 1 : 0); + + if (u64_words > max_i - i || u64_words > max_j - j) { + ret = -EFAULT; + goto out_put; + } + memcpy(&out_array[j], &in_array[i], size); + if (size % 8) { + size_t pad = 8 - (size % 8); + + memset(((char *)&out_array[j]) + size, 0, pad); + } + i += u64_words; + j += u64_words; + } + /* TODO: can this be less conservative? */ + pr_debug("Dropping aux sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + if (evsel__is_offcpu_event(evsel)) { + /* TODO: can this be less conservative? 
*/ + pr_debug("Dropping off-CPU sample as possible ASLR leak\n"); + ret = 0; + goto out_put; + } + + new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); + + perf_sample__init(&new_sample, /*all=*/ true); + ret = __evsel__parse_sample(evsel, new_event, &new_sample, /*needs_swap=*/false); + + if (ret) { + perf_sample__exit(&new_sample); + goto out_put; + } + + new_sample.evsel = evsel; + ret = delegate->sample(delegate, new_event, &new_sample, machine); + perf_sample__exit(&new_sample); + +out_put: + thread__put(thread); + return ret; } +#undef CHECK_BOUNDS +#undef COPY_U64 +#undef REMAP_U64 + static int skipn(int fd, off_t n) { char buf[4096]; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 34c03f47a913..05fa0010c858 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -3337,11 +3337,11 @@ static int __set_offcpu_sample(struct perf_sample *data) return -EFAULT; } -int evsel__parse_sample(struct evsel *evsel, union perf_event *event, - struct perf_sample *data) +int __evsel__parse_sample(struct evsel *evsel, union perf_event *event, + struct perf_sample *data, bool needs_swap) { u64 type = evsel->core.attr.sample_type; - bool swapped = evsel->needs_swap; + bool swapped = needs_swap; const __u64 *array; u16 max_size = event->header.size; const void *endp = (void *)event + max_size; diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 8178858d168a..8009be22cc3f 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -432,8 +432,14 @@ static inline int evsel__read_on_cpu_scaled(struct evsel *evsel, int cpu_map_idx return __evsel__read_on_cpu(evsel, cpu_map_idx, thread, true); } -int evsel__parse_sample(struct evsel *evsel, union perf_event *event, - struct perf_sample *sample); +int __evsel__parse_sample(struct evsel *evsel, union perf_event *event, + struct perf_sample *data, bool needs_swap); + +static inline int evsel__parse_sample(struct evsel *evsel, union perf_event 
*event, + struct perf_sample *data) +{ + return __evsel__parse_sample(evsel, event, data, evsel->needs_swap); +} int evsel__parse_sample_timestamp(struct evsel *evsel, union perf_event *event, u64 *timestamp); -- 2.54.0.1099.g489fc7bff1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v20 3/5] perf inject/aslr: Implement sample address remapping 2026-06-11 16:41 ` [PATCH v20 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers @ 2026-06-11 17:41 ` sashiko-bot 0 siblings, 0 replies; 183+ messages in thread From: sashiko-bot @ 2026-06-11 17:41 UTC (permalink / raw) To: Ian Rogers; +Cc: linux-perf-users Thank you for your contribution! Sashiko AI review found 2 potential issue(s) to consider: - [High] Addresses that cannot be remapped are destructively overwritten with 0. - [Medium] Missing union-based repacking for PERF_SAMPLE_CPU causes cross-endian event synthesis failure. -- commit 4363deb515e79cb7e3488a5d19b247858fb88edd Author: Gabriel Marin <gmx@google.com> Add the sample address remapping logic to the ASLR tool. This patch implements sample event processing for the ASLR tool, parsing sample events and remapping addresses like IPs, callchains, and branch stacks. It also adds the aslr_tool__remap_address helper function. > diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c > --- a/tools/perf/util/aslr.c > +++ b/tools/perf/util/aslr.c > @@ -112,6 +113,60 @@ static u64 round_up_to_page_size(u64 addr) > return (addr + page_size - 1) & ~((u64)page_size - 1); > } > > +static u64 aslr_tool__remap_address(struct aslr_tool *aslr, > + struct thread *aslr_thread, > + u8 cpumode, > + u64 addr) > +{ > + struct addr_location al; > + struct remap_addresses_key key; > + u64 *remapped_invariant_ptr = NULL; > + u64 remap_addr = 0; [ ... ] > + if (!thread__find_map(aslr_thread, effective_cpumode, addr, &al)) { > + addr_location__exit(&al); > + return 0; /* No mmap. */ [Severity: High] Does returning 0 here destructively overwrite valid non-randomized addresses? If an address cannot be mapped (such as JIT code, vsyscall, or missing map regions), returning 0 unconditionally overwrites the sample's IP, ADDR, callchains, and branch stacks. 
Should this return the original addr instead to avoid clustering profiler trace data at NULL? > + } > + } > + > + key.machine = maps__machine(thread__maps(aslr_thread)); > + key.dso = map__dso(al.map); > + key.invariant = map__start(al.map) - map__pgoff(al.map); > + key.pid = (effective_cpumode == PERF_RECORD_MISC_KERNEL || > + effective_cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? > + kernel_pid : thread__pid(aslr_thread); > + > + if (hashmap__find(&aslr->remap_addresses, &key, &remapped_invariant_ptr)) { > + remap_addr = *remapped_invariant_ptr + map__pgoff(al.map) + > + (addr - map__start(al.map)); > + } else { > + pr_debug("Cannot find a remapped entry for address %lx in mapping %lx(%lx) for pid=%d\n", > + addr, map__start(al.map), map__size(al.map), key.pid); > + } > + > + addr_location__exit(&al); > + return remap_addr; [Severity: High] Similarly, when the mapping is not in the remap_addresses hashmap, this will return remap_addr which was initialized to 0. Would it be safer to default remap_addr to the original addr instead of 0? > +} [ ... ] > + if (sample_type & PERF_SAMPLE_ID) > + COPY_U64(); /* id */ > + if (sample_type & PERF_SAMPLE_STREAM_ID) > + COPY_U64(); /* stream_id */ > + if (sample_type & PERF_SAMPLE_CPU) > + COPY_U64(); /* cpu, res */ [Severity: Medium] The commit message states that both PERF_SAMPLE_TID and PERF_SAMPLE_CPU must be unpacked and repacked using unions to ensure correct 32-bit layout alignment for cross-endian hosts. However, it looks like PERF_SAMPLE_CPU blindly uses COPY_U64() here instead of a union. Since the generic perf event parsing layer byte-swaps the 64-bit payload array, this reverses the internal order of the two 32-bit fields (cpu and res). Does copying this improperly swapped 64-bit word verbatim write the reversed fields directly to the output, corrupting them when reparsed with needs_swap=false? 
> + if (sample_type & PERF_SAMPLE_PERIOD) > + COPY_U64(); /* period */ -- Sashiko AI review · https://sashiko.dev/#/patchset/20260611164122.3974068-1-irogers@google.com?part=3 ^ permalink raw reply [flat|nested] 183+ messages in thread
* [PATCH v20 4/5] perf aslr: Strip sample registers 2026-06-11 16:41 ` [PATCH v20 0/5] perf tools: Add inject --aslr feature Ian Rogers ` (2 preceding siblings ...) 2026-06-11 16:41 ` [PATCH v20 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers @ 2026-06-11 16:41 ` Ian Rogers 2026-06-11 16:41 ` [PATCH v20 5/5] perf test: Add inject ASLR test Ian Rogers 2026-06-11 18:29 ` [PATCH v20 0/5] perf tools: Add inject --aslr feature Ian Rogers 5 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-11 16:41 UTC (permalink / raw) To: irogers, acme, james.clark, namhyung Cc: adrian.hunter, gmx, jolsa, linux-kernel, linux-perf-users, mingo, peterz Extend the ASLR tool stripping helpers to drop register dump payloads by masking out the relevant perf_event_attr fields (sample_regs_user, sample_regs_intr) when the delegated tool is handling the data. struct aslr_evsel_priv maintains the original perf_event_attr values and is looked up via the evsel_orig_attrs hashmap so that sample sizes can be properly parsed even when bits are stripped from the pipeline. This is critical for bounded array copying within aslr_tool__process_sample, which relies on orig_sample_type to determine exactly which fields were captured by the kernel before any stripping occurred. This allows us to keep samples that would otherwise be dropped because they contain registers, while still obfuscating the registers. 
Co-developed-by: Gabriel Marin <gmx@google.com> Signed-off-by: Gabriel Marin <gmx@google.com> Signed-off-by: Ian Rogers <irogers@google.com> Assisted-by: Antigravity:gemini-3.1-pro --- tools/perf/builtin-inject.c | 28 +++- tools/perf/util/aslr.c | 263 +++++++++++++++++++++++++++--------- tools/perf/util/aslr.h | 9 +- 3 files changed, 230 insertions(+), 70 deletions(-) diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 8bb37095e2de..6d6cce4765a7 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -248,7 +248,7 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, if (!aslr_event) return -ENOMEM; memcpy(aslr_event, event, event->header.size); - aslr_tool__strip_attr_event(aslr_event, pevlist); + aslr_tool__strip_attr_event(aslr_event, *pevlist); event = aslr_event; } @@ -297,6 +297,7 @@ static int perf_event__repipe_attr(const struct perf_tool *tool, attr.size = sizeof(struct perf_event_attr); attr.sample_type &= ~PERF_SAMPLE_AUX; + if (inject->itrace_synth_opts.add_last_branch) { attr.sample_type |= PERF_SAMPLE_BRANCH_STACK; attr.branch_sample_type |= PERF_SAMPLE_BRANCH_HW_INDEX; @@ -2617,6 +2618,10 @@ static int __cmd_inject(struct perf_inject *inject) evsel->core.attr.exclude_callchain_user = 0; } } + + if (inject->aslr) + aslr_tool__strip_evlist(inject->session->tool, session->evlist); + session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; perf_session__inject_header(session, session->evlist, fd, &inj_fc.fc, @@ -2875,6 +2880,18 @@ int cmd_inject(int argc, const char **argv) if (zstd_init(&(inject.session->zstd_data), 0) < 0) pr_warning("Decompression initialization failed.\n"); + if (inject.aslr) { + struct evsel *evsel; + + evlist__for_each_entry(inject.session->evlist, evsel) { + ret = aslr_tool__cache_orig_attrs(tool, evsel); + if (ret) { + pr_err("Failed to cache original attributes: %d\n", ret); + goto out_delete; + } + } + } + /* Save original 
section info before feature bits change */ ret = save_section_info(&inject); if (ret) @@ -2893,10 +2910,17 @@ int cmd_inject(int argc, const char **argv) * the input. */ if (!data.is_pipe) { + if (inject.aslr) + aslr_tool__strip_evlist(tool, inject.session->evlist); + ret = perf_event__synthesize_for_pipe(&inject.tool, inject.session, &inject.output, perf_event__repipe); + + if (inject.aslr) + aslr_tool__restore_evlist(tool, inject.session->evlist); + if (ret < 0) goto out_delete; } @@ -2961,8 +2985,6 @@ int cmd_inject(int argc, const char **argv) goto out_delete; ret = __cmd_inject(&inject); - if (inject.aslr) - aslr_tool__strip_evlist(tool, inject.session->evlist); guest_session__exit(&inject.guest_session); diff --git a/tools/perf/util/aslr.c b/tools/perf/util/aslr.c index c4602a43e04f..64d447565a1f 100644 --- a/tools/perf/util/aslr.c +++ b/tools/perf/util/aslr.c @@ -18,6 +18,7 @@ #include <internal/lib.h> /* page_size */ #include <linux/compiler.h> #include <linux/zalloc.h> +#include <errno.h> #include <inttypes.h> #include <unistd.h> #include <byteswap.h> @@ -46,6 +47,23 @@ struct aslr_mapping { u64 remap_start; }; +struct aslr_evsel_priv { + u64 orig_sample_type; + u64 orig_sample_regs_user; + u64 orig_sample_regs_intr; + int orig_sample_size; +}; + +static size_t evsel_hash(long key, void *ctx __maybe_unused) +{ + return (size_t)key; +} + +static bool evsel_equal(long key1, long key2, void *ctx __maybe_unused) +{ + return key1 == key2; +} + struct process_top_address { u64 remapped_max; }; @@ -60,6 +78,11 @@ struct aslr_tool { struct hashmap remap_addresses; /** @top_addresses: mapping from process to max remapped address. */ struct hashmap top_addresses; + /** + * @evsel_orig_attrs: mapping from evsel pointer to its original + * unstripped sample_type and registers bitmasks. 
+ */ + struct hashmap evsel_orig_attrs; }; static const pid_t kernel_pid = -1; @@ -123,6 +146,8 @@ static u64 aslr_tool__remap_address(struct aslr_tool *aslr, u64 *remapped_invariant_ptr = NULL; u64 remap_addr = 0; u8 effective_cpumode = cpumode; + struct dso *dso; + const char *dso_name; if (!aslr_thread) return 0; /* No thread. */ @@ -148,9 +173,15 @@ static u64 aslr_tool__remap_address(struct aslr_tool *aslr, } } + dso = map__dso(al.map); + dso_name = dso ? dso__long_name(dso) : NULL; + key.machine = maps__machine(thread__maps(aslr_thread)); - key.dso = map__dso(al.map); - key.invariant = map__start(al.map) - map__pgoff(al.map); + key.dso = dso; + if (dso && !is_anon_memory(dso_name) && !is_no_dso_memory(dso_name)) + key.invariant = map__start(al.map) - map__pgoff(al.map); + else + key.invariant = map__start(al.map); key.pid = (effective_cpumode == PERF_RECORD_MISC_KERNEL || effective_cpumode == PERF_RECORD_MISC_GUEST_KERNEL) ? kernel_pid : thread__pid(aslr_thread); @@ -676,6 +707,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, struct aslr_tool *aslr; struct perf_tool *delegate; int ret; + int orig_sample_size; u64 sample_type; struct thread *thread; struct machine *aslr_machine; @@ -693,6 +725,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, u64 orig_regs_user; u64 orig_regs_intr; + del_tool = container_of(tool, struct delegate_tool, tool); aslr = container_of(del_tool, struct aslr_tool, tool); delegate = aslr->tool.delegate; @@ -703,7 +736,24 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, return delegate->sample(delegate, event, sample, machine); ret = -EFAULT; - sample_type = evsel->core.attr.sample_type; + + if (hashmap__find(&aslr->evsel_orig_attrs, evsel, &priv)) { + orig_sample_type = priv->orig_sample_type; + orig_regs_user = priv->orig_sample_regs_user; + orig_regs_intr = priv->orig_sample_regs_intr; + } else { + orig_sample_type = evsel->core.attr.sample_type; + orig_regs_user = 
evsel->core.attr.sample_regs_user; + orig_regs_intr = evsel->core.attr.sample_regs_intr; + } + + orig_sample_size = evsel->sample_size; + + sample_type = orig_sample_type; + sample_type &= ~PERF_SAMPLE_REGS_USER; + sample_type &= ~PERF_SAMPLE_REGS_INTR; + sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + max_i = (event->header.size - sizeof(struct perf_event_header)) / sizeof(__u64); max_j = (PERF_SAMPLE_MAX_SIZE - sizeof(struct perf_event_header)) / sizeof(__u64); new_event = (union perf_event *)aslr->event_copy; @@ -754,11 +804,11 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, i++; \ } while (0) - if (sample_type & PERF_SAMPLE_IDENTIFIER) + if (orig_sample_type & PERF_SAMPLE_IDENTIFIER) COPY_U64(); /* id */ - if (sample_type & PERF_SAMPLE_IP) + if (orig_sample_type & PERF_SAMPLE_IP) REMAP_U64(sample->ip); - if (sample_type & PERF_SAMPLE_TID) { + if (orig_sample_type & PERF_SAMPLE_TID) { union { u64 val64; u32 val32[2]; @@ -773,19 +823,19 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, out_array[j++] = u.val64; i++; } - if (sample_type & PERF_SAMPLE_TIME) + if (orig_sample_type & PERF_SAMPLE_TIME) COPY_U64(); /* time */ - if (sample_type & PERF_SAMPLE_ADDR) + if (orig_sample_type & PERF_SAMPLE_ADDR) REMAP_U64(sample->addr); - if (sample_type & PERF_SAMPLE_ID) + if (orig_sample_type & PERF_SAMPLE_ID) COPY_U64(); /* id */ - if (sample_type & PERF_SAMPLE_STREAM_ID) + if (orig_sample_type & PERF_SAMPLE_STREAM_ID) COPY_U64(); /* stream_id */ - if (sample_type & PERF_SAMPLE_CPU) + if (orig_sample_type & PERF_SAMPLE_CPU) COPY_U64(); /* cpu, res */ - if (sample_type & PERF_SAMPLE_PERIOD) + if (orig_sample_type & PERF_SAMPLE_PERIOD) COPY_U64(); /* period */ - if (sample_type & PERF_SAMPLE_READ) { + if (orig_sample_type & PERF_SAMPLE_READ) { if ((evsel->core.attr.read_format & PERF_FORMAT_GROUP) == 0) { COPY_U64(); /* value */ if (evsel->core.attr.read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) @@ -818,7 +868,7 @@ static int 
aslr_tool__process_sample(const struct perf_tool *tool, } } } - if (sample_type & PERF_SAMPLE_CALLCHAIN) { + if (orig_sample_type & PERF_SAMPLE_CALLCHAIN) { u64 nr; if (CHECK_BOUNDS(1, 1)) { @@ -884,7 +934,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, out_array[j++] = addr; } } - if (sample_type & PERF_SAMPLE_RAW) { + if (orig_sample_type & PERF_SAMPLE_RAW) { size_t bytes = sizeof(u32) + sample->raw_size; size_t u64_words = (bytes + 7) / 8; @@ -903,7 +953,7 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_BRANCH_STACK) { + if (orig_sample_type & PERF_SAMPLE_BRANCH_STACK) { u64 nr; if (CHECK_BOUNDS(1, 1)) { @@ -944,19 +994,25 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, COPY_U64(); } } - if (sample_type & PERF_SAMPLE_REGS_USER) { + if (orig_sample_type & PERF_SAMPLE_REGS_USER) { + u64 abi; + if (CHECK_BOUNDS(1, 0)) { ret = -EFAULT; goto out_put; } - /* abi */ - COPY_U64(); - /* TODO: can this be less conservative? 
*/ - pr_debug("Dropping regs user sample as possible ASLR leak\n"); - ret = 0; - goto out_put; + abi = in_array[i++]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(orig_regs_user); + + if (nr > max_i - i) { + ret = -EFAULT; + goto out_put; + } + i += nr; + } } - if (sample_type & PERF_SAMPLE_STACK_USER) { + if (orig_sample_type & PERF_SAMPLE_STACK_USER) { u64 size; if (CHECK_BOUNDS(1, 1)) { @@ -986,39 +1042,45 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_WEIGHT_TYPE) + if (orig_sample_type & PERF_SAMPLE_WEIGHT_TYPE) COPY_U64(); /* perf_sample_weight */ - if (sample_type & PERF_SAMPLE_DATA_SRC) + if (orig_sample_type & PERF_SAMPLE_DATA_SRC) COPY_U64(); /* data_src */ - if (sample_type & PERF_SAMPLE_TRANSACTION) + if (orig_sample_type & PERF_SAMPLE_TRANSACTION) COPY_U64(); /* transaction */ - if (sample_type & PERF_SAMPLE_REGS_INTR) { + if (orig_sample_type & PERF_SAMPLE_REGS_INTR) { + u64 abi; + if (CHECK_BOUNDS(1, 0)) { ret = -EFAULT; goto out_put; } - /* abi */ - COPY_U64(); - /* TODO: can this be less conservative? */ - pr_debug("Dropping interrupt register sample as possible ASLR leak\n"); - ret = 0; - goto out_put; + abi = in_array[i++]; + if (abi != PERF_SAMPLE_REGS_ABI_NONE) { + u64 nr = hweight64(orig_regs_intr); + + if (nr > max_i - i) { + ret = -EFAULT; + goto out_put; + } + i += nr; + } } - if (sample_type & PERF_SAMPLE_PHYS_ADDR) { + if (orig_sample_type & PERF_SAMPLE_PHYS_ADDR) { COPY_U64(); /* phys_addr */ /* TODO: can this be less conservative? 
*/ pr_debug("Dropping physical address sample as possible ASLR leak\n"); ret = 0; goto out_put; } - if (sample_type & PERF_SAMPLE_CGROUP) + if (orig_sample_type & PERF_SAMPLE_CGROUP) COPY_U64(); /* cgroup */ - if (sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) + if (orig_sample_type & PERF_SAMPLE_DATA_PAGE_SIZE) COPY_U64(); /* data_page_size */ - if (sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) + if (orig_sample_type & PERF_SAMPLE_CODE_PAGE_SIZE) COPY_U64(); /* code_page_size */ - if (sample_type & PERF_SAMPLE_AUX) { + if (orig_sample_type & PERF_SAMPLE_AUX) { u64 size; if (CHECK_BOUNDS(1, 1)) { @@ -1058,11 +1120,20 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, } new_event->sample.header.size = sizeof(struct perf_event_header) + j * sizeof(u64); - + /* Temporarily override evsel attributes to match the stripped new_event format! */ + evsel->sample_size = __evsel__sample_size(sample_type); + evsel->core.attr.sample_type = sample_type; + evsel->core.attr.sample_regs_user = 0; + evsel->core.attr.sample_regs_intr = 0; perf_sample__init(&new_sample, /*all=*/ true); ret = __evsel__parse_sample(evsel, new_event, &new_sample, /*needs_swap=*/false); if (ret) { + /* Restore original attributes immediately if parsing fails */ + evsel->sample_size = orig_sample_size; + evsel->core.attr.sample_type = orig_sample_type; + evsel->core.attr.sample_regs_user = orig_regs_user; + evsel->core.attr.sample_regs_intr = orig_regs_intr; perf_sample__exit(&new_sample); goto out_put; } @@ -1071,6 +1142,12 @@ static int aslr_tool__process_sample(const struct perf_tool *tool, ret = delegate->sample(delegate, new_event, &new_sample, machine); perf_sample__exit(&new_sample); + /* Restore original attributes so trace ingestion never desynchronizes! 
*/ + evsel->sample_size = orig_sample_size; + evsel->core.attr.sample_type = orig_sample_type; + evsel->core.attr.sample_regs_user = orig_regs_user; + evsel->core.attr.sample_regs_intr = orig_regs_intr; + out_put: thread__put(thread); return ret; @@ -1124,15 +1201,22 @@ static int aslr_tool__process_auxtrace_error(const struct perf_tool *tool __mayb return 0; } - -void aslr_tool__strip_attr_event(union perf_event *event, struct evlist **pevlist __maybe_unused) +void aslr_tool__strip_attr_event(union perf_event *event, struct evlist *evlist) { u32 attr_size; + if (!evlist) + return; + attr_size = event->attr.attr.size ?: PERF_ATTR_SIZE_VER0; if (attr_size >= (offsetof(struct perf_event_attr, sample_type) + sizeof(u64))) { event->attr.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + + if (attr_size >= (offsetof(struct perf_event_attr, sample_regs_user) + sizeof(u64))) + event->attr.attr.sample_regs_user = 0; + if (attr_size >= (offsetof(struct perf_event_attr, sample_regs_intr) + sizeof(u64))) + event->attr.attr.sample_regs_intr = 0; } if (attr_size >= (offsetof(struct perf_event_attr, type) + sizeof(u32))) { @@ -1156,28 +1240,6 @@ void aslr_tool__strip_attr_event(union perf_event *event, struct evlist **pevlis } } -void aslr_tool__strip_evlist(struct perf_tool *tool __maybe_unused, - struct evlist *evlist) -{ - struct evsel *evsel; - - evlist__for_each_entry(evlist, evsel) { - evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; - - if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) - evsel->core.attr.bp_addr = 0; - else if (evsel->core.attr.type >= PERF_TYPE_MAX) { - struct perf_pmu *pmu = perf_pmus__find_by_type(evsel->core.attr.type); - - if (pmu && (!strcmp(pmu->name, "kprobe") || - !strcmp(pmu->name, "uprobe"))) { - evsel->core.attr.config1 = 0; - evsel->core.attr.config2 = 0; - } - } - } -} - static void aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) { delegate_tool__init(&aslr->tool, delegate); @@ -1191,6 +1253,9 @@ static void 
aslr_tool__init(struct aslr_tool *aslr, struct perf_tool *delegate) hashmap__init(&aslr->top_addresses, top_addresses__hash, top_addresses__equal, /*ctx=*/NULL); + hashmap__init(&aslr->evsel_orig_attrs, + evsel_hash, evsel_equal, + /*ctx=*/NULL); aslr->tool.tool.sample = aslr_tool__process_sample; /* read - reads a counter, okay to delegate. */ @@ -1253,9 +1318,13 @@ void aslr_tool__delete(struct perf_tool *tool) zfree(&cur->pkey); zfree(&cur->pvalue); } + hashmap__for_each_entry(&aslr->evsel_orig_attrs, cur, bkt) { + zfree(&cur->pvalue); + } hashmap__clear(&aslr->remap_addresses); hashmap__clear(&aslr->top_addresses); + hashmap__clear(&aslr->evsel_orig_attrs); aslr_tool__destroy_machines_priv(&aslr->machines); machines__destroy_kernel_maps(&aslr->machines); @@ -1269,3 +1338,69 @@ void aslr_tool__delete(struct perf_tool *tool) machines__exit(&aslr->machines); free(aslr); } + +int aslr_tool__cache_orig_attrs(struct perf_tool *tool, struct evsel *evsel) +{ + struct delegate_tool *del_tool = container_of(tool, struct delegate_tool, tool); + struct aslr_tool *aslr = container_of(del_tool, struct aslr_tool, tool); + struct aslr_evsel_priv *priv = zalloc(sizeof(*priv)); + int err; + + if (!priv) + return -ENOMEM; + + priv->orig_sample_type = evsel->core.attr.sample_type; + priv->orig_sample_regs_user = evsel->core.attr.sample_regs_user; + priv->orig_sample_regs_intr = evsel->core.attr.sample_regs_intr; + priv->orig_sample_size = evsel->sample_size; + + err = hashmap__add(&aslr->evsel_orig_attrs, evsel, priv); + if (err) { + free(priv); + return err; + } + return 0; +} + +void aslr_tool__strip_evlist(const struct perf_tool *tool __maybe_unused, struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + evsel->core.attr.sample_type &= ASLR_SUPPORTED_SAMPLE_TYPE; + evsel->core.attr.sample_regs_user = 0; + evsel->core.attr.sample_regs_intr = 0; + evsel->sample_size = __evsel__sample_size(evsel->core.attr.sample_type); + 
evsel__calc_id_pos(evsel); + + if (evsel->core.attr.type == PERF_TYPE_BREAKPOINT) { + evsel->core.attr.bp_addr = 0; + } else if (evsel->core.attr.type >= PERF_TYPE_MAX) { + struct perf_pmu *pmu = perf_pmus__find_by_type(evsel->core.attr.type); + + if (pmu && (!strcmp(pmu->name, "kprobe") || + !strcmp(pmu->name, "uprobe"))) { + evsel->core.attr.config1 = 0; + evsel->core.attr.config2 = 0; + } + } + } +} + +void aslr_tool__restore_evlist(const struct perf_tool *tool, struct evlist *evlist) +{ + const struct delegate_tool *del_tool = container_of(tool, const struct delegate_tool, tool); + const struct aslr_tool *aslr = container_of(del_tool, const struct aslr_tool, tool); + struct evsel *evsel; + struct aslr_evsel_priv *priv; + + evlist__for_each_entry(evlist, evsel) { + if (hashmap__find(&aslr->evsel_orig_attrs, evsel, &priv)) { + evsel->core.attr.sample_type = priv->orig_sample_type; + evsel->core.attr.sample_regs_user = priv->orig_sample_regs_user; + evsel->core.attr.sample_regs_intr = priv->orig_sample_regs_intr; + evsel->sample_size = priv->orig_sample_size; + evsel__calc_id_pos(evsel); + } + } +} diff --git a/tools/perf/util/aslr.h b/tools/perf/util/aslr.h index 2b82f711bc67..522e31c8e2c0 100644 --- a/tools/perf/util/aslr.h +++ b/tools/perf/util/aslr.h @@ -34,8 +34,11 @@ struct evlist; union perf_event; struct perf_tool *aslr_tool__new(struct perf_tool *delegate); -void aslr_tool__delete(struct perf_tool *aslr); -void aslr_tool__strip_attr_event(union perf_event *event, struct evlist **pevlist); -void aslr_tool__strip_evlist(struct perf_tool *tool, struct evlist *evlist); +void aslr_tool__delete(struct perf_tool *tool); + +void aslr_tool__strip_attr_event(union perf_event *event, struct evlist *evlist); +int aslr_tool__cache_orig_attrs(struct perf_tool *tool, struct evsel *evsel); +void aslr_tool__strip_evlist(const struct perf_tool *tool, struct evlist *evlist); +void aslr_tool__restore_evlist(const struct perf_tool *tool, struct evlist *evlist); #endif /* 
__PERF_ASLR_H */ -- 2.54.0.1099.g489fc7bff1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* [PATCH v20 5/5] perf test: Add inject ASLR test 2026-06-11 16:41 ` [PATCH v20 0/5] perf tools: Add inject --aslr feature Ian Rogers ` (3 preceding siblings ...) 2026-06-11 16:41 ` [PATCH v20 4/5] perf aslr: Strip sample registers Ian Rogers @ 2026-06-11 16:41 ` Ian Rogers 2026-06-11 18:29 ` [PATCH v20 0/5] perf tools: Add inject --aslr feature Ian Rogers 5 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-11 16:41 UTC (permalink / raw) To: irogers, acme, james.clark, namhyung Cc: adrian.hunter, gmx, jolsa, linux-kernel, linux-perf-users, mingo, peterz Add a new shell test to verify the feature. The test covers: - Basic address remapping for user space samples. - Pipe mode coverage for input piped into perf inject. - Callchain address remapping. - Consistency of output before and after injection. - Pipe mode report consistency. - Dropping of samples that leak ASLR info (physical addresses). - Kernel address remapping (utilizing a dedicated kernel-intensive VFS dd workload to guarantee a continuous flow of timer-interrupt samples taken in kernel mode). - Kernel report consistency with address normalization. The test suite is hardened with global 'set -o pipefail' assertions to catch pipeline failures, stream-consuming awk processors to handle SIGPIPE signals, and a dedicated pipe output scenario validating raw 'perf inject -o -' stdout streams. Note on kernel DSO normalization in the test script: The test script deliberately normalizes all kernel DSOs to a generic [kernel] tag before diffing, as obfuscating physical kernel addresses forces perf report to occasionally shift samples between individual modules and [kernel.kallsyms] due to the lack of valid host module boundary maps. 
Note on ARM: Kernel-based ASLR test cases (test_kernel_aslr and test_kernel_report_aslr) are skipped on ARM architectures (aarch64 and arm*) to bypass high latency constraints (such as check_invariants() spending excessive execution time in maps__split_kallsyms() on debug builds) and symbolization inconsistencies. Signed-off-by: Ian Rogers <irogers@google.com> Assisted-by: Antigravity:gemini-3.1-pro --- tools/perf/tests/shell/inject_aslr.sh | 533 ++++++++++++++++++++++++++ 1 file changed, 533 insertions(+) create mode 100755 tools/perf/tests/shell/inject_aslr.sh diff --git a/tools/perf/tests/shell/inject_aslr.sh b/tools/perf/tests/shell/inject_aslr.sh new file mode 100755 index 000000000000..c00461828ea7 --- /dev/null +++ b/tools/perf/tests/shell/inject_aslr.sh @@ -0,0 +1,533 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# perf inject --aslr test + +set -e +set -o pipefail + +shelldir=$(dirname "$0") +# shellcheck source=lib/perf_has_symbol.sh +. "${shelldir}"/lib/perf_has_symbol.sh + +sym="noploop" + +skip_test_missing_symbol ${sym} + +# Create global temp directory +temp_dir=$(mktemp -d /tmp/perf-test-aslr.XXXXXXXXXX) + +prog="perf test -w noploop" +[ "$(uname -m)" = "s390x" ] && prog="$prog 3" +err=0 +kprog="dd if=/dev/urandom of=/dev/null bs=1M count=50" + +cleanup() { + local exit_code=${1:-$?} + trap - EXIT TERM INT + if [ "${exit_code}" -ne 0 ] || [ "${err}" -ne 0 ]; then + echo "Test failed! Preserving temp directory: ${temp_dir}" + return + fi + # Check if temp_dir is set and looks sane before removing + if [[ "${temp_dir}" =~ ^/tmp/perf-test-aslr\. ]]; then + rm -rf "${temp_dir}" + fi +} + +trap_cleanup() { + local exit_code=$? 
+ echo "Unexpected signal in ${FUNCNAME[1]}" + cleanup ${exit_code} + exit ${exit_code} +} +trap trap_cleanup EXIT TERM INT + +get_noploop_addr() { + local file=$1 + perf script -i "$file" | awk ' + BEGIN { found=0 } + { + for (i=1; i<=NF; i++) { + if ($i ~ /noploop\+/) { + if (!found) { + print $(i-1) + found=1 + } + } + } + }' +} + +test_basic_aslr() { + echo "Test basic ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.basic.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.basic.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -v --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Basic ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Basic ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Basic ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Basic ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Basic ASLR test [Success]" + fi +} + +test_pipe_aslr() { + echo "Test pipe mode ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe.XXXXXX") + + # Use tee to save the original pipe data for comparison + perf record -e task-clock:u -o - ${prog} | tee "${data}" | perf inject --aslr -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Pipe ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Pipe ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Pipe ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Pipe ASLR test [Failed - addresses are not remapped]" + 
err=1 + else + echo "Pipe ASLR test [Success]" + fi +} + +test_callchain_aslr() { + echo "Test Callchain ASLR remapping" + local data + data=$(mktemp "${temp_dir}/perf.data.callchain.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.callchain.XXXXXX") + + perf record -g -e task-clock:u -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + orig_addr=$(get_noploop_addr "${data}") + new_addr=$(get_noploop_addr "${data2}") + + echo "Callchain ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Callchain ASLR test [Failed - no noploop samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Callchain ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Callchain ASLR test [Failed - addresses are not remapped]" + err=1 + else + # Extract callchain addresses (indented lines starting with hex addresses) + orig_callchain=$(perf script -i "${data}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + new_callchain=$(perf script -i "${data2}" | awk '/^[[:space:]]+[0-9a-f]+/ {print $1}') + + if [ -z "$orig_callchain" ]; then + echo "Callchain ASLR test [Failed - no callchain samples in original file]" + err=1 + elif [ -z "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain data was dropped]" + err=1 + elif [ "$orig_callchain" = "$new_callchain" ]; then + echo "Callchain ASLR test [Failed - callchain addresses were not remapped]" + err=1 + else + echo "Callchain ASLR test [Success]" + fi + fi +} + +test_report_aslr() { + echo "Test perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i 
"${data}" -o "${data_clean}" + perf inject -v -b --aslr -i "${data}" -o "${data2}" + + local report1="${temp_dir}/report1_basic" + local report2="${temp_dir}/report2_basic" + local report1_clean="${temp_dir}/report1_basic.clean" + local report2_clean="${temp_dir}/report2_basic.clean" + local diff_file="${temp_dir}/diff_basic" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Report ASLR test [Success]" + fi +} + +test_pipe_report_aslr() { + echo "Test pipe mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_report.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.pipe_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + # Use tee to save the original pipe data, then process it with inject -b + perf record -e task-clock:u -o - ${prog} | \ + tee "${data}" | \ + perf inject -b --aslr -o "${data2}" + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1_pipe" + local report2="${temp_dir}/report2_pipe" + local report1_clean="${temp_dir}/report1_pipe.clean" + local report2_clean="${temp_dir}/report2_pipe.clean" + local diff_file="${temp_dir}/diff_pipe" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i 
"${data2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! -s "${report1_clean}" ]; then + echo "Pipe Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Report ASLR test [Success]" + fi +} + +test_pipe_out_report_aslr() { + echo "Test pipe output mode perf report consistency" + local data + data=$(mktemp "${temp_dir}/perf.data.pipe_out_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + + perf record -e task-clock:u -o "${data}" ${prog} + perf inject -b -i "${data}" -o "${data_clean}" + + local report1="${temp_dir}/report1_pipe_out" + local report2="${temp_dir}/report2_pipe_out" + local report1_clean="${temp_dir}/report1_pipe_out.clean" + local report2_clean="${temp_dir}/report2_pipe_out.clean" + local diff_file="${temp_dir}/diff_pipe_out" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf inject -b --aslr -i "${data}" -o - | perf report -i - --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! 
-s "${report1_clean}" ]; then + echo "Pipe Output Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Pipe Output Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Pipe Output Report ASLR test [Success]" + fi +} + +test_dropped_samples() { + echo "Test dropped samples (phys-data)" + local data + data=$(mktemp "${temp_dir}/perf.data.dropped.XXXXXX") + local data2 + data2=$(mktemp "${temp_dir}/perf.data2.dropped.XXXXXX") + + # Check if --phys-data is supported by recording a short run + if ! perf record -e task-clock:u --phys-data -o "${data}" -- sleep 0.1 > /dev/null 2>&1; then + echo "Skipping dropped samples test as --phys-data is not supported" + return + fi + + perf record -e task-clock:u --phys-data -o "${data}" ${prog} + perf inject --aslr -i "${data}" -o "${data2}" + + # Verify that the original file actually contained samples! + orig_samples=$(perf script -i "${data}" | wc -l) + if [ "$orig_samples" -eq 0 ]; then + echo "Dropped samples test [Failed - no samples in original file]" + err=1 + else + # Verify that samples are dropped. + samples_count=$(perf script -i "${data2}" | wc -l) + + if [ "$samples_count" -gt 0 ]; then + echo "Dropped samples test [Failed - samples were not dropped]" + err=1 + else + echo "Dropped samples test [Success]" + fi + fi +} + +test_kernel_aslr() { + echo "Test kernel ASLR remapping" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then + echo "Skipping kernel ASLR test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel ASLR test as kernel map could not be recorded (permissions restricted)" + return + fi + + perf inject -v --aslr -i "${kdata}" -o "${kdata2}" + + # Check if kernel addresses are remapped. + # Find the field that ends with :k: (the event name) and take the next field! + orig_addr=$(perf script -i "${kdata}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + new_addr=$(perf script -i "${kdata2}" | awk ' + BEGIN { found=0 } + { + for (i=1; i<NF; i++) { + if ($i ~ /:[k]+:?$/) { + if (!found) { + print $(i+1) + found=1 + } + } + } + }') + + echo "Kernel ASLR: orig_addr=$orig_addr, new_addr=$new_addr" + + if [ -z "$orig_addr" ]; then + echo "Kernel ASLR test [Failed - no kernel samples in original file]" + err=1 + elif [ -z "$new_addr" ]; then + echo "Kernel ASLR test [Failed - could not find remapped address]" + err=1 + elif [ "$orig_addr" = "$new_addr" ]; then + echo "Kernel ASLR test [Failed - addresses are not remapped]" + err=1 + else + echo "Kernel ASLR test [Success]" + fi +} + +test_kernel_report_aslr() { + echo "Test kernel perf report consistency" + local kdata + kdata=$(mktemp "${temp_dir}/perf.data.kernel_report.XXXXXX") + local kdata2 + kdata2=$(mktemp "${temp_dir}/perf.data2.kernel_report.XXXXXX") + local data_clean + data_clean=$(mktemp "${temp_dir}/perf.data.clean.XXXXXX") + local log_file + log_file=$(mktemp "${temp_dir}/kernel_report_record.log.XXXXXX") + + # Try to record kernel samples + if ! 
perf record -e task-clock:k -o "${kdata}" ${kprog} > "${log_file}" 2>&1; then + echo "Skipping kernel report test as recording failed (maybe no permissions)" + return + fi + + # Check for warning about kernel map restriction + if grep -q "Couldn't record kernel reference relocation symbol" "${log_file}"; then + echo "Skipping kernel report test as kernel map could not be recorded (permissions restricted)" + return + fi + + # Use -b to inject build-ids and force ordered events processing in both + perf inject -b -i "${kdata}" -o "${data_clean}" + perf inject -v -b --aslr -i "${kdata}" -o "${kdata2}" + + local report1="${temp_dir}/report_kernel1" + local report2="${temp_dir}/report_kernel2" + local report1_clean="${temp_dir}/report_kernel1.clean" + local report2_clean="${temp_dir}/report_kernel2.clean" + + perf report -i "${data_clean}" --stdio > "${report1}" + perf report -i "${kdata2}" --stdio > "${report2}" + + # Strip headers and compare lines with percentages + grep '%' "${report1}" | grep -v '^#' > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' > "${report2_clean}" || true + + # Normalize kernel DSOs and addresses in clean reports + # This allows kernel modules to be either a module or kernel.kallsyms + local report1_norm="${temp_dir}/report_kernel1.norm" + local report2_norm="${temp_dir}/report_kernel2.norm" + local diff_file="${temp_dir}/diff_kernel" + + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report1_clean}" | \ + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ + sort > "${report1_norm}" || true + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' "${report2_clean}" | \ + awk '{gsub(/\[[a-zA-Z0-9_.-]{2,}\](\.[a-zA-Z0-9_]+)?/, "[kernel]", $0); print}' | \ + sort > "${report2_norm}" || true + + diff -u -w "${report1_norm}" "${report2_norm}" > "${diff_file}" || true + + if [ ! 
-s "${report1_norm}" ]; then + echo "Kernel Report ASLR test [Failed - no samples captured]" + err=1 + elif [ -s "${diff_file}" ]; then + echo "Kernel Report ASLR test [Failed - reports differ]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + else + echo "Kernel Report ASLR test [Success]" + fi +} + +test_regs_stripping() { + echo "Test user register stripping" + local rdata="${temp_dir}/perf.data.regs" + local rdata2="${temp_dir}/perf.data.regs.injected" + local rdata_clean="${temp_dir}/perf.data.regs.clean" + + if ! perf record -e cycles:u --user-regs -o "${rdata}" ${prog} > /dev/null 2>&1; then + echo "Skipping user registers test as recording failed (unsupported flag/platform)" + return + fi + + perf inject -b -i "${rdata}" -o "${rdata_clean}" + perf inject -v -b --aslr -i "${rdata}" -o "${rdata2}" + + local report1="${temp_dir}/report_regs1" + local report2="${temp_dir}/report_regs2" + local report1_clean="${temp_dir}/report_regs1.clean" + local report2_clean="${temp_dir}/report_regs2.clean" + local diff_file="${temp_dir}/diff_regs" + + perf report -i "${rdata_clean}" --stdio > "${report1}" 2>/dev/null || true + perf report -i "${rdata2}" --stdio > "${report2}" 2>/dev/null || true + + grep '%' "${report1}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | \ + sort > "${report1_clean}" || true + grep '%' "${report2}" | grep -v '^#' | \ + grep -v -E '0x[0-9a-f]{8,}|0000000000000000' | \ + sort > "${report2_clean}" || true + + diff -u -w "${report1_clean}" "${report2_clean}" > "${diff_file}" || true + + if [ ! 
-s "${report1_clean}" ]; then + echo "User registers stripping test [Failed - profile trace starved/empty]" + err=1 + return + elif [ -s "${diff_file}" ]; then + echo "User registers stripping test [Failed - report parsing differs]" + echo "Showing first 20 lines of diff:" + head -n 20 "${diff_file}" + err=1 + return + fi + + local script_dump="${temp_dir}/script_regs_dump" + perf script -D -i "${rdata2}" > "${script_dump}" 2>/dev/null || true + if grep -q "user regs:" "${script_dump}"; then + echo "User registers stripping test [Failed - register dumps still present]" + err=1 + else + echo "User registers stripping test [Success]" + fi +} + +test_basic_aslr +test_pipe_aslr +test_callchain_aslr +test_report_aslr +test_pipe_report_aslr +test_pipe_out_report_aslr +test_dropped_samples +case "$(uname -m)" in + aarch64*|arm*) + echo "Skipping kernel ASLR tests on ARM" + ;; + *) + test_kernel_aslr + test_kernel_report_aslr + ;; +esac + +test_regs_stripping + +cleanup ${err} +exit $err -- 2.54.0.1099.g489fc7bff1-goog ^ permalink raw reply related [flat|nested] 183+ messages in thread
* Re: [PATCH v20 0/5] perf tools: Add inject --aslr feature 2026-06-11 16:41 ` [PATCH v20 0/5] perf tools: Add inject --aslr feature Ian Rogers ` (4 preceding siblings ...) 2026-06-11 16:41 ` [PATCH v20 5/5] perf test: Add inject ASLR test Ian Rogers @ 2026-06-11 18:29 ` Ian Rogers 5 siblings, 0 replies; 183+ messages in thread From: Ian Rogers @ 2026-06-11 18:29 UTC (permalink / raw) To: irogers, acme, james.clark, namhyung Cc: adrian.hunter, gmx, jolsa, linux-kernel, linux-perf-users, mingo, peterz On Thu, Jun 11, 2026 at 9:41 AM Ian Rogers <irogers@google.com> wrote: > > This patch series introduces the new 'perf inject --aslr' feature to > remap virtual memory addresses or drop physical memory event leaks > when profile record data is shared between machines. Bundled with this > feature is a bug fix inside the core map tracking tool that hardens > perf session analysis against concurrent lookup data races. > > Detailed Mechanism of MMAP Mapping and ASLR virtual Address Allocation: > > The ASLR tool virtualizes the address space of the recorded processes by > intercepting MMAP and MMAP2 events to build a consistent translation > database, which is subsequently used to rewrite sample addresses. > > It maintains two primary lookup databases using hash maps: > 1. 'remap_addresses': Maps an original mapping key to its new remapped > base address. The key uses topological invariant coordinates: > (machine, dso, invariant). The invariant is computed as (start - pgoff) > for DSO-backed mappings. This invariant remains constant even when > perf's internal overlap-resolution splits a VMA into fragmented > pieces, ensuring split maps resolve consistently back to the same > remapped base. > 2. 'top_addresses': Tracks the allocation state per process (machine, pid). > It maintains 'remapped_max' (the highest allocated address in the > virtualized space). > > For each MMAP/MMAP2 event: > - We look up the DSO and invariant key in 'remap_addresses'. 
If found, we > reuse the translation, preserving the offset within the mapping. > - If not found, we allocate a new remapped address space: > - We use thread__find_map to look up the mapping immediately preceding > the new one in the original address space (at start - 1). If the preceding > mapping was also remapped, we place the new mapping > contiguously after it in the remapped space. This preserves > contiguity of split mappings (e.g., symbols split by HugeTLB, > or anonymous .bss segments adjacent to initialized data). > - If no contiguous mapping is found, we insert a 1-page gap from > the highest allocated address (remapped_max) to prevent accidental > merging of unrelated VMAs. > - The event's start address (and pgoff for kernel maps) is rewritten, > and the event is delegated to the output writer. > > To remain strictly conservative and guarantee security, the tool > scrubs breakpoint addresses (bp_addr) from all synthesized stream > headers, completely drops PERF_RECORD_TEXT_POKE events to prevent > absolute immediate pointer operand leaks, and drops unsupported > complex payloads (such as user register stacks, raw tracepoints, and > hardware AUX tracing frames). > > Verification is reinforced with a shell test ('inject_aslr.sh'). > > Prerequisite Bug Fix (Patch 1). During development, a core map > indexing issue was identified and resolved to prevent concurrent > lookup data races during session analysis. > > Changes since v19: > - Patch 1: Group lock and unlock operations inside maps__mutate_mapping() into > a single conditional block to resolve Clang 15 -Wthread-safety-analysis > compilation errors. > - Patch 5: Skip kernel-based ASLR test cases (test_kernel_aslr and > test_kernel_report_aslr) on ARM architectures (aarch64 and arm*) to > bypass high latency constraints and symbolization inconsistencies. > > Changes since v18: > - Patch 2 & 3: Squashed the bounds checking boundary fixes into the "Strip > sample registers" patch.
The array bounds checking now correctly uses > 'orig_sample_type' to traverse the event payload, preventing heap > corruption when dealing with events that have had their registers > stripped by the ASLR tool pipeline. > - Patch 2 & 3: Rebased the commit series to properly isolate the sample > address remapping logic from the register stripping logic. > - Patch 2 & 3: Expanded commit messages to extensively document the > cross-endian behavior of 'perf inject'. Because 'perf inject' effectively > acts as an endianness converter (writing a host-endian PERF_MAGIC and > flushing events exactly as they sit in memory after being byte-swapped > by perf_event__all64_swap), all injected events must be perfectly > constructed in the host's native endianness. Specifically, > perf_event__all64_swap byte-swaps the raw 64-bit payloads, which causes > 32-bit sequential fields like PERF_SAMPLE_TID (containing pid and tid) > to have their ordering reversed in memory (e.g., [BE_pid][BE_tid] becomes > [LE_tid][LE_pid]). The ASLR tool's sample construction logic was > expanded to explicitly unpack these fields and repack them sequentially > via unions to guarantee a strictly host-endian layout that resolves > these inversion anomalies. Similarly, branch stack flags (which are > modified in-place to host-endian bitfields by the parser) are copied > directly to the newly synthesized event, and 'needs_swap=false' is explicitly > used when re-parsing the synthesized event to prevent erroneous double > swapping. > - Series: Verified cross-endian robustness via the sashiko analyzer. > > Changes since v17: > - Patch 2: Reordered ksymbol deletion logic to ensure > `perf_event__process_ksymbol` deletes the map *after* the > `aslr_tool__findnew_mapping` translates the unregister offsets. > - Patch 2: Changed `aslr_tool__delete` to cleanly handle guest machine > deletion memory leaks. 
> - Patch 2: Resolved read-only segfaults on memory-mapped perf.data > headers during attribute stripping by using deep copies in > `perf_event__repipe_attr`. > - Patch 2: Fixed user space remap invariant logic to include > `(start - map__start(al.map))` preventing negative overflows on module > offset boundaries. > - Patch 3: Removed duplicate `bswap_64` payload byte-swapping inside the > array logic, allowing the host endianness macros `COPY_U64()` to > handle it dynamically. > - Patch 3: Fixed LBR branch sample starvation by explicitly reading branch > counters instead of dropping the entire sample. > - Patch 5: Fixed test flakiness by grepping out physical hex addresses > `0x[0-9a-f]{8,}` instead of matching exact address strings. > - Patch 5: Parameterized temp reports and updated test to scale with > `/dev/urandom` continuous random reads. > - Patch Series: Added Signed-off-by tags uniformly and Assisted-by tags to > track assistance. > > Changes since v16: > - Patch 2: Refactored inline ASLR stripping logic out of builtin-inject.c > and into dedicated helpers (aslr_tool__strip_attr_event and > aslr_tool__strip_evlist) in aslr.c to better separate concerns. > - Patch 2: Fixed guest machine allocation memory leak in > aslr_tool__delete() where machines__exit() explicitly skipped freeing > the guest processes tree. > - Patch 3: Fixed bounds-check violations during cross-endian parsing inside > aslr_tool__process_sample() by correctly applying bswap_64() to raw > offsets, iteration counts, sizes, and addresses prior to logical > evaluation when orig_needs_swap is active. > - Patch 4: Fixed pipe mode parser misalignment bug by safely fetching > needs_swap from the initialized evsel rather than blindly intercepting > HEADER_ATTR events prior to session parsing. > - Patch 4: Resolved checkpatch.pl line length warnings in the bswap_64 > endianness swapping logic. > - Patch Series: Reordered the final two patches. 
"perf aslr: Strip > sample registers" is now Patch 4, and "perf test: Add inject ASLR > test" is now Patch 5. This ensures the register stripping logic > is fully introduced before the comprehensive shell tests validate it, > preventing bisectability test failures and easing merge conflicts. > - Patch 5: Fixed "User registers stripping test" starvation when run as > root by explicitly using '-e cycles:u' during recording, preventing > the ring buffer from overflowing with kernel samples. > > Changes since v15: > - Patch 2: Added bounds checking for event->header.size before writing > to breakpoint fields to avoid heap buffer overflow on older ABI events. > - Patch 2: Fixed asymmetric calculation bug in aslr_tool__findnew_mapping() > where pgoff for anonymous kernel memory was not properly subtracted upon > insertion, causing the lookup addition to overflow. > - Patch 2: Added detailed comments documenting the symmetric lookup and > insertion math for unmapped and mapped memory blocks. > - Patch 5: Add missing kprobe and uprobe scrubbing of config1 and > config2 during aslr_tool__strip_evlist() to strictly conform with > repipe constraints. > > Changes since v14: > - Patch 2: Removed unnecessary vertical whitespace in builtin-inject.c. > - Patch 2: Added comments explaining why pgoff is assigned for > anonymous memory maps to prevent ASLR leaks. > - Patch 2: Removed orig_last_end tracking and refactored contiguous mapping > detection to use thread__find_map(..., start - 1, ...) based on Gabriel's > feedback. > - Patch 2: Scrub kprobe/uprobe event config1 and config2 fields to prevent > address leaks. > - Patch 2: Overwrite pgoff with the remapped start address for anonymous > mappings (detected via is_anon_memory and is_no_dso_memory). > - Patch 3: Fix C90 mixed declaration error for orig_needs_swap. > - Patch 3: Temporarily disable evsel->needs_swap during the secondary > evsel__parse_sample() call to prevent branch stack double-swapping bugs. 
> > Changes since v13: > - Patch 2: Added a NULL check for env before calling > perf_env__kernel_is_64_bit(env) to prevent potential segfaults if the > recorded environment has no headers. > - Patch 5: Fixed sample_size and id_pos going out of sync during > aslr_tool__strip_evlist() and aslr_tool__restore_evlist(). Instead of > using evsel__reset_sample_bit(), which was acting as a no-op due to > early bit clearing and corrupted sample_size, the tool now directly > updates sample_type and recomputes sample_size/id_pos dynamically. > Added orig_sample_size to aslr_evsel_priv to correctly restore the > state. > > Changes since v12: > - Patch 2: Fixed potential NULL pointer dereference in > remap_addresses__hash() when handling unmapped memory events (key->dso > is NULL) under REFCNT_CHECKING. > - Patch 2: Dynamically detect machine architecture bitness via > perf_env__kernel_is_64_bit() to select appropriate kernel_space_start > boundaries, avoiding 64-bit address injection on 32-bit platforms. > > Changes since v11: > - Patch 1: Fixed struct dso name accessor in maps.c by using > dso__name() instead of ->name. > - Patch 2: Fixed hash function in aslr.c to hash the underlying > dso pointer using RC_CHK_ACCESS to support reference count checking. > > Changes since v10: > - Patch 1: Added explicit tracking array logic in maps__load_maps() > to correctly accumulate valid maps (skipping NULL entries after > failures) and safely return the exact populated count, resolving > out-of-bounds pointer iteration panics. > - Patch 3: Fixed endianness bug during cross-endian sample parsing > by passing evsel->needs_swap instead of false to __evsel__parse_sample > in aslr.c, ensuring correct 32-bit field byte unswapping for packed > fields. Refactored evsel__parse_sample to take a needs_swap argument > via __evsel__parse_sample. 
> - Patch 4: Fixed inject_aslr.sh exit code handling in trap functions > to capture and propagate the correct pipeline failure status code > instead of unconditionally returning success or failing the test. > > Changes since v9: > - Patch 1: Added `-ENOMEM` error check inside > `maps__find_symbol_by_name()` and return `NULL` early. Added map > sorting state invalidation on early return in `maps__load_maps()`. > - Patch 2: Fixed encapsulation by using `thread__maps()` and > `thread__pid()` accessors in `aslr_tool__findnew_mapping()`. Added > `pr_warning_once` warning when raw auxtrace data is dropped. > - Patch 3: Fixed encapsulation by using `thread__maps()` and > `thread__pid()` accessors in `aslr_tool__remap_address()`. Wrapped > `evsel__parse_sample()` to temporarily disable `needs_swap` to avoid > branch stack endianness corruption on cross-endian files. Fixed ISO > C90 warning for declaration-after-statement for `orig_needs_swap`. > - Patch 4: Fixed duplicate cleanup by explicitly removing trap > handlers (`trap - EXIT TERM INT`) inside the `cleanup()` function. > - Patch 5: Fixed heap corruption by adding size bounds checking before > writing to `sample_regs_user` and `sample_regs_intr` fields. Added > missing register mask clearing logic for the `itrace` synthesis path > of `perf_event__repipe_attr()`. > > Ian Rogers (5): > perf maps: Add maps__mutate_mapping > perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking > perf inject/aslr: Implement sample address remapping > perf aslr: Strip sample registers > perf test: Add inject ASLR test The sashiko reviews are at: https://sashiko.dev/#/patchset/20260611164122.3974068-1-irogers%40google.com To summarize: Patch 2: * TOCTOU if underlying event buffer mmaps change. Not an issue as rewriting a perf.data file while it is being read is out of scope. Patch 3: * Mapping addresses to 0 for unknown mappings is criticized but the proposed alternative doesn't hide ASLR. 
This will cluster things at address 0, but the fix is simply to ensure no MMAPs are missing. * Cross-endian issues, but as explained previously, these are out of scope. The fix for the clang build issue reported by James and the disabling of kernel testing on ARM are both in the v20 series. So I think the patches are ready for review/merging. Thanks, Ian > tools/perf/builtin-inject.c | 81 +- > tools/perf/tests/shell/inject_aslr.sh | 533 ++++++++++ > tools/perf/util/Build | 1 + > tools/perf/util/aslr.c | 1406 +++++++++++++++++++++++++ > tools/perf/util/aslr.h | 44 + > tools/perf/util/evsel.c | 6 +- > tools/perf/util/evsel.h | 10 +- > tools/perf/util/machine.c | 32 +- > tools/perf/util/maps.c | 148 ++- > tools/perf/util/maps.h | 3 + > tools/perf/util/symbol-elf.c | 41 +- > tools/perf/util/symbol.c | 17 +- > 12 files changed, 2251 insertions(+), 71 deletions(-) > create mode 100755 tools/perf/tests/shell/inject_aslr.sh > create mode 100644 tools/perf/util/aslr.c > create mode 100644 tools/perf/util/aslr.h > > -- > 2.54.0.1099.g489fc7bff1-goog > ^ permalink raw reply [flat|nested] 183+ messages in thread
end of thread, other threads:[~2026-06-11 18:29 UTC | newest] Thread overview: 183+ messages (download: mbox.gz follow: Atom feed -- links below jump to the message on this page -- 2026-04-24 22:05 [PATCH v1 1/2] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses Ian Rogers 2026-04-24 22:05 ` [PATCH v1 2/2] perf test: Add inject ASLR test Ian Rogers 2026-04-24 22:47 ` sashiko-bot 2026-04-24 22:36 ` [PATCH v1 1/2] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses sashiko-bot 2026-04-25 2:05 ` [PATCH v2 " Ian Rogers 2026-04-25 2:05 ` [PATCH v2 2/2] perf test: Add inject ASLR test Ian Rogers 2026-05-04 3:51 ` [PATCH v3 0/4] perf tools: Add inject --aslr feature and prerequisite robustness fixes Ian Rogers 2026-05-04 3:51 ` [PATCH v3 1/4] perf sched: Add missing mmap2 handler in timehist Ian Rogers 2026-05-04 3:51 ` [PATCH v3 2/4] perf tool: Fix missing schedstat delegates and dont_split_sample_group in delegate_tool Ian Rogers 2026-05-04 3:51 ` [PATCH v3 3/4] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses Ian Rogers 2026-05-04 4:51 ` sashiko-bot 2026-05-04 3:51 ` [PATCH v3 4/4] perf test: Add inject ASLR test Ian Rogers 2026-05-04 5:02 ` sashiko-bot 2026-05-04 7:29 ` [PATCH v4 0/4] perf tools: Add inject --aslr feature and prerequisite robustness fixes Ian Rogers 2026-05-04 7:29 ` [PATCH v4 1/4] perf sched: Add missing mmap2 handler in timehist Ian Rogers 2026-05-04 7:29 ` [PATCH v4 2/4] perf tool: Fix missing schedstat delegates and dont_split_sample_group in delegate_tool Ian Rogers 2026-05-04 7:29 ` [PATCH v4 3/4] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses Ian Rogers 2026-05-04 8:39 ` sashiko-bot 2026-05-04 7:29 ` [PATCH v4 4/4] perf test: Add inject ASLR test Ian Rogers 2026-05-04 8:48 ` sashiko-bot 2026-05-04 8:23 ` [PATCH v4 0/4] perf tools: Add inject --aslr feature and prerequisite robustness fixes Ian Rogers 2026-05-06 0:45 ` [PATCH v5 0/5] " Ian Rogers 2026-05-06 0:45 ` 
[PATCH v5 1/5] perf sched: Add missing mmap2 handler in timehist Ian Rogers 2026-05-06 13:22 ` Arnaldo Carvalho de Melo 2026-05-06 16:16 ` Ian Rogers 2026-05-06 0:45 ` [PATCH v5 2/5] perf tool: Fix missing schedstat delegates and dont_split_sample_group in delegate_tool Ian Rogers 2026-05-06 0:45 ` [PATCH v5 3/5] perf symbols: Fix map removal sequence inside dso__process_kernel_symbol() Ian Rogers 2026-05-06 1:45 ` sashiko-bot 2026-05-06 0:45 ` [PATCH v5 4/5] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses Ian Rogers 2026-05-06 2:40 ` sashiko-bot 2026-05-06 18:52 ` Namhyung Kim 2026-05-06 20:01 ` Ian Rogers 2026-05-06 0:45 ` [PATCH v5 5/5] perf test: Add inject ASLR test Ian Rogers 2026-05-07 15:58 ` James Clark 2026-05-07 16:17 ` Ian Rogers 2026-05-08 10:42 ` James Clark 2026-05-08 10:49 ` James Clark 2026-05-08 8:27 ` [PATCH v6 0/6] perf tools: Add inject --aslr feature and prerequisite robustness fixes Ian Rogers 2026-05-08 8:27 ` [PATCH v6 1/6] perf sched: Add missing mmap2 handler in timehist Ian Rogers 2026-05-08 8:27 ` [PATCH v6 2/6] perf tool: Missing delegate_tool schedstat delegates and dont_split_sample_group Ian Rogers 2026-05-08 8:27 ` [PATCH v6 3/6] perf maps: Add maps__mutate_mapping Ian Rogers 2026-05-08 10:57 ` James Clark 2026-05-08 20:37 ` sashiko-bot 2026-05-11 7:07 ` Namhyung Kim 2026-06-10 10:40 ` James Clark 2026-05-08 8:27 ` [PATCH v6 4/6] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses Ian Rogers 2026-05-08 21:22 ` sashiko-bot 2026-05-11 7:32 ` Namhyung Kim 2026-05-08 8:27 ` [PATCH v6 5/6] perf test: Add inject ASLR test Ian Rogers 2026-05-08 13:29 ` James Clark 2026-05-08 14:29 ` James Clark 2026-05-11 7:34 ` Namhyung Kim 2026-05-08 8:27 ` [PATCH v6 6/6] perf aslr: Strip sample registers Ian Rogers 2026-05-08 21:49 ` sashiko-bot 2026-05-19 8:08 ` [PATCH v7 0/4] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-05-19 8:08 ` [PATCH v7 1/4] perf maps: Add 
maps__mutate_mapping Ian Rogers 2026-05-19 8:38 ` sashiko-bot 2026-05-19 8:08 ` [PATCH v7 2/4] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses Ian Rogers 2026-05-19 9:14 ` sashiko-bot 2026-05-19 8:08 ` [PATCH v7 3/4] perf test: Add inject ASLR test Ian Rogers 2026-05-19 8:08 ` [PATCH v7 4/4] perf aslr: Strip sample registers Ian Rogers 2026-05-19 9:55 ` sashiko-bot 2026-05-20 6:30 ` [PATCH v8 0/4] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-05-20 6:30 ` [PATCH v8 1/4] perf maps: Add maps__mutate_mapping Ian Rogers 2026-05-20 7:06 ` sashiko-bot 2026-05-20 6:30 ` [PATCH v8 2/4] perf inject/aslr: Add aslr tool to remap/obfuscate virtual addresses Ian Rogers 2026-05-20 7:50 ` sashiko-bot 2026-05-23 14:44 ` kernel test robot 2026-05-20 6:30 ` [PATCH v8 3/4] perf test: Add inject ASLR test Ian Rogers 2026-05-20 8:02 ` sashiko-bot 2026-05-20 6:30 ` [PATCH v8 4/4] perf aslr: Strip sample registers Ian Rogers 2026-05-20 8:41 ` sashiko-bot 2026-06-04 17:28 ` [PATCH v9 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-04 17:28 ` [PATCH v9 1/5] perf maps: Add maps__mutate_mapping Ian Rogers 2026-06-04 17:46 ` sashiko-bot 2026-06-04 17:28 ` [PATCH v9 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers 2026-06-04 17:45 ` sashiko-bot 2026-06-04 17:28 ` [PATCH v9 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers 2026-06-04 17:45 ` sashiko-bot 2026-06-04 17:28 ` [PATCH v9 4/5] perf test: Add inject ASLR test Ian Rogers 2026-06-04 17:40 ` sashiko-bot 2026-06-04 17:28 ` [PATCH v9 5/5] perf aslr: Strip sample registers Ian Rogers 2026-06-04 17:45 ` sashiko-bot 2026-06-05 6:06 ` [PATCH v10 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-05 6:06 ` [PATCH v10 1/5] perf maps: Add maps__mutate_mapping Ian Rogers 2026-06-05 6:20 ` sashiko-bot 2026-06-05 6:06 ` 
[PATCH v10 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers 2026-06-05 6:06 ` [PATCH v10 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers 2026-06-05 6:30 ` sashiko-bot 2026-06-05 6:06 ` [PATCH v10 4/5] perf test: Add inject ASLR test Ian Rogers 2026-06-05 6:13 ` sashiko-bot 2026-06-05 6:06 ` [PATCH v10 5/5] perf aslr: Strip sample registers Ian Rogers 2026-06-05 18:52 ` [PATCH v11 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-05 18:52 ` [PATCH v11 1/5] perf maps: Add maps__mutate_mapping Ian Rogers 2026-06-05 19:06 ` sashiko-bot 2026-06-05 18:52 ` [PATCH v11 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers 2026-06-05 19:07 ` sashiko-bot 2026-06-05 18:52 ` [PATCH v11 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers 2026-06-05 18:52 ` [PATCH v11 4/5] perf test: Add inject ASLR test Ian Rogers 2026-06-05 18:52 ` [PATCH v11 5/5] perf aslr: Strip sample registers Ian Rogers 2026-06-05 19:24 ` [PATCH v12 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-05 19:24 ` [PATCH v12 1/5] perf maps: Add maps__mutate_mapping Ian Rogers 2026-06-05 19:24 ` [PATCH v12 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers 2026-06-05 19:38 ` sashiko-bot 2026-06-05 19:24 ` [PATCH v12 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers 2026-06-05 19:24 ` [PATCH v12 4/5] perf test: Add inject ASLR test Ian Rogers 2026-06-05 19:24 ` [PATCH v12 5/5] perf aslr: Strip sample registers Ian Rogers 2026-06-05 19:48 ` [PATCH v13 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-05 19:48 ` [PATCH v13 1/5] perf maps: Add maps__mutate_mapping Ian Rogers 2026-06-05 19:48 ` [PATCH v13 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers 2026-06-05 20:06 ` 
sashiko-bot 2026-06-05 19:48 ` [PATCH v13 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers 2026-06-05 19:48 ` [PATCH v13 4/5] perf test: Add inject ASLR test Ian Rogers 2026-06-05 19:48 ` [PATCH v13 5/5] perf aslr: Strip sample registers Ian Rogers 2026-06-05 20:04 ` sashiko-bot 2026-06-05 20:56 ` [PATCH v14 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-05 20:56 ` [PATCH v14 1/5] perf maps: Add maps__mutate_mapping Ian Rogers 2026-06-05 20:56 ` [PATCH v14 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers 2026-06-05 21:12 ` sashiko-bot 2026-06-05 20:56 ` [PATCH v14 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers 2026-06-05 23:17 ` sashiko-bot 2026-06-05 20:56 ` [PATCH v14 4/5] perf test: Add inject ASLR test Ian Rogers 2026-06-05 21:05 ` sashiko-bot 2026-06-05 20:56 ` [PATCH v14 5/5] perf aslr: Strip sample registers Ian Rogers 2026-06-06 7:21 ` [PATCH v15 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-06 7:21 ` [PATCH v15 1/5] perf maps: Add maps__mutate_mapping Ian Rogers 2026-06-06 7:21 ` [PATCH v15 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers 2026-06-06 7:38 ` sashiko-bot 2026-06-06 7:21 ` [PATCH v15 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers 2026-06-06 7:36 ` sashiko-bot 2026-06-06 7:21 ` [PATCH v15 4/5] perf test: Add inject ASLR test Ian Rogers 2026-06-06 7:31 ` sashiko-bot 2026-06-06 7:21 ` [PATCH v15 5/5] perf aslr: Strip sample registers Ian Rogers 2026-06-06 7:38 ` sashiko-bot 2026-06-06 15:14 ` [PATCH v16 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-06 15:14 ` [PATCH v16 1/5] perf maps: Add maps__mutate_mapping Ian Rogers 2026-06-06 15:14 ` [PATCH v16 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers 2026-06-06 15:31 ` 
sashiko-bot 2026-06-06 15:14 ` [PATCH v16 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers 2026-06-06 15:30 ` sashiko-bot 2026-06-06 15:14 ` [PATCH v16 4/5] perf test: Add inject ASLR test Ian Rogers 2026-06-06 15:14 ` [PATCH v16 5/5] perf aslr: Strip sample registers Ian Rogers 2026-06-06 15:33 ` sashiko-bot 2026-06-07 6:09 ` [PATCH v17 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-07 6:09 ` [PATCH v17 1/5] perf maps: Add maps__mutate_mapping Ian Rogers 2026-06-07 6:09 ` [PATCH v17 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers 2026-06-07 8:27 ` sashiko-bot 2026-06-07 6:09 ` [PATCH v17 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers 2026-06-07 6:27 ` sashiko-bot 2026-06-07 6:09 ` [PATCH v17 4/5] perf aslr: Strip sample registers Ian Rogers 2026-06-07 6:27 ` sashiko-bot 2026-06-07 6:09 ` [PATCH v17 5/5] perf test: Add inject ASLR test Ian Rogers 2026-06-07 6:18 ` sashiko-bot 2026-06-07 21:36 ` [PATCH v18 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-07 21:36 ` [PATCH v18 1/5] perf maps: Add maps__mutate_mapping Ian Rogers 2026-06-07 21:55 ` sashiko-bot 2026-06-07 21:36 ` [PATCH v18 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers 2026-06-07 21:52 ` sashiko-bot 2026-06-07 21:36 ` [PATCH v18 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers 2026-06-07 21:56 ` sashiko-bot 2026-06-07 21:36 ` [PATCH v18 4/5] perf aslr: Strip sample registers Ian Rogers 2026-06-07 21:52 ` sashiko-bot 2026-06-07 21:37 ` [PATCH v18 5/5] perf test: Add inject ASLR test Ian Rogers 2026-06-08 5:48 ` [PATCH v19 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-08 5:48 ` [PATCH v19 1/5] perf maps: Add maps__mutate_mapping Ian Rogers 2026-06-08 5:48 ` [PATCH v19 2/5] perf inject/aslr: Add ASLR tool 
infrastructure and MMAP tracking Ian Rogers 2026-06-08 5:48 ` [PATCH v19 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers 2026-06-08 6:00 ` sashiko-bot 2026-06-08 5:48 ` [PATCH v19 4/5] perf aslr: Strip sample registers Ian Rogers 2026-06-08 5:48 ` [PATCH v19 5/5] perf test: Add inject ASLR test Ian Rogers 2026-06-10 13:26 ` James Clark 2026-06-10 16:15 ` Ian Rogers 2026-06-11 8:32 ` James Clark 2026-06-08 15:08 ` [PATCH v19 0/5] perf tools: Add inject --aslr feature, early maps loading, and decoupling fixes Ian Rogers 2026-06-11 16:41 ` [PATCH v20 0/5] perf tools: Add inject --aslr feature Ian Rogers 2026-06-11 16:41 ` [PATCH v20 1/5] perf maps: Add maps__mutate_mapping Ian Rogers 2026-06-11 16:41 ` [PATCH v20 2/5] perf inject/aslr: Add ASLR tool infrastructure and MMAP tracking Ian Rogers 2026-06-11 17:28 ` sashiko-bot 2026-06-11 16:41 ` [PATCH v20 3/5] perf inject/aslr: Implement sample address remapping Ian Rogers 2026-06-11 17:41 ` sashiko-bot 2026-06-11 16:41 ` [PATCH v20 4/5] perf aslr: Strip sample registers Ian Rogers 2026-06-11 16:41 ` [PATCH v20 5/5] perf test: Add inject ASLR test Ian Rogers 2026-06-11 18:29 ` [PATCH v20 0/5] perf tools: Add inject --aslr feature Ian Rogers
This is a public inbox; see mirroring instructions for how to clone and mirror all data and code used for this inbox