From: Howard Chu <howardchu95@gmail.com>
To: peterz@infradead.org
Cc: mingo@redhat.com, acme@kernel.org, namhyung@kernel.org,
mark.rutland@arm.com, alexander.shishkin@linux.intel.com,
jolsa@kernel.org, irogers@google.com, adrian.hunter@intel.com,
kan.liang@linux.intel.com, linux-perf-users@vger.kernel.org,
linux-kernel@vger.kernel.org, james.clark@linaro.org,
alan.maguire@oracle.com, Howard Chu <howardchu95@gmail.com>
Subject: [PATCH v2 2/2] perf trace: Rewrite BPF code to pass the verifier
Date: Thu, 10 Oct 2024 19:14:02 -0700 [thread overview]
Message-ID: <20241011021403.4089793-3-howardchu95@gmail.com> (raw)
In-Reply-To: <20241011021403.4089793-1-howardchu95@gmail.com>
Rewrite the code to add more memory bounds checking in order to pass the
BPF verifier; no logic is changed.
This rewrite is centered around two main ideas:
- Always use a variable instead of an expression in an if block's condition,
so that the BPF verifier keeps track of the correct register.
- Perform the check as late as possible, just before the BPF function
call.
Things that can be done better still:
- Instead of allowing a theoretical maximum of a 6-argument augmentation
payload, reduce the payload to a smaller fixed size.
Signed-off-by: Howard Chu <howardchu95@gmail.com>
---
.../bpf_skel/augmented_raw_syscalls.bpf.c | 122 ++++++++++--------
1 file changed, 67 insertions(+), 55 deletions(-)
diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
index b2f17cca014b..9ae459faac4b 100644
--- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
+++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
@@ -157,9 +157,9 @@ static inline int augmented__output(void *ctx, struct augmented_args_payload *ar
return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len);
}
-static inline int augmented__beauty_output(void *ctx, void *data, int len)
+static inline int augmented__beauty_output(void *ctx, struct beauty_payload_enter *args, int len)
{
- return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, data, len);
+ return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len);
}
static inline
@@ -277,25 +277,31 @@ int sys_enter_rename(struct syscall_enter_args *args)
struct augmented_args_payload *augmented_args = augmented_args_payload();
const void *oldpath_arg = (const void *)args->args[0],
*newpath_arg = (const void *)args->args[1];
- unsigned int len = sizeof(augmented_args->args), oldpath_len, newpath_len;
+ unsigned int len = sizeof(augmented_args->args), oldpath_len, newpath_len, aligned_size;
if (augmented_args == NULL)
- return 1; /* Failure: don't filter */
+ goto failure;
len += 2 * sizeof(u64); // The overhead of size and err, just before the payload...
oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value));
- augmented_args->arg.size = PERF_ALIGN(oldpath_len + 1, sizeof(u64));
- len += augmented_args->arg.size;
+ aligned_size = PERF_ALIGN(oldpath_len + 1, sizeof(u64));
+ augmented_args->arg.size = aligned_size;
+ len += aligned_size;
+
+ /* Every read from userspace is limited to value size */
+ if (aligned_size > sizeof(augmented_args->arg.value))
+ goto failure;
- struct augmented_arg *arg2 = (void *)&augmented_args->arg.value + augmented_args->arg.size;
+ struct augmented_arg *arg2 = (void *)&augmented_args->arg.value + aligned_size;
newpath_len = augmented_arg__read_str(arg2, newpath_arg, sizeof(augmented_args->arg.value));
arg2->size = newpath_len;
-
len += newpath_len;
return augmented__output(args, augmented_args, len);
+failure:
+ return 1; /* Failure: don't filter */
}
SEC("tp/syscalls/sys_enter_renameat2")
@@ -304,25 +310,31 @@ int sys_enter_renameat2(struct syscall_enter_args *args)
struct augmented_args_payload *augmented_args = augmented_args_payload();
const void *oldpath_arg = (const void *)args->args[1],
*newpath_arg = (const void *)args->args[3];
- unsigned int len = sizeof(augmented_args->args), oldpath_len, newpath_len;
+ unsigned int len = sizeof(augmented_args->args), oldpath_len, newpath_len, aligned_size;
if (augmented_args == NULL)
- return 1; /* Failure: don't filter */
+ goto failure;
len += 2 * sizeof(u64); // The overhead of size and err, just before the payload...
oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value));
- augmented_args->arg.size = PERF_ALIGN(oldpath_len + 1, sizeof(u64));
- len += augmented_args->arg.size;
+ aligned_size = PERF_ALIGN(oldpath_len + 1, sizeof(u64));
+ augmented_args->arg.size = aligned_size;
+ len += aligned_size;
- struct augmented_arg *arg2 = (void *)&augmented_args->arg.value + augmented_args->arg.size;
+ /* Every read from userspace is limited to value size */
+ if (aligned_size > sizeof(augmented_args->arg.value))
+ goto failure;
+
+ struct augmented_arg *arg2 = (void *)&augmented_args->arg.value + aligned_size;
newpath_len = augmented_arg__read_str(arg2, newpath_arg, sizeof(augmented_args->arg.value));
arg2->size = newpath_len;
-
len += newpath_len;
return augmented__output(args, augmented_args, len);
+failure:
+ return 1; /* Failure: don't filter */
}
#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */
@@ -422,12 +434,12 @@ static bool pid_filter__has(struct pids_filtered *pids, pid_t pid)
static int augment_sys_enter(void *ctx, struct syscall_enter_args *args)
{
- bool augmented, do_output = false;
- int zero = 0, size, aug_size, index, output = 0,
- value_size = sizeof(struct augmented_arg) - offsetof(struct augmented_arg, value);
+ bool do_augment = false;
+ int zero = 0, value_size = sizeof(struct augmented_arg) - sizeof(u64);
unsigned int nr, *beauty_map;
struct beauty_payload_enter *payload;
- void *arg, *payload_offset;
+ void *payload_offset, *value_offset;
+ u64 len = 0; /* has to be u64, otherwise it won't pass the verifier */
/* fall back to do predefined tail call */
if (args == NULL)
@@ -436,16 +448,18 @@ static int augment_sys_enter(void *ctx, struct syscall_enter_args *args)
/* use syscall number to get beauty_map entry */
nr = (__u32)args->syscall_nr;
beauty_map = bpf_map_lookup_elem(&beauty_map_enter, &nr);
+ if (beauty_map == NULL)
+ return 1;
/* set up payload for output */
payload = bpf_map_lookup_elem(&beauty_payload_enter_map, &zero);
payload_offset = (void *)&payload->aug_args;
-
- if (beauty_map == NULL || payload == NULL)
+ if (payload == NULL)
return 1;
/* copy the sys_enter header, which has the syscall_nr */
__builtin_memcpy(&payload->args, args, sizeof(struct syscall_enter_args));
+ len += sizeof(struct syscall_enter_args);
/*
* Determine what type of argument and how many bytes to read from user space, using the
@@ -457,52 +471,50 @@ static int augment_sys_enter(void *ctx, struct syscall_enter_args *args)
* buffer: -1 * (index of paired len) -> value of paired len (maximum: TRACE_AUG_MAX_BUF)
*/
for (int i = 0; i < 6; i++) {
- arg = (void *)args->args[i];
- augmented = false;
- size = beauty_map[i];
- aug_size = size; /* size of the augmented data read from user space */
+ int augment_size = beauty_map[i];
+ unsigned int augment_size_with_header;
+ void *addr = (void *)args->args[i];
+ bool is_augmented = false;
- if (size == 0 || arg == NULL)
+ if (augment_size == 0 || addr == NULL)
continue;
- if (size == 1) { /* string */
- aug_size = bpf_probe_read_user_str(((struct augmented_arg *)payload_offset)->value, value_size, arg);
- /* minimum of 0 to pass the verifier */
- if (aug_size < 0)
- aug_size = 0;
-
- augmented = true;
- } else if (size > 0 && size <= value_size) { /* struct */
- if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, size, arg))
- augmented = true;
- } else if (size < 0 && size >= -6) { /* buffer */
- index = -(size + 1);
- aug_size = args->args[index];
-
- if (aug_size > TRACE_AUG_MAX_BUF)
- aug_size = TRACE_AUG_MAX_BUF;
-
- if (aug_size > 0) {
- if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, aug_size, arg))
- augmented = true;
- }
+ value_offset = ((struct augmented_arg *)payload_offset)->value;
+
+ if (augment_size == 1) { /* string */
+ augment_size = bpf_probe_read_user_str(value_offset, value_size, addr);
+ is_augmented = true;
+ } else if (augment_size > 1 && augment_size <= value_size) { /* struct */
+ if (!bpf_probe_read_user(value_offset, value_size, addr))
+ is_augmented = true;
+ } else if (augment_size < 0 && augment_size >= -6) { /* buffer */
+ int index = -(augment_size + 1);
+
+ augment_size = args->args[index] > TRACE_AUG_MAX_BUF ? TRACE_AUG_MAX_BUF : args->args[index];
+ if (!bpf_probe_read_user(value_offset, augment_size, addr))
+ is_augmented = true;
}
- /* write data to payload */
- if (augmented) {
- int written = offsetof(struct augmented_arg, value) + aug_size;
+ /* Augmented data size is limited to value size */
+ if (augment_size > value_size)
+ augment_size = value_size;
+
+ /* Explicitly define this variable to pass the verifier */
+ augment_size_with_header = sizeof(u64) + augment_size;
- ((struct augmented_arg *)payload_offset)->size = aug_size;
- output += written;
- payload_offset += written;
- do_output = true;
+ /* Write data to payload */
+ if (is_augmented && augment_size_with_header <= sizeof(struct augmented_arg)) {
+ ((struct augmented_arg *)payload_offset)->size = augment_size;
+ do_augment = true;
+ len += augment_size_with_header;
+ payload_offset += augment_size_with_header;
}
}
- if (!do_output)
+ if (!do_augment || len > sizeof(struct beauty_payload_enter))
return 1;
- return augmented__beauty_output(ctx, payload, sizeof(struct syscall_enter_args) + output);
+ return augmented__beauty_output(ctx, payload, len);
}
SEC("tp/raw_syscalls/sys_enter")
--
2.43.0
next prev parent reply other threads:[~2024-10-11 2:14 UTC|newest]
Thread overview: 17+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-10-11 2:14 [PATCH v2 0/2] perf trace: Fix support for the new BPF feature in clang 12 Howard Chu
2024-10-11 2:14 ` [PATCH v2 1/2] perf build: Change the clang check back to 12.0.1 Howard Chu
2024-10-11 2:14 ` Howard Chu [this message]
2024-10-11 8:18 ` [PATCH v2 0/2] perf trace: Fix support for the new BPF feature in clang 12 James Clark
2024-10-15 18:32 ` Namhyung Kim
2024-10-15 19:35 ` Arnaldo Carvalho de Melo
2024-10-15 19:58 ` Arnaldo Carvalho de Melo
2024-10-15 20:37 ` Arnaldo Carvalho de Melo
2024-10-15 21:37 ` Song Liu
[not found] ` <CA+JHD905Xtbb2OYqm3mGbh3C1dKOd-avnC=01=uJfTVEnmA1zQ@mail.gmail.com>
2024-10-15 23:20 ` Song Liu
2024-10-16 2:06 ` Namhyung Kim
2024-10-16 14:22 ` Arnaldo Carvalho de Melo
2024-10-22 17:04 ` Namhyung Kim
2024-10-22 18:33 ` Arnaldo Carvalho de Melo
2024-10-22 23:52 ` Namhyung Kim
2024-10-23 14:39 ` Arnaldo Carvalho de Melo
2024-10-23 21:57 ` Namhyung Kim
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20241011021403.4089793-3-howardchu95@gmail.com \
--to=howardchu95@gmail.com \
--cc=acme@kernel.org \
--cc=adrian.hunter@intel.com \
--cc=alan.maguire@oracle.com \
--cc=alexander.shishkin@linux.intel.com \
--cc=irogers@google.com \
--cc=james.clark@linaro.org \
--cc=jolsa@kernel.org \
--cc=kan.liang@linux.intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-perf-users@vger.kernel.org \
--cc=mark.rutland@arm.com \
--cc=mingo@redhat.com \
--cc=namhyung@kernel.org \
--cc=peterz@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).