[PATCH v2 2/2] perf trace: Rewrite BPF code to pass the verifier

linux-perf-users.vger.kernel.org archive mirror
 help / color / mirror / Atom feed

From: Howard Chu <howardchu95@gmail.com>
To: peterz@infradead.org
Cc: mingo@redhat.com, acme@kernel.org, namhyung@kernel.org,
	mark.rutland@arm.com, alexander.shishkin@linux.intel.com,
	jolsa@kernel.org, irogers@google.com, adrian.hunter@intel.com,
	kan.liang@linux.intel.com, linux-perf-users@vger.kernel.org,
	linux-kernel@vger.kernel.org, james.clark@linaro.org,
	alan.maguire@oracle.com, Howard Chu <howardchu95@gmail.com>
Subject: [PATCH v2 2/2] perf trace: Rewrite BPF code to pass the verifier
Date: Thu, 10 Oct 2024 19:14:02 -0700	[thread overview]
Message-ID: <20241011021403.4089793-3-howardchu95@gmail.com> (raw)
In-Reply-To: <20241011021403.4089793-1-howardchu95@gmail.com>

Rewrite the code to add more memory bound checking in order to pass the
BPF verifier, no logic is changed.

This rewrite is centered around two main ideas:

- Always use a variable instead of an expression in if block's condition,
  so BPF verifier keeps track of the correct register.
- Delay the check as late as possible, just before the BPF function
  call.

Things that can be done better still:

- Instead of allowing a theoretical maximum of a 6-argument augmentation
  payload, reduce the payload to a smaller fixed size.

Signed-off-by: Howard Chu <howardchu95@gmail.com>
---
 .../bpf_skel/augmented_raw_syscalls.bpf.c     | 122 ++++++++++--------
 1 file changed, 67 insertions(+), 55 deletions(-)

diff --git a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
index b2f17cca014b..9ae459faac4b 100644
--- a/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
+++ b/tools/perf/util/bpf_skel/augmented_raw_syscalls.bpf.c
@@ -157,9 +157,9 @@ static inline int augmented__output(void *ctx, struct augmented_args_payload *ar
 	return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len);
 }
 
-static inline int augmented__beauty_output(void *ctx, void *data, int len)
+static inline int augmented__beauty_output(void *ctx, struct beauty_payload_enter *args, int len)
 {
-	return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, data, len);
+	return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len);
 }
 
 static inline
@@ -277,25 +277,31 @@ int sys_enter_rename(struct syscall_enter_args *args)
 	struct augmented_args_payload *augmented_args = augmented_args_payload();
 	const void *oldpath_arg = (const void *)args->args[0],
 		   *newpath_arg = (const void *)args->args[1];
-	unsigned int len = sizeof(augmented_args->args), oldpath_len, newpath_len;
+	unsigned int len = sizeof(augmented_args->args), oldpath_len, newpath_len, aligned_size;
 
         if (augmented_args == NULL)
-                return 1; /* Failure: don't filter */
+		goto failure;
 
 	len += 2 * sizeof(u64); // The overhead of size and err, just before the payload...
 
 	oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value));
-	augmented_args->arg.size = PERF_ALIGN(oldpath_len + 1, sizeof(u64));
-	len += augmented_args->arg.size;
+	aligned_size = PERF_ALIGN(oldpath_len + 1, sizeof(u64));
+	augmented_args->arg.size = aligned_size;
+	len += aligned_size;
+
+	/* Every read from userspace is limited to value size */
+	if (aligned_size > sizeof(augmented_args->arg.value))
+		goto failure;
 
-	struct augmented_arg *arg2 = (void *)&augmented_args->arg.value + augmented_args->arg.size;
+	struct augmented_arg *arg2 = (void *)&augmented_args->arg.value + aligned_size;
 
 	newpath_len = augmented_arg__read_str(arg2, newpath_arg, sizeof(augmented_args->arg.value));
 	arg2->size = newpath_len;
-
 	len += newpath_len;
 
 	return augmented__output(args, augmented_args, len);
+failure:
+	return 1; /* Failure: don't filter */
 }
 
 SEC("tp/syscalls/sys_enter_renameat2")
@@ -304,25 +310,31 @@ int sys_enter_renameat2(struct syscall_enter_args *args)
 	struct augmented_args_payload *augmented_args = augmented_args_payload();
 	const void *oldpath_arg = (const void *)args->args[1],
 		   *newpath_arg = (const void *)args->args[3];
-	unsigned int len = sizeof(augmented_args->args), oldpath_len, newpath_len;
+	unsigned int len = sizeof(augmented_args->args), oldpath_len, newpath_len, aligned_size;
 
         if (augmented_args == NULL)
-                return 1; /* Failure: don't filter */
+		goto failure;
 
 	len += 2 * sizeof(u64); // The overhead of size and err, just before the payload...
 
 	oldpath_len = augmented_arg__read_str(&augmented_args->arg, oldpath_arg, sizeof(augmented_args->arg.value));
-	augmented_args->arg.size = PERF_ALIGN(oldpath_len + 1, sizeof(u64));
-	len += augmented_args->arg.size;
+	aligned_size = PERF_ALIGN(oldpath_len + 1, sizeof(u64));
+	augmented_args->arg.size = aligned_size;
+	len += aligned_size;
 
-	struct augmented_arg *arg2 = (void *)&augmented_args->arg.value + augmented_args->arg.size;
+	/* Every read from userspace is limited to value size */
+	if (aligned_size > sizeof(augmented_args->arg.value))
+		goto failure;
+
+	struct augmented_arg *arg2 = (void *)&augmented_args->arg.value + aligned_size;
 
 	newpath_len = augmented_arg__read_str(arg2, newpath_arg, sizeof(augmented_args->arg.value));
 	arg2->size = newpath_len;
-
 	len += newpath_len;
 
 	return augmented__output(args, augmented_args, len);
+failure:
+	return 1; /* Failure: don't filter */
 }
 
 #define PERF_ATTR_SIZE_VER0     64      /* sizeof first published struct */
@@ -422,12 +434,12 @@ static bool pid_filter__has(struct pids_filtered *pids, pid_t pid)
 
 static int augment_sys_enter(void *ctx, struct syscall_enter_args *args)
 {
-	bool augmented, do_output = false;
-	int zero = 0, size, aug_size, index, output = 0,
-	    value_size = sizeof(struct augmented_arg) - offsetof(struct augmented_arg, value);
+	bool do_augment = false;
+	int zero = 0, value_size = sizeof(struct augmented_arg) - sizeof(u64);
 	unsigned int nr, *beauty_map;
 	struct beauty_payload_enter *payload;
-	void *arg, *payload_offset;
+	void *payload_offset, *value_offset;
+	u64 len = 0; /* has to be u64, otherwise it won't pass the verifier */
 
 	/* fall back to do predefined tail call */
 	if (args == NULL)
@@ -436,16 +448,18 @@ static int augment_sys_enter(void *ctx, struct syscall_enter_args *args)
 	/* use syscall number to get beauty_map entry */
 	nr             = (__u32)args->syscall_nr;
 	beauty_map     = bpf_map_lookup_elem(&beauty_map_enter, &nr);
+	if (beauty_map == NULL)
+		return 1;
 
 	/* set up payload for output */
 	payload        = bpf_map_lookup_elem(&beauty_payload_enter_map, &zero);
 	payload_offset = (void *)&payload->aug_args;
-
-	if (beauty_map == NULL || payload == NULL)
+	if (payload == NULL)
 		return 1;
 
 	/* copy the sys_enter header, which has the syscall_nr */
 	__builtin_memcpy(&payload->args, args, sizeof(struct syscall_enter_args));
+	len += sizeof(struct syscall_enter_args);
 
 	/*
 	 * Determine what type of argument and how many bytes to read from user space, using the
@@ -457,52 +471,50 @@ static int augment_sys_enter(void *ctx, struct syscall_enter_args *args)
 	 * buffer: -1 * (index of paired len) -> value of paired len (maximum: TRACE_AUG_MAX_BUF)
 	 */
 	for (int i = 0; i < 6; i++) {
-		arg = (void *)args->args[i];
-		augmented = false;
-		size = beauty_map[i];
-		aug_size = size; /* size of the augmented data read from user space */
+		int augment_size = beauty_map[i];
+		unsigned int augment_size_with_header;
+		void *addr = (void *)args->args[i];
+		bool is_augmented = false;
 
-		if (size == 0 || arg == NULL)
+		if (augment_size == 0 || addr == NULL)
 			continue;
 
-		if (size == 1) { /* string */
-			aug_size = bpf_probe_read_user_str(((struct augmented_arg *)payload_offset)->value, value_size, arg);
-			/* minimum of 0 to pass the verifier */
-			if (aug_size < 0)
-				aug_size = 0;
-
-			augmented = true;
-		} else if (size > 0 && size <= value_size) { /* struct */
-			if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, size, arg))
-				augmented = true;
-		} else if (size < 0 && size >= -6) { /* buffer */
-			index = -(size + 1);
-			aug_size = args->args[index];
-
-			if (aug_size > TRACE_AUG_MAX_BUF)
-				aug_size = TRACE_AUG_MAX_BUF;
-
-			if (aug_size > 0) {
-				if (!bpf_probe_read_user(((struct augmented_arg *)payload_offset)->value, aug_size, arg))
-					augmented = true;
-			}
+		value_offset = ((struct augmented_arg *)payload_offset)->value;
+
+		if (augment_size == 1) { /* string */
+			augment_size = bpf_probe_read_user_str(value_offset, value_size, addr);
+			is_augmented = true;
+		} else if (augment_size > 1 && augment_size <= value_size) { /* struct */
+			if (!bpf_probe_read_user(value_offset, value_size, addr))
+				is_augmented = true;
+		} else if (augment_size < 0 && augment_size >= -6) { /* buffer */
+			int index = -(augment_size + 1);
+
+			augment_size = args->args[index] > TRACE_AUG_MAX_BUF ? TRACE_AUG_MAX_BUF : args->args[index];
+			if (!bpf_probe_read_user(value_offset, augment_size, addr))
+				is_augmented = true;
 		}
 
-		/* write data to payload */
-		if (augmented) {
-			int written = offsetof(struct augmented_arg, value) + aug_size;
+		/* Augmented data size is limited to value size */
+		if (augment_size > value_size)
+			augment_size = value_size;
+
+		/* Explicitly define this variable to pass the verifier */
+		augment_size_with_header = sizeof(u64) + augment_size;
 
-			((struct augmented_arg *)payload_offset)->size = aug_size;
-			output += written;
-			payload_offset += written;
-			do_output = true;
+		/* Write data to payload */
+		if (is_augmented && augment_size_with_header <= sizeof(struct augmented_arg)) {
+			((struct augmented_arg *)payload_offset)->size = augment_size;
+			do_augment      = true;
+			len            += augment_size_with_header;
+			payload_offset += augment_size_with_header;
 		}
 	}
 
-	if (!do_output)
+	if (!do_augment || len > sizeof(struct beauty_payload_enter))
 		return 1;
 
-	return augmented__beauty_output(ctx, payload, sizeof(struct syscall_enter_args) + output);
+	return augmented__beauty_output(ctx, payload, len);
 }
 
 SEC("tp/raw_syscalls/sys_enter")
-- 
2.43.0

next prev parent reply	other threads:[~2024-10-11  2:14 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-10-11  2:14 [PATCH v2 0/2] perf trace: Fix support for the new BPF feature in clang 12 Howard Chu
2024-10-11  2:14 ` [PATCH v2 1/2] perf build: Change the clang check back to 12.0.1 Howard Chu
2024-10-11  2:14 ` Howard Chu [this message]
2024-10-11  8:18 ` [PATCH v2 0/2] perf trace: Fix support for the new BPF feature in clang 12 James Clark
2024-10-15 18:32 ` Namhyung Kim
2024-10-15 19:35   ` Arnaldo Carvalho de Melo
2024-10-15 19:58     ` Arnaldo Carvalho de Melo
2024-10-15 20:37       ` Arnaldo Carvalho de Melo
2024-10-15 21:37         ` Song Liu
     [not found]           ` <CA+JHD905Xtbb2OYqm3mGbh3C1dKOd-avnC=01=uJfTVEnmA1zQ@mail.gmail.com>
2024-10-15 23:20             ` Song Liu
2024-10-16  2:06         ` Namhyung Kim
2024-10-16 14:22           ` Arnaldo Carvalho de Melo
2024-10-22 17:04             ` Namhyung Kim
2024-10-22 18:33               ` Arnaldo Carvalho de Melo
2024-10-22 23:52                 ` Namhyung Kim
2024-10-23 14:39                   ` Arnaldo Carvalho de Melo
2024-10-23 21:57                     ` Namhyung Kim

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:b2f17cca014 dfblob:9ae459faac4 )
 OR (
bs:"[PATCH v2 2/2] perf trace: Rewrite BPF code to pass the verifier" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20241011021403.4089793-3-howardchu95@gmail.com \
    --to=howardchu95@gmail.com \
    --cc=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=alan.maguire@oracle.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=irogers@google.com \
    --cc=james.clark@linaro.org \
    --cc=jolsa@kernel.org \
    --cc=kan.liang@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).