* [tip:perf/core] perf augmented_raw_syscalls: Use a PERCPU_ARRAY map to copy more string bytes
@ 2019-04-05 11:35 tip-bot for Arnaldo Carvalho de Melo
0 siblings, 0 replies; only message in thread
From: tip-bot for Arnaldo Carvalho de Melo @ 2019-04-05 11:35 UTC (permalink / raw)
To: linux-tip-commits
Cc: lclaudio, yhs, brouer, namhyung, acme, linux-kernel, mingo,
borkmann, g.borello, kafai, tglx, andriin, adrian.hunter, hpa,
jolsa, wangnan0
Commit-ID: 59f3bd7802d3ff7e6ddcce600f361bed288a97dd
Gitweb: https://git.kernel.org/tip/59f3bd7802d3ff7e6ddcce600f361bed288a97dd
Author: Arnaldo Carvalho de Melo <acme@redhat.com>
AuthorDate: Fri, 22 Mar 2019 14:48:26 -0300
Committer: Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Mon, 1 Apr 2019 14:49:24 -0300
perf augmented_raw_syscalls: Use a PERCPU_ARRAY map to copy more string bytes
The previous method, copying to the BPF stack limited us in how many
bytes we could copy from strings, use a PERCPU_ARRAY map like devised by
the sysdig guys[1] to copy more bytes:
Before:
# trace --no-inherit -e openat touch `python -c "print "$s" 'a' * 2000"`
touch: cannot touch 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa': File name too!
long
openat(AT_FDCWD, "/etc/ld.so.cache", O_RDONLY|O_CLOEXEC) = 3
openat(AT_FDCWD, "/lib64/libc.so.6", O_RDONLY|O_CLOEXEC) = 3
openat(AT_FDCWD, "/usr/lib/locale/locale-archive", O_RDONLY|O_CLOEXEC) = 3
openat(AT_FDCWD, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", O_CREAT|O_NOCTTY|O_NONBLOCK|O_WRONLY, S_IRUGO|S_IWUGO) = -1 ENAMETOOLONG (File name too long)
openat(AT_FDCWD, "/usr/share/locale/locale.alias", O_RDONLY|O_CLOEXEC) = 3
openat(AT_FDCWD, "/usr/share/locale/en_US.UTF-8/LC_MESSAGES/coreutils.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
openat(AT_FDCWD, "/usr/share/locale/en_US.utf8/LC_MESSAGES/coreutils.mo", O_RDONLY) = -1 ENOENT (No such file or directory)
<SNIP some openat calls>
#
After:
[root@quaco acme]# trace --no-inherit -e openat touch `python -c "print "$s" 'a' * 2000"`
<STRIP what is the same as in the 'before' part>
openat(AT_FDCWD, "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa!
aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa", O_CREAT|O_NOCTTY!
|O_NONBLOC) = -1 ENAMETOOLONG (File name too long)
<STRIP what is the same as in the 'before' part>
If we leave something like 'perf trace -e string' to trace all syscalls
with a string, and then do some 'perf top', to get some annotation for
the augmented_raw_syscalls.o BPF program we get:
│ → callq *ffffffffc45576d1 ▒
│ augmented_args->filename.size = probe_read_str(&augmented_args->filename.value, ▒
0.05 │ mov %eax,0x40(%r13)
Looking with pahole, expanding types, asking for hex offsets and sizes,
and use of BTF type information to see what is at that 0x40 offset from
%r13:
# pahole -F btf -C augmented_args_filename --expand_types --hex /home/acme/git/perf/tools/perf/examples/bpf/augmented_raw_syscalls.o
struct augmented_args_filename {
struct syscall_enter_args {
long long unsigned int common_tp_fields; /* 0 0x8 */
long int syscall_nr; /* 0x8 0x8 */
long unsigned int args[6]; /* 0x10 0x30 */
} args; /* 0 0x40 */
/* --- cacheline 1 boundary (64 bytes) --- */
struct augmented_filename {
unsigned int size; /* 0x40 0x4 */
int reserved; /* 0x44 0x4 */
char value[4096]; /* 0x48 0x1000 */
} filename; /* 0x40 0x1008 */
/* size: 4168, cachelines: 66, members: 2 */
/* last cacheline: 8 bytes */
};
#
Then looking if PATH_MAX leaves some signature in the tests:
│ if (augmented_args->filename.size < sizeof(augmented_args->filename.value)) { ▒
│ cmp $0xfff,%rdi
0xfff == 4095
sizeof(augmented_args->filename.value) == PATH_MAX == 4096
[1] https://sysdig.com/blog/the-art-of-writing-ebpf-programs-a-primer/
Cc: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andrii Nakryiko <andriin@fb.com>
Cc: Daniel Borkmann <borkmann@iogearbox.net>
Cc: Gianluca Borello <g.borello@gmail.com>
Cc: Jesper Dangaard Brouer <brouer@redhat.com>
Cc: Jiri Olsa <jolsa@kernel.org>
Cc: Luis Cláudio Gonçalves <lclaudio@redhat.com>
cc: Martin Lau <kafai@fb.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Wang Nan <wangnan0@huawei.com>
Cc: Yonghong Song <yhs@fb.com>
Link: https://lkml.kernel.org/n/tip-76gce2d2ghzq537ubwhjkone@git.kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
tools/perf/examples/bpf/augmented_raw_syscalls.c | 46 ++++++++++++++----------
1 file changed, 28 insertions(+), 18 deletions(-)
diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c
index 9f8b31ad7a49..2422894a8194 100644
--- a/tools/perf/examples/bpf/augmented_raw_syscalls.c
+++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c
@@ -15,6 +15,7 @@
*/
#include <unistd.h>
+#include <linux/limits.h>
#include <pid_filter.h>
/* bpf-output associated map */
@@ -41,7 +42,7 @@ struct syscall_exit_args {
struct augmented_filename {
unsigned int size;
int reserved;
- char value[256];
+ char value[PATH_MAX];
};
/* syscalls where the first arg is a string */
@@ -119,23 +120,32 @@ struct augmented_filename {
pid_filter(pids_filtered);
+struct augmented_args_filename {
+ struct syscall_enter_args args;
+ struct augmented_filename filename;
+};
+
+bpf_map(augmented_filename_map, PERCPU_ARRAY, int, struct augmented_args_filename, 1);
+
SEC("raw_syscalls:sys_enter")
int sys_enter(struct syscall_enter_args *args)
{
- struct {
- struct syscall_enter_args args;
- struct augmented_filename filename;
- } augmented_args;
- struct syscall *syscall;
- unsigned int len = sizeof(augmented_args);
+ struct augmented_args_filename *augmented_args;
+ unsigned int len = sizeof(*augmented_args);
const void *filename_arg = NULL;
+ struct syscall *syscall;
+ int key = 0;
+
+ augmented_args = bpf_map_lookup_elem(&augmented_filename_map, &key);
+ if (augmented_args == NULL)
+ return 1;
if (pid_filter__has(&pids_filtered, getpid()))
return 0;
- probe_read(&augmented_args.args, sizeof(augmented_args.args), args);
+ probe_read(&augmented_args->args, sizeof(augmented_args->args), args);
- syscall = bpf_map_lookup_elem(&syscalls, &augmented_args.args.syscall_nr);
+ syscall = bpf_map_lookup_elem(&syscalls, &augmented_args->args.syscall_nr);
if (syscall == NULL || !syscall->enabled)
return 0;
/*
@@ -191,7 +201,7 @@ int sys_enter(struct syscall_enter_args *args)
* processor architecture, making the kernel part the same no matter what
* kernel version or processor architecture it runs on.
*/
- switch (augmented_args.args.syscall_nr) {
+ switch (augmented_args->args.syscall_nr) {
case SYS_ACCT:
case SYS_ADD_KEY:
case SYS_CHDIR:
@@ -263,20 +273,20 @@ int sys_enter(struct syscall_enter_args *args)
}
if (filename_arg != NULL) {
- augmented_args.filename.reserved = 0;
- augmented_args.filename.size = probe_read_str(&augmented_args.filename.value,
- sizeof(augmented_args.filename.value),
+ augmented_args->filename.reserved = 0;
+ augmented_args->filename.size = probe_read_str(&augmented_args->filename.value,
+ sizeof(augmented_args->filename.value),
filename_arg);
- if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) {
- len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size;
- len &= sizeof(augmented_args.filename.value) - 1;
+ if (augmented_args->filename.size < sizeof(augmented_args->filename.value)) {
+ len -= sizeof(augmented_args->filename.value) - augmented_args->filename.size;
+ len &= sizeof(augmented_args->filename.value) - 1;
}
} else {
- len = sizeof(augmented_args.args);
+ len = sizeof(augmented_args->args);
}
/* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */
- return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, &augmented_args, len);
+ return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, augmented_args, len);
}
SEC("raw_syscalls:sys_exit")
^ permalink raw reply related [flat|nested] only message in thread
only message in thread, other threads:[~2019-04-05 11:36 UTC | newest]
Thread overview: (only message) (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2019-04-05 11:35 [tip:perf/core] perf augmented_raw_syscalls: Use a PERCPU_ARRAY map to copy more string bytes tip-bot for Arnaldo Carvalho de Melo
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.