* [PATCH v5] bcc: Try use new API to create [k,u]probe with perf_event_open
2017-12-06 22:45 [PATCH v5 0/6] enable creating [k,u]probe with perf_event_open Song Liu
@ 2017-12-06 22:45 ` Song Liu
2017-12-06 22:45 ` [PATCH v5] perf_event_open.2: add type kprobe and uprobe Song Liu
` (9 subsequent siblings)
10 siblings, 0 replies; 20+ messages in thread
From: Song Liu @ 2017-12-06 22:45 UTC (permalink / raw)
To: peterz, rostedt, mingo, davem, netdev, linux-kernel, daniel
Cc: kernel-team, Song Liu
A new kernel API allows creating [k,u]probes with perf_event_open.
This patch tries to use the new API. If the new API doesn't work,
we fall back to the old API.
bpf_detach_probe() looks up the event being removed. If the event
is not found, we skip the cleanup procedure.
Signed-off-by: Song Liu <songliubraving@fb.com>
---
src/cc/libbpf.c | 264 +++++++++++++++++++++++++++++++++++++++++---------------
1 file changed, 196 insertions(+), 68 deletions(-)
diff --git a/src/cc/libbpf.c b/src/cc/libbpf.c
index ef6daf3..1ac685f 100644
--- a/src/cc/libbpf.c
+++ b/src/cc/libbpf.c
@@ -526,38 +526,113 @@ int bpf_attach_socket(int sock, int prog) {
return setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog, sizeof(prog));
}
-static int bpf_attach_tracing_event(int progfd, const char *event_path,
- struct perf_reader *reader, int pid, int cpu, int group_fd) {
- int efd, pfd;
- ssize_t bytes;
- char buf[256];
- struct perf_event_attr attr = {};
+#define PMU_TYPE_FILE "/sys/bus/event_source/devices/%s/type"
+static int bpf_find_probe_type(const char *event_type)
+{
+ int fd;
+ int ret;
+ char buf[64];
- snprintf(buf, sizeof(buf), "%s/id", event_path);
- efd = open(buf, O_RDONLY, 0);
- if (efd < 0) {
- fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
+ snprintf(buf, sizeof(buf), PMU_TYPE_FILE, event_type);
+
+ fd = open(buf, O_RDONLY);
+ if (fd < 0)
return -1;
- }
+ ret = read(fd, buf, sizeof(buf));
+ close(fd);
+ if (ret < 0 || ret >= sizeof(buf))
+ return -1;
+ ret = (int)strtol(buf, NULL, 10);
+ return errno ? -1 : ret;
+}
- bytes = read(efd, buf, sizeof(buf));
- if (bytes <= 0 || bytes >= sizeof(buf)) {
- fprintf(stderr, "read(%s): %s\n", buf, strerror(errno));
- close(efd);
+#define PMU_RETPROBE_FILE "/sys/bus/event_source/devices/%s/format/retprobe"
+static int bpf_get_retprobe_bit(const char *event_type)
+{
+ int fd;
+ int ret;
+ char buf[64];
+
+ snprintf(buf, sizeof(buf), PMU_RETPROBE_FILE, event_type);
+ fd = open(buf, O_RDONLY);
+ if (fd < 0)
+ return -1;
+ ret = read(fd, buf, sizeof(buf));
+ close(fd);
+ if (ret < 0 || ret >= sizeof(buf))
+ return -1;
+ if (strlen(buf) < strlen("config:"))
+ return -1;
+ ret = (int)strtol(buf + strlen("config:"), NULL, 10);
+ return errno ? -1 : ret;
+}
+
+/*
+ * new kernel API allows creating [k,u]probe with perf_event_open, which
+ * makes it easier to clean up the [k,u]probe. This function tries to
+ * create pfd with the new API.
+ */
+static int bpf_try_perf_event_open_with_probe(const char *name, uint64_t offs,
+ int pid, int cpu, int group_fd, char *event_type, int is_return)
+{
+ struct perf_event_attr attr = {};
+ int type = bpf_find_probe_type(event_type);
+ int is_return_bit = bpf_get_retprobe_bit(event_type);
+
+ if (type < 0 || is_return_bit < 0)
return -1;
- }
- close(efd);
- buf[bytes] = '\0';
- attr.config = strtol(buf, NULL, 0);
- attr.type = PERF_TYPE_TRACEPOINT;
attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
attr.sample_period = 1;
attr.wakeup_events = 1;
- pfd = syscall(__NR_perf_event_open, &attr, pid, cpu, group_fd, PERF_FLAG_FD_CLOEXEC);
+ if (is_return)
+ attr.config |= 1 << is_return_bit;
+ attr.probe_offset = offs; /* for kprobe, if name is NULL, this the addr */
+ attr.size = sizeof(attr);
+ attr.type = type;
+ attr.kprobe_func = ptr_to_u64((void *)name); /* also work for uprobe_path */
+ return syscall(__NR_perf_event_open, &attr, pid, cpu, group_fd,
+ PERF_FLAG_FD_CLOEXEC);
+}
+
+static int bpf_attach_tracing_event(int progfd, const char *event_path,
+ struct perf_reader *reader, int pid, int cpu, int group_fd, int pfd) {
+ int efd;
+ ssize_t bytes;
+ char buf[256];
+ struct perf_event_attr attr = {};
+
+ /*
+ * Only look up id and call perf_event_open when
+ * bpf_try_perf_event_open_with_probe() didn't returns valid pfd.
+ */
if (pfd < 0) {
- fprintf(stderr, "perf_event_open(%s/id): %s\n", event_path, strerror(errno));
- return -1;
+ snprintf(buf, sizeof(buf), "%s/id", event_path);
+ efd = open(buf, O_RDONLY, 0);
+ if (efd < 0) {
+ fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
+ return -1;
+ }
+
+ bytes = read(efd, buf, sizeof(buf));
+ if (bytes <= 0 || bytes >= sizeof(buf)) {
+ fprintf(stderr, "read(%s): %s\n", buf, strerror(errno));
+ close(efd);
+ return -1;
+ }
+ close(efd);
+ buf[bytes] = '\0';
+ attr.config = strtol(buf, NULL, 0);
+ attr.type = PERF_TYPE_TRACEPOINT;
+ attr.sample_type = PERF_SAMPLE_RAW | PERF_SAMPLE_CALLCHAIN;
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+ pfd = syscall(__NR_perf_event_open, &attr, pid, cpu, group_fd, PERF_FLAG_FD_CLOEXEC);
+ if (pfd < 0) {
+ fprintf(stderr, "perf_event_open(%s/id): %s\n", event_path, strerror(errno));
+ return -1;
+ }
}
+
perf_reader_set_fd(reader, pfd);
if (perf_reader_mmap(reader, attr.type, attr.sample_type) < 0)
@@ -585,31 +660,39 @@ void * bpf_attach_kprobe(int progfd, enum bpf_probe_attach_type attach_type, con
char event_alias[128];
struct perf_reader *reader = NULL;
static char *event_type = "kprobe";
+ int pfd;
reader = perf_reader_new(cb, NULL, NULL, cb_cookie, probe_perf_reader_page_cnt);
if (!reader)
goto error;
- snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
- kfd = open(buf, O_WRONLY | O_APPEND, 0);
- if (kfd < 0) {
- fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
- goto error;
- }
+ /* try use new API to create kprobe */
+ pfd = bpf_try_perf_event_open_with_probe(fn_name, 0, pid, cpu, group_fd,
+ event_type,
+ attach_type != BPF_PROBE_ENTRY);
- snprintf(event_alias, sizeof(event_alias), "%s_bcc_%d", ev_name, getpid());
- snprintf(buf, sizeof(buf), "%c:%ss/%s %s", attach_type==BPF_PROBE_ENTRY ? 'p' : 'r',
- event_type, event_alias, fn_name);
- if (write(kfd, buf, strlen(buf)) < 0) {
- if (errno == EINVAL)
- fprintf(stderr, "check dmesg output for possible cause\n");
+ if (pfd < 0) {
+ snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
+ kfd = open(buf, O_WRONLY | O_APPEND, 0);
+ if (kfd < 0) {
+ fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
+ goto error;
+ }
+
+ snprintf(event_alias, sizeof(event_alias), "%s_bcc_%d", ev_name, getpid());
+ snprintf(buf, sizeof(buf), "%c:%ss/%s %s", attach_type==BPF_PROBE_ENTRY ? 'p' : 'r',
+ event_type, event_alias, fn_name);
+ if (write(kfd, buf, strlen(buf)) < 0) {
+ if (errno == EINVAL)
+ fprintf(stderr, "check dmesg output for possible cause\n");
+ close(kfd);
+ goto error;
+ }
close(kfd);
- goto error;
+ snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s", event_type, event_alias);
}
- close(kfd);
- snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s", event_type, event_alias);
- if (bpf_attach_tracing_event(progfd, buf, reader, pid, cpu, group_fd) < 0)
+ if (bpf_attach_tracing_event(progfd, buf, reader, pid, cpu, group_fd, pfd) < 0)
goto error;
return reader;
@@ -691,42 +774,50 @@ void * bpf_attach_uprobe(int progfd, enum bpf_probe_attach_type attach_type, con
struct perf_reader *reader = NULL;
static char *event_type = "uprobe";
int res, kfd = -1, ns_fd = -1;
+ int pfd = -1;
reader = perf_reader_new(cb, NULL, NULL, cb_cookie, probe_perf_reader_page_cnt);
if (!reader)
goto error;
- snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
- kfd = open(buf, O_WRONLY | O_APPEND, 0);
- if (kfd < 0) {
- fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
- goto error;
- }
+ /* try use new API to create uprobe */
+ pfd = bpf_try_perf_event_open_with_probe(binary_path, offset, pid, cpu,
+ group_fd, event_type, attach_type != BPF_PROBE_ENTRY);
- res = snprintf(event_alias, sizeof(event_alias), "%s_bcc_%d", ev_name, getpid());
- if (res < 0 || res >= sizeof(event_alias)) {
- fprintf(stderr, "Event name (%s) is too long for buffer\n", ev_name);
- goto error;
- }
- res = snprintf(buf, sizeof(buf), "%c:%ss/%s %s:0x%lx", attach_type==BPF_PROBE_ENTRY ? 'p' : 'r',
- event_type, event_alias, binary_path, offset);
- if (res < 0 || res >= sizeof(buf)) {
- fprintf(stderr, "Event alias (%s) too long for buffer\n", event_alias);
- goto error;
- }
+ if (pfd < 0) {
+ snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
+ kfd = open(buf, O_WRONLY | O_APPEND, 0);
+ if (kfd < 0) {
+ fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
+ goto error;
+ }
- ns_fd = enter_mount_ns(pid);
- if (write(kfd, buf, strlen(buf)) < 0) {
- if (errno == EINVAL)
- fprintf(stderr, "check dmesg output for possible cause\n");
- goto error;
+ res = snprintf(event_alias, sizeof(event_alias), "%s_bcc_%d", ev_name, getpid());
+ if (res < 0 || res >= sizeof(event_alias)) {
+ fprintf(stderr, "Event name (%s) is too long for buffer\n", ev_name);
+ goto error;
+ }
+ res = snprintf(buf, sizeof(buf), "%c:%ss/%s %s:0x%lx", attach_type==BPF_PROBE_ENTRY ? 'p' : 'r',
+ event_type, event_alias, binary_path, offset);
+ if (res < 0 || res >= sizeof(buf)) {
+ fprintf(stderr, "Event alias (%s) too long for buffer\n", event_alias);
+ goto error;
+ }
+
+ ns_fd = enter_mount_ns(pid);
+ if (write(kfd, buf, strlen(buf)) < 0) {
+ if (errno == EINVAL)
+ fprintf(stderr, "check dmesg output for possible cause\n");
+ goto error;
+ }
+ close(kfd);
+ exit_mount_ns(ns_fd);
+ ns_fd = -1;
+
+ snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s", event_type, event_alias);
}
- close(kfd);
- exit_mount_ns(ns_fd);
- ns_fd = -1;
- snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%ss/%s", event_type, event_alias);
- if (bpf_attach_tracing_event(progfd, buf, reader, pid, cpu, group_fd) < 0)
+ if (bpf_attach_tracing_event(progfd, buf, reader, pid, cpu, group_fd, pfd) < 0)
goto error;
return reader;
@@ -741,8 +832,43 @@ error:
static int bpf_detach_probe(const char *ev_name, const char *event_type)
{
- int kfd, res;
+ int kfd = -1, res;
char buf[PATH_MAX];
+ int found_event = 0;
+ size_t bufsize = 0;
+ char *cptr = NULL;
+ FILE *fp;
+
+ /*
+ * For [k,u]probe created with perf_event_open (on newer kernel), it is
+ * not necessary to clean it up in [k,u]probe_events. We first look up
+ * the %s_bcc_%d line in [k,u]probe_events. If the event is not found,
+ * it is safe to skip the cleaning up process (write -:... to the file).
+ */
+ snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
+ fp = fopen(buf, "r");
+ if (!fp) {
+ fprintf(stderr, "open(%s): %s\n", buf, strerror(errno));
+ goto error;
+ }
+
+ res = snprintf(buf, sizeof(buf), "%ss/%s_bcc_%d", event_type, ev_name, getpid());
+ if (res < 0 || res >= sizeof(buf)) {
+ fprintf(stderr, "snprintf(%s): %d\n", ev_name, res);
+ goto error;
+ }
+
+ while (getline(&cptr, &bufsize, fp) != -1)
+ if (strstr(cptr, buf) != NULL) {
+ found_event = 1;
+ break;
+ }
+ fclose(fp);
+ fp = NULL;
+
+ if (!found_event)
+ return 0;
+
snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/%s_events", event_type);
kfd = open(buf, O_WRONLY | O_APPEND, 0);
if (kfd < 0) {
@@ -766,6 +892,8 @@ static int bpf_detach_probe(const char *ev_name, const char *event_type)
error:
if (kfd >= 0)
close(kfd);
+ if (fp)
+ fclose(fp);
return -1;
}
@@ -792,7 +920,7 @@ void * bpf_attach_tracepoint(int progfd, const char *tp_category,
snprintf(buf, sizeof(buf), "/sys/kernel/debug/tracing/events/%s/%s",
tp_category, tp_name);
- if (bpf_attach_tracing_event(progfd, buf, reader, pid, cpu, group_fd) < 0)
+ if (bpf_attach_tracing_event(progfd, buf, reader, pid, cpu, group_fd, -1) < 0)
goto error;
return reader;
--
2.9.5
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v5] perf_event_open.2: add type kprobe and uprobe
2017-12-06 22:45 [PATCH v5 0/6] enable creating [k,u]probe with perf_event_open Song Liu
2017-12-06 22:45 ` [PATCH v5] bcc: Try use new API to create " Song Liu
@ 2017-12-06 22:45 ` Song Liu
2017-12-06 22:45 ` [PATCH v5 1/6] perf: prepare perf_event.h for new types perf_kprobe and perf_uprobe Song Liu
` (8 subsequent siblings)
10 siblings, 0 replies; 20+ messages in thread
From: Song Liu @ 2017-12-06 22:45 UTC (permalink / raw)
To: peterz, rostedt, mingo, davem, netdev, linux-kernel, daniel
Cc: kernel-team, Song Liu
Two new types kprobe and uprobe are being added to perf_event_open,
which allow creating kprobe or uprobe with perf_event_open. This
patch adds information about these types.
Signed-off-by: Song Liu <songliubraving@fb.com>
---
man2/perf_event_open.2 | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 50 insertions(+)
diff --git a/man2/perf_event_open.2 b/man2/perf_event_open.2
index c91da3f..02d6673 100644
--- a/man2/perf_event_open.2
+++ b/man2/perf_event_open.2
@@ -256,11 +256,15 @@ struct perf_event_attr {
union {
__u64 bp_addr; /* breakpoint address */
+ __u64 kprobe_func; /* for perf_kprobe */
+ __u64 uprobe_path; /* for perf_uprobe */
__u64 config1; /* extension of config */
};
union {
__u64 bp_len; /* breakpoint length */
+ __u64 kprobe_addr; /* with kprobe_func == NULL */
+ __u64 probe_offset; /* for perf_[k,u]probe */
__u64 config2; /* extension of config1 */
};
__u64 branch_sample_type; /* enum perf_branch_sample_type */
@@ -336,6 +340,13 @@ field.
For instance,
.I /sys/bus/event_source/devices/cpu/type
contains the value for the core CPU PMU, which is usually 4.
+.TP
+.BR kprobe " and " uprobe " (since Linux 4.TBD)"
+These two dynamic PMUs create a kprobe or uprobe with perf_event_open and
+attach it to the file descriptor.
+See fields
+.IR kprobe_func ", " uprobe_path ", " kprobe_addr ", and " probe_offset
+for more details.
.RE
.TP
.I "size"
@@ -627,6 +638,45 @@ then leave
.I config
set to zero.
Its parameters are set in other places.
+.PP
+If
+.I type
+is
+.BR kprobe
+or
+.BR uprobe ,
+set
+.IR retprobe
+(bit 0 of
+.IR config ,
+see /sys/bus/event_source/devices/[k,u]probe/format/retprobe)
+for kretprobe/uretprobe. See fields
+.IR kprobe_func ", " uprobe_path ", " kprobe_addr ", and " probe_offset
+for more details.
+.RE
+.TP
+.IR kprobe_func ", " uprobe_path ", " kprobe_addr ", and " probe_offset
+.EE
+These fields describe the kprobe/uprobe for dynamic PMU
+.BR kprobe
+and
+.BR uprobe .
+For
+.BR kprobe ": "
+use
+.I kprobe_func
+and
+.IR probe_offset ,
+or use
+.I kprobe_addr
+and leave
+.I kprobe_func
+as NULL. For
+.BR uprobe ": "
+use
+.I uprobe_path
+and
+.IR probe_offset .
.RE
.TP
.IR sample_period ", " sample_freq
--
2.9.5
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v5 1/6] perf: prepare perf_event.h for new types perf_kprobe and perf_uprobe
2017-12-06 22:45 [PATCH v5 0/6] enable creating [k,u]probe with perf_event_open Song Liu
2017-12-06 22:45 ` [PATCH v5] bcc: Try use new API to create " Song Liu
2017-12-06 22:45 ` [PATCH v5] perf_event_open.2: add type kprobe and uprobe Song Liu
@ 2017-12-06 22:45 ` Song Liu
2017-12-06 22:45 ` [PATCH v5 2/6] perf: copy new perf_event.h to tools/include/uapi Song Liu
` (7 subsequent siblings)
10 siblings, 0 replies; 20+ messages in thread
From: Song Liu @ 2017-12-06 22:45 UTC (permalink / raw)
To: peterz, rostedt, mingo, davem, netdev, linux-kernel, daniel
Cc: kernel-team, Song Liu
Two new perf types, perf_kprobe and perf_uprobe, will be added to allow
creating [k,u]probe with perf_event_open. These [k,u]probes are associated
with the file descriptor created by perf_event_open, and thus are easy to
clean up when the file descriptor is destroyed.
kprobe_func and uprobe_path are added to union config1 for pointers to
function name for kprobe or binary path for uprobe.
kprobe_addr and probe_offset are added to union config2 for kernel
address (when kprobe_func is NULL), or [k,u]probe offset.
Signed-off-by: Song Liu <songliubraving@fb.com>
Reviewed-by: Yonghong Song <yhs@fb.com>
Reviewed-by: Josef Bacik <jbacik@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
include/uapi/linux/perf_event.h | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h
index 362493a..b2d80a7 100644
--- a/include/uapi/linux/perf_event.h
+++ b/include/uapi/linux/perf_event.h
@@ -380,10 +380,14 @@ struct perf_event_attr {
__u32 bp_type;
union {
__u64 bp_addr;
+ __u64 kprobe_func; /* for perf_kprobe */
+ __u64 uprobe_path; /* for perf_uprobe */
__u64 config1; /* extension of config */
};
union {
__u64 bp_len;
+ __u64 kprobe_addr; /* when kprobe_func == NULL */
+ __u64 probe_offset; /* for perf_[k,u]probe */
__u64 config2; /* extension of config1 */
};
__u64 branch_sample_type; /* enum perf_branch_sample_type */
--
2.9.5
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v5 2/6] perf: copy new perf_event.h to tools/include/uapi
2017-12-06 22:45 [PATCH v5 0/6] enable creating [k,u]probe with perf_event_open Song Liu
` (2 preceding siblings ...)
2017-12-06 22:45 ` [PATCH v5 1/6] perf: prepare perf_event.h for new types perf_kprobe and perf_uprobe Song Liu
@ 2017-12-06 22:45 ` Song Liu
2017-12-06 22:45 ` [PATCH v5 3/6] perf: implement pmu perf_kprobe Song Liu
` (6 subsequent siblings)
10 siblings, 0 replies; 20+ messages in thread
From: Song Liu @ 2017-12-06 22:45 UTC (permalink / raw)
To: peterz, rostedt, mingo, davem, netdev, linux-kernel, daniel
Cc: kernel-team, Song Liu
perf_event.h is updated in the previous patch; this patch applies the same
changes to the tools/ version. This part is put in a separate
patch in case the two files are backported separately.
Signed-off-by: Song Liu <songliubraving@fb.com>
Reviewed-by: Yonghong Song <yhs@fb.com>
Reviewed-by: Josef Bacik <jbacik@fb.com>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
tools/include/uapi/linux/perf_event.h | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h
index b9a4953..1133d6a 100644
--- a/tools/include/uapi/linux/perf_event.h
+++ b/tools/include/uapi/linux/perf_event.h
@@ -380,10 +380,14 @@ struct perf_event_attr {
__u32 bp_type;
union {
__u64 bp_addr;
+ __u64 kprobe_func; /* for perf_kprobe */
+ __u64 uprobe_path; /* for perf_uprobe */
__u64 config1; /* extension of config */
};
union {
__u64 bp_len;
+ __u64 kprobe_addr; /* when kprobe_func == NULL */
+ __u64 probe_offset; /* for perf_[k,u]probe */
__u64 config2; /* extension of config1 */
};
__u64 branch_sample_type; /* enum perf_branch_sample_type */
--
2.9.5
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v5 3/6] perf: implement pmu perf_kprobe
2017-12-06 22:45 [PATCH v5 0/6] enable creating [k,u]probe with perf_event_open Song Liu
` (3 preceding siblings ...)
2017-12-06 22:45 ` [PATCH v5 2/6] perf: copy new perf_event.h to tools/include/uapi Song Liu
@ 2017-12-06 22:45 ` Song Liu
2017-12-20 10:03 ` Peter Zijlstra
2017-12-06 22:45 ` [PATCH v5 4/6] perf: implement pmu perf_uprobe Song Liu
` (5 subsequent siblings)
10 siblings, 1 reply; 20+ messages in thread
From: Song Liu @ 2017-12-06 22:45 UTC (permalink / raw)
To: peterz, rostedt, mingo, davem, netdev, linux-kernel, daniel
Cc: kernel-team, Song Liu
A new pmu, perf_kprobe, is added. Based on attr from perf_event_open(),
perf_kprobe creates a kprobe (or kretprobe) for the perf_event. This
kprobe is private to this perf_event, and thus not added to global
lists, and not available in tracefs.
Two functions, create_local_trace_kprobe() and
destroy_local_trace_kprobe(), are added to create and destroy these
local trace_kprobes.
Signed-off-by: Song Liu <songliubraving@fb.com>
Reviewed-by: Yonghong Song <yhs@fb.com>
Reviewed-by: Josef Bacik <jbacik@fb.com>
---
include/linux/trace_events.h | 4 ++
kernel/events/core.c | 87 ++++++++++++++++++++++++++++++++++++++-
kernel/trace/trace_event_perf.c | 49 ++++++++++++++++++++++
kernel/trace/trace_kprobe.c | 91 +++++++++++++++++++++++++++++++++++++----
kernel/trace/trace_probe.h | 7 ++++
5 files changed, 228 insertions(+), 10 deletions(-)
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 2bcb4dc..1cfb0a4 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -494,6 +494,10 @@ extern int perf_trace_init(struct perf_event *event);
extern void perf_trace_destroy(struct perf_event *event);
extern int perf_trace_add(struct perf_event *event, int flags);
extern void perf_trace_del(struct perf_event *event, int flags);
+#ifdef CONFIG_KPROBE_EVENTS
+extern int perf_kprobe_init(struct perf_event *event, bool is_retprobe);
+extern void perf_kprobe_destroy(struct perf_event *event);
+#endif
extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
char *filter_str);
extern void ftrace_profile_free_filter(struct perf_event *event);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 494eca1..f518214 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7981,9 +7981,92 @@ static struct pmu perf_tracepoint = {
.read = perf_swevent_read,
};
+/*
+ * Flags in config, used by dynamic PMU kprobe and uprobe
+ * The flags should match following PMU_FORMAT_ATTR().
+ *
+ * PERF_PROBE_CONFIG_IS_RETPROBE if set, create kretprobe/uretprobe
+ * if not set, create kprobe/uprobe
+ */
+enum perf_probe_config {
+ PERF_PROBE_CONFIG_IS_RETPROBE = 1U << 0, /* [k,u]retprobe */
+};
+
+PMU_FORMAT_ATTR(retprobe, "config:0");
+
+static struct attribute *probe_attrs[] = {
+ &format_attr_retprobe.attr,
+ NULL,
+};
+
+static struct attribute_group probe_format_group = {
+ .name = "format",
+ .attrs = probe_attrs,
+};
+
+static const struct attribute_group *probe_attr_groups[] = {
+ &probe_format_group,
+ NULL,
+};
+
+#ifdef CONFIG_KPROBE_EVENTS
+static int perf_kprobe_event_init(struct perf_event *event);
+static struct pmu perf_kprobe = {
+ .task_ctx_nr = perf_sw_context,
+ .event_init = perf_kprobe_event_init,
+ .add = perf_trace_add,
+ .del = perf_trace_del,
+ .start = perf_swevent_start,
+ .stop = perf_swevent_stop,
+ .read = perf_swevent_read,
+ .attr_groups = probe_attr_groups,
+};
+
+static int perf_kprobe_event_init(struct perf_event *event)
+{
+ int err;
+ bool is_retprobe;
+
+ if (event->attr.type != perf_kprobe.type)
+ return -ENOENT;
+ /*
+ * no branch sampling for probe events
+ */
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
+ is_retprobe = event->attr.config & PERF_PROBE_CONFIG_IS_RETPROBE;
+ err = perf_kprobe_init(event, is_retprobe);
+ if (err)
+ return err;
+
+ event->destroy = perf_kprobe_destroy;
+
+ return 0;
+}
+#endif /* CONFIG_KPROBE_EVENTS */
+
+/*
+ * returns true if the event is a tracepoint, or a kprobe/uprobe created
+ * with perf_event_open()
+ */
+static inline bool perf_event_is_tracing(struct perf_event *event)
+{
+ if (event->attr.type == PERF_TYPE_TRACEPOINT)
+ return true;
+#ifdef CONFIG_KPROBE_EVENTS
+ if (event->pmu == &perf_kprobe)
+ return true;
+#endif
+ return false;
+}
+
static inline void perf_tp_register(void)
{
perf_pmu_register(&perf_tracepoint, "tracepoint", PERF_TYPE_TRACEPOINT);
+#ifdef CONFIG_KPROBE_EVENTS
+ perf_pmu_register(&perf_kprobe, "kprobe", -1);
+#endif
}
static void perf_event_free_filter(struct perf_event *event)
@@ -8065,7 +8148,7 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
bool is_kprobe, is_tracepoint, is_syscall_tp;
struct bpf_prog *prog;
- if (event->attr.type != PERF_TYPE_TRACEPOINT)
+ if (!perf_event_is_tracing(event))
return perf_event_set_bpf_handler(event, prog_fd);
if (event->tp_event->prog)
@@ -8537,7 +8620,7 @@ static int perf_event_set_filter(struct perf_event *event, void __user *arg)
char *filter_str;
int ret = -EINVAL;
- if ((event->attr.type != PERF_TYPE_TRACEPOINT ||
+ if ((!perf_event_is_tracing(event) ||
!IS_ENABLED(CONFIG_EVENT_TRACING)) &&
!has_addr_filter(event))
return -EINVAL;
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 13ba2d3..7f1cc45 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -8,6 +8,7 @@
#include <linux/module.h>
#include <linux/kprobes.h>
#include "trace.h"
+#include "trace_probe.h"
static char __percpu *perf_trace_buf[PERF_NR_CONTEXTS];
@@ -237,6 +238,54 @@ void perf_trace_destroy(struct perf_event *p_event)
mutex_unlock(&event_mutex);
}
+#ifdef CONFIG_KPROBE_EVENTS
+int perf_kprobe_init(struct perf_event *p_event, bool is_retprobe)
+{
+ int ret;
+ char *func = NULL;
+ struct trace_event_call *tp_event;
+
+ if (p_event->attr.kprobe_func) {
+ func = kzalloc(KSYM_NAME_LEN, GFP_KERNEL);
+ if (!func)
+ return -ENOMEM;
+ ret = strncpy_from_user(
+ func, u64_to_user_ptr(p_event->attr.kprobe_func),
+ KSYM_NAME_LEN);
+ if (ret < 0)
+ goto out;
+
+ if (func[0] == '\0') {
+ kfree(func);
+ func = NULL;
+ }
+ }
+
+ tp_event = create_local_trace_kprobe(
+ func, (void *)(unsigned long)(p_event->attr.kprobe_addr),
+ p_event->attr.probe_offset, is_retprobe);
+ if (IS_ERR(tp_event)) {
+ ret = PTR_ERR(tp_event);
+ goto out;
+ }
+
+ ret = perf_trace_event_init(tp_event, p_event);
+ if (ret)
+ destroy_local_trace_kprobe(tp_event);
+out:
+ kfree(func);
+ return ret;
+}
+
+void perf_kprobe_destroy(struct perf_event *p_event)
+{
+ perf_trace_event_close(p_event);
+ perf_trace_event_unreg(p_event);
+
+ destroy_local_trace_kprobe(p_event->tp_event);
+}
+#endif /* CONFIG_KPROBE_EVENTS */
+
int perf_trace_add(struct perf_event *p_event, int flags)
{
struct trace_event_call *tp_event = p_event->tp_event;
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index 8a907e1..16b334a 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -438,6 +438,14 @@ disable_trace_kprobe(struct trace_kprobe *tk, struct trace_event_file *file)
disable_kprobe(&tk->rp.kp);
wait = 1;
}
+
+ /*
+ * if tk is not added to any list, it must be a local trace_kprobe
+ * created with perf_event_open. We don't need to wait for these
+ * trace_kprobes
+ */
+ if (list_empty(&tk->list))
+ wait = 0;
out:
if (wait) {
/*
@@ -1315,12 +1323,9 @@ static struct trace_event_functions kprobe_funcs = {
.trace = print_kprobe_event
};
-static int register_kprobe_event(struct trace_kprobe *tk)
+static inline void init_trace_event_call(struct trace_kprobe *tk,
+ struct trace_event_call *call)
{
- struct trace_event_call *call = &tk->tp.call;
- int ret;
-
- /* Initialize trace_event_call */
INIT_LIST_HEAD(&call->class->fields);
if (trace_kprobe_is_return(tk)) {
call->event.funcs = &kretprobe_funcs;
@@ -1329,6 +1334,19 @@ static int register_kprobe_event(struct trace_kprobe *tk)
call->event.funcs = &kprobe_funcs;
call->class->define_fields = kprobe_event_define_fields;
}
+
+ call->flags = TRACE_EVENT_FL_KPROBE;
+ call->class->reg = kprobe_register;
+ call->data = tk;
+}
+
+static int register_kprobe_event(struct trace_kprobe *tk)
+{
+ struct trace_event_call *call = &tk->tp.call;
+ int ret = 0;
+
+ init_trace_event_call(tk, call);
+
if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0)
return -ENOMEM;
ret = register_trace_event(&call->event);
@@ -1336,9 +1354,6 @@ static int register_kprobe_event(struct trace_kprobe *tk)
kfree(call->print_fmt);
return -ENODEV;
}
- call->flags = TRACE_EVENT_FL_KPROBE;
- call->class->reg = kprobe_register;
- call->data = tk;
ret = trace_add_event_call(call);
if (ret) {
pr_info("Failed to register kprobe event: %s\n",
@@ -1360,6 +1375,66 @@ static int unregister_kprobe_event(struct trace_kprobe *tk)
return ret;
}
+#ifdef CONFIG_PERF_EVENTS
+/* create a trace_kprobe, but don't add it to global lists */
+struct trace_event_call *
+create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
+ bool is_return)
+{
+ struct trace_kprobe *tk;
+ int ret;
+ char *event;
+
+ /*
+ * local trace_kprobes are not added to probe_list, so they are never
+ * searched in find_trace_kprobe(). Therefore, there is no concern of
+ * duplicated name here.
+ */
+ event = func ? func : "DUMMY_EVENT";
+
+ tk = alloc_trace_kprobe(KPROBE_EVENT_SYSTEM, event, (void *)addr, func,
+ offs, 0 /* maxactive */, 0 /* nargs */,
+ is_return);
+
+ if (IS_ERR(tk)) {
+ pr_info("Failed to allocate trace_probe.(%d)\n",
+ (int)PTR_ERR(tk));
+ return ERR_CAST(tk);
+ }
+
+ init_trace_event_call(tk, &tk->tp.call);
+
+ if (set_print_fmt(&tk->tp, trace_kprobe_is_return(tk)) < 0) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ ret = __register_trace_kprobe(tk);
+ if (ret < 0)
+ goto error;
+
+ return &tk->tp.call;
+error:
+ free_trace_kprobe(tk);
+ return ERR_PTR(ret);
+}
+
+void destroy_local_trace_kprobe(struct trace_event_call *event_call)
+{
+ struct trace_kprobe *tk;
+
+ tk = container_of(event_call, struct trace_kprobe, tp.call);
+
+ if (trace_probe_is_enabled(&tk->tp)) {
+ WARN_ON(1);
+ return;
+ }
+
+ __unregister_trace_kprobe(tk);
+ free_trace_kprobe(tk);
+}
+#endif /* CONFIG_PERF_EVENTS */
+
/* Make a tracefs interface for controlling probe points */
static __init int init_kprobe_trace(void)
{
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 903273c..910ae1b 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -411,3 +411,10 @@ store_trace_args(int ent_size, struct trace_probe *tp, struct pt_regs *regs,
}
extern int set_print_fmt(struct trace_probe *tp, bool is_return);
+
+#ifdef CONFIG_PERF_EVENTS
+extern struct trace_event_call *
+create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
+ bool is_return);
+extern void destroy_local_trace_kprobe(struct trace_event_call *event_call);
+#endif
--
2.9.5
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH v5 3/6] perf: implement pmu perf_kprobe
2017-12-06 22:45 ` [PATCH v5 3/6] perf: implement pmu perf_kprobe Song Liu
@ 2017-12-20 10:03 ` Peter Zijlstra
2017-12-20 10:14 ` Peter Zijlstra
0 siblings, 1 reply; 20+ messages in thread
From: Peter Zijlstra @ 2017-12-20 10:03 UTC (permalink / raw)
To: Song Liu; +Cc: rostedt, mingo, davem, netdev, linux-kernel, daniel, kernel-team
On Wed, Dec 06, 2017 at 02:45:15PM -0800, Song Liu wrote:
> @@ -8537,7 +8620,7 @@ static int perf_event_set_filter(struct perf_event *event, void __user *arg)
> char *filter_str;
> int ret = -EINVAL;
>
> - if ((event->attr.type != PERF_TYPE_TRACEPOINT ||
> + if ((!perf_event_is_tracing(event) ||
> !IS_ENABLED(CONFIG_EVENT_TRACING)) &&
> !has_addr_filter(event))
> return -EINVAL;
You actually missed an instance later in this same function... fixing
that.
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH v5 3/6] perf: implement pmu perf_kprobe
2017-12-20 10:03 ` Peter Zijlstra
@ 2017-12-20 10:14 ` Peter Zijlstra
2017-12-20 18:10 ` Song Liu
0 siblings, 1 reply; 20+ messages in thread
From: Peter Zijlstra @ 2017-12-20 10:14 UTC (permalink / raw)
To: Song Liu; +Cc: rostedt, mingo, davem, netdev, linux-kernel, daniel, kernel-team
On Wed, Dec 20, 2017 at 11:03:01AM +0100, Peter Zijlstra wrote:
> On Wed, Dec 06, 2017 at 02:45:15PM -0800, Song Liu wrote:
> > @@ -8537,7 +8620,7 @@ static int perf_event_set_filter(struct perf_event *event, void __user *arg)
> > char *filter_str;
> > int ret = -EINVAL;
> >
> > - if ((event->attr.type != PERF_TYPE_TRACEPOINT ||
> > + if ((!perf_event_is_tracing(event) ||
> > !IS_ENABLED(CONFIG_EVENT_TRACING)) &&
> > !has_addr_filter(event))
> > return -EINVAL;
>
> You actually missed an instance later in this same function... fixing
> that.
@@ -8518,23 +8601,19 @@ perf_event_set_addr_filter(struct perf_e
static int perf_event_set_filter(struct perf_event *event, void __user *arg)
{
- char *filter_str;
int ret = -EINVAL;
-
- if ((event->attr.type != PERF_TYPE_TRACEPOINT ||
- !IS_ENABLED(CONFIG_EVENT_TRACING)) &&
- !has_addr_filter(event))
- return -EINVAL;
+ char *filter_str;
filter_str = strndup_user(arg, PAGE_SIZE);
if (IS_ERR(filter_str))
return PTR_ERR(filter_str);
- if (IS_ENABLED(CONFIG_EVENT_TRACING) &&
- event->attr.type == PERF_TYPE_TRACEPOINT)
- ret = ftrace_profile_set_filter(event, event->attr.config,
- filter_str);
- else if (has_addr_filter(event))
+#ifdef CONFIG_EVENT_TRACING
+ if (perf_event_is_tracing(event))
+ ret = ftrace_profile_set_filter(event, event->attr.config, filter_str);
+ else
+#endif
+ if (has_addr_filter(event))
ret = perf_event_set_addr_filter(event, filter_str);
kfree(filter_str);
Is that right?
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH v5 3/6] perf: implement pmu perf_kprobe
2017-12-20 10:14 ` Peter Zijlstra
@ 2017-12-20 18:10 ` Song Liu
2017-12-20 21:25 ` Peter Zijlstra
0 siblings, 1 reply; 20+ messages in thread
From: Song Liu @ 2017-12-20 18:10 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Steven Rostedt, mingo@redhat.com, David Miller,
netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
Daniel Borkmann, Kernel Team
> On Dec 20, 2017, at 2:14 AM, Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Wed, Dec 20, 2017 at 11:03:01AM +0100, Peter Zijlstra wrote:
>> On Wed, Dec 06, 2017 at 02:45:15PM -0800, Song Liu wrote:
>>> @@ -8537,7 +8620,7 @@ static int perf_event_set_filter(struct perf_event *event, void __user *arg)
>>> char *filter_str;
>>> int ret = -EINVAL;
>>>
>>> - if ((event->attr.type != PERF_TYPE_TRACEPOINT ||
>>> + if ((!perf_event_is_tracing(event) ||
>>> !IS_ENABLED(CONFIG_EVENT_TRACING)) &&
>>> !has_addr_filter(event))
>>> return -EINVAL;
>>
>> You actually missed an instance later in this same function... fixing
>> that.
>
>
> @@ -8518,23 +8601,19 @@ perf_event_set_addr_filter(struct perf_e
>
> static int perf_event_set_filter(struct perf_event *event, void __user *arg)
> {
> - char *filter_str;
> int ret = -EINVAL;
> -
> - if ((event->attr.type != PERF_TYPE_TRACEPOINT ||
> - !IS_ENABLED(CONFIG_EVENT_TRACING)) &&
> - !has_addr_filter(event))
> - return -EINVAL;
> + char *filter_str;
>
> filter_str = strndup_user(arg, PAGE_SIZE);
> if (IS_ERR(filter_str))
> return PTR_ERR(filter_str);
>
> - if (IS_ENABLED(CONFIG_EVENT_TRACING) &&
> - event->attr.type == PERF_TYPE_TRACEPOINT)
> - ret = ftrace_profile_set_filter(event, event->attr.config,
> - filter_str);
> - else if (has_addr_filter(event))
> +#ifdef CONFIG_EVENT_TRACING
> + if (perf_event_is_tracing(event))
> + ret = ftrace_profile_set_filter(event, event->attr.config, filter_str);
> + else
> +#endif
> + if (has_addr_filter(event))
> ret = perf_event_set_addr_filter(event, filter_str);
>
> kfree(filter_str);
>
>
>
> Is that right?
Yeah, this is right and neat. Thanks a lot for your help on this.
I think there is one more thing to change:
diff --git i/kernel/events/core.c w/kernel/events/core.c
index a906f30..516ff9b 100644
--- i/kernel/events/core.c
+++ w/kernel/events/core.c
@@ -8226,7 +8226,7 @@ static int perf_event_set_bpf_prog(struct perf_event *event, u32 prog_fd)
static void perf_event_free_bpf_prog(struct perf_event *event)
{
- if (event->attr.type != PERF_TYPE_TRACEPOINT) {
+ if (!perf_event_is_tracing(event)) {
perf_event_free_bpf_handler(event);
return;
}
Thanks,
Song
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH v5 3/6] perf: implement pmu perf_kprobe
2017-12-20 18:10 ` Song Liu
@ 2017-12-20 21:25 ` Peter Zijlstra
2017-12-20 22:05 ` Song Liu
0 siblings, 1 reply; 20+ messages in thread
From: Peter Zijlstra @ 2017-12-20 21:25 UTC (permalink / raw)
To: Song Liu
Cc: Steven Rostedt, mingo@redhat.com, David Miller,
netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
Daniel Borkmann, Kernel Team
On Wed, Dec 20, 2017 at 06:10:11PM +0000, Song Liu wrote:
> I think there is one more thing to change:
OK, folded that too; it should all be at:
git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git perf/core
Can you verify it all looks/works right?
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH v5 3/6] perf: implement pmu perf_kprobe
2017-12-20 21:25 ` Peter Zijlstra
@ 2017-12-20 22:05 ` Song Liu
0 siblings, 0 replies; 20+ messages in thread
From: Song Liu @ 2017-12-20 22:05 UTC (permalink / raw)
To: Peter Zijlstra
Cc: Steven Rostedt, mingo@redhat.com, David Miller,
netdev@vger.kernel.org, linux-kernel@vger.kernel.org,
Daniel Borkmann, Kernel Team
> On Dec 20, 2017, at 1:25 PM, Peter Zijlstra <peterz@infradead.org> wrote:
>
> On Wed, Dec 20, 2017 at 06:10:11PM +0000, Song Liu wrote:
>> I think there is one more thing to change:
>
> OK, folded that too; it should all be at:
>
> git://git.kernel.org/pub/scm/linux/kernel/git/peterz/queue.git perf/core
>
> Can you verify it all looks/works right?
Thanks Peter! The patches look right. And they work as expected in my tests.
Best,
Song
^ permalink raw reply [flat|nested] 20+ messages in thread
* [PATCH v5 4/6] perf: implement pmu perf_uprobe
2017-12-06 22:45 [PATCH v5 0/6] enable creating [k,u]probe with perf_event_open Song Liu
` (4 preceding siblings ...)
2017-12-06 22:45 ` [PATCH v5 3/6] perf: implement pmu perf_kprobe Song Liu
@ 2017-12-06 22:45 ` Song Liu
2017-12-06 22:45 ` [PATCH v5 5/6] bpf: add option for bpf_load.c to use perf_kprobe Song Liu
` (4 subsequent siblings)
10 siblings, 0 replies; 20+ messages in thread
From: Song Liu @ 2017-12-06 22:45 UTC (permalink / raw)
To: peterz, rostedt, mingo, davem, netdev, linux-kernel, daniel
Cc: kernel-team, Song Liu
This patch adds perf_uprobe support, following the same pattern as the
previous patch (for kprobe).
Two functions, create_local_trace_uprobe() and
destroy_local_trace_uprobe(), are created so a uprobe can be created
and attached to the file descriptor created by perf_event_open().
Signed-off-by: Song Liu <songliubraving@fb.com>
Reviewed-by: Yonghong Song <yhs@fb.com>
Reviewed-by: Josef Bacik <jbacik@fb.com>
---
include/linux/trace_events.h | 4 ++
kernel/events/core.c | 44 +++++++++++++++++++++
kernel/trace/trace_event_perf.c | 53 +++++++++++++++++++++++++
kernel/trace/trace_probe.h | 4 ++
kernel/trace/trace_uprobe.c | 86 +++++++++++++++++++++++++++++++++++++----
5 files changed, 183 insertions(+), 8 deletions(-)
diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 1cfb0a4..b56ec3d 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -498,6 +498,10 @@ extern void perf_trace_del(struct perf_event *event, int flags);
extern int perf_kprobe_init(struct perf_event *event, bool is_retprobe);
extern void perf_kprobe_destroy(struct perf_event *event);
#endif
+#ifdef CONFIG_UPROBE_EVENTS
+extern int perf_uprobe_init(struct perf_event *event, bool is_retprobe);
+extern void perf_uprobe_destroy(struct perf_event *event);
+#endif
extern int ftrace_profile_set_filter(struct perf_event *event, int event_id,
char *filter_str);
extern void ftrace_profile_free_filter(struct perf_event *event);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index f518214..31628ca 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8046,6 +8046,43 @@ static int perf_kprobe_event_init(struct perf_event *event)
}
#endif /* CONFIG_KPROBE_EVENTS */
+#ifdef CONFIG_UPROBE_EVENTS
+static int perf_uprobe_event_init(struct perf_event *event);
+static struct pmu perf_uprobe = {
+ .task_ctx_nr = perf_sw_context,
+ .event_init = perf_uprobe_event_init,
+ .add = perf_trace_add,
+ .del = perf_trace_del,
+ .start = perf_swevent_start,
+ .stop = perf_swevent_stop,
+ .read = perf_swevent_read,
+ .attr_groups = probe_attr_groups,
+};
+
+static int perf_uprobe_event_init(struct perf_event *event)
+{
+ int err;
+ bool is_retprobe;
+
+ if (event->attr.type != perf_uprobe.type)
+ return -ENOENT;
+ /*
+ * no branch sampling for probe events
+ */
+ if (has_branch_stack(event))
+ return -EOPNOTSUPP;
+
+ is_retprobe = event->attr.config & PERF_PROBE_CONFIG_IS_RETPROBE;
+ err = perf_uprobe_init(event, is_retprobe);
+ if (err)
+ return err;
+
+ event->destroy = perf_uprobe_destroy;
+
+ return 0;
+}
+#endif /* CONFIG_UPROBE_EVENTS */
+
/*
* returns true if the event is a tracepoint, or a kprobe/upprobe created
* with perf_event_open()
@@ -8058,6 +8095,10 @@ static inline bool perf_event_is_tracing(struct perf_event *event)
if (event->pmu == &perf_kprobe)
return true;
#endif
+#if CONFIG_UPROBE_EVENTS
+ if (event->pmu == &perf_uprobe)
+ return true;
+#endif
return false;
}
@@ -8067,6 +8108,9 @@ static inline void perf_tp_register(void)
#ifdef CONFIG_KPROBE_EVENTS
perf_pmu_register(&perf_kprobe, "kprobe", -1);
#endif
+#ifdef CONFIG_UPROBE_EVENTS
+ perf_pmu_register(&perf_uprobe, "uprobe", -1);
+#endif
}
static void perf_event_free_filter(struct perf_event *event)
diff --git a/kernel/trace/trace_event_perf.c b/kernel/trace/trace_event_perf.c
index 7f1cc45..6a352ee 100644
--- a/kernel/trace/trace_event_perf.c
+++ b/kernel/trace/trace_event_perf.c
@@ -286,6 +286,59 @@ void perf_kprobe_destroy(struct perf_event *p_event)
}
#endif /* CONFIG_KPROBE_EVENTS */
+#ifdef CONFIG_UPROBE_EVENTS
+int perf_uprobe_init(struct perf_event *p_event, bool is_retprobe)
+{
+ int ret;
+ char *path = NULL;
+ struct trace_event_call *tp_event;
+
+ if (!p_event->attr.uprobe_path)
+ return -EINVAL;
+ path = kzalloc(PATH_MAX, GFP_KERNEL);
+ if (!path)
+ return -ENOMEM;
+ ret = strncpy_from_user(
+ path, u64_to_user_ptr(p_event->attr.uprobe_path), PATH_MAX);
+ if (ret < 0)
+ goto out;
+ if (path[0] == '\0') {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ tp_event = create_local_trace_uprobe(
+ path, p_event->attr.probe_offset, is_retprobe);
+ if (IS_ERR(tp_event)) {
+ ret = PTR_ERR(tp_event);
+ goto out;
+ }
+
+ /*
+ * local trace_uprobe need to hold event_mutex to call
+ * uprobe_buffer_enable() and uprobe_buffer_disable().
+ * event_mutex is not required for local trace_kprobes.
+ */
+ mutex_lock(&event_mutex);
+ ret = perf_trace_event_init(tp_event, p_event);
+ if (ret)
+ destroy_local_trace_uprobe(tp_event);
+ mutex_unlock(&event_mutex);
+out:
+ kfree(path);
+ return ret;
+}
+
+void perf_uprobe_destroy(struct perf_event *p_event)
+{
+ mutex_lock(&event_mutex);
+ perf_trace_event_close(p_event);
+ perf_trace_event_unreg(p_event);
+ mutex_unlock(&event_mutex);
+ destroy_local_trace_uprobe(p_event->tp_event);
+}
+#endif /* CONFIG_UPROBE_EVENTS */
+
int perf_trace_add(struct perf_event *p_event, int flags)
{
struct trace_event_call *tp_event = p_event->tp_event;
diff --git a/kernel/trace/trace_probe.h b/kernel/trace/trace_probe.h
index 910ae1b..86b5925 100644
--- a/kernel/trace/trace_probe.h
+++ b/kernel/trace/trace_probe.h
@@ -417,4 +417,8 @@ extern struct trace_event_call *
create_local_trace_kprobe(char *func, void *addr, unsigned long offs,
bool is_return);
extern void destroy_local_trace_kprobe(struct trace_event_call *event_call);
+
+extern struct trace_event_call *
+create_local_trace_uprobe(char *name, unsigned long offs, bool is_return);
+extern void destroy_local_trace_uprobe(struct trace_event_call *event_call);
#endif
diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
index 4525e02..4d805d2 100644
--- a/kernel/trace/trace_uprobe.c
+++ b/kernel/trace/trace_uprobe.c
@@ -1293,16 +1293,25 @@ static struct trace_event_functions uprobe_funcs = {
.trace = print_uprobe_event
};
-static int register_uprobe_event(struct trace_uprobe *tu)
+static inline void init_trace_event_call(struct trace_uprobe *tu,
+ struct trace_event_call *call)
{
- struct trace_event_call *call = &tu->tp.call;
- int ret;
-
- /* Initialize trace_event_call */
INIT_LIST_HEAD(&call->class->fields);
call->event.funcs = &uprobe_funcs;
call->class->define_fields = uprobe_event_define_fields;
+ call->flags = TRACE_EVENT_FL_UPROBE;
+ call->class->reg = trace_uprobe_register;
+ call->data = tu;
+}
+
+static int register_uprobe_event(struct trace_uprobe *tu)
+{
+ struct trace_event_call *call = &tu->tp.call;
+ int ret = 0;
+
+ init_trace_event_call(tu, call);
+
if (set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0)
return -ENOMEM;
@@ -1312,9 +1321,6 @@ static int register_uprobe_event(struct trace_uprobe *tu)
return -ENODEV;
}
- call->flags = TRACE_EVENT_FL_UPROBE;
- call->class->reg = trace_uprobe_register;
- call->data = tu;
ret = trace_add_event_call(call);
if (ret) {
@@ -1340,6 +1346,70 @@ static int unregister_uprobe_event(struct trace_uprobe *tu)
return 0;
}
+#ifdef CONFIG_PERF_EVENTS
+struct trace_event_call *
+create_local_trace_uprobe(char *name, unsigned long offs, bool is_return)
+{
+ struct trace_uprobe *tu;
+ struct inode *inode;
+ struct path path;
+ int ret;
+
+ ret = kern_path(name, LOOKUP_FOLLOW, &path);
+ if (ret)
+ return ERR_PTR(ret);
+
+ inode = igrab(d_inode(path.dentry));
+ path_put(&path);
+
+ if (!inode || !S_ISREG(inode->i_mode)) {
+ iput(inode);
+ return ERR_PTR(-EINVAL);
+ }
+
+ /*
+ * local trace_kprobes are not added to probe_list, so they are never
+ * searched in find_trace_kprobe(). Therefore, there is no concern of
+ * duplicated name "DUMMY_EVENT" here.
+ */
+ tu = alloc_trace_uprobe(UPROBE_EVENT_SYSTEM, "DUMMY_EVENT", 0,
+ is_return);
+
+ if (IS_ERR(tu)) {
+ pr_info("Failed to allocate trace_uprobe.(%d)\n",
+ (int)PTR_ERR(tu));
+ return ERR_CAST(tu);
+ }
+
+ tu->offset = offs;
+ tu->inode = inode;
+ tu->filename = kstrdup(name, GFP_KERNEL);
+ init_trace_event_call(tu, &tu->tp.call);
+
+ if (set_print_fmt(&tu->tp, is_ret_probe(tu)) < 0) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ return &tu->tp.call;
+error:
+ free_trace_uprobe(tu);
+ return ERR_PTR(ret);
+}
+
+void destroy_local_trace_uprobe(struct trace_event_call *event_call)
+{
+ struct trace_uprobe *tu;
+
+ tu = container_of(event_call, struct trace_uprobe, tp.call);
+
+ kfree(tu->tp.call.print_fmt);
+ tu->tp.call.print_fmt = NULL;
+
+ free_trace_uprobe(tu);
+}
+#endif /* CONFIG_PERF_EVENTS */
+
/* Make a trace interface for controling probe points */
static __init int init_uprobe_trace(void)
{
--
2.9.5
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v5 5/6] bpf: add option for bpf_load.c to use perf_kprobe
2017-12-06 22:45 [PATCH v5 0/6] enable creating [k,u]probe with perf_event_open Song Liu
` (5 preceding siblings ...)
2017-12-06 22:45 ` [PATCH v5 4/6] perf: implement pmu perf_uprobe Song Liu
@ 2017-12-06 22:45 ` Song Liu
2017-12-06 22:45 ` [PATCH v5 6/6] bpf: add new test test_many_kprobe Song Liu
` (3 subsequent siblings)
10 siblings, 0 replies; 20+ messages in thread
From: Song Liu @ 2017-12-06 22:45 UTC (permalink / raw)
To: peterz, rostedt, mingo, davem, netdev, linux-kernel, daniel
Cc: kernel-team, Song Liu
Function load_and_attach() is updated so that it can create kprobes
with either the old text-based API or the new perf_event_open API.
A global flag, use_perf_kprobe, is added to select between the two
APIs.
Signed-off-by: Song Liu <songliubraving@fb.com>
Reviewed-by: Josef Bacik <jbacik@fb.com>
---
samples/bpf/bpf_load.c | 61 +++++++++++++++++++++++++++++++++++++++++++-------
samples/bpf/bpf_load.h | 10 +++++++++
2 files changed, 63 insertions(+), 8 deletions(-)
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 2325d7a..86e3818 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -8,7 +8,6 @@
#include <errno.h>
#include <unistd.h>
#include <string.h>
-#include <stdbool.h>
#include <stdlib.h>
#include <linux/bpf.h>
#include <linux/filter.h>
@@ -29,6 +28,7 @@
#include "perf-sys.h"
#define DEBUGFS "/sys/kernel/debug/tracing/"
+#define KPROBE_TYPE_FILE "/sys/bus/event_source/devices/kprobe/type"
static char license[128];
static int kern_version;
@@ -42,6 +42,8 @@ int prog_array_fd = -1;
struct bpf_map_data map_data[MAX_MAPS];
int map_data_count = 0;
+bool use_perf_kprobe = true;
+int perf_kprobe_type = -1;
static int populate_prog_array(const char *event, int prog_fd)
{
@@ -55,6 +57,26 @@ static int populate_prog_array(const char *event, int prog_fd)
return 0;
}
+int get_perf_kprobe_type_id(void)
+{
+ int tfd;
+ int err;
+ char buf[16];
+
+ tfd = open(KPROBE_TYPE_FILE, O_RDONLY);
+ if (tfd < 0)
+ return -1;
+
+ err = read(tfd, buf, sizeof(buf));
+ close(tfd);
+
+ if (err < 0 || err >= sizeof(buf))
+ return -1;
+ buf[err] = 0;
+ perf_kprobe_type = atoi(buf);
+ return perf_kprobe_type;
+}
+
static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
{
bool is_socket = strncmp(event, "socket", 6) == 0;
@@ -70,7 +92,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
size_t insns_cnt = size / sizeof(struct bpf_insn);
enum bpf_prog_type prog_type;
char buf[256];
- int fd, efd, err, id;
+ int fd, efd, err, id = -1;
struct perf_event_attr attr = {};
attr.type = PERF_TYPE_TRACEPOINT;
@@ -128,7 +150,13 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
return populate_prog_array(event, fd);
}
- if (is_kprobe || is_kretprobe) {
+ if (use_perf_kprobe && perf_kprobe_type == -1) {
+ get_perf_kprobe_type_id();
+ if (perf_kprobe_type == -1)
+ use_perf_kprobe = false;
+ }
+
+ if (!use_perf_kprobe && (is_kprobe || is_kretprobe)) {
if (is_kprobe)
event += 7;
else
@@ -169,27 +197,44 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
strcat(buf, "/id");
}
+ if (use_perf_kprobe && (is_kprobe || is_kretprobe)) {
+ attr.type = perf_kprobe_type;
+ attr.kprobe_func = ptr_to_u64(
+ event + strlen(is_kprobe ? "kprobe/" : "kretprobe/"));
+ attr.probe_offset = 0;
+
+ /* PERF_PROBE_CONFIG_IS_RETPROBE in kernel/events/core.c */
+ if (is_kretprobe)
+ attr.config |= 1 << 0;
+ } else {
efd = open(buf, O_RDONLY, 0);
if (efd < 0) {
printf("failed to open event %s\n", event);
return -1;
}
-
err = read(efd, buf, sizeof(buf));
if (err < 0 || err >= sizeof(buf)) {
- printf("read from '%s' failed '%s'\n", event, strerror(errno));
+ printf("read from '%s' failed '%s'\n", event,
+ strerror(errno));
return -1;
}
-
close(efd);
-
buf[err] = 0;
id = atoi(buf);
attr.config = id;
+ }
efd = sys_perf_event_open(&attr, -1/*pid*/, 0/*cpu*/, -1/*group_fd*/, 0);
if (efd < 0) {
- printf("event %d fd %d err %s\n", id, efd, strerror(errno));
+ if (use_perf_kprobe && (is_kprobe || is_kretprobe))
+ printf("k%sprobe %s fd %d err %s\n",
+ is_kprobe ? "" : "ret",
+ event + strlen(is_kprobe ? "kprobe/"
+ : "kretprobe/"),
+ efd, strerror(errno));
+ else
+ printf("event %d fd %d err %s\n", id, efd,
+ strerror(errno));
return -1;
}
event_fd[prog_cnt - 1] = efd;
diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h
index 7d57a42..95d6be5 100644
--- a/samples/bpf/bpf_load.h
+++ b/samples/bpf/bpf_load.h
@@ -2,6 +2,7 @@
#ifndef __BPF_LOAD_H
#define __BPF_LOAD_H
+#include <stdbool.h>
#include "libbpf.h"
#define MAX_MAPS 32
@@ -38,6 +39,10 @@ extern int map_fd[MAX_MAPS];
extern struct bpf_map_data map_data[MAX_MAPS];
extern int map_data_count;
+extern bool use_perf_kprobe;
+extern int perf_kprobe_type;
+extern int get_perf_kprobe_type_id(void);
+
/* parses elf file compiled by llvm .c->.o
* . parses 'maps' section and creates maps via BPF syscall
* . parses 'license' section and passes it to syscall
@@ -59,6 +64,11 @@ struct ksym {
char *name;
};
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+ return (__u64) (unsigned long) ptr;
+}
+
int load_kallsyms(void);
struct ksym *ksym_search(long key);
int set_link_xdp_fd(int ifindex, int fd, __u32 flags);
--
2.9.5
^ permalink raw reply related [flat|nested] 20+ messages in thread
* [PATCH v5 6/6] bpf: add new test test_many_kprobe
2017-12-06 22:45 [PATCH v5 0/6] enable creating [k,u]probe with perf_event_open Song Liu
` (6 preceding siblings ...)
2017-12-06 22:45 ` [PATCH v5 5/6] bpf: add option for bpf_load.c to use perf_kprobe Song Liu
@ 2017-12-06 22:45 ` Song Liu
2017-12-07 10:23 ` [PATCH v5 0/6] enable creating [k,u]probe with perf_event_open Philippe Ombredanne
` (2 subsequent siblings)
10 siblings, 0 replies; 20+ messages in thread
From: Song Liu @ 2017-12-06 22:45 UTC (permalink / raw)
To: peterz, rostedt, mingo, davem, netdev, linux-kernel, daniel
Cc: kernel-team, Song Liu
The test compares old text based kprobe API with perf_kprobe.
Here is a sample output of this test:
Creating 1000 kprobes with text-based API takes 6.979683 seconds
Cleaning 1000 kprobes with text-based API takes 84.897687 seconds
Creating 1000 kprobes with perf_kprobe (function name) takes 5.077558 seconds
Cleaning 1000 kprobes with perf_kprobe (function name) takes 81.241354 seconds
Creating 1000 kprobes with perf_kprobe (function addr) takes 5.218255 seconds
Cleaning 1000 kprobes with perf_kprobe (function addr) takes 80.010731 seconds
Signed-off-by: Song Liu <songliubraving@fb.com>
Reviewed-by: Josef Bacik <jbacik@fb.com>
Reviewed-by: Philippe Ombredanne <pombredanne@nexb.com>
---
samples/bpf/Makefile | 3 +
samples/bpf/bpf_load.c | 5 +-
samples/bpf/bpf_load.h | 4 +
samples/bpf/test_many_kprobe_user.c | 186 ++++++++++++++++++++++++++++++++++++
4 files changed, 195 insertions(+), 3 deletions(-)
create mode 100644 samples/bpf/test_many_kprobe_user.c
diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 9b4a66e..ec92f35 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -42,6 +42,7 @@ hostprogs-y += xdp_redirect
hostprogs-y += xdp_redirect_map
hostprogs-y += xdp_monitor
hostprogs-y += syscall_tp
+hostprogs-y += test_many_kprobe
# Libbpf dependencies
LIBBPF := ../../tools/lib/bpf/bpf.o
@@ -87,6 +88,7 @@ xdp_redirect-objs := bpf_load.o $(LIBBPF) xdp_redirect_user.o
xdp_redirect_map-objs := bpf_load.o $(LIBBPF) xdp_redirect_map_user.o
xdp_monitor-objs := bpf_load.o $(LIBBPF) xdp_monitor_user.o
syscall_tp-objs := bpf_load.o $(LIBBPF) syscall_tp_user.o
+test_many_kprobe-objs := bpf_load.o $(LIBBPF) test_many_kprobe_user.o
# Tell kbuild to always build the programs
always := $(hostprogs-y)
@@ -172,6 +174,7 @@ HOSTLOADLIBES_xdp_redirect += -lelf
HOSTLOADLIBES_xdp_redirect_map += -lelf
HOSTLOADLIBES_xdp_monitor += -lelf
HOSTLOADLIBES_syscall_tp += -lelf
+HOSTLOADLIBES_test_many_kprobe += -lelf
# Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline:
# make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 86e3818..49f5be5 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -666,9 +666,8 @@ void read_trace_pipe(void)
}
}
-#define MAX_SYMS 300000
-static struct ksym syms[MAX_SYMS];
-static int sym_cnt;
+struct ksym syms[MAX_SYMS];
+int sym_cnt;
static int ksym_cmp(const void *p1, const void *p2)
{
diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h
index 95d6be5..6c9d584 100644
--- a/samples/bpf/bpf_load.h
+++ b/samples/bpf/bpf_load.h
@@ -69,6 +69,10 @@ static inline __u64 ptr_to_u64(const void *ptr)
return (__u64) (unsigned long) ptr;
}
+#define MAX_SYMS 300000
+extern struct ksym syms[MAX_SYMS];
+extern int sym_cnt;
+
int load_kallsyms(void);
struct ksym *ksym_search(long key);
int set_link_xdp_fd(int ifindex, int fd, __u32 flags);
diff --git a/samples/bpf/test_many_kprobe_user.c b/samples/bpf/test_many_kprobe_user.c
new file mode 100644
index 0000000..6c111cf
--- /dev/null
+++ b/samples/bpf/test_many_kprobe_user.c
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <string.h>
+#include <libelf.h>
+#include <gelf.h>
+#include <linux/version.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <time.h>
+#include "libbpf.h"
+#include "bpf_load.h"
+#include "perf-sys.h"
+
+#define MAX_KPROBES 1000
+
+#define DEBUGFS "/sys/kernel/debug/tracing/"
+
+int kprobes[MAX_KPROBES] = {0};
+int kprobe_count;
+int perf_event_fds[MAX_KPROBES];
+const char license[] = "GPL";
+
+static __u64 time_get_ns(void)
+{
+ struct timespec ts;
+
+ clock_gettime(CLOCK_MONOTONIC, &ts);
+ return ts.tv_sec * 1000000000ull + ts.tv_nsec;
+}
+
+static int kprobe_api(char *func, void *addr, bool use_new_api)
+{
+ int efd;
+ struct perf_event_attr attr = {};
+ char buf[256];
+ int err, id;
+
+ attr.sample_type = PERF_SAMPLE_RAW;
+ attr.sample_period = 1;
+ attr.wakeup_events = 1;
+
+ if (use_new_api) {
+ attr.type = perf_kprobe_type;
+ if (func) {
+ attr.kprobe_func = ptr_to_u64(func);
+ attr.probe_offset = 0;
+ } else {
+ attr.kprobe_func = 0;
+ attr.kprobe_addr = ptr_to_u64(addr);
+ }
+ } else {
+ attr.type = PERF_TYPE_TRACEPOINT;
+ snprintf(buf, sizeof(buf),
+ "echo 'p:%s %s' >> /sys/kernel/debug/tracing/kprobe_events",
+ func, func);
+ err = system(buf);
+ if (err < 0) {
+ printf("failed to create kprobe '%s' error '%s'\n",
+ func, strerror(errno));
+ return -1;
+ }
+
+ strcpy(buf, DEBUGFS);
+ strcat(buf, "events/kprobes/");
+ strcat(buf, func);
+ strcat(buf, "/id");
+ efd = open(buf, O_RDONLY, 0);
+ if (efd < 0) {
+ printf("failed to open event %s\n", func);
+ return -1;
+ }
+
+ err = read(efd, buf, sizeof(buf));
+ if (err < 0 || err >= sizeof(buf)) {
+ printf("read from '%s' failed '%s'\n", func,
+ strerror(errno));
+ return -1;
+ }
+
+ close(efd);
+ buf[err] = 0;
+ id = atoi(buf);
+ attr.config = id;
+ }
+
+ attr.size = sizeof(attr);
+ efd = sys_perf_event_open(&attr, -1/*pid*/, 0/*cpu*/,
+ -1/*group_fd*/, 0);
+
+ return efd;
+}
+
+static int select_kprobes(void)
+{
+ int fd;
+ int i;
+
+ load_kallsyms();
+
+ kprobe_count = 0;
+ for (i = 0; i < sym_cnt; i++) {
+ if (strstr(syms[i].name, "."))
+ continue;
+ fd = kprobe_api(syms[i].name, NULL, false);
+ if (fd < 0)
+ continue;
+ close(fd);
+ kprobes[kprobe_count] = i;
+ if (++kprobe_count >= MAX_KPROBES)
+ break;
+ }
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int i;
+ __u64 start_time;
+
+ select_kprobes();
+
+ /* clean all trace_kprobe */
+ i = system("echo \"\" > /sys/kernel/debug/tracing/kprobe_events");
+
+ /* test text based API */
+ start_time = time_get_ns();
+ for (i = 0; i < kprobe_count; i++)
+ perf_event_fds[i] = kprobe_api(syms[kprobes[i]].name,
+ NULL, false);
+ printf("Creating %d kprobes with text-based API takes %f seconds\n",
+ kprobe_count, (time_get_ns() - start_time) / 1000000000.0);
+
+ start_time = time_get_ns();
+ for (i = 0; i < kprobe_count; i++)
+ if (perf_event_fds[i] > 0)
+ close(perf_event_fds[i]);
+ i = system("echo \"\" > /sys/kernel/debug/tracing/kprobe_events");
+ printf("Cleaning %d kprobes with text-based API takes %f seconds\n",
+ kprobe_count, (time_get_ns() - start_time) / 1000000000.0);
+
+ get_perf_kprobe_type_id();
+ if (perf_kprobe_type == -1) {
+ printf("The kernel does support perf_kprobe.\n"
+ "Existing...\n");
+ return 0;
+ }
+
+ /* test perf_kprobe API, with function names */
+ start_time = time_get_ns();
+ for (i = 0; i < kprobe_count; i++)
+ perf_event_fds[i] = kprobe_api(syms[kprobes[i]].name,
+ NULL, true);
+ printf("Creating %d kprobes with perf_kprobe (function name) takes %f seconds\n",
+ kprobe_count, (time_get_ns() - start_time) / 1000000000.0);
+
+ start_time = time_get_ns();
+ for (i = 0; i < kprobe_count; i++)
+ if (perf_event_fds[i] > 0)
+ close(perf_event_fds[i]);
+ printf("Cleaning %d kprobes with perf_kprobe (function name) takes %f seconds\n",
+ kprobe_count, (time_get_ns() - start_time) / 1000000000.0);
+
+ /* test perf_kprobe API, with function address */
+ start_time = time_get_ns();
+ for (i = 0; i < kprobe_count; i++)
+ perf_event_fds[i] = kprobe_api(
+ NULL, (void *)(syms[kprobes[i]].addr), true);
+ printf("Creating %d kprobes with perf_kprobe (function addr) takes %f seconds\n",
+ kprobe_count, (time_get_ns() - start_time) / 1000000000.0);
+
+ start_time = time_get_ns();
+ for (i = 0; i < kprobe_count; i++)
+ if (perf_event_fds[i] > 0)
+ close(perf_event_fds[i]);
+ printf("Cleaning %d kprobes with perf_kprobe (function addr) takes %f seconds\n",
+ kprobe_count, (time_get_ns() - start_time) / 1000000000.0);
+ return 0;
+}
--
2.9.5
^ permalink raw reply related [flat|nested] 20+ messages in thread
* Re: [PATCH v5 0/6] enable creating [k,u]probe with perf_event_open
2017-12-06 22:45 [PATCH v5 0/6] enable creating [k,u]probe with perf_event_open Song Liu
` (7 preceding siblings ...)
2017-12-06 22:45 ` [PATCH v5 6/6] bpf: add new test test_many_kprobe Song Liu
@ 2017-12-07 10:23 ` Philippe Ombredanne
2017-12-08 19:57 ` Daniel Borkmann
2018-04-10 4:45 ` Ravi Bangoria
10 siblings, 0 replies; 20+ messages in thread
From: Philippe Ombredanne @ 2017-12-07 10:23 UTC (permalink / raw)
To: Song Liu
Cc: peterz, Steven Rostedt, Ingo Molnar, David S. Miller, netdev,
LKML, daniel, kernel-team
Song,
On Wed, Dec 6, 2017 at 11:45 PM, Song Liu <songliubraving@fb.com> wrote:
> Changes PATCH v4 to PATCH v5:
> Remove PERF_PROBE_CONFIG_IS_RETPROBE from uapi, use PMU_FORMAT_ATTR
> instead.
>
> Changes PATCH v3 to PATCH v4:
> Remove uapi define MAX_PROBE_FUNC_NAME_LEN, use KSYM_NAME_LEN instead.
> Add flag PERF_PROBE_CONFIG_IS_RETPROBE for config field of [k,u]probe.
> Optimize ifdef's of CONFIG_KPROBE_EVENTS and CONFIG_UPROBE_EVENTS.
> Optimize checks in perf_event_is_tracing().
> Optimize perf_tp_register().
>
> Changes PATCH v2 to PATCH v3:
> Remove fixed type PERF_TYPE_KPROBE and PERF_TYPE_UPROBE, use dynamic
> type instead.
> Update userspace (samples/bpf, bcc) to look up type from sysfs.
> Change License info in test_many_kprobe_user.c as Philippe Ombredanne
> suggested.
>
> Changes PATCH v1 to PATCH v2:
> Split PERF_TYPE_PROBE into PERF_TYPE_KPROBE and PERF_TYPE_UPROBE.
> Split perf_probe into perf_kprobe and perf_uprobe.
> Remove struct probe_desc, use config1 and config2 instead.
>
> Changes RFC v2 to PATCH v1:
> Check type PERF_TYPE_PROBE in perf_event_set_filter().
> Rebase on to tip perf/core.
>
> Changes RFC v1 to RFC v2:
> Fix build issue reported by kbuild test bot by adding ifdef of
> CONFIG_KPROBE_EVENTS, and CONFIG_UPROBE_EVENTS.
>
> RFC v1 cover letter:
>
> This is to follow up the discussion over "new kprobe api" at Linux
> Plumbers 2017:
>
> https://www.linuxplumbersconf.org/2017/ocw/proposals/4808
>
> With current kernel, user space tools can only create/destroy [k,u]probes
> with a text-based API (kprobe_events and uprobe_events in tracefs). This
> approach relies on user space to clean up the [k,u]probe after using them.
> However, this is not easy for user space to clean up properly.
>
> To solve this problem, we introduce a file descriptor based API.
> Specifically, we extended perf_event_open to create [k,u]probe, and attach
> this [k,u]probe to the file descriptor created by perf_event_open. These
> [k,u]probe are associated with this file descriptor, so they are not
> available in tracefs.
>
> We reuse large portion of existing trace_kprobe and trace_uprobe code.
> Currently, the file descriptor API does not support arguments as the
> text-based API does. This should not be a problem, as users of the file
> descriptor based API read data through other methods (bpf, etc.).
>
> I also include a patch to bcc, and a patch to the perf_event_open man page.
> Please see the list below. A fork of bcc with this patch is also available
> on github:
>
> https://github.com/liu-song-6/bcc/tree/perf_event_open
>
> Thanks,
> Song
>
> man-pages patch:
> perf_event_open.2: add type kprobe and uprobe
>
> bcc patch:
> bcc: Try use new API to create [k,u]probe with perf_event_open
>
> kernel patches:
>
> Song Liu (6):
> perf: prepare perf_event.h for new types perf_kprobe and perf_uprobe
> perf: copy new perf_event.h to tools/include/uapi
> perf: implement pmu perf_kprobe
> perf: implement pmu perf_uprobe
> bpf: add option for bpf_load.c to use perf_kprobe
> bpf: add new test test_many_kprobe
>
> include/linux/trace_events.h | 8 ++
> include/uapi/linux/perf_event.h | 4 +
> kernel/events/core.c | 131 +++++++++++++++++++++++-
> kernel/trace/trace_event_perf.c | 102 +++++++++++++++++++
> kernel/trace/trace_kprobe.c | 91 +++++++++++++++--
> kernel/trace/trace_probe.h | 11 ++
> kernel/trace/trace_uprobe.c | 86 ++++++++++++++--
> samples/bpf/Makefile | 3 +
> samples/bpf/bpf_load.c | 66 ++++++++++--
> samples/bpf/bpf_load.h | 14 +++
> samples/bpf/test_many_kprobe_user.c | 186 ++++++++++++++++++++++++++++++++++
> tools/include/uapi/linux/perf_event.h | 4 +
> 12 files changed, 677 insertions(+), 29 deletions(-)
> create mode 100644 samples/bpf/test_many_kprobe_user.c
>
> --
> 2.9.5
Thank you for using the SPDX ids!
For this:
Acked-by: Philippe Ombredanne <pombredanne@nexb.com>
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH v5 0/6] enable creating [k,u]probe with perf_event_open
2017-12-06 22:45 [PATCH v5 0/6] enable creating [k,u]probe with perf_event_open Song Liu
` (8 preceding siblings ...)
2017-12-07 10:23 ` [PATCH v5 0/6] enable creating [k,u]probe with perf_event_open Philippe Ombredanne
@ 2017-12-08 19:57 ` Daniel Borkmann
2017-12-19 17:24 ` Song Liu
2018-04-10 4:45 ` Ravi Bangoria
10 siblings, 1 reply; 20+ messages in thread
From: Daniel Borkmann @ 2017-12-08 19:57 UTC (permalink / raw)
To: Song Liu, peterz, rostedt, mingo, davem, netdev, linux-kernel; +Cc: kernel-team
On 12/06/2017 11:45 PM, Song Liu wrote:
> Changes PATCH v4 to PATCH v5:
> Remove PERF_PROBE_CONFIG_IS_RETPROBE from uapi, use PMU_FORMAT_ATTR
> instead.
>
> Changes PATCH v3 to PATCH v4:
> Remove uapi define MAX_PROBE_FUNC_NAME_LEN, use KSYM_NAME_LEN instead.
> Add flag PERF_PROBE_CONFIG_IS_RETPROBE for config field of [k,u]probe.
> Optimize ifdef's of CONFIG_KPROBE_EVENTS and CONFIG_UPROBE_EVENTS.
> Optimize checks in perf_event_is_tracing().
> Optimize perf_tp_register().
>
> Changes PATCH v2 to PATCH v3:
> Remove fixed type PERF_TYPE_KPROBE and PERF_TYPE_UPROBE, use dynamic
> type instead.
> Update userspace (samples/bpf, bcc) to look up type from sysfs.
> Change License info in test_many_kprobe_user.c as Philippe Ombredanne
> suggested.
>
> Changes PATCH v1 to PATCH v2:
> Split PERF_TYPE_PROBE into PERF_TYPE_KPROBE and PERF_TYPE_UPROBE.
> Split perf_probe into perf_kprobe and perf_uprobe.
> Remove struct probe_desc, use config1 and config2 instead.
>
> Changes RFC v2 to PATCH v1:
> Check type PERF_TYPE_PROBE in perf_event_set_filter().
> Rebase on to tip perf/core.
>
> Changes RFC v1 to RFC v2:
> Fix build issue reported by kbuild test bot by adding ifdef of
> CONFIG_KPROBE_EVENTS, and CONFIG_UPROBE_EVENTS.
>
> RFC v1 cover letter:
>
> This is to follow up the discussion over "new kprobe api" at Linux
> Plumbers 2017:
>
> https://www.linuxplumbersconf.org/2017/ocw/proposals/4808
>
> With current kernel, user space tools can only create/destroy [k,u]probes
> with a text-based API (kprobe_events and uprobe_events in tracefs). This
> approach relies on user space to clean up the [k,u]probe after using them.
> However, this is not easy for user space to clean up properly.
>
> To solve this problem, we introduce a file descriptor based API.
> Specifically, we extended perf_event_open to create [k,u]probe, and attach
> this [k,u]probe to the file descriptor created by perf_event_open. These
> [k,u]probe are associated with this file descriptor, so they are not
> available in tracefs.
>
> We reuse a large portion of existing trace_kprobe and trace_uprobe code.
> Currently, the file descriptor API does not support arguments as the
> text-based API does. This should not be a problem, as users of the file
> descriptor based API read data through other methods (bpf, etc.).
>
> I also include a patch to bcc, and a patch to man-page perf_event_open.
> Please see the list below. A fork of bcc with this patch is also available
> on github:
>
> https://github.com/liu-song-6/bcc/tree/perf_event_open
Peter / Steven, I presume this will be routed through one of you, if
not please yell.
Thanks,
Daniel
> man-pages patch:
> perf_event_open.2: add type kprobe and uprobe
>
> bcc patch:
> bcc: Try use new API to create [k,u]probe with perf_event_open
>
> kernel patches:
>
> Song Liu (6):
> perf: prepare perf_event.h for new types perf_kprobe and perf_uprobe
> perf: copy new perf_event.h to tools/include/uapi
> perf: implement pmu perf_kprobe
> perf: implement pmu perf_uprobe
> bpf: add option for bpf_load.c to use perf_kprobe
> bpf: add new test test_many_kprobe
>
> include/linux/trace_events.h | 8 ++
> include/uapi/linux/perf_event.h | 4 +
> kernel/events/core.c | 131 +++++++++++++++++++++++-
> kernel/trace/trace_event_perf.c | 102 +++++++++++++++++++
> kernel/trace/trace_kprobe.c | 91 +++++++++++++++--
> kernel/trace/trace_probe.h | 11 ++
> kernel/trace/trace_uprobe.c | 86 ++++++++++++++--
> samples/bpf/Makefile | 3 +
> samples/bpf/bpf_load.c | 66 ++++++++++--
> samples/bpf/bpf_load.h | 14 +++
> samples/bpf/test_many_kprobe_user.c | 186 ++++++++++++++++++++++++++++++++++
> tools/include/uapi/linux/perf_event.h | 4 +
> 12 files changed, 677 insertions(+), 29 deletions(-)
> create mode 100644 samples/bpf/test_many_kprobe_user.c
>
> --
> 2.9.5
>
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH v5 0/6] enable creating [k,u]probe with perf_event_open
2017-12-08 19:57 ` Daniel Borkmann
@ 2017-12-19 17:24 ` Song Liu
2017-12-19 20:25 ` Peter Zijlstra
0 siblings, 1 reply; 20+ messages in thread
From: Song Liu @ 2017-12-19 17:24 UTC (permalink / raw)
To: Daniel Borkmann
Cc: Peter Zijlstra, Steven Rostedt, mingo@redhat.com, David Miller,
netdev@vger.kernel.org, linux-kernel@vger.kernel.org, Kernel Team
> On Dec 8, 2017, at 11:57 AM, Daniel Borkmann <daniel@iogearbox.net> wrote:
>
> On 12/06/2017 11:45 PM, Song Liu wrote:
>> Changes PATCH v4 to PATCH v5:
>> Remove PERF_PROBE_CONFIG_IS_RETPROBE from uapi, use PMU_FORMAT_ATTR
>> instead.
>>
>> Changes PATCH v3 to PATCH v4:
>> Remove uapi define MAX_PROBE_FUNC_NAME_LEN, use KSYM_NAME_LEN instead.
>> Add flag PERF_PROBE_CONFIG_IS_RETPROBE for config field of [k,u]probe.
>> Optimize ifdef's of CONFIG_KPROBE_EVENTS and CONFIG_UPROBE_EVENTS.
>> Optimize checks in perf_event_is_tracing().
>> Optimize perf_tp_register().
>>
>> Changes PATCH v2 to PATCH v3:
>> Remove fixed type PERF_TYPE_KPROBE and PERF_TYPE_UPROBE, use dynamic
>> type instead.
>> Update userspace (samples/bpf, bcc) to look up type from sysfs.
>> Change License info in test_many_kprobe_user.c as Philippe Ombredanne
>> suggested.
>>
>> Changes PATCH v1 to PATCH v2:
>> Split PERF_TYPE_PROBE into PERF_TYPE_KPROBE and PERF_TYPE_UPROBE.
>> Split perf_probe into perf_kprobe and perf_uprobe.
>> Remove struct probe_desc, use config1 and config2 instead.
>>
>> Changes RFC v2 to PATCH v1:
>> Check type PERF_TYPE_PROBE in perf_event_set_filter().
>> Rebase on to tip perf/core.
>>
>> Changes RFC v1 to RFC v2:
>> Fix build issue reported by kbuild test bot by adding ifdef of
>> CONFIG_KPROBE_EVENTS, and CONFIG_UPROBE_EVENTS.
>>
>> RFC v1 cover letter:
>>
>> This is to follow up the discussion over "new kprobe api" at Linux
>> Plumbers 2017:
>>
>> https://urldefense.proofpoint.com/v2/url?u=https-3A__www.linuxplumbersconf.org_2017_ocw_proposals_4808&d=DwICaQ&c=5VD0RTtNlTh3ycd41b3MUw&r=dR8692q0_uaizy0jkrBJQM5k2hfm4CiFxYT8KaysFrg&m=iAtBi3jXPYfFgvhHTsv8oWyUKiyEwnGZ34-4jniREz4&s=87sxRxoRkJJenQjCtFLJFXkMLvd_rHkY8ZGON9em1Jc&e=
>>
>> With current kernel, user space tools can only create/destroy [k,u]probes
>> with a text-based API (kprobe_events and uprobe_events in tracefs). This
>> approach relies on user space to clean up the [k,u]probe after using them.
>> However, this is not easy for user space to clean up properly.
>>
>> To solve this problem, we introduce a file descriptor based API.
>> Specifically, we extended perf_event_open to create [k,u]probe, and attach
>> this [k,u]probe to the file descriptor created by perf_event_open. These
>> [k,u]probe are associated with this file descriptor, so they are not
>> available in tracefs.
>>
>> We reuse a large portion of existing trace_kprobe and trace_uprobe code.
>> Currently, the file descriptor API does not support arguments as the
>> text-based API does. This should not be a problem, as users of the file
>> descriptor based API read data through other methods (bpf, etc.).
>>
>> I also include a patch to bcc, and a patch to man-page perf_event_open.
>> Please see the list below. A fork of bcc with this patch is also available
>> on github:
>>
>> https://github.com/liu-song-6/bcc/tree/perf_event_open
>
> Peter / Steven, I presume this will be routed through one of you, if
> not please yell.
>
> Thanks,
> Daniel
>
Dear Peter,
Do you have any further feedback/comments on these patches? If not, could you
please confirm they are ready for merge?
Thanks and Best Regards,
Song
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH v5 0/6] enable creating [k,u]probe with perf_event_open
2017-12-06 22:45 [PATCH v5 0/6] enable creating [k,u]probe with perf_event_open Song Liu
` (9 preceding siblings ...)
2017-12-08 19:57 ` Daniel Borkmann
@ 2018-04-10 4:45 ` Ravi Bangoria
2018-04-10 4:54 ` Alexei Starovoitov
10 siblings, 1 reply; 20+ messages in thread
From: Ravi Bangoria @ 2018-04-10 4:45 UTC (permalink / raw)
To: Song Liu
Cc: peterz, rostedt, mingo, davem, netdev, linux-kernel, daniel,
kernel-team, Oleg Nesterov, Naveen N. Rao
Hi Song,
On 12/07/2017 04:15 AM, Song Liu wrote:
> With current kernel, user space tools can only create/destroy [k,u]probes
> with a text-based API (kprobe_events and uprobe_events in tracefs). This
> approach relies on user space to clean up the [k,u]probe after using them.
> However, this is not easy for user space to clean up properly.
>
> To solve this problem, we introduce a file descriptor based API.
> Specifically, we extended perf_event_open to create [k,u]probe, and attach
> this [k,u]probe to the file descriptor created by perf_event_open. These
> [k,u]probe are associated with this file descriptor, so they are not
> available in tracefs.
Sorry for being late. One simple question..
Will it be good to support k/uprobe arguments with perf_event_open()?
Do you have any plans about that?
Thanks,
Ravi
^ permalink raw reply [flat|nested] 20+ messages in thread
* Re: [PATCH v5 0/6] enable creating [k,u]probe with perf_event_open
2018-04-10 4:45 ` Ravi Bangoria
@ 2018-04-10 4:54 ` Alexei Starovoitov
0 siblings, 0 replies; 20+ messages in thread
From: Alexei Starovoitov @ 2018-04-10 4:54 UTC (permalink / raw)
To: Ravi Bangoria, Song Liu
Cc: peterz, rostedt, mingo, davem, netdev, linux-kernel, daniel,
kernel-team, Oleg Nesterov, Naveen N. Rao
On 4/9/18 9:45 PM, Ravi Bangoria wrote:
> Hi Song,
>
> On 12/07/2017 04:15 AM, Song Liu wrote:
>> With current kernel, user space tools can only create/destroy [k,u]probes
>> with a text-based API (kprobe_events and uprobe_events in tracefs). This
>> approach relies on user space to clean up the [k,u]probe after using them.
>> However, this is not easy for user space to clean up properly.
>>
>> To solve this problem, we introduce a file descriptor based API.
>> Specifically, we extended perf_event_open to create [k,u]probe, and attach
>> this [k,u]probe to the file descriptor created by perf_event_open. These
>> [k,u]probe are associated with this file descriptor, so they are not
>> available in tracefs.
>
> Sorry for being late. One simple question..
>
> Will it be good to support k/uprobe arguments with perf_event_open()?
> Do you have any plans about that?
No plans for that. People that use text based interfaces should
probably be using text interfaces consistently.
IMO mixing FD-based kprobe api with text is not worth the complexity.
^ permalink raw reply [flat|nested] 20+ messages in thread