* [PATCH v2 bpf-next 0/2] bpf: enable stackmap with build_id in nmi
@ 2018-05-02 23:20 Song Liu
2018-05-02 23:20 ` [PATCH v2 bpf-next 1/2] bpf: enable stackmap with build_id in nmi context Song Liu
2018-05-02 23:20 ` [PATCH v2 bpf-next 2/2] bpf: add selftest for stackmap with build_id in NMI context Song Liu
0 siblings, 2 replies; 6+ messages in thread
From: Song Liu @ 2018-05-02 23:20 UTC (permalink / raw)
To: netdev; +Cc: Song Liu, kernel-team, qinteng
Changes v1 -> v2:
1. Rename some variables to (hopefully) reduce confusion;
2. Check irq_work status with IRQ_WORK_BUSY (instead of work->sem);
3. In Kconfig, let BPF_SYSCALL select IRQ_WORK;
4. Add static to DEFINE_PER_CPU();
5. Remove pr_info() in stack_map_init().
Song Liu (2):
bpf: enable stackmap with build_id in nmi context
bpf: add selftest for stackmap with build_id in NMI context
init/Kconfig | 1 +
kernel/bpf/stackmap.c | 59 +++++++++++--
tools/testing/selftests/bpf/test_progs.c | 137 +++++++++++++++++++++++++++++
tools/testing/selftests/bpf/urandom_read.c | 10 ++-
4 files changed, 199 insertions(+), 8 deletions(-)
^ permalink raw reply [flat|nested] 6+ messages in thread
* [PATCH v2 bpf-next 1/2] bpf: enable stackmap with build_id in nmi context
2018-05-02 23:20 [PATCH v2 bpf-next 0/2] bpf: enable stackmap with build_id in nmi Song Liu
@ 2018-05-02 23:20 ` Song Liu
2018-05-03 7:03 ` Tobin C. Harding
2018-05-02 23:20 ` [PATCH v2 bpf-next 2/2] bpf: add selftest for stackmap with build_id in NMI context Song Liu
1 sibling, 1 reply; 6+ messages in thread
From: Song Liu @ 2018-05-02 23:20 UTC (permalink / raw)
To: netdev
Cc: Song Liu, kernel-team, qinteng, Alexei Starovoitov,
Daniel Borkmann, Peter Zijlstra
Currently, we cannot parse build_id in nmi context because of
up_read(&current->mm->mmap_sem); this makes stackmap with build_id
less useful. This patch enables parsing build_id in nmi by moving
the up_read() call into an irq_work. To avoid memory allocation in
nmi context, we use a per-cpu variable for the irq_work, so only one
irq_work per cpu is allowed. If the irq_work is in use, we fall back
to reporting only ips.
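For context, the user-visible effect is that a BPF program attached to a
hardware perf event (which fires in NMI context) can now collect
build_id-annotated stack traces. Below is a minimal sketch of such a
program; the map layout, section names, and include paths follow common
selftest conventions and are illustrative assumptions, not code from this
patch:

#include <linux/bpf.h>
#include <linux/bpf_perf_event.h>
#include "bpf_helpers.h"

#ifndef PERF_MAX_STACK_DEPTH
#define PERF_MAX_STACK_DEPTH 127
#endif

/* stack trace map storing <build_id, offset> pairs instead of raw ips */
struct bpf_map_def SEC("maps") stackmap = {
	.type = BPF_MAP_TYPE_STACK_TRACE,
	.key_size = sizeof(__u32),
	.value_size = sizeof(struct bpf_stack_build_id)
		* PERF_MAX_STACK_DEPTH,
	.max_entries = 128,
	.map_flags = BPF_F_STACK_BUILD_ID,
};

SEC("perf_event")
int oncpu(struct bpf_perf_event_data *ctx)
{
	/* before this patch, the lookup fell back to plain ips whenever
	 * the program ran from an NMI (e.g. a hardware PMU sample) */
	bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK);
	return 0;
}

char _license[] SEC("license") = "GPL";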
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Peter Zijlstra <peterz@infradead.org>
Signed-off-by: Song Liu <songliubraving@fb.com>
---
init/Kconfig | 1 +
kernel/bpf/stackmap.c | 59 +++++++++++++++++++++++++++++++++++++++++++++------
2 files changed, 54 insertions(+), 6 deletions(-)
diff --git a/init/Kconfig b/init/Kconfig
index f013afc..480a4f2 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1391,6 +1391,7 @@ config BPF_SYSCALL
bool "Enable bpf() system call"
select ANON_INODES
select BPF
+ select IRQ_WORK
default n
help
Enable the bpf() system call that allows to manipulate eBPF
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index 3ba102b..51d4aea 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -11,6 +11,7 @@
#include <linux/perf_event.h>
#include <linux/elf.h>
#include <linux/pagemap.h>
+#include <linux/irq_work.h>
#include "percpu_freelist.h"
#define STACK_CREATE_FLAG_MASK \
@@ -32,6 +33,23 @@ struct bpf_stack_map {
struct stack_map_bucket *buckets[];
};
+/* irq_work to run up_read() for build_id lookup in nmi context */
+struct stack_map_irq_work {
+ struct irq_work irq_work;
+ struct rw_semaphore *sem;
+};
+
+static void do_up_read(struct irq_work *entry)
+{
+ struct stack_map_irq_work *work = container_of(entry,
+ struct stack_map_irq_work, irq_work);
+
+ up_read(work->sem);
+ work->sem = NULL;
+}
+
+static DEFINE_PER_CPU(struct stack_map_irq_work, up_read_work);
+
static inline bool stack_map_use_build_id(struct bpf_map *map)
{
return (map->map_flags & BPF_F_STACK_BUILD_ID);
@@ -267,17 +285,27 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
{
int i;
struct vm_area_struct *vma;
+ bool in_nmi_ctx = in_nmi();
+ bool irq_work_busy = false;
+ struct stack_map_irq_work *work;
+
+ if (in_nmi_ctx) {
+ work = this_cpu_ptr(&up_read_work);
+ if (work->irq_work.flags & IRQ_WORK_BUSY)
+ /* cannot queue more up_read, fallback */
+ irq_work_busy = true;
+ }
/*
- * We cannot do up_read() in nmi context, so build_id lookup is
- * only supported for non-nmi events. If at some point, it is
- * possible to run find_vma() without taking the semaphore, we
- * would like to allow build_id lookup in nmi context.
+ * We cannot do up_read() in nmi context. To do build_id lookup
+ * in nmi context, we need to run up_read() in irq_work. We use
+ * a percpu variable to do the irq_work. If the irq_work is
+ * already used by another lookup, we fall back to report ips.
*
* Same fallback is used for kernel stack (!user) on a stackmap
* with build_id.
*/
- if (!user || !current || !current->mm || in_nmi() ||
+ if (!user || !current || !current->mm || irq_work_busy ||
down_read_trylock(&current->mm->mmap_sem) == 0) {
/* cannot access current->mm, fall back to ips */
for (i = 0; i < trace_nr; i++) {
@@ -299,7 +327,13 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
- vma->vm_start;
id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
}
- up_read(&current->mm->mmap_sem);
+
+ if (!in_nmi_ctx)
+ up_read(&current->mm->mmap_sem);
+ else {
+ work->sem = &current->mm->mmap_sem;
+ irq_work_queue(&work->irq_work);
+ }
}
BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
@@ -575,3 +609,16 @@ const struct bpf_map_ops stack_map_ops = {
.map_update_elem = stack_map_update_elem,
.map_delete_elem = stack_map_delete_elem,
};
+
+static int __init stack_map_init(void)
+{
+ int cpu;
+ struct stack_map_irq_work *work;
+
+ for_each_possible_cpu(cpu) {
+ work = per_cpu_ptr(&up_read_work, cpu);
+ init_irq_work(&work->irq_work, do_up_read);
+ }
+ return 0;
+}
+subsys_initcall(stack_map_init);
--
2.9.5
^ permalink raw reply related [flat|nested] 6+ messages in thread
* [PATCH v2 bpf-next 2/2] bpf: add selftest for stackmap with build_id in NMI context
2018-05-02 23:20 [PATCH v2 bpf-next 0/2] bpf: enable stackmap with build_id in nmi Song Liu
2018-05-02 23:20 ` [PATCH v2 bpf-next 1/2] bpf: enable stackmap with build_id in nmi context Song Liu
@ 2018-05-02 23:20 ` Song Liu
2018-05-03 7:19 ` Tobin C. Harding
1 sibling, 1 reply; 6+ messages in thread
From: Song Liu @ 2018-05-02 23:20 UTC (permalink / raw)
To: netdev; +Cc: Song Liu, kernel-team, qinteng
This new test exercises stackmap with build_id using the hardware event
PERF_COUNT_HW_CPU_CYCLES.
Because we only support one ips-to-build_id lookup per cpu in NMI
context, stack_amap will not be able to do the lookup in this test.
Therefore, we skip compare_stack_ips(), as it would always fail.
urandom_read.c is extended to run a configurable number of read
iterations so that the process can be caught by the perf event.
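For reference, stack_amap fails to resolve build_ids here because it is
filled by a second, explicit bpf_get_stack() call from the same NMI, after
bpf_get_stackid() has already claimed the per-cpu irq_work. A rough sketch
of that side of the test program follows; it is an assumed reconstruction
(map shape, flags, and function names are not copied from
test_stacktrace_build_id.c):

#include <linux/bpf.h>
#include <linux/bpf_perf_event.h>
#include "bpf_helpers.h"

#ifndef PERF_MAX_STACK_DEPTH
#define PERF_MAX_STACK_DEPTH 127
#endif

/* plain array map that the program fills by hand via bpf_get_stack() */
struct bpf_map_def SEC("maps") stack_amap = {
	.type = BPF_MAP_TYPE_ARRAY,
	.key_size = sizeof(__u32),
	.value_size = sizeof(struct bpf_stack_build_id)
		* PERF_MAX_STACK_DEPTH,
	.max_entries = 128,
};

SEC("perf_event")
int fill_stack_amap(struct bpf_perf_event_data *ctx)
{
	__u32 key = 0;
	void *buf = bpf_map_lookup_elem(&stack_amap, &key);

	if (buf)
		/* second ips-to-build_id translation on this CPU: in NMI
		 * the per-cpu irq_work is still busy, so entries come back
		 * as BPF_STACK_BUILD_ID_IP and compare_stack_ips() would
		 * always see a mismatch */
		bpf_get_stack(ctx, buf,
			      sizeof(struct bpf_stack_build_id)
					* PERF_MAX_STACK_DEPTH,
			      BPF_F_USER_STACK | BPF_F_USER_BUILD_ID);
	return 0;
}

char _license[] SEC("license") = "GPL";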
Signed-off-by: Song Liu <songliubraving@fb.com>
---
tools/testing/selftests/bpf/test_progs.c | 137 +++++++++++++++++++++++++++++
tools/testing/selftests/bpf/urandom_read.c | 10 ++-
2 files changed, 145 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index aa336f0..00bb08c 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -1272,6 +1272,142 @@ static void test_stacktrace_build_id(void)
return;
}
+static void test_stacktrace_build_id_nmi(void)
+{
+ int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
+ const char *file = "./test_stacktrace_build_id.o";
+ int err, pmu_fd, prog_fd;
+ struct perf_event_attr attr = {
+ .sample_freq = 5000,
+ .freq = 1,
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ };
+ __u32 key, previous_key, val, duration = 0;
+ struct bpf_object *obj;
+ char buf[256];
+ int i, j;
+ struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
+ int build_id_matches = 0;
+
+ err = bpf_prog_load(file, BPF_PROG_TYPE_PERF_EVENT, &obj, &prog_fd);
+ if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
+ goto out;
+
+ pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+ 0 /* cpu 0 */, -1 /* group id */,
+ 0 /* flags */);
+ if (CHECK(pmu_fd < 0, "perf_event_open",
+ "err %d errno %d. Does the test host support PERF_COUNT_HW_CPU_CYCLES?\n",
+ pmu_fd, errno))
+ goto close_prog;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
+ if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
+ err, errno))
+ goto close_pmu;
+
+ err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
+ if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
+ err, errno))
+ goto disable_pmu;
+
+ /* find map fds */
+ control_map_fd = bpf_find_map(__func__, obj, "control_map");
+ if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
+ if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
+ if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
+ err, errno))
+ goto disable_pmu;
+
+ stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
+ if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
+ == 0);
+ assert(system("taskset 0x1 ./urandom_read 100000") == 0);
+ /* disable stack trace collection */
+ key = 0;
+ val = 1;
+ bpf_map_update_elem(control_map_fd, &key, &val, 0);
+
+ /* for every element in stackid_hmap, we can find a corresponding one
+ * in stackmap, and vice versa.
+ */
+ err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
+ if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
+ if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ err = extract_build_id(buf, 256);
+
+ if (CHECK(err, "get build_id with readelf",
+ "err %d errno %d\n", err, errno))
+ goto disable_pmu;
+
+ err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
+ if (CHECK(err, "get_next_key from stackmap",
+ "err %d, errno %d\n", err, errno))
+ goto disable_pmu;
+
+ do {
+ char build_id[64];
+
+ err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
+ if (CHECK(err, "lookup_elem from stackmap",
+ "err %d, errno %d\n", err, errno))
+ goto disable_pmu;
+ for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
+ if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
+ id_offs[i].offset != 0) {
+ for (j = 0; j < 20; ++j)
+ sprintf(build_id + 2 * j, "%02x",
+ id_offs[i].build_id[j] & 0xff);
+ if (strstr(buf, build_id) != NULL)
+ build_id_matches = 1;
+ }
+ previous_key = key;
+ } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
+
+ if (CHECK(build_id_matches < 1, "build id match",
+ "Didn't find expected build ID from the map\n"))
+ goto disable_pmu;
+
+ /*
+ * We intentionally skip compare_stack_ips(). This is because we
+ * only support one in_nmi() ips-to-build_id translation per cpu
+ * at any time, thus stack_amap here will always fall back to
+ * BPF_STACK_BUILD_ID_IP.
+ */
+
+disable_pmu:
+ ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
+
+close_pmu:
+ close(pmu_fd);
+
+close_prog:
+ bpf_object__close(obj);
+
+out:
+ return;
+}
+
#define MAX_CNT_RAWTP 10ull
#define MAX_STACK_RAWTP 100
struct get_stack_trace_t {
@@ -1425,6 +1561,7 @@ int main(void)
test_tp_attach_query();
test_stacktrace_map();
test_stacktrace_build_id();
+ test_stacktrace_build_id_nmi();
test_stacktrace_map_raw_tp();
test_get_stack_raw_tp();
diff --git a/tools/testing/selftests/bpf/urandom_read.c b/tools/testing/selftests/bpf/urandom_read.c
index 4acfdeb..9de8b7c 100644
--- a/tools/testing/selftests/bpf/urandom_read.c
+++ b/tools/testing/selftests/bpf/urandom_read.c
@@ -6,15 +6,21 @@
#include <stdlib.h>
#define BUF_SIZE 256
-int main(void)
+
+int main(int argc, char *argv[])
{
int fd = open("/dev/urandom", O_RDONLY);
int i;
char buf[BUF_SIZE];
+ int count = 4;
if (fd < 0)
return 1;
- for (i = 0; i < 4; ++i)
+
+ if (argc == 2)
+ count = atoi(argv[1]);
+
+ for (i = 0; i < count; ++i)
read(fd, buf, BUF_SIZE);
close(fd);
--
2.9.5
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH v2 bpf-next 1/2] bpf: enable stackmap with build_id in nmi context
2018-05-02 23:20 ` [PATCH v2 bpf-next 1/2] bpf: enable stackmap with build_id in nmi context Song Liu
@ 2018-05-03 7:03 ` Tobin C. Harding
0 siblings, 0 replies; 6+ messages in thread
From: Tobin C. Harding @ 2018-05-03 7:03 UTC (permalink / raw)
To: Song Liu
Cc: netdev, kernel-team, qinteng, Alexei Starovoitov, Daniel Borkmann,
Peter Zijlstra
On Wed, May 02, 2018 at 04:20:29PM -0700, Song Liu wrote:
> Currently, we cannot parse build_id in nmi context because of
> up_read(&current->mm->mmap_sem), this makes stackmap with build_id
> less useful. This patch enables parsing build_id in nmi by putting
> the up_read() call in irq_work. To avoid memory allocation in nmi
> context, we use per cpu variable for the irq_work. As a result, only
> one irq_work per cpu is allowed. If the irq_work is in-use, we
> fallback to only report ips.
>
> Cc: Alexei Starovoitov <ast@kernel.org>
> Cc: Daniel Borkmann <daniel@iogearbox.net>
> Cc: Peter Zijlstra <peterz@infradead.org>
> Signed-off-by: Song Liu <songliubraving@fb.com>
> ---
> init/Kconfig | 1 +
> kernel/bpf/stackmap.c | 59 +++++++++++++++++++++++++++++++++++++++++++++------
> 2 files changed, 54 insertions(+), 6 deletions(-)
>
> diff --git a/init/Kconfig b/init/Kconfig
> index f013afc..480a4f2 100644
> --- a/init/Kconfig
> +++ b/init/Kconfig
> @@ -1391,6 +1391,7 @@ config BPF_SYSCALL
> bool "Enable bpf() system call"
> select ANON_INODES
> select BPF
> + select IRQ_WORK
> default n
> help
> Enable the bpf() system call that allows to manipulate eBPF
> diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
> index 3ba102b..51d4aea 100644
> --- a/kernel/bpf/stackmap.c
> +++ b/kernel/bpf/stackmap.c
> @@ -11,6 +11,7 @@
> #include <linux/perf_event.h>
> #include <linux/elf.h>
> #include <linux/pagemap.h>
> +#include <linux/irq_work.h>
> #include "percpu_freelist.h"
>
> #define STACK_CREATE_FLAG_MASK \
> @@ -32,6 +33,23 @@ struct bpf_stack_map {
> struct stack_map_bucket *buckets[];
> };
>
> +/* irq_work to run up_read() for build_id lookup in nmi context */
> +struct stack_map_irq_work {
> + struct irq_work irq_work;
> + struct rw_semaphore *sem;
> +};
> +
> +static void do_up_read(struct irq_work *entry)
> +{
> + struct stack_map_irq_work *work = container_of(entry,
> + struct stack_map_irq_work, irq_work);
perhaps:
struct stack_map_irq_work *work;
work = container_of(entry, struct stack_map_irq_work, irq_work);
> + up_read(work->sem);
> + work->sem = NULL;
> +}
> +
> +static DEFINE_PER_CPU(struct stack_map_irq_work, up_read_work);
> +
> static inline bool stack_map_use_build_id(struct bpf_map *map)
> {
> return (map->map_flags & BPF_F_STACK_BUILD_ID);
> @@ -267,17 +285,27 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
> {
> int i;
> struct vm_area_struct *vma;
> + bool in_nmi_ctx = in_nmi();
> + bool irq_work_busy = false;
> + struct stack_map_irq_work *work;
> +
> + if (in_nmi_ctx) {
> + work = this_cpu_ptr(&up_read_work);
> + if (work->irq_work.flags & IRQ_WORK_BUSY)
> + /* cannot queue more up_read, fallback */
> + irq_work_busy = true;
> + }
>
> /*
> - * We cannot do up_read() in nmi context, so build_id lookup is
> - * only supported for non-nmi events. If at some point, it is
> - * possible to run find_vma() without taking the semaphore, we
> - * would like to allow build_id lookup in nmi context.
> + * We cannot do up_read() in nmi context. To do build_id lookup
> + * in nmi context, we need to run up_read() in irq_work. We use
> + * a percpu variable to do the irq_work. If the irq_work is
> + * already used by another lookup, we fall back to report ips.
> *
> * Same fallback is used for kernel stack (!user) on a stackmap
> * with build_id.
> */
> - if (!user || !current || !current->mm || in_nmi() ||
> + if (!user || !current || !current->mm || irq_work_busy ||
> down_read_trylock(&current->mm->mmap_sem) == 0) {
> /* cannot access current->mm, fall back to ips */
> for (i = 0; i < trace_nr; i++) {
> @@ -299,7 +327,13 @@ static void stack_map_get_build_id_offset(struct bpf_stack_build_id *id_offs,
> - vma->vm_start;
> id_offs[i].status = BPF_STACK_BUILD_ID_VALID;
> }
> - up_read(&current->mm->mmap_sem);
> +
> + if (!in_nmi_ctx)
> + up_read(&current->mm->mmap_sem);
> + else {
perhaps:
if (!in_nmi_ctx) {
up_read(&current->mm->mmap_sem);
} else {
Hope this helps,
Tobin.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v2 bpf-next 2/2] bpf: add selftest for stackmap with build_id in NMI context
2018-05-02 23:20 ` [PATCH v2 bpf-next 2/2] bpf: add selftest for stackmap with build_id in NMI context Song Liu
@ 2018-05-03 7:19 ` Tobin C. Harding
2018-05-04 6:41 ` Song Liu
0 siblings, 1 reply; 6+ messages in thread
From: Tobin C. Harding @ 2018-05-03 7:19 UTC (permalink / raw)
To: Song Liu; +Cc: netdev, kernel-team, qinteng
On Wed, May 02, 2018 at 04:20:30PM -0700, Song Liu wrote:
> This new test captures stackmap with build_id with hardware event
> PERF_COUNT_HW_CPU_CYCLES.
>
> Because we only support one ips-to-build_id lookup per cpu in NMI
> context, stack_amap will not be able to do the lookup in this test.
stack_map ?
> Therefore, we didn't do compare_stack_ips(), as it will always fail.
>
> urandom_read.c is extended to run configurable cycles so that it can be
> caught by the perf event.
>
> Signed-off-by: Song Liu <songliubraving@fb.com>
> ---
> tools/testing/selftests/bpf/test_progs.c | 137 +++++++++++++++++++++++++++++
> tools/testing/selftests/bpf/urandom_read.c | 10 ++-
> 2 files changed, 145 insertions(+), 2 deletions(-)
>
> diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
> index aa336f0..00bb08c 100644
> --- a/tools/testing/selftests/bpf/test_progs.c
> +++ b/tools/testing/selftests/bpf/test_progs.c
> @@ -1272,6 +1272,142 @@ static void test_stacktrace_build_id(void)
> return;
> }
>
> +static void test_stacktrace_build_id_nmi(void)
> +{
> + int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
> + const char *file = "./test_stacktrace_build_id.o";
> + int err, pmu_fd, prog_fd;
> + struct perf_event_attr attr = {
> + .sample_freq = 5000,
> + .freq = 1,
> + .type = PERF_TYPE_HARDWARE,
> + .config = PERF_COUNT_HW_CPU_CYCLES,
> + };
> + __u32 key, previous_key, val, duration = 0;
> + struct bpf_object *obj;
> + char buf[256];
> + int i, j;
> + struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
> + int build_id_matches = 0;
> +
> + err = bpf_prog_load(file, BPF_PROG_TYPE_PERF_EVENT, &obj, &prog_fd);
> + if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
> + goto out;
perhaps:
return;
> + pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
> + 0 /* cpu 0 */, -1 /* group id */,
> + 0 /* flags */);
> + if (CHECK(pmu_fd < 0, "perf_event_open",
> + "err %d errno %d. Does the test host support PERF_COUNT_HW_CPU_CYCLES?\n",
> + pmu_fd, errno))
> + goto close_prog;
> +
> + err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
> + if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
> + err, errno))
> + goto close_pmu;
> +
> + err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
> + if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
> + err, errno))
> + goto disable_pmu;
> +
> + /* find map fds */
> + control_map_fd = bpf_find_map(__func__, obj, "control_map");
> + if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
> + "err %d errno %d\n", err, errno))
> + goto disable_pmu;
> +
> + stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
> + if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
> + "err %d errno %d\n", err, errno))
> + goto disable_pmu;
> +
> + stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
> + if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
> + err, errno))
> + goto disable_pmu;
> +
> + stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
> + if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap",
> + "err %d errno %d\n", err, errno))
> + goto disable_pmu;
> +
> + assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
> + == 0);
> + assert(system("taskset 0x1 ./urandom_read 100000") == 0);
> + /* disable stack trace collection */
> + key = 0;
> + val = 1;
> + bpf_map_update_elem(control_map_fd, &key, &val, 0);
> +
> + /* for every element in stackid_hmap, we can find a corresponding one
> + * in stackmap, and vice versa.
> + */
> + err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
> + if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
> + "err %d errno %d\n", err, errno))
> + goto disable_pmu;
> +
> + err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
> + if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
> + "err %d errno %d\n", err, errno))
> + goto disable_pmu;
> +
> + err = extract_build_id(buf, 256);
> +
> + if (CHECK(err, "get build_id with readelf",
> + "err %d errno %d\n", err, errno))
> + goto disable_pmu;
> +
> + err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
> + if (CHECK(err, "get_next_key from stackmap",
> + "err %d, errno %d\n", err, errno))
> + goto disable_pmu;
> +
> + do {
> + char build_id[64];
> +
> + err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
> + if (CHECK(err, "lookup_elem from stackmap",
> + "err %d, errno %d\n", err, errno))
> + goto disable_pmu;
> + for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
> + if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
> + id_offs[i].offset != 0) {
> + for (j = 0; j < 20; ++j)
> + sprintf(build_id + 2 * j, "%02x",
> + id_offs[i].build_id[j] & 0xff);
> + if (strstr(buf, build_id) != NULL)
> + build_id_matches = 1;
> + }
> + previous_key = key;
> + } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
> +
> + if (CHECK(build_id_matches < 1, "build id match",
> + "Didn't find expected build ID from the map\n"))
> + goto disable_pmu;
> +
> + /*
> + * We intentionally skip compare_stack_ips(). This is because we
> + * only support one in_nmi() ips-to-build_id translation per cpu
> + * at any time, thus stack_amap here will always fallback to
> + * BPF_STACK_BUILD_ID_IP;
> + */
> +
> +disable_pmu:
> + ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
> +
> +close_pmu:
> + close(pmu_fd);
> +
> +close_prog:
> + bpf_object__close(obj);
> +
> +out:
> + return;
> +}
No real need for label 'out' right? We can just return directly and
remove the last three lines of this function.
Hope this helps,
Tobin.
^ permalink raw reply [flat|nested] 6+ messages in thread
* Re: [PATCH v2 bpf-next 2/2] bpf: add selftest for stackmap with build_id in NMI context
2018-05-03 7:19 ` Tobin C. Harding
@ 2018-05-04 6:41 ` Song Liu
0 siblings, 0 replies; 6+ messages in thread
From: Song Liu @ 2018-05-04 6:41 UTC (permalink / raw)
To: Tobin C. Harding; +Cc: netdev@vger.kernel.org, Kernel Team, Teng Qin
Thanks Tobin. I will fold these changes in.
> On May 3, 2018, at 12:19 AM, Tobin C. Harding <tobin@apporbit.com> wrote:
>
> On Wed, May 02, 2018 at 04:20:30PM -0700, Song Liu wrote:
>> This new test captures stackmap with build_id with hardware event
>> PERF_COUNT_HW_CPU_CYCLES.
>>
>> Because we only support one ips-to-build_id lookup per cpu in NMI
>> context, stack_amap will not be able to do the lookup in this test.
>
> stack_map ?
This one really is stack_amap, not a typo: the test uses two maps,
stackmap and stack_amap.
Song
>
>> Therefore, we didn't do compare_stack_ips(), as it will always fail.
>>
>> urandom_read.c is extended to run configurable cycles so that it can be
>> caught by the perf event.
>>
>> Signed-off-by: Song Liu <songliubraving@fb.com>
>> ---
>> tools/testing/selftests/bpf/test_progs.c | 137 +++++++++++++++++++++++++++++
>> tools/testing/selftests/bpf/urandom_read.c | 10 ++-
>> 2 files changed, 145 insertions(+), 2 deletions(-)
>>
>> diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
>> index aa336f0..00bb08c 100644
>> --- a/tools/testing/selftests/bpf/test_progs.c
>> +++ b/tools/testing/selftests/bpf/test_progs.c
>> @@ -1272,6 +1272,142 @@ static void test_stacktrace_build_id(void)
>> return;
>> }
>>
>> +static void test_stacktrace_build_id_nmi(void)
>> +{
>> + int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
>> + const char *file = "./test_stacktrace_build_id.o";
>> + int err, pmu_fd, prog_fd;
>> + struct perf_event_attr attr = {
>> + .sample_freq = 5000,
>> + .freq = 1,
>> + .type = PERF_TYPE_HARDWARE,
>> + .config = PERF_COUNT_HW_CPU_CYCLES,
>> + };
>> + __u32 key, previous_key, val, duration = 0;
>> + struct bpf_object *obj;
>> + char buf[256];
>> + int i, j;
>> + struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
>> + int build_id_matches = 0;
>> +
>> + err = bpf_prog_load(file, BPF_PROG_TYPE_PERF_EVENT, &obj, &prog_fd);
>> + if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
>> + goto out;
>
> perhaps:
> return;
>
>> + pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
>> + 0 /* cpu 0 */, -1 /* group id */,
>> + 0 /* flags */);
>> + if (CHECK(pmu_fd < 0, "perf_event_open",
>> + "err %d errno %d. Does the test host support PERF_COUNT_HW_CPU_CYCLES?\n",
>> + pmu_fd, errno))
>> + goto close_prog;
>> +
>> + err = ioctl(pmu_fd, PERF_EVENT_IOC_ENABLE, 0);
>> + if (CHECK(err, "perf_event_ioc_enable", "err %d errno %d\n",
>> + err, errno))
>> + goto close_pmu;
>> +
>> + err = ioctl(pmu_fd, PERF_EVENT_IOC_SET_BPF, prog_fd);
>> + if (CHECK(err, "perf_event_ioc_set_bpf", "err %d errno %d\n",
>> + err, errno))
>> + goto disable_pmu;
>> +
>> + /* find map fds */
>> + control_map_fd = bpf_find_map(__func__, obj, "control_map");
>> + if (CHECK(control_map_fd < 0, "bpf_find_map control_map",
>> + "err %d errno %d\n", err, errno))
>> + goto disable_pmu;
>> +
>> + stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap");
>> + if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap",
>> + "err %d errno %d\n", err, errno))
>> + goto disable_pmu;
>> +
>> + stackmap_fd = bpf_find_map(__func__, obj, "stackmap");
>> + if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n",
>> + err, errno))
>> + goto disable_pmu;
>> +
>> + stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap");
>> + if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap",
>> + "err %d errno %d\n", err, errno))
>> + goto disable_pmu;
>> +
>> + assert(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null")
>> + == 0);
>> + assert(system("taskset 0x1 ./urandom_read 100000") == 0);
>> + /* disable stack trace collection */
>> + key = 0;
>> + val = 1;
>> + bpf_map_update_elem(control_map_fd, &key, &val, 0);
>> +
>> + /* for every element in stackid_hmap, we can find a corresponding one
>> + * in stackmap, and vice versa.
>> + */
>> + err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
>> + if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap",
>> + "err %d errno %d\n", err, errno))
>> + goto disable_pmu;
>> +
>> + err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
>> + if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap",
>> + "err %d errno %d\n", err, errno))
>> + goto disable_pmu;
>> +
>> + err = extract_build_id(buf, 256);
>> +
>> + if (CHECK(err, "get build_id with readelf",
>> + "err %d errno %d\n", err, errno))
>> + goto disable_pmu;
>> +
>> + err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
>> + if (CHECK(err, "get_next_key from stackmap",
>> + "err %d, errno %d\n", err, errno))
>> + goto disable_pmu;
>> +
>> + do {
>> + char build_id[64];
>> +
>> + err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
>> + if (CHECK(err, "lookup_elem from stackmap",
>> + "err %d, errno %d\n", err, errno))
>> + goto disable_pmu;
>> + for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
>> + if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
>> + id_offs[i].offset != 0) {
>> + for (j = 0; j < 20; ++j)
>> + sprintf(build_id + 2 * j, "%02x",
>> + id_offs[i].build_id[j] & 0xff);
>> + if (strstr(buf, build_id) != NULL)
>> + build_id_matches = 1;
>> + }
>> + previous_key = key;
>> + } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
>> +
>> + if (CHECK(build_id_matches < 1, "build id match",
>> + "Didn't find expected build ID from the map\n"))
>> + goto disable_pmu;
>> +
>> + /*
>> + * We intentionally skip compare_stack_ips(). This is because we
>> + * only support one in_nmi() ips-to-build_id translation per cpu
>> + * at any time, thus stack_amap here will always fallback to
>> + * BPF_STACK_BUILD_ID_IP;
>> + */
>> +
>> +disable_pmu:
>> + ioctl(pmu_fd, PERF_EVENT_IOC_DISABLE);
>> +
>> +close_pmu:
>> + close(pmu_fd);
>> +
>> +close_prog:
>> + bpf_object__close(obj);
>> +
>> +out:
>> + return;
>> +}
>
> No real need for label 'out' right? We can just return directly and
> remove the last three lines of this function.
>
> Hope this helps,
> Tobin.
^ permalink raw reply [flat|nested] 6+ messages in thread