From mboxrd@z Thu Jan 1 00:00:00 1970 From: Yonghong Song Subject: Re: [PATCH bpf-next v3 8/9] tools/bpf: add a test for bpf_get_stack with raw tracepoint prog Date: Sun, 22 Apr 2018 19:57:20 -0700 Message-ID: <4e7541d9-17b4-a07b-1672-9e20e23cdcb7@fb.com> References: <20180420221842.742330-1-yhs@fb.com> <20180420221842.742330-9-yhs@fb.com> <20180423002301.lnhdn6oueweqztvb@ast-mbp> Mime-Version: 1.0 Content-Type: text/plain; charset="utf-8"; format=flowed Content-Transfer-Encoding: 7bit Cc: , , , To: Alexei Starovoitov Return-path: Received: from mx0a-00082601.pphosted.com ([67.231.145.42]:41652 "EHLO mx0a-00082601.pphosted.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1753848AbeDWC5u (ORCPT ); Sun, 22 Apr 2018 22:57:50 -0400 In-Reply-To: <20180423002301.lnhdn6oueweqztvb@ast-mbp> Content-Language: en-US Sender: netdev-owner@vger.kernel.org List-ID: On 4/22/18 5:23 PM, Alexei Starovoitov wrote: > On Fri, Apr 20, 2018 at 03:18:41PM -0700, Yonghong Song wrote: >> The test attached a raw_tracepoint program to sched/sched_switch. >> It tested to get stack for user space, kernel space and user >> space with build_id request. It also tested to get user >> and kernel stack into the same buffer with back-to-back >> bpf_get_stack helper calls. >> >> Whenever the kernel stack is available, the user space >> application will check to ensure that the kernel function >> for raw_tracepoint ___bpf_prog_run is part of the stack. >> >> Signed-off-by: Yonghong Song >> --- >> tools/testing/selftests/bpf/Makefile | 3 +- >> tools/testing/selftests/bpf/test_get_stack_rawtp.c | 102 ++++++++++++++++++ >> tools/testing/selftests/bpf/test_progs.c | 115 +++++++++++++++++++++ >> 3 files changed, 219 insertions(+), 1 deletion(-) >> create mode 100644 tools/testing/selftests/bpf/test_get_stack_rawtp.c >> >> diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile >> index 0b72cc7..54e9e74 100644 >> --- a/tools/testing/selftests/bpf/Makefile >> +++ b/tools/testing/selftests/bpf/Makefile >> @@ -32,7 +32,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test >> test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \ >> sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \ >> sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \ >> - test_btf_haskv.o test_btf_nokv.o >> + test_btf_haskv.o test_btf_nokv.o test_get_stack_rawtp.o >> >> # Order correspond to 'make run_tests' order >> TEST_PROGS := test_kmod.sh \ >> @@ -56,6 +56,7 @@ $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a >> $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c >> $(OUTPUT)/test_sock: cgroup_helpers.c >> $(OUTPUT)/test_sock_addr: cgroup_helpers.c >> +$(OUTPUT)/test_progs: trace_helpers.c >> >> .PHONY: force >> >> diff --git a/tools/testing/selftests/bpf/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/test_get_stack_rawtp.c >> new file mode 100644 >> index 0000000..ba1dcf9 >> --- /dev/null >> +++ b/tools/testing/selftests/bpf/test_get_stack_rawtp.c >> @@ -0,0 +1,102 @@ >> +// SPDX-License-Identifier: GPL-2.0 >> + >> +#include >> +#include "bpf_helpers.h" >> + >> +/* Permit pretty deep stack traces */ >> +#define MAX_STACK_RAWTP 100 >> +struct stack_trace_t { >> + int pid; >> + int kern_stack_size; >> + int user_stack_size; >> + int user_stack_buildid_size; >> + __u64 kern_stack[MAX_STACK_RAWTP]; >> + __u64 user_stack[MAX_STACK_RAWTP]; >> + struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP]; >> +}; >> + >> +struct bpf_map_def SEC("maps") perfmap = { >> + .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, >> + .key_size = sizeof(int), >> + .value_size = sizeof(__u32), >> + .max_entries = 2, >> +}; >> + >> +struct bpf_map_def SEC("maps") stackdata_map = { >> + .type = BPF_MAP_TYPE_PERCPU_ARRAY, >> + .key_size = sizeof(__u32), >> + .value_size = sizeof(struct stack_trace_t), >> + .max_entries = 1, >> +}; >> + >> +/* Allocate per-cpu space twice the needed. For the code below >> + * usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK); >> + * if (usize < 0) >> + * return 0; >> + * ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0); >> + * >> + * If we have value_size = MAX_STACK_RAWTP * sizeof(__u64), >> + * verifier will complain that access "raw_data + usize" >> + * with size "max_len - usize" may be out of bound. >> + * The maximum "raw_data + usize" is "raw_data + max_len" >> + * and the maximum "max_len - usize" is "max_len", verifier >> + * concludes that the maximum buffer access range is >> + * "raw_data[0...max_len * 2 - 1]" and hence reject the program. >> + * >> + * Doubling the to-be-used max buffer size can fix this verifier >> + * issue and avoid complicated C programming massaging. >> + * This is an acceptable workaround since there is one entry here. >> + */ >> +struct bpf_map_def SEC("maps") rawdata_map = { >> + .type = BPF_MAP_TYPE_PERCPU_ARRAY, >> + .key_size = sizeof(__u32), >> + .value_size = MAX_STACK_RAWTP * sizeof(__u64) * 2, >> + .max_entries = 1, >> +}; >> + >> +SEC("tracepoint/sched/sched_switch") >> +int bpf_prog1(void *ctx) >> +{ >> + int max_len, max_buildid_len, usize, ksize, total_size; >> + struct stack_trace_t *data; >> + void *raw_data; >> + __u32 key = 0; >> + >> + data = bpf_map_lookup_elem(&stackdata_map, &key); >> + if (!data) >> + return 0; >> + >> + max_len = MAX_STACK_RAWTP * sizeof(__u64); >> + max_buildid_len = MAX_STACK_RAWTP * sizeof(struct bpf_stack_build_id); >> + data->pid = bpf_get_current_pid_tgid(); >> + data->kern_stack_size = bpf_get_stack(ctx, data->kern_stack, >> + max_len, 0); >> + data->user_stack_size = bpf_get_stack(ctx, data->user_stack, max_len, >> + BPF_F_USER_STACK); >> + data->user_stack_buildid_size = bpf_get_stack( >> + ctx, data->user_stack_buildid, max_buildid_len, >> + BPF_F_USER_STACK | BPF_F_USER_BUILD_ID); >> + bpf_perf_event_output(ctx, &perfmap, 0, data, sizeof(*data)); >> + >> + /* write both kernel and user stacks to the same buffer */ >> + raw_data = bpf_map_lookup_elem(&rawdata_map, &key); >> + if (!raw_data) >> + return 0; >> + >> + usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK); >> + if (usize < 0) >> + return 0; >> + >> + ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0); >> + if (ksize < 0) > > may be instead of teaching verifier about ARSH (which doesn't look > straighforward) such use case can be done as: > u32 max_len, usize, ksize; > ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0); > if ((int)ksize < 0) Just tried, it does not work. The compiler generates code like: 47: (b7) r9 = 800 48: (bf) r1 = r6 49: (bf) r2 = r7 50: (b7) r3 = 800 51: (b7) r4 = 256 52: (85) call bpf_get_stack#66 R0=map_value(id=0,off=0,ks=4,vs=1600,imm=0) R1_w=ctx(id=0,off=0,imm=0) R2_w=map_value(id=0,off=0,ks=4,vs=1600,imm=0) R3_w=inv800 R4_w=inv256 R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=1600,imm=0) R9_w=inv800 R10=fp0,call_-1 53: (b7) r1 = 0 54: (bf) r8 = r0 55: (67) r8 <<= 32 56: (c7) r8 s>>= 32 57: (6d) if r1 s> r8 goto pc+27 R0=inv(id=0,umax_value=800) R1=inv0 R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=1600,imm=0) R8=inv(id=0,umax_value=9223372036854775807,var_off=(0x0; 0x7fffffffffffffff)) R9=inv800 R10=fp0,call_-1 58: (1f) r9 -= r8 59: (bf) r1 = r8 60: (67) r1 <<= 32 61: (77) r1 >>= 32 62: (bf) r2 = r7 63: (0f) r2 += r1 64: (bf) r1 = r6 65: (bf) r3 = r9 66: (b7) r4 = 0 67: (85) call bpf_get_stack#66 R3 min value is negative, either use unsigned or 'var &= const' Basically, the compiler does lsh/arsh for "int" value to do the comparison and then get this value does a lsh/rsh. So it looks like we still need arsh. > > That's certainly suboptimal and very much non obvious to program > developers, but at least it can unblock the bpf_get_stack part > landing and proper ARSH support can be added later? > Just a thought. > >> + return 0; >> + >> + total_size = usize + ksize; >> + if (total_size > 0 && total_size <= max_len) >> + bpf_perf_event_output(ctx, &perfmap, 0, raw_data, total_size); >> + >> + return 0; >> +} > > the rest of the test looks great. Thank you for adding such exhaustive test. >