Netdev List
 help / color / mirror / Atom feed
From: Yonghong Song <yhs@fb.com>
To: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: <ast@fb.com>, <daniel@iogearbox.net>, <netdev@vger.kernel.org>,
	<kernel-team@fb.com>
Subject: Re: [PATCH bpf-next v3 8/9] tools/bpf: add a test for bpf_get_stack with raw tracepoint prog
Date: Sun, 22 Apr 2018 19:57:20 -0700	[thread overview]
Message-ID: <4e7541d9-17b4-a07b-1672-9e20e23cdcb7@fb.com> (raw)
In-Reply-To: <20180423002301.lnhdn6oueweqztvb@ast-mbp>



On 4/22/18 5:23 PM, Alexei Starovoitov wrote:
> On Fri, Apr 20, 2018 at 03:18:41PM -0700, Yonghong Song wrote:
>> The test attached a raw_tracepoint program to sched/sched_switch.
>> It tested to get stack for user space, kernel space and user
>> space with build_id request. It also tested to get user
>> and kernel stack into the same buffer with back-to-back
>> bpf_get_stack helper calls.
>>
>> Whenever the kernel stack is available, the user space
>> application will check to ensure that the kernel function
>> for raw_tracepoint ___bpf_prog_run is part of the stack.
>>
>> Signed-off-by: Yonghong Song <yhs@fb.com>
>> ---
>>   tools/testing/selftests/bpf/Makefile               |   3 +-
>>   tools/testing/selftests/bpf/test_get_stack_rawtp.c | 102 ++++++++++++++++++
>>   tools/testing/selftests/bpf/test_progs.c           | 115 +++++++++++++++++++++
>>   3 files changed, 219 insertions(+), 1 deletion(-)
>>   create mode 100644 tools/testing/selftests/bpf/test_get_stack_rawtp.c
>>
>> diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
>> index 0b72cc7..54e9e74 100644
>> --- a/tools/testing/selftests/bpf/Makefile
>> +++ b/tools/testing/selftests/bpf/Makefile
>> @@ -32,7 +32,7 @@ TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test
>>   	test_l4lb_noinline.o test_xdp_noinline.o test_stacktrace_map.o \
>>   	sample_map_ret0.o test_tcpbpf_kern.o test_stacktrace_build_id.o \
>>   	sockmap_tcp_msg_prog.o connect4_prog.o connect6_prog.o test_adjust_tail.o \
>> -	test_btf_haskv.o test_btf_nokv.o
>> +	test_btf_haskv.o test_btf_nokv.o test_get_stack_rawtp.o
>>   
>>   # Order correspond to 'make run_tests' order
>>   TEST_PROGS := test_kmod.sh \
>> @@ -56,6 +56,7 @@ $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/libbpf.a
>>   $(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
>>   $(OUTPUT)/test_sock: cgroup_helpers.c
>>   $(OUTPUT)/test_sock_addr: cgroup_helpers.c
>> +$(OUTPUT)/test_progs: trace_helpers.c
>>   
>>   .PHONY: force
>>   
>> diff --git a/tools/testing/selftests/bpf/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/test_get_stack_rawtp.c
>> new file mode 100644
>> index 0000000..ba1dcf9
>> --- /dev/null
>> +++ b/tools/testing/selftests/bpf/test_get_stack_rawtp.c
>> @@ -0,0 +1,102 @@
>> +// SPDX-License-Identifier: GPL-2.0
>> +
>> +#include <linux/bpf.h>
>> +#include "bpf_helpers.h"
>> +
>> +/* Permit pretty deep stack traces */
>> +#define MAX_STACK_RAWTP 100
>> +struct stack_trace_t {
>> +	int pid;
>> +	int kern_stack_size;
>> +	int user_stack_size;
>> +	int user_stack_buildid_size;
>> +	__u64 kern_stack[MAX_STACK_RAWTP];
>> +	__u64 user_stack[MAX_STACK_RAWTP];
>> +	struct bpf_stack_build_id user_stack_buildid[MAX_STACK_RAWTP];
>> +};
>> +
>> +struct bpf_map_def SEC("maps") perfmap = {
>> +	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
>> +	.key_size = sizeof(int),
>> +	.value_size = sizeof(__u32),
>> +	.max_entries = 2,
>> +};
>> +
>> +struct bpf_map_def SEC("maps") stackdata_map = {
>> +	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
>> +	.key_size = sizeof(__u32),
>> +	.value_size = sizeof(struct stack_trace_t),
>> +	.max_entries = 1,
>> +};
>> +
>> +/* Allocate per-cpu space twice the needed. For the code below
>> + *   usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK);
>> + *   if (usize < 0)
>> + *     return 0;
>> + *   ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0);
>> + *
>> + * If we have value_size = MAX_STACK_RAWTP * sizeof(__u64),
>> + * verifier will complain that access "raw_data + usize"
>> + * with size "max_len - usize" may be out of bound.
>> + * The maximum "raw_data + usize" is "raw_data + max_len"
>> + * and the maximum "max_len - usize" is "max_len", verifier
>> + * concludes that the maximum buffer access range is
>> + * "raw_data[0...max_len * 2 - 1]" and hence reject the program.
>> + *
>> + * Doubling the to-be-used max buffer size can fix this verifier
>> + * issue and avoid complicated C programming massaging.
>> + * This is an acceptable workaround since there is one entry here.
>> + */
>> +struct bpf_map_def SEC("maps") rawdata_map = {
>> +	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
>> +	.key_size = sizeof(__u32),
>> +	.value_size = MAX_STACK_RAWTP * sizeof(__u64) * 2,
>> +	.max_entries = 1,
>> +};
>> +
>> +SEC("tracepoint/sched/sched_switch")
>> +int bpf_prog1(void *ctx)
>> +{
>> +	int max_len, max_buildid_len, usize, ksize, total_size;
>> +	struct stack_trace_t *data;
>> +	void *raw_data;
>> +	__u32 key = 0;
>> +
>> +	data = bpf_map_lookup_elem(&stackdata_map, &key);
>> +	if (!data)
>> +		return 0;
>> +
>> +	max_len = MAX_STACK_RAWTP * sizeof(__u64);
>> +	max_buildid_len = MAX_STACK_RAWTP * sizeof(struct bpf_stack_build_id);
>> +	data->pid = bpf_get_current_pid_tgid();
>> +	data->kern_stack_size = bpf_get_stack(ctx, data->kern_stack,
>> +					      max_len, 0);
>> +	data->user_stack_size = bpf_get_stack(ctx, data->user_stack, max_len,
>> +					    BPF_F_USER_STACK);
>> +	data->user_stack_buildid_size = bpf_get_stack(
>> +		ctx, data->user_stack_buildid, max_buildid_len,
>> +		BPF_F_USER_STACK | BPF_F_USER_BUILD_ID);
>> +	bpf_perf_event_output(ctx, &perfmap, 0, data, sizeof(*data));
>> +
>> +	/* write both kernel and user stacks to the same buffer */
>> +	raw_data = bpf_map_lookup_elem(&rawdata_map, &key);
>> +	if (!raw_data)
>> +		return 0;
>> +
>> +	usize = bpf_get_stack(ctx, raw_data, max_len, BPF_F_USER_STACK);
>> +	if (usize < 0)
>> +		return 0;
>> +
>> +	ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0);
>> +	if (ksize < 0)
> 
> may be instead of teaching verifier about ARSH (which doesn't look
> straighforward) such use case can be done as:
> u32 max_len, usize, ksize;
> ksize = bpf_get_stack(ctx, raw_data + usize, max_len - usize, 0);
> if ((int)ksize < 0)

Just tried, it does not work. The compiler generates code like:

47: (b7) r9 = 800
48: (bf) r1 = r6
49: (bf) r2 = r7
50: (b7) r3 = 800
51: (b7) r4 = 256
52: (85) call bpf_get_stack#66
  R0=map_value(id=0,off=0,ks=4,vs=1600,imm=0) R1_w=ctx(id=0,off=0,imm=0) 
R2_w=map_value(id=0,off=0,ks=4,vs=1600,imm=0) R3_w=inv800 R4_w=inv256 
R6=ctx(id=0,off=0,imm=0) R7=map_value(id=0,off=0,ks=4,vs=1600,imm=0) 
R9_w=inv800 R10=fp0,call_-1
53: (b7) r1 = 0
54: (bf) r8 = r0
55: (67) r8 <<= 32
56: (c7) r8 s>>= 32
57: (6d) if r1 s> r8 goto pc+27
  R0=inv(id=0,umax_value=800) R1=inv0 R6=ctx(id=0,off=0,imm=0) 
R7=map_value(id=0,off=0,ks=4,vs=1600,imm=0) 
R8=inv(id=0,umax_value=9223372036854775807,var_off=(0x0; 
0x7fffffffffffffff)) R9=inv800 R10=fp0,call_-1
58: (1f) r9 -= r8
59: (bf) r1 = r8
60: (67) r1 <<= 32
61: (77) r1 >>= 32
62: (bf) r2 = r7
63: (0f) r2 += r1
64: (bf) r1 = r6
65: (bf) r3 = r9
66: (b7) r4 = 0
67: (85) call bpf_get_stack#66
R3 min value is negative, either use unsigned or 'var &= const'

Basically, the compiler does lsh/arsh for "int" value to do the 
comparison and then get this value does a lsh/rsh.
So it looks like we still need arsh.

> 
> That's certainly suboptimal and very much non obvious to program
> developers, but at least it can unblock the bpf_get_stack part
> landing and proper ARSH support can be added later?
> Just a thought.
> 
>> +		return 0;
>> +
>> +	total_size = usize + ksize;
>> +	if (total_size > 0 && total_size <= max_len)
>> +		bpf_perf_event_output(ctx, &perfmap, 0, raw_data, total_size);
>> +
>> +	return 0;
>> +}
> 
> the rest of the test looks great. Thank you for adding such exhaustive test.
> 

  reply	other threads:[~2018-04-23  2:57 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-04-20 22:18 [PATCH bpf-next v3 0/9] bpf: add bpf_get_stack helper Yonghong Song
2018-04-20 22:18 ` [PATCH bpf-next v3 1/9] bpf: change prototype for stack_map_get_build_id_offset Yonghong Song
2018-04-20 22:18 ` [PATCH bpf-next v3 2/9] bpf: add bpf_get_stack helper Yonghong Song
2018-04-20 22:18 ` [PATCH bpf-next v3 3/9] bpf/verifier: refine retval R0 state for " Yonghong Song
2018-04-22 23:55   ` Alexei Starovoitov
2018-04-23  2:46     ` Yonghong Song
2018-04-20 22:18 ` [PATCH bpf-next v3 4/9] bpf/verifier: improve register value range tracking with ARSH Yonghong Song
2018-04-23  0:16   ` Alexei Starovoitov
2018-04-23  2:49     ` Yonghong Song
2018-04-23  4:19       ` Alexei Starovoitov
2018-04-23  4:31         ` Yonghong Song
2018-04-23  4:40           ` Alexei Starovoitov
2018-04-23 12:25   ` Edward Cree
2018-04-23 16:19     ` Yonghong Song
2018-04-20 22:18 ` [PATCH bpf-next v3 5/9] tools/bpf: add bpf_get_stack helper to tools headers Yonghong Song
2018-04-20 22:18 ` [PATCH bpf-next v3 6/9] samples/bpf: move common-purpose trace functions to selftests Yonghong Song
2018-04-23  0:17   ` Alexei Starovoitov
2018-04-20 22:18 ` [PATCH bpf-next v3 7/9] tools/bpf: add a verifier test case for bpf_get_stack helper and ARSH Yonghong Song
2018-04-20 22:18 ` [PATCH bpf-next v3 8/9] tools/bpf: add a test for bpf_get_stack with raw tracepoint prog Yonghong Song
2018-04-23  0:23   ` Alexei Starovoitov
2018-04-23  2:57     ` Yonghong Song [this message]
2018-04-20 22:18 ` [PATCH bpf-next v3 9/9] tools/bpf: add a test for bpf_get_stack with " Yonghong Song
2018-04-23  0:27   ` Alexei Starovoitov
2018-04-23  2:58     ` Yonghong Song

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=4e7541d9-17b4-a07b-1672-9e20e23cdcb7@fb.com \
    --to=yhs@fb.com \
    --cc=alexei.starovoitov@gmail.com \
    --cc=ast@fb.com \
    --cc=daniel@iogearbox.net \
    --cc=kernel-team@fb.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox