From: Alexei Starovoitov <alexei.starovoitov@gmail.com>
To: Eduard Zingerman <eddyz87@gmail.com>
Cc: bpf@vger.kernel.org, ast@kernel.org, andrii@kernel.org,
daniel@iogearbox.net, kernel-team@fb.com, song@kernel.org
Subject: Re: [PATCH bpf-next v3 3/5] bpf: Inline calls to bpf_loop when callback is known
Date: Sat, 4 Jun 2022 16:47:07 +0200 [thread overview]
Message-ID: <20220604144707.d7ehrmys5xeijba4@MacBook-Pro-3.local> (raw)
In-Reply-To: <20220603141047.2163170-4-eddyz87@gmail.com>
On Fri, Jun 03, 2022 at 05:10:45PM +0300, Eduard Zingerman wrote:
> +
> + if (fit_for_bpf_loop_inline(aux)) {
> + if (!subprog_updated) {
> + subprog_updated = true;
> + subprogs[cur_subprog].stack_depth += BPF_REG_SIZE * 3;
Instead of doing += and keeping track that the stack was increased
(if multiple bpf_loop happened to be in a function)
may be add subprogs[..].stack_depth_extra variable ?
And unconditionally do subprogs[cur_subprog].stack_depth_extra = BPF_REG_SIZE * 3;
every time bpf_loop is seen?
Then we can do that during inline_bpf_loop().
Also is there a test for multiple bpf_loop in a func?
> + stack_base = -subprogs[cur_subprog].stack_depth;
> + }
> + aux->loop_inline_state.stack_base = stack_base;
> + }
> + if (i == subprog_end - 1) {
> + subprog_updated = false;
> + cur_subprog++;
> + if (cur_subprog < env->subprog_cnt)
> + subprog_end = subprogs[cur_subprog + 1].start;
> + }
> + }
> +
> + env->prog->aux->stack_depth = env->subprog_info[0].stack_depth;
> +}
> +
> +static void update_loop_inline_state(struct bpf_verifier_env *env, u32 subprogno)
> +{
> + struct bpf_loop_inline_state *state = &cur_aux(env)->loop_inline_state;
> + struct bpf_reg_state *regs = cur_regs(env);
> + struct bpf_reg_state *flags_reg = ®s[BPF_REG_4];
> +
> + int flags_is_zero =
> + register_is_const(flags_reg) && flags_reg->var_off.value == 0;
> +
> + if (state->initialized) {
> + state->flags_is_zero &= flags_is_zero;
> + state->callback_is_constant &= state->callback_subprogno == subprogno;
> + } else {
> + state->initialized = 1;
> + state->callback_is_constant = 1;
> + state->flags_is_zero = flags_is_zero;
> + state->callback_subprogno = subprogno;
> + }
> +}
> +
> static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
> int *insn_idx_p)
> {
> @@ -7255,6 +7327,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
> err = check_bpf_snprintf_call(env, regs);
> break;
> case BPF_FUNC_loop:
> + update_loop_inline_state(env, meta.subprogno);
> err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
> set_loop_callback_state);
> break;
> @@ -7661,11 +7734,6 @@ static bool check_reg_sane_offset(struct bpf_verifier_env *env,
> return true;
> }
>
> -static struct bpf_insn_aux_data *cur_aux(struct bpf_verifier_env *env)
> -{
> - return &env->insn_aux_data[env->insn_idx];
> -}
> -
> enum {
> REASON_BOUNDS = -1,
> REASON_TYPE = -2,
> @@ -12920,6 +12988,22 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
> return new_prog;
> }
>
> +static void adjust_loop_inline_subprogno(struct bpf_verifier_env *env,
> + u32 first_removed,
> + u32 first_remaining)
> +{
> + int delta = first_remaining - first_removed;
> +
> + for (int i = 0; i < env->prog->len; ++i) {
> + struct bpf_loop_inline_state *state =
> + &env->insn_aux_data[i].loop_inline_state;
> +
> + if (state->initialized &&
> + state->callback_subprogno >= first_remaining)
> + state->callback_subprogno -= delta;
> + }
> +}
> +
> static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
> u32 off, u32 cnt)
> {
> @@ -12963,6 +13047,8 @@ static int adjust_subprog_starts_after_remove(struct bpf_verifier_env *env,
> * in adjust_btf_func() - no need to adjust
> */
> }
> +
> + adjust_loop_inline_subprogno(env, i, j);
This special case isn't great.
May be let's do inline_bpf_loop() earlier?
Instead of doing it from do_misc_fixups() how about doing it as a separate
pass right after check_max_stack_depth() ?
Then opt_remove_dead_code() will adjust BPF_CALL_REL automatically
as a part of bpf_adj_branches().
> } else {
> /* convert i from "first prog to remove" to "first to adjust" */
> if (env->subprog_info[i].start == off)
> @@ -13773,6 +13859,94 @@ static int fixup_kfunc_call(struct bpf_verifier_env *env,
> return 0;
> }
>
> +static struct bpf_prog *inline_bpf_loop(struct bpf_verifier_env *env,
> + int position, u32 *cnt)
> +{
> + struct bpf_insn_aux_data *aux = &env->insn_aux_data[position];
> + s32 stack_base = aux->loop_inline_state.stack_base;
> + s32 r6_offset = stack_base + 0 * BPF_REG_SIZE;
> + s32 r7_offset = stack_base + 1 * BPF_REG_SIZE;
> + s32 r8_offset = stack_base + 2 * BPF_REG_SIZE;
> + int reg_loop_max = BPF_REG_6;
> + int reg_loop_cnt = BPF_REG_7;
> + int reg_loop_ctx = BPF_REG_8;
> +
> + struct bpf_prog *new_prog;
> + u32 callback_subprogno = aux->loop_inline_state.callback_subprogno;
> + u32 callback_start;
> + u32 call_insn_offset;
> + s32 callback_offset;
> + struct bpf_insn insn_buf[19];
> + struct bpf_insn *next = insn_buf;
> + struct bpf_insn *call, *jump_to_end, *loop_header;
> + struct bpf_insn *jump_to_header, *loop_exit;
> +
> + /* Return error and jump to the end of the patch if
> + * expected number of iterations is too big. This
> + * repeats the check done in bpf_loop helper function,
> + * be careful to modify this code in sync.
> + */
> + (*next++) = BPF_JMP_IMM(BPF_JLE, BPF_REG_1, BPF_MAX_LOOPS, 2);
> + (*next++) = BPF_MOV32_IMM(BPF_REG_0, -E2BIG);
> + jump_to_end = next;
> + (*next++) = BPF_JMP_IMM(BPF_JA, 0, 0, 0 /* set below */);
> + /* spill R6, R7, R8 to use these as loop vars */
> + (*next++) = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, r6_offset);
> + (*next++) = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, r7_offset);
> + (*next++) = BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, r8_offset);
> + /* initialize loop vars */
> + (*next++) = BPF_MOV64_REG(reg_loop_max, BPF_REG_1);
> + (*next++) = BPF_MOV32_IMM(reg_loop_cnt, 0);
> + (*next++) = BPF_MOV64_REG(reg_loop_ctx, BPF_REG_3);
> + /* loop header;
> + * if reg_loop_cnt >= reg_loop_max skip the loop body
> + */
> + loop_header = next;
> + (*next++) = BPF_JMP_REG(BPF_JGE, reg_loop_cnt, reg_loop_max,
> + 0 /* set below */);
> + /* callback call */
> + (*next++) = BPF_MOV64_REG(BPF_REG_1, reg_loop_cnt);
> + (*next++) = BPF_MOV64_REG(BPF_REG_2, reg_loop_ctx);
> + call = next;
> + (*next++) = BPF_CALL_REL(0 /* set below after patching */);
> + /* increment loop counter */
> + (*next++) = BPF_ALU64_IMM(BPF_ADD, reg_loop_cnt, 1);
> + /* jump to loop header if callback returned 0 */
> + jump_to_header = next;
> + (*next++) = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 0 /* set below */);
> + /* return value of bpf_loop;
> + * set R0 to the number of iterations
> + */
> + loop_exit = next;
> + (*next++) = BPF_MOV64_REG(BPF_REG_0, reg_loop_cnt);
> + /* restore original values of R6, R7, R8 */
> + (*next++) = BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, r6_offset);
> + (*next++) = BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_10, r7_offset);
> + (*next++) = BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_10, r8_offset);
> +
> + *cnt = next - insn_buf;
> + if (*cnt > ARRAY_SIZE(insn_buf)) {
> + WARN_ONCE(1, "BUG %s: 'next' exceeds bounds for 'insn_buf'\n",
> + __func__);
> + return NULL;
> + }
> + jump_to_end->off = next - jump_to_end - 1;
> + loop_header->off = loop_exit - loop_header - 1;
> + jump_to_header->off = loop_header - jump_to_header - 1;
> +
> + new_prog = bpf_patch_insn_data(env, position, insn_buf, *cnt);
> + if (!new_prog)
> + return new_prog;
> +
> + /* callback start is known only after patching */
> + callback_start = env->subprog_info[callback_subprogno].start;
> + call_insn_offset = position + (call - insn_buf);
> + callback_offset = callback_start - call_insn_offset - 1;
> + env->prog->insnsi[call_insn_offset].imm = callback_offset;
> +
> + return new_prog;
> +}
> +
> /* Do various post-verification rewrites in a single program pass.
> * These rewrites simplify JIT and interpreter implementations.
> */
> @@ -14258,6 +14432,18 @@ static int do_misc_fixups(struct bpf_verifier_env *env)
> continue;
> }
>
> + if (insn->imm == BPF_FUNC_loop &&
> + fit_for_bpf_loop_inline(&env->insn_aux_data[i + delta])) {
> + new_prog = inline_bpf_loop(env, i + delta, &cnt);
> + if (!new_prog)
> + return -ENOMEM;
> +
> + delta += cnt - 1;
> + env->prog = prog = new_prog;
> + insn = new_prog->insnsi + i + delta;
> + continue;
> + }
> +
> patch_call_imm:
> fn = env->ops->get_func_proto(insn->imm, env->prog);
> /* all functions that have prototype and verifier allowed
> @@ -15030,6 +15216,9 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr)
> if (ret == 0)
> ret = check_max_stack_depth(env);
>
> + if (ret == 0)
> + adjust_stack_depth_for_loop_inlining(env);
With above two suggestions this will become
if (ret == 0)
optimize_bpf_loop(env);
where it will call inline_bpf_loop() and will assign max_depth_extra.
And then one extra loop for_each_subporg() max_depth += max_depth_extra.
wdyt?
next prev parent reply other threads:[~2022-06-04 14:47 UTC|newest]
Thread overview: 20+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-06-03 14:10 [PATCH bpf-next v3 0/5] bpf_loop inlining Eduard Zingerman
2022-06-03 14:10 ` [PATCH bpf-next v3 1/5] selftests/bpf: specify expected instructions in test_verifier tests Eduard Zingerman
2022-06-03 21:31 ` Song Liu
2022-06-03 22:08 ` Eduard Zingerman
2022-06-03 23:01 ` Song Liu
2022-06-04 12:53 ` Eduard Zingerman
2022-06-03 14:10 ` [PATCH bpf-next v3 2/5] selftests/bpf: allow BTF specs and func infos " Eduard Zingerman
2022-06-03 21:41 ` Song Liu
2022-06-03 14:10 ` [PATCH bpf-next v3 3/5] bpf: Inline calls to bpf_loop when callback is known Eduard Zingerman
2022-06-03 22:36 ` Song Liu
2022-06-04 0:06 ` Joanne Koong
2022-06-04 12:51 ` Eduard Zingerman
2022-06-06 18:08 ` Joanne Koong
2022-06-08 9:06 ` Eduard Zingerman
2022-06-04 14:47 ` Alexei Starovoitov [this message]
2022-06-04 15:36 ` Eduard Zingerman
2022-06-03 14:10 ` [PATCH bpf-next v3 4/5] selftests/bpf: BPF test_verifier selftests for bpf_loop inlining Eduard Zingerman
2022-06-03 22:38 ` Song Liu
2022-06-03 14:10 ` [PATCH bpf-next v3 5/5] selftests/bpf: BPF test_prog " Eduard Zingerman
2022-06-03 22:52 ` Song Liu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220604144707.d7ehrmys5xeijba4@MacBook-Pro-3.local \
--to=alexei.starovoitov@gmail.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=eddyz87@gmail.com \
--cc=kernel-team@fb.com \
--cc=song@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox