From: Alexei Starovoitov <alexei.starovoitov@gmail.com>
To: Kui-Feng Lee <kuifeng@fb.com>
Cc: bpf@vger.kernel.org, ast@kernel.org, daniel@iogearbox.net,
andrii@kernel.org
Subject: Re: [PATCH bpf-next v2 2/4] bpf, x86: Create bpf_trace_run_ctx on the caller thread's stack
Date: Fri, 18 Mar 2022 12:09:17 -0700 [thread overview]
Message-ID: <20220318190917.tecjespuzkegwb2k@ast-mbp> (raw)
In-Reply-To: <20220316004231.1103318-3-kuifeng@fb.com>
On Tue, Mar 15, 2022 at 05:42:29PM -0700, Kui-Feng Lee wrote:
> BPF trampolines will create a bpf_trace_run_ctx on their stacks, and
> set/reset the current bpf_run_ctx whenever calling/returning from a
> bpf_prog.
>
> Signed-off-by: Kui-Feng Lee <kuifeng@fb.com>
> ---
> arch/x86/net/bpf_jit_comp.c | 32 ++++++++++++++++++++++++++++++++
> include/linux/bpf.h | 12 ++++++++----
> kernel/bpf/syscall.c | 4 ++--
> kernel/bpf/trampoline.c | 21 +++++++++++++++++----
> 4 files changed, 59 insertions(+), 10 deletions(-)
>
> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
> index 1228e6e6a420..29775a475513 100644
> --- a/arch/x86/net/bpf_jit_comp.c
> +++ b/arch/x86/net/bpf_jit_comp.c
> @@ -1748,10 +1748,33 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
> {
> u8 *prog = *pprog;
> u8 *jmp_insn;
> + int ctx_cookie_off = offsetof(struct bpf_trace_run_ctx, bpf_cookie);
> struct bpf_prog *p = l->prog;
>
> + EMIT1(0x52); /* push rdx */
Why save/restore rdx?
> +
> + /* mov rdi, 0 */
> + emit_mov_imm64(&prog, BPF_REG_1, 0, 0);
> +
> + /* Prepare struct bpf_trace_run_ctx.
> + * sub rsp, sizeof(struct bpf_trace_run_ctx)
> + * mov rax, rsp
> + * mov QWORD PTR [rax + ctx_cookie_off], rdi
> + */
How about the following instead:
sub rsp, sizeof(struct bpf_trace_run_ctx)
mov qword ptr [rsp + ctx_cookie_off], 0
?
> + EMIT4(0x48, 0x83, 0xEC, sizeof(struct bpf_trace_run_ctx));
> + EMIT3(0x48, 0x89, 0xE0);
> + EMIT4(0x48, 0x89, 0x78, ctx_cookie_off);
> +
> + /* mov rdi, rsp */
> + EMIT3(0x48, 0x89, 0xE7);
> + /* mov QWORD PTR [rdi + sizeof(struct bpf_trace_run_ctx)], rax */
> + emit_stx(&prog, BPF_DW, BPF_REG_1, BPF_REG_0, sizeof(struct bpf_trace_run_ctx));
why not to do:
mov qword ptr[rsp + sizeof(struct bpf_trace_run_ctx)], rsp
?
> +
> /* arg1: mov rdi, progs[i] */
> emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
> + /* arg2: mov rsi, rsp (struct bpf_run_ctx *) */
> + EMIT3(0x48, 0x89, 0xE6);
> +
> if (emit_call(&prog,
> p->aux->sleepable ? __bpf_prog_enter_sleepable :
> __bpf_prog_enter, prog))
> @@ -1797,11 +1820,20 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog,
> emit_mov_imm64(&prog, BPF_REG_1, (long) p >> 32, (u32) (long) p);
> /* arg2: mov rsi, rbx <- start time in nsec */
> emit_mov_reg(&prog, true, BPF_REG_2, BPF_REG_6);
> + /* arg3: mov rdx, rsp (struct bpf_run_ctx *) */
> + EMIT3(0x48, 0x89, 0xE2);
> if (emit_call(&prog,
> p->aux->sleepable ? __bpf_prog_exit_sleepable :
> __bpf_prog_exit, prog))
> return -EINVAL;
>
> + /* pop struct bpf_trace_run_ctx
> + * add rsp, sizeof(struct bpf_trace_run_ctx)
> + */
> + EMIT4(0x48, 0x83, 0xC4, sizeof(struct bpf_trace_run_ctx));
the sub rsp; add rsp pair for every prog call will add up.
Could you do sub rsp once at the beginning of trampoline?
And move
mov qword ptr[rsp + sizeof(struct bpf_trace_run_ctx)], rsp
to the beginning as well?
> +
> + EMIT1(0x5A); /* pop rdx */
> +
> *pprog = prog;
> return 0;
> }
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 3dcae8550c21..d20a23953696 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -681,6 +681,8 @@ struct bpf_tramp_links {
> int nr_links;
> };
>
> +struct bpf_trace_run_ctx;
> +
> /* Different use cases for BPF trampoline:
> * 1. replace nop at the function entry (kprobe equivalent)
> * flags = BPF_TRAMP_F_RESTORE_REGS
> @@ -707,10 +709,11 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *i
> struct bpf_tramp_links *tlinks,
> void *orig_call);
> /* these two functions are called from generated trampoline */
> -u64 notrace __bpf_prog_enter(struct bpf_prog *prog);
> -void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
> -u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog);
> -void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start);
> +u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_trace_run_ctx *run_ctx);
> +void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_trace_run_ctx *run_ctx);
> +u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_trace_run_ctx *run_ctx);
> +void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
> + struct bpf_trace_run_ctx *run_ctx);
> void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr);
> void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr);
>
> @@ -1291,6 +1294,7 @@ struct bpf_cg_run_ctx {
> struct bpf_trace_run_ctx {
> struct bpf_run_ctx run_ctx;
> u64 bpf_cookie;
> + struct bpf_run_ctx *saved_run_ctx;
> };
>
> static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index fecfc803785d..a289ef55ea17 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -4793,13 +4793,13 @@ BPF_CALL_3(bpf_sys_bpf, int, cmd, union bpf_attr *, attr, u32, attr_size)
> return -EINVAL;
> }
>
> - if (!__bpf_prog_enter_sleepable(prog)) {
> + if (!__bpf_prog_enter_sleepable(prog, NULL)) {
> /* recursion detected */
> bpf_prog_put(prog);
> return -EBUSY;
> }
> attr->test.retval = bpf_prog_run(prog, (void *) (long) attr->test.ctx_in);
> - __bpf_prog_exit_sleepable(prog, 0 /* bpf_prog_run does runtime stats */);
> + __bpf_prog_exit_sleepable(prog, 0 /* bpf_prog_run does runtime stats */, NULL);
> bpf_prog_put(prog);
> return 0;
> #endif
> diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
> index 54c695d49ec9..0b050aa2f159 100644
> --- a/kernel/bpf/trampoline.c
> +++ b/kernel/bpf/trampoline.c
> @@ -580,9 +580,12 @@ static void notrace inc_misses_counter(struct bpf_prog *prog)
> * [2..MAX_U64] - execute bpf prog and record execution time.
> * This is start time.
> */
> -u64 notrace __bpf_prog_enter(struct bpf_prog *prog)
> +u64 notrace __bpf_prog_enter(struct bpf_prog *prog, struct bpf_trace_run_ctx *run_ctx)
> __acquires(RCU)
> {
> + if (run_ctx)
> + run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
We can remove these branches from the critical path if we pass the actual run_ctx
instead of NULL in bpf_sys_bpf, right?
> +
> rcu_read_lock();
> migrate_disable();
> if (unlikely(__this_cpu_inc_return(*(prog->active)) != 1)) {
> @@ -614,17 +617,23 @@ static void notrace update_prog_stats(struct bpf_prog *prog,
> }
> }
>
> -void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start)
> +void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start, struct bpf_trace_run_ctx *run_ctx)
> __releases(RCU)
> {
> + if (run_ctx)
> + bpf_reset_run_ctx(run_ctx->saved_run_ctx);
> +
> update_prog_stats(prog, start);
> __this_cpu_dec(*(prog->active));
> migrate_enable();
> rcu_read_unlock();
> }
>
> -u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog)
> +u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog, struct bpf_trace_run_ctx *run_ctx)
> {
> + if (run_ctx)
> + run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
> +
> rcu_read_lock_trace();
> migrate_disable();
> might_fault();
> @@ -635,8 +644,12 @@ u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog)
> return bpf_prog_start_time();
> }
>
> -void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start)
> +void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start,
> + struct bpf_trace_run_ctx *run_ctx)
> {
> + if (run_ctx)
> + bpf_reset_run_ctx(run_ctx->saved_run_ctx);
> +
> update_prog_stats(prog, start);
> __this_cpu_dec(*(prog->active));
> migrate_enable();
> --
> 2.30.2
>
--
next prev parent reply other threads:[~2022-03-18 19:09 UTC|newest]
Thread overview: 31+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-03-16 0:42 [PATCH bpf-next v2 0/4] Attach a cookie to a tracing program Kui-Feng Lee
2022-03-16 0:42 ` [PATCH bpf-next v2 1/4] bpf, x86: Generate trampolines from bpf_links Kui-Feng Lee
2022-03-16 0:42 ` [PATCH bpf-next v2 2/4] bpf, x86: Create bpf_trace_run_ctx on the caller thread's stack Kui-Feng Lee
2022-03-18 19:09 ` Alexei Starovoitov [this message]
2022-03-20 9:31 ` Kui-Feng Lee
2022-03-20 20:08 ` Alexei Starovoitov
2022-03-21 19:00 ` Kui-Feng Lee
2022-03-21 23:04 ` Andrii Nakryiko
2022-03-21 23:25 ` Alexei Starovoitov
2022-03-21 23:38 ` Andrii Nakryiko
2022-03-21 23:08 ` Andrii Nakryiko
2022-03-22 15:30 ` Kui-Feng Lee
2022-03-22 21:08 ` Andrii Nakryiko
2022-03-16 0:42 ` [PATCH bpf-next v2 3/4] bpf, x86: Support BPF cookie for fentry/fexit/fmod_ret Kui-Feng Lee
2022-03-18 19:13 ` Alexei Starovoitov
2022-03-21 23:24 ` Andrii Nakryiko
2022-03-21 23:37 ` Andrii Nakryiko
2022-04-12 16:50 ` Kui-Feng Lee
2022-03-22 1:15 ` Alexei Starovoitov
2022-03-22 4:32 ` Andrii Nakryiko
2022-04-06 5:35 ` Kui-Feng Lee
2022-04-06 17:00 ` Andrii Nakryiko
2022-03-21 23:18 ` Andrii Nakryiko
2022-03-22 16:08 ` Kui-Feng Lee
2022-03-22 21:06 ` Andrii Nakryiko
2022-04-06 22:44 ` Kui-Feng Lee
2022-03-16 0:42 ` [PATCH bpf-next v2 4/4] selftest/bpf: The test cses of " Kui-Feng Lee
2022-03-18 19:21 ` Alexei Starovoitov
2022-03-20 8:43 ` Kui-Feng Lee
2022-03-21 23:29 ` Andrii Nakryiko
2022-03-21 23:36 ` Andrii Nakryiko
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20220318190917.tecjespuzkegwb2k@ast-mbp \
--to=alexei.starovoitov@gmail.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=kuifeng@fb.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox