BPF List
 help / color / mirror / Atom feed
From: Yonghong Song <yonghong.song@linux.dev>
To: Alexei Starovoitov <alexei.starovoitov@gmail.com>,
	Kumar Kartikeya Dwivedi <memxor@gmail.com>
Cc: bpf <bpf@vger.kernel.org>, Alexei Starovoitov <ast@kernel.org>,
	Andrii Nakryiko <andrii@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Kernel Team <kernel-team@fb.com>,
	Martin KaFai Lau <martin.lau@kernel.org>
Subject: Re: [PATCH bpf-next v3 4/5] bpf, x86: Add jit support for private stack
Date: Mon, 30 Sep 2024 09:33:43 -0700	[thread overview]
Message-ID: <06f3db51-6bc7-4318-9b15-ae694cca7aad@linux.dev> (raw)
In-Reply-To: <CAADnVQ+v3u=9PEHQ0xJEf6wSRc2iR928Sc+6CULh390i3TDR=w@mail.gmail.com>


On 9/30/24 8:03 AM, Alexei Starovoitov wrote:
> On Thu, Sep 26, 2024 at 4:45 PM Yonghong Song <yonghong.song@linux.dev> wrote:
>> Add jit support for private stack. For a particular subtree, e.g.,
>>    subtree_root <== stack depth 120
>>     subprog1    <== stack depth 80
>>      subprog2   <== stack depth 40
>>     subprog3    <== stack depth 160
>>
>> Let us say that private_stack_ptr is the memory address allocated for
>> private stack. The frame pointer for each above is calculated like below:
>>    subtree_root  <== subtree_root_fp = private_stack_ptr + 120
>>     subprog1     <== subtree_subprog1_fp = subtree_root_fp + 80
>>      subprog2    <== subtree_subprog2_fp = subtree_subprog1_fp + 40
>>     subprog3     <== subtree_subprog1_fp = subtree_root_fp + 160
>>
>> For any function call to helper/kfunc, push/pop prog frame pointer
>> is needed in order to preserve frame pointer value.
>>
>> To deal with exception handling, push/pop frame pointer is also used
>> surrounding call to subsequent subprog. For example,
>>    subtree_root
>>     subprog1
>>       ...
>>       insn: call bpf_throw
>>       ...
>>
>> After jit, we will have
>>    subtree_root
>>     insn: push r9
>>     subprog1
>>       ...
>>       insn: push r9
>>       insn: call bpf_throw
>>       insn: pop r9
>>       ...
>>     insn: pop r9
>>
>>    exception_handler
>>       pop r9
>>       ...
>> where r9 represents the fp for each subprog.
> Kumar,
> please review the interaction of priv_stack with exceptions.
>
>> Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
>> ---
>>   arch/x86/net/bpf_jit_comp.c | 87 ++++++++++++++++++++++++++++++++++---
>>   1 file changed, 81 insertions(+), 6 deletions(-)
>>
>> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
>> index 06b080b61aa5..c264822c926b 100644
>> --- a/arch/x86/net/bpf_jit_comp.c
>> +++ b/arch/x86/net/bpf_jit_comp.c
>> @@ -325,6 +325,22 @@ struct jit_context {
>>   /* Number of bytes that will be skipped on tailcall */
>>   #define X86_TAIL_CALL_OFFSET   (12 + ENDBR_INSN_SIZE)
>>
>> +static void push_r9(u8 **pprog)
>> +{
>> +       u8 *prog = *pprog;
>> +
>> +       EMIT2(0x41, 0x51);   /* push r9 */
>> +       *pprog = prog;
>> +}
>> +
>> +static void pop_r9(u8 **pprog)
>> +{
>> +       u8 *prog = *pprog;
>> +
>> +       EMIT2(0x41, 0x59);   /* pop r9 */
>> +       *pprog = prog;
>> +}
>> +
>>   static void push_r12(u8 **pprog)
>>   {
>>          u8 *prog = *pprog;
>> @@ -491,7 +507,7 @@ static void emit_prologue_tail_call(u8 **pprog, bool is_subprog)
>>    */
>>   static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
>>                            bool tail_call_reachable, bool is_subprog,
>> -                         bool is_exception_cb)
>> +                         bool is_exception_cb, enum bpf_pstack_state  pstack)
> enum bpf_priv_stack_mode priv_stack_mode

Okay.

>
>>   {
>>          u8 *prog = *pprog;
>>
>> @@ -518,6 +534,8 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
>>                   * first restore those callee-saved regs from stack, before
>>                   * reusing the stack frame.
>>                   */
>> +               if (pstack)
>> +                       pop_r9(&prog);
> This is an unnecessary cognitive load, since readers
> need to remember absolute values of enum.
> Just use
> if (priv_stack_mode != NO_PRIV_STACK)

Will do.

>
>>                  pop_callee_regs(&prog, all_callee_regs_used);
>>                  pop_r12(&prog);
>>                  /* Reset the stack frame. */
>> @@ -1404,6 +1422,22 @@ static void emit_shiftx(u8 **pprog, u32 dst_reg, u8 src_reg, bool is64, u8 op)
>>          *pprog = prog;
>>   }
>>
>> +static void emit_private_frame_ptr(u8 **pprog, void *private_frame_ptr)
>> +{
>> +       u8 *prog = *pprog;
>> +
>> +       /* movabs r9, private_frame_ptr */
>> +       emit_mov_imm64(&prog, X86_REG_R9, (long) private_frame_ptr >> 32,
>> +                      (u32) (long) private_frame_ptr);
>> +
>> +       /* add <r9>, gs:[<off>] */
>> +       EMIT2(0x65, 0x4c);
>> +       EMIT3(0x03, 0x0c, 0x25);
>> +       EMIT((u32)(unsigned long)&this_cpu_off, 4);
>> +
>> +       *pprog = prog;
>> +}
>> +
>>   #define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp)))
>>
>>   #define __LOAD_TCC_PTR(off)                    \
>> @@ -1421,20 +1455,31 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
>>          int insn_cnt = bpf_prog->len;
>>          bool seen_exit = false;
>>          u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
>> +       void __percpu *private_frame_ptr = NULL;
>>          u64 arena_vm_start, user_vm_start;
>> +       u32 orig_stack_depth, stack_depth;
>>          int i, excnt = 0;
>>          int ilen, proglen = 0;
>>          u8 *prog = temp;
>>          int err;
>>
>> +       stack_depth = bpf_prog->aux->stack_depth;
>> +       orig_stack_depth = round_up(stack_depth, 8);
>> +       if (bpf_prog->pstack) {
>> +               stack_depth = 0;
>> +               if (bpf_prog->pstack == PSTACK_TREE_ROOT)
>> +                       private_frame_ptr = bpf_prog->private_stack_ptr + orig_stack_depth;
>> +       }
> Same issue.
> switch (priv_stack_mode) {
> case PRIV_STACK_MAIN_PROG:
>      priv_frame_ptr = bpf_prog->priv_stack_ptr + orig_stack_depth;
>      fallthrough;
> case PRIV_STACK_SUB_PROG:
>      stack_depth = 0;
>      break;
> }
>
> would be easier to read.

Will do.

>
>> +
>>          arena_vm_start = bpf_arena_get_kern_vm_start(bpf_prog->aux->arena);
>>          user_vm_start = bpf_arena_get_user_vm_start(bpf_prog->aux->arena);
>>
>>          detect_reg_usage(insn, insn_cnt, callee_regs_used);
>>
>> -       emit_prologue(&prog, bpf_prog->aux->stack_depth,
>> +       emit_prologue(&prog, stack_depth,
>>                        bpf_prog_was_classic(bpf_prog), tail_call_reachable,
>> -                     bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb);
>> +                     bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb,
>> +                     bpf_prog->pstack);
>>          /* Exception callback will clobber callee regs for its own use, and
>>           * restore the original callee regs from main prog's stack frame.
>>           */
>> @@ -1454,6 +1499,17 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
>>                  emit_mov_imm64(&prog, X86_REG_R12,
>>                                 arena_vm_start >> 32, (u32) arena_vm_start);
>>
>> +       if (bpf_prog->pstack == PSTACK_TREE_ROOT) {
>> +               emit_private_frame_ptr(&prog, private_frame_ptr);
>> +       } else if (bpf_prog->pstack == PSTACK_TREE_INTERNAL  && orig_stack_depth) {
>> +               /* r9 += orig_stack_depth */
>> +               maybe_emit_1mod(&prog, X86_REG_R9, true);
>> +               if (is_imm8(orig_stack_depth))
>> +                       EMIT3(0x83, add_1reg(0xC0, X86_REG_R9), orig_stack_depth);
>> +               else
>> +                       EMIT2_off32(0x81, add_1reg(0xC0, X86_REG_R9), orig_stack_depth);
>> +       }
> We've been open coding 'add' insn like this for way too long.
> Let's address this technical debt now.
> Please move
>                  case BPF_ALU | BPF_ADD | BPF_K:
>                  case BPF_ALU | BPF_SUB | BPF_K:
>                  case BPF_ALU | BPF_AND | BPF_K:
>                  case BPF_ALU | BPF_OR | BPF_K:
>                  case BPF_ALU | BPF_XOR | BPF_K:
>                  case BPF_ALU64 | BPF_ADD | BPF_K:
>                  case BPF_ALU64 | BPF_SUB | BPF_K:
>                  case BPF_ALU64 | BPF_AND | BPF_K:
>                  case BPF_ALU64 | BPF_OR | BPF_K:
>                  case BPF_ALU64 | BPF_XOR | BPF_K:
> into helpers and use it here.

Will do.


  reply	other threads:[~2024-09-30 16:33 UTC|newest]

Thread overview: 54+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-09-26 23:45 [PATCH bpf-next v3 0/5] bpf: Support private stack for bpf progs Yonghong Song
2024-09-26 23:45 ` [PATCH bpf-next v3 1/5] bpf: Allow each subprog having stack size of 512 bytes Yonghong Song
2024-09-26 23:45 ` [PATCH bpf-next v3 2/5] bpf: Collect stack depth information Yonghong Song
2024-09-30 14:42   ` Alexei Starovoitov
2024-09-30 16:23     ` Yonghong Song
2024-09-26 23:45 ` [PATCH bpf-next v3 3/5] bpf: Mark each subprog with proper pstack states Yonghong Song
2024-09-30 14:49   ` Alexei Starovoitov
2024-09-30 16:26     ` Yonghong Song
2024-09-26 23:45 ` [PATCH bpf-next v3 4/5] bpf, x86: Add jit support for private stack Yonghong Song
2024-09-27  4:58   ` Leon Hwang
2024-09-27 15:24     ` Yonghong Song
2024-09-29  8:31   ` kernel test robot
2024-09-30 16:29     ` Yonghong Song
2024-09-29 13:02   ` kernel test robot
2024-09-30 16:31     ` Yonghong Song
2024-09-29 13:34   ` kernel test robot
2024-09-30 15:03   ` Alexei Starovoitov
2024-09-30 16:33     ` Yonghong Song [this message]
2024-10-01  4:31     ` Kumar Kartikeya Dwivedi
2024-10-01  4:37       ` Kumar Kartikeya Dwivedi
2024-10-01 18:49         ` Alexei Starovoitov
2024-10-01 19:53           ` yet another approach Was: " Alexei Starovoitov
2024-10-01 20:50             ` Kumar Kartikeya Dwivedi
2024-10-01 21:28               ` Alexei Starovoitov
2024-10-02  0:22                 ` Kumar Kartikeya Dwivedi
2024-10-02  1:26                   ` Alexei Starovoitov
2024-10-02  2:16                     ` Kumar Kartikeya Dwivedi
2024-10-02  6:28                       ` Yonghong Song
2024-10-02  6:48                         ` Yonghong Song
2024-10-03  6:17                     ` Yonghong Song
2024-10-03 13:39                       ` Kumar Kartikeya Dwivedi
2024-10-03 17:35                         ` Alexei Starovoitov
2024-10-03 18:53                           ` Yonghong Song
2024-10-03 20:44                           ` Yonghong Song
2024-10-03 20:47                             ` Kumar Kartikeya Dwivedi
2024-10-03 20:54                               ` Yonghong Song
2024-10-03 22:32                             ` Alexei Starovoitov
2024-10-04  5:22                               ` Yonghong Song
2024-10-04 19:27                                 ` Yonghong Song
2024-10-04 19:52                                   ` Alexei Starovoitov
2024-10-05  2:03                                     ` Yonghong Song
2024-10-08 22:10                                       ` Alexei Starovoitov
2024-10-09  2:06                                         ` Alexei Starovoitov
2024-10-09  6:31                                           ` Yonghong Song
2024-10-09 14:56                                             ` Alexei Starovoitov
2024-10-09 15:56                                               ` Yonghong Song
2024-10-09 16:36                                           ` Kumar Kartikeya Dwivedi
2024-10-09 16:38                                             ` Kumar Kartikeya Dwivedi
2024-10-09 17:37                                               ` Kumar Kartikeya Dwivedi
2024-10-09  6:12                                         ` Yonghong Song
2024-09-26 23:45 ` [PATCH bpf-next v3 5/5] selftests/bpf: Add private stack tests Yonghong Song
2024-09-30 13:40   ` Jiri Olsa
2024-09-30 15:05     ` Alexei Starovoitov
2024-09-30 16:35       ` Yonghong Song

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=06f3db51-6bc7-4318-9b15-ae694cca7aad@linux.dev \
    --to=yonghong.song@linux.dev \
    --cc=alexei.starovoitov@gmail.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=kernel-team@fb.com \
    --cc=martin.lau@kernel.org \
    --cc=memxor@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox