All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yonghong Song <yonghong.song@linux.dev>
To: Alexei Starovoitov <alexei.starovoitov@gmail.com>,
	Kumar Kartikeya Dwivedi <memxor@gmail.com>
Cc: bpf <bpf@vger.kernel.org>, Alexei Starovoitov <ast@kernel.org>,
	Andrii Nakryiko <andrii@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Kernel Team <kernel-team@fb.com>,
	Martin KaFai Lau <martin.lau@kernel.org>
Subject: Re: [PATCH bpf-next v3 4/5] bpf, x86: Add jit support for private stack
Date: Mon, 30 Sep 2024 09:33:43 -0700	[thread overview]
Message-ID: <06f3db51-6bc7-4318-9b15-ae694cca7aad@linux.dev> (raw)
In-Reply-To: <CAADnVQ+v3u=9PEHQ0xJEf6wSRc2iR928Sc+6CULh390i3TDR=w@mail.gmail.com>


On 9/30/24 8:03 AM, Alexei Starovoitov wrote:
> On Thu, Sep 26, 2024 at 4:45 PM Yonghong Song <yonghong.song@linux.dev> wrote:
>> Add jit support for private stack. For a particular subtree, e.g.,
>>    subtree_root <== stack depth 120
>>     subprog1    <== stack depth 80
>>      subprog2   <== stack depth 40
>>     subprog3    <== stack depth 160
>>
>> Let us say that private_stack_ptr is the memory address allocated for
>> private stack. The frame pointer for each above is calculated like below:
>>    subtree_root  <== subtree_root_fp = private_stack_ptr + 120
>>     subprog1     <== subtree_subprog1_fp = subtree_root_fp + 80
>>      subprog2    <== subtree_subprog2_fp = subtree_subprog1_fp + 40
>>     subprog3     <== subtree_subprog1_fp = subtree_root_fp + 160
>>
>> For any function call to helper/kfunc, push/pop prog frame pointer
>> is needed in order to preserve frame pointer value.
>>
>> To deal with exception handling, push/pop frame pointer is also used
>> surrounding call to subsequent subprog. For example,
>>    subtree_root
>>     subprog1
>>       ...
>>       insn: call bpf_throw
>>       ...
>>
>> After jit, we will have
>>    subtree_root
>>     insn: push r9
>>     subprog1
>>       ...
>>       insn: push r9
>>       insn: call bpf_throw
>>       insn: pop r9
>>       ...
>>     insn: pop r9
>>
>>    exception_handler
>>       pop r9
>>       ...
>> where r9 represents the fp for each subprog.
> Kumar,
> please review the interaction of priv_stack with exceptions.
>
>> Signed-off-by: Yonghong Song <yonghong.song@linux.dev>
>> ---
>>   arch/x86/net/bpf_jit_comp.c | 87 ++++++++++++++++++++++++++++++++++---
>>   1 file changed, 81 insertions(+), 6 deletions(-)
>>
>> diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
>> index 06b080b61aa5..c264822c926b 100644
>> --- a/arch/x86/net/bpf_jit_comp.c
>> +++ b/arch/x86/net/bpf_jit_comp.c
>> @@ -325,6 +325,22 @@ struct jit_context {
>>   /* Number of bytes that will be skipped on tailcall */
>>   #define X86_TAIL_CALL_OFFSET   (12 + ENDBR_INSN_SIZE)
>>
>> +static void push_r9(u8 **pprog)
>> +{
>> +       u8 *prog = *pprog;
>> +
>> +       EMIT2(0x41, 0x51);   /* push r9 */
>> +       *pprog = prog;
>> +}
>> +
>> +static void pop_r9(u8 **pprog)
>> +{
>> +       u8 *prog = *pprog;
>> +
>> +       EMIT2(0x41, 0x59);   /* pop r9 */
>> +       *pprog = prog;
>> +}
>> +
>>   static void push_r12(u8 **pprog)
>>   {
>>          u8 *prog = *pprog;
>> @@ -491,7 +507,7 @@ static void emit_prologue_tail_call(u8 **pprog, bool is_subprog)
>>    */
>>   static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
>>                            bool tail_call_reachable, bool is_subprog,
>> -                         bool is_exception_cb)
>> +                         bool is_exception_cb, enum bpf_pstack_state  pstack)
> enum bpf_priv_stack_mode priv_stack_mode

Okay.

>
>>   {
>>          u8 *prog = *pprog;
>>
>> @@ -518,6 +534,8 @@ static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf,
>>                   * first restore those callee-saved regs from stack, before
>>                   * reusing the stack frame.
>>                   */
>> +               if (pstack)
>> +                       pop_r9(&prog);
> This is an unnecessary cognitive load, since readers
> need to remember absolute values of enum.
> Just use
> if (priv_stack_mode != NO_PRIV_STACK)

Will do.

>
>>                  pop_callee_regs(&prog, all_callee_regs_used);
>>                  pop_r12(&prog);
>>                  /* Reset the stack frame. */
>> @@ -1404,6 +1422,22 @@ static void emit_shiftx(u8 **pprog, u32 dst_reg, u8 src_reg, bool is64, u8 op)
>>          *pprog = prog;
>>   }
>>
>> +static void emit_private_frame_ptr(u8 **pprog, void *private_frame_ptr)
>> +{
>> +       u8 *prog = *pprog;
>> +
>> +       /* movabs r9, private_frame_ptr */
>> +       emit_mov_imm64(&prog, X86_REG_R9, (long) private_frame_ptr >> 32,
>> +                      (u32) (long) private_frame_ptr);
>> +
>> +       /* add <r9>, gs:[<off>] */
>> +       EMIT2(0x65, 0x4c);
>> +       EMIT3(0x03, 0x0c, 0x25);
>> +       EMIT((u32)(unsigned long)&this_cpu_off, 4);
>> +
>> +       *pprog = prog;
>> +}
>> +
>>   #define INSN_SZ_DIFF (((addrs[i] - addrs[i - 1]) - (prog - temp)))
>>
>>   #define __LOAD_TCC_PTR(off)                    \
>> @@ -1421,20 +1455,31 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
>>          int insn_cnt = bpf_prog->len;
>>          bool seen_exit = false;
>>          u8 temp[BPF_MAX_INSN_SIZE + BPF_INSN_SAFETY];
>> +       void __percpu *private_frame_ptr = NULL;
>>          u64 arena_vm_start, user_vm_start;
>> +       u32 orig_stack_depth, stack_depth;
>>          int i, excnt = 0;
>>          int ilen, proglen = 0;
>>          u8 *prog = temp;
>>          int err;
>>
>> +       stack_depth = bpf_prog->aux->stack_depth;
>> +       orig_stack_depth = round_up(stack_depth, 8);
>> +       if (bpf_prog->pstack) {
>> +               stack_depth = 0;
>> +               if (bpf_prog->pstack == PSTACK_TREE_ROOT)
>> +                       private_frame_ptr = bpf_prog->private_stack_ptr + orig_stack_depth;
>> +       }
> Same issue.
> switch (priv_stack_mode) {
> case PRIV_STACK_MAIN_PROG:
>      priv_frame_ptr = bpf_prog->priv_stack_ptr + orig_stack_depth;
>      fallthrough;
> case PRIV_STACK_SUB_PROG:
>      stack_depth = 0;
>      break;
> }
>
> would be easier to read.

Will do.

>
>> +
>>          arena_vm_start = bpf_arena_get_kern_vm_start(bpf_prog->aux->arena);
>>          user_vm_start = bpf_arena_get_user_vm_start(bpf_prog->aux->arena);
>>
>>          detect_reg_usage(insn, insn_cnt, callee_regs_used);
>>
>> -       emit_prologue(&prog, bpf_prog->aux->stack_depth,
>> +       emit_prologue(&prog, stack_depth,
>>                        bpf_prog_was_classic(bpf_prog), tail_call_reachable,
>> -                     bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb);
>> +                     bpf_is_subprog(bpf_prog), bpf_prog->aux->exception_cb,
>> +                     bpf_prog->pstack);
>>          /* Exception callback will clobber callee regs for its own use, and
>>           * restore the original callee regs from main prog's stack frame.
>>           */
>> @@ -1454,6 +1499,17 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, u8 *rw_image
>>                  emit_mov_imm64(&prog, X86_REG_R12,
>>                                 arena_vm_start >> 32, (u32) arena_vm_start);
>>
>> +       if (bpf_prog->pstack == PSTACK_TREE_ROOT) {
>> +               emit_private_frame_ptr(&prog, private_frame_ptr);
>> +       } else if (bpf_prog->pstack == PSTACK_TREE_INTERNAL  && orig_stack_depth) {
>> +               /* r9 += orig_stack_depth */
>> +               maybe_emit_1mod(&prog, X86_REG_R9, true);
>> +               if (is_imm8(orig_stack_depth))
>> +                       EMIT3(0x83, add_1reg(0xC0, X86_REG_R9), orig_stack_depth);
>> +               else
>> +                       EMIT2_off32(0x81, add_1reg(0xC0, X86_REG_R9), orig_stack_depth);
>> +       }
> We've been open coding 'add' insn like this for way too long.
> Let's address this technical debt now.
> Please move
>                  case BPF_ALU | BPF_ADD | BPF_K:
>                  case BPF_ALU | BPF_SUB | BPF_K:
>                  case BPF_ALU | BPF_AND | BPF_K:
>                  case BPF_ALU | BPF_OR | BPF_K:
>                  case BPF_ALU | BPF_XOR | BPF_K:
>                  case BPF_ALU64 | BPF_ADD | BPF_K:
>                  case BPF_ALU64 | BPF_SUB | BPF_K:
>                  case BPF_ALU64 | BPF_AND | BPF_K:
>                  case BPF_ALU64 | BPF_OR | BPF_K:
>                  case BPF_ALU64 | BPF_XOR | BPF_K:
> into helpers and use it here.

Will do.


  reply	other threads:[~2024-09-30 16:33 UTC|newest]

Thread overview: 54+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-09-26 23:45 [PATCH bpf-next v3 0/5] bpf: Support private stack for bpf progs Yonghong Song
2024-09-26 23:45 ` [PATCH bpf-next v3 1/5] bpf: Allow each subprog having stack size of 512 bytes Yonghong Song
2024-09-26 23:45 ` [PATCH bpf-next v3 2/5] bpf: Collect stack depth information Yonghong Song
2024-09-30 14:42   ` Alexei Starovoitov
2024-09-30 16:23     ` Yonghong Song
2024-09-26 23:45 ` [PATCH bpf-next v3 3/5] bpf: Mark each subprog with proper pstack states Yonghong Song
2024-09-30 14:49   ` Alexei Starovoitov
2024-09-30 16:26     ` Yonghong Song
2024-09-26 23:45 ` [PATCH bpf-next v3 4/5] bpf, x86: Add jit support for private stack Yonghong Song
2024-09-27  4:58   ` Leon Hwang
2024-09-27 15:24     ` Yonghong Song
2024-09-29  8:31   ` kernel test robot
2024-09-30 16:29     ` Yonghong Song
2024-09-29 13:02   ` kernel test robot
2024-09-30 16:31     ` Yonghong Song
2024-09-29 13:34   ` kernel test robot
2024-09-30 15:03   ` Alexei Starovoitov
2024-09-30 16:33     ` Yonghong Song [this message]
2024-10-01  4:31     ` Kumar Kartikeya Dwivedi
2024-10-01  4:37       ` Kumar Kartikeya Dwivedi
2024-10-01 18:49         ` Alexei Starovoitov
2024-10-01 19:53           ` yet another approach Was: " Alexei Starovoitov
2024-10-01 20:50             ` Kumar Kartikeya Dwivedi
2024-10-01 21:28               ` Alexei Starovoitov
2024-10-02  0:22                 ` Kumar Kartikeya Dwivedi
2024-10-02  1:26                   ` Alexei Starovoitov
2024-10-02  2:16                     ` Kumar Kartikeya Dwivedi
2024-10-02  6:28                       ` Yonghong Song
2024-10-02  6:48                         ` Yonghong Song
2024-10-03  6:17                     ` Yonghong Song
2024-10-03 13:39                       ` Kumar Kartikeya Dwivedi
2024-10-03 17:35                         ` Alexei Starovoitov
2024-10-03 18:53                           ` Yonghong Song
2024-10-03 20:44                           ` Yonghong Song
2024-10-03 20:47                             ` Kumar Kartikeya Dwivedi
2024-10-03 20:54                               ` Yonghong Song
2024-10-03 22:32                             ` Alexei Starovoitov
2024-10-04  5:22                               ` Yonghong Song
2024-10-04 19:27                                 ` Yonghong Song
2024-10-04 19:52                                   ` Alexei Starovoitov
2024-10-05  2:03                                     ` Yonghong Song
2024-10-08 22:10                                       ` Alexei Starovoitov
2024-10-09  2:06                                         ` Alexei Starovoitov
2024-10-09  6:31                                           ` Yonghong Song
2024-10-09 14:56                                             ` Alexei Starovoitov
2024-10-09 15:56                                               ` Yonghong Song
2024-10-09 16:36                                           ` Kumar Kartikeya Dwivedi
2024-10-09 16:38                                             ` Kumar Kartikeya Dwivedi
2024-10-09 17:37                                               ` Kumar Kartikeya Dwivedi
2024-10-09  6:12                                         ` Yonghong Song
2024-09-26 23:45 ` [PATCH bpf-next v3 5/5] selftests/bpf: Add private stack tests Yonghong Song
2024-09-30 13:40   ` Jiri Olsa
2024-09-30 15:05     ` Alexei Starovoitov
2024-09-30 16:35       ` Yonghong Song

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=06f3db51-6bc7-4318-9b15-ae694cca7aad@linux.dev \
    --to=yonghong.song@linux.dev \
    --cc=alexei.starovoitov@gmail.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=kernel-team@fb.com \
    --cc=martin.lau@kernel.org \
    --cc=memxor@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.