From: Puranjay Mohan <puranjay@kernel.org>
To: Pu Lehui <pulehui@huaweicloud.com>,
bpf@vger.kernel.org, linux-riscv@lists.infradead.org,
netdev@vger.kernel.org
Cc: "Björn Töpel" <bjorn@kernel.org>,
"Alexei Starovoitov" <ast@kernel.org>,
"Daniel Borkmann" <daniel@iogearbox.net>,
"Andrii Nakryiko" <andrii@kernel.org>,
"Martin KaFai Lau" <martin.lau@linux.dev>,
"Eduard Zingerman" <eddyz87@gmail.com>,
"Song Liu" <song@kernel.org>,
"Yonghong Song" <yonghong.song@linux.dev>,
"John Fastabend" <john.fastabend@gmail.com>,
"KP Singh" <kpsingh@kernel.org>,
"Stanislav Fomichev" <sdf@google.com>,
"Hao Luo" <haoluo@google.com>, "Jiri Olsa" <jolsa@kernel.org>,
"Palmer Dabbelt" <palmer@dabbelt.com>,
"Pu Lehui" <pulehui@huawei.com>
Subject: Re: [PATCH bpf-next v6 1/3] riscv, bpf: Add 12-argument support for RV64 bpf trampoline
Date: Fri, 05 Jul 2024 12:51:36 +0000 [thread overview]
Message-ID: <mb61p7ce0roh3.fsf@kernel.org> (raw)
In-Reply-To: <20240702121944.1091530-2-pulehui@huaweicloud.com>
[-- Attachment #1: Type: text/plain, Size: 5831 bytes --]
Pu Lehui <pulehui@huaweicloud.com> writes:
> From: Pu Lehui <pulehui@huawei.com>
>
> This patch adds 12 function arguments support for riscv64 bpf
> trampoline. The current bpf trampoline supports <= sizeof(u64) bytes
> scalar arguments [0] and <= 16 bytes struct arguments [1]. Therefore, we
> focus on the situation where scalars are at most XLEN bits and
> aggregates whose total size does not exceed 2×XLEN bits in the riscv
> calling convention [2].
>
> Link: https://elixir.bootlin.com/linux/v6.8/source/kernel/bpf/btf.c#L6184 [0]
> Link: https://elixir.bootlin.com/linux/v6.8/source/kernel/bpf/btf.c#L6769 [1]
> Link: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/releases/download/draft-20230929-e5c800e661a53efe3c2678d71a306323b60eb13b/riscv-abi.pdf [2]
> Signed-off-by: Pu Lehui <pulehui@huawei.com>
> Acked-by: Björn Töpel <bjorn@kernel.org>
> Reviewed-by: Björn Töpel <bjorn@rivosinc.com>
> ---
> arch/riscv/net/bpf_jit_comp64.c | 66 +++++++++++++++++++++++----------
> 1 file changed, 47 insertions(+), 19 deletions(-)
>
> diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
> index 351e1484205e..685c7389ae7e 100644
> --- a/arch/riscv/net/bpf_jit_comp64.c
> +++ b/arch/riscv/net/bpf_jit_comp64.c
> @@ -15,6 +15,7 @@
> #include <asm/percpu.h>
> #include "bpf_jit.h"
>
> +#define RV_MAX_REG_ARGS 8
> #define RV_FENTRY_NINSNS 2
> /* imm that allows emit_imm to emit max count insns */
> #define RV_MAX_COUNT_IMM 0x7FFF7FF7FF7FF7FF
> @@ -692,26 +693,45 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
> return ret;
> }
>
> -static void store_args(int nregs, int args_off, struct rv_jit_context *ctx)
> +static void store_args(int nr_arg_slots, int args_off, struct rv_jit_context *ctx)
> {
> int i;
>
> - for (i = 0; i < nregs; i++) {
> - emit_sd(RV_REG_FP, -args_off, RV_REG_A0 + i, ctx);
> + for (i = 0; i < nr_arg_slots; i++) {
> + if (i < RV_MAX_REG_ARGS) {
> + emit_sd(RV_REG_FP, -args_off, RV_REG_A0 + i, ctx);
> + } else {
> + /* skip slots for T0 and FP of traced function */
> + emit_ld(RV_REG_T1, 16 + (i - RV_MAX_REG_ARGS) * 8, RV_REG_FP, ctx);
> + emit_sd(RV_REG_FP, -args_off, RV_REG_T1, ctx);
> + }
> args_off -= 8;
> }
> }
>
> -static void restore_args(int nregs, int args_off, struct rv_jit_context *ctx)
> +static void restore_args(int nr_reg_args, int args_off, struct rv_jit_context *ctx)
> {
> int i;
>
> - for (i = 0; i < nregs; i++) {
> + for (i = 0; i < nr_reg_args; i++) {
> emit_ld(RV_REG_A0 + i, -args_off, RV_REG_FP, ctx);
> args_off -= 8;
> }
> }
>
> +static void restore_stack_args(int nr_stack_args, int args_off, int stk_arg_off,
> + struct rv_jit_context *ctx)
> +{
> + int i;
> +
> + for (i = 0; i < nr_stack_args; i++) {
> + emit_ld(RV_REG_T1, -(args_off - RV_MAX_REG_ARGS * 8), RV_REG_FP, ctx);
> + emit_sd(RV_REG_FP, -stk_arg_off, RV_REG_T1, ctx);
> + args_off -= 8;
> + stk_arg_off -= 8;
> + }
> +}
> +
> static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_off,
> int run_ctx_off, bool save_ret, struct rv_jit_context *ctx)
> {
> @@ -784,8 +804,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
> {
> int i, ret, offset;
> int *branches_off = NULL;
> - int stack_size = 0, nregs = m->nr_args;
> - int retval_off, args_off, nregs_off, ip_off, run_ctx_off, sreg_off;
> + int stack_size = 0, nr_arg_slots = 0;
> + int retval_off, args_off, nregs_off, ip_off, run_ctx_off, sreg_off, stk_arg_off;
> struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
> struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
> struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
> @@ -831,20 +851,21 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
> * FP - sreg_off [ callee saved reg ]
> *
> * [ pads ] pads for 16 bytes alignment
> + *
> + * [ stack_argN ]
> + * [ ... ]
> + * FP - stk_arg_off [ stack_arg1 ] BPF_TRAMP_F_CALL_ORIG
> */
>
> if (flags & (BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SHARE_IPMODIFY))
> return -ENOTSUPP;
>
> - /* extra regiters for struct arguments */
> - for (i = 0; i < m->nr_args; i++)
> - if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
> - nregs += round_up(m->arg_size[i], 8) / 8 - 1;
> -
> - /* 8 arguments passed by registers */
> - if (nregs > 8)
> + if (m->nr_args > MAX_BPF_FUNC_ARGS)
> return -ENOTSUPP;
>
> + for (i = 0; i < m->nr_args; i++)
> + nr_arg_slots += round_up(m->arg_size[i], 8) / 8;
> +
> /* room of trampoline frame to store return address and frame pointer */
> stack_size += 16;
>
> @@ -854,7 +875,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
> retval_off = stack_size;
> }
>
> - stack_size += nregs * 8;
> + stack_size += nr_arg_slots * 8;
> args_off = stack_size;
>
> stack_size += 8;
> @@ -871,8 +892,14 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
> stack_size += 8;
> sreg_off = stack_size;
>
> + if (nr_arg_slots - RV_MAX_REG_ARGS > 0)
> + stack_size += (nr_arg_slots - RV_MAX_REG_ARGS) * 8;
Hi Pu,
Although this is merged now, while working on this for arm64 I realised
that the above doesn't check for BPF_TRAMP_F_CALL_ORIG and can waste
some stack space, we should change this to:
if ((flags & BPF_TRAMP_F_CALL_ORIG) && (nr_arg_slots - RV_MAX_REG_ARGS > 0))
stack_size += (nr_arg_slots - RV_MAX_REG_ARGS) * 8;
It will save some stack space when BPF_TRAMP_F_CALL_ORIG is not set?
I can send a patch if you think this is worth fixing.
Thanks,
Puranjay
[-- Attachment #2: signature.asc --]
[-- Type: application/pgp-signature, Size: 255 bytes --]
WARNING: multiple messages have this Message-ID (diff)
From: Puranjay Mohan <puranjay@kernel.org>
To: Pu Lehui <pulehui@huaweicloud.com>,
bpf@vger.kernel.org, linux-riscv@lists.infradead.org,
netdev@vger.kernel.org
Cc: "Björn Töpel" <bjorn@kernel.org>,
"Alexei Starovoitov" <ast@kernel.org>,
"Daniel Borkmann" <daniel@iogearbox.net>,
"Andrii Nakryiko" <andrii@kernel.org>,
"Martin KaFai Lau" <martin.lau@linux.dev>,
"Eduard Zingerman" <eddyz87@gmail.com>,
"Song Liu" <song@kernel.org>,
"Yonghong Song" <yonghong.song@linux.dev>,
"John Fastabend" <john.fastabend@gmail.com>,
"KP Singh" <kpsingh@kernel.org>,
"Stanislav Fomichev" <sdf@google.com>,
"Hao Luo" <haoluo@google.com>, "Jiri Olsa" <jolsa@kernel.org>,
"Palmer Dabbelt" <palmer@dabbelt.com>,
"Pu Lehui" <pulehui@huawei.com>
Subject: Re: [PATCH bpf-next v6 1/3] riscv, bpf: Add 12-argument support for RV64 bpf trampoline
Date: Fri, 05 Jul 2024 12:51:36 +0000 [thread overview]
Message-ID: <mb61p7ce0roh3.fsf@kernel.org> (raw)
In-Reply-To: <20240702121944.1091530-2-pulehui@huaweicloud.com>
[-- Attachment #1.1: Type: text/plain, Size: 5831 bytes --]
Pu Lehui <pulehui@huaweicloud.com> writes:
> From: Pu Lehui <pulehui@huawei.com>
>
> This patch adds 12 function arguments support for riscv64 bpf
> trampoline. The current bpf trampoline supports <= sizeof(u64) bytes
> scalar arguments [0] and <= 16 bytes struct arguments [1]. Therefore, we
> focus on the situation where scalars are at most XLEN bits and
> aggregates whose total size does not exceed 2×XLEN bits in the riscv
> calling convention [2].
>
> Link: https://elixir.bootlin.com/linux/v6.8/source/kernel/bpf/btf.c#L6184 [0]
> Link: https://elixir.bootlin.com/linux/v6.8/source/kernel/bpf/btf.c#L6769 [1]
> Link: https://github.com/riscv-non-isa/riscv-elf-psabi-doc/releases/download/draft-20230929-e5c800e661a53efe3c2678d71a306323b60eb13b/riscv-abi.pdf [2]
> Signed-off-by: Pu Lehui <pulehui@huawei.com>
> Acked-by: Björn Töpel <bjorn@kernel.org>
> Reviewed-by: Björn Töpel <bjorn@rivosinc.com>
> ---
> arch/riscv/net/bpf_jit_comp64.c | 66 +++++++++++++++++++++++----------
> 1 file changed, 47 insertions(+), 19 deletions(-)
>
> diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
> index 351e1484205e..685c7389ae7e 100644
> --- a/arch/riscv/net/bpf_jit_comp64.c
> +++ b/arch/riscv/net/bpf_jit_comp64.c
> @@ -15,6 +15,7 @@
> #include <asm/percpu.h>
> #include "bpf_jit.h"
>
> +#define RV_MAX_REG_ARGS 8
> #define RV_FENTRY_NINSNS 2
> /* imm that allows emit_imm to emit max count insns */
> #define RV_MAX_COUNT_IMM 0x7FFF7FF7FF7FF7FF
> @@ -692,26 +693,45 @@ int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type,
> return ret;
> }
>
> -static void store_args(int nregs, int args_off, struct rv_jit_context *ctx)
> +static void store_args(int nr_arg_slots, int args_off, struct rv_jit_context *ctx)
> {
> int i;
>
> - for (i = 0; i < nregs; i++) {
> - emit_sd(RV_REG_FP, -args_off, RV_REG_A0 + i, ctx);
> + for (i = 0; i < nr_arg_slots; i++) {
> + if (i < RV_MAX_REG_ARGS) {
> + emit_sd(RV_REG_FP, -args_off, RV_REG_A0 + i, ctx);
> + } else {
> + /* skip slots for T0 and FP of traced function */
> + emit_ld(RV_REG_T1, 16 + (i - RV_MAX_REG_ARGS) * 8, RV_REG_FP, ctx);
> + emit_sd(RV_REG_FP, -args_off, RV_REG_T1, ctx);
> + }
> args_off -= 8;
> }
> }
>
> -static void restore_args(int nregs, int args_off, struct rv_jit_context *ctx)
> +static void restore_args(int nr_reg_args, int args_off, struct rv_jit_context *ctx)
> {
> int i;
>
> - for (i = 0; i < nregs; i++) {
> + for (i = 0; i < nr_reg_args; i++) {
> emit_ld(RV_REG_A0 + i, -args_off, RV_REG_FP, ctx);
> args_off -= 8;
> }
> }
>
> +static void restore_stack_args(int nr_stack_args, int args_off, int stk_arg_off,
> + struct rv_jit_context *ctx)
> +{
> + int i;
> +
> + for (i = 0; i < nr_stack_args; i++) {
> + emit_ld(RV_REG_T1, -(args_off - RV_MAX_REG_ARGS * 8), RV_REG_FP, ctx);
> + emit_sd(RV_REG_FP, -stk_arg_off, RV_REG_T1, ctx);
> + args_off -= 8;
> + stk_arg_off -= 8;
> + }
> +}
> +
> static int invoke_bpf_prog(struct bpf_tramp_link *l, int args_off, int retval_off,
> int run_ctx_off, bool save_ret, struct rv_jit_context *ctx)
> {
> @@ -784,8 +804,8 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
> {
> int i, ret, offset;
> int *branches_off = NULL;
> - int stack_size = 0, nregs = m->nr_args;
> - int retval_off, args_off, nregs_off, ip_off, run_ctx_off, sreg_off;
> + int stack_size = 0, nr_arg_slots = 0;
> + int retval_off, args_off, nregs_off, ip_off, run_ctx_off, sreg_off, stk_arg_off;
> struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY];
> struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT];
> struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN];
> @@ -831,20 +851,21 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
> * FP - sreg_off [ callee saved reg ]
> *
> * [ pads ] pads for 16 bytes alignment
> + *
> + * [ stack_argN ]
> + * [ ... ]
> + * FP - stk_arg_off [ stack_arg1 ] BPF_TRAMP_F_CALL_ORIG
> */
>
> if (flags & (BPF_TRAMP_F_ORIG_STACK | BPF_TRAMP_F_SHARE_IPMODIFY))
> return -ENOTSUPP;
>
> - /* extra regiters for struct arguments */
> - for (i = 0; i < m->nr_args; i++)
> - if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG)
> - nregs += round_up(m->arg_size[i], 8) / 8 - 1;
> -
> - /* 8 arguments passed by registers */
> - if (nregs > 8)
> + if (m->nr_args > MAX_BPF_FUNC_ARGS)
> return -ENOTSUPP;
>
> + for (i = 0; i < m->nr_args; i++)
> + nr_arg_slots += round_up(m->arg_size[i], 8) / 8;
> +
> /* room of trampoline frame to store return address and frame pointer */
> stack_size += 16;
>
> @@ -854,7 +875,7 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
> retval_off = stack_size;
> }
>
> - stack_size += nregs * 8;
> + stack_size += nr_arg_slots * 8;
> args_off = stack_size;
>
> stack_size += 8;
> @@ -871,8 +892,14 @@ static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im,
> stack_size += 8;
> sreg_off = stack_size;
>
> + if (nr_arg_slots - RV_MAX_REG_ARGS > 0)
> + stack_size += (nr_arg_slots - RV_MAX_REG_ARGS) * 8;
Hi Pu,
Although this is merged now, while working on this for arm64 I realised
that the above doesn't check for BPF_TRAMP_F_CALL_ORIG and can waste
some stack space, we should change this to:
if ((flags & BPF_TRAMP_F_CALL_ORIG) && (nr_arg_slots - RV_MAX_REG_ARGS > 0))
stack_size += (nr_arg_slots - RV_MAX_REG_ARGS) * 8;
It will save some stack space when BPF_TRAMP_F_CALL_ORIG is not set?
I can send a patch if you think this is worth fixing.
Thanks,
Puranjay
[-- Attachment #1.2: signature.asc --]
[-- Type: application/pgp-signature, Size: 255 bytes --]
[-- Attachment #2: Type: text/plain, Size: 161 bytes --]
_______________________________________________
linux-riscv mailing list
linux-riscv@lists.infradead.org
http://lists.infradead.org/mailman/listinfo/linux-riscv
next prev parent reply other threads:[~2024-07-05 12:51 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-07-02 12:19 [PATCH bpf-next v6 0/3] Add 12-argument support for RV64 bpf trampoline Pu Lehui
2024-07-02 12:19 ` Pu Lehui
2024-07-02 12:19 ` [PATCH bpf-next v6 1/3] riscv, bpf: " Pu Lehui
2024-07-02 12:19 ` Pu Lehui
2024-07-02 13:40 ` Puranjay Mohan
2024-07-02 13:40 ` Puranjay Mohan
2024-07-05 12:51 ` Puranjay Mohan [this message]
2024-07-05 12:51 ` Puranjay Mohan
2024-07-06 2:28 ` Pu Lehui
2024-07-06 2:28 ` Pu Lehui
2024-07-02 12:19 ` [PATCH bpf-next v6 2/3] selftests/bpf: Factor out many args tests from tracing_struct Pu Lehui
2024-07-02 12:19 ` Pu Lehui
2024-07-02 12:19 ` [PATCH bpf-next v6 3/3] selftests/bpf: Add testcase where 7th argment is struct Pu Lehui
2024-07-02 12:19 ` Pu Lehui
2024-07-02 13:57 ` [PATCH bpf-next v6 0/3] Add 12-argument support for RV64 bpf trampoline Jiri Olsa
2024-07-02 13:57 ` Jiri Olsa
2024-07-02 14:10 ` patchwork-bot+netdevbpf
2024-07-02 14:10 ` patchwork-bot+netdevbpf
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=mb61p7ce0roh3.fsf@kernel.org \
--to=puranjay@kernel.org \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bjorn@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=eddyz87@gmail.com \
--cc=haoluo@google.com \
--cc=john.fastabend@gmail.com \
--cc=jolsa@kernel.org \
--cc=kpsingh@kernel.org \
--cc=linux-riscv@lists.infradead.org \
--cc=martin.lau@linux.dev \
--cc=netdev@vger.kernel.org \
--cc=palmer@dabbelt.com \
--cc=pulehui@huawei.com \
--cc=pulehui@huaweicloud.com \
--cc=sdf@google.com \
--cc=song@kernel.org \
--cc=yonghong.song@linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.