From: Kui-Feng Lee <sinquersw@gmail.com>
To: Martin KaFai Lau <martin.lau@linux.dev>, thinker.li@gmail.com
Cc: kuifeng@meta.com, bpf@vger.kernel.org, ast@kernel.org,
song@kernel.org, kernel-team@meta.com, andrii@kernel.org
Subject: Re: [PATCH bpf-next v2 2/3] bpf: struct_ops supports more than one page for trampolines.
Date: Thu, 22 Feb 2024 17:35:10 -0800 [thread overview]
Message-ID: <3e4cc350-34c9-42c1-944f-303a466022d2@gmail.com> (raw)
In-Reply-To: <c59cc446-531b-4b4a-897d-3b298ac72dd2@linux.dev>
On 2/22/24 16:33, Martin KaFai Lau wrote:
> On 2/21/24 2:59 PM, thinker.li@gmail.com wrote:
>> @@ -531,10 +567,10 @@ static long
>> bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>> const struct btf_type *module_type;
>> const struct btf_member *member;
>> const struct btf_type *t = st_ops_desc->type;
>> + void *image = NULL, *image_end = NULL;
>> struct bpf_tramp_links *tlinks;
>> void *udata, *kdata;
>> int prog_fd, err;
>> - void *image, *image_end;
>> u32 i;
>> if (flags)
>> @@ -573,15 +609,14 @@ static long
>> bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>> udata = &uvalue->data;
>> kdata = &kvalue->data;
>> - image = st_map->image;
>> - image_end = st_map->image + PAGE_SIZE;
>> module_type = btf_type_by_id(btf_vmlinux,
>> st_ops_ids[IDX_MODULE_ID]);
>> for_each_member(i, t, member) {
>> const struct btf_type *mtype, *ptype;
>> struct bpf_prog *prog;
>> struct bpf_tramp_link *link;
>> - u32 moff;
>> + u32 moff, tflags;
>> + int tsize;
>> moff = __btf_member_bit_offset(t, member) / 8;
>> ptype = btf_type_resolve_ptr(st_map->btf, member->type, NULL);
>> @@ -653,10 +688,38 @@ static long
>> bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>> &bpf_struct_ops_link_lops, prog);
>> st_map->links[i] = &link->link;
>> - err = bpf_struct_ops_prepare_trampoline(tlinks, link,
>> - &st_ops->func_models[i],
>> - *(void **)(st_ops->cfi_stubs + moff),
>> - image, image_end);
>> + tflags = BPF_TRAMP_F_INDIRECT;
>> + if (st_ops->func_models[i].ret_size > 0)
>> + tflags |= BPF_TRAMP_F_RET_FENTRY_RET;
>> +
>> + /* Compute the size of the trampoline */
>> + tlinks[BPF_TRAMP_FENTRY].links[0] = link;
>> + tlinks[BPF_TRAMP_FENTRY].nr_links = 1;
>> + tsize = arch_bpf_trampoline_size(&st_ops->func_models[i],
>> + tflags, tlinks, NULL);
>> + if (tsize < 0) {
>> + err = tsize;
>> + goto reset_unlock;
>> + }
>> +
>> + /* Allocate pages */
>> + if (tsize > (unsigned long)image_end - (unsigned long)image) {
>> + if (tsize > PAGE_SIZE) {
>> + err = -E2BIG;
>> + goto reset_unlock;
>> + }
>> + image = bpf_struct_ops_map_inc_image(st_map);
>> + if (IS_ERR(image)) {
>> + err = PTR_ERR(image);
>> + goto reset_unlock;
>> + }
>> + image_end = image + PAGE_SIZE;
>> + }
>> +
>> + err = arch_prepare_bpf_trampoline(NULL, image, image_end,
>> + &st_ops->func_models[i],
>> + tflags, tlinks,
>> + *(void **)(st_ops->cfi_stubs + moff));
>
> I don't prefer to copy the BPF_TRAMP_F_* setting on tflags, tlinks, and
> the arch_*_trampoline_*() logic from bpf_struct_ops_prepare_trampoline()
> which is used by the bpf_dummy_ops for testing also. Considering
> struct_ops supports kernel module now, in the future, it is better to
> move bpf_dummy_ops out to the bpf_testmod somehow and avoid its
> bpf_struct_ops_prepare_trampoline() usage. For now, it is still better
> to keep bpf_struct_ops_prepare_trampoline() to be reusable by both.
>
> Have you thought about the earlier suggestion in v1 to do
> arch_alloc_bpf_trampoline() in bpf_struct_ops_prepare_trampoline()
> instead of copying codes from bpf_struct_ops_prepare_trampoline() to
> bpf_struct_ops_map_update_elem()?
>
> Something like this (untested code):
>
> void *bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
> struct bpf_tramp_link *link,
> const struct btf_func_model *model,
> void *stub_func, void *image,
> u32 *image_off,
> bool allow_alloc)
How about pass a struct bpf_struct_ops_map to
bpf_struct_ops_prepare_trampoline(). If the pointer of struct
bpf_struct_ops_map is not NULL, try to allocate new pages for the map?
For example,
static int
_bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
struct bpf_tramp_link *link,
const struct btf_func_model *model,
void *stub_func, void *image,
void *image_end,
struct bpf_struct_ops_map *st_map)
{
...
if (!image || size > PAGE_SIZE - *image_off) {
if (!st_map)
return -E2BIG;
image = bpf_struct_ops_map_inc_image(st_map);
if (IS_ERR(image))
return PTR_ERR(image);
image_end = image + PAGE_SIZE;
}
...
}
int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
struct bpf_tramp_link *link,
const struct
btf_func_model*model,
void *stub_func, void *image,
void *image_end)
{
return _bpf_struct_ops_prepare_trampoline(tlinks, link, model,
stub_func, image,
image_end, NULL);
}
> {
> u32 flags = BPF_TRAMP_F_INDIRECT;
> void *new_image = NULL;
> int size;
>
> tlinks[BPF_TRAMP_FENTRY].links[0] = link;
> tlinks[BPF_TRAMP_FENTRY].nr_links = 1;
>
> if (model->ret_size > 0)
> flags |= BPF_TRAMP_F_RET_FENTRY_RET;
>
> size = arch_bpf_trampoline_size(model, flags, tlinks, NULL);
> if (size < 0)
> return ERR_PTR(size);
> if (!image || size > PAGE_SIZE - *image_off) {
> int err;
>
> if (!allow_alloc)
> return ERR_PTR(-E2BIG);
>
> err = bpf_jit_charge_modmem(PAGE_SIZE);
> if (err)
> return ERR_PTR(err);
>
> new_image = image = arch_alloc_bpf_trampoline(PAGE_SIZE);
> if (!new_image) {
> bpf_jit_uncharge_modmem(PAGE_SIZE);
> return ERR_PTR(-ENOMEM);
> }
> *image_off = 0;
> }
>
>
> size = arch_prepare_bpf_trampoline(NULL, image + *image_off,
> image + PAGE_SIZE,
> model, flags, tlinks, stub_func);
> if (size >= 0) {
> *image_off += size;
> return image;
> }
>
> if (new_image) {
> bpf_jit_uncharge_modmem(PAGE_SIZE);
> arch_free_bpf_trampoline(new_image, PAGE_SIZE);
> }
>
> return ERR_PTR(size);
> }
>
> ----
>
> pw-bot: cr
>
>> if (err < 0)
>> goto reset_unlock;
>> @@ -672,10 +735,11 @@ static long
>> bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>> if (err)
>> goto reset_unlock;
>> }
>> + for (i = 0; i < st_map->image_pages_cnt; i++)
>> + arch_protect_bpf_trampoline(st_map->image_pages[i], PAGE_SIZE);
>> if (st_map->map.map_flags & BPF_F_LINK) {
>> err = 0;
>> - arch_protect_bpf_trampoline(st_map->image, PAGE_SIZE);
>> /* Let bpf_link handle registration & unregistration.
>> *
>> * Pair with smp_load_acquire() during lookup_elem().
>> @@ -684,7 +748,6 @@ static long bpf_struct_ops_map_update_elem(struct
>> bpf_map *map, void *key,
>> goto unlock;
>> }
>> - arch_protect_bpf_trampoline(st_map->image, PAGE_SIZE);
>> err = st_ops->reg(kdata);
>> if (likely(!err)) {
>> /* This refcnt increment on the map here after
>> @@ -707,9 +770,9 @@ static long bpf_struct_ops_map_update_elem(struct
>> bpf_map *map, void *key,
>> * there was a race in registering the struct_ops (under the
>> same name) to
>> * a sub-system through different struct_ops's maps.
>> */
>> - arch_unprotect_bpf_trampoline(st_map->image, PAGE_SIZE);
>> reset_unlock:
>> + bpf_struct_ops_map_free_image(st_map);
>> bpf_struct_ops_map_put_progs(st_map);
>> memset(uvalue, 0, map->value_size);
>> memset(kvalue, 0, map->value_size);
>> @@ -776,10 +839,7 @@ static void __bpf_struct_ops_map_free(struct
>> bpf_map *map)
>> if (st_map->links)
>> bpf_struct_ops_map_put_progs(st_map);
>> bpf_map_area_free(st_map->links);
>> - if (st_map->image) {
>> - arch_free_bpf_trampoline(st_map->image, PAGE_SIZE);
>> - bpf_jit_uncharge_modmem(PAGE_SIZE);
>> - }
>> + bpf_struct_ops_map_free_image(st_map);
>> bpf_map_area_free(st_map->uvalue);
>> bpf_map_area_free(st_map);
>> }
>> @@ -889,20 +949,6 @@ static struct bpf_map
>> *bpf_struct_ops_map_alloc(union bpf_attr *attr)
>> st_map->st_ops_desc = st_ops_desc;
>> map = &st_map->map;
>> - ret = bpf_jit_charge_modmem(PAGE_SIZE);
>> - if (ret)
>> - goto errout_free;
>> -
>> - st_map->image = arch_alloc_bpf_trampoline(PAGE_SIZE);
>> - if (!st_map->image) {
>> - /* __bpf_struct_ops_map_free() uses st_map->image as flag
>> - * for "charged or not". In this case, we need to unchange
>> - * here.
>> - */
>> - bpf_jit_uncharge_modmem(PAGE_SIZE);
>> - ret = -ENOMEM;
>> - goto errout_free;
>> - }
>> st_map->uvalue = bpf_map_area_alloc(vt->size, NUMA_NO_NODE);
>> st_map->links_cnt = btf_type_vlen(t);
>> st_map->links =
>
next prev parent reply other threads:[~2024-02-23 1:35 UTC|newest]
Thread overview: 18+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-02-21 22:59 [PATCH bpf-next v2 0/3] Allow struct_ops maps with a large number of programs thinker.li
2024-02-21 22:59 ` [PATCH bpf-next v2 1/3] bpf, net: validate struct_ops when updating value thinker.li
2024-02-21 22:59 ` [PATCH bpf-next v2 2/3] bpf: struct_ops supports more than one page for trampolines thinker.li
2024-02-23 0:33 ` Martin KaFai Lau
2024-02-23 1:35 ` Kui-Feng Lee [this message]
2024-02-23 2:16 ` Martin KaFai Lau
2024-02-23 3:01 ` Kui-Feng Lee
2024-02-23 5:25 ` Martin KaFai Lau
2024-02-23 17:36 ` Kui-Feng Lee
2024-02-23 18:29 ` Kui-Feng Lee
2024-02-23 18:42 ` Martin KaFai Lau
2024-02-23 19:05 ` Kui-Feng Lee
2024-02-23 19:15 ` Martin KaFai Lau
2024-02-23 22:06 ` Kui-Feng Lee
2024-02-24 3:20 ` Martin KaFai Lau
2024-02-23 18:32 ` Martin KaFai Lau
2024-02-21 22:59 ` [PATCH bpf-next v2 3/3] selftests/bpf: Test struct_ops maps with a large number of program links thinker.li
2024-02-21 23:02 ` [PATCH bpf-next v2 0/3] Allow struct_ops maps with a large number of programs Kui-Feng Lee
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=3e4cc350-34c9-42c1-944f-303a466022d2@gmail.com \
--to=sinquersw@gmail.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=kernel-team@meta.com \
--cc=kuifeng@meta.com \
--cc=martin.lau@linux.dev \
--cc=song@kernel.org \
--cc=thinker.li@gmail.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox