From: Martin KaFai Lau <martin.lau@linux.dev>
To: Xu Kuohai <xukuohai@huaweicloud.com>
Cc: bpf@vger.kernel.org, netdev@vger.kernel.org,
Alexei Starovoitov <ast@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
Andrii Nakryiko <andrii@kernel.org>,
Eduard Zingerman <eddyz87@gmail.com>,
Yonghong Song <yonghong.song@linux.dev>,
Kui-Feng Lee <thinker.li@gmail.com>
Subject: Re: [PATCH bpf-next v3 2/2] bpf: Add kernel symbol for struct_ops trampoline
Date: Mon, 11 Nov 2024 15:04:42 -0800 [thread overview]
Message-ID: <f35f4d0e-77df-4e52-b62e-9e1254fb4b5c@linux.dev> (raw)
In-Reply-To: <20241111121641.2679885-3-xukuohai@huaweicloud.com>
On 11/11/24 4:16 AM, Xu Kuohai wrote:
> diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
> index e99fce81e916..d6dd56fc80d8 100644
> --- a/kernel/bpf/bpf_struct_ops.c
> +++ b/kernel/bpf/bpf_struct_ops.c
> @@ -23,7 +23,6 @@ struct bpf_struct_ops_value {
>
> struct bpf_struct_ops_map {
> struct bpf_map map;
> - struct rcu_head rcu;
Since it needs a respin (more on it later), it will be useful to separate this
cleanup as a separate patch in the same patch series.
> const struct bpf_struct_ops_desc *st_ops_desc;
> /* protect map_update */
> struct mutex lock;
> @@ -32,6 +31,8 @@ struct bpf_struct_ops_map {
> * (in kvalue.data).
> */
> struct bpf_link **links;
> + /* ksyms for bpf trampolines */
> + struct bpf_ksym **ksyms;
> u32 funcs_cnt;
> u32 image_pages_cnt;
> /* image_pages is an array of pages that has all the trampolines
> @@ -586,6 +587,49 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
> return 0;
> }
>
> +static void bpf_struct_ops_ksym_init(const char *tname, const char *mname,
> + void *image, unsigned int size,
> + struct bpf_ksym *ksym)
> +{
> + snprintf(ksym->name, KSYM_NAME_LEN, "bpf__%s_%s", tname, mname);
> + INIT_LIST_HEAD_RCU(&ksym->lnode);
> + bpf_image_ksym_init(image, size, ksym);
> +}
> +
> +static void bpf_struct_ops_map_ksyms_add(struct bpf_struct_ops_map *st_map)
> +{
> + u32 i;
> +
> + for (i = 0; i < st_map->funcs_cnt; i++) {
> + if (!st_map->ksyms[i])
> + break;
> + bpf_image_ksym_add(st_map->ksyms[i]);
> + }
> +}
> +
> +static void bpf_struct_ops_map_del_ksyms(struct bpf_struct_ops_map *st_map)
> +{
> + u32 i;
> +
> + for (i = 0; i < st_map->funcs_cnt; i++) {
> + if (!st_map->ksyms[i])
> + break;
> + bpf_image_ksym_del(st_map->ksyms[i]);
> + }
> +}
> +
> +static void bpf_struct_ops_map_free_ksyms(struct bpf_struct_ops_map *st_map)
> +{
> + u32 i;
> +
> + for (i = 0; i < st_map->funcs_cnt; i++) {
> + if (!st_map->ksyms[i])
> + break;
> + kfree(st_map->ksyms[i]);
> + st_map->links[i] = NULL;
s/links/ksyms/
pw-bot: cr
> + }
> +}
> +
> static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
> void *value, u64 flags)
> {
> @@ -602,6 +646,8 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
> u32 i, trampoline_start, image_off = 0;
> void *cur_image = NULL, *image = NULL;
> struct bpf_link **plink;
> + struct bpf_ksym **pksym;
> + const char *tname, *mname;
>
> if (flags)
> return -EINVAL;
> @@ -641,14 +687,18 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
> kdata = &kvalue->data;
>
> plink = st_map->links;
> + pksym = st_map->ksyms;
> + tname = btf_name_by_offset(st_map->btf, t->name_off);
> module_type = btf_type_by_id(btf_vmlinux, st_ops_ids[IDX_MODULE_ID]);
> for_each_member(i, t, member) {
> const struct btf_type *mtype, *ptype;
> struct bpf_prog *prog;
> struct bpf_tramp_link *link;
> + struct bpf_ksym *ksym;
> u32 moff;
>
> moff = __btf_member_bit_offset(t, member) / 8;
> + mname = btf_name_by_offset(st_map->btf, member->name_off);
> ptype = btf_type_resolve_ptr(st_map->btf, member->type, NULL);
> if (ptype == module_type) {
> if (*(void **)(udata + moff))
> @@ -718,6 +768,14 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
> &bpf_struct_ops_link_lops, prog);
> *plink++ = &link->link;
>
> + ksym = kzalloc(sizeof(*ksym), GFP_USER);
link is also using kzalloc but probably both link and ksym allocation should use
bpf_map_kzalloc instead. This switch can be done for both together later as a
follow up patch.
> + if (!ksym) {
> + bpf_prog_put(prog);
afaik, this bpf_prog_put is not needed. The bpf_link_init above took the prog
ownership and the bpf_struct_ops_map_put_progs() at the error path will take
care of it.
> + err = -ENOMEM;
> + goto reset_unlock;
> + }
> + *pksym = ksym;
nit. Follow the *plink++ style above and does the same *pksym++ here.
> +
> trampoline_start = image_off;
> err = bpf_struct_ops_prepare_trampoline(tlinks, link,
> &st_ops->func_models[i],
> @@ -737,6 +795,12 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>
> /* put prog_id to udata */
> *(unsigned long *)(udata + moff) = prog->aux->id;
> +
> + /* init ksym for this trampoline */
> + bpf_struct_ops_ksym_init(tname, mname,
> + image + trampoline_start,
> + image_off - trampoline_start,
> + *pksym++);
then uses "ksym" here.
> }
>
> if (st_ops->validate) {
> @@ -785,6 +849,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
> */
>
> reset_unlock:
> + bpf_struct_ops_map_free_ksyms(st_map);
> bpf_struct_ops_map_free_image(st_map);
> bpf_struct_ops_map_put_progs(st_map);
> memset(uvalue, 0, map->value_size);
> @@ -792,6 +857,8 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
> unlock:
> kfree(tlinks);
> mutex_unlock(&st_map->lock);
> + if (!err)
> + bpf_struct_ops_map_ksyms_add(st_map);
> return err;
> }
>
> @@ -851,7 +918,10 @@ static void __bpf_struct_ops_map_free(struct bpf_map *map)
>
> if (st_map->links)
> bpf_struct_ops_map_put_progs(st_map);
> + if (st_map->ksyms)
> + bpf_struct_ops_map_free_ksyms(st_map);
> bpf_map_area_free(st_map->links);
> + bpf_map_area_free(st_map->ksyms);
> bpf_struct_ops_map_free_image(st_map);
> bpf_map_area_free(st_map->uvalue);
> bpf_map_area_free(st_map);
> @@ -868,6 +938,9 @@ static void bpf_struct_ops_map_free(struct bpf_map *map)
> if (btf_is_module(st_map->btf))
> module_put(st_map->st_ops_desc->st_ops->owner);
>
> + if (st_map->ksyms)
This null test should not be needed.
> + bpf_struct_ops_map_del_ksyms(st_map);
> +
> /* The struct_ops's function may switch to another struct_ops.
> *
> * For example, bpf_tcp_cc_x->init() may switch to
> @@ -980,7 +1053,11 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
> st_map->links =
> bpf_map_area_alloc(st_map->funcs_cnt * sizeof(struct bpf_links *),
> NUMA_NO_NODE);
> - if (!st_map->uvalue || !st_map->links) {
> +
> + st_map->ksyms =
> + bpf_map_area_alloc(st_map->funcs_cnt * sizeof(struct bpf_ksyms *),
> + NUMA_NO_NODE);
.map_mem_usage at least needs to include the st_map->ksyms[] pointer array.
func_cnts should be used instead of btf_type_vlen(vt) for link also in
.map_mem_usage.
> + if (!st_map->uvalue || !st_map->links || !st_map->ksyms) {
> ret = -ENOMEM;
> goto errout_free;
> }
next prev parent reply other threads:[~2024-11-11 23:04 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-11-11 12:16 [PATCH bpf-next v3 0/2] Add kernel symbol for struct_ops trampoline Xu Kuohai
2024-11-11 12:16 ` [PATCH bpf-next v3 1/2] bpf: Use function pointers count as struct_ops links count Xu Kuohai
2024-11-11 12:16 ` [PATCH bpf-next v3 2/2] bpf: Add kernel symbol for struct_ops trampoline Xu Kuohai
2024-11-11 23:04 ` Martin KaFai Lau [this message]
2024-11-12 12:06 ` Xu Kuohai
2024-11-11 12:54 ` [PATCH bpf-next v3 0/2] " Xu Kuohai
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=f35f4d0e-77df-4e52-b62e-9e1254fb4b5c@linux.dev \
--to=martin.lau@linux.dev \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=eddyz87@gmail.com \
--cc=netdev@vger.kernel.org \
--cc=thinker.li@gmail.com \
--cc=xukuohai@huaweicloud.com \
--cc=yonghong.song@linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.