netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Martin KaFai Lau <martin.lau@linux.dev>
To: Xu Kuohai <xukuohai@huaweicloud.com>
Cc: bpf@vger.kernel.org, netdev@vger.kernel.org,
	Alexei Starovoitov <ast@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Andrii Nakryiko <andrii@kernel.org>,
	Eduard Zingerman <eddyz87@gmail.com>,
	Yonghong Song <yonghong.song@linux.dev>,
	Kui-Feng Lee <thinker.li@gmail.com>
Subject: Re: [PATCH bpf-next v3 2/2] bpf: Add kernel symbol for struct_ops trampoline
Date: Mon, 11 Nov 2024 15:04:42 -0800	[thread overview]
Message-ID: <f35f4d0e-77df-4e52-b62e-9e1254fb4b5c@linux.dev> (raw)
In-Reply-To: <20241111121641.2679885-3-xukuohai@huaweicloud.com>

On 11/11/24 4:16 AM, Xu Kuohai wrote:
> diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
> index e99fce81e916..d6dd56fc80d8 100644
> --- a/kernel/bpf/bpf_struct_ops.c
> +++ b/kernel/bpf/bpf_struct_ops.c
> @@ -23,7 +23,6 @@ struct bpf_struct_ops_value {
>   
>   struct bpf_struct_ops_map {
>   	struct bpf_map map;
> -	struct rcu_head rcu;

Since it needs a respin (more on it later), it will be useful to separate this 
cleanup as a separate patch in the same patch series.

>   	const struct bpf_struct_ops_desc *st_ops_desc;
>   	/* protect map_update */
>   	struct mutex lock;
> @@ -32,6 +31,8 @@ struct bpf_struct_ops_map {
>   	 * (in kvalue.data).
>   	 */
>   	struct bpf_link **links;
> +	/* ksyms for bpf trampolines */
> +	struct bpf_ksym **ksyms;
>   	u32 funcs_cnt;
>   	u32 image_pages_cnt;
>   	/* image_pages is an array of pages that has all the trampolines
> @@ -586,6 +587,49 @@ int bpf_struct_ops_prepare_trampoline(struct bpf_tramp_links *tlinks,
>   	return 0;
>   }
>   
> +static void bpf_struct_ops_ksym_init(const char *tname, const char *mname,
> +				     void *image, unsigned int size,
> +				     struct bpf_ksym *ksym)
> +{
> +	snprintf(ksym->name, KSYM_NAME_LEN, "bpf__%s_%s", tname, mname);
> +	INIT_LIST_HEAD_RCU(&ksym->lnode);
> +	bpf_image_ksym_init(image, size, ksym);
> +}
> +
> +static void bpf_struct_ops_map_ksyms_add(struct bpf_struct_ops_map *st_map)
> +{
> +	u32 i;
> +
> +	for (i = 0; i < st_map->funcs_cnt; i++) {
> +		if (!st_map->ksyms[i])
> +			break;
> +		bpf_image_ksym_add(st_map->ksyms[i]);
> +	}
> +}
> +
> +static void bpf_struct_ops_map_del_ksyms(struct bpf_struct_ops_map *st_map)
> +{
> +	u32 i;
> +
> +	for (i = 0; i < st_map->funcs_cnt; i++) {
> +		if (!st_map->ksyms[i])
> +			break;
> +		bpf_image_ksym_del(st_map->ksyms[i]);
> +	}
> +}
> +
> +static void bpf_struct_ops_map_free_ksyms(struct bpf_struct_ops_map *st_map)
> +{
> +	u32 i;
> +
> +	for (i = 0; i < st_map->funcs_cnt; i++) {
> +		if (!st_map->ksyms[i])
> +			break;
> +		kfree(st_map->ksyms[i]);
> +		st_map->links[i] = NULL;

s/links/ksyms/

pw-bot: cr

> +	}
> +}
> +
>   static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>   					   void *value, u64 flags)
>   {
> @@ -602,6 +646,8 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>   	u32 i, trampoline_start, image_off = 0;
>   	void *cur_image = NULL, *image = NULL;
>   	struct bpf_link **plink;
> +	struct bpf_ksym **pksym;
> +	const char *tname, *mname;
>   
>   	if (flags)
>   		return -EINVAL;
> @@ -641,14 +687,18 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>   	kdata = &kvalue->data;
>   
>   	plink = st_map->links;
> +	pksym = st_map->ksyms;
> +	tname = btf_name_by_offset(st_map->btf, t->name_off);
>   	module_type = btf_type_by_id(btf_vmlinux, st_ops_ids[IDX_MODULE_ID]);
>   	for_each_member(i, t, member) {
>   		const struct btf_type *mtype, *ptype;
>   		struct bpf_prog *prog;
>   		struct bpf_tramp_link *link;
> +		struct bpf_ksym *ksym;
>   		u32 moff;
>   
>   		moff = __btf_member_bit_offset(t, member) / 8;
> +		mname = btf_name_by_offset(st_map->btf, member->name_off);
>   		ptype = btf_type_resolve_ptr(st_map->btf, member->type, NULL);
>   		if (ptype == module_type) {
>   			if (*(void **)(udata + moff))
> @@ -718,6 +768,14 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>   			      &bpf_struct_ops_link_lops, prog);
>   		*plink++ = &link->link;
>   
> +		ksym = kzalloc(sizeof(*ksym), GFP_USER);

link is also using kzalloc but probably both link and ksym allocation should use 
bpf_map_kzalloc instead. This switch can be done for both together later as a 
follow up patch.

> +		if (!ksym) {
> +			bpf_prog_put(prog);

afaik, this bpf_prog_put is not needed. The bpf_link_init above took the prog 
ownership and the bpf_struct_ops_map_put_progs() at the error path will take 
care of it.

> +			err = -ENOMEM;
> +			goto reset_unlock;
> +		}
> +		*pksym = ksym;

nit. Follow the *plink++ style above and does the same *pksym++ here.

> +
>   		trampoline_start = image_off;
>   		err = bpf_struct_ops_prepare_trampoline(tlinks, link,
>   						&st_ops->func_models[i],
> @@ -737,6 +795,12 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>   
>   		/* put prog_id to udata */
>   		*(unsigned long *)(udata + moff) = prog->aux->id;
> +
> +		/* init ksym for this trampoline */
> +		bpf_struct_ops_ksym_init(tname, mname,
> +					 image + trampoline_start,
> +					 image_off - trampoline_start,
> +					 *pksym++);

then uses "ksym" here.

>   	}
>   
>   	if (st_ops->validate) {
> @@ -785,6 +849,7 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>   	 */
>   
>   reset_unlock:
> +	bpf_struct_ops_map_free_ksyms(st_map);
>   	bpf_struct_ops_map_free_image(st_map);
>   	bpf_struct_ops_map_put_progs(st_map);
>   	memset(uvalue, 0, map->value_size);
> @@ -792,6 +857,8 @@ static long bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>   unlock:
>   	kfree(tlinks);
>   	mutex_unlock(&st_map->lock);
> +	if (!err)
> +		bpf_struct_ops_map_ksyms_add(st_map);
>   	return err;
>   }
>   
> @@ -851,7 +918,10 @@ static void __bpf_struct_ops_map_free(struct bpf_map *map)
>   
>   	if (st_map->links)
>   		bpf_struct_ops_map_put_progs(st_map);
> +	if (st_map->ksyms)
> +		bpf_struct_ops_map_free_ksyms(st_map);
>   	bpf_map_area_free(st_map->links);
> +	bpf_map_area_free(st_map->ksyms);
>   	bpf_struct_ops_map_free_image(st_map);
>   	bpf_map_area_free(st_map->uvalue);
>   	bpf_map_area_free(st_map);
> @@ -868,6 +938,9 @@ static void bpf_struct_ops_map_free(struct bpf_map *map)
>   	if (btf_is_module(st_map->btf))
>   		module_put(st_map->st_ops_desc->st_ops->owner);
>   
> +	if (st_map->ksyms)

This null test should not be needed.

> +		bpf_struct_ops_map_del_ksyms(st_map);
> +
>   	/* The struct_ops's function may switch to another struct_ops.
>   	 *
>   	 * For example, bpf_tcp_cc_x->init() may switch to
> @@ -980,7 +1053,11 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
>   	st_map->links =
>   		bpf_map_area_alloc(st_map->funcs_cnt * sizeof(struct bpf_links *),
>   				   NUMA_NO_NODE);
> -	if (!st_map->uvalue || !st_map->links) {
> +
> +	st_map->ksyms =
> +		bpf_map_area_alloc(st_map->funcs_cnt * sizeof(struct bpf_ksyms *),
> +				   NUMA_NO_NODE);

.map_mem_usage at least needs to include the st_map->ksyms[] pointer array. 
func_cnts should be used instead of btf_type_vlen(vt) for link also in 
.map_mem_usage.

> +	if (!st_map->uvalue || !st_map->links || !st_map->ksyms) {
>   		ret = -ENOMEM;
>   		goto errout_free;
>   	}

  reply	other threads:[~2024-11-11 23:04 UTC|newest]

Thread overview: 6+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-11-11 12:16 [PATCH bpf-next v3 0/2] Add kernel symbol for struct_ops trampoline Xu Kuohai
2024-11-11 12:16 ` [PATCH bpf-next v3 1/2] bpf: Use function pointers count as struct_ops links count Xu Kuohai
2024-11-11 12:16 ` [PATCH bpf-next v3 2/2] bpf: Add kernel symbol for struct_ops trampoline Xu Kuohai
2024-11-11 23:04   ` Martin KaFai Lau [this message]
2024-11-12 12:06     ` Xu Kuohai
2024-11-11 12:54 ` [PATCH bpf-next v3 0/2] " Xu Kuohai

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=f35f4d0e-77df-4e52-b62e-9e1254fb4b5c@linux.dev \
    --to=martin.lau@linux.dev \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=eddyz87@gmail.com \
    --cc=netdev@vger.kernel.org \
    --cc=thinker.li@gmail.com \
    --cc=xukuohai@huaweicloud.com \
    --cc=yonghong.song@linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).