Re: [PATCH bpf-next 3/7] bpf: Register and unregister a struct_ops by their bpf_links.

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Stanislav Fomichev <sdf@google.com>
To: Kui-Feng Lee <kuifeng@meta.com>
Cc: bpf@vger.kernel.org, ast@kernel.org, martin.lau@linux.dev,
	song@kernel.org, kernel-team@meta.com, andrii@kernel.org
Subject: Re: [PATCH bpf-next 3/7] bpf: Register and unregister a struct_ops by their bpf_links.
Date: Tue, 14 Feb 2023 18:53:24 -0800	[thread overview]
Message-ID: <Y+xJJLAhPBzboOvo@google.com> (raw)
In-Reply-To: <20230214221718.503964-4-kuifeng@meta.com>

On 02/14, Kui-Feng Lee wrote:
> Registration via bpf_links ensures a uniform behavior, just like other
> BPF programs.  BPF struct_ops were registered/unregistered when
> updating/deleting their values.  Only the maps of struct_ops having
> the BPF_F_LINK flag are allowed to back a bpf_link.

> Signed-off-by: Kui-Feng Lee <kuifeng@meta.com>
> ---
>   include/uapi/linux/bpf.h       |  3 ++
>   kernel/bpf/bpf_struct_ops.c    | 59 +++++++++++++++++++++++++++++++---
>   tools/include/uapi/linux/bpf.h |  3 ++
>   3 files changed, 61 insertions(+), 4 deletions(-)

> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 1e6cdd0f355d..48d8b3058aa1 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -1267,6 +1267,9 @@ enum {

>   /* Create a map that is suitable to be an inner map with dynamic max  
> entries */
>   	BPF_F_INNER_MAP		= (1U << 12),
> +
> +/* Create a map that will be registered/unregesitered by the backed  
> bpf_link */
> +	BPF_F_LINK		= (1U << 13),
>   };

>   /* Flags for BPF_PROG_QUERY. */
> diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
> index 621c8e24481a..d16ca06cf09a 100644
> --- a/kernel/bpf/bpf_struct_ops.c
> +++ b/kernel/bpf/bpf_struct_ops.c
> @@ -390,7 +390,7 @@ static int bpf_struct_ops_map_update_elem(struct  
> bpf_map *map, void *key,

>   	mutex_lock(&st_map->lock);

> -	if (kvalue->state != BPF_STRUCT_OPS_STATE_INIT) {
> +	if (kvalue->state != BPF_STRUCT_OPS_STATE_INIT ||  
> refcount_read(&kvalue->refcnt)) {
>   		err = -EBUSY;
>   		goto unlock;
>   	}
> @@ -491,6 +491,12 @@ static int bpf_struct_ops_map_update_elem(struct  
> bpf_map *map, void *key,
>   		*(unsigned long *)(udata + moff) = prog->aux->id;
>   	}

> +	if (st_map->map.map_flags & BPF_F_LINK) {
> +		/* Let bpf_link handle registration & unregistration. */
> +		smp_store_release(&kvalue->state, BPF_STRUCT_OPS_STATE_INUSE);
> +		goto unlock;
> +	}
> +
>   	refcount_set(&kvalue->refcnt, 1);
>   	bpf_map_inc(map);


[..]

> @@ -522,6 +528,7 @@ static int bpf_struct_ops_map_update_elem(struct  
> bpf_map *map, void *key,
>   	kfree(tlinks);
>   	mutex_unlock(&st_map->lock);
>   	return err;
> +
>   }

>   static int bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key)

Seems like a left over hunk?

> @@ -535,6 +542,8 @@ static int bpf_struct_ops_map_delete_elem(struct  
> bpf_map *map, void *key)
>   			     BPF_STRUCT_OPS_STATE_TOBEFREE);
>   	switch (prev_state) {
>   	case BPF_STRUCT_OPS_STATE_INUSE:
> +		if (st_map->map.map_flags & BPF_F_LINK)
> +			return 0;
>   		st_map->st_ops->unreg(&st_map->kvalue.data);
>   		if (refcount_dec_and_test(&st_map->kvalue.refcnt))
>   			bpf_map_put(map);
> @@ -585,7 +594,7 @@ static void bpf_struct_ops_map_free(struct bpf_map  
> *map)
>   static int bpf_struct_ops_map_alloc_check(union bpf_attr *attr)
>   {
>   	if (attr->key_size != sizeof(unsigned int) || attr->max_entries != 1 ||
> -	    attr->map_flags || !attr->btf_vmlinux_value_type_id)
> +	    (attr->map_flags & ~BPF_F_LINK) || !attr->btf_vmlinux_value_type_id)
>   		return -EINVAL;
>   	return 0;
>   }
> @@ -638,6 +647,8 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union  
> bpf_attr *attr)
>   	set_vm_flush_reset_perms(st_map->image);
>   	bpf_map_init_from_attr(map, attr);


[..]

> +	map->map_flags |= attr->map_flags & BPF_F_LINK;

You seem to have the following check above:

if (.... (attr->map_flags & ~BPF_F_LINK) ...) return -EINVAL;

And here you do:

map->map_flags |= attr->map_flags & BPF_F_LINK;

So maybe we can simplify to:
map->map_flags |= attr->map_flags;

?

> +
>   	return map;
>   }

> @@ -699,10 +710,25 @@ void bpf_struct_ops_put(const void *kdata)
>   	}
>   }

> +static void bpf_struct_ops_kvalue_put(struct bpf_struct_ops_value  
> *kvalue)
> +{
> +	struct bpf_struct_ops_map *st_map;
> +
> +	if (refcount_dec_and_test(&kvalue->refcnt)) {
> +		st_map = container_of(kvalue, struct bpf_struct_ops_map,
> +				      kvalue);
> +		bpf_map_put(&st_map->map);
> +	}
> +}
> +
>   static void bpf_struct_ops_map_link_release(struct bpf_link *link)
>   {
> +	struct bpf_struct_ops_map *st_map;
> +
>   	if (link->map) {
> -		bpf_map_put(link->map);
> +		st_map = (struct bpf_struct_ops_map *)link->map;
> +		st_map->st_ops->unreg(&st_map->kvalue.data);
> +		bpf_struct_ops_kvalue_put(&st_map->kvalue);
>   		link->map = NULL;
>   	}
>   }
> @@ -735,13 +761,15 @@ static const struct bpf_link_ops  
> bpf_struct_ops_map_lops = {

>   int link_create_struct_ops_map(union bpf_attr *attr, bpfptr_t uattr)
>   {
> +	struct bpf_struct_ops_map *st_map;
>   	struct bpf_link_primer link_primer;
> +	struct bpf_struct_ops_value *kvalue;
>   	struct bpf_map *map;
>   	struct bpf_link *link = NULL;
>   	int err;

>   	map = bpf_map_get(attr->link_create.prog_fd);
> -	if (map->map_type != BPF_MAP_TYPE_STRUCT_OPS)
> +	if (map->map_type != BPF_MAP_TYPE_STRUCT_OPS || !(map->map_flags &  
> BPF_F_LINK))
>   		return -EINVAL;

>   	link = kzalloc(sizeof(*link), GFP_USER);
> @@ -752,6 +780,29 @@ int link_create_struct_ops_map(union bpf_attr *attr,  
> bpfptr_t uattr)
>   	bpf_link_init(link, BPF_LINK_TYPE_STRUCT_OPS, &bpf_struct_ops_map_lops,  
> NULL);
>   	link->map = map;


[..]

> +	if (map->map_flags & BPF_F_LINK) {

We seem to bail out above when we don't have BPF_F_LINK flags above?

if (map->map_type != BPF_MAP_TYPE_STRUCT_OPS || !(map->map_flags &  
BPF_F_LINK))
	return -EINVAL;

So why check this 'if (map->map_flags & BPF_F_LINK)' condition here?


> +		st_map = (struct bpf_struct_ops_map *)map;
> +		kvalue = (struct bpf_struct_ops_value *)&st_map->kvalue;
> +
> +		if (kvalue->state != BPF_STRUCT_OPS_STATE_INUSE ||
> +		    refcount_read(&kvalue->refcnt) != 0) {
> +			err = -EINVAL;
> +			goto err_out;
> +		}
> +
> +		refcount_set(&kvalue->refcnt, 1);
> +
> +		set_memory_rox((long)st_map->image, 1);
> +		err = st_map->st_ops->reg(kvalue->data);
> +		if (err) {
> +			refcount_set(&kvalue->refcnt, 0);
> +
> +			set_memory_nx((long)st_map->image, 1);
> +			set_memory_rw((long)st_map->image, 1);
> +			goto err_out;
> +		}
> +	}
> +
>   	err = bpf_link_prime(link, &link_primer);
>   	if (err)
>   		goto err_out;
> diff --git a/tools/include/uapi/linux/bpf.h  
> b/tools/include/uapi/linux/bpf.h
> index 1e6cdd0f355d..48d8b3058aa1 100644
> --- a/tools/include/uapi/linux/bpf.h
> +++ b/tools/include/uapi/linux/bpf.h
> @@ -1267,6 +1267,9 @@ enum {

>   /* Create a map that is suitable to be an inner map with dynamic max  
> entries */
>   	BPF_F_INNER_MAP		= (1U << 12),
> +
> +/* Create a map that will be registered/unregesitered by the backed  
> bpf_link */
> +	BPF_F_LINK		= (1U << 13),
>   };

>   /* Flags for BPF_PROG_QUERY. */
> --
> 2.30.2

next prev parent reply	other threads:[~2023-02-15  2:53 UTC|newest]

Thread overview: 44+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-02-14 22:17 [PATCH bpf-next 0/7] Transit between BPF TCP congestion controls Kui-Feng Lee
2023-02-14 22:17 ` [PATCH bpf-next 1/7] bpf: Create links for BPF struct_ops maps Kui-Feng Lee
2023-02-15  0:26   ` kernel test robot
2023-02-15  2:39   ` Stanislav Fomichev
2023-02-15 18:04     ` Kui-Feng Lee
2023-02-15 18:44       ` Stanislav Fomichev
2023-02-15 20:24         ` Kui-Feng Lee
2023-02-15 21:28           ` Martin KaFai Lau
2023-02-15 20:30       ` Martin KaFai Lau
2023-02-15 20:55         ` Kui-Feng Lee
2023-02-15  9:32   ` kernel test robot
2023-02-15 22:58   ` Martin KaFai Lau
2023-02-16 17:59     ` Kui-Feng Lee
2023-02-14 22:17 ` [PATCH bpf-next 2/7] net: Update an existing TCP congestion control algorithm Kui-Feng Lee
2023-02-15  2:43   ` Stanislav Fomichev
2023-02-15 18:15     ` Kui-Feng Lee
2023-02-14 22:17 ` [PATCH bpf-next 3/7] bpf: Register and unregister a struct_ops by their bpf_links Kui-Feng Lee
2023-02-15  2:53   ` Stanislav Fomichev [this message]
2023-02-15 18:29     ` Kui-Feng Lee
2023-02-16  0:37   ` Martin KaFai Lau
2023-02-16 16:42     ` Kui-Feng Lee
2023-02-16 22:38       ` Martin KaFai Lau
2023-02-17 22:17         ` Kui-Feng Lee
2023-02-14 22:17 ` [PATCH bpf-next 4/7] libbpf: Create a bpf_link in bpf_map__attach_struct_ops() Kui-Feng Lee
2023-02-15  2:58   ` Stanislav Fomichev
2023-02-15 18:44     ` Kui-Feng Lee
2023-02-15 18:48       ` Stanislav Fomichev
2023-02-15 22:20         ` Kui-Feng Lee
2023-02-16 22:40   ` Andrii Nakryiko
2023-02-16 22:59     ` Martin KaFai Lau
2023-02-18  0:05     ` Kui-Feng Lee
2023-02-18  1:08       ` Andrii Nakryiko
2023-02-14 22:17 ` [PATCH bpf-next 5/7] bpf: Update the struct_ops of a bpf_link Kui-Feng Lee
2023-02-16  1:02   ` Martin KaFai Lau
2023-02-16 19:17     ` Kui-Feng Lee
2023-02-16 19:40       ` Martin KaFai Lau
2023-02-14 22:17 ` [PATCH bpf-next 6/7] libbpf: Update a bpf_link with another struct_ops Kui-Feng Lee
2023-02-16 22:48   ` Andrii Nakryiko
2023-02-18  0:22     ` Kui-Feng Lee
2023-02-18  1:10       ` Andrii Nakryiko
2023-02-21 22:20         ` Kui-Feng Lee
2023-02-14 22:17 ` [PATCH bpf-next 7/7] selftests/bpf: Test switching TCP Congestion Control algorithms Kui-Feng Lee
2023-02-16 22:50   ` Andrii Nakryiko
2023-02-18  0:23     ` Kui-Feng Lee

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Y+xJJLAhPBzboOvo@google.com \
    --to=sdf@google.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=kernel-team@meta.com \
    --cc=kuifeng@meta.com \
    --cc=martin.lau@linux.dev \
    --cc=song@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.