Re: [PATCH bpf-next v5 3/8] bpf: Create links for BPF struct_ops maps.

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Martin KaFai Lau <martin.lau@linux.dev>
To: Kui-Feng Lee <kuifeng@meta.com>
Cc: bpf@vger.kernel.org, ast@kernel.org, song@kernel.org,
	kernel-team@meta.com, andrii@kernel.org, sdf@google.com
Subject: Re: [PATCH bpf-next v5 3/8] bpf: Create links for BPF struct_ops maps.
Date: Wed, 8 Mar 2023 12:04:55 -0800	[thread overview]
Message-ID: <0fcce83c-30f6-87d1-7ead-281fb154e589@linux.dev> (raw)
In-Reply-To: <20230308005050.255859-4-kuifeng@meta.com>

On 3/7/23 4:50 PM, Kui-Feng Lee wrote:
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 00b6e1a2edaf..afca6c526fe4 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1548,6 +1548,7 @@ static inline void bpf_module_put(const void *data, struct module *owner)
>   	else
>   		module_put(owner);
>   }
> +int bpf_struct_ops_link_create(union bpf_attr *attr);
>   
>   #ifdef CONFIG_NET
>   /* Define it here to avoid the use of forward declaration */
> @@ -1588,6 +1589,11 @@ static inline int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map,
>   {
>   	return -EINVAL;
>   }
> +static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
> +{
> +	return -EOPNOTSUPP;
> +}
> +
>   #endif
>   
>   #if defined(CONFIG_CGROUP_BPF) && defined(CONFIG_BPF_LSM)
> @@ -2379,6 +2385,11 @@ static inline void bpf_link_put(struct bpf_link *link)
>   {
>   }
>   
> +static inline int bpf_struct_ops_link_create(union bpf_attr *attr)
> +{
> +	return -EOPNOTSUPP;
> +}

The inline version is double defined. It does not look right. Please double check.

> +
>   static inline int bpf_obj_get_user(const char __user *pathname, int flags)
>   {
>   	return -EOPNOTSUPP;
> diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> index 976b194eb775..f9fc7b8af3c4 100644
> --- a/include/uapi/linux/bpf.h
> +++ b/include/uapi/linux/bpf.h
> @@ -1033,6 +1033,7 @@ enum bpf_attach_type {
>   	BPF_PERF_EVENT,
>   	BPF_TRACE_KPROBE_MULTI,
>   	BPF_LSM_CGROUP,
> +	BPF_STRUCT_OPS,
>   	__MAX_BPF_ATTACH_TYPE
>   };
>   
> @@ -1266,6 +1267,9 @@ enum {
>   
>   /* Create a map that is suitable to be an inner map with dynamic max entries */
>   	BPF_F_INNER_MAP		= (1U << 12),
> +
> +/* Create a map that will be registered/unregesitered by the backed bpf_link */
> +	BPF_F_LINK		= (1U << 13),
>   };
>   
>   /* Flags for BPF_PROG_QUERY. */
> @@ -1507,7 +1511,10 @@ union bpf_attr {
>   	} task_fd_query;
>   
>   	struct { /* struct used by BPF_LINK_CREATE command */
> -		__u32		prog_fd;	/* eBPF program to attach */
> +		union {
> +			__u32		prog_fd;	/* eBPF program to attach */
> +			__u32		map_fd;		/* struct_ops to attach */
> +		};
>   		union {
>   			__u32		target_fd;	/* object to attach to */
>   			__u32		target_ifindex; /* target ifindex */
> @@ -6379,6 +6386,9 @@ struct bpf_link_info {
>   		struct {
>   			__u32 ifindex;
>   		} xdp;
> +		struct {
> +			__u32 map_id;
> +		} struct_ops;
>   	};
>   } __attribute__((aligned(8)));
>   
> diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
> index 9e097fcc9cf4..5a7e86cf67b5 100644
> --- a/kernel/bpf/bpf_struct_ops.c
> +++ b/kernel/bpf/bpf_struct_ops.c
> @@ -16,6 +16,7 @@ enum bpf_struct_ops_state {
>   	BPF_STRUCT_OPS_STATE_INIT,
>   	BPF_STRUCT_OPS_STATE_INUSE,
>   	BPF_STRUCT_OPS_STATE_TOBEFREE,
> +	BPF_STRUCT_OPS_STATE_READY,
>   };
>   
>   #define BPF_STRUCT_OPS_COMMON_VALUE			\
> @@ -58,6 +59,11 @@ struct bpf_struct_ops_map {
>   	struct bpf_struct_ops_value kvalue;
>   };
>   
> +struct bpf_struct_ops_link {
> +	struct bpf_link link;
> +	struct bpf_map __rcu *map;
> +};
> +
>   static DEFINE_MUTEX(update_mutex);
>   
>   #define VALUE_PREFIX "bpf_struct_ops_"
> @@ -496,11 +502,24 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>   		*(unsigned long *)(udata + moff) = prog->aux->id;
>   	}
>   
> -	bpf_map_inc(map);
> -
>   	set_memory_rox((long)st_map->image, 1);
> +	if (st_map->map.map_flags & BPF_F_LINK) {
> +		if (st_ops->validate) {
> +			err = st_ops->validate(kdata);
> +			if (err)
> +				goto unlock;

This should at least be 'goto reset_unlock' to release the progs.

set_memory_rox(..., 1) should also be done after validate?

> +		}
> +		/* Let bpf_link handle registration & unregistration.
> +		 *
> +		 * Pair with smp_load_acquire() during lookup_elem().
> +		 */
> +		smp_store_release(&kvalue->state, BPF_STRUCT_OPS_STATE_READY);
> +		goto unlock;
> +	}
> +
>   	err = st_ops->reg(kdata);
>   	if (likely(!err)) {
> +		bpf_map_inc(map);
>   		/* Pair with smp_load_acquire() during lookup_elem().
>   		 * It ensures the above udata updates (e.g. prog->aux->id)
>   		 * can be seen once BPF_STRUCT_OPS_STATE_INUSE is set.
> @@ -516,7 +535,6 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>   	 */
>   	set_memory_nx((long)st_map->image, 1);
>   	set_memory_rw((long)st_map->image, 1);
> -	bpf_map_put(map);
>   
>   reset_unlock:
>   	bpf_struct_ops_map_put_progs(st_map);
> @@ -534,6 +552,9 @@ static int bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key)
>   	struct bpf_struct_ops_map *st_map;
>   
>   	st_map = (struct bpf_struct_ops_map *)map;
> +	if (st_map->map.map_flags & BPF_F_LINK)
> +		return -EOPNOTSUPP;
> +
>   	prev_state = cmpxchg(&st_map->kvalue.state,
>   			     BPF_STRUCT_OPS_STATE_INUSE,
>   			     BPF_STRUCT_OPS_STATE_TOBEFREE);
> @@ -601,7 +622,7 @@ static void bpf_struct_ops_map_free(struct bpf_map *map)
>   static int bpf_struct_ops_map_alloc_check(union bpf_attr *attr)
>   {
>   	if (attr->key_size != sizeof(unsigned int) || attr->max_entries != 1 ||
> -	    attr->map_flags || !attr->btf_vmlinux_value_type_id)
> +	    (attr->map_flags & ~BPF_F_LINK) || !attr->btf_vmlinux_value_type_id)
>   		return -EINVAL;
>   	return 0;
>   }
> @@ -712,3 +733,98 @@ void bpf_struct_ops_put(const void *kdata)
>   
>   	bpf_map_put(&st_map->map);
>   }
> +
> +static void bpf_struct_ops_map_link_dealloc(struct bpf_link *link)
> +{
> +	struct bpf_struct_ops_link *st_link;
> +	struct bpf_struct_ops_map *st_map;
> +
> +	st_link = container_of(link, struct bpf_struct_ops_link, link);
> +	st_map = (struct bpf_struct_ops_map *)st_link->map;

/* protected by refcnt and no one is replacing it */
rcu_dereference_protected(st_link->map, true);

st_link->map is with __rcu. It should have warning when compile with 'make C=1 
...'. Patchwork also reports this: 
https://patchwork.kernel.org/project/netdevbpf/patch/20230308005050.255859-4-kuifeng@meta.com/. 
Please pay attention to patchwork for errors.

> +	st_map->st_ops->unreg(&st_map->kvalue.data);
> +	bpf_map_put(st_link->map);

Same here. Reading __rcu pointer without rcu_dereference_xxx.

or simply use &st_map->map here. Otherwise, it will also have type mismatch warning.

> +	kfree(st_link);
> +}
> +
> +static void bpf_struct_ops_map_link_show_fdinfo(const struct bpf_link *link,
> +					    struct seq_file *seq)
> +{
> +	struct bpf_struct_ops_link *st_link;
> +	struct bpf_map *map;
> +
> +	st_link = container_of(link, struct bpf_struct_ops_link, link);
> +	rcu_read_lock();
> +	map = rcu_dereference(st_link->map);
> +	if (map)

map cannot be NULL?

> +		seq_printf(seq, "map_id:\t%d\n", map->id);
> +	rcu_read_unlock();
> +}
> +
> +static int bpf_struct_ops_map_link_fill_link_info(const struct bpf_link *link,
> +					       struct bpf_link_info *info)
> +{
> +	struct bpf_struct_ops_link *st_link;
> +	struct bpf_map *map;
> +
> +	st_link = container_of(link, struct bpf_struct_ops_link, link);
> +	rcu_read_lock();
> +	map = rcu_dereference(st_link->map);
> +	if (map)

Same here.

> +		info->struct_ops.map_id = map->id;
> +	rcu_read_unlock();
> +	return 0;
> +}
> +
> +static const struct bpf_link_ops bpf_struct_ops_map_lops = {
> +	.dealloc = bpf_struct_ops_map_link_dealloc,
> +	.show_fdinfo = bpf_struct_ops_map_link_show_fdinfo,
> +	.fill_link_info = bpf_struct_ops_map_link_fill_link_info,
> +};
> +
> +int bpf_struct_ops_link_create(union bpf_attr *attr)
> +{
> +	struct bpf_struct_ops_link *link = NULL;
> +	struct bpf_link_primer link_primer;
> +	struct bpf_struct_ops_map *st_map;
> +	struct bpf_map *map;
> +	int err;
> +
> +	map = bpf_map_get(attr->link_create.map_fd);
> +	if (!map)
> +		return -EINVAL;
> +
> +	st_map = (struct bpf_struct_ops_map *)map;
> +
> +	if (map->map_type != BPF_MAP_TYPE_STRUCT_OPS || !(map->map_flags & BPF_F_LINK) ||
> +	    /* Pair with smp_store_release() during map_update */
> +	    smp_load_acquire(&st_map->kvalue.state) != BPF_STRUCT_OPS_STATE_READY) {
> +		err = -EINVAL;
> +		goto err_out;
> +	}
> +
> +	link = kzalloc(sizeof(*link), GFP_USER);
> +	if (!link) {
> +		err = -ENOMEM;
> +		goto err_out;
> +	}
> +	bpf_link_init(&link->link, BPF_LINK_TYPE_STRUCT_OPS, &bpf_struct_ops_map_lops, NULL);
> +	RCU_INIT_POINTER(link->map, map);
> +
> +	err = bpf_link_prime(&link->link, &link_primer);
> +	if (err)
> +		goto err_out;
> +
> +	err = st_map->st_ops->reg(st_map->kvalue.data);
> +	if (err) {
> +		bpf_link_cleanup(&link_primer);
> +		goto err_out;
> +	}
> +
> +	return bpf_link_settle(&link_primer);
> +
> +err_out:
> +	bpf_map_put(map);
> +	kfree(link);
> +	return err;
> +}
> +

next prev parent reply	other threads:[~2023-03-08 20:05 UTC|newest]

Thread overview: 28+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-03-08  0:50 [PATCH bpf-next v5 0/8] Transit between BPF TCP congestion controls Kui-Feng Lee
2023-03-08  0:50 ` [PATCH bpf-next v5 1/8] bpf: Retire the struct_ops map kvalue->refcnt Kui-Feng Lee
2023-03-08 18:30   ` Martin KaFai Lau
2023-03-08 22:30     ` Kui-Feng Lee
2023-03-08  0:50 ` [PATCH bpf-next v5 2/8] net: Update an existing TCP congestion control algorithm Kui-Feng Lee
2023-03-08 18:59   ` Martin KaFai Lau
2023-03-08 22:51     ` Kui-Feng Lee
2023-03-08  0:50 ` [PATCH bpf-next v5 3/8] bpf: Create links for BPF struct_ops maps Kui-Feng Lee
2023-03-08 15:01   ` kernel test robot
2023-03-08 15:32   ` kernel test robot
2023-03-08 16:03   ` kernel test robot
2023-03-08 20:04   ` Martin KaFai Lau [this message]
2023-03-08 23:46     ` Kui-Feng Lee
2023-03-08  0:50 ` [PATCH bpf-next v5 4/8] libbpf: Create a bpf_link in bpf_map__attach_struct_ops() Kui-Feng Lee
2023-03-08 21:42   ` Martin KaFai Lau
2023-03-09  0:22     ` Kui-Feng Lee
2023-03-09 17:09       ` Martin KaFai Lau
2023-03-09 18:16         ` Kui-Feng Lee
2023-03-09 18:19           ` Kui-Feng Lee
2023-03-08  0:50 ` [PATCH bpf-next v5 5/8] bpf: Update the struct_ops of a bpf_link Kui-Feng Lee
2023-03-08 22:21   ` Martin KaFai Lau
2023-03-09  3:09     ` Kui-Feng Lee
2023-03-08  0:50 ` [PATCH bpf-next v5 6/8] libbpf: Update a bpf_link with another struct_ops Kui-Feng Lee
2023-03-08 22:32   ` Martin KaFai Lau
2023-03-08  0:50 ` [PATCH bpf-next v5 7/8] libbpf: Use .struct_ops.link section to indicate a struct_ops with a link Kui-Feng Lee
2023-03-08  0:50 ` [PATCH bpf-next v5 8/8] selftests/bpf: Test switching TCP Congestion Control algorithms Kui-Feng Lee
2023-03-08 23:10   ` Martin KaFai Lau
2023-03-09  3:34     ` Kui-Feng Lee

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=0fcce83c-30f6-87d1-7ead-281fb154e589@linux.dev \
    --to=martin.lau@linux.dev \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=kernel-team@meta.com \
    --cc=kuifeng@meta.com \
    --cc=sdf@google.com \
    --cc=song@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.