All of lore.kernel.org
 help / color / mirror / Atom feed
From: Martin KaFai Lau <martin.lau@linux.dev>
To: Kui-Feng Lee <kuifeng@meta.com>
Cc: bpf@vger.kernel.org, ast@kernel.org, song@kernel.org,
	kernel-team@meta.com, andrii@kernel.org, sdf@google.com
Subject: Re: [PATCH bpf-next v7 1/8] bpf: Retire the struct_ops map kvalue->refcnt.
Date: Fri, 17 Mar 2023 09:47:40 -0700	[thread overview]
Message-ID: <d77e2767-f8cf-14f0-a72b-47e9343ecc75@linux.dev> (raw)
In-Reply-To: <20230316023641.2092778-2-kuifeng@meta.com>

On 3/15/23 7:36 PM, Kui-Feng Lee wrote:
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -1943,6 +1943,7 @@ struct bpf_map *bpf_map_get_with_uref(u32 ufd);
>   struct bpf_map *__bpf_map_get(struct fd f);
>   void bpf_map_inc(struct bpf_map *map);
>   void bpf_map_inc_with_uref(struct bpf_map *map);
> +struct bpf_map *__bpf_map_inc_not_zero(struct bpf_map *map, bool uref);
>   struct bpf_map * __must_check bpf_map_inc_not_zero(struct bpf_map *map);
>   void bpf_map_put_with_uref(struct bpf_map *map);
>   void bpf_map_put(struct bpf_map *map);
> diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
> index 38903fb52f98..2a854e9cee52 100644
> --- a/kernel/bpf/bpf_struct_ops.c
> +++ b/kernel/bpf/bpf_struct_ops.c
> @@ -58,6 +58,8 @@ struct bpf_struct_ops_map {
>   	struct bpf_struct_ops_value kvalue;
>   };
>   
> +static DEFINE_MUTEX(update_mutex);

There has been a comment on the unused "update_mutex" since v3 and v5:

"...This is only used in patch 5 of this set. Please move it there..."


> +
>   #define VALUE_PREFIX "bpf_struct_ops_"
>   #define VALUE_PREFIX_LEN (sizeof(VALUE_PREFIX) - 1)
>   
> @@ -249,6 +251,7 @@ int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key,
>   	struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
>   	struct bpf_struct_ops_value *uvalue, *kvalue;
>   	enum bpf_struct_ops_state state;
> +	s64 refcnt;
>   
>   	if (unlikely(*(u32 *)key != 0))
>   		return -ENOENT;
> @@ -267,7 +270,14 @@ int bpf_struct_ops_map_sys_lookup_elem(struct bpf_map *map, void *key,
>   	uvalue = value;
>   	memcpy(uvalue, st_map->uvalue, map->value_size);
>   	uvalue->state = state;
> -	refcount_set(&uvalue->refcnt, refcount_read(&kvalue->refcnt));
> +
> +	/* This value offers the user space a general estimate of how
> +	 * many sockets are still utilizing this struct_ops for TCP
> +	 * congestion control. The number might not be exact, but it
> +	 * should sufficiently meet our present goals.
> +	 */
> +	refcnt = atomic64_read(&map->refcnt) - atomic64_read(&map->usercnt);
> +	refcount_set(&uvalue->refcnt, max_t(s64, refcnt, 0));
>   
>   	return 0;
>   }
> @@ -491,7 +501,6 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
>   		*(unsigned long *)(udata + moff) = prog->aux->id;
>   	}
>   
> -	refcount_set(&kvalue->refcnt, 1);
>   	bpf_map_inc(map);
>   
>   	set_memory_rox((long)st_map->image, 1);
> @@ -536,8 +545,7 @@ static int bpf_struct_ops_map_delete_elem(struct bpf_map *map, void *key)
>   	switch (prev_state) {
>   	case BPF_STRUCT_OPS_STATE_INUSE:
>   		st_map->st_ops->unreg(&st_map->kvalue.data);
> -		if (refcount_dec_and_test(&st_map->kvalue.refcnt))
> -			bpf_map_put(map);
> +		bpf_map_put(map);
>   		return 0;
>   	case BPF_STRUCT_OPS_STATE_TOBEFREE:
>   		return -EINPROGRESS;
> @@ -570,7 +578,7 @@ static void bpf_struct_ops_map_seq_show_elem(struct bpf_map *map, void *key,
>   	kfree(value);
>   }
>   
> -static void bpf_struct_ops_map_free(struct bpf_map *map)
> +static void bpf_struct_ops_map_free_nosync(struct bpf_map *map)

nit. __bpf_struct_ops_map_free() is the usual alternative name to use in this case.

>   {
>   	struct bpf_struct_ops_map *st_map = (struct bpf_struct_ops_map *)map;
>   
> @@ -582,6 +590,25 @@ static void bpf_struct_ops_map_free(struct bpf_map *map)
>   	bpf_map_area_free(st_map);
>   }
>   
> +static void bpf_struct_ops_map_free(struct bpf_map *map)
> +{
> +	/* The struct_ops's function may switch to another struct_ops.
> +	 *
> +	 * For example, bpf_tcp_cc_x->init() may switch to
> +	 * another tcp_cc_y by calling
> +	 * setsockopt(TCP_CONGESTION, "tcp_cc_y").
> +	 * During the switch,  bpf_struct_ops_put(tcp_cc_x) is called
> +	 * and its refcount may reach 0 which then free its
> +	 * trampoline image while tcp_cc_x is still running.
> +	 *
> +	 * Thus, a rcu grace period is needed here.
> +	 */
> +	synchronize_rcu();
> +	synchronize_rcu_tasks();

synchronize_rcu_mult(call_rcu, call_rcu_tasks) to wait both in parallel (credit 
to Paul's tip).



  reply	other threads:[~2023-03-17 16:48 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-03-16  2:36 [PATCH bpf-next v7 0/8] Transit between BPF TCP congestion controls Kui-Feng Lee
2023-03-16  2:36 ` [PATCH bpf-next v7 1/8] bpf: Retire the struct_ops map kvalue->refcnt Kui-Feng Lee
2023-03-17 16:47   ` Martin KaFai Lau [this message]
2023-03-17 20:41     ` Kui-Feng Lee
2023-03-16  2:36 ` [PATCH bpf-next v7 2/8] net: Update an existing TCP congestion control algorithm Kui-Feng Lee
2023-03-17 15:23   ` Daniel Borkmann
2023-03-17 17:18     ` Martin KaFai Lau
2023-03-17 17:23       ` Daniel Borkmann
2023-03-17 21:46         ` Kui-Feng Lee
2023-03-17 23:07     ` Kui-Feng Lee
2023-03-16  2:36 ` [PATCH bpf-next v7 3/8] bpf: Create links for BPF struct_ops maps Kui-Feng Lee
2023-03-17 18:10   ` Martin KaFai Lau
2023-03-17 20:52     ` Kui-Feng Lee
2023-03-16  2:36 ` [PATCH bpf-next v7 4/8] libbpf: Create a bpf_link in bpf_map__attach_struct_ops() Kui-Feng Lee
2023-03-17 18:44   ` Martin KaFai Lau
2023-03-17 21:00     ` Kui-Feng Lee
2023-03-17 22:23   ` Andrii Nakryiko
2023-03-17 23:48     ` Kui-Feng Lee
2023-03-16  2:36 ` [PATCH bpf-next v7 5/8] bpf: Update the struct_ops of a bpf_link Kui-Feng Lee
2023-03-17 19:23   ` Martin KaFai Lau
2023-03-17 21:39     ` Kui-Feng Lee
2023-03-18  1:11     ` Kui-Feng Lee
2023-03-18  5:38       ` Martin KaFai Lau
2023-03-17 22:27   ` Andrii Nakryiko
2023-03-18  0:41     ` Kui-Feng Lee
2023-03-16  2:36 ` [PATCH bpf-next v7 6/8] libbpf: Update a bpf_link with another struct_ops Kui-Feng Lee
2023-03-17 19:42   ` Martin KaFai Lau
2023-03-17 21:40     ` Kui-Feng Lee
2023-03-17 22:33   ` Andrii Nakryiko
2023-03-18  1:17     ` Kui-Feng Lee
2023-03-16  2:36 ` [PATCH bpf-next v7 7/8] libbpf: Use .struct_ops.link section to indicate a struct_ops with a link Kui-Feng Lee
2023-03-17 22:35   ` Andrii Nakryiko
2023-03-16  2:36 ` [PATCH bpf-next v7 8/8] selftests/bpf: Test switching TCP Congestion Control algorithms Kui-Feng Lee

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=d77e2767-f8cf-14f0-a72b-47e9343ecc75@linux.dev \
    --to=martin.lau@linux.dev \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=kernel-team@meta.com \
    --cc=kuifeng@meta.com \
    --cc=sdf@google.com \
    --cc=song@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.