public inbox for bpf@vger.kernel.org
 help / color / mirror / Atom feed
From: "Emil Tsalapatis" <emil@etsalapatis.com>
To: "Mykyta Yatsenko" <mykyta.yatsenko5@gmail.com>,
	<bpf@vger.kernel.org>, <ast@kernel.org>, <andrii@kernel.org>,
	<daniel@iogearbox.net>, <kafai@meta.com>, <kernel-team@meta.com>,
	<eddyz87@gmail.com>, <memxor@gmail.com>,
	<herbert@gondor.apana.org.au>
Cc: "Mykyta Yatsenko" <yatsenko@meta.com>
Subject: Re: [PATCH RFC bpf-next v2 03/18] bpf: Implement lookup, delete, update for resizable hashtab
Date: Mon, 13 Apr 2026 16:37:00 -0400	[thread overview]
Message-ID: <DHSBDLRKCAQO.1IPJTSEMPESV7@etsalapatis.com> (raw)
In-Reply-To: <20260408-rhash-v2-3-3b3675da1f6e@meta.com>

On Wed Apr 8, 2026 at 11:10 AM EDT, Mykyta Yatsenko wrote:
> From: Mykyta Yatsenko <yatsenko@meta.com>
>
> Use rhashtable_lookup_likely() for lookups, rhashtable_remove_fast()
> for deletes, and rhashtable_lookup_get_insert_fast() for inserts.
>
> Updates modify values in place under RCU rather than allocating a
> new element and swapping the pointer (as regular htab does). This
> trades read consistency for performance: concurrent readers may
> see partial updates. Users requiring consistent reads should use
> BPF_F_LOCK.
>
> Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
> ---
>  kernel/bpf/hashtab.c | 141 ++++++++++++++++++++++++++++++++++++++++++++++++---
>  1 file changed, 134 insertions(+), 7 deletions(-)
>
> diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
> index 9e7806814fec..ea7314cc3703 100644
> --- a/kernel/bpf/hashtab.c
> +++ b/kernel/bpf/hashtab.c
> @@ -2755,6 +2755,11 @@ struct bpf_rhtab {
>  	u32 elem_size;
>  };
>  
> +static inline void *rhtab_elem_value(struct rhtab_elem *l, u32 key_size)
> +{
> +	return l->data + round_up(key_size, 8);
> +}
> +
>  static struct bpf_map *rhtab_map_alloc(union bpf_attr *attr)
>  {
>  	return ERR_PTR(-EOPNOTSUPP);
> @@ -2769,33 +2774,155 @@ static void rhtab_map_free(struct bpf_map *map)
>  {
>  }
>  
> +static void *rhtab_lookup_elem(struct bpf_map *map, void *key)
> +{
> +	struct bpf_rhtab *rhtab = container_of(map, struct bpf_rhtab, map);
> +	/* Using constant zeroed params to force rhashtable use inlined hashfunc */
> +	static const struct rhashtable_params params = { 0 };
> +
> +	return rhashtable_lookup_likely(&rhtab->ht, key, params);
> +}
> +
>  static void *rhtab_map_lookup_elem(struct bpf_map *map, void *key) __must_hold(RCU)
>  {
> -	return NULL;
> +	struct rhtab_elem *l;
> +
> +	l = rhtab_lookup_elem(map, key);
> +	return l ? rhtab_elem_value(l, map->key_size) : NULL;
> +}
> +
> +static int rhtab_delete_elem(struct bpf_rhtab *rhtab, struct rhtab_elem *elem)
> +{
> +	int err;
> +
> +	err = rhashtable_remove_fast(&rhtab->ht, &elem->node, rhtab->params);
> +	if (err)
> +		return err;
> +
> +	bpf_map_free_internal_structs(&rhtab->map, rhtab_elem_value(elem, rhtab->map.key_size));
> +	bpf_mem_cache_free_rcu(&rhtab->ma, elem);
> +	return 0;
>  }
>  
>  static long rhtab_map_delete_elem(struct bpf_map *map, void *key)
>  {
> -	return -EOPNOTSUPP;
> +	struct bpf_rhtab *rhtab = container_of(map, struct bpf_rhtab, map);
> +	struct rhtab_elem *l;
> +
> +	guard(rcu)();
> +	l = rhtab_lookup_elem(map, key);
> +	return l ? rhtab_delete_elem(rhtab, l) : -ENOENT;
> +}
> +
> +static void rhtab_read_elem_value(struct bpf_map *map, void *dst, struct rhtab_elem *elem,
> +				  u64 flags)
> +{
> +	void *src = rhtab_elem_value(elem, map->key_size);
> +
> +	if (flags & BPF_F_LOCK)
> +		copy_map_value_locked(map, dst, src, true);
> +	else
> +		copy_map_value(map, dst, src);
>  }
>  
>  static int rhtab_map_lookup_and_delete_elem(struct bpf_map *map, void *key, void *value, u64 flags)
>  {
> -	return -EOPNOTSUPP;
> +	struct bpf_rhtab *rhtab = container_of(map, struct bpf_rhtab, map);
> +	struct rhtab_elem *l;
> +	int err;
> +
> +	if ((flags & ~BPF_F_LOCK) ||
> +	    ((flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK)))
> +		return -EINVAL;
> +
> +	/* Make sure element is not deleted between lookup and copy */
> +	guard(rcu)();
> +
> +	l = rhtab_lookup_elem(map, key);
> +	if (!l)
> +		return -ENOENT;
> +
> +	rhtab_read_elem_value(map, value, l, flags);
> +	err = rhtab_delete_elem(rhtab, l);
> +	if (err)
> +		return err;
> +
> +	check_and_init_map_value(map, value);
> +	return 0;
>  }
>  
> -static long rhtab_map_update_elem(struct bpf_map *map, void *key, void *value, u64 map_flags)
> +static long rhtab_map_update_existing(struct bpf_map *map, struct rhtab_elem *elem, void *value,
> +				      u64 map_flags)
>  {
> -	return -EOPNOTSUPP;
> +	if (map_flags & BPF_NOEXIST)
> +		return -EEXIST;
> +
> +	if (map_flags & BPF_F_LOCK)
> +		copy_map_value_locked(map, rhtab_elem_value(elem, map->key_size), value, false);
> +	else
> +		copy_map_value(map, rhtab_elem_value(elem, map->key_size), value);

It looks like Alexei is right that special fields are not getting handled here.

> +	return 0;
>  }
>  
> -static void rhtab_map_free_internal_structs(struct bpf_map *map)
> +static long rhtab_map_update_elem(struct bpf_map *map, void *key, void *value, u64 map_flags)
>  {
> +	struct bpf_rhtab *rhtab = container_of(map, struct bpf_rhtab, map);
> +	struct rhtab_elem *elem, *tmp;
> +
> +	if (unlikely((map_flags & ~BPF_F_LOCK) > BPF_EXIST))
> +		return -EINVAL;
> +
> +	if ((map_flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK))
> +		return -EINVAL;
> +
> +	guard(rcu)();
> +	elem = rhtab_lookup_elem(map, key);
> +	if (elem)
> +		return rhtab_map_update_existing(map, elem, value, map_flags);
> +
> +	if (map_flags & BPF_EXIST)
> +		return -ENOENT;
> +
> +	/* Check max_entries limit before inserting new element */
> +	if (atomic_read(&rhtab->ht.nelems) >= map->max_entries)
> +		return -E2BIG;
> +
> +	elem = bpf_mem_cache_alloc(&rhtab->ma);
> +	if (!elem)
> +		return -ENOMEM;
> +
> +	memcpy(elem->data, key, map->key_size);
> +	copy_map_value(map, rhtab_elem_value(elem, map->key_size), value);
> +
> +	tmp = rhashtable_lookup_get_insert_fast(&rhtab->ht, &elem->node, rhtab->params);
> +	if (tmp) {
> +		bpf_mem_cache_free(&rhtab->ma, elem);
> +		if (IS_ERR(tmp))
> +			return PTR_ERR(tmp);
> +
> +		return rhtab_map_update_existing(map, tmp, value, map_flags);
> +	}
> +
> +	return 0;
>  }

Note: I am actually skeptical about Alexei's warning here. Sure, the
update may get overwritten even as we are returning 0, but we are
providing no guarantees about how long the write will survive in the
map, and there is no inherent atomicity between an update and any other
operations.

>  
>  static int rhtab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
>  {
> -	return -EOPNOTSUPP;
> +	struct bpf_insn *insn = insn_buf;
> +	const int ret = BPF_REG_0;
> +
> +	BUILD_BUG_ON(!__same_type(&rhtab_lookup_elem,
> +				  (void *(*)(struct bpf_map *map, void *key)) NULL));
> +	*insn++ = BPF_EMIT_CALL(rhtab_lookup_elem);
> +	*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
> +	*insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
> +				offsetof(struct rhtab_elem, data) + round_up(map->key_size, 8));
> +
> +	return insn - insn_buf;
> +}
> +
> +static void rhtab_map_free_internal_structs(struct bpf_map *map)
> +{
>  }

This gets filled in in later patches, but the fact that it's here as a
no-op debatably pushes the series over the line into being
non-bisectable. Can we move it to the walk patch, since that's where it
gets populated?

>  
>  static int rhtab_map_get_next_key(struct bpf_map *map, void *key, void *next_key)


  parent reply	other threads:[~2026-04-13 20:37 UTC|newest]

Thread overview: 42+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-08 15:10 [PATCH RFC bpf-next v2 00/18] bpf: Introduce resizable hash map Mykyta Yatsenko
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 01/18] bpf: Register rhash map Mykyta Yatsenko
2026-04-10 22:31   ` Emil Tsalapatis
2026-04-13  8:10     ` Mykyta Yatsenko
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 02/18] bpf: Add resizable hashtab skeleton Mykyta Yatsenko
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 03/18] bpf: Implement lookup, delete, update for resizable hashtab Mykyta Yatsenko
2026-04-12 23:10   ` Alexei Starovoitov
2026-04-13 10:52     ` Mykyta Yatsenko
2026-04-13 16:24       ` Alexei Starovoitov
2026-04-13 16:27         ` Daniel Borkmann
2026-04-13 19:43           ` Mykyta Yatsenko
2026-04-13 20:37   ` Emil Tsalapatis [this message]
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 04/18] rhashtable: Add rhashtable_walk_enter_from() Mykyta Yatsenko
2026-04-12 23:13   ` Alexei Starovoitov
2026-04-13 12:22     ` Mykyta Yatsenko
2026-04-13 22:22   ` Emil Tsalapatis
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 05/18] bpf: Implement get_next_key and free_internal_structs for resizable hashtab Mykyta Yatsenko
2026-04-13 22:44   ` Emil Tsalapatis
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 06/18] bpf: Implement bpf_each_rhash_elem() using walk API Mykyta Yatsenko
2026-04-13 23:02   ` Emil Tsalapatis
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 07/18] bpf: Implement batch ops for resizable hashtab Mykyta Yatsenko
2026-04-13 23:25   ` Emil Tsalapatis
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 08/18] bpf: Implement iterator APIs " Mykyta Yatsenko
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 09/18] bpf: Implement alloc and free " Mykyta Yatsenko
2026-04-12 23:15   ` Alexei Starovoitov
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 10/18] bpf: Allow timers, workqueues and task_work in " Mykyta Yatsenko
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 11/18] libbpf: Support resizable hashtable Mykyta Yatsenko
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 12/18] selftests/bpf: Add basic tests for resizable hash map Mykyta Yatsenko
2026-04-12 23:16   ` Alexei Starovoitov
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 13/18] selftests/bpf: Support resizable hashtab in test_maps Mykyta Yatsenko
2026-04-12 23:17   ` Alexei Starovoitov
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 14/18] selftests/bpf: Resizable hashtab BPF_F_LOCK tests Mykyta Yatsenko
2026-04-12 23:18   ` Alexei Starovoitov
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 15/18] selftests/bpf: Add stress tests for resizable hash get_next_key Mykyta Yatsenko
2026-04-12 23:19   ` Alexei Starovoitov
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 16/18] selftests/bpf: Add BPF iterator tests for resizable hash map Mykyta Yatsenko
2026-04-12 23:20   ` Alexei Starovoitov
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 17/18] bpftool: Add rhash map documentation Mykyta Yatsenko
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 18/18] selftests/bpf: Add resizable hashmap to benchmarks Mykyta Yatsenko
2026-04-12 23:25   ` Alexei Starovoitov
2026-04-12 23:11 ` [PATCH RFC bpf-next v2 00/18] bpf: Introduce resizable hash map Alexei Starovoitov
2026-04-13  8:28   ` Mykyta Yatsenko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=DHSBDLRKCAQO.1IPJTSEMPESV7@etsalapatis.com \
    --to=emil@etsalapatis.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=eddyz87@gmail.com \
    --cc=herbert@gondor.apana.org.au \
    --cc=kafai@meta.com \
    --cc=kernel-team@meta.com \
    --cc=memxor@gmail.com \
    --cc=mykyta.yatsenko5@gmail.com \
    --cc=yatsenko@meta.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox