public inbox for bpf@vger.kernel.org
 help / color / mirror / Atom feed
From: "Emil Tsalapatis" <emil@etsalapatis.com>
To: "Mykyta Yatsenko" <mykyta.yatsenko5@gmail.com>,
	<bpf@vger.kernel.org>, <ast@kernel.org>, <andrii@kernel.org>,
	<daniel@iogearbox.net>, <kafai@meta.com>, <kernel-team@meta.com>,
	<eddyz87@gmail.com>, <memxor@gmail.com>,
	<herbert@gondor.apana.org.au>
Cc: "Mykyta Yatsenko" <yatsenko@meta.com>
Subject: Re: [PATCH RFC bpf-next v2 04/18] rhashtable: Add rhashtable_walk_enter_from()
Date: Mon, 13 Apr 2026 18:22:42 -0400	[thread overview]
Message-ID: <DHSDMJ8CY09A.30D3JJMR1B945@etsalapatis.com> (raw)
In-Reply-To: <20260408-rhash-v2-4-3b3675da1f6e@meta.com>

On Wed Apr 8, 2026 at 11:10 AM EDT, Mykyta Yatsenko wrote:
> From: Mykyta Yatsenko <yatsenko@meta.com>
>
> BPF resizable hashmap needs an efficient way to resume iteration for
> get_next_key and seq_file iterators. rhashtable_walk_enter()
> always starts from bucket 0, forcing a linear skip of already-seen
> elements.
>
> Add rhashtable_walk_enter_from() that looks up the key's bucket
> and positions the walker there, so walk_next returns the successor
> directly. If a resize moved the key to the future table, the
> walker is migrated to that table.
>
> Refactor __rhashtable_lookup into __rhashtable_lookup_one to reuse
> the single-table lookup in both the two-table search and the new
> enter_from positioning.
>
> Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>

Provided the tests are updated:

Reviewed-by: Emil Tsalapatis <emil@etsalapatis.com>

> ---
>  include/linux/rhashtable.h |  31 ++++++++++--
>  lib/rhashtable.c           |  53 ++++++++++++++++++++
>  lib/test_rhashtable.c      | 120 +++++++++++++++++++++++++++++++++++++++++++++
>  3 files changed, 199 insertions(+), 5 deletions(-)
>
> diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
> index 133ccb39137a..2c7a343ac592 100644
> --- a/include/linux/rhashtable.h
> +++ b/include/linux/rhashtable.h
> @@ -253,6 +253,11 @@ static inline void rhashtable_walk_start(struct rhashtable_iter *iter)
>  	(void)rhashtable_walk_start_check(iter);
>  }
>  
> +void rhashtable_walk_enter_from(struct rhashtable *ht,
> +				struct rhashtable_iter *iter,
> +				const void *key,
> +				const struct rhashtable_params params);
> +
>  void *rhashtable_walk_next(struct rhashtable_iter *iter);
>  void *rhashtable_walk_peek(struct rhashtable_iter *iter);
>  void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases_shared(RCU);
> @@ -613,8 +618,8 @@ static inline int rhashtable_compare(struct rhashtable_compare_arg *arg,
>  }
>  
>  /* Internal function, do not use. */
> -static __always_inline struct rhash_head *__rhashtable_lookup(
> -	struct rhashtable *ht, const void *key,
> +static __always_inline struct rhash_head *__rhashtable_lookup_one(
> +	struct rhashtable *ht, struct bucket_table *tbl, const void *key,
>  	const struct rhashtable_params params,
>  	const enum rht_lookup_freq freq)
>  	__must_hold_shared(RCU)
> @@ -624,13 +629,10 @@ static __always_inline struct rhash_head *__rhashtable_lookup(
>  		.key = key,
>  	};
>  	struct rhash_lock_head __rcu *const *bkt;
> -	struct bucket_table *tbl;
>  	struct rhash_head *he;
>  	unsigned int hash;
>  
>  	BUILD_BUG_ON(!__builtin_constant_p(freq));
> -	tbl = rht_dereference_rcu(ht->tbl, ht);
> -restart:
>  	hash = rht_key_hashfn(ht, tbl, key, params);
>  	bkt = rht_bucket(tbl, hash);
>  	do {
> @@ -646,6 +648,25 @@ static __always_inline struct rhash_head *__rhashtable_lookup(
>  		 */
>  	} while (he != RHT_NULLS_MARKER(bkt));
>  
> +	return NULL;
> +}
> +
> +/* Internal function, do not use. */
> +static __always_inline struct rhash_head *__rhashtable_lookup(
> +	struct rhashtable *ht, const void *key,
> +	const struct rhashtable_params params,
> +	const enum rht_lookup_freq freq)
> +	__must_hold_shared(RCU)
> +{
> +	struct bucket_table *tbl;
> +	struct rhash_head *he;
> +
> +	tbl = rht_dereference_rcu(ht->tbl, ht);
> +restart:
> +	he = __rhashtable_lookup_one(ht, tbl, key, params, freq);
> +	if (he)
> +		return he;
> +
>  	/* Ensure we see any new tables. */
>  	smp_rmb();
>  
> diff --git a/lib/rhashtable.c b/lib/rhashtable.c
> index 6074ed5f66f3..2fc277207dcc 100644
> --- a/lib/rhashtable.c
> +++ b/lib/rhashtable.c
> @@ -692,6 +692,59 @@ void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter)
>  }
>  EXPORT_SYMBOL_GPL(rhashtable_walk_enter);
>  
> +/**
> + * rhashtable_walk_enter_from - Initialise a walk starting at a key's bucket
> + * @ht:		Table to walk over
> + * @iter:	Hash table iterator
> + * @key:	Key whose bucket to start from
> + * @params:	Hash table parameters
> + *
> + * Like rhashtable_walk_enter(), but positions the iterator at the bucket
> + * containing @key. If a resize is in progress and @key has been migrated
> + * to the future table, the walker is moved to that table.
> + *
> + * Same constraints as rhashtable_walk_enter() apply.
> + */
> +void rhashtable_walk_enter_from(struct rhashtable *ht,
> +				struct rhashtable_iter *iter,
> +				const void *key,
> +				const struct rhashtable_params params)
> +				__must_hold(RCU)
> +{
> +	struct bucket_table *tbl;
> +	struct rhash_head *he;
> +
> +	rhashtable_walk_enter(ht, iter);
> +
> +	if (!key)
> +		return;
> +
> +	tbl = rht_dereference_rcu(ht->tbl, ht);
> +	he = __rhashtable_lookup_one(ht, tbl, key, params,
> +				     RHT_LOOKUP_NORMAL);
> +	if (!he) {
> +		smp_rmb();
> +		tbl = rht_dereference_rcu(tbl->future_tbl, ht);
> +		if (!tbl)
> +			return;
> +
> +		he = __rhashtable_lookup_one(ht, tbl, key, params,
> +					     RHT_LOOKUP_NORMAL);
> +		if (!he)
> +			return;
> +
> +		spin_lock(&ht->lock);
> +		list_del(&iter->walker.list);
> +		iter->walker.tbl = tbl;
> +		list_add(&iter->walker.list, &tbl->walkers);
> +		spin_unlock(&ht->lock);
> +	}
> +
> +	iter->slot = rht_key_hashfn(ht, tbl, key, params);
> +	iter->p = he;
> +}
> +EXPORT_SYMBOL_GPL(rhashtable_walk_enter_from);
> +
>  /**
>   * rhashtable_walk_exit - Free an iterator
>   * @iter:	Hash table Iterator
> diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
> index 0b33559a910b..0084157a96b4 100644
> --- a/lib/test_rhashtable.c
> +++ b/lib/test_rhashtable.c
> @@ -23,6 +23,7 @@
>  #include <linux/random.h>
>  #include <linux/vmalloc.h>
>  #include <linux/wait.h>
> +#include <linux/cleanup.h>
>  
>  #define MAX_ENTRIES	1000000
>  #define TEST_INSERT_FAIL INT_MAX
> @@ -679,6 +680,122 @@ static int threadfunc(void *data)
>  	return err;
>  }
>  
> +static int __init test_walk_enter_from(void)
> +{
> +	struct rhashtable ht;
> +	struct test_obj objs[4];
> +	struct rhashtable_iter iter;
> +	struct test_obj *obj;
> +	int err, i;
> +
> +	err = rhashtable_init(&ht, &test_rht_params);
> +	if (err)
> +		return err;
> +
> +	/* Insert 4 elements with keys 0, 2, 4, 6 */
> +	for (i = 0; i < 4; i++) {
> +		objs[i].value.id = i * 2;
> +		objs[i].value.tid = 0;
> +		err = rhashtable_insert_fast(&ht, &objs[i].node,
> +					     test_rht_params);
> +		if (err) {
> +			pr_warn("walk_enter_from: insert %d failed: %d\n",
> +				i, err);
> +			goto out;
> +		}
> +	}
> +
> +	/*
> +	 * Test 1: walk_enter_from positions at key, walk_next returns
> +	 * the successor (not the key itself).
> +	 */
> +	for (i = 0; i < 4; i++) {
> +		struct test_obj_val key = { .id = i * 2 };
> +
> +		scoped_guard(rcu) {
> +			rhashtable_walk_enter_from(&ht, &iter, &key,
> +						   test_rht_params);
> +			rhashtable_walk_start(&iter);
> +		}
> +
> +		obj = rhashtable_walk_next(&iter);
> +		while (IS_ERR(obj) && PTR_ERR(obj) == -EAGAIN)
> +			obj = rhashtable_walk_next(&iter);
> +
> +		/* Successor must not be the key itself */
> +		if (obj && obj->value.id == i * 2) {
> +			pr_warn("walk_enter_from: returned key %d instead of successor\n",
> +				i * 2);
> +			err = -EINVAL;
> +			rhashtable_walk_stop(&iter);
> +			rhashtable_walk_exit(&iter);
> +			goto out;
> +		}
> +
> +		rhashtable_walk_stop(&iter);
> +		rhashtable_walk_exit(&iter);
> +	}
> +
> +	/* Test 2: with a non-existent key the walker is not repositioned */
> +	{
> +		struct test_obj_val key = { .id = 99 };
> +
> +		scoped_guard(rcu) {
> +			rhashtable_walk_enter_from(&ht, &iter, &key,
> +						   test_rht_params);
> +			rhashtable_walk_start(&iter);
> +		}
> +
> +		obj = rhashtable_walk_next(&iter);
> +		while (IS_ERR(obj) && PTR_ERR(obj) == -EAGAIN)
> +			obj = rhashtable_walk_next(&iter);
> +
> +		/* Result is not checked; this only exercises the not-found path */
> +		rhashtable_walk_stop(&iter);
> +		rhashtable_walk_exit(&iter);
> +	}
> +
> +	/* Test 3: verify walk_enter_from + walk_next can iterate remaining elements */
> +	{
> +		struct test_obj_val key = { .id = 0 };
> +		int count = 0;
> +
> +		scoped_guard(rcu) {
> +			rhashtable_walk_enter_from(&ht, &iter, &key,
> +						   test_rht_params);
> +			rhashtable_walk_start(&iter);
> +		}
> +
> +		while ((obj = rhashtable_walk_next(&iter))) {
> +			if (IS_ERR(obj)) {
> +				if (PTR_ERR(obj) == -EAGAIN)
> +					continue;
> +				break;
> +			}
> +			count++;
> +		}
> +
> +		rhashtable_walk_stop(&iter);
> +		rhashtable_walk_exit(&iter);
> +
> +		/*
> +		 * Should see at least some elements after key 0.
> +		 * Exact count depends on hash distribution.
> +		 */
> +		if (count == 0) {
> +			pr_warn("walk_enter_from: no elements found after key 0\n");
> +			err = -EINVAL;
> +			goto out;
> +		}
> +	}
> +
> +	pr_info("walk_enter_from: all tests passed\n");
> +	err = 0;
> +out:
> +	rhashtable_destroy(&ht);
> +	return err;
> +}
> +
>  static int __init test_rht_init(void)
>  {
>  	unsigned int entries;
> @@ -738,6 +855,9 @@ static int __init test_rht_init(void)
>  
>  	test_insert_duplicates_run();
>  
> +	pr_info("Testing walk_enter_from: %s\n",
> +		test_walk_enter_from() == 0 ? "pass" : "FAIL");
> +
>  	if (!tcount)
>  		return 0;
>  


  parent reply	other threads:[~2026-04-13 22:22 UTC|newest]

Thread overview: 47+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-08 15:10 [PATCH RFC bpf-next v2 00/18] bpf: Introduce resizable hash map Mykyta Yatsenko
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 01/18] bpf: Register rhash map Mykyta Yatsenko
2026-04-10 22:31   ` Emil Tsalapatis
2026-04-13  8:10     ` Mykyta Yatsenko
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 02/18] bpf: Add resizable hashtab skeleton Mykyta Yatsenko
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 03/18] bpf: Implement lookup, delete, update for resizable hashtab Mykyta Yatsenko
2026-04-12 23:10   ` Alexei Starovoitov
2026-04-13 10:52     ` Mykyta Yatsenko
2026-04-13 16:24       ` Alexei Starovoitov
2026-04-13 16:27         ` Daniel Borkmann
2026-04-13 19:43           ` Mykyta Yatsenko
2026-04-13 20:37   ` Emil Tsalapatis
2026-04-14  8:34     ` Mykyta Yatsenko
2026-04-14 10:25   ` Leon Hwang
2026-04-14 10:28     ` Mykyta Yatsenko
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 04/18] rhashtable: Add rhashtable_walk_enter_from() Mykyta Yatsenko
2026-04-12 23:13   ` Alexei Starovoitov
2026-04-13 12:22     ` Mykyta Yatsenko
2026-04-13 22:22   ` Emil Tsalapatis [this message]
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 05/18] bpf: Implement get_next_key and free_internal_structs for resizable hashtab Mykyta Yatsenko
2026-04-13 22:44   ` Emil Tsalapatis
2026-04-14  8:11     ` Mykyta Yatsenko
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 06/18] bpf: Implement bpf_each_rhash_elem() using walk API Mykyta Yatsenko
2026-04-13 23:02   ` Emil Tsalapatis
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 07/18] bpf: Implement batch ops for resizable hashtab Mykyta Yatsenko
2026-04-13 23:25   ` Emil Tsalapatis
2026-04-14  8:08     ` Mykyta Yatsenko
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 08/18] bpf: Implement iterator APIs " Mykyta Yatsenko
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 09/18] bpf: Implement alloc and free " Mykyta Yatsenko
2026-04-12 23:15   ` Alexei Starovoitov
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 10/18] bpf: Allow timers, workqueues and task_work in " Mykyta Yatsenko
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 11/18] libbpf: Support resizable hashtable Mykyta Yatsenko
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 12/18] selftests/bpf: Add basic tests for resizable hash map Mykyta Yatsenko
2026-04-12 23:16   ` Alexei Starovoitov
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 13/18] selftests/bpf: Support resizable hashtab in test_maps Mykyta Yatsenko
2026-04-12 23:17   ` Alexei Starovoitov
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 14/18] selftests/bpf: Resizable hashtab BPF_F_LOCK tests Mykyta Yatsenko
2026-04-12 23:18   ` Alexei Starovoitov
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 15/18] selftests/bpf: Add stress tests for resizable hash get_next_key Mykyta Yatsenko
2026-04-12 23:19   ` Alexei Starovoitov
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 16/18] selftests/bpf: Add BPF iterator tests for resizable hash map Mykyta Yatsenko
2026-04-12 23:20   ` Alexei Starovoitov
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 17/18] bpftool: Add rhash map documentation Mykyta Yatsenko
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 18/18] selftests/bpf: Add resizable hashmap to benchmarks Mykyta Yatsenko
2026-04-12 23:25   ` Alexei Starovoitov
2026-04-12 23:11 ` [PATCH RFC bpf-next v2 00/18] bpf: Introduce resizable hash map Alexei Starovoitov
2026-04-13  8:28   ` Mykyta Yatsenko

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=DHSDMJ8CY09A.30D3JJMR1B945@etsalapatis.com \
    --to=emil@etsalapatis.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=eddyz87@gmail.com \
    --cc=herbert@gondor.apana.org.au \
    --cc=kafai@meta.com \
    --cc=kernel-team@meta.com \
    --cc=memxor@gmail.com \
    --cc=mykyta.yatsenko5@gmail.com \
    --cc=yatsenko@meta.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox