From: Mykyta Yatsenko <mykyta.yatsenko5@gmail.com>
To: bpf@vger.kernel.org, ast@kernel.org, andrii@kernel.org,
daniel@iogearbox.net, kafai@meta.com, kernel-team@meta.com,
eddyz87@gmail.com, memxor@gmail.com,
herbert@gondor.apana.org.au
Cc: Mykyta Yatsenko <yatsenko@meta.com>
Subject: [PATCH RFC bpf-next v2 04/18] rhashtable: Add rhashtable_walk_enter_from()
Date: Wed, 08 Apr 2026 08:10:09 -0700 [thread overview]
Message-ID: <20260408-rhash-v2-4-3b3675da1f6e@meta.com> (raw)
In-Reply-To: <20260408-rhash-v2-0-3b3675da1f6e@meta.com>
From: Mykyta Yatsenko <yatsenko@meta.com>
BPF resizable hashmap needs efficient iteration resume for
get_next_key and seq_file iterators. rhashtable_walk_enter()
always starts from bucket 0, forcing linear skip of already-seen
elements.
Add rhashtable_walk_enter_from() that looks up the key's bucket
and positions the walker there, so walk_next returns the successor
directly. If a resize moved the key to the future table, the
walker is migrated to that table.
Refactor __rhashtable_lookup into __rhashtable_lookup_one to reuse
the single-table lookup in both the two-table search and the new
enter_from positioning.
Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
---
include/linux/rhashtable.h | 31 ++++++++++--
lib/rhashtable.c | 53 ++++++++++++++++++++
lib/test_rhashtable.c | 120 +++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 199 insertions(+), 5 deletions(-)
diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h
index 133ccb39137a..2c7a343ac592 100644
--- a/include/linux/rhashtable.h
+++ b/include/linux/rhashtable.h
@@ -253,6 +253,11 @@ static inline void rhashtable_walk_start(struct rhashtable_iter *iter)
(void)rhashtable_walk_start_check(iter);
}
+/*
+ * Set up @iter for a walk over @ht that resumes at @key rather than at the
+ * first bucket. Defined in lib/rhashtable.c, where the full kernel-doc lives.
+ */
+void rhashtable_walk_enter_from(struct rhashtable *ht,
+ struct rhashtable_iter *iter,
+ const void *key,
+ const struct rhashtable_params params);
+
void *rhashtable_walk_next(struct rhashtable_iter *iter);
void *rhashtable_walk_peek(struct rhashtable_iter *iter);
void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases_shared(RCU);
@@ -613,8 +618,8 @@ static inline int rhashtable_compare(struct rhashtable_compare_arg *arg,
}
/* Internal function, do not use. */
-static __always_inline struct rhash_head *__rhashtable_lookup(
- struct rhashtable *ht, const void *key,
+static __always_inline struct rhash_head *__rhashtable_lookup_one(
+ struct rhashtable *ht, struct bucket_table *tbl, const void *key,
const struct rhashtable_params params,
const enum rht_lookup_freq freq)
__must_hold_shared(RCU)
@@ -624,13 +629,10 @@ static __always_inline struct rhash_head *__rhashtable_lookup(
.key = key,
};
struct rhash_lock_head __rcu *const *bkt;
- struct bucket_table *tbl;
struct rhash_head *he;
unsigned int hash;
BUILD_BUG_ON(!__builtin_constant_p(freq));
- tbl = rht_dereference_rcu(ht->tbl, ht);
-restart:
hash = rht_key_hashfn(ht, tbl, key, params);
bkt = rht_bucket(tbl, hash);
do {
@@ -646,6 +648,25 @@ static __always_inline struct rhash_head *__rhashtable_lookup(
*/
} while (he != RHT_NULLS_MARKER(bkt));
+ return NULL;
+}
+
+/* Internal function, do not use. */
+static __always_inline struct rhash_head *__rhashtable_lookup(
+ struct rhashtable *ht, const void *key,
+ const struct rhashtable_params params,
+ const enum rht_lookup_freq freq)
+ __must_hold_shared(RCU)
+{
+ struct bucket_table *tbl;
+ struct rhash_head *he;
+
+ tbl = rht_dereference_rcu(ht->tbl, ht);
+restart:
+ he = __rhashtable_lookup_one(ht, tbl, key, params, freq);
+ if (he)
+ return he;
+
/* Ensure we see any new tables. */
smp_rmb();
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 6074ed5f66f3..2fc277207dcc 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -692,6 +692,59 @@ void rhashtable_walk_enter(struct rhashtable *ht, struct rhashtable_iter *iter)
}
EXPORT_SYMBOL_GPL(rhashtable_walk_enter);
+/**
+ * rhashtable_walk_enter_from - Initialise a walk resuming after a given key
+ * @ht:		Table to walk over
+ * @iter:	Hash table iterator
+ * @key:	Key to resume from, or NULL to start from the beginning
+ * @params:	Hash table parameters
+ *
+ * Like rhashtable_walk_enter(), but if @key is found the iterator is
+ * positioned on its element, so that the first rhashtable_walk_next()
+ * returns the element following @key.
+ *
+ * The key is looked up in the table the walker was registered on. If it is
+ * not there and that table has a future table containing @key, the walker
+ * is moved to the future table.
+ *
+ * If @key is NULL, cannot be found, or the walker's table changes while the
+ * key is being located, the iterator is left exactly as
+ * rhashtable_walk_enter() initialised it, i.e. the walk starts from the
+ * beginning of the table.
+ *
+ * Must be called with the RCU read lock held. Otherwise the same constraints
+ * as rhashtable_walk_enter() apply.
+ */
+void rhashtable_walk_enter_from(struct rhashtable *ht,
+				struct rhashtable_iter *iter,
+				const void *key,
+				const struct rhashtable_params params)
+	__must_hold_shared(RCU)
+{
+	struct bucket_table *tbl;
+	struct rhash_head *he;
+
+	rhashtable_walk_enter(ht, iter);
+
+	if (!key)
+		return;
+
+	/*
+	 * Use the table the walker was registered on rather than re-reading
+	 * ht->tbl: the two can differ if a rehash completed in between, and
+	 * iter->slot / iter->p must describe iter->walker.tbl.
+	 *
+	 * NOTE(review): relies on rehash clearing walker->tbl under ht->lock
+	 * when it retires a table - confirm against rhashtable_rehash_table().
+	 */
+	tbl = READ_ONCE(iter->walker.tbl);
+	if (!tbl)
+		return;
+
+	he = __rhashtable_lookup_one(ht, tbl, key, params,
+				     RHT_LOOKUP_NORMAL);
+	if (!he) {
+		struct bucket_table *new_tbl;
+		bool moved = false;
+
+		/* Ensure we see any new tables. */
+		smp_rmb();
+		new_tbl = rht_dereference_rcu(tbl->future_tbl, ht);
+		if (!new_tbl)
+			return;
+
+		he = __rhashtable_lookup_one(ht, new_tbl, key, params,
+					     RHT_LOOKUP_NORMAL);
+		if (!he)
+			return;
+
+		/*
+		 * Only migrate while the walker is still attached to @tbl.
+		 * If it is not, leave the iterator at its default position
+		 * instead of linking it onto a table that may be going away.
+		 */
+		spin_lock(&ht->lock);
+		if (iter->walker.tbl == tbl) {
+			list_del(&iter->walker.list);
+			iter->walker.tbl = new_tbl;
+			list_add(&iter->walker.list, &new_tbl->walkers);
+			moved = true;
+		}
+		spin_unlock(&ht->lock);
+
+		if (!moved)
+			return;
+
+		tbl = new_tbl;
+	}
+
+	/* Point the iterator at @key's element in its bucket of @tbl. */
+	iter->slot = rht_key_hashfn(ht, tbl, key, params);
+	iter->p = he;
+}
+EXPORT_SYMBOL_GPL(rhashtable_walk_enter_from);
+
+
/**
* rhashtable_walk_exit - Free an iterator
* @iter: Hash table Iterator
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 0b33559a910b..0084157a96b4 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -23,6 +23,7 @@
#include <linux/random.h>
#include <linux/vmalloc.h>
#include <linux/wait.h>
+#include <linux/cleanup.h>
#define MAX_ENTRIES 1000000
#define TEST_INSERT_FAIL INT_MAX
@@ -679,6 +680,122 @@ static int threadfunc(void *data)
return err;
}
+/*
+ * Start a walk of @ht positioned at key @id and drain it.
+ *
+ * -EAGAIN results from rhashtable_walk_next() are retried; any other error
+ * pointer ends the walk. *@first is set to the first object returned, or
+ * NULL if the walk returned none.
+ *
+ * Returns the number of objects the walk returned.
+ */
+static int __init walk_enter_from_count(struct rhashtable *ht, int id,
+					 struct test_obj **first)
+{
+	struct test_obj_val key = { .id = id };
+	struct rhashtable_iter iter;
+	struct test_obj *obj;
+	int count = 0;
+
+	*first = NULL;
+
+	/* rhashtable_walk_enter_from() must run under the RCU read lock. */
+	scoped_guard(rcu) {
+		rhashtable_walk_enter_from(ht, &iter, &key, test_rht_params);
+		rhashtable_walk_start(&iter);
+	}
+
+	while ((obj = rhashtable_walk_next(&iter))) {
+		if (IS_ERR(obj)) {
+			if (PTR_ERR(obj) == -EAGAIN)
+				continue;
+			break;
+		}
+		if (!count)
+			*first = obj;
+		count++;
+	}
+
+	rhashtable_walk_stop(&iter);
+	rhashtable_walk_exit(&iter);
+
+	return count;
+}
+
+/*
+ * Self-test for rhashtable_walk_enter_from().
+ *
+ * Returns 0 on success or a negative errno on failure.
+ */
+static int __init test_walk_enter_from(void)
+{
+	struct rhashtable ht;
+	struct test_obj objs[4];
+	const int nr_objs = ARRAY_SIZE(objs);
+	struct test_obj *first;
+	int nr_without_successor = 0;
+	int err, i, count;
+
+	err = rhashtable_init(&ht, &test_rht_params);
+	if (err)
+		return err;
+
+	/* Insert 4 elements with keys 0, 2, 4, 6 */
+	for (i = 0; i < nr_objs; i++) {
+		objs[i].value.id = i * 2;
+		objs[i].value.tid = 0;
+		err = rhashtable_insert_fast(&ht, &objs[i].node,
+					     test_rht_params);
+		if (err) {
+			pr_warn("walk_enter_from: insert %d failed: %d\n",
+				i, err);
+			goto out;
+		}
+	}
+
+	/*
+	 * Test 1: walk_enter_from positions at key, walk_next returns
+	 * the successor (not the key itself).
+	 *
+	 * Which key comes last in iteration order depends on the hash, so
+	 * no particular key is required to have a successor. Only one key
+	 * can be last, though, so at most one walk may come back empty.
+	 */
+	for (i = 0; i < nr_objs; i++) {
+		count = walk_enter_from_count(&ht, i * 2, &first);
+
+		/* Successor must not be the key itself */
+		if (first && first->value.id == i * 2) {
+			pr_warn("walk_enter_from: returned key %d instead of successor\n",
+				i * 2);
+			err = -EINVAL;
+			goto out;
+		}
+
+		if (!count)
+			nr_without_successor++;
+	}
+
+	if (nr_without_successor > 1) {
+		pr_warn("walk_enter_from: %d keys had no successor\n",
+			nr_without_successor);
+		err = -EINVAL;
+		goto out;
+	}
+
+	/*
+	 * Test 2: a key that is not in the table leaves the iterator as
+	 * rhashtable_walk_enter() set it up, so the walk must cover every
+	 * element.
+	 */
+	count = walk_enter_from_count(&ht, 99, &first);
+	if (count < nr_objs) {
+		pr_warn("walk_enter_from: walk from missing key saw %d of %d elements\n",
+			count, nr_objs);
+		err = -EINVAL;
+		goto out;
+	}
+
+	pr_info("walk_enter_from: all tests passed\n");
+	err = 0;
+out:
+	rhashtable_destroy(&ht);
+	return err;
+}
+
+
static int __init test_rht_init(void)
{
unsigned int entries;
@@ -738,6 +855,9 @@ static int __init test_rht_init(void)
test_insert_duplicates_run();
+ pr_info("Testing walk_enter_from: %s\n",
+ test_walk_enter_from() == 0 ? "pass" : "FAIL");
+
if (!tcount)
return 0;
--
2.52.0
next prev parent reply other threads:[~2026-04-08 15:10 UTC|newest]
Thread overview: 36+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-04-08 15:10 [PATCH RFC bpf-next v2 00/18] bpf: Introduce resizable hash map Mykyta Yatsenko
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 01/18] bpf: Register rhash map Mykyta Yatsenko
2026-04-10 22:31 ` Emil Tsalapatis
2026-04-13 8:10 ` Mykyta Yatsenko
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 02/18] bpf: Add resizable hashtab skeleton Mykyta Yatsenko
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 03/18] bpf: Implement lookup, delete, update for resizable hashtab Mykyta Yatsenko
2026-04-12 23:10 ` Alexei Starovoitov
2026-04-13 10:52 ` Mykyta Yatsenko
2026-04-13 16:24 ` Alexei Starovoitov
2026-04-13 16:27 ` Daniel Borkmann
2026-04-08 15:10 ` Mykyta Yatsenko [this message]
2026-04-12 23:13 ` [PATCH RFC bpf-next v2 04/18] rhashtable: Add rhashtable_walk_enter_from() Alexei Starovoitov
2026-04-13 12:22 ` Mykyta Yatsenko
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 05/18] bpf: Implement get_next_key and free_internal_structs for resizable hashtab Mykyta Yatsenko
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 06/18] bpf: Implement bpf_each_rhash_elem() using walk API Mykyta Yatsenko
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 07/18] bpf: Implement batch ops for resizable hashtab Mykyta Yatsenko
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 08/18] bpf: Implement iterator APIs " Mykyta Yatsenko
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 09/18] bpf: Implement alloc and free " Mykyta Yatsenko
2026-04-12 23:15 ` Alexei Starovoitov
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 10/18] bpf: Allow timers, workqueues and task_work in " Mykyta Yatsenko
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 11/18] libbpf: Support resizable hashtable Mykyta Yatsenko
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 12/18] selftests/bpf: Add basic tests for resizable hash map Mykyta Yatsenko
2026-04-12 23:16 ` Alexei Starovoitov
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 13/18] selftests/bpf: Support resizable hashtab in test_maps Mykyta Yatsenko
2026-04-12 23:17 ` Alexei Starovoitov
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 14/18] selftests/bpf: Resizable hashtab BPF_F_LOCK tests Mykyta Yatsenko
2026-04-12 23:18 ` Alexei Starovoitov
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 15/18] selftests/bpf: Add stress tests for resizable hash get_next_key Mykyta Yatsenko
2026-04-12 23:19 ` Alexei Starovoitov
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 16/18] selftests/bpf: Add BPF iterator tests for resizable hash map Mykyta Yatsenko
2026-04-12 23:20 ` Alexei Starovoitov
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 17/18] bpftool: Add rhash map documentation Mykyta Yatsenko
2026-04-08 15:10 ` [PATCH RFC bpf-next v2 18/18] selftests/bpf: Add resizable hashmap to benchmarks Mykyta Yatsenko
2026-04-12 23:25 ` Alexei Starovoitov
2026-04-12 23:11 ` [PATCH RFC bpf-next v2 00/18] bpf: Introduce resizable hash map Alexei Starovoitov
2026-04-13 8:28 ` Mykyta Yatsenko
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20260408-rhash-v2-4-3b3675da1f6e@meta.com \
--to=mykyta.yatsenko5@gmail.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=eddyz87@gmail.com \
--cc=herbert@gondor.apana.org.au \
--cc=kafai@meta.com \
--cc=kernel-team@meta.com \
--cc=memxor@gmail.com \
--cc=yatsenko@meta.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox