[PATCH bpf-next v3 04/10] bpf: Implement batch ops and iterators for resizable hashtab

public inbox for bpf@vger.kernel.org
 help / color / mirror / Atom feed

From: Mykyta Yatsenko <mykyta.yatsenko5@gmail.com>
To: bpf@vger.kernel.org, ast@kernel.org, andrii@kernel.org,
	 daniel@iogearbox.net, kafai@meta.com, kernel-team@meta.com,
	 eddyz87@gmail.com, memxor@gmail.com,
	herbert@gondor.apana.org.au
Cc: Mykyta Yatsenko <yatsenko@meta.com>,
	 Emil Tsalapatis <emil@etsalapatis.com>
Subject: [PATCH bpf-next v3 04/10] bpf: Implement batch ops and iterators for resizable hashtab
Date: Fri, 24 Apr 2026 12:50:46 -0700	[thread overview]
Message-ID: <20260424-rhash-v3-4-d0fa0ce4379b@meta.com> (raw)
In-Reply-To: <20260424-rhash-v3-0-d0fa0ce4379b@meta.com>

From: Mykyta Yatsenko <yatsenko@meta.com>

Add batch operations for BPF_MAP_TYPE_RHASH:
 * rhtab_map_lookup_batch: Bulk lookup of elements by bucket
 * rhtab_map_lookup_and_delete_batch: Bulk lookup and delete; elements
   are copied out first and then deleted in a second pass

The batch implementation uses rhashtable_walk_enter_from() to resume
iteration from the last collected key. When the buffer fills, the last
key becomes the cursor for the next batch call.

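Also implement rhtab_map_mem_usage() to report memory consumption,
bpf_each_rhash_elem() to invoke a callback on each element, and
rhtab_map_seq_show_elem() to print a key/value pair using BTF.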

Wire up seq_file BPF iterator for BPF_MAP_TYPE_RHASH so that
bpf_iter and bpftool map dump work with resizable hash maps.

Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
Reviewed-by: Emil Tsalapatis <emil@etsalapatis.com>
---
 kernel/bpf/hashtab.c | 280 +++++++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 273 insertions(+), 7 deletions(-)

diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index d37f3d548d36..000caa2c7f4c 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -3066,64 +3066,330 @@ static int rhtab_map_get_next_key(struct bpf_map *map, void *key, void *next_key
 
 static void rhtab_map_seq_show_elem(struct bpf_map *map, void *key, struct seq_file *m)
 {
+	void *value;
+
+	/* Guarantee that hashtab value is not freed */
+	guard(rcu)();
+
+	value = rhtab_map_lookup_elem(map, key);
+	if (!value)
+		return;
+
+	btf_type_seq_show(map->btf, map->btf_key_type_id, key, m);
+	seq_puts(m, ": ");
+	btf_type_seq_show(map->btf, map->btf_value_type_id, value, m);
+	seq_putc(m, '\n');
 }
 
 static long bpf_each_rhash_elem(struct bpf_map *map, bpf_callback_t callback_fn,
 				void *callback_ctx, u64 flags)
 {
-	return -EOPNOTSUPP;
+	struct bpf_rhtab *rhtab = container_of(map, struct bpf_rhtab, map);
+	struct rhashtable_iter iter;
+	struct rhtab_elem *elem;
+	int num_elems = 0;
+	u64 ret = 0;
+
+	if (flags != 0)
+		return -EINVAL;
+
+	/*
+	 * The rhashtable walk API uses spin_lock in rhashtable_walk_start/stop,
+	 * which is not safe in NMI or soft/hard IRQ context.
+	 */
+	if (in_nmi() || in_hardirq() || in_softirq())
+		return -EOPNOTSUPP;
+
+	rhashtable_walk_enter(&rhtab->ht, &iter);
+	rhashtable_walk_start(&iter);
+
+	while ((elem = rhashtable_walk_next(&iter))) {
+		/* rhashtable_walk_next returns -EAGAIN on resize, abort */
+		if (IS_ERR(elem)) {
+			num_elems = -EBUSY;
+			break;
+		}
+		num_elems++;
+		ret = callback_fn((u64)(long)map,
+				  (u64)(long)elem->data,
+				  (u64)(long)rhtab_elem_value(elem, map->key_size),
+				  (u64)(long)callback_ctx, 0);
+		if (ret)
+			break;
+	}
+
+	rhashtable_walk_stop(&iter);
+	rhashtable_walk_exit(&iter);
+
+	return num_elems;
 }
 
 static u64 rhtab_map_mem_usage(const struct bpf_map *map)
 {
-	return 0;
+	struct bpf_rhtab *rhtab = container_of(map, struct bpf_rhtab, map);
+	u64 num_entries;
+
+	num_entries = atomic_read(&rhtab->ht.nelems);
+	return sizeof(struct bpf_rhtab) + rhtab->elem_size * num_entries;
+}
+
+static int __rhtab_map_lookup_and_delete_batch(struct bpf_map *map,
+					       const union bpf_attr *attr,
+					       union bpf_attr __user *uattr,
+					       bool do_delete)
+{
+	struct bpf_rhtab *rhtab = container_of(map, struct bpf_rhtab, map);
+	void __user *uvalues = u64_to_user_ptr(attr->batch.values);
+	void __user *ukeys = u64_to_user_ptr(attr->batch.keys);
+	void __user *ubatch = u64_to_user_ptr(attr->batch.in_batch);
+	void *buf = NULL, *keys = NULL, *values = NULL, *dst_key, *dst_val;
+	struct rhtab_elem **del_elems = NULL;
+	u32 max_count, total, key_size, value_size, i;
+	struct rhashtable_iter iter;
+	struct rhtab_elem *elem;
+	u64 elem_map_flags, map_flags;
+	int ret = 0;
+
+	elem_map_flags = attr->batch.elem_flags;
+	ret = bpf_map_check_op_flags(map, elem_map_flags, BPF_F_LOCK);
+	if (ret)
+		return ret;
+
+	map_flags = attr->batch.flags;
+	if (map_flags)
+		return -EINVAL;
+
+	max_count = attr->batch.count;
+	if (!max_count)
+		return 0;
+
+	if (put_user(0, &uattr->batch.count))
+		return -EFAULT;
+
+	key_size = map->key_size;
+	value_size = map->value_size;
+
+	keys = kvmalloc_array(max_count, key_size, GFP_USER | __GFP_NOWARN);
+	values = kvmalloc_array(max_count, value_size, GFP_USER | __GFP_NOWARN);
+	if (do_delete)
+		del_elems = kvmalloc_array(max_count, sizeof(void *),
+					   GFP_USER | __GFP_NOWARN);
+
+	if (!keys || !values || (do_delete && !del_elems)) {
+		ret = -ENOMEM;
+		goto free;
+	}
+
+	/*
+	 * Use the last key from the previous batch as cursor.
+	 * enter_from positions at that key's bucket, walk_next
+	 * returns the successor in O(1).
+	 * First call (ubatch == NULL): starts from bucket 0.
+	 */
+	if (ubatch) {
+		buf = kmalloc(key_size, GFP_USER | __GFP_NOWARN);
+		if (!buf) {
+			ret = -ENOMEM;
+			goto free;
+		}
+		if (copy_from_user(buf, ubatch, key_size)) {
+			ret = -EFAULT;
+			goto free;
+		}
+	}
+
+	scoped_guard(rcu) {
+		rhashtable_walk_enter_from(&rhtab->ht, &iter, buf, rhtab->params);
+		rhashtable_walk_start(&iter);
+	}
+
+	dst_key = keys;
+	dst_val = values;
+	total = 0;
+
+	while (total < max_count) {
+		elem = rhtab_iter_next(&iter);
+		if (!elem)
+			break;
+
+		memcpy(dst_key, elem->data, key_size);
+		rhtab_read_elem_value(map, dst_val, elem, elem_map_flags);
+		check_and_init_map_value(map, dst_val);
+
+		if (do_delete)
+			del_elems[total] = elem;
+
+		dst_key += key_size;
+		dst_val += value_size;
+		total++;
+	}
+
+	if (do_delete) {
+		for (i = 0; i < total; i++)
+			rhtab_delete_elem(rhtab, del_elems[i]);
+	}
+
+	rhashtable_walk_stop(&iter);
+	rhashtable_walk_exit(&iter);
+
+	if (total == 0) {
+		ret = -ENOENT;
+		goto free;
+	}
+
+	/* Signal end of table when we collected fewer than requested */
+	if (total < max_count)
+		ret = -ENOENT;
+
+	/* Write last key as cursor for the next batch call */
+	if (copy_to_user(ukeys, keys, total * key_size) ||
+	    copy_to_user(uvalues, values, total * value_size) ||
+	    put_user(total, &uattr->batch.count) ||
+	    copy_to_user(u64_to_user_ptr(attr->batch.out_batch),
+			 dst_key - key_size, key_size)) {
+		ret = -EFAULT;
+		goto free;
+	}
+
+free:
+	kfree(buf);
+	kvfree(keys);
+	kvfree(values);
+	kvfree(del_elems);
+	return ret;
 }
 
 static int rhtab_map_lookup_batch(struct bpf_map *map, const union bpf_attr *attr,
 				  union bpf_attr __user *uattr)
 {
-	return -EOPNOTSUPP;
+	return __rhtab_map_lookup_and_delete_batch(map, attr, uattr, false);
 }
 
 static int rhtab_map_lookup_and_delete_batch(struct bpf_map *map, const union bpf_attr *attr,
 					     union bpf_attr __user *uattr)
 {
-	return -EOPNOTSUPP;
+	return __rhtab_map_lookup_and_delete_batch(map, attr, uattr, true);
 }
 
 struct bpf_iter_seq_rhash_map_info {
 	struct bpf_map *map;
 	struct bpf_rhtab *rhtab;
 	struct rhashtable_iter iter;
+	void *last_key;
+	bool last_key_valid;
 	bool iter_active;
 };
 
 static void *bpf_rhash_map_seq_start(struct seq_file *seq, loff_t *pos)
+	__acquires(RCU)
 {
-	return NULL;
+	struct bpf_iter_seq_rhash_map_info *info = seq->private;
+	struct rhtab_elem *elem;
+	void *key = *pos > 0 && info->last_key_valid ? info->last_key : NULL;
+
+	scoped_guard(rcu) {
+		rhashtable_walk_enter_from(&info->rhtab->ht, &info->iter,
+					   key, info->rhtab->params);
+		rhashtable_walk_start(&info->iter);
+	}
+	info->iter_active = true;
+
+	elem = rhashtable_walk_next(&info->iter);
+	/* rhashtable_walk_next returns -EAGAIN on resize, abort */
+	if (IS_ERR(elem))
+		return ERR_PTR(-EBUSY);
+	if (!elem)
+		return NULL;
+	if (*pos == 0)
+		++*pos;
+	return elem;
 }
 
 static void *bpf_rhash_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
-	return NULL;
+	struct bpf_iter_seq_rhash_map_info *info = seq->private;
+	struct rhtab_elem *elem = v;
+
+	memcpy(info->last_key, elem->data, info->map->key_size);
+	info->last_key_valid = true;
+
+	++*pos;
+
+	elem = rhashtable_walk_next(&info->iter);
+	/* rhashtable_walk_next returns -EAGAIN on resize, abort */
+	if (IS_ERR(elem))
+		return ERR_PTR(-EBUSY);
+
+	return elem;
+}
+
+static int __bpf_rhash_map_seq_show(struct seq_file *seq,
+				    struct rhtab_elem *elem)
+{
+	struct bpf_iter_seq_rhash_map_info *info = seq->private;
+	struct bpf_iter__bpf_map_elem ctx = {};
+	struct bpf_iter_meta meta;
+	struct bpf_prog *prog;
+	int ret = 0;
+
+	meta.seq = seq;
+	prog = bpf_iter_get_info(&meta, elem == NULL);
+	if (prog) {
+		ctx.meta = &meta;
+		ctx.map = info->map;
+		if (elem) {
+			ctx.key = elem->data;
+			ctx.value = rhtab_elem_value(elem, info->map->key_size);
+		}
+		ret = bpf_iter_run_prog(prog, &ctx);
+	}
+
+	return ret;
 }
 
 static int bpf_rhash_map_seq_show(struct seq_file *seq, void *v)
 {
-	return 0;
+	return __bpf_rhash_map_seq_show(seq, v);
 }
 
 static void bpf_rhash_map_seq_stop(struct seq_file *seq, void *v)
+	__releases(RCU)
 {
+	struct bpf_iter_seq_rhash_map_info *info = seq->private;
+
+	if (!v)
+		(void)__bpf_rhash_map_seq_show(seq, NULL);
+
+	if (info->iter_active) {
+		rhashtable_walk_stop(&info->iter);
+		rhashtable_walk_exit(&info->iter);
+		info->iter_active = false;
+	}
 }
 
 static int bpf_iter_init_rhash_map(void *priv_data, struct bpf_iter_aux_info *aux)
 {
+	struct bpf_iter_seq_rhash_map_info *info = priv_data;
+	struct bpf_map *map = aux->map;
+
+	info->last_key_valid = false;
+	info->last_key = kmalloc(map->key_size, GFP_USER);
+	if (!info->last_key)
+		return -ENOMEM;
+
+	bpf_map_inc_with_uref(map);
+	info->map = map;
+	info->rhtab = container_of(map, struct bpf_rhtab, map);
+	info->iter_active = false;
 	return 0;
 }
 
 static void bpf_iter_fini_rhash_map(void *priv_data)
 {
+	struct bpf_iter_seq_rhash_map_info *info = priv_data;
+
+	kfree(info->last_key);
+	bpf_map_put_with_uref(info->map);
 }
 
 static const struct seq_operations bpf_rhash_map_seq_ops = {

-- 
2.52.0

next prev parent reply	other threads:[~2026-04-24 19:51 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-24 19:50 [PATCH bpf-next v3 00/10] bpf: Introduce resizable hash map Mykyta Yatsenko
2026-04-24 19:50 ` [PATCH bpf-next v3 01/10] bpf: Implement resizable hashmap basic functions Mykyta Yatsenko
2026-04-24 20:40   ` sashiko-bot
2026-04-25 20:41     ` Mykyta Yatsenko
2026-04-24 20:45   ` bot+bpf-ci
2026-04-25 20:50     ` Mykyta Yatsenko
2026-04-24 19:50 ` [PATCH bpf-next v3 02/10] rhashtable: Add rhashtable_walk_enter_from() Mykyta Yatsenko
2026-04-24 20:15   ` sashiko-bot
2026-04-24 20:45   ` bot+bpf-ci
2026-04-28 10:35   ` Herbert Xu
2026-04-24 19:50 ` [PATCH bpf-next v3 03/10] bpf: Implement get_next_key() resizable hashtab Mykyta Yatsenko
2026-04-28 10:33   ` Herbert Xu
2026-04-28 13:20     ` Mykyta Yatsenko
2026-04-24 19:50 ` Mykyta Yatsenko [this message]
2026-04-24 20:28   ` [PATCH bpf-next v3 04/10] bpf: Implement batch ops and iterators for " sashiko-bot
2026-04-25 21:24     ` Mykyta Yatsenko
2026-04-27 13:36       ` Mykyta Yatsenko
2026-04-24 19:50 ` [PATCH bpf-next v3 05/10] bpf: Allow timers, workqueues and task_work in " Mykyta Yatsenko
2026-04-24 21:05   ` sashiko-bot
2026-04-25 21:29     ` Mykyta Yatsenko
2026-04-24 19:50 ` [PATCH bpf-next v3 06/10] libbpf: Support resizable hashtable Mykyta Yatsenko
2026-04-24 19:50 ` [PATCH bpf-next v3 07/10] selftests/bpf: Add basic tests for resizable hash map Mykyta Yatsenko
2026-04-24 20:02   ` sashiko-bot
2026-04-24 20:32   ` bot+bpf-ci
2026-04-24 19:50 ` [PATCH bpf-next v3 08/10] selftests/bpf: Add BPF iterator " Mykyta Yatsenko
2026-04-24 19:50 ` [PATCH bpf-next v3 09/10] bpftool: Add rhash map documentation Mykyta Yatsenko
2026-04-24 19:50 ` [PATCH bpf-next v3 10/10] selftests/bpf: Add resizable hashmap to benchmarks Mykyta Yatsenko

find likely ancestor, descendant, or conflicting patches for this message:
( dfblob:d37f3d548d3 dfblob:000caa2c7f4 )
 OR (
bs:"[PATCH bpf-next v3 04/10] bpf: Implement batch ops and iterators for resizable hashtab" )
	(help)

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260424-rhash-v3-4-d0fa0ce4379b@meta.com \
    --to=mykyta.yatsenko5@gmail.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=eddyz87@gmail.com \
    --cc=emil@etsalapatis.com \
    --cc=herbert@gondor.apana.org.au \
    --cc=kafai@meta.com \
    --cc=kernel-team@meta.com \
    --cc=memxor@gmail.com \
    --cc=yatsenko@meta.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox