BPF List
 help / color / mirror / Atom feed
From: Emil Tsalapatis <emil@etsalapatis.com>
To: bpf@vger.kernel.org
Cc: ast@kernel.org, andrii@kernel.org, memxor@gmail.com,
	daniel@iogearbox.net, eddyz87@gmail.com, song@kernel.org,
	mattbobfrowski@google.com, Emil Tsalapatis <emil@etsalapatis.com>
Subject: [PATCH bpf-next 2/2] selftests/bpf: libarena: Add Lev-Chase queue data structure
Date: Mon, 11 May 2026 17:07:40 -0400	[thread overview]
Message-ID: <20260511210740.5395-3-emil@etsalapatis.com> (raw)
In-Reply-To: <20260511210740.5395-1-emil@etsalapatis.com>

Expand libarena with a Chase-Lev work-stealing deque (referred to in this
series as a Lev-Chase queue, or lvqueue). It is a single-producer,
multiple-consumer lockless queue that permits efficient work stealing. The
structure is lock-free and wait-free to minimize overhead.

The data structure exposes three main calls. Two of them are available only to
the thread owning the queue, and one is available to all threads in the program:

lvq_owner_push(): Push an item to the bottom of the lvqueue (owner only).
lvq_owner_pop(): Pop an item from the bottom of the lvqueue (owner only).
lvq_steal(): Steal an item from the top of the lvqueue; may be called
from any thread.

Signed-off-by: Emil Tsalapatis <emil@etsalapatis.com>
---
 .../bpf/libarena/include/libarena/lvqueue.h   |  33 +++
 .../bpf/libarena/selftests/st_lvqueue.bpf.c   | 194 ++++++++++++++
 .../selftests/bpf/libarena/src/lvqueue.bpf.c  | 241 ++++++++++++++++++
 3 files changed, 468 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/libarena/include/libarena/lvqueue.h
 create mode 100644 tools/testing/selftests/bpf/libarena/selftests/st_lvqueue.bpf.c
 create mode 100644 tools/testing/selftests/bpf/libarena/src/lvqueue.bpf.c

diff --git a/tools/testing/selftests/bpf/libarena/include/libarena/lvqueue.h b/tools/testing/selftests/bpf/libarena/include/libarena/lvqueue.h
new file mode 100644
index 000000000000..c4091387c7a1
--- /dev/null
+++ b/tools/testing/selftests/bpf/libarena/include/libarena/lvqueue.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause */
+
+#pragma once
+
+struct lv_arr;
+
+#define LV_ARR_BASESZ 128
+#define LV_ARR_ORDERS 10
+
+struct lv_arr {
+	u64 __arena *data;	/* backing storage of LV_ARR_BASESZ << order slots */
+	u64 order;		/* size exponent, see lv_arr_size() */
+};
+
+typedef volatile struct lv_arr __arena lv_arr_t;
+
+struct lv_queue {
+	lv_arr_t *cur;			/* array currently in use, one of arr[] */
+	volatile u64 top;		/* index stealers take from, advanced by CAS */
+	volatile u64 bottom;		/* index the owner pushes to and pops from */
+	struct lv_arr arr[LV_ARR_ORDERS]; /* one array per order, allocated on demand */
+};
+
+typedef struct lv_queue __arena lv_queue_t;
+
+int lvq_owner_push(lv_queue_t *lvq, u64 val);	/* owner thread only */
+int lvq_owner_pop(lv_queue_t *lvq, u64 *val);	/* owner thread only */
+int lvq_steal(lv_queue_t *lvq, u64 *val);	/* any thread */
+
+u64 lvq_create_internal(void);
+#define lvq_create() ((lv_queue_t *)lvq_create_internal())
+
+int lvq_destroy(lv_queue_t *lvq);
diff --git a/tools/testing/selftests/bpf/libarena/selftests/st_lvqueue.bpf.c b/tools/testing/selftests/bpf/libarena/selftests/st_lvqueue.bpf.c
new file mode 100644
index 000000000000..d53416d22f0a
--- /dev/null
+++ b/tools/testing/selftests/bpf/libarena/selftests/st_lvqueue.bpf.c
@@ -0,0 +1,194 @@
+// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause
+
+#include <libarena/common.h>
+
+#include <libarena/asan.h>
+#include <libarena/lvqueue.h>
+
+/*
+ * NOTE: These selftests only test for the single-threaded use case, which for
+ * Lev-Chase queues is obviously the simplest one. Still, it is important to
+ * exercise the API to ensure it passes verification and basic checks.
+ */
+
+/* An owner pop on a freshly created queue must report -ENOENT. */
+SEC("syscall")
+int test_lvqueue_pop_empty(void)
+{
+	lv_queue_t *lvq = lvq_create();
+	u64 val;
+	int ret;
+
+	if (!lvq)
+		return 1;
+
+	ret = lvq_owner_pop(lvq, &val);
+
+	/* Destroy before judging the result so a failing run does not leak. */
+	lvq_destroy(lvq);
+
+	/* 0 on the expected -ENOENT, 1 otherwise. */
+	return ret != -ENOENT;
+}
+
+/* A steal on a freshly created queue must report -ENOENT. */
+SEC("syscall")
+int test_lvqueue_steal_empty(void)
+{
+	lv_queue_t *lvq = lvq_create();
+	u64 val;
+	int ret;
+
+	if (!lvq)
+		return 1;
+
+	ret = lvq_steal(lvq, &val);
+
+	/* Destroy before judging the result so a failing run does not leak. */
+	lvq_destroy(lvq);
+
+	/* 0 on the expected -ENOENT, 1 otherwise. */
+	return ret != -ENOENT;
+}
+
+/* Push one item and immediately steal it back, ten times over. */
+SEC("syscall")
+int test_lvqueue_steal_one(void)
+{
+	lv_queue_t *lvq = lvq_create();
+	u64 val, newval;
+	int err = 0, i;
+
+	if (!lvq)
+		return 1;
+
+	for (i = 0; i < 10 && can_loop; i++) {
+		val = i;
+
+		if (lvq_owner_push(lvq, val))
+			err = 1;	/* push failed */
+		else if (lvq_steal(lvq, &newval))
+			err = 2;	/* steal failed */
+		else if (val != newval)
+			err = 3;	/* stole a value other than the one pushed */
+
+		if (err)
+			break;
+	}
+
+	/* Single exit: the queue is released on failure as well as on success. */
+	lvq_destroy(lvq);
+
+	return err;
+}
+
+/* Push one item and immediately pop it back, ten times over. */
+SEC("syscall")
+int test_lvqueue_pop_one(void)
+{
+	lv_queue_t *lvq = lvq_create();
+	u64 val, newval;
+	int err = 0, i;
+
+	if (!lvq)
+		return 1;
+
+	for (i = 0; i < 10 && can_loop; i++) {
+		val = i;
+
+		if (lvq_owner_push(lvq, val))
+			err = 1;	/* push failed */
+		else if (lvq_owner_pop(lvq, &newval))
+			err = 2;	/* pop failed */
+		else if (val != newval)
+			err = 3;	/* popped a value other than the one pushed */
+
+		if (err)
+			break;
+	}
+
+	/* Single exit: the queue is released on failure as well as on success. */
+	lvq_destroy(lvq);
+
+	return err;
+}
+
+/* Push 500 items, then pop them all; the owner must get them back in LIFO order. */
+SEC("syscall")
+int test_lvqueue_pop_many(void)
+{
+	lv_queue_t *lvq = lvq_create();
+	u64 newval, expected;
+	int ret, i, err = 1;
+
+	if (!lvq)
+		return 1;
+
+	for (i = 0; i < 500 && can_loop; i++) {
+		ret = lvq_owner_push(lvq, i);
+		if (ret) {
+			arena_stderr("%s:%d error %d\n", __func__, __LINE__, ret);
+			goto out;
+		}
+	}
+
+	for (i = 0; i < 500 && can_loop; i++) {
+		ret = lvq_owner_pop(lvq, &newval);
+		if (ret) {
+			arena_stderr("%s:%d error %d\n", __func__, __LINE__, ret);
+			goto out;
+		}
+
+		/* The most recently pushed value must come out first. */
+		expected = 500 - 1 - i;
+		if (newval != expected) {
+			arena_stderr("%s:%d expected %lu found %lu\n", __func__, __LINE__, expected, newval);
+			goto out;
+		}
+	}
+
+	err = 0;
+out:
+	/* Release the queue on every path, including failures. */
+	lvq_destroy(lvq);
+	return err;
+}
+
+/* Push 500 items, then steal them all; stealers must get them in FIFO order. */
+SEC("syscall")
+int test_lvqueue_steal_many(void)
+{
+	lv_queue_t *lvq = lvq_create();
+	u64 newval, expected;
+	int ret, i, err = 1;
+
+	if (!lvq)
+		return 1;
+
+	for (i = 0; i < 500 && can_loop; i++) {
+		ret = lvq_owner_push(lvq, i);
+		if (ret) {
+			arena_stderr("%s:%d error %d\n", __func__, __LINE__, ret);
+			goto out;
+		}
+	}
+
+	for (i = 0; i < 500 && can_loop; i++) {
+		ret = lvq_steal(lvq, &newval);
+		if (ret) {
+			arena_stderr("%s:%d error %d\n", __func__, __LINE__, ret);
+			goto out;
+		}
+
+		expected = i;	/* u64 copy so the argument matches the %lu conversion */
+		if (newval != expected) {
+			arena_stderr("%s:%d expected %lu found %lu\n", __func__, __LINE__, expected, newval);
+			goto out;
+		}
+	}
+
+	err = 0;
+out:
+	lvq_destroy(lvq);	/* also reached on failure, so the queue never leaks */
+	return err;
+}
diff --git a/tools/testing/selftests/bpf/libarena/src/lvqueue.bpf.c b/tools/testing/selftests/bpf/libarena/src/lvqueue.bpf.c
new file mode 100644
index 000000000000..b93c4f9d1c92
--- /dev/null
+++ b/tools/testing/selftests/bpf/libarena/src/lvqueue.bpf.c
@@ -0,0 +1,241 @@
+// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause
+/*
+ * Copyright (c) 2025-2026 Meta Platforms, Inc. and affiliates.
+ * Copyright (c) 2025-2026 Emil Tsalapatis <etsal@meta.com>
+ */
+
+#include <bpf_atomic.h>
+
+#include <libarena/common.h>
+
+#include <libarena/asan.h>
+#include <libarena/lvqueue.h>
+
+/* Slot count of @lv_arr: LV_ARR_BASESZ doubled once per order. */
+static inline u64 lv_arr_size(lv_arr_t *lv_arr)
+{
+	return LV_ARR_BASESZ << READ_ONCE(lv_arr->order);
+}
+
+/* Read the slot that logical index @ind wraps to (modulo the array size). */
+static inline u64 lv_arr_get(lv_arr_t *lv_arr, u64 ind)
+{
+	u64 slot = ind % lv_arr_size(lv_arr);
+
+	return READ_ONCE(lv_arr->data[slot]);
+}
+
+/* Store @value in the slot that logical index @ind wraps to. */
+static inline void lv_arr_put(lv_arr_t *lv_arr, u64 ind, u64 value)
+{
+	WRITE_ONCE(lv_arr->data[ind % lv_arr_size(lv_arr)], value);
+}
+
+/* Copy logical indices [t, b) from @src into @dst, one slot at a time. */
+static inline void lv_arr_copy(lv_arr_t *dst, lv_arr_t *src, u64 b, u64 t)
+{
+	u64 idx;
+
+	for (idx = t; idx < b && can_loop; idx++)
+		lv_arr_put(dst, idx, lv_arr_get(src, idx));
+}
+
+/* Allocate the array for @order on first use; 0 if ready, negative errno otherwise. */
+static inline int lvq_order_init(lv_queue_t *lvq __arg_arena, int order)
+{
+	lv_arr_t *arr;
+
+	if (unlikely(!lvq || order < 0))
+		return -EINVAL;
+
+	if (order >= LV_ARR_ORDERS)
+		return -E2BIG;
+
+	/* Take the slot address only after @lvq and @order are validated. */
+	arr = &lvq->arr[order];
+
+	/* Already allocated? */
+	if (arr->data)
+		return 0;
+
+	arr->data = (u64 __arena *)malloc((LV_ARR_BASESZ << order) * sizeof(*arr->data));
+	return arr->data ? 0 : -ENOMEM;
+}
+
+/* Owner only: store @val at the bottom index, moving to a larger array when nearly full. */
+__weak
+int lvq_owner_push(lv_queue_t *lvq __arg_arena, u64 val)
+{
+	lv_arr_t *cur, *grown;
+	volatile u64 bot, top;
+	ssize_t used;
+	int ret;
+
+	if (unlikely(!lvq))
+		return -EINVAL;
+
+	bot = smp_load_acquire(&lvq->bottom);
+
+	/*
+	 * Bottom has to be loaded before top in this function, hence
+	 * the read barrier (lvq_steal() loads them the other way round).
+	 */
+	smp_rmb();
+
+	top = READ_ONCE(lvq->top);
+	cur = READ_ONCE(lvq->cur);
+
+	used = bot - top;
+	if (used >= lv_arr_size(cur) - 1) {
+		ret = lvq_order_init(lvq, cur->order + 1);
+		if (ret)
+			return ret;
+
+		grown = &lvq->arr[cur->order + 1];
+
+		lv_arr_copy(grown, cur, bot, top);
+		smp_store_release(&lvq->cur, grown);
+	}
+
+	lv_arr_put(lvq->cur, bot, val);
+	smp_store_release(&lvq->bottom, bot + 1);
+
+	return 0;
+}
+
+
+/* Owner only: remove the most recently pushed item and store it in *@val. */
+__weak
+int lvq_owner_pop(lv_queue_t *lvq __arg_arena, u64 *val)
+{
+	volatile u64 bot, top;
+	lv_arr_t *cur;
+	ssize_t left;
+	int ret = 0;
+	u64 item;
+
+	if (unlikely(!lvq || !val))
+		return -EINVAL;
+
+	cur = smp_load_acquire(&lvq->cur);
+
+	/* Publish the decremented bottom before looking at top. */
+	bot = READ_ONCE(lvq->bottom);
+	bot -= 1;
+	WRITE_ONCE(lvq->bottom, bot);
+
+	smp_mb();
+
+	top = READ_ONCE(lvq->top);
+	left = bot - top;
+	if (left < 0) {
+		/* Queue was empty: reset bottom to top. */
+		smp_store_release(&lvq->bottom, top);
+		return -ENOENT;
+	}
+
+	item = lv_arr_get(cur, bot);
+	if (left > 0) {
+		*val = item;
+		return 0;
+	}
+
+	/* Exactly one item left: claim it with a CAS on top, as a stealer would. */
+	if (cmpxchg(&lvq->top, top, top + 1) != top)
+		ret = -EAGAIN;
+
+	smp_store_release(&lvq->bottom, top + 1);
+	if (!ret)
+		*val = item;
+
+	return ret;
+}
+
+/* Any thread: take the item at top into *@val; -ENOENT if empty, -EAGAIN on a lost race. */
+__weak
+int lvq_steal(lv_queue_t *lvq __arg_arena, u64 *val)
+{
+	volatile u64 b, t;
+	lv_arr_t *arr;
+	ssize_t sz;
+	u64 value;
+
+	if (unlikely(!lvq || !val))
+		return -EINVAL;
+
+	t = smp_load_acquire(&lvq->top);
+
+	/*
+	 * t must be read before b so stealers do not race with the owner;
+	 * races between stealers are resolved by the CAS on top below.
+	 */
+	smp_rmb();
+
+	/* Acquire pairs with the owner's release of bottom, so slot t is visible. */
+	b = smp_load_acquire(&lvq->bottom);
+	arr = READ_ONCE(lvq->cur);
+
+	sz = b - t;
+	if (sz <= 0)
+		return -ENOENT;
+
+	/* Read the slot before the CAS; the value is only kept if the CAS wins. */
+	value = lv_arr_get(arr, t);
+	if (cmpxchg(&lvq->top, t, t + 1) != t)
+		return -EAGAIN;
+
+	/* Plain store, consistent with lvq_owner_pop(). */
+	*val = value;
+	return 0;
+}
+
+
+__weak
+u64 lvq_create_internal(void)
+{
+	/*
+	 * Allocates an empty queue; returns its address, or NULL on failure.
+	 * lvq is volatile because otherwise the array reference in the loop
+	 * gets demoted to scalar and the program fails verification.
+	 */
+	volatile lv_queue_t *lvq;
+	int ret, i;
+
+	lvq = malloc(sizeof(*lvq));
+	if (!lvq)
+		return (u64)NULL;
+
+	WRITE_ONCE(lvq->bottom, 0);	/* bottom == top: the queue starts empty */
+	WRITE_ONCE(lvq->top, 0);
+
+	for (i = 0; i < LV_ARR_ORDERS && can_loop; i++) {
+		lvq->arr[i].data = NULL;	/* allocated on demand by lvq_order_init() */
+		lvq->arr[i].order = i;
+	}
+
+	ret = lvq_order_init((lv_queue_t *)lvq, 0);	/* only order 0 is allocated up front */
+	if (ret) {
+		free(lvq);
+		return (u64)NULL;
+	}
+
+	smp_store_release(&lvq->cur, &lvq->arr[0]);
+
+	return (u64)(lvq);
+}
+
+/* Free every per-order array of @lvq, then the queue itself. */
+__weak
+int lvq_destroy(lv_queue_t *lvq __arg_arena)
+{
+	int order;
+
+	if (unlikely(!lvq))
+		return -EINVAL;
+
+	/* Orders that were never allocated still hold the NULL set at creation. */
+	for (order = 0; order < LV_ARR_ORDERS && can_loop; order++)
+		free(lvq->arr[order].data);
+
+	free(lvq);
+	return 0;
+}
-- 
2.54.0


      parent reply	other threads:[~2026-05-11 21:07 UTC|newest]

Thread overview: 3+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-05-11 21:07 [PATCH bpf-next 0/2] selftests/bpf: libarena: Add initial data structures Emil Tsalapatis
2026-05-11 21:07 ` [PATCH bpf-next 1/2] selftests/bpf: libarena: Add rbtree data structure Emil Tsalapatis
2026-05-11 21:07 ` Emil Tsalapatis [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260511210740.5395-3-emil@etsalapatis.com \
    --to=emil@etsalapatis.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=eddyz87@gmail.com \
    --cc=mattbobfrowski@google.com \
    --cc=memxor@gmail.com \
    --cc=song@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox