BPF List
 help / color / mirror / Atom feed
* [PATCH v3 bpf-next 1/2] bpf: Add bits iterator
@ 2024-03-05  6:43 Yafang Shao
  2024-03-05  6:43 ` [PATCH v3 bpf-next 2/2] selftests/bpf: Add selftest for bits iter Yafang Shao
  2024-03-05 22:03 ` [PATCH v3 bpf-next 1/2] bpf: Add bits iterator Andrii Nakryiko
  0 siblings, 2 replies; 6+ messages in thread
From: Yafang Shao @ 2024-03-05  6:43 UTC (permalink / raw)
  To: ast, daniel, john.fastabend, andrii, martin.lau, eddyz87, song,
	yonghong.song, kpsingh, sdf, haoluo, jolsa
  Cc: bpf, Yafang Shao

Add three new kfuncs for the bits iterator:
- bpf_iter_bits_new
  Initialize a new bits iterator for a given memory area. Due to the
  limitation of bpf memalloc, the max number of bits that can be iterated
  over is limited to (4096 * 8).
- bpf_iter_bits_next
  Get the next bit in a bpf_iter_bits
- bpf_iter_bits_destroy
  Destroy a bpf_iter_bits

The bits iterator facilitates the iteration of the bits of a memory area,
such as cpumask. It can be used in any context and on any address.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 kernel/bpf/helpers.c | 117 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 117 insertions(+)

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index a89587859571..a769561b65ae 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -2545,6 +2545,120 @@ __bpf_kfunc void bpf_throw(u64 cookie)
 	WARN(1, "A call to BPF exception callback should never return\n");
 }
 
+struct bpf_iter_bits {
+	__u64 __opaque[2];
+} __aligned(8);
+
+struct bpf_iter_bits_kern {
+	union {
+		unsigned long *bits;
+		unsigned long bits_copy;
+	};
+	u32 nr_bits;
+	int bit;
+} __aligned(8);
+
+/**
+ * bpf_iter_bits_new() - Initialize a new bits iterator for a given memory area
+ * @it: The new bpf_iter_bits to be created
+ * @unsafe_ptr__ign: A ponter pointing to a memory area to be iterated over
+ * @nr_bits: The number of bits to be iterated over. Due to the limitation of
+ * memalloc, it can't greater than (4096 * 8).
+ *
+ * This function initializes a new bpf_iter_bits structure for iterating over
+ * a memory area which is specified by the @unsafe_ptr__ign and @nr_bits. It
+ * copy the data of the memory area to the newly created bpf_iter_bits @it for
+ * subsequent iteration operations.
+ *
+ * On success, 0 is returned. On failure, ERR is returned.
+ */
+__bpf_kfunc int
+bpf_iter_bits_new(struct bpf_iter_bits *it, const void *unsafe_ptr__ign, u32 nr_bits)
+{
+	struct bpf_iter_bits_kern *kit = (void *)it;
+	u32 size = BITS_TO_BYTES(nr_bits);
+	int err;
+
+	BUILD_BUG_ON(sizeof(struct bpf_iter_bits_kern) != sizeof(struct bpf_iter_bits));
+	BUILD_BUG_ON(__alignof__(struct bpf_iter_bits_kern) !=
+		     __alignof__(struct bpf_iter_bits));
+
+	if (!unsafe_ptr__ign || !nr_bits) {
+		kit->bits = NULL;
+		return -EINVAL;
+	}
+
+	kit->nr_bits = 0;
+	/* Optimization for u64/u32 mask */
+	if (nr_bits <= 64) {
+		err = bpf_probe_read_kernel_common(&kit->bits_copy, size, unsafe_ptr__ign);
+		if (unlikely(err))
+			return -EFAULT;
+
+		kit->nr_bits = nr_bits;
+		kit->bit = -1;
+		return 0;
+	}
+
+	/* Fallback to memalloc */
+	kit->bits = bpf_mem_alloc(&bpf_global_ma, size);
+	if (!kit->bits)
+		return -ENOMEM;
+
+	err = bpf_probe_read_kernel_common(kit->bits, size, unsafe_ptr__ign);
+	if (err) {
+		bpf_mem_free(&bpf_global_ma, kit->bits);
+		return err;
+	}
+
+	kit->nr_bits = nr_bits;
+	kit->bit = -1;
+	return 0;
+}
+
+/**
+ * bpf_iter_bits_next() - Get the next bit in a bpf_iter_bits
+ * @it: The bpf_iter_bits to be checked
+ *
+ * This function returns a pointer to a number representing the value of the
+ * next bit in the bits.
+ *
+ * If there are no further bit available, it returns NULL.
+ */
+__bpf_kfunc int *bpf_iter_bits_next(struct bpf_iter_bits *it)
+{
+	struct bpf_iter_bits_kern *kit = (void *)it;
+	u32 nr_bits = kit->nr_bits;
+	const unsigned long *bits;
+	int bit;
+
+	if (nr_bits == 0)
+		return NULL;
+
+	bits = nr_bits <= 64 ? &kit->bits_copy : kit->bits;
+	bit = find_next_bit(bits, kit->nr_bits, kit->bit + 1);
+	if (bit >= kit->nr_bits)
+		return NULL;
+
+	kit->bit = bit;
+	return &kit->bit;
+}
+
+/**
+ * bpf_iter_bits_destroy() - Destroy a bpf_iter_bits
+ * @it: The bpf_iter_bits to be destroyed
+ *
+ * Destroy the resource associated with the bpf_iter_bits.
+ */
+__bpf_kfunc void bpf_iter_bits_destroy(struct bpf_iter_bits *it)
+{
+	struct bpf_iter_bits_kern *kit = (void *)it;
+
+	if (kit->nr_bits <= 64)
+		return;
+	bpf_mem_free(&bpf_global_ma, kit->bits);
+}
+
 __bpf_kfunc_end_defs();
 
 BTF_KFUNCS_START(generic_btf_ids)
@@ -2621,6 +2735,9 @@ BTF_ID_FLAGS(func, bpf_dynptr_is_null)
 BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly)
 BTF_ID_FLAGS(func, bpf_dynptr_size)
 BTF_ID_FLAGS(func, bpf_dynptr_clone)
+BTF_ID_FLAGS(func, bpf_iter_bits_new, KF_ITER_NEW)
+BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY)
 BTF_KFUNCS_END(common_btf_ids)
 
 static const struct btf_kfunc_id_set common_kfunc_set = {
-- 
2.39.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* [PATCH v3 bpf-next 2/2] selftests/bpf: Add selftest for bits iter
  2024-03-05  6:43 [PATCH v3 bpf-next 1/2] bpf: Add bits iterator Yafang Shao
@ 2024-03-05  6:43 ` Yafang Shao
  2024-03-05 22:05   ` Andrii Nakryiko
  2024-03-05 22:03 ` [PATCH v3 bpf-next 1/2] bpf: Add bits iterator Andrii Nakryiko
  1 sibling, 1 reply; 6+ messages in thread
From: Yafang Shao @ 2024-03-05  6:43 UTC (permalink / raw)
  To: ast, daniel, john.fastabend, andrii, martin.lau, eddyz87, song,
	yonghong.song, kpsingh, sdf, haoluo, jolsa
  Cc: bpf, Yafang Shao

Add selftests for the newly added bits iter.
- bits_iter_success
  - percpu data extracted from the percpu struct should be expected
  - RCU lock is not required
  - It is fine without calling bpf_iter_cpumask_next()
  - It can work as expected when invalid arguments are passed

- bits_iter_failure
  - bpf_iter_bits_destroy() is required after calling
    bpf_iter_bits_new()
  - bpf_iter_bits_destroy() can only destroy an initialized iter
  - bpf_iter_bits_next() must use an initialized iter

This test case can't work correctly on s390x for unknonw reason, thus it
is added to DENYLIST.s390x.

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 tools/testing/selftests/bpf/DENYLIST.s390x    |   3 +-
 .../selftests/bpf/prog_tests/bits_iter.c      | 137 ++++++++++++++++++
 .../bpf/progs/test_bits_iter_failure.c        |  54 +++++++
 .../bpf/progs/test_bits_iter_success.c        | 122 ++++++++++++++++
 4 files changed, 315 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/bits_iter.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_bits_iter_failure.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_bits_iter_success.c

diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
index 1a63996c0304..0cd6d2bf1ff4 100644
--- a/tools/testing/selftests/bpf/DENYLIST.s390x
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -1,5 +1,6 @@
 # TEMPORARY
 # Alphabetical order
-exceptions				 # JIT does not support calling kfunc bpf_throw				       (exceptions)
+bits_iter                                # cpumask iter can't work as expected                                         (?)
+exceptions                               # JIT does not support calling kfunc bpf_throw                                (exceptions)
 get_stack_raw_tp                         # user_stack corrupted user stack                                             (no backchain userspace)
 stacktrace_build_id                      # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2                   (?)
diff --git a/tools/testing/selftests/bpf/prog_tests/bits_iter.c b/tools/testing/selftests/bpf/prog_tests/bits_iter.c
new file mode 100644
index 000000000000..ff4f921c91c9
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bits_iter.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Yafang Shao <laoar.shao@gmail.com> */
+
+#define _GNU_SOURCE
+#include <sched.h>
+
+#include <test_progs.h>
+#include "test_bits_iter_success.skel.h"
+#include "test_bits_iter_failure.skel.h"
+#include "cgroup_helpers.h"
+
+static const char * const positive_testcases[] = {
+	"cpumask_memalloc",
+	"cpumask_copy",
+};
+
+static const char * const negative_testcases[] = {
+	"null_pointer",
+	"zero_bit",
+	"no_mem",
+};
+
+static int read_percpu_data(struct bpf_link *link)
+{
+	int iter_fd, len;
+	char buf[128];
+	size_t left;
+	char *p;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	if (!ASSERT_GE(iter_fd, 0, "iter_fd"))
+		return -1;
+
+	memset(buf, 0, sizeof(buf));
+	left = ARRAY_SIZE(buf);
+	p = buf;
+	while ((len = read(iter_fd, p, left)) > 0) {
+		p += len;
+		left -= len;
+	}
+
+	close(iter_fd);
+	return 0;
+}
+
+static void verify_iter_success(const char *prog_name, bool negative)
+{
+	DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+	struct test_bits_iter_success *skel;
+	union bpf_iter_link_info linfo;
+	int cgrp_fd, err, i, nr_cpus;
+	struct bpf_program *prog;
+	struct bpf_link *link;
+	cpu_set_t set;
+
+	if (setup_cgroup_environment())
+		return;
+
+	/* Utilize the cgroup iter */
+	cgrp_fd = get_root_cgroup();
+	if (!ASSERT_GE(cgrp_fd, 0, "create_cgrp"))
+		goto cleanup;
+
+	skel = test_bits_iter_success__open();
+	if (!ASSERT_OK_PTR(skel, "cpumask_iter_success__open"))
+		goto close_fd;
+
+	skel->bss->pid = getpid();
+
+	err = test_bits_iter_success__load(skel);
+	if (!ASSERT_OK(err, "cpumask_iter_success__load"))
+		goto destroy;
+
+	prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+	if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+		goto destroy;
+
+	memset(&linfo, 0, sizeof(linfo));
+	linfo.cgroup.cgroup_fd = cgrp_fd;
+	linfo.cgroup.order = BPF_CGROUP_ITER_SELF_ONLY;
+	opts.link_info = &linfo;
+	opts.link_info_len = sizeof(linfo);
+	link = bpf_program__attach_iter(prog, &opts);
+	if (!ASSERT_OK_PTR(link, "bpf_program__attach"))
+		goto destroy;
+
+	if (negative)
+		goto negative;
+
+	CPU_ZERO(&set);
+	nr_cpus = libbpf_num_possible_cpus();
+	/* To guarantee the success of "cpumask_copy" at all times */
+	if (nr_cpus > 16)
+		nr_cpus = 16;
+	for (i = 0; i < nr_cpus; i++)
+		CPU_SET(i, &set);
+	err = sched_setaffinity(skel->bss->pid, sizeof(set), &set);
+	if (!ASSERT_OK(err, "setaffinity_all_cpus"))
+		goto free_link;
+	err = read_percpu_data(link);
+	if (!ASSERT_OK(err, "read_percpu_data"))
+		goto free_link;
+
+negative:
+	ASSERT_OK(skel->bss->err, "not_zero");
+
+free_link:
+	bpf_link__destroy(link);
+destroy:
+	test_bits_iter_success__destroy(skel);
+close_fd:
+	close(cgrp_fd);
+cleanup:
+	cleanup_cgroup_environment();
+}
+
+void test_bits_iter(void)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(positive_testcases); i++) {
+		if (!test__start_subtest(positive_testcases[i]))
+			continue;
+
+		verify_iter_success(positive_testcases[i], false);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(negative_testcases); i++) {
+		if (!test__start_subtest(negative_testcases[i]))
+			continue;
+
+		verify_iter_success(negative_testcases[i], true);
+	}
+
+	RUN_TESTS(test_bits_iter_success);
+	RUN_TESTS(test_bits_iter_failure);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_bits_iter_failure.c b/tools/testing/selftests/bpf/progs/test_bits_iter_failure.c
new file mode 100644
index 000000000000..974d0b7a540e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_bits_iter_failure.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2024 Yafang Shao <laoar.shao@gmail.com> */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#include "bpf_misc.h"
+#include "task_kfunc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+int bpf_iter_bits_new(struct bpf_iter_bits *it, const void *unsafe_ptr__ign,
+		      u32 nr_bits) __ksym __weak;
+int *bpf_iter_bits_next(struct bpf_iter_bits *it) __ksym __weak;
+void bpf_iter_bits_destroy(struct bpf_iter_bits *it) __ksym __weak;
+
+SEC("iter.s/cgroup")
+__failure __msg("Unreleased reference id=3 alloc_insn=10")
+int BPF_PROG(no_destroy, struct bpf_iter_meta *meta, struct cgroup *cgrp)
+{
+	struct bpf_iter_bits it;
+	struct task_struct *p;
+
+	p = bpf_task_from_pid(1);
+	if (!p)
+		return 1;
+
+	bpf_iter_bits_new(&it, p->cpus_ptr, 8192);
+
+	bpf_iter_bits_next(&it);
+	bpf_task_release(p);
+	return 0;
+}
+
+SEC("iter/cgroup")
+__failure __msg("expected an initialized iter_bits as arg #1")
+int BPF_PROG(next_uninit, struct bpf_iter_meta *meta, struct cgroup *cgrp)
+{
+	struct bpf_iter_bits *it = NULL;
+
+	bpf_iter_bits_next(it);
+	return 0;
+}
+
+SEC("iter/cgroup")
+__failure __msg("expected an initialized iter_bits as arg #1")
+int BPF_PROG(destroy_uninit, struct bpf_iter_meta *meta, struct cgroup *cgrp)
+{
+	struct bpf_iter_bits it = {};
+
+	bpf_iter_bits_destroy(&it);
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_bits_iter_success.c b/tools/testing/selftests/bpf/progs/test_bits_iter_success.c
new file mode 100644
index 000000000000..77081204dec3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_bits_iter_success.c
@@ -0,0 +1,122 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2024 Yafang Shao <laoar.shao@gmail.com> */
+
+#include "vmlinux.h"
+#include <linux/const.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#include "task_kfunc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+extern const struct rq runqueues __ksym __weak;
+
+int bpf_iter_bits_new(struct bpf_iter_bits *it, const void *unsafe_ptr__ign,
+		      u32 nr_bits) __ksym __weak;
+int *bpf_iter_bits_next(struct bpf_iter_bits *it) __ksym __weak;
+void bpf_iter_bits_destroy(struct bpf_iter_bits *it) __ksym __weak;
+
+int pid, err;
+
+static int cpumask_iter(struct bpf_iter_meta *meta, struct cgroup *cgrp, u32 nr_cpus)
+{
+	struct task_struct *p;
+	u32 nr_running = 0;
+	struct rq *rq;
+	int *cpu;
+
+	/* epilogue */
+	if (!cgrp)
+		return 0;
+
+	p = bpf_task_from_pid(pid);
+	if (!p)
+		return 1;
+
+	bpf_for_each(bits, cpu, p->cpus_ptr, nr_cpus) {
+		rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, *cpu);
+		/* Every valid CPU should possess a runqueue, even in the event of being offline */
+		if (!rq)
+			break;
+		nr_running += rq->nr_running;
+	}
+	if (nr_running == 0)
+		err = 1;
+	bpf_task_release(p);
+	return 0;
+}
+
+SEC("iter.s/cgroup")
+int BPF_PROG(cpumask_memalloc, struct bpf_iter_meta *meta, struct cgroup *cgrp)
+{
+	return cpumask_iter(meta, cgrp, 128);
+}
+
+SEC("iter.s/cgroup")
+int BPF_PROG(cpumask_copy, struct bpf_iter_meta *meta, struct cgroup *cgrp)
+{
+	return cpumask_iter(meta, cgrp, 16);
+}
+
+SEC("iter.s/cgroup")
+int BPF_PROG(null_pointer, struct bpf_iter_meta *meta, struct cgroup *cgrp)
+{
+	int *cpu;
+
+	bpf_for_each(bits, cpu, NULL, 8192)
+		err++;
+	return 0;
+}
+
+SEC("iter.s/cgroup")
+int BPF_PROG(zero_bit, struct bpf_iter_meta *meta, struct cgroup *cgrp)
+{
+	struct task_struct *p;
+	int *cpu;
+
+	p = bpf_task_from_pid(pid);
+	if (!p)
+		return 1;
+
+	bpf_for_each(bits, cpu, p->cpus_ptr, 0)
+		err++;
+	bpf_task_release(p);
+	return 0;
+}
+
+SEC("iter.s/cgroup")
+int BPF_PROG(no_mem, struct bpf_iter_meta *meta, struct cgroup *cgrp)
+{
+	struct task_struct *p;
+	int *cpu;
+
+	p = bpf_task_from_pid(pid);
+	if (!p)
+		return 1;
+
+	/* The max number of memalloc is 4096, thus it will fail to allocate (8192 * 8) */
+	bpf_for_each(bits, cpu, p->cpus_ptr, 8192 * 8)
+		err++;
+	bpf_task_release(p);
+	return 0;
+}
+
+SEC("iter.s/cgroup")
+int BPF_PROG(no_next, struct bpf_iter_meta *meta, struct cgroup *cgrp)
+{
+	struct bpf_iter_bits it;
+	struct task_struct *p;
+
+	p = bpf_task_from_pid(1);
+	if (!p)
+		return 1;
+
+	bpf_iter_bits_new(&it, p->cpus_ptr, 8192);
+
+	/* It functions properly without invoking bpf_iter_bits_next(). */
+
+	bpf_iter_bits_destroy(&it);
+	bpf_task_release(p);
+	return 0;
+}
-- 
2.39.1


^ permalink raw reply related	[flat|nested] 6+ messages in thread

* Re: [PATCH v3 bpf-next 1/2] bpf: Add bits iterator
  2024-03-05  6:43 [PATCH v3 bpf-next 1/2] bpf: Add bits iterator Yafang Shao
  2024-03-05  6:43 ` [PATCH v3 bpf-next 2/2] selftests/bpf: Add selftest for bits iter Yafang Shao
@ 2024-03-05 22:03 ` Andrii Nakryiko
  2024-03-06  2:23   ` Yafang Shao
  1 sibling, 1 reply; 6+ messages in thread
From: Andrii Nakryiko @ 2024-03-05 22:03 UTC (permalink / raw)
  To: Yafang Shao
  Cc: ast, daniel, john.fastabend, andrii, martin.lau, eddyz87, song,
	yonghong.song, kpsingh, sdf, haoluo, jolsa, bpf

On Mon, Mar 4, 2024 at 10:44 PM Yafang Shao <laoar.shao@gmail.com> wrote:
>
> Add three new kfuncs for the bits iterator:
> - bpf_iter_bits_new
>   Initialize a new bits iterator for a given memory area. Due to the
>   limitation of bpf memalloc, the max number of bits that can be iterated
>   over is limited to (4096 * 8).
> - bpf_iter_bits_next
>   Get the next bit in a bpf_iter_bits
> - bpf_iter_bits_destroy
>   Destroy a bpf_iter_bits
>
> The bits iterator facilitates the iteration of the bits of a memory area,
> such as cpumask. It can be used in any context and on any address.
>
> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> ---
>  kernel/bpf/helpers.c | 117 +++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 117 insertions(+)
>
> diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> index a89587859571..a769561b65ae 100644
> --- a/kernel/bpf/helpers.c
> +++ b/kernel/bpf/helpers.c
> @@ -2545,6 +2545,120 @@ __bpf_kfunc void bpf_throw(u64 cookie)
>         WARN(1, "A call to BPF exception callback should never return\n");
>  }
>
> +struct bpf_iter_bits {
> +       __u64 __opaque[2];
> +} __aligned(8);
> +
> +struct bpf_iter_bits_kern {
> +       union {
> +               unsigned long *bits;
> +               unsigned long bits_copy;
> +       };
> +       u32 nr_bits;
> +       int bit;
> +} __aligned(8);
> +
> +/**
> + * bpf_iter_bits_new() - Initialize a new bits iterator for a given memory area
> + * @it: The new bpf_iter_bits to be created
> + * @unsafe_ptr__ign: A ponter pointing to a memory area to be iterated over
> + * @nr_bits: The number of bits to be iterated over. Due to the limitation of
> + * memalloc, it can't greater than (4096 * 8).
> + *
> + * This function initializes a new bpf_iter_bits structure for iterating over
> + * a memory area which is specified by the @unsafe_ptr__ign and @nr_bits. It
> + * copy the data of the memory area to the newly created bpf_iter_bits @it for
> + * subsequent iteration operations.
> + *
> + * On success, 0 is returned. On failure, ERR is returned.
> + */
> +__bpf_kfunc int
> +bpf_iter_bits_new(struct bpf_iter_bits *it, const void *unsafe_ptr__ign, u32 nr_bits)
> +{
> +       struct bpf_iter_bits_kern *kit = (void *)it;
> +       u32 size = BITS_TO_BYTES(nr_bits);
> +       int err;
> +
> +       BUILD_BUG_ON(sizeof(struct bpf_iter_bits_kern) != sizeof(struct bpf_iter_bits));
> +       BUILD_BUG_ON(__alignof__(struct bpf_iter_bits_kern) !=
> +                    __alignof__(struct bpf_iter_bits));
> +
> +       if (!unsafe_ptr__ign || !nr_bits) {
> +               kit->bits = NULL;
> +               return -EINVAL;
> +       }
> +
> +       kit->nr_bits = 0;
> +       /* Optimization for u64/u32 mask */
> +       if (nr_bits <= 64) {
> +               err = bpf_probe_read_kernel_common(&kit->bits_copy, size, unsafe_ptr__ign);

it probably would be better to zero-initialize kit->bits_copy?

but also, please check that this works on big-endian right

> +               if (unlikely(err))
> +                       return -EFAULT;
> +
> +               kit->nr_bits = nr_bits;
> +               kit->bit = -1;
> +               return 0;
> +       }
> +
> +       /* Fallback to memalloc */
> +       kit->bits = bpf_mem_alloc(&bpf_global_ma, size);
> +       if (!kit->bits)
> +               return -ENOMEM;
> +
> +       err = bpf_probe_read_kernel_common(kit->bits, size, unsafe_ptr__ign);
> +       if (err) {
> +               bpf_mem_free(&bpf_global_ma, kit->bits);
> +               return err;
> +       }
> +
> +       kit->nr_bits = nr_bits;
> +       kit->bit = -1;
> +       return 0;
> +}
> +
> +/**
> + * bpf_iter_bits_next() - Get the next bit in a bpf_iter_bits
> + * @it: The bpf_iter_bits to be checked
> + *
> + * This function returns a pointer to a number representing the value of the
> + * next bit in the bits.
> + *
> + * If there are no further bit available, it returns NULL.
> + */
> +__bpf_kfunc int *bpf_iter_bits_next(struct bpf_iter_bits *it)
> +{
> +       struct bpf_iter_bits_kern *kit = (void *)it;
> +       u32 nr_bits = kit->nr_bits;
> +       const unsigned long *bits;
> +       int bit;
> +
> +       if (nr_bits == 0)
> +               return NULL;
> +
> +       bits = nr_bits <= 64 ? &kit->bits_copy : kit->bits;
> +       bit = find_next_bit(bits, kit->nr_bits, kit->bit + 1);
> +       if (bit >= kit->nr_bits)

zero out nr_bits so subsequent next() calls keep returning NULL
(sticky iterator behavior)

> +               return NULL;
> +
> +       kit->bit = bit;
> +       return &kit->bit;
> +}
> +
> +/**
> + * bpf_iter_bits_destroy() - Destroy a bpf_iter_bits
> + * @it: The bpf_iter_bits to be destroyed
> + *
> + * Destroy the resource associated with the bpf_iter_bits.
> + */
> +__bpf_kfunc void bpf_iter_bits_destroy(struct bpf_iter_bits *it)
> +{
> +       struct bpf_iter_bits_kern *kit = (void *)it;
> +
> +       if (kit->nr_bits <= 64)
> +               return;
> +       bpf_mem_free(&bpf_global_ma, kit->bits);
> +}
> +
>  __bpf_kfunc_end_defs();
>
>  BTF_KFUNCS_START(generic_btf_ids)
> @@ -2621,6 +2735,9 @@ BTF_ID_FLAGS(func, bpf_dynptr_is_null)
>  BTF_ID_FLAGS(func, bpf_dynptr_is_rdonly)
>  BTF_ID_FLAGS(func, bpf_dynptr_size)
>  BTF_ID_FLAGS(func, bpf_dynptr_clone)
> +BTF_ID_FLAGS(func, bpf_iter_bits_new, KF_ITER_NEW)
> +BTF_ID_FLAGS(func, bpf_iter_bits_next, KF_ITER_NEXT | KF_RET_NULL)
> +BTF_ID_FLAGS(func, bpf_iter_bits_destroy, KF_ITER_DESTROY)
>  BTF_KFUNCS_END(common_btf_ids)
>
>  static const struct btf_kfunc_id_set common_kfunc_set = {
> --
> 2.39.1
>

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3 bpf-next 2/2] selftests/bpf: Add selftest for bits iter
  2024-03-05  6:43 ` [PATCH v3 bpf-next 2/2] selftests/bpf: Add selftest for bits iter Yafang Shao
@ 2024-03-05 22:05   ` Andrii Nakryiko
  2024-03-06  2:24     ` Yafang Shao
  0 siblings, 1 reply; 6+ messages in thread
From: Andrii Nakryiko @ 2024-03-05 22:05 UTC (permalink / raw)
  To: Yafang Shao
  Cc: ast, daniel, john.fastabend, andrii, martin.lau, eddyz87, song,
	yonghong.song, kpsingh, sdf, haoluo, jolsa, bpf

On Mon, Mar 4, 2024 at 10:44 PM Yafang Shao <laoar.shao@gmail.com> wrote:
>
> Add selftests for the newly added bits iter.
> - bits_iter_success
>   - percpu data extracted from the percpu struct should be expected
>   - RCU lock is not required
>   - It is fine without calling bpf_iter_cpumask_next()
>   - It can work as expected when invalid arguments are passed
>
> - bits_iter_failure
>   - bpf_iter_bits_destroy() is required after calling
>     bpf_iter_bits_new()
>   - bpf_iter_bits_destroy() can only destroy an initialized iter
>   - bpf_iter_bits_next() must use an initialized iter
>
> This test case can't work correctly on s390x for unknonw reason, thus it
> is added to DENYLIST.s390x.

That's an unusual way of handling "doesn't work for unknown reason" :)
This might be an endianness issue I pointed out in patch #1.

pw-bot: cr

>
> Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> ---
>  tools/testing/selftests/bpf/DENYLIST.s390x    |   3 +-
>  .../selftests/bpf/prog_tests/bits_iter.c      | 137 ++++++++++++++++++
>  .../bpf/progs/test_bits_iter_failure.c        |  54 +++++++
>  .../bpf/progs/test_bits_iter_success.c        | 122 ++++++++++++++++
>  4 files changed, 315 insertions(+), 1 deletion(-)
>  create mode 100644 tools/testing/selftests/bpf/prog_tests/bits_iter.c
>  create mode 100644 tools/testing/selftests/bpf/progs/test_bits_iter_failure.c
>  create mode 100644 tools/testing/selftests/bpf/progs/test_bits_iter_success.c
>
> diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
> index 1a63996c0304..0cd6d2bf1ff4 100644
> --- a/tools/testing/selftests/bpf/DENYLIST.s390x
> +++ b/tools/testing/selftests/bpf/DENYLIST.s390x
> @@ -1,5 +1,6 @@
>  # TEMPORARY
>  # Alphabetical order
> -exceptions                              # JIT does not support calling kfunc bpf_throw                                (exceptions)
> +bits_iter                                # cpumask iter can't work as expected                                         (?)
> +exceptions                               # JIT does not support calling kfunc bpf_throw                                (exceptions)
>  get_stack_raw_tp                         # user_stack corrupted user stack                                             (no backchain userspace)
>  stacktrace_build_id                      # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2                   (?)

[...]

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3 bpf-next 1/2] bpf: Add bits iterator
  2024-03-05 22:03 ` [PATCH v3 bpf-next 1/2] bpf: Add bits iterator Andrii Nakryiko
@ 2024-03-06  2:23   ` Yafang Shao
  0 siblings, 0 replies; 6+ messages in thread
From: Yafang Shao @ 2024-03-06  2:23 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: ast, daniel, john.fastabend, andrii, martin.lau, eddyz87, song,
	yonghong.song, kpsingh, sdf, haoluo, jolsa, bpf

On Wed, Mar 6, 2024 at 6:04 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Mon, Mar 4, 2024 at 10:44 PM Yafang Shao <laoar.shao@gmail.com> wrote:
> >
> > Add three new kfuncs for the bits iterator:
> > - bpf_iter_bits_new
> >   Initialize a new bits iterator for a given memory area. Due to the
> >   limitation of bpf memalloc, the max number of bits that can be iterated
> >   over is limited to (4096 * 8).
> > - bpf_iter_bits_next
> >   Get the next bit in a bpf_iter_bits
> > - bpf_iter_bits_destroy
> >   Destroy a bpf_iter_bits
> >
> > The bits iterator facilitates the iteration of the bits of a memory area,
> > such as cpumask. It can be used in any context and on any address.
> >
> > Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
> > ---
> >  kernel/bpf/helpers.c | 117 +++++++++++++++++++++++++++++++++++++++++++
> >  1 file changed, 117 insertions(+)
> >
> > diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
> > index a89587859571..a769561b65ae 100644
> > --- a/kernel/bpf/helpers.c
> > +++ b/kernel/bpf/helpers.c
> > @@ -2545,6 +2545,120 @@ __bpf_kfunc void bpf_throw(u64 cookie)
> >         WARN(1, "A call to BPF exception callback should never return\n");
> >  }
> >
> > +struct bpf_iter_bits {
> > +       __u64 __opaque[2];
> > +} __aligned(8);
> > +
> > +struct bpf_iter_bits_kern {
> > +       union {
> > +               unsigned long *bits;
> > +               unsigned long bits_copy;
> > +       };
> > +       u32 nr_bits;
> > +       int bit;
> > +} __aligned(8);
> > +
> > +/**
> > + * bpf_iter_bits_new() - Initialize a new bits iterator for a given memory area
> > + * @it: The new bpf_iter_bits to be created
> > + * @unsafe_ptr__ign: A ponter pointing to a memory area to be iterated over
> > + * @nr_bits: The number of bits to be iterated over. Due to the limitation of
> > + * memalloc, it can't greater than (4096 * 8).
> > + *
> > + * This function initializes a new bpf_iter_bits structure for iterating over
> > + * a memory area which is specified by the @unsafe_ptr__ign and @nr_bits. It
> > + * copy the data of the memory area to the newly created bpf_iter_bits @it for
> > + * subsequent iteration operations.
> > + *
> > + * On success, 0 is returned. On failure, ERR is returned.
> > + */
> > +__bpf_kfunc int
> > +bpf_iter_bits_new(struct bpf_iter_bits *it, const void *unsafe_ptr__ign, u32 nr_bits)
> > +{
> > +       struct bpf_iter_bits_kern *kit = (void *)it;
> > +       u32 size = BITS_TO_BYTES(nr_bits);
> > +       int err;
> > +
> > +       BUILD_BUG_ON(sizeof(struct bpf_iter_bits_kern) != sizeof(struct bpf_iter_bits));
> > +       BUILD_BUG_ON(__alignof__(struct bpf_iter_bits_kern) !=
> > +                    __alignof__(struct bpf_iter_bits));
> > +
> > +       if (!unsafe_ptr__ign || !nr_bits) {
> > +               kit->bits = NULL;
> > +               return -EINVAL;
> > +       }
> > +
> > +       kit->nr_bits = 0;
> > +       /* Optimization for u64/u32 mask */
> > +       if (nr_bits <= 64) {
> > +               err = bpf_probe_read_kernel_common(&kit->bits_copy, size, unsafe_ptr__ign);
>
> it probably would be better to zero-initialize kit->bits_copy?

will do it.

> but also, please check that this works on big-endian right

will check.

>
> > +               if (unlikely(err))
> > +                       return -EFAULT;
> > +
> > +               kit->nr_bits = nr_bits;
> > +               kit->bit = -1;
> > +               return 0;
> > +       }
> > +
> > +       /* Fallback to memalloc */
> > +       kit->bits = bpf_mem_alloc(&bpf_global_ma, size);
> > +       if (!kit->bits)
> > +               return -ENOMEM;
> > +
> > +       err = bpf_probe_read_kernel_common(kit->bits, size, unsafe_ptr__ign);
> > +       if (err) {
> > +               bpf_mem_free(&bpf_global_ma, kit->bits);
> > +               return err;
> > +       }
> > +
> > +       kit->nr_bits = nr_bits;
> > +       kit->bit = -1;
> > +       return 0;
> > +}
> > +
> > +/**
> > + * bpf_iter_bits_next() - Get the next bit in a bpf_iter_bits
> > + * @it: The bpf_iter_bits to be checked
> > + *
> > + * This function returns a pointer to a number representing the value of the
> > + * next bit in the bits.
> > + *
> > + * If there are no further bit available, it returns NULL.
> > + */
> > +__bpf_kfunc int *bpf_iter_bits_next(struct bpf_iter_bits *it)
> > +{
> > +       struct bpf_iter_bits_kern *kit = (void *)it;
> > +       u32 nr_bits = kit->nr_bits;
> > +       const unsigned long *bits;
> > +       int bit;
> > +
> > +       if (nr_bits == 0)
> > +               return NULL;
> > +
> > +       bits = nr_bits <= 64 ? &kit->bits_copy : kit->bits;
> > +       bit = find_next_bit(bits, kit->nr_bits, kit->bit + 1);
> > +       if (bit >= kit->nr_bits)
>
> zero out nr_bits so subsequent next() calls keep returning NULL
> (sticky iterator behavior)

Good suggestion.  will change it.

-- 
Regards
Yafang

^ permalink raw reply	[flat|nested] 6+ messages in thread

* Re: [PATCH v3 bpf-next 2/2] selftests/bpf: Add selftest for bits iter
  2024-03-05 22:05   ` Andrii Nakryiko
@ 2024-03-06  2:24     ` Yafang Shao
  0 siblings, 0 replies; 6+ messages in thread
From: Yafang Shao @ 2024-03-06  2:24 UTC (permalink / raw)
  To: Andrii Nakryiko
  Cc: ast, daniel, john.fastabend, andrii, martin.lau, eddyz87, song,
	yonghong.song, kpsingh, sdf, haoluo, jolsa, bpf

On Wed, Mar 6, 2024 at 6:05 AM Andrii Nakryiko
<andrii.nakryiko@gmail.com> wrote:
>
> On Mon, Mar 4, 2024 at 10:44 PM Yafang Shao <laoar.shao@gmail.com> wrote:
> >
> > Add selftests for the newly added bits iter.
> > - bits_iter_success
> >   - percpu data extracted from the percpu struct should be expected
> >   - RCU lock is not required
> >   - It is fine without calling bpf_iter_cpumask_next()
> >   - It can work as expected when invalid arguments are passed
> >
> > - bits_iter_failure
> >   - bpf_iter_bits_destroy() is required after calling
> >     bpf_iter_bits_new()
> >   - bpf_iter_bits_destroy() can only destroy an initialized iter
> >   - bpf_iter_bits_next() must use an initialized iter
> >
> > This test case can't work correctly on s390x for unknonw reason, thus it
> > is added to DENYLIST.s390x.
>
> That's an unusual way of handling "doesn't work for unknown reason" :)
> This might be an endianness issue I pointed out in patch #1.
>
> pw-bot: cr

Thanks for your reminder.
will check it.


-- 
Regards
Yafang

^ permalink raw reply	[flat|nested] 6+ messages in thread

end of thread, other threads:[~2024-03-06  2:25 UTC | newest]

Thread overview: 6+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-03-05  6:43 [PATCH v3 bpf-next 1/2] bpf: Add bits iterator Yafang Shao
2024-03-05  6:43 ` [PATCH v3 bpf-next 2/2] selftests/bpf: Add selftest for bits iter Yafang Shao
2024-03-05 22:05   ` Andrii Nakryiko
2024-03-06  2:24     ` Yafang Shao
2024-03-05 22:03 ` [PATCH v3 bpf-next 1/2] bpf: Add bits iterator Andrii Nakryiko
2024-03-06  2:23   ` Yafang Shao

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox