From: Joanne Koong <joannekoong@fb.com>
To: <bpf@vger.kernel.org>
Cc: <andrii@kernel.org>, <ast@kernel.org>, <daniel@iogearbox.net>,
<Kernel-team@fb.com>, Joanne Koong <joannekoong@fb.com>
Subject: [PATCH v2 bpf-next 1/4] bpf: Add bpf_loop helper
Date: Tue, 23 Nov 2021 10:34:06 -0800 [thread overview]
Message-ID: <20211123183409.3599979-2-joannekoong@fb.com> (raw)
In-Reply-To: <20211123183409.3599979-1-joannekoong@fb.com>
This patch adds the kernel-side and API changes for a new helper
function, bpf_loop:
long bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx,
u64 flags);
where long (*callback_fn)(u32 index, void *ctx);
bpf_loop invokes the "callback_fn" **nr_loops** times or until the
callback_fn returns 1. The callback_fn can only return 0 or 1, and
this is enforced by the verifier. The callback_fn index is zero-indexed.
A few things to please note:
~ The "u64 flags" parameter is currently unused but is included in
case a future use case for it arises.
~ In the kernel-side implementation of bpf_loop (kernel/bpf/bpf_iter.c),
bpf_callback_t is used as the callback function cast.
~ A program can have nested bpf_loop calls but the program must
still adhere to the verifier constraint of its stack depth (the stack depth
cannot exceed MAX_BPF_STACK))
~ Recursive callback_fns do not pass the verifier, due to the call stack
for these being too deep.
~ The next patch will include the tests and benchmark
Signed-off-by: Joanne Koong <joannekoong@fb.com>
---
include/linux/bpf.h | 1 +
include/uapi/linux/bpf.h | 25 +++++++++
kernel/bpf/bpf_iter.c | 35 ++++++++++++
kernel/bpf/helpers.c | 2 +
kernel/bpf/verifier.c | 97 +++++++++++++++++++++-------------
tools/include/uapi/linux/bpf.h | 25 +++++++++
6 files changed, 148 insertions(+), 37 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index cc7a0c36e7df..cad0829710be 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -2164,6 +2164,7 @@ extern const struct bpf_func_proto bpf_sk_setsockopt_proto;
extern const struct bpf_func_proto bpf_sk_getsockopt_proto;
extern const struct bpf_func_proto bpf_kallsyms_lookup_name_proto;
extern const struct bpf_func_proto bpf_find_vma_proto;
+extern const struct bpf_func_proto bpf_loop_proto;
const struct bpf_func_proto *tracing_prog_func_proto(
enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index a69e4b04ffeb..ea37f0ab4231 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -4957,6 +4957,30 @@ union bpf_attr {
* **-ENOENT** if *task->mm* is NULL, or no vma contains *addr*.
* **-EBUSY** if failed to try lock mmap_lock.
* **-EINVAL** for invalid **flags**.
+ *
+ * long bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, u64 flags)
+ * Description
+ * For **nr_loops**, call **callback_fn** function
+ * with **callback_ctx** as the context parameter.
+ * The **callback_fn** should be a static function and
+ * the **callback_ctx** should be a pointer to the stack.
+ * The **flags** is used to control certain aspects of the helper.
+ * Currently, the **flags** must be 0. Currently, nr_loops is
+ * limited to 1 << 23 (~8 million) loops.
+ *
+ * long (\*callback_fn)(u32 index, void \*ctx);
+ *
+ * where **index** is the current index in the loop. The index
+ * is zero-indexed.
+ *
+ * If **callback_fn** returns 0, the helper will continue to the next
+ * loop. If return value is 1, the helper will skip the rest of
+ * the loops and return. Other return values are not used now,
+ * and will be rejected by the verifier.
+ *
+ * Return
+ * The number of loops performed, **-EINVAL** for invalid **flags** or
+ * if **nr_loops** exceeds the maximum number of loops.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -5140,6 +5164,7 @@ union bpf_attr {
FN(skc_to_unix_sock), \
FN(kallsyms_lookup_name), \
FN(find_vma), \
+ FN(loop), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index b2ee45064e06..1a29f650e3e0 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -714,3 +714,38 @@ const struct bpf_func_proto bpf_for_each_map_elem_proto = {
.arg3_type = ARG_PTR_TO_STACK_OR_NULL,
.arg4_type = ARG_ANYTHING,
};
+
+/* maximum number of loops */
+#define MAX_LOOPS BIT(23)
+
+BPF_CALL_4(bpf_loop, u32, nr_loops, void *, callback_fn, void *, callback_ctx,
+ u64, flags)
+{
+ bpf_callback_t callback = (bpf_callback_t)callback_fn;
+ u64 ret;
+ u32 i;
+
+ if (flags || nr_loops > MAX_LOOPS)
+ return -EINVAL;
+
+ for (i = 0; i < nr_loops; i++) {
+ ret = callback((u64)i, (u64)(long)callback_ctx, 0, 0, 0);
+ /* return value: 0 - continue, 1 - stop and return */
+ if (ret) {
+ i++;
+ break;
+ }
+ }
+
+ return i;
+}
+
+const struct bpf_func_proto bpf_loop_proto = {
+ .func = bpf_loop,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_ANYTHING,
+ .arg2_type = ARG_PTR_TO_FUNC,
+ .arg3_type = ARG_PTR_TO_STACK_OR_NULL,
+ .arg4_type = ARG_ANYTHING,
+};
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 1ffd469c217f..52188004a9c3 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1378,6 +1378,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
return &bpf_ringbuf_query_proto;
case BPF_FUNC_for_each_map_elem:
return &bpf_for_each_map_elem_proto;
+ case BPF_FUNC_loop:
+ return &bpf_loop_proto;
default:
break;
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 0763cca139a7..d674769d411b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -6085,6 +6085,27 @@ static int set_map_elem_callback_state(struct bpf_verifier_env *env,
return 0;
}
+static int set_loop_callback_state(struct bpf_verifier_env *env,
+ struct bpf_func_state *caller,
+ struct bpf_func_state *callee,
+ int insn_idx)
+{
+ /* bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx,
+ * u64 flags);
+ * callback_fn(u32 index, void *callback_ctx);
+ */
+ callee->regs[BPF_REG_1].type = SCALAR_VALUE;
+ callee->regs[BPF_REG_2] = caller->regs[BPF_REG_3];
+
+ /* unused */
+ __mark_reg_not_init(env, &callee->regs[BPF_REG_3]);
+ __mark_reg_not_init(env, &callee->regs[BPF_REG_4]);
+ __mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
+
+ callee->in_callback_fn = true;
+ return 0;
+}
+
static int set_timer_callback_state(struct bpf_verifier_env *env,
struct bpf_func_state *caller,
struct bpf_func_state *callee,
@@ -6458,13 +6479,7 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
return err;
}
- if (func_id == BPF_FUNC_tail_call) {
- err = check_reference_leak(env);
- if (err) {
- verbose(env, "tail_call would lead to reference leak\n");
- return err;
- }
- } else if (is_release_function(func_id)) {
+ if (is_release_function(func_id)) {
err = release_reference(env, meta.ref_obj_id);
if (err) {
verbose(env, "func %s#%d reference has not been acquired before\n",
@@ -6475,42 +6490,50 @@ static int check_helper_call(struct bpf_verifier_env *env, struct bpf_insn *insn
regs = cur_regs(env);
- /* check that flags argument in get_local_storage(map, flags) is 0,
- * this is required because get_local_storage() can't return an error.
- */
- if (func_id == BPF_FUNC_get_local_storage &&
- !register_is_null(®s[BPF_REG_2])) {
- verbose(env, "get_local_storage() doesn't support non-zero flags\n");
- return -EINVAL;
- }
-
- if (func_id == BPF_FUNC_for_each_map_elem) {
- err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
- set_map_elem_callback_state);
- if (err < 0)
+ switch (func_id) {
+ case BPF_FUNC_tail_call:
+ err = check_reference_leak(env);
+ if (err) {
+ verbose(env, "tail_call would lead to reference leak\n");
+ return err;
+ }
+ break;
+ case BPF_FUNC_get_local_storage:
+ /* check that flags argument in get_local_storage(map, flags) is 0,
+ * this is required because get_local_storage() can't return an error.
+ */
+ if (!register_is_null(®s[BPF_REG_2])) {
+ verbose(env, "get_local_storage() doesn't support non-zero flags\n");
return -EINVAL;
- }
-
- if (func_id == BPF_FUNC_timer_set_callback) {
+ }
+ err = 0;
+ break;
+ case BPF_FUNC_for_each_map_elem:
err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
- set_timer_callback_state);
- if (err < 0)
- return -EINVAL;
- }
-
- if (func_id == BPF_FUNC_find_vma) {
+ set_map_elem_callback_state) ? -EINVAL : 0;
+ break;
+ case BPF_FUNC_timer_set_callback:
err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
- set_find_vma_callback_state);
- if (err < 0)
- return -EINVAL;
- }
-
- if (func_id == BPF_FUNC_snprintf) {
+ set_timer_callback_state) ? -EINVAL : 0;
+ break;
+ case BPF_FUNC_find_vma:
+ err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
+ set_find_vma_callback_state) ? -EINVAL : 0;
+ break;
+ case BPF_FUNC_snprintf:
err = check_bpf_snprintf_call(env, regs);
- if (err < 0)
- return err;
+ break;
+ case BPF_FUNC_loop:
+ err = __check_func_call(env, insn, insn_idx_p, meta.subprogno,
+ set_loop_callback_state) ? -EINVAL : 0;
+ break;
+ default:
+ err = 0;
}
+ if (err)
+ return err;
+
/* reset caller saved regs */
for (i = 0; i < CALLER_SAVED_REGS; i++) {
mark_reg_not_init(env, regs, caller_saved[i]);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index a69e4b04ffeb..ea37f0ab4231 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -4957,6 +4957,30 @@ union bpf_attr {
* **-ENOENT** if *task->mm* is NULL, or no vma contains *addr*.
* **-EBUSY** if failed to try lock mmap_lock.
* **-EINVAL** for invalid **flags**.
+ *
+ * long bpf_loop(u32 nr_loops, void *callback_fn, void *callback_ctx, u64 flags)
+ * Description
+ * For **nr_loops**, call **callback_fn** function
+ * with **callback_ctx** as the context parameter.
+ * The **callback_fn** should be a static function and
+ * the **callback_ctx** should be a pointer to the stack.
+ * The **flags** is used to control certain aspects of the helper.
+ * Currently, the **flags** must be 0. Currently, nr_loops is
+ * limited to 1 << 23 (~8 million) loops.
+ *
+ * long (\*callback_fn)(u32 index, void \*ctx);
+ *
+ * where **index** is the current index in the loop. The index
+ * is zero-indexed.
+ *
+ * If **callback_fn** returns 0, the helper will continue to the next
+ * loop. If return value is 1, the helper will skip the rest of
+ * the loops and return. Other return values are not used now,
+ * and will be rejected by the verifier.
+ *
+ * Return
+ * The number of loops performed, **-EINVAL** for invalid **flags** or
+ * if **nr_loops** exceeds the maximum number of loops.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -5140,6 +5164,7 @@ union bpf_attr {
FN(skc_to_unix_sock), \
FN(kallsyms_lookup_name), \
FN(find_vma), \
+ FN(loop), \
/* */
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
--
2.30.2
next prev parent reply other threads:[~2021-11-23 18:34 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2021-11-23 18:34 [PATCH v2 bpf-next 0/4] Add bpf_loop_helper Joanne Koong
2021-11-23 18:34 ` Joanne Koong [this message]
2021-11-23 22:46 ` [PATCH v2 bpf-next 1/4] bpf: Add bpf_loop helper Andrii Nakryiko
2021-11-23 18:34 ` [PATCH v2 bpf-next 2/4] selftests/bpf: Add bpf_loop test Joanne Koong
2021-11-23 18:34 ` [PATCH v2 bpf-next 3/4] selftests/bpf: measure bpf_loop verifier performance Joanne Koong
2021-11-23 18:34 ` [PATCH v2 bpf-next 4/4] selftest/bpf/benchs: add bpf_loop benchmark Joanne Koong
2021-11-23 19:19 ` Toke Høiland-Jørgensen
2021-11-24 0:20 ` Joanne Koong
2021-11-24 12:56 ` Toke Høiland-Jørgensen
2021-11-24 19:26 ` Andrii Nakryiko
2021-11-24 21:59 ` Toke Høiland-Jørgensen
2021-11-25 0:04 ` Joanne Koong
2021-11-25 11:35 ` Toke Høiland-Jørgensen
2021-11-29 19:41 ` Joanne Koong
2021-11-23 18:47 ` [PATCH v2 bpf-next 0/4] Add bpf_loop_helper Joanne Koong
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20211123183409.3599979-2-joannekoong@fb.com \
--to=joannekoong@fb.com \
--cc=Kernel-team@fb.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox