From: Eduard Zingerman <eddyz87@gmail.com>
To: bpf@vger.kernel.org, ast@kernel.org
Cc: andrii@kernel.org, daniel@iogearbox.net, martin.lau@linux.dev,
kernel-team@fb.com, yonghong.song@linux.dev,
Eduard Zingerman <eddyz87@gmail.com>
Subject: [PATCH bpf-next v1 02/11] bpf: compute SCCs in program control flow graph
Date: Sat, 24 May 2025 12:19:23 -0700 [thread overview]
Message-ID: <20250524191932.389444-3-eddyz87@gmail.com> (raw)
In-Reply-To: <20250524191932.389444-1-eddyz87@gmail.com>
Compute strongly connected components in the program CFG.
Assign an SCC number to each instruction, recorded in
env->insn_aux[*].scc. Use Tarjan's algorithm for SCC computation
adapted to run non-recursively.
For debug purposes print out computed SCCs as a part of full program
dump in compute_live_registers() at log level 2, e.g.:
func#0 @0
Live regs before insn:
0: .......... (b4) w6 = 10
2 1: ......6... (18) r1 = 0xffff88810bbb5565
2 3: .1....6... (b4) w2 = 2
2 4: .12...6... (85) call bpf_trace_printk#6
2 5: ......6... (04) w6 += -1
2 6: ......6... (56) if w6 != 0x0 goto pc-6
7: .......... (b4) w6 = 5
1 8: ......6... (18) r1 = 0xffff88810bbb5567
1 10: .1....6... (b4) w2 = 2
1 11: .12...6... (85) call bpf_trace_printk#6
1 12: ......6... (04) w6 += -1
1 13: ......6... (56) if w6 != 0x0 goto pc-6
14: .......... (b4) w0 = 0
15: 0......... (95) exit
^^^
SCC number for the instruction
Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
---
include/linux/bpf_verifier.h | 5 +
kernel/bpf/verifier.c | 182 +++++++++++++++++++++++++++++++++++
2 files changed, 187 insertions(+)
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index bba64c834234..b6730a2d6d43 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -604,6 +604,11 @@ struct bpf_insn_aux_data {
* accepts callback function as a parameter.
*/
bool calls_callback;
+ /*
+ * CFG strongly connected component this instruction belongs to,
+ * zero if it is a singleton SCC.
+ */
+ u32 scc;
/* registers alive before this instruction. */
u16 live_regs_before;
};
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index df944cc9fc45..23c7136ab6ae 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -23914,6 +23914,10 @@ static int compute_live_registers(struct bpf_verifier_env *env)
if (env->log.level & BPF_LOG_LEVEL2) {
verbose(env, "Live regs before insn:\n");
for (i = 0; i < insn_cnt; ++i) {
+ if (env->insn_aux_data[i].scc)
+ verbose(env, "%3d ", env->insn_aux_data[i].scc);
+ else
+ verbose(env, " ");
verbose(env, "%3d: ", i);
for (j = BPF_REG_0; j < BPF_REG_10; ++j)
if (insn_aux[i].live_regs_before & BIT(j))
@@ -23935,6 +23939,180 @@ static int compute_live_registers(struct bpf_verifier_env *env)
return err;
}
+/*
+ * Compute strongly connected components (SCCs) on the CFG.
+ * Assign an SCC number to each instruction, recorded in env->insn_aux[*].scc.
+ * If instruction is a sole member of its SCC and there are no self edges,
+ * assign it SCC number of zero.
+ * Uses a non-recursive adaptation of Tarjan's algorithm for SCC computation.
+ */
+static int compute_scc(struct bpf_verifier_env *env)
+{
+ const u32 NOT_ON_STACK = U32_MAX;
+
+ struct bpf_insn_aux_data *aux = env->insn_aux_data;
+ const u32 insn_cnt = env->prog->len;
+ int stack_sz, dfs_sz, err = 0;
+ u32 *stack, *pre, *low, *dfs;
+ u32 succ_cnt, i, j, t, w;
+ u32 next_preorder_num;
+ u32 next_scc_id;
+ bool assign_scc;
+ u32 succ[2];
+
+ next_preorder_num = 1;
+ next_scc_id = 1;
+ /*
+ * - 'stack' accumulates vertices in DFS order, see invariant comment below;
+ * - 'pre[t] == p' => preorder number of vertex 't' is 'p';
+ * - 'low[t] == n' => smallest preorder number of the vertex reachable from 't' is 'n';
+ * - 'dfs' DFS traversal stack, used to emulate explicit recursion.
+ */
+ stack = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
+ pre = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
+ low = kvcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
+ dfs = kvcalloc(insn_cnt, sizeof(*dfs), GFP_KERNEL);
+ if (!stack || !pre || !low || !dfs) {
+ err = -ENOMEM;
+ goto exit;
+ }
+ /*
+ * References:
+ * [1] R. Tarjan "Depth-First Search and Linear Graph Algorithms"
+ * [2] D. J. Pearce "A Space-Efficient Algorithm for Finding Strongly Connected Components"
+ *
+ * The algorithm maintains the following invariant:
+ * - suppose there is a path 'u' ~> 'v', such that 'pre[v] < pre[u]';
+ * - then, vertex 'u' remains on stack while vertex 'v' is on stack.
+ *
+ * Consequently:
+ * - If 'low[v] < pre[v]', there is a path from 'v' to some vertex 'u',
+ * such that 'pre[u] == low[v]'; vertex 'u' is currently on the stack,
+ * and thus there is an SCC (loop) containing both 'u' and 'v'.
+ * - If 'low[v] == pre[v]', loops containing 'v' have been explored,
+ * and 'v' can be considered the root of some SCC.
+ *
+ * Here is a pseudo-code for an explicitly recursive version of the algorithm:
+ *
+ * NOT_ON_STACK = insn_cnt + 1
+ * pre = [0] * insn_cnt
+ * low = [0] * insn_cnt
+ * scc = [0] * insn_cnt
+ * stack = []
+ *
+ * next_preorder_num = 1
+ * next_scc_id = 1
+ *
+ * def recur(w):
+ * nonlocal next_preorder_num
+ * nonlocal next_scc_id
+ *
+ * pre[w] = next_preorder_num
+ * low[w] = next_preorder_num
+ * next_preorder_num += 1
+ * stack.append(w)
+ * for s in successors(w):
+ * # Note: for classic algorithm the block below should look as:
+ * #
+ * # if pre[s] == 0:
+ * # recur(s)
+ * # low[w] = min(low[w], low[s])
+ * # elif low[s] != NOT_ON_STACK:
+ * # low[w] = min(low[w], pre[s])
+ * #
+ * # But replacing both 'min' instructions with 'low[w] = min(low[w], low[s])'
+ * # does not break the invariant and makes itartive version of the algorithm
+ * # simpler. See 'Algorithm #3' from [2].
+ *
+ * # 's' not yet visited
+ * if pre[s] == 0:
+ * recur(s)
+ * # if 's' is on stack, pick lowest reachable preorder number from it;
+ * # if 's' is not on stack 'low[s] == NOT_ON_STACK > low[w]',
+ * # so 'min' would be a noop.
+ * low[w] = min(low[w], low[s])
+ *
+ * if low[w] == pre[w]:
+ * # 'w' is the root of an SCC, pop all vertices
+ * # below 'w' on stack and assign same SCC to them.
+ * while True:
+ * t = stack.pop()
+ * low[t] = NOT_ON_STACK
+ * scc[t] = next_scc_id
+ * if t == w:
+ * break
+ * next_scc_id += 1
+ *
+ * for i in range(0, insn_cnt):
+ * if pre[i] == 0:
+ * recur(i)
+ *
+ * Below implementation replaces explicit recusion with array 'dfs'.
+ */
+ for (i = 0; i < insn_cnt; i++) {
+ if (pre[i])
+ continue;
+ stack_sz = 0;
+ dfs_sz = 1;
+ dfs[0] = i;
+dfs_continue:
+ while (dfs_sz) {
+ w = dfs[dfs_sz - 1];
+ if (pre[w] == 0) {
+ low[w] = next_preorder_num;
+ pre[w] = next_preorder_num;
+ next_preorder_num++;
+ stack[stack_sz++] = w;
+ }
+ /* Visit 'w' successors */
+ succ_cnt = insn_successors(env->prog, w, succ);
+ for (j = 0; j < succ_cnt; ++j) {
+ if (pre[succ[j]]) {
+ low[w] = min(low[w], low[succ[j]]);
+ } else {
+ dfs[dfs_sz++] = succ[j];
+ goto dfs_continue;
+ }
+ }
+ /*
+ * Preserve the invariant: if some vertex above in the stack
+ * is reachable from 'w', keep 'w' on the stack.
+ */
+ if (low[w] < pre[w]) {
+ dfs_sz--;
+ goto dfs_continue;
+ }
+ /*
+ * Assign SCC number only if component has two or more elements,
+ * or if component has a self reference.
+ */
+ assign_scc = stack[stack_sz - 1] != w;
+ for (j = 0; j < succ_cnt; ++j) {
+ if (succ[j] == w) {
+ assign_scc = true;
+ break;
+ }
+ }
+ /* Pop component elements from stack */
+ do {
+ t = stack[--stack_sz];
+ low[t] = NOT_ON_STACK;
+ if (assign_scc)
+ aux[t].scc = next_scc_id;
+ } while (t != w);
+ if (assign_scc)
+ next_scc_id++;
+ dfs_sz--;
+ }
+ }
+exit:
+ kvfree(stack);
+ kvfree(pre);
+ kvfree(low);
+ kvfree(dfs);
+ return err;
+}
+
int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u32 uattr_size)
{
u64 start_time = ktime_get_ns();
@@ -24056,6 +24234,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, bpfptr_t uattr, __u3
if (ret)
goto skip_full_check;
+ ret = compute_scc(env);
+ if (ret < 0)
+ goto skip_full_check;
+
ret = compute_live_registers(env);
if (ret < 0)
goto skip_full_check;
--
2.48.1
next prev parent reply other threads:[~2025-05-24 19:19 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2025-05-24 19:19 [PATCH bpf-next v1 00/11] bpf: propagate read/precision marks over state graph backedges Eduard Zingerman
2025-05-24 19:19 ` [PATCH bpf-next v1 01/11] Revert "bpf: use common instruction history across all states" Eduard Zingerman
2025-05-24 19:19 ` Eduard Zingerman [this message]
2025-05-24 19:19 ` [PATCH bpf-next v1 03/11] bpf: frame_insn_idx() utility function Eduard Zingerman
2025-05-24 19:19 ` [PATCH bpf-next v1 04/11] bpf: starting_state parameter for __mark_chain_precision() Eduard Zingerman
2025-05-24 19:19 ` [PATCH bpf-next v1 05/11] bpf: set 'changed' status if propagate_precision() did any updates Eduard Zingerman
2025-05-24 19:19 ` [PATCH bpf-next v1 06/11] bpf: set 'changed' status if propagate_liveness() " Eduard Zingerman
2025-05-24 19:19 ` [PATCH bpf-next v1 07/11] bpf: move REG_LIVE_DONE check to clean_live_states() Eduard Zingerman
2025-05-24 19:19 ` [PATCH bpf-next v1 08/11] bpf: propagate read/precision marks over state graph backedges Eduard Zingerman
2025-05-24 19:19 ` [PATCH bpf-next v1 09/11] bpf: remove {update,get}_loop_entry functions Eduard Zingerman
2025-05-24 19:19 ` [PATCH bpf-next v1 10/11] bpf: include backedges in peak_states stat Eduard Zingerman
2025-05-24 19:19 ` [PATCH bpf-next v1 11/11] selftests/bpf: tests with a loop state missing read/precision mark Eduard Zingerman
2025-05-27 21:26 ` [PATCH bpf-next v1 00/11] bpf: propagate read/precision marks over state graph backedges Eduard Zingerman
2025-06-05 23:02 ` Eduard Zingerman
2025-06-06 2:38 ` Alexei Starovoitov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20250524191932.389444-3-eddyz87@gmail.com \
--to=eddyz87@gmail.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=kernel-team@fb.com \
--cc=martin.lau@linux.dev \
--cc=yonghong.song@linux.dev \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox