public inbox for bpf@vger.kernel.org
 help / color / mirror / Atom feed
From: Eduard Zingerman <eddyz87@gmail.com>
To: bpf@vger.kernel.org, ast@kernel.org, andrii@kernel.org
Cc: daniel@iogearbox.net, martin.lau@linux.dev, kernel-team@fb.com,
	yonghong.song@linux.dev, eddyz87@gmail.com
Subject: [PATCH bpf-next 13/14] selftests/bpf: add new tests for static stack liveness analysis
Date: Wed,  8 Apr 2026 18:33:15 -0700	[thread overview]
Message-ID: <20260408-patch-set-v1-13-1a666e860d42@gmail.com> (raw)
In-Reply-To: <20260408-patch-set-v1-0-1a666e860d42@gmail.com>

From: Alexei Starovoitov <ast@kernel.org>

Add a bunch of new tests to verify the static stack
liveness analysis.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
---
 .../selftests/bpf/progs/verifier_live_stack.c      | 2103 ++++++++++++++++++++
 1 file changed, 2103 insertions(+)

diff --git a/tools/testing/selftests/bpf/progs/verifier_live_stack.c b/tools/testing/selftests/bpf/progs/verifier_live_stack.c
index 0281955139c7543f44c5c1b62c69d5f121988e98..4aee1616964d2fe365fcac644854706c1629b67b 100644
--- a/tools/testing/selftests/bpf/progs/verifier_live_stack.c
+++ b/tools/testing/selftests/bpf/progs/verifier_live_stack.c
@@ -13,6 +13,15 @@ struct {
 	__type(value, long long);
 } map SEC(".maps");
 
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u64);
+} array_map_8b SEC(".maps");
+
+const char snprintf_u64_fmt[] = "%llu";
+
 SEC("socket")
 __log_level(2)
 __msg("0: (79) r1 = *(u64 *)(r10 -8)        ; use: fp0-8")
@@ -154,6 +163,33 @@ static __used __naked void read_first_param(void)
 	::: __clobber_all);
 }
 
+SEC("socket")
+__success
+__naked void arg_track_join_convergence(void)
+{
+	asm volatile (
+	"r1 = 1;"
+	"r2 = 2;"
+	"call arg_track_join_convergence_subprog;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void arg_track_join_convergence_subprog(void)
+{
+	asm volatile (
+	"if r1 == 0 goto 1f;"
+	"r0 = r1;"
+	"goto 2f;"
+"1:"
+	"r0 = r2;"
+"2:"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
 SEC("socket")
 __flag(BPF_F_TEST_STATE_FREQ)
 __log_level(2)
@@ -324,3 +360,2070 @@ static __used __naked unsigned long write_tail_call(void)
           __imm_addr(map_array)
         : __clobber_all);
 }
+
+/* Test precise subprog stack access analysis.
+ * Caller passes fp-32 (SPI 3) to callee that only accesses arg+0 and arg+8
+ * (SPIs 3 and 2). Slots 0 and 1 should NOT be live at the call site.
+ *
+ * Insn layout:
+ *   0: *(u64*)(r10 - 8) = 0      write SPI 0
+ *   1: *(u64*)(r10 - 16) = 0     write SPI 1
+ *   2: *(u64*)(r10 - 24) = 0     write SPI 2
+ *   3: *(u64*)(r10 - 32) = 0     write SPI 3
+ *   4: r1 = r10
+ *   5: r1 += -32
+ *   6: call precise_read_two      passes fp-32 (SPI 3)
+ *   7: r0 = 0
+ *   8: exit
+ *
+ * At insn 6 only SPIs 2,3 should be live (slots 4-7, 0xf0).
+ * SPIs 0,1 are written but never read → dead.
+ */
+SEC("socket")
+__log_level(2)
+__msg("6: (85) call pc+{{.*}}                   ; use: fp0-24 fp0-32{{$}}")
+__naked void subprog_precise_stack_access(void)
+{
+	asm volatile (
+	"*(u64 *)(r10 - 8) = 0;"
+	"*(u64 *)(r10 - 16) = 0;"
+	"*(u64 *)(r10 - 24) = 0;"
+	"*(u64 *)(r10 - 32) = 0;"
+	"r1 = r10;"
+	"r1 += -32;"
+	"call precise_read_two;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Callee reads only at arg+0 (SPI 3) and arg+8 (SPI 2) */
+static __used __naked void precise_read_two(void)
+{
+	asm volatile (
+	"r0 = *(u64 *)(r1 + 0);"
+	"r2 = *(u64 *)(r1 + 8);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Test that multi-level subprog calls (callee passes arg-derived ptr
+ * to another BPF subprog) are analyzed precisely.
+ *
+ * Caller passes fp-32 (SPI 3). The callee forwards it to inner_callee.
+ * inner_callee only reads at offset 0 from the pointer.
+ * The analysis recurses into forward_to_inner -> inner_callee and
+ * determines only SPI 3 is accessed (slots 6-7, 0xc0), not all of SPIs 0-3.
+ *
+ * Insn layout:
+ *   0: *(u64*)(r10 - 8) = 0      write SPI 0
+ *   1: *(u64*)(r10 - 16) = 0     write SPI 1
+ *   2: *(u64*)(r10 - 24) = 0     write SPI 2
+ *   3: *(u64*)(r10 - 32) = 0     write SPI 3
+ *   4: r1 = r10
+ *   5: r1 += -32
+ *   6: call forward_to_inner      passes fp-32 (SPI 3)
+ *   7: r0 = 0
+ *   8: exit
+ */
+SEC("socket")
+__log_level(2)
+__msg("6: (85) call pc+{{.*}}                   ; use: fp0-32{{$}}")
+__naked void subprog_multilevel_conservative(void)
+{
+	asm volatile (
+	"*(u64 *)(r10 - 8) = 0;"
+	"*(u64 *)(r10 - 16) = 0;"
+	"*(u64 *)(r10 - 24) = 0;"
+	"*(u64 *)(r10 - 32) = 0;"
+	"r1 = r10;"
+	"r1 += -32;"
+	"call forward_to_inner;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Forwards arg to another subprog */
+static __used __naked void forward_to_inner(void)
+{
+	asm volatile (
+	"call inner_callee;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void inner_callee(void)
+{
+	asm volatile (
+	"r0 = *(u64 *)(r1 + 0);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Test multi-frame precision loss: callee consumes caller stack early,
+ * but static liveness keeps it live at pruning points inside callee.
+ *
+ * Caller stores map_ptr or scalar(42) at fp-8, then calls
+ * consume_and_call_inner. The callee reads fp0-8 at entry (consuming
+ * the slot), then calls do_nothing2. After do_nothing2 returns (a
+ * pruning point), fp-8 should be dead -- the read already happened.
+ * But because the call instruction's stack_use includes SPI 0, the
+ * static live_stack_before at insn 7 is 0x1, keeping fp-8 live inside
+ * the callee and preventing state pruning between the two paths.
+ *
+ * Insn layout:
+ *   0: call bpf_get_prandom_u32
+ *   1: if r0 == 42 goto pc+2    -> insn 4
+ *   2: r0 = map ll (ldimm64 part1)
+ *   3: (ldimm64 part2)
+ *   4: *(u64)(r10 - 8) = r0     fp-8 = map_ptr OR scalar(42)
+ *   5: r1 = r10
+ *   6: r1 += -8
+ *   7: call consume_and_call_inner
+ *   8: r0 = 0
+ *   9: exit
+ *
+ * At insn 7, live_stack_before = 0x3 (slots 0-1 live due to stack_use).
+ * At insn 8, live_stack_before = 0x0 (SPI 0 dead, caller doesn't need it).
+ */
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__log_level(2)
+__success
+__msg(" 7: (85) call pc+{{.*}}                   ; use: fp0-8")
+__msg(" 8: {{.*}} (b7)")
+__naked void callee_consumed_caller_stack(void)
+{
+	asm volatile (
+	"call %[bpf_get_prandom_u32];"
+	"if r0 == 42 goto 1f;"
+	"r0 = %[map] ll;"
+"1:"
+	"*(u64 *)(r10 - 8) = r0;"
+	"r1 = r10;"
+	"r1 += -8;"
+	"call consume_and_call_inner;"
+	"r0 = 0;"
+	"exit;"
+	:
+	: __imm(bpf_get_prandom_u32),
+	  __imm_addr(map)
+	: __clobber_all);
+}
+
+static __used __naked void consume_and_call_inner(void)
+{
+	asm volatile (
+	"r0 = *(u64 *)(r1 + 0);"	/* read fp[0]-8 into caller-saved r0 */
+	"call do_nothing2;"		/* inner call clobbers r0 */
+	"r0 = 0;"
+	"goto +0;"			/* checkpoint */
+	"r0 = 0;"
+	"goto +0;"			/* checkpoint */
+	"r0 = 0;"
+	"goto +0;"			/* checkpoint */
+	"r0 = 0;"
+	"goto +0;"			/* checkpoint */
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void do_nothing2(void)
+{
+	asm volatile (
+	"r0 = 0;"
+	"r0 = 0;"
+	"r0 = 0;"
+	"r0 = 0;"
+	"r0 = 0;"
+	"r0 = 0;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/*
+ * Reproducer for unsound pruning when clean_verifier_state() promotes
+ * live STACK_ZERO bytes to STACK_MISC.
+ *
+ * Program shape:
+ * - Build key at fp-4:
+ *   - path A keeps key byte as STACK_ZERO;
+ *   - path B writes unknown byte making it STACK_MISC.
+ * - Branches merge at a prune point before map_lookup.
+ * - map_lookup on ARRAY map is value-sensitive to constant zero key:
+ *   - path A: const key 0 => PTR_TO_MAP_VALUE (non-NULL);
+ *   - path B: non-const key => PTR_TO_MAP_VALUE_OR_NULL.
+ * - Dereference lookup result without null check.
+ *
+ * Note this behavior won't trigger at fp-8, since the verifier will
+ * track 32-bit scalar spill differently as spilled_ptr.
+ *
+ * Correct verifier behavior: reject (path B unsafe).
+ * With blanket STACK_ZERO->STACK_MISC promotion on live slots, cached path A
+ * state can be generalized and incorrectly prune path B, making program load.
+ */
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("R0 invalid mem access 'map_value_or_null'")
+__naked void stack_zero_to_misc_unsound_array_lookup(void)
+{
+	asm volatile (
+	/* key at fp-4: all bytes STACK_ZERO */
+	"*(u32 *)(r10 - 4) = 0;"
+	"call %[bpf_get_prandom_u32];"
+	/* fall-through (path A) explored first */
+	"if r0 != 0 goto l_nonconst%=;"
+	/* path A: keep key constant zero */
+	"goto l_lookup%=;"
+"l_nonconst%=:"
+	/* path B: key byte turns to STACK_MISC, key no longer const */
+	"*(u8 *)(r10 - 4) = r0;"
+"l_lookup%=:"
+	/* value-sensitive lookup */
+	"r2 = r10;"
+	"r2 += -4;"
+	"r1 = %[array_map_8b] ll;"
+	"call %[bpf_map_lookup_elem];"
+	/* unsafe when lookup result is map_value_or_null */
+	"r0 = *(u64 *)(r0 + 0);"
+	"exit;"
+	:
+	: __imm(bpf_get_prandom_u32),
+	  __imm(bpf_map_lookup_elem),
+	  __imm_addr(array_map_8b)
+	: __clobber_all);
+}
+
+/*
+ * Subprog variant of stack_zero_to_misc_unsound_array_lookup.
+ *
+ * Check unsound pruning when a callee modifies the caller's
+ * stack through a pointer argument.
+ *
+ * Program shape:
+ *   main:
+ *     *(u32)(fp - 4) = 0            key = 0 (all bytes STACK_ZERO)
+ *     r1 = fp - 4
+ *     call maybe_clobber_key        may overwrite key[0] with scalar
+ *     <-- prune point: two states meet here -->
+ *     r2 = fp - 4
+ *     r1 = array_map_8b
+ *     call bpf_map_lookup_elem      value-sensitive on const-zero key
+ *     r0 = *(u64)(r0 + 0)           deref without null check
+ *     exit
+ *
+ *   maybe_clobber_key(r1):
+ *     r6 = r1                       save &key
+ *     call bpf_get_prandom_u32
+ *     if r0 == 0 goto skip          path A: key stays STACK_ZERO
+ *     *(u8)(r6 + 0) = r0            path B: key[0] becomes STACK_MISC
+ *   skip:
+ *     r0 = 0
+ *     exit
+ *
+ * Path A: const-zero key => array lookup => PTR_TO_MAP_VALUE => deref OK.
+ * Path B: non-const key  => array lookup => PTR_TO_MAP_VALUE_OR_NULL => UNSAFE.
+ *
+ * If the cleaner collapses STACK_ZERO -> STACK_MISC for the live key
+ * slot, path A's cached state matches path B, pruning the unsafe path.
+ *
+ * Correct verifier behaviour: reject.
+ */
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("R0 invalid mem access 'map_value_or_null'")
+__naked void subprog_stack_zero_to_misc_unsound(void)
+{
+	asm volatile (
+	/* key at fp-4: all bytes STACK_ZERO */
+	"*(u32 *)(r10 - 4) = 0;"
+	/* subprog may clobber key[0] with a scalar byte */
+	"r1 = r10;"
+	"r1 += -4;"
+	"call maybe_clobber_key;"
+	/* value-sensitive array lookup */
+	"r2 = r10;"
+	"r2 += -4;"
+	"r1 = %[array_map_8b] ll;"
+	"call %[bpf_map_lookup_elem];"
+	/* unsafe when result is map_value_or_null (path B) */
+	"r0 = *(u64 *)(r0 + 0);"
+	"exit;"
+	:
+	: __imm(bpf_map_lookup_elem),
+	  __imm_addr(array_map_8b)
+	: __clobber_all);
+}
+
+static __used __naked void maybe_clobber_key(void)
+{
+	asm volatile (
+	"r6 = r1;"
+	"call %[bpf_get_prandom_u32];"
+	/* path A (r0==0): key stays STACK_ZERO, explored first */
+	"if r0 == 0 goto 1f;"
+	/* path B (r0!=0): overwrite key[0] with scalar */
+	"*(u8 *)(r6 + 0) = r0;"
+	"1:"
+	"r0 = 0;"
+	"exit;"
+	:: __imm(bpf_get_prandom_u32)
+	: __clobber_all);
+}
+
+/*
+ * Demonstrate that subprog arg spill/reload breaks arg tracking,
+ * inflating caller stack liveness and preventing state pruning.
+ *
+ * modifier2(fp-24) has two paths: one writes a scalar to *(r1+8)
+ * = caller fp-16, the other leaves it as zero.  After modifier2
+ * returns, fp-16 is never read again — it is dead.
+ *
+ * spill_reload_reader2(fp-24) only reads caller fp-8 via
+ * *(r1+16), but it spills r1 across a helper call.  This
+ * breaks compute_subprog_arg_access(): the reload from callee
+ * stack cannot be connected back to arg1, so arg1 access goes
+ * "all (conservative)".  At the call site (r1 = fp-24, slot 5)
+ * apply_callee_stack_access() marks slots 0..5 as stack_use —
+ * pulling fp-16 (slots 2-3) into live_stack_before even though
+ * the reader never touches it.
+ *
+ * Result: at modifier2's return point two states with different
+ * fp-16 values cannot be pruned.
+ *
+ * With correct (or old dynamic) liveness fp-16 is dead at that
+ * point and the states prune → "6: safe" appears in the log.
+ */
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__log_level(2)
+__success
+__msg("6: safe")
+__naked void spill_reload_inflates_stack_liveness(void)
+{
+	asm volatile (
+	/* struct at fp-24: { ctx; ptr; tail; } */
+	"*(u64 *)(r10 - 24) = r1;"		/* fp-24 = ctx */
+	"*(u64 *)(r10 - 16) = r1;"		/* fp-16 = ctx (STACK_SPILL ptr) */
+	"*(u64 *)(r10 - 8) = 0;"		/* fp-8  = tail */
+	/* modifier2 writes different values to fp-16 on two paths */
+	"r1 = r10;"
+	"r1 += -24;"
+	"call modifier2;"
+	/* insn 6: prune point — two states with different fp-16
+	 * path A: fp-16 = STACK_MISC  (scalar overwrote pointer)
+	 * path B: fp-16 = STACK_SPILL (original ctx pointer)
+	 * STACK_MISC does NOT subsume STACK_SPILL(ptr),
+	 * so pruning fails unless fp-16 is cleaned (dead).
+	 */
+	"r1 = r10;"
+	"r1 += -24;"
+	"call spill_reload_reader2;"		/* reads fp-8 via *(r1+16) */
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Two paths: one writes a scalar to *(r1+8) = caller fp-16,
+ * the other leaves it unchanged.  Both return 0 via separate
+ * exits to prevent pruning inside the subprog at the merge.
+ */
+static __used __naked void modifier2(void)
+{
+	asm volatile (
+	"r6 = r1;"
+	"call %[bpf_get_prandom_u32];"
+	"if r0 == 0 goto 1f;"
+	"*(u64 *)(r6 + 8) = r0;"		/* fp-16 = random */
+	"r0 = 0;"
+	"exit;"					/* path A exit */
+	"1:"
+	"r0 = 0;"
+	"exit;"					/* path B exit */
+	:: __imm(bpf_get_prandom_u32)
+	: __clobber_all);
+}
+
+/* Receives r1 = caller fp-24.  Only reads *(r1+16) = fp-8.
+ * Spills r1 across a helper call → arg tracking goes conservative →
+ * slots 0..5 all appear used instead of just slot 1 (fp-8).
+ */
+static __used __naked void spill_reload_reader2(void)
+{
+	asm volatile (
+	"*(u64 *)(r10 - 8) = r1;"		/* spill arg1 */
+	"call %[bpf_get_prandom_u32];"		/* clobbers r1-r5 */
+	"r1 = *(u64 *)(r10 - 8);"		/* reload arg1 */
+	"r0 = *(u64 *)(r1 + 16);"		/* read caller fp-8 */
+	"r0 = 0;"
+	"exit;"
+	:: __imm(bpf_get_prandom_u32)
+	: __clobber_all);
+}
+
+/* BTF FUNC records are not generated for kfuncs referenced
+ * from inline assembly. These records are necessary for
+ * libbpf to link the program. The function below is a hack
+ * to ensure that BTF FUNC records are generated.
+ */
+void __kfunc_btf_root(void)
+{
+	bpf_iter_num_new(0, 0, 0);
+	bpf_iter_num_next(0);
+	bpf_iter_num_destroy(0);
+}
+
+/* Test that open-coded iterator kfunc arguments get precise stack
+ * liveness tracking. struct bpf_iter_num is 8 bytes (1 SPI).
+ *
+ * Insn layout:
+ *   0: *(u64*)(r10 - 8) = 0      write SPI 0 (dead)
+ *   1: *(u64*)(r10 - 16) = 0     write SPI 1 (dead)
+ *   2: r1 = r10
+ *   3: r1 += -24                 iter state at fp-24 (SPI 2)
+ *   4: r2 = 0
+ *   5: r3 = 10
+ *   6: call bpf_iter_num_new     defines SPI 2 (KF_ITER_NEW) → 0x0
+ *   7-8: r1 = fp-24
+ *   9: call bpf_iter_num_next    uses SPI 2 → 0x30
+ *  10: if r0 == 0 goto 2f
+ *  11: goto 1b
+ *  12-13: r1 = fp-24
+ *  14: call bpf_iter_num_destroy uses SPI 2 → 0x30
+ *  15: r0 = 0
+ *  16: exit
+ *
+ * At insn 6, SPI 2 is defined (KF_ITER_NEW initializes, doesn't read),
+ * so it kills liveness from successors. live_stack_before = 0x0.
+ * At insns 9 and 14, SPI 2 is used (iter_next/destroy read the state),
+ * so live_stack_before = 0x30.
+ */
+SEC("socket")
+__success __log_level(2)
+__msg(" 6: (85) call bpf_iter_num_new{{.*}}          ; def: fp0-24{{$}}")
+__msg(" 9: (85) call bpf_iter_num_next{{.*}}         ; use: fp0-24{{$}}")
+__msg("14: (85) call bpf_iter_num_destroy{{.*}}      ; use: fp0-24{{$}}")
+__naked void kfunc_iter_stack_liveness(void)
+{
+	asm volatile (
+	"*(u64 *)(r10 - 8) = 0;"	/* SPI 0 - dead */
+	"*(u64 *)(r10 - 16) = 0;"	/* SPI 1 - dead */
+	"r1 = r10;"
+	"r1 += -24;"
+	"r2 = 0;"
+	"r3 = 10;"
+	"call %[bpf_iter_num_new];"
+"1:"
+	"r1 = r10;"
+	"r1 += -24;"
+	"call %[bpf_iter_num_next];"
+	"if r0 == 0 goto 2f;"
+	"goto 1b;"
+"2:"
+	"r1 = r10;"
+	"r1 += -24;"
+	"call %[bpf_iter_num_destroy];"
+	"r0 = 0;"
+	"exit;"
+	:: __imm(bpf_iter_num_new),
+	   __imm(bpf_iter_num_next),
+	   __imm(bpf_iter_num_destroy)
+	: __clobber_all);
+}
+
+/*
+ * Test for soundness bug in static stack liveness analysis.
+ *
+ * The static pre-pass tracks FP-derived register offsets to determine
+ * which stack slots are accessed. When a PTR_TO_STACK is spilled to
+ * the stack and later reloaded, the reload (BPF_LDX) kills FP-derived
+ * tracking, making subsequent accesses through the reloaded pointer
+ * invisible to the static analysis.
+ *
+ * This causes the analysis to incorrectly mark SPI 0 as dead at the
+ * merge point. clean_verifier_state() zeros it in the cached state,
+ * and stacksafe() accepts the new state against STACK_INVALID,
+ * enabling incorrect pruning.
+ *
+ * Path A (verified first): stores PTR_TO_MAP_VALUE in SPI 0
+ * Path B (verified second): stores scalar 42 in SPI 0
+ * After merge: reads SPI 0 through spilled/reloaded PTR_TO_STACK
+ * and dereferences the result as a pointer.
+ *
+ * Correct behavior: reject (path B dereferences a scalar)
+ * Bug behavior: accept (path B is incorrectly pruned)
+ */
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("R0 invalid mem access 'scalar'")
+__naked void spill_ptr_liveness_type_confusion(void)
+{
+	asm volatile (
+	/* Map lookup to get PTR_TO_MAP_VALUE */
+	"r1 = %[map] ll;"
+	"*(u32 *)(r10 - 32) = 0;"
+	"r2 = r10;"
+	"r2 += -32;"
+	"call %[bpf_map_lookup_elem];"
+	"if r0 == 0 goto l_exit%=;"
+	/* r6 = PTR_TO_MAP_VALUE (callee-saved) */
+	"r6 = r0;"
+	/* Branch: fall-through (path A) verified first */
+	"call %[bpf_get_prandom_u32];"
+	"if r0 != 0 goto l_scalar%=;"
+	/* Path A: store map value ptr at SPI 0 */
+	"*(u64 *)(r10 - 8) = r6;"
+	"goto l_merge%=;"
+"l_scalar%=:"
+	/* Path B: store scalar at SPI 0 */
+	"r1 = 42;"
+	"*(u64 *)(r10 - 8) = r1;"
+"l_merge%=:"
+	/*
+	 * Spill PTR_TO_STACK{off=-8} to SPI 1, then reload.
+	 * Reload kills FP-derived tracking, hiding the
+	 * subsequent SPI 0 access from the static analysis.
+	 */
+	"r1 = r10;"
+	"r1 += -8;"
+	"*(u64 *)(r10 - 16) = r1;"
+	"goto +0;"			/* checkpoint */
+	"goto +0;"			/* checkpoint */
+	"goto +0;"			/* checkpoint */
+	"r1 = *(u64 *)(r10 - 16);"
+	/* Read SPI 0 through reloaded pointer */
+	"r0 = *(u64 *)(r1 + 0);"
+	/* Dereference: safe for map value (path A),
+	 * unsafe for scalar (path B).
+	 */
+	"r0 = *(u64 *)(r0 + 0);"
+	"exit;"
+"l_exit%=:"
+	"r0 = 0;"
+	"exit;"
+	:
+	: __imm(bpf_map_lookup_elem),
+	  __imm(bpf_get_prandom_u32),
+	  __imm_addr(map)
+	: __clobber_all);
+}
+
+/* === Tests for 4-byte stack slot liveness granularity === */
+
+/* Test that a 4-byte aligned write is stack_def and kills liveness.
+ *
+ *   0: *(u64 *)(r10 - 8) = 0      def slots 0,1 (full SPI 0)
+ *   1: *(u32 *)(r10 - 8) = 0      def slot 1 (4-byte write kills slot 1)
+ *   2: r0 = *(u64 *)(r10 - 8)     use slots 0,1
+ *   3: r0 = 0
+ *   4: exit
+ *
+ * At insn 1, the 4-byte write defines slot 1. Slot 0 still flows
+ * backward from insn 2's read: live_stack_before = 0x1.
+ */
+SEC("socket")
+__log_level(2)
+__msg("1: (62) *(u32 *)(r10 -8) = 0         ; def: fp0-8h")
+__naked void four_byte_write_kills_slot(void)
+{
+	asm volatile (
+	"*(u64 *)(r10 - 8) = 0;"
+	"*(u32 *)(r10 - 8) = 0;"
+	"r0 = *(u64 *)(r10 - 8);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Test that a write to the upper half of an SPI is dead when only
+ * the lower half is read. This was impossible at SPI granularity
+ * where any read of the SPI kept the entire SPI live.
+ *
+ *   0: *(u32 *)(r10 - 8) = 0      def slot 1 (DEAD: never read)
+ *   1: *(u32 *)(r10 - 4) = 0      def slot 0
+ *   2: r0 = *(u32 *)(r10 - 4)     use slot 0 only
+ *   3: r0 = 0
+ *   4: exit
+ *
+ * At insn 0, nothing is live (0x0). Previously at SPI granularity,
+ * the read at insn 2 would mark the full SPI 0 as live and the
+ * 4-byte writes wouldn't count as def, so insn 0 would have had
+ * SPI 0 live (0x1).
+ */
+SEC("socket")
+__log_level(2)
+__msg("0: (62) *(u32 *)(r10 -8) = 0         ; def: fp0-8h")
+__msg("2: (61) r0 = *(u32 *)(r10 -4)        ; use: fp0-4h")
+__naked void dead_half_spi_write(void)
+{
+	asm volatile (
+	"*(u32 *)(r10 - 8) = 0;"
+	"*(u32 *)(r10 - 4) = 0;"
+	"r0 = *(u32 *)(r10 - 4);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Test that a 4-byte read from the upper half of SPI 0 makes only
+ * slot 1 live (0x2), not the full SPI (0x3).
+ *
+ *   0: *(u64 *)(r10 - 8) = 0      def slots 0,1
+ *   1: r0 = *(u32 *)(r10 - 8)     use slot 1 only (upper half)
+ *   2: r0 = 0
+ *   3: exit
+ *
+ * At insn 1, live_stack_before = 0x2 (slot 1 only).
+ */
+SEC("socket")
+__log_level(2)
+__msg("1: (61) r0 = *(u32 *)(r10 -8)        ; use: fp0-8h")
+__naked void four_byte_read_upper_half(void)
+{
+	asm volatile (
+	"*(u64 *)(r10 - 8) = 0;"
+	"r0 = *(u32 *)(r10 - 8);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Test that a 2-byte write does NOT count as stack_def.
+ * Sub-4-byte writes don't fully cover a 4-byte slot,
+ * so liveness passes through.
+ *
+ *   0: *(u64 *)(r10 - 8) = 0      def slots 0,1
+ *   1: *(u16 *)(r10 - 4) = 0      NOT stack_def (2 < 4 bytes)
+ *   2: r0 = *(u32 *)(r10 - 4)     use slot 0
+ *   3: r0 = 0
+ *   4: exit
+ *
+ * At insn 1, slot 0 still live (0x1) because 2-byte write
+ * didn't kill it.
+ */
+SEC("socket")
+__log_level(2)
+__msg("0: (7a) *(u64 *)(r10 -8) = 0         ; def: fp0-8")
+__msg("1: (6a) *(u16 *)(r10 -4) = 0{{$}}")
+__msg("2: (61) r0 = *(u32 *)(r10 -4)        ; use: fp0-4h")
+__naked void two_byte_write_no_kill(void)
+{
+	asm volatile (
+	"*(u64 *)(r10 - 8) = 0;"
+	"*(u16 *)(r10 - 4) = 0;"
+	"r0 = *(u32 *)(r10 - 4);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Test that a 1-byte write does NOT count as stack_def.
+ *
+ *   0: *(u64 *)(r10 - 8) = 0      def slots 0,1
+ *   1: *(u8 *)(r10 - 4) = 0       NOT stack_def (1 < 4 bytes)
+ *   2: r0 = *(u32 *)(r10 - 4)     use slot 0
+ *   3: r0 = 0
+ *   4: exit
+ *
+ * At insn 1, slot 0 still live (0x1).
+ */
+SEC("socket")
+__log_level(2)
+__msg("0: (7a) *(u64 *)(r10 -8) = 0         ; def: fp0-8")
+__msg("1: (72) *(u8 *)(r10 -4) = 0")
+__msg("2: (61) r0 = *(u32 *)(r10 -4)        ; use: fp0-4h")
+__naked void one_byte_write_no_kill(void)
+{
+	asm volatile (
+	"*(u64 *)(r10 - 8) = 0;"
+	"*(u8 *)(r10 - 4) = 0;"
+	"r0 = *(u32 *)(r10 - 4);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Test stack access beyond fp-256 exercising the second bitmask word.
+ * fp-264 is SPI 32, slots 64-65, which are bits 0-1 of live_stack[1].
+ *
+ *   0: *(u64 *)(r10 - 264) = 0     def slots 64,65
+ *   1: r0 = *(u64 *)(r10 - 264)    use slots 64,65
+ *   2: r0 = 0
+ *   3: exit
+ *
+ * At insn 1, live_stack high word has bits 0,1 set: 0x3:0x0.
+ */
+SEC("socket")
+__log_level(2)
+__msg("1: (79) r0 = *(u64 *)(r10 -264)      ; use: fp0-264")
+__naked void high_stack_second_bitmask_word(void)
+{
+	asm volatile (
+	"*(u64 *)(r10 - 264) = 0;"
+	"r0 = *(u64 *)(r10 - 264);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Test that two separate 4-byte writes to each half of an SPI
+ * together kill liveness for the full SPI.
+ *
+ *   0: *(u32 *)(r10 - 8) = 0      def slot 1 (upper half)
+ *   1: *(u32 *)(r10 - 4) = 0      def slot 0 (lower half)
+ *   2: r0 = *(u64 *)(r10 - 8)     use slots 0,1
+ *   3: r0 = 0
+ *   4: exit
+ *
+ * At insn 0: live_stack_before = 0x0 (both slots killed by insns 0,1).
+ * At insn 1: live_stack_before = 0x2 (slot 1 still live, slot 0 killed here).
+ */
+SEC("socket")
+__log_level(2)
+__msg("0: (62) *(u32 *)(r10 -8) = 0         ; def: fp0-8h")
+__msg("1: (62) *(u32 *)(r10 -4) = 0         ; def: fp0-4h")
+__naked void two_four_byte_writes_kill_full_spi(void)
+{
+	asm volatile (
+	"*(u32 *)(r10 - 8) = 0;"
+	"*(u32 *)(r10 - 4) = 0;"
+	"r0 = *(u64 *)(r10 - 8);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Test that 4-byte writes on both branches kill a slot at the
+ * join point. Previously at SPI granularity, a 4-byte write was
+ * not stack_def, so liveness would flow backward through the
+ * branch that only had a 4-byte write.
+ *
+ *   0: call bpf_get_prandom_u32
+ *   1: if r0 != 0 goto 1f
+ *   2: *(u64 *)(r10 - 8) = 0       path A: def slots 0,1
+ *   3: goto 2f
+ * 1:4: *(u32 *)(r10 - 4) = 0       path B: def slot 0
+ * 2:5: r0 = *(u32 *)(r10 - 4)      use slot 0
+ *   6: r0 = 0
+ *   7: exit
+ *
+ * Both paths define slot 0 before the read. At insn 1 (branch),
+ * live_stack_before = 0x0 because slot 0 is killed on both paths.
+ */
+SEC("socket")
+__log_level(2)
+__msg("1: (55) if r0 != 0x0 goto pc+2")
+__msg("2: (7a) *(u64 *)(r10 -8) = 0         ; def: fp0-8")
+__msg("3: (05) goto pc+1")
+__msg("4: (62) *(u32 *)(r10 -4) = 0         ; def: fp0-4h")
+__msg("5: (61) r0 = *(u32 *)(r10 -4)        ; use: fp0-4h")
+__naked void both_branches_kill_slot(void)
+{
+	asm volatile (
+	"call %[bpf_get_prandom_u32];"
+	"if r0 != 0 goto 1f;"
+	"*(u64 *)(r10 - 8) = 0;"
+	"goto 2f;"
+"1:"
+	"*(u32 *)(r10 - 4) = 0;"
+"2:"
+	"r0 = *(u32 *)(r10 - 4);"
+	"r0 = 0;"
+	"exit;"
+	:: __imm(bpf_get_prandom_u32)
+	: __clobber_all);
+}
+
+/* Soundness: cleaning the dead upper half of an SPI must not
+ * affect the live lower half's type information for pruning.
+ *
+ * Both halves of SPI 0 are written separately. Only the lower
+ * half (slot 0) is used as a 4-byte map key. The upper half
+ * (slot 1) is dead and cleaned to STACK_INVALID.
+ *
+ * Path A: key stays 0 (STACK_ZERO) → non-null array lookup
+ * Path B: key byte turns STACK_MISC → may-null array lookup
+ * Deref without null check: safe for A, unsafe for B.
+ *
+ * If half-SPI cleaning incorrectly corrupted the live half's
+ * type info, path A's cached state could generalize and unsoundly
+ * prune path B.
+ *
+ * Expected: reject (path B unsafe).
+ */
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("R0 invalid mem access 'map_value_or_null'")
+__naked void half_spi_clean_preserves_stack_zero(void)
+{
+	asm volatile (
+	"*(u32 *)(r10 - 4) = 0;"           /* slot 0: STACK_ZERO */
+	"*(u32 *)(r10 - 8) = 0;"           /* slot 1: STACK_ZERO (dead) */
+	"call %[bpf_get_prandom_u32];"
+	"if r0 != 0 goto l_nonconst%=;"
+	"goto l_lookup%=;"
+"l_nonconst%=:"
+	"*(u8 *)(r10 - 4) = r0;"           /* slot 0: STACK_MISC */
+"l_lookup%=:"
+	"r2 = r10;"
+	"r2 += -4;"
+	"r1 = %[array_map_8b] ll;"
+	"call %[bpf_map_lookup_elem];"
+	"r0 = *(u64 *)(r0 + 0);"           /* unsafe if null */
+	"exit;"
+	:
+	: __imm(bpf_get_prandom_u32),
+	  __imm(bpf_map_lookup_elem),
+	  __imm_addr(array_map_8b)
+	: __clobber_all);
+}
+
+/*
+ * Model of scx_lavd's pick_idle_cpu_at_cpdom iat block:
+ * conditional block with helper call and temporary stack spill,
+ * spill dead after merge.
+ *
+ * Path A (fall-through): spill r6 to fp-8 across helper call
+ * Path B (branch taken): skip the block entirely
+ * At merge (insn 6): fp-8 is dead (never read after merge)
+ *
+ * Static liveness marks fp-8 dead at merge. clean_verifier_state()
+ * converts path A's STACK_SPILL to STACK_INVALID. Path B has
+ * STACK_INVALID. stacksafe() matches -> path B pruned -> "6: safe".
+ */
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__success
+__log_level(2)
+__msg("6: safe")
+__naked void dead_spill_at_merge_enables_pruning(void)
+{
+	asm volatile (
+	"call %[bpf_get_prandom_u32];"
+	"r6 = 7;"
+	"if r0 != 0 goto l_skip%=;"
+	/* conditional block: spill, call, reload */
+	"*(u64 *)(r10 - 8) = r6;"
+	"call %[bpf_get_prandom_u32];"
+	"r6 = *(u64 *)(r10 - 8);"
+"l_skip%=:"
+	/* fp-8 dead. Path B pruned here -> "6: safe" */
+	"r0 = r6;"
+	"exit;"
+	:
+	: __imm(bpf_get_prandom_u32)
+	: __clobber_all);
+}
+
+/*
+ * FP-offset tracking loses precision on second ADD, killing all liveness.
+ *
+ * fp_off_insn_xfer() handles "FP itself + negative imm" precisely
+ * (e.g. r6 = r10; r6 += -24 -> slot 5).  But any subsequent ADD/SUB
+ * on a register that already has non-zero spis falls through to
+ * spis_set_all(), because the code only handles the FP-itself case.
+ *
+ * A write through this imprecise register enters the non-zero-spis
+ * branch of set_indirect_stack_access(), which OR's the all-ones
+ * mask into stack_def.  The backward liveness equation
+ *
+ *   stack_in = (stack_out & ~stack_def) | stack_use
+ *
+ * sees ~ALL = 0, killing ALL slot liveness at that instruction.
+ *
+ * At the merge pruning point, live_stack_before is empty.
+ * clean_verifier_state() marks fp-8 as STACK_INVALID.
+ * stacksafe() skips STACK_INVALID (line "continue"), so pruning
+ * succeeds regardless of the current state's fp-8 value.
+ * Path B is pruned, its null deref is never explored.
+ *
+ * Correct behavior: reject (path B dereferences NULL).
+ * Bug behavior: accept (path B pruned away).
+ */
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("R1 invalid mem access 'scalar'")
+__naked void fp_add_loses_precision_kills_liveness(void)
+{
+	asm volatile (
+	"call %[bpf_get_prandom_u32];"
+	"if r0 != 0 goto l_pathB%=;"
+
+	/* Path A (fall-through, explored first): fp-8 = 0 */
+	"r1 = 0;"
+	"*(u64 *)(r10 - 8) = r1;"
+	"goto l_merge%=;"
+
+"l_pathB%=:"
+	/* Path B (explored second): fp-8 = 42 */
+	"r1 = 42;"
+	"*(u64 *)(r10 - 8) = r1;"
+
+"l_merge%=:"
+	/*
+	 * Create imprecise FP-derived register.
+	 * r6 = r10 - 24 gets precise slot 5.
+	 * r6 += 8 hits the else branch (spis non-zero, delta > 0)
+	 * and sets spis to ALL.  r6 is actually r10-16.
+	 */
+	"r6 = r10;"
+	"r6 += -24;"
+	"r6 += 8;"
+
+	/*
+	 * Write through imprecise r6.  Actually writes to fp-16
+	 * (does NOT touch fp-8), but liveness marks ALL slots
+	 * as stack_def, killing fp-8's liveness.
+	 */
+	"r7 = 0;"
+	"*(u64 *)(r6 + 0) = r7;"
+
+	/* Read fp-8: liveness says dead, but value is needed. */
+	"r2 = *(u64 *)(r10 - 8);"
+	"if r2 == 42 goto l_danger%=;"
+
+	/* r2 != 42 (path A: r2 == 0): safe exit */
+	"r0 = 0;"
+	"exit;"
+
+"l_danger%=:"
+	/* Only reachable from path B (r2 == 42): null deref */
+	"r1 = 0;"
+	"r0 = *(u64 *)(r1 + 0);"
+	"exit;"
+	:
+	: __imm(bpf_get_prandom_u32)
+	: __clobber_all);
+}
+
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("R1 invalid mem access 'scalar'")
+__naked void fp_spill_loses_precision_kills_liveness(void)
+{
+	asm volatile (
+	"call %[bpf_get_prandom_u32];"
+	"if r0 != 0 goto l_pathB%=;"
+
+	"r1 = 0;"
+	"*(u64 *)(r10 - 8) = r1;"
+	"goto l_merge%=;"
+
+"l_pathB%=:"
+	"r1 = 42;"
+	"*(u64 *)(r10 - 8) = r1;"
+
+"l_merge%=:"
+	"r6 = r10;"
+	"r6 += -64;"
+	"*(u64 *)(r10 - 160) = r6;"
+	"r6 = *(u64 *)(r10 - 160);"
+
+	"r7 = 0;"
+	"*(u64 *)(r6 + 0) = r7;"
+
+	"r2 = *(u64 *)(r10 - 8);"
+	"if r2 == 42 goto l_danger%=;"
+
+	"r0 = *(u64 *)(r10 - 56);"
+	"exit;"
+
+"l_danger%=:"
+	"r1 = 0;"
+	"r0 = *(u64 *)(r1 + 0);"
+	"exit;"
+	:
+	: __imm(bpf_get_prandom_u32)
+	: __clobber_all);
+}
+
+/* === Tests for frame-based AT_FP tracking === */
+
+/*
+ * Test 1: conditional_stx_in_subprog
+ * Subprog conditionally writes caller's slot.
+ * Verify slot stays live (backward pass handles conditional def via CFG).
+ *
+ * Main writes fp-8=42, calls cond_writer(fp-8), reads fp-8.
+ * cond_writer only writes on one path → parent_def only on that path.
+ * The backward parent_live correctly keeps fp-8 live at entry
+ * (conditional write doesn't kill liveness at the join).
+ */
+SEC("socket")
+__log_level(2)
+/* fp-8 live at call (callee conditionally writes → slot not killed) */
+__msg("1: (7b) *(u64 *)(r10 -8) = r1        ; def: fp0-8")
+__msg("4: (85) call pc+2{{$}}")
+__msg("5: (79) r0 = *(u64 *)(r10 -8)        ; use: fp0-8")
+__naked void conditional_stx_in_subprog(void)
+{
+	asm volatile (
+	"r1 = 42;"
+	"*(u64 *)(r10 - 8) = r1;"
+	"r1 = r10;"
+	"r1 += -8;"
+	"call cond_writer;"
+	"r0 = *(u64 *)(r10 - 8);"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Conditionally writes to *(r1+0) */
+static __used __naked void cond_writer(void)
+{
+	asm volatile (
+	"r6 = r1;"
+	"call %[bpf_get_prandom_u32];"
+	"if r0 == 0 goto 1f;"
+	"*(u64 *)(r6 + 0) = r0;"
+	"1:"
+	"r0 = 0;"
+	"exit;"
+	:: __imm(bpf_get_prandom_u32)
+	: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("4: (85) call pc+{{.*}}                   ; use: fp0-16")
+__msg("7: (85) call pc+{{.*}}                   ; use: fp0-32")
+__naked void multiple_callsites_different_offsets(void)
+{
+	asm volatile (
+	"*(u64 *)(r10 - 16) = 0;"
+	"*(u64 *)(r10 - 32) = 0;"
+	"r1 = r10;"
+	"r1 += -16;"
+	"call read_first_param;"
+	"r1 = r10;"
+	"r1 += -32;"
+	"call read_first_param;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/*
+ * Test 3: nested_fp_passthrough
+ * main→A→B, main's FP forwarded to B. B accesses main's stack.
+ * Verify liveness propagates through.
+ *
+ * Main passes fp-32 to outer_forwarder, which passes it to inner_reader.
+ * inner_reader reads at arg+0 (= main's fp-32).
+ * parent_live propagates transitively: inner→outer→main.
+ */
+SEC("socket")
+__log_level(2)
+/* At call to outer_forwarder: main's fp-32 (slots 6,7) should be live */
+__msg("6: (85) call pc+{{.*}}                   ; use: fp0-32")
+__naked void nested_fp_passthrough(void)
+{
+	asm volatile (
+	"*(u64 *)(r10 - 8) = 0;"
+	"*(u64 *)(r10 - 16) = 0;"
+	"*(u64 *)(r10 - 24) = 0;"
+	"*(u64 *)(r10 - 32) = 0;"
+	"r1 = r10;"
+	"r1 += -32;"
+	"call outer_forwarder;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Forwards arg to inner_reader */
+static __used __naked void outer_forwarder(void)
+{
+	asm volatile (
+	"call inner_reader;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void inner_reader(void)
+{
+	asm volatile (
+	"r0 = *(u64 *)(r1 + 0);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/*
+ * Test 4: callee_must_write_before_read
+ * Callee unconditionally writes parent slot before reading.
+ * Verify slot is NOT live at call site (parent_def kills it).
+ */
+SEC("socket")
+__log_level(2)
+/* fp-8 NOT live at call: callee writes before reading (parent_def kills it) */
+__msg("2: .12345.... (85) call pc+")
+__naked void callee_must_write_before_read(void)
+{
+	asm volatile (
+	"r1 = r10;"
+	"r1 += -8;"
+	"call write_then_read;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Unconditionally writes *(r1+0), then reads it back */
+static __used __naked void write_then_read(void)
+{
+	asm volatile (
+	"r6 = r1;"
+	"r7 = 99;"
+	"*(u64 *)(r6 + 0) = r7;"
+	"r0 = *(u64 *)(r6 + 0);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/*
+ * Test 5: return_site_liveness_bleeding
+ * Main calls subprog twice. Slot used after one call but not the other.
+ * Context-insensitive: slot conservatively live at both.
+ *
+ * After first call: read fp-8.
+ * After second call: don't read fp-8.
+ * Since parent_live is per-subprog (not per call-site),
+ * fp-8 is live at both call sites.
+ */
+SEC("socket")
+__log_level(2)
+/* Both calls have fp-8 live due to context-insensitive parent_live */
+__msg("3: (85) call pc+{{.*}}                   ; use: fp0-8")
+__msg("7: (85) call pc+{{.*}}                   ; use: fp0-8")
+__naked void return_site_liveness_bleeding(void)
+{
+	asm volatile (
+	"*(u64 *)(r10 - 8) = 0;"
+	"r1 = r10;"
+	"r1 += -8;"
+	"call read_first_param;"
+	"r0 = *(u64 *)(r10 - 8);"
+	"r1 = r10;"
+	"r1 += -8;"
+	"call read_first_param;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("9: (85) call bpf_loop#181            ; use: fp0-16")
+__naked void callback_conditional_read_beyond_ctx(void)
+{
+	asm volatile (
+	"r1 = 42;"
+	"*(u64 *)(r10 - 8) = r1;"
+	"*(u64 *)(r10 - 16) = r1;"
+	"r1 = 2;"
+	"r2 = cb_cond_read ll;"
+	"r3 = r10;"
+	"r3 += -8;"
+	"r4 = 0;"
+	"call %[bpf_loop];"
+	"r0 = 0;"
+	"exit;"
+	:: __imm(bpf_loop)
+	: __clobber_all);
+}
+
+/* Callback conditionally reads *(ctx - 8) = caller fp-16 */
+static __used __naked void cb_cond_read(void)
+{
+	asm volatile (
+	"r6 = r2;"
+	"call %[bpf_get_prandom_u32];"
+	"if r0 == 0 goto 1f;"
+	"r0 = *(u64 *)(r6 - 8);"
+	"1:"
+	"r0 = 0;"
+	"exit;"
+	:: __imm(bpf_get_prandom_u32)
+	: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__msg("14: (7b) *(u64 *)(r6 -8) = r7         ; def: fp0-16")
+__msg("15: (79) r0 = *(u64 *)(r6 -8)         ; use: fp0-16")
+__naked void callback_write_before_read_kills(void)
+{
+	asm volatile (
+	"r1 = 42;"
+	"*(u64 *)(r10 - 8) = r1;"
+	"*(u64 *)(r10 - 16) = r1;"
+	"r1 = 2;"
+	"r2 = cb_write_read ll;"
+	"r3 = r10;"
+	"r3 += -8;"
+	"r4 = 0;"
+	"call %[bpf_loop];"
+	"r0 = 0;"
+	"exit;"
+	:: __imm(bpf_loop)
+	: __clobber_all);
+}
+
+/* Callback unconditionally writes *(ctx-8), then reads it back.
+ * The write (parent_def) kills liveness before entry.
+ */
+static __used __naked void cb_write_read(void)
+{
+	asm volatile (
+	"r6 = r2;"
+	"r7 = 99;"
+	"*(u64 *)(r6 - 8) = r7;"
+	"r0 = *(u64 *)(r6 - 8);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/*
+ * bpf_loop callback conditionally writes fp-16 then unconditionally
+ * reads it. The conditional write does NOT kill liveness
+ */
+SEC("socket")
+__log_level(2)
+__msg("9: (85) call bpf_loop#181            ; use: fp0-16")
+__naked void callback_conditional_write_preserves(void)
+{
+	asm volatile (
+	"r1 = 42;"
+	"*(u64 *)(r10 - 8) = r1;"
+	"*(u64 *)(r10 - 16) = r1;"
+	"r1 = 2;"
+	"r2 = cb_cond_write_read ll;"
+	"r3 = r10;"
+	"r3 += -8;"
+	"r4 = 0;"
+	"call %[bpf_loop];"
+	"r0 = 0;"
+	"exit;"
+	:: __imm(bpf_loop)
+	: __clobber_all);
+}
+
+static __used __naked void cb_cond_write_read(void)
+{
+	asm volatile (
+	"r6 = r2;"
+	"call %[bpf_get_prandom_u32];"
+	"if r0 == 0 goto 1f;"
+	"*(u64 *)(r6 - 8) = r0;"
+	"1:"
+	"r0 = *(u64 *)(r6 - 8);"
+	"r0 = 0;"
+	"exit;"
+	:: __imm(bpf_get_prandom_u32)
+	: __clobber_all);
+}
+
+/*
+ * Two bpf_loop calls with the same callback but different ctx pointers.
+ *
+ * First call: ctx=fp-8, second call: ctx=fp-24.
+ */
+SEC("socket")
+__log_level(2)
+__msg(" 8: (85) call bpf_loop{{.*}}            ; use: fp0-8")
+__msg("15: (85) call bpf_loop{{.*}}            ; use: fp0-24")
+__naked void callback_two_calls_different_ctx(void)
+{
+	asm volatile (
+	"*(u64 *)(r10 - 8) = 0;"
+	"*(u64 *)(r10 - 24) = 0;"
+	"r1 = 1;"
+	"r2 = cb_read_ctx ll;"
+	"r3 = r10;"
+	"r3 += -8;"
+	"r4 = 0;"
+	"call %[bpf_loop];"
+	"r1 = 1;"
+	"r2 = cb_read_ctx ll;"
+	"r3 = r10;"
+	"r3 += -24;"
+	"r4 = 0;"
+	"call %[bpf_loop];"
+	"r0 = 0;"
+	"exit;"
+	:: __imm(bpf_loop)
+	: __clobber_all);
+}
+
+/* Callback reads at ctx+0 unconditionally */
+static __used __naked void cb_read_ctx(void)
+{
+	asm volatile (
+	"r0 = *(u64 *)(r2 + 0);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/*
+ * Reproducer for unsound pruning in refined_caller_live_stack().
+ *
+ * Three-level call chain: main → mid_fwd → grandchild_deref.
+ * Main passes &fp-8 to mid_fwd, which forwards R1 to grandchild_deref.
+ * grandchild_deref reads main's fp-8 through the forwarded pointer
+ * and dereferences the result.
+ *
+ * refined_caller_live_stack() has a callee_offset++ when mid_fwd
+ * (frame 1) is mid-call. This drops the transitive parent_live
+ * contribution at mid_fwd's call instruction — the only place
+ * where grandchild_deref's read of main's fp-8 is recorded.
+ * As a result, main's fp-8 is cleaned to STACK_INVALID at the
+ * pruning point inside grandchild_deref, and path B is
+ * incorrectly pruned against path A.
+ *
+ * Path A: main stores PTR_TO_MAP_VALUE at fp-8
+ * Path B: main stores scalar 42 at fp-8
+ *
+ * Correct behavior: reject (path B dereferences scalar)
+ * Bug behavior: accept (path B pruned against cleaned path A)
+ */
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("R0 invalid mem access 'scalar'")
+__naked void transitive_parent_stack_read_unsound(void)
+{
+	asm volatile (
+	/* Map lookup to get PTR_TO_MAP_VALUE */
+	"r1 = %[map] ll;"
+	"*(u32 *)(r10 - 32) = 0;"
+	"r2 = r10;"
+	"r2 += -32;"
+	"call %[bpf_map_lookup_elem];"
+	"if r0 == 0 goto l_exit%=;"
+	"r6 = r0;"
+	/* Branch: path A (fall-through) explored first */
+	"call %[bpf_get_prandom_u32];"
+	"if r0 != 0 goto l_scalar%=;"
+	/* Path A: fp-8 = PTR_TO_MAP_VALUE */
+	"*(u64 *)(r10 - 8) = r6;"
+	"goto l_merge%=;"
+"l_scalar%=:"
+	/* Path B: fp-8 = scalar 42 */
+	"r1 = 42;"
+	"*(u64 *)(r10 - 8) = r1;"
+"l_merge%=:"
+	/* Pass &fp-8 to mid_fwd → grandchild_deref */
+	"r1 = r10;"
+	"r1 += -8;"
+	"call mid_fwd;"
+	"r0 = 0;"
+	"exit;"
+"l_exit%=:"
+	"r0 = 0;"
+	"exit;"
+	:
+	: __imm(bpf_map_lookup_elem),
+	  __imm(bpf_get_prandom_u32),
+	  __imm_addr(map)
+	: __clobber_all);
+}
+
+/* Forwards R1 (ptr to main's fp-8) to grandchild_deref */
+static __used __naked void mid_fwd(void)
+{
+	asm volatile (
+	"call grandchild_deref;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Reads main's fp-8 through forwarded pointer, dereferences result */
+static __used __naked void grandchild_deref(void)
+{
+	asm volatile (
+	"goto +0;"				/* checkpoint */
+	"goto +0;"				/* checkpoint */
+	/* read main's fp-8: map_ptr (path A) or scalar (path B) */
+	"r0 = *(u64 *)(r1 + 0);"
+	/* dereference: safe for map_ptr, unsafe for scalar */
+	"r0 = *(u64 *)(r0 + 0);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__success
+__msg("14: (79) r1 = *(u64 *)(r10 -8) // r6=fp0-8 r7=fp1-16 fp-8=fp1-16 fp-16=fp0-8")
+__msg("15: (79) r0 = *(u64 *)(r1 +0) // r1=fp1-16 r6=fp0-8 r7=fp1-16 fp-8=fp1-16 fp-16=fp0-8")
+__msg("stack use/def subprog#1 mid_two_fp_threshold (d1,cs2):")
+__msg("14: (79) r1 = *(u64 *)(r10 -8)        ; use: fp1-8")
+__msg("15: (79) r0 = *(u64 *)(r1 +0)         ; use: fp1-16")
+__naked void two_fp_clear_stack_threshold(void)
+{
+	asm volatile (
+	"r1 = r10;"
+	"r1 += -8;"
+	"call mid_two_fp_threshold;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void mid_two_fp_threshold(void)
+{
+	asm volatile (
+	"r6 = r1;"
+	"r7 = r10;"
+	"r7 += -16;"
+	"*(u64 *)(r10 - 8) = r7;"
+	"*(u64 *)(r10 - 16) = r6;"
+	"r1 = r10;"
+	"r1 += -8;"
+	"r2 = r6;"
+	"call inner_nop_fptest;"
+	"r1 = *(u64 *)(r10 - 8);"
+	"r0 = *(u64 *)(r1 + 0);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void inner_nop_fptest(void)
+{
+	asm volatile (
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__success
+__msg("13: (79) r1 = *(u64 *)(r10 -8) // r6=fp0-8 r7=fp1-16 fp-8=fp1-16 fp-16=fp0-8")
+__msg("14: (79) r0 = *(u64 *)(r1 +0) // r1=fp1-16 r6=fp0-8 r7=fp1-16 fp-8=fp1-16 fp-16=fp0-8")
+__msg("stack use/def subprog#1 mid_one_fp_threshold (d1,cs2):")
+__msg("13: (79) r1 = *(u64 *)(r10 -8)        ; use: fp1-8")
+__msg("14: (79) r0 = *(u64 *)(r1 +0)         ; use: fp1-16")
+__naked void one_fp_clear_stack_threshold(void)
+{
+	asm volatile (
+	"r1 = r10;"
+	"r1 += -8;"
+	"call mid_one_fp_threshold;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void mid_one_fp_threshold(void)
+{
+	asm volatile (
+	"r6 = r1;"
+	"r7 = r10;"
+	"r7 += -16;"
+	"*(u64 *)(r10 - 8) = r7;"
+	"*(u64 *)(r10 - 16) = r6;"
+	"r1 = r10;"
+	"r1 += -8;"
+	"call inner_nop_fptest;"
+	"r1 = *(u64 *)(r10 - 8);"
+	"r0 = *(u64 *)(r1 + 0);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/*
+ * Reproducer for unsound pruning when a subprog forwards a parent
+ * stack pointer (AT_PARENT) to a helper with a memory argument.
+ *
+ * set_call_stack_access_at() previously only tracked AT_CURRENT args,
+ * skipping AT_PARENT entirely. This meant helper reads through parent
+ * stack pointers did not set parent_use, letting the slot appear dead
+ * at pruning checkpoints inside the subprog.
+ *
+ * Program shape:
+ *   main:
+ *     *(u32)(fp-4) = 0             key = STACK_ZERO (const 0)
+ *     call bpf_get_prandom_u32
+ *     if r0 != 0 goto clobber      path A (fall-through) first
+ *     goto merge
+ *   clobber:
+ *     *(u8)(fp-4) = r0             path B: key[0] = STACK_MISC
+ *   merge:
+ *     r1 = fp - 4
+ *     call fwd_parent_key_to_helper
+ *     r0 = 0
+ *     exit
+ *
+ *   fwd_parent_key_to_helper(r1 = &caller_fp-4):
+ *     goto +0                      checkpoint
+ *     r2 = r1                      R2 = AT_PARENT ptr to caller fp-4
+ *     r1 = array_map_8b ll         R1 = array map
+ *     call bpf_map_lookup_elem     reads key_size(4) from parent fp-4
+ *     r0 = *(u64 *)(r0 + 0)        deref without null check
+ *     r0 = 0
+ *     exit
+ *
+ * Path A: STACK_ZERO key = const 0 -> array lookup -> PTR_TO_MAP_VALUE
+ *         (non-NULL for in-bounds const key) -> deref OK.
+ * Path B: STACK_MISC key = unknown -> array lookup ->
+ *         PTR_TO_MAP_VALUE_OR_NULL -> deref UNSAFE.
+ *
+ * Bug: AT_PARENT R2 arg to bpf_map_lookup_elem skipped -> parent_use
+ *      not set -> fp-4 cleaned at checkpoint -> STACK_ZERO collapses
+ *      to STACK_INVALID -> path B pruned -> deref never checked.
+ *
+ * Correct verifier behavior: reject (path B deref of map_value_or_null).
+ */
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("R0 invalid mem access 'map_value_or_null'")
+__naked void helper_parent_stack_read_unsound(void)
+{
+	asm volatile (
+	/* key at fp-4: all bytes STACK_ZERO */
+	"*(u32 *)(r10 - 4) = 0;"
+	"call %[bpf_get_prandom_u32];"
+	/* fall-through (path A) explored first */
+	"if r0 != 0 goto l_clobber%=;"
+	/* path A: key stays constant zero */
+	"goto l_merge%=;"
+"l_clobber%=:"
+	/* path B: key[0] becomes STACK_MISC, key no longer const */
+	"*(u8 *)(r10 - 4) = r0;"
+"l_merge%=:"
+	"r1 = r10;"
+	"r1 += -4;"
+	"call fwd_parent_key_to_helper;"
+	"r0 = 0;"
+	"exit;"
+	:
+	: __imm(bpf_get_prandom_u32)
+	: __clobber_all);
+}
+
+/*
+ * Subprog forwards parent stack pointer to bpf_map_lookup_elem as key
+ * on an array map, then dereferences the result without a null check.
+ * R1 = &parent_fp-4 (AT_PARENT in this frame).
+ *
+ * The helper reads key_size(4) bytes from parent stack.  The deref of
+ * R0 reads the map value, NOT parent stack, so record_insn_mem_accesses
+ * does not set parent_use for it.  The ONLY parent stack access is
+ * through the helper's R2 arg.
+ */
+static __used __naked void fwd_parent_key_to_helper(void)
+{
+	asm volatile (
+	"goto +0;"				/* checkpoint */
+	"r2 = r1;"				/* R2 = parent ptr (AT_PARENT) */
+	"r1 = %[array_map_8b] ll;"		/* R1 = array map */
+	"call %[bpf_map_lookup_elem];"		/* reads 4 bytes from parent fp-4 */
+	/* deref without null check: safe for PTR_TO_MAP_VALUE,
+	 * unsafe for PTR_TO_MAP_VALUE_OR_NULL
+	 */
+	"r0 = *(u64 *)(r0 + 0);"
+	"r0 = 0;"
+	"exit;"
+	:
+	: __imm(bpf_map_lookup_elem),
+	  __imm_addr(array_map_8b)
+	: __clobber_all);
+}
+
+/*
+ * Regression for keeping later helper args after a whole-stack fallback
+ * on an earlier local arg.  The first bpf_snprintf() arg is a local
+ * frame-derived pointer with offset-imprecise tracking (`fp1 ?`), which
+ * conservatively marks the whole local stack live.  The fourth arg still
+ * forwards &parent_fp-8 and must contribute nonlocal_use[0]=0:3.
+ */
+SEC("socket")
+__log_level(2)
+__success
+__msg("call bpf_snprintf{{.*}}        ; use: fp1-8..-512 fp0-8")
+__naked void helper_arg_fallback_keeps_scanning(void)
+{
+	asm volatile (
+	"r1 = 42;"
+	"*(u64 *)(r10 - 8) = r1;"
+	"r1 = r10;"
+	"r1 += -8;"
+	"call helper_snprintf_parent_after_local_fallback;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void helper_snprintf_parent_after_local_fallback(void)
+{
+	asm volatile (
+	"r6 = r1;"				/* save &parent_fp-8 */
+	"call %[bpf_get_prandom_u32];"
+	"r0 &= 8;"
+	"r1 = r10;"
+	"r1 += -16;"
+	"r1 += r0;"				/* local fp, offset-imprecise */
+	"r2 = 8;"
+	"r3 = %[snprintf_u64_fmt] ll;"
+	"r4 = r6;"				/* later arg: parent fp-8 */
+	"r5 = 8;"
+	"call %[bpf_snprintf];"
+	"r0 = 0;"
+	"exit;"
+	:
+	: __imm(bpf_get_prandom_u32),
+	  __imm(bpf_snprintf),
+	  __imm_addr(snprintf_u64_fmt)
+	: __clobber_all);
+}
+
+/*
+ * Test that propagate_callee_ancestor() correctly chains ancestor
+ * liveness across sequential calls within a single frame.
+ *
+ * main → mid_seq_touch → {nop_callee, deref_ancestor}
+ *
+ * mid_seq_touch receives two pointers: R1 = &main_fp-8 (forwarded to
+ * deref_ancestor) and R2 = &main_fp-16 (read directly by mid_seq_touch).
+ * The direct read of fp-16 forces ensure_anc_arrays() to allocate
+ * ancestor_live[0] for mid_seq_touch, so refined_caller_live_stack()
+ * uses the refined path (not the conservative fallback).
+ *
+ * mid_seq_touch calls nop_callee first (no-op, creates a pruning point),
+ * then calls deref_ancestor which reads main's fp-8 and dereferences it.
+ *
+ * propagate_callee_ancestor() propagates deref_ancestor's entry
+ * ancestor_live[0] into mid_seq_touch's anc_use[0] at the call-to-deref
+ * instruction.  mid_seq_touch's backward pass flows this backward so
+ * ancestor_live[0] includes fp-8 at the pruning point between the calls.
+ *
+ * Without propagation, mid_seq_touch's ancestor_live[0] only has fp-16
+ * (from the direct read) — fp-8 is missing.  refined_caller_live_stack()
+ * Term 1 says fp-8 is dead, the verifier cleans it, and path B
+ * (scalar 42) is incorrectly pruned against path A (MAP_VALUE).
+ *
+ * Path A: main stores PTR_TO_MAP_VALUE at fp-8  → deref succeeds
+ * Path B: main stores scalar 42 at fp-8         → deref must fail
+ *
+ * Correct: reject (path B dereferences scalar)
+ */
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("R0 invalid mem access 'scalar'")
+__naked void propagate_callee_ancestor_chain(void)
+{
+	asm volatile (
+	/* Map lookup to get PTR_TO_MAP_VALUE */
+	"r1 = %[map] ll;"
+	"*(u32 *)(r10 - 32) = 0;"
+	"r2 = r10;"
+	"r2 += -32;"
+	"call %[bpf_map_lookup_elem];"
+	"if r0 == 0 goto l_exit%=;"
+	"r6 = r0;"
+	/* Branch: path A (fall-through) explored first */
+	"call %[bpf_get_prandom_u32];"
+	"if r0 != 0 goto l_scalar%=;"
+	/* Path A: fp-8 = PTR_TO_MAP_VALUE */
+	"*(u64 *)(r10 - 8) = r6;"
+	"goto l_merge%=;"
+"l_scalar%=:"
+	/* Path B: fp-8 = scalar 42 */
+	"r1 = 42;"
+	"*(u64 *)(r10 - 8) = r1;"
+"l_merge%=:"
+	/* fp-16 = dummy value (mid_seq_touch reads it directly) */
+	"r1 = 99;"
+	"*(u64 *)(r10 - 16) = r1;"
+	/* R1 = &fp-8 (for deref_ancestor), R2 = &fp-16 (for mid_seq_touch) */
+	"r1 = r10;"
+	"r1 += -8;"
+	"r2 = r10;"
+	"r2 += -16;"
+	"call mid_seq_touch;"
+	"r0 = 0;"
+	"exit;"
+"l_exit%=:"
+	"r0 = 0;"
+	"exit;"
+	:
+	: __imm(bpf_map_lookup_elem),
+	  __imm(bpf_get_prandom_u32),
+	  __imm_addr(map)
+	: __clobber_all);
+}
+
+/*
+ * R1 = &main_fp-8 (forwarded to deref_ancestor)
+ * R2 = &main_fp-16 (read directly here → allocates ancestor_live[0])
+ *
+ * Reads main's fp-16 to force ancestor_live[0] allocation, then
+ * calls nop_callee (pruning point), then deref_ancestor.
+ */
+static __used __naked void mid_seq_touch(void)
+{
+	asm volatile (
+	"r6 = r1;"			/* save &main_fp-8 in callee-saved */
+	"r0 = *(u64 *)(r2 + 0);"	/* read main's fp-16: triggers anc_use[0] */
+	"call nop_callee;"		/* no-op, creates pruning point after */
+	"r1 = r6;"			/* restore ptr to &main_fp-8 */
+	"call deref_ancestor;"		/* reads main's fp-8, dereferences */
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void nop_callee(void)
+{
+	asm volatile (
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Reads main's fp-8 through forwarded pointer, dereferences result */
+static __used __naked void deref_ancestor(void)
+{
+	asm volatile (
+	"r0 = *(u64 *)(r1 + 0);"	/* read main's fp-8 */
+	"r0 = *(u64 *)(r0 + 0);"	/* deref: safe for map_ptr, unsafe for scalar */
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/*
+ * Test: callee loads an fp-derived pointer from caller's stack, then
+ * reads through it to access another caller stack slot.
+ *
+ * main stores PTR_TO_MAP_VALUE at fp-24, stores &fp-24 (an fp-derived
+ * pointer) at fp-8, passes &fp-8 through mid_fwd_spilled_ptr to
+ * load_ptr_deref_grandchild.  The leaf loads the pointer from main's
+ * fp-8, then reads main's fp-24 through the loaded pointer.
+ *
+ * fill_from_stack() in arg_track_xfer() only handles local-frame
+ * FP-derived loads (src_is_local_fp check requires frame == depth).
+ * When a callee loads from a parent-frame pointer (frame < depth),
+ * the loaded value gets ARG_NONE instead of being recognized as
+ * fp-derived.  Subsequent reads through that loaded pointer are
+ * invisible to liveness — nonlocal_use is never set for fp-24.
+ *
+ * clean_live_states() cleans the current state at every prune point.
+ * Because liveness misses fp-24, refined_caller_live_stack() tells
+ * __clean_func_state() that fp-24 is dead, which destroys the
+ * PTR_TO_MAP_VALUE spill before the grandchild can read it.
+ * The grandchild then reads STACK_INVALID → scalar, and the deref
+ * is rejected with "R0 invalid mem access 'scalar'" — even though
+ * fp-24 is genuinely live and holds a valid map pointer.
+ *
+ * This is a false positive: a valid program incorrectly rejected.
+ */
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__success
+__naked void spilled_fp_cross_frame_deref(void)
+{
+	asm volatile (
+	/* Map lookup to get PTR_TO_MAP_VALUE */
+	"r1 = %[map] ll;"
+	"*(u32 *)(r10 - 32) = 0;"
+	"r2 = r10;"
+	"r2 += -32;"
+	"call %[bpf_map_lookup_elem];"
+	"if r0 == 0 goto l_exit%=;"
+	/* fp-24 = PTR_TO_MAP_VALUE */
+	"*(u64 *)(r10 - 24) = r0;"
+	/* Store pointer to fp-24 at fp-8 */
+	"r1 = r10;"
+	"r1 += -24;"
+	"*(u64 *)(r10 - 8) = r1;"
+	/* R1 = &fp-8: pointer to the spilled ptr */
+	"r1 = r10;"
+	"r1 += -8;"
+	"call mid_fwd_spilled_ptr;"
+	"r0 = 0;"
+	"exit;"
+"l_exit%=:"
+	"r0 = 0;"
+	"exit;"
+	:
+	: __imm(bpf_map_lookup_elem),
+	  __imm_addr(map)
+	: __clobber_all);
+}
+
+/* Forwards R1 (ptr to main's fp-8, which holds &main_fp-24) to leaf */
+static __used __naked void mid_fwd_spilled_ptr(void)
+{
+	asm volatile (
+	"call load_ptr_deref_grandchild;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/*
+ * R1 = &main_fp-8 (where main stored ptr to fp-24)
+ * Loads the ptr from main's fp-8, reads main's fp-24 through it,
+ * then dereferences the result.
+ */
+static __used __naked void load_ptr_deref_grandchild(void)
+{
+	asm volatile (
+	/* Load ptr from main's fp-8 → r2 = &main_fp-24 */
+	"r2 = *(u64 *)(r1 + 0);"
+	/* Read main's fp-24 through loaded ptr */
+	"r0 = *(u64 *)(r2 + 0);"
+	/* Dereference: safe for map_ptr */
+	"r0 = *(u64 *)(r0 + 0);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/*
+ * Exercise merge_nonlocal_live().
+ *
+ * merge_shared_mid is analyzed twice (once from each wrapper), so the
+ * callsite within merge_shared_mid that calls merge_leaf_read gets its
+ * nonlocal_live info merged twice via merge_nonlocal_live().
+ */
+SEC("socket")
+__log_level(2)
+__success
+__msg("14: (85) call pc+2	r1: fp0-16")
+__msg("17: (79) r0 = *(u64 *)(r1 +0) // r1=fp0-16")
+__msg("14: (85) call pc+2	r1: fp0-8")
+__msg("17: (79) r0 = *(u64 *)(r1 +0) // r1=fp0-8")
+__msg("5: (85) call pc+{{.*}}                   ; use: fp0-8 fp0-16")
+__naked void test_merge_nonlocal_live(void)
+{
+	asm volatile (
+	"r1 = 0;"
+	"*(u64 *)(r10 - 8) = r1;"
+	"*(u64 *)(r10 - 16) = r1;"
+	"r1 = r10;"
+	"r1 += -8;"
+	"call merge_wrapper_a;"
+	"r1 = r10;"
+	"r1 += -16;"
+	"call merge_wrapper_b;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void merge_wrapper_a(void)
+{
+	asm volatile (
+	"call merge_shared_mid;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void merge_wrapper_b(void)
+{
+	asm volatile (
+	"call merge_shared_mid;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void merge_shared_mid(void)
+{
+	asm volatile (
+	"call merge_leaf_read;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void merge_leaf_read(void)
+{
+	asm volatile (
+	"r0 = *(u64 *)(r1 + 0);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Same bpf_loop instruction calls different callbacks depending on branch. */
+SEC("socket")
+__log_level(2)
+__success
+__msg("call bpf_loop#181            ; use: fp2-8..-512 fp1-8..-512 fp0-8..-512")
+__naked void bpf_loop_two_callbacks(void)
+{
+	asm volatile (
+	"r1 = 0;"
+	"*(u64 *)(r10 - 8) = r1;"
+	"*(u64 *)(r10 - 16) = r1;"
+	"r1 = r10;"
+	"r1 += -8;"
+	"call dyn_wrapper_a;"
+	"r1 = r10;"
+	"r1 += -16;"
+	"call dyn_wrapper_b;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void dyn_wrapper_a(void)
+{
+	asm volatile (
+	"call mid_dynamic_cb;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void dyn_wrapper_b(void)
+{
+	asm volatile (
+	"call mid_dynamic_cb;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void mid_dynamic_cb(void)
+{
+	asm volatile (
+	"r6 = r1;"
+	"call %[bpf_get_prandom_u32];"
+	"if r0 == 0 goto 1f;"
+	"r2 = dyn_cb_a ll;"
+	"goto 2f;"
+	"1:"
+	"r2 = dyn_cb_b ll;"
+	"2:"
+	"r1 = 1;"
+	"r3 = r6;" /* ctx = fp-derived ptr from parent */
+	"r4 = 0;"
+	"call %[bpf_loop];"
+	"r0 = 0;"
+	"exit;"
+	:: __imm(bpf_get_prandom_u32),
+	   __imm(bpf_loop)
+	: __clobber_all);
+}
+
+/* Callback A/B: read parent stack through ctx */
+static __used __naked void dyn_cb_a(void)
+{
+	asm volatile (
+	"r0 = *(u64 *)(r2 + 0);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void dyn_cb_b(void)
+{
+	asm volatile (
+	"r0 = *(u64 *)(r2 + 0);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/*
+ * Path A: r0 = map_lookup result (non-FP, ARG_NONE for stack tracking)
+ * Path B: r0 = fp-8 (FP-derived, frame=0, off=-8)
+ * At the join: r0 is not guaranteed to be a frame pointer.
+ */
+SEC("socket")
+__log_level(2)
+__msg("10: (79) r0 = *(u64 *)(r10 -8) // r0=fp0-8|fp0+0")
+__naked void stack_or_non_stack_write(void)
+{
+	asm volatile (
+	/* initial write to fp-8 */
+	"*(u64 *)(r10 - 8) = 0;"
+	/* map lookup to get a non-FP pointer */
+	"r2 = r10;"
+	"r2 += -4;"
+	"r1 = %[map] ll;"
+	"call %[bpf_map_lookup_elem];"
+	/* r0 = map_value (ARG_NONE) */
+	"if r0 != 0 goto 1f;"
+	/* path B: r0 = fp-8 */
+	"r0 = r10;"
+	"r0 += -8;"
+"1:"
+	/* join: the write is not a def for fp[0]-8 */
+	"*(u64 *)(r0 + 0) = 7;"
+	/* read fp-8: should be non-poisoned */
+	"r0 = *(u64 *)(r10 - 8);"
+	"exit;"
+	:
+	: __imm(bpf_map_lookup_elem),
+	  __imm_addr(map)
+	: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__flag(BPF_F_TEST_STATE_FREQ)
+__msg("subprog#2 write_first_read_second:")
+__msg("17: (7a) *(u64 *)(r1 +0) = 42{{$}}")
+__msg("18: (79) r0 = *(u64 *)(r2 +0) // r1=fp0-8 r2=fp0-16{{$}}")
+__msg("stack use/def subprog#2 write_first_read_second (d2,cs15):")
+__msg("17: (7a) *(u64 *)(r1 +0) = 42{{$}}")
+__msg("18: (79) r0 = *(u64 *)(r2 +0)         ; use: fp0-8 fp0-16")
+__naked void shared_instance_must_write_overwrite(void)
+{
+	asm volatile (
+	"r1 = 1;"
+	"*(u64 *)(r10 - 8) = r1;"
+	"*(u64 *)(r10 - 16) = r1;"
+	/* Call 1: write_first_read_second(&fp[-8], &fp[-16]) */
+	"r1 = r10;"
+	"r1 += -8;"
+	"r2 = r10;"
+	"r2 += -16;"
+	"call forwarding_rw;"
+	/* Call 2: write_first_read_second(&fp[-16], &fp[-8]) */
+	"r1 = r10;"
+	"r1 += -16;"
+	"r2 = r10;"
+	"r2 += -8;"
+	"call forwarding_rw;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void forwarding_rw(void)
+{
+	asm volatile (
+	"call write_first_read_second;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void write_first_read_second(void)
+{
+	asm volatile (
+	"*(u64 *)(r1 + 0) = 42;"
+	"r0 = *(u64 *)(r2 + 0);"
+	"exit;"
+	::: __clobber_all);
+}

-- 
2.53.0

  parent reply	other threads:[~2026-04-09  1:33 UTC|newest]

Thread overview: 22+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-09  1:33 [PATCH bpf-next 00/14] bpf: static stack liveness data flow analysis Eduard Zingerman
2026-04-09  1:33 ` [PATCH bpf-next 01/14] bpf: share several utility functions as internal API Eduard Zingerman
2026-04-09  1:33 ` [PATCH bpf-next 02/14] bpf: save subprogram name in bpf_subprog_info Eduard Zingerman
2026-04-09  2:14   ` bot+bpf-ci
2026-04-09  1:33 ` [PATCH bpf-next 03/14] bpf: Add spis_*() helpers for 4-byte stack slot bitmasks Eduard Zingerman
2026-04-09  3:12   ` bot+bpf-ci
2026-04-09  1:33 ` [PATCH bpf-next 04/14] bpf: make liveness.c track stack with 4-byte granularity Eduard Zingerman
2026-04-09  2:26   ` bot+bpf-ci
2026-04-09  1:33 ` [PATCH bpf-next 05/14] bpf: 4-byte precise clean_verifier_state Eduard Zingerman
2026-04-09  1:33 ` [PATCH bpf-next 06/14] bpf: prepare bpf_liveness api for use by static analysis pass Eduard Zingerman
2026-04-09  1:33 ` [PATCH bpf-next 07/14] bpf: introduce forward arg-tracking dataflow analysis Eduard Zingerman
2026-04-09  2:26   ` bot+bpf-ci
2026-04-09  1:33 ` [PATCH bpf-next 08/14] bpf: record arg tracking results in bpf_liveness masks Eduard Zingerman
2026-04-09  2:26   ` bot+bpf-ci
2026-04-09  1:33 ` [PATCH bpf-next 09/14] bpf: simplify liveness to use (callsite, depth) keyed func_instances Eduard Zingerman
2026-04-09  2:26   ` bot+bpf-ci
2026-04-09  1:33 ` [PATCH bpf-next 10/14] bpf: change logging scheme for live stack analysis Eduard Zingerman
2026-04-09  2:14   ` bot+bpf-ci
2026-04-09  1:33 ` [PATCH bpf-next 11/14] selftests/bpf: update existing tests due to liveness changes Eduard Zingerman
2026-04-09  1:33 ` [PATCH bpf-next 12/14] selftests/bpf: adjust verifier_log buffers Eduard Zingerman
2026-04-09  1:33 ` Eduard Zingerman [this message]
2026-04-09  1:33 ` [PATCH bpf-next 14/14] bpf: poison dead stack slots Eduard Zingerman

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20260408-patch-set-v1-13-1a666e860d42@gmail.com \
    --to=eddyz87@gmail.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=kernel-team@fb.com \
    --cc=martin.lau@linux.dev \
    --cc=yonghong.song@linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox