From: Eduard Zingerman <eddyz87@gmail.com>
To: bpf@vger.kernel.org, ast@kernel.org, andrii@kernel.org
Cc: daniel@iogearbox.net, martin.lau@linux.dev, kernel-team@fb.com,
	yonghong.song@linux.dev, eddyz87@gmail.com
Subject: [PATCH bpf-next v2 12/13] selftests/bpf: add new tests for static stack liveness analysis
Date: Thu,  9 Apr 2026 18:11:31 -0700	[thread overview]
Message-ID: <20260409-patch-set-v2-12-651804512349@gmail.com> (raw)
In-Reply-To: <20260409-patch-set-v2-0-651804512349@gmail.com>

From: Alexei Starovoitov <ast@kernel.org>

Add a set of new tests exercising the static stack liveness
analysis: subprog and callback argument tracking, 4-byte stack
slot granularity, and reproducers for unsound state pruning.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Eduard Zingerman <eddyz87@gmail.com>
---
 .../selftests/bpf/progs/verifier_live_stack.c      | 2129 ++++++++++++++++++++
 1 file changed, 2129 insertions(+)

diff --git a/tools/testing/selftests/bpf/progs/verifier_live_stack.c b/tools/testing/selftests/bpf/progs/verifier_live_stack.c
index 0281955139c7..a3f779abd00e 100644
--- a/tools/testing/selftests/bpf/progs/verifier_live_stack.c
+++ b/tools/testing/selftests/bpf/progs/verifier_live_stack.c
@@ -3,6 +3,7 @@
 
 #include <linux/bpf.h>
 #include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
 #include "bpf_misc.h"
 
 char _license[] SEC("license") = "GPL";
@@ -13,6 +14,15 @@ struct {
 	__type(value, long long);
 } map SEC(".maps");
 
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, __u32);
+	__type(value, __u64);
+} array_map_8b SEC(".maps");
+
+const char snprintf_u64_fmt[] = "%llu";
+
 SEC("socket")
 __log_level(2)
 __msg("0: (79) r1 = *(u64 *)(r10 -8)        ; use: fp0-8")
@@ -154,6 +164,33 @@ static __used __naked void read_first_param(void)
 	::: __clobber_all);
 }
 
+SEC("socket")
+__success
+__naked void arg_track_join_convergence(void)
+{
+	asm volatile (
+	"r1 = 1;"
+	"r2 = 2;"
+	"call arg_track_join_convergence_subprog;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void arg_track_join_convergence_subprog(void)
+{
+	asm volatile (
+	"if r1 == 0 goto 1f;"
+	"r0 = r1;"
+	"goto 2f;"
+"1:"
+	"r0 = r2;"
+"2:"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
 SEC("socket")
 __flag(BPF_F_TEST_STATE_FREQ)
 __log_level(2)
@@ -324,3 +361,2095 @@ static __used __naked unsigned long write_tail_call(void)
           __imm_addr(map_array)
         : __clobber_all);
 }
+
+/* Test precise subprog stack access analysis.
+ * Caller passes fp-32 (SPI 3) to a callee that only accesses arg+0 and
+ * arg+8 (SPIs 3 and 2). SPIs 0 and 1 should NOT be live at the call site.
+ *
+ * Insn layout:
+ *   0: *(u64*)(r10 - 8) = 0      write SPI 0
+ *   1: *(u64*)(r10 - 16) = 0     write SPI 1
+ *   2: *(u64*)(r10 - 24) = 0     write SPI 2
+ *   3: *(u64*)(r10 - 32) = 0     write SPI 3
+ *   4: r1 = r10
+ *   5: r1 += -32
+ *   6: call precise_read_two      passes fp-32 (SPI 3)
+ *   7: r0 = 0
+ *   8: exit
+ *
+ * At insn 6 only SPIs 2,3 should be live (slots 4-7, 0xf0).
+ * SPIs 0,1 are written but never read → dead.
+ */
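+/* Slot/mask notation, as a quick sketch: with 4-byte slots, SPI n
+ * covers slots 2n and 2n+1, and bit k of the liveness mask tracks
+ * slot k; SPIs 2,3 therefore map to slots 4..7, i.e. mask 0xf0.
+ */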
+SEC("socket")
+__log_level(2)
+__msg("6: (85) call pc+{{.*}}                   ; use: fp0-24 fp0-32{{$}}")
+__naked void subprog_precise_stack_access(void)
+{
+	asm volatile (
+	"*(u64 *)(r10 - 8) = 0;"
+	"*(u64 *)(r10 - 16) = 0;"
+	"*(u64 *)(r10 - 24) = 0;"
+	"*(u64 *)(r10 - 32) = 0;"
+	"r1 = r10;"
+	"r1 += -32;"
+	"call precise_read_two;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Callee reads only at arg+0 (SPI 3) and arg+8 (SPI 2) */
+static __used __naked void precise_read_two(void)
+{
+	asm volatile (
+	"r0 = *(u64 *)(r1 + 0);"
+	"r2 = *(u64 *)(r1 + 8);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Test that multi-level subprog calls (callee passes arg-derived ptr
+ * to another BPF subprog) are analyzed precisely.
+ *
+ * Caller passes fp-32 (SPI 3). The callee forwards it to inner_callee.
+ * inner_callee only reads at offset 0 from the pointer.
+ * The analysis recurses into forward_to_inner -> inner_callee and
+ * determines only SPI 3 is accessed (slots 6-7, 0xc0), not all of SPIs 0-3.
+ *
+ * Insn layout:
+ *   0: *(u64*)(r10 - 8) = 0      write SPI 0
+ *   1: *(u64*)(r10 - 16) = 0     write SPI 1
+ *   2: *(u64*)(r10 - 24) = 0     write SPI 2
+ *   3: *(u64*)(r10 - 32) = 0     write SPI 3
+ *   4: r1 = r10
+ *   5: r1 += -32
+ *   6: call forward_to_inner      passes fp-32 (SPI 3)
+ *   7: r0 = 0
+ *   8: exit
+ */
+SEC("socket")
+__log_level(2)
+__msg("6: (85) call pc+{{.*}}                   ; use: fp0-32{{$}}")
+__naked void subprog_multilevel_precise(void)
+{
+	asm volatile (
+	"*(u64 *)(r10 - 8) = 0;"
+	"*(u64 *)(r10 - 16) = 0;"
+	"*(u64 *)(r10 - 24) = 0;"
+	"*(u64 *)(r10 - 32) = 0;"
+	"r1 = r10;"
+	"r1 += -32;"
+	"call forward_to_inner;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Forwards arg to another subprog */
+static __used __naked void forward_to_inner(void)
+{
+	asm volatile (
+	"call inner_callee;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void inner_callee(void)
+{
+	asm volatile (
+	"r0 = *(u64 *)(r1 + 0);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Test multi-frame precision loss: callee consumes caller stack early,
+ * but static liveness keeps it live at pruning points inside callee.
+ *
+ * Caller stores map_ptr or scalar(42) at fp-8, then calls
+ * consume_and_call_inner. The callee reads fp0-8 at entry (consuming
+ * the slot), then calls do_nothing2. After do_nothing2 returns (a
+ * pruning point), fp-8 should be dead -- the read already happened.
+ * But because the call instruction's stack_use includes SPI 0, the
+ * static live_stack_before at insn 7 is 0x3, keeping fp-8 live inside
+ * the callee and preventing state pruning between the two paths.
+ *
+ * Insn layout:
+ *   0: call bpf_get_prandom_u32
+ *   1: if r0 == 42 goto pc+2    -> insn 4
+ *   2: r0 = map ll (ldimm64 part1)
+ *   3: (ldimm64 part2)
+ *   4: *(u64)(r10 - 8) = r0     fp-8 = map_ptr OR scalar(42)
+ *   5: r1 = r10
+ *   6: r1 += -8
+ *   7: call consume_and_call_inner
+ *   8: r0 = 0
+ *   9: exit
+ *
+ * At insn 7, live_stack_before = 0x3 (slots 0-1 live due to stack_use).
+ * At insn 8, live_stack_before = 0x0 (SPI 0 dead, caller doesn't need it).
+ */
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__log_level(2)
+__success
+__msg(" 7: (85) call pc+{{.*}}                   ; use: fp0-8")
+__msg(" 8: {{.*}} (b7)")
+__naked void callee_consumed_caller_stack(void)
+{
+	asm volatile (
+	"call %[bpf_get_prandom_u32];"
+	"if r0 == 42 goto 1f;"
+	"r0 = %[map] ll;"
+"1:"
+	"*(u64 *)(r10 - 8) = r0;"
+	"r1 = r10;"
+	"r1 += -8;"
+	"call consume_and_call_inner;"
+	"r0 = 0;"
+	"exit;"
+	:
+	: __imm(bpf_get_prandom_u32),
+	  __imm_addr(map)
+	: __clobber_all);
+}
+
+static __used __naked void consume_and_call_inner(void)
+{
+	asm volatile (
+	"r0 = *(u64 *)(r1 + 0);"	/* read fp[0]-8 into caller-saved r0 */
+	"call do_nothing2;"		/* inner call clobbers r0 */
+	"r0 = 0;"
+	"goto +0;"			/* checkpoint */
+	"r0 = 0;"
+	"goto +0;"			/* checkpoint */
+	"r0 = 0;"
+	"goto +0;"			/* checkpoint */
+	"r0 = 0;"
+	"goto +0;"			/* checkpoint */
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void do_nothing2(void)
+{
+	asm volatile (
+	"r0 = 0;"
+	"r0 = 0;"
+	"r0 = 0;"
+	"r0 = 0;"
+	"r0 = 0;"
+	"r0 = 0;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/*
+ * Reproducer for unsound pruning when clean_verifier_state() promotes
+ * live STACK_ZERO bytes to STACK_MISC.
+ *
+ * Program shape:
+ * - Build key at fp-4:
+ *   - path A keeps key byte as STACK_ZERO;
+ *   - path B writes unknown byte making it STACK_MISC.
+ * - Branches merge at a prune point before map_lookup.
+ * - map_lookup on ARRAY map is value-sensitive to constant zero key:
+ *   - path A: const key 0 => PTR_TO_MAP_VALUE (non-NULL);
+ *   - path B: non-const key => PTR_TO_MAP_VALUE_OR_NULL.
+ * - Dereference lookup result without null check.
+ *
+ * Note: this behavior won't trigger at fp-8, since the verifier tracks
+ * a 32-bit scalar spill there differently, as spilled_ptr.
+ *
+ * Correct verifier behavior: reject (path B unsafe).
+ * With blanket STACK_ZERO->STACK_MISC promotion on live slots, cached path A
+ * state can be generalized and incorrectly prune path B, making program load.
+ */
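+/* Rough C equivalent of the asm below (a sketch; key/v are just
+ * illustrative locals):
+ *
+ *	u32 key = 0;                               // all bytes STACK_ZERO
+ *	if (bpf_get_prandom_u32())
+ *		*(u8 *)&key = <unknown byte>;      // key[0] -> STACK_MISC
+ *	long long *v = bpf_map_lookup_elem(&array_map_8b, &key);
+ *	return *v;                                 // deref without NULL check
+ */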
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("R0 invalid mem access 'map_value_or_null'")
+__naked void stack_zero_to_misc_unsound_array_lookup(void)
+{
+	asm volatile (
+	/* key at fp-4: all bytes STACK_ZERO */
+	"*(u32 *)(r10 - 4) = 0;"
+	"call %[bpf_get_prandom_u32];"
+	/* fall-through (path A) explored first */
+	"if r0 != 0 goto l_nonconst%=;"
+	/* path A: keep key constant zero */
+	"goto l_lookup%=;"
+"l_nonconst%=:"
+	/* path B: key byte turns to STACK_MISC, key no longer const */
+	"*(u8 *)(r10 - 4) = r0;"
+"l_lookup%=:"
+	/* value-sensitive lookup */
+	"r2 = r10;"
+	"r2 += -4;"
+	"r1 = %[array_map_8b] ll;"
+	"call %[bpf_map_lookup_elem];"
+	/* unsafe when lookup result is map_value_or_null */
+	"r0 = *(u64 *)(r0 + 0);"
+	"exit;"
+	:
+	: __imm(bpf_get_prandom_u32),
+	  __imm(bpf_map_lookup_elem),
+	  __imm_addr(array_map_8b)
+	: __clobber_all);
+}
+
+/*
+ * Subprog variant of stack_zero_to_misc_unsound_array_lookup.
+ *
+ * Check unsound pruning when a callee modifies the caller's
+ * stack through a pointer argument.
+ *
+ * Program shape:
+ *   main:
+ *     *(u32)(fp - 4) = 0            key = 0 (all bytes STACK_ZERO)
+ *     r1 = fp - 4
+ *     call maybe_clobber_key        may overwrite key[0] with scalar
+ *     <-- prune point: two states meet here -->
+ *     r2 = fp - 4
+ *     r1 = array_map_8b
+ *     call bpf_map_lookup_elem      value-sensitive on const-zero key
+ *     r0 = *(u64)(r0 + 0)           deref without null check
+ *     exit
+ *
+ *   maybe_clobber_key(r1):
+ *     r6 = r1                       save &key
+ *     call bpf_get_prandom_u32
+ *     if r0 == 0 goto skip          path A: key stays STACK_ZERO
+ *     *(u8)(r6 + 0) = r0            path B: key[0] becomes STACK_MISC
+ *   skip:
+ *     r0 = 0
+ *     exit
+ *
+ * Path A: const-zero key => array lookup => PTR_TO_MAP_VALUE => deref OK.
+ * Path B: non-const key  => array lookup => PTR_TO_MAP_VALUE_OR_NULL => UNSAFE.
+ *
+ * If the cleaner collapses STACK_ZERO -> STACK_MISC for the live key
+ * slot, path A's cached state matches path B, pruning the unsafe path.
+ *
+ * Correct verifier behaviour: reject.
+ */
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("R0 invalid mem access 'map_value_or_null'")
+__naked void subprog_stack_zero_to_misc_unsound(void)
+{
+	asm volatile (
+	/* key at fp-4: all bytes STACK_ZERO */
+	"*(u32 *)(r10 - 4) = 0;"
+	/* subprog may clobber key[0] with a scalar byte */
+	"r1 = r10;"
+	"r1 += -4;"
+	"call maybe_clobber_key;"
+	/* value-sensitive array lookup */
+	"r2 = r10;"
+	"r2 += -4;"
+	"r1 = %[array_map_8b] ll;"
+	"call %[bpf_map_lookup_elem];"
+	/* unsafe when result is map_value_or_null (path B) */
+	"r0 = *(u64 *)(r0 + 0);"
+	"exit;"
+	:
+	: __imm(bpf_map_lookup_elem),
+	  __imm_addr(array_map_8b)
+	: __clobber_all);
+}
+
+static __used __naked void maybe_clobber_key(void)
+{
+	asm volatile (
+	"r6 = r1;"
+	"call %[bpf_get_prandom_u32];"
+	/* path A (r0==0): key stays STACK_ZERO, explored first */
+	"if r0 == 0 goto 1f;"
+	/* path B (r0!=0): overwrite key[0] with scalar */
+	"*(u8 *)(r6 + 0) = r0;"
+	"1:"
+	"r0 = 0;"
+	"exit;"
+	:: __imm(bpf_get_prandom_u32)
+	: __clobber_all);
+}
+
+/*
+ * Demonstrate that subprog arg spill/reload breaks arg tracking,
+ * inflating caller stack liveness and preventing state pruning.
+ *
+ * modifier2(fp-24) has two paths: one writes a scalar to *(r1+8)
+ * = caller fp-16, the other leaves it as zero.  After modifier2
+ * returns, fp-16 is never read again — it is dead.
+ *
+ * spill_reload_reader2(fp-24) only reads caller fp-8 via
+ * *(r1+16), but it spills r1 across a helper call.  This
+ * breaks compute_subprog_arg_access(): the reload from callee
+ * stack cannot be connected back to arg1, so arg1 access goes
+ * "all (conservative)".  At the call site (r1 = fp-24, slot 5)
+ * apply_callee_stack_access() marks slots 0..5 as stack_use —
+ * pulling fp-16 (slots 2-3) into live_stack_before even though
+ * the reader never touches it.
+ *
+ * Result: at modifier2's return point two states with different
+ * fp-16 values cannot be pruned.
+ *
+ * With correct (or old dynamic) liveness fp-16 is dead at that
+ * point and the states prune → "6: safe" appears in the log.
+ */
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__log_level(2)
+__success
+__msg("6: safe")
+__naked void spill_reload_inflates_stack_liveness(void)
+{
+	asm volatile (
+	/* struct at fp-24: { ctx; ptr; tail; } */
+	"*(u64 *)(r10 - 24) = r1;"		/* fp-24 = ctx */
+	"*(u64 *)(r10 - 16) = r1;"		/* fp-16 = ctx (STACK_SPILL ptr) */
+	"*(u64 *)(r10 - 8) = 0;"		/* fp-8  = tail */
+	/* modifier2 writes different values to fp-16 on two paths */
+	"r1 = r10;"
+	"r1 += -24;"
+	"call modifier2;"
+	/* insn 6: prune point — two states with different fp-16
+	 * path A: fp-16 = STACK_MISC  (scalar overwrote pointer)
+	 * path B: fp-16 = STACK_SPILL (original ctx pointer)
+	 * STACK_MISC does NOT subsume STACK_SPILL(ptr),
+	 * so pruning fails unless fp-16 is cleaned (dead).
+	 */
+	"r1 = r10;"
+	"r1 += -24;"
+	"call spill_reload_reader2;"		/* reads fp-8 via *(r1+16) */
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Two paths: one writes a scalar to *(r1+8) = caller fp-16,
+ * the other leaves it unchanged.  Both return 0 via separate
+ * exits to prevent pruning inside the subprog at the merge.
+ */
+static __used __naked void modifier2(void)
+{
+	asm volatile (
+	"r6 = r1;"
+	"call %[bpf_get_prandom_u32];"
+	"if r0 == 0 goto 1f;"
+	"*(u64 *)(r6 + 8) = r0;"		/* fp-16 = random */
+	"r0 = 0;"
+	"exit;"					/* path A exit */
+	"1:"
+	"r0 = 0;"
+	"exit;"					/* path B exit */
+	:: __imm(bpf_get_prandom_u32)
+	: __clobber_all);
+}
+
+/* Receives r1 = caller fp-24.  Only reads *(r1+16) = fp-8.
+ * Spills r1 across a helper call → arg tracking goes conservative →
+ * slots 0..5 all appear used instead of just slot 1 (fp-8).
+ */
+static __used __naked void spill_reload_reader2(void)
+{
+	asm volatile (
+	"*(u64 *)(r10 - 8) = r1;"		/* spill arg1 */
+	"call %[bpf_get_prandom_u32];"		/* clobbers r1-r5 */
+	"r1 = *(u64 *)(r10 - 8);"		/* reload arg1 */
+	"r0 = *(u64 *)(r1 + 16);"		/* read caller fp-8 */
+	"r0 = 0;"
+	"exit;"
+	:: __imm(bpf_get_prandom_u32)
+	: __clobber_all);
+}
+
+/* BTF FUNC records are not generated for kfuncs referenced
+ * from inline assembly. These records are necessary for
+ * libbpf to link the program. The function below is a hack
+ * to ensure that BTF FUNC records are generated.
+ */
+void __kfunc_btf_root(void)
+{
+	bpf_iter_num_new(0, 0, 0);
+	bpf_iter_num_next(0);
+	bpf_iter_num_destroy(0);
+}
+
+/* Test that open-coded iterator kfunc arguments get precise stack
+ * liveness tracking. struct bpf_iter_num is 8 bytes (1 SPI).
+ *
+ * Insn layout:
+ *   0: *(u64*)(r10 - 8) = 0      write SPI 0 (dead)
+ *   1: *(u64*)(r10 - 16) = 0     write SPI 1 (dead)
+ *   2: r1 = r10
+ *   3: r1 += -24                 iter state at fp-24 (SPI 2)
+ *   4: r2 = 0
+ *   5: r3 = 10
+ *   6: call bpf_iter_num_new     defines SPI 2 (KF_ITER_NEW) → 0x0
+ *   7-8: r1 = fp-24
+ *   9: call bpf_iter_num_next    uses SPI 2 → 0x30
+ *  10: if r0 == 0 goto 2f
+ *  11: goto 1b
+ *  12-13: r1 = fp-24
+ *  14: call bpf_iter_num_destroy uses SPI 2 → 0x30
+ *  15: r0 = 0
+ *  16: exit
+ *
+ * At insn 6, SPI 2 is defined (KF_ITER_NEW initializes, doesn't read),
+ * so it kills liveness from successors. live_stack_before = 0x0.
+ * At insns 9 and 14, SPI 2 is used (iter_next/destroy read the state),
+ * so live_stack_before = 0x30.
+ */
+SEC("socket")
+__success __log_level(2)
+__msg(" 6: (85) call bpf_iter_num_new{{.*}}          ; def: fp0-24{{$}}")
+__msg(" 9: (85) call bpf_iter_num_next{{.*}}         ; use: fp0-24{{$}}")
+__msg("14: (85) call bpf_iter_num_destroy{{.*}}      ; use: fp0-24{{$}}")
+__naked void kfunc_iter_stack_liveness(void)
+{
+	asm volatile (
+	"*(u64 *)(r10 - 8) = 0;"	/* SPI 0 - dead */
+	"*(u64 *)(r10 - 16) = 0;"	/* SPI 1 - dead */
+	"r1 = r10;"
+	"r1 += -24;"
+	"r2 = 0;"
+	"r3 = 10;"
+	"call %[bpf_iter_num_new];"
+"1:"
+	"r1 = r10;"
+	"r1 += -24;"
+	"call %[bpf_iter_num_next];"
+	"if r0 == 0 goto 2f;"
+	"goto 1b;"
+"2:"
+	"r1 = r10;"
+	"r1 += -24;"
+	"call %[bpf_iter_num_destroy];"
+	"r0 = 0;"
+	"exit;"
+	:: __imm(bpf_iter_num_new),
+	   __imm(bpf_iter_num_next),
+	   __imm(bpf_iter_num_destroy)
+	: __clobber_all);
+}
+
+/*
+ * Test for soundness bug in static stack liveness analysis.
+ *
+ * The static pre-pass tracks FP-derived register offsets to determine
+ * which stack slots are accessed. When a PTR_TO_STACK is spilled to
+ * the stack and later reloaded, the reload (BPF_LDX) kills FP-derived
+ * tracking, making subsequent accesses through the reloaded pointer
+ * invisible to the static analysis.
+ *
+ * This causes the analysis to incorrectly mark SPI 0 as dead at the
+ * merge point. clean_verifier_state() zeros it in the cached state,
+ * and stacksafe() accepts the new state against STACK_INVALID,
+ * enabling incorrect pruning.
+ *
+ * Path A (verified first): stores PTR_TO_MAP_VALUE in SPI 0
+ * Path B (verified second): stores scalar 42 in SPI 0
+ * After merge: reads SPI 0 through spilled/reloaded PTR_TO_STACK
+ * and dereferences the result as a pointer.
+ *
+ * Correct behavior: reject (path B dereferences a scalar)
+ * Bug behavior: accept (path B is incorrectly pruned)
+ */
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("R0 invalid mem access 'scalar'")
+__naked void spill_ptr_liveness_type_confusion(void)
+{
+	asm volatile (
+	/* Map lookup to get PTR_TO_MAP_VALUE */
+	"r1 = %[map] ll;"
+	"*(u32 *)(r10 - 32) = 0;"
+	"r2 = r10;"
+	"r2 += -32;"
+	"call %[bpf_map_lookup_elem];"
+	"if r0 == 0 goto l_exit%=;"
+	/* r6 = PTR_TO_MAP_VALUE (callee-saved) */
+	"r6 = r0;"
+	/* Branch: fall-through (path A) verified first */
+	"call %[bpf_get_prandom_u32];"
+	"if r0 != 0 goto l_scalar%=;"
+	/* Path A: store map value ptr at SPI 0 */
+	"*(u64 *)(r10 - 8) = r6;"
+	"goto l_merge%=;"
+"l_scalar%=:"
+	/* Path B: store scalar at SPI 0 */
+	"r1 = 42;"
+	"*(u64 *)(r10 - 8) = r1;"
+"l_merge%=:"
+	/*
+	 * Spill PTR_TO_STACK{off=-8} to SPI 1, then reload.
+	 * Reload kills FP-derived tracking, hiding the
+	 * subsequent SPI 0 access from the static analysis.
+	 */
+	"r1 = r10;"
+	"r1 += -8;"
+	"*(u64 *)(r10 - 16) = r1;"
+	"goto +0;"			/* checkpoint */
+	"goto +0;"			/* checkpoint */
+	"goto +0;"			/* checkpoint */
+	"r1 = *(u64 *)(r10 - 16);"
+	/* Read SPI 0 through reloaded pointer */
+	"r0 = *(u64 *)(r1 + 0);"
+	/* Dereference: safe for map value (path A),
+	 * unsafe for scalar (path B).
+	 */
+	"r0 = *(u64 *)(r0 + 0);"
+	"exit;"
+"l_exit%=:"
+	"r0 = 0;"
+	"exit;"
+	:
+	: __imm(bpf_map_lookup_elem),
+	  __imm(bpf_get_prandom_u32),
+	  __imm_addr(map)
+	: __clobber_all);
+}
+
+/* === Tests for 4-byte stack slot liveness granularity === */
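+
+/* Slot numbering used throughout this section (sketch): slots are
+ * 4 bytes wide, numbered upward from fp, so slot 0 = fp-4..fp-1,
+ * slot 1 = fp-8..fp-5 (the "upper half" of SPI 0), and in general
+ * slot k = fp-4(k+1)..fp-(4k+1). Bit k of the live_stack mask tracks
+ * slot k: slot 0 live = 0x1, slot 1 live = 0x2, full SPI 0 = 0x3.
+ */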
+
+/* Test that a 4-byte aligned write is stack_def and kills liveness.
+ *
+ *   0: *(u64 *)(r10 - 8) = 0      def slots 0,1 (full SPI 0)
+ *   1: *(u32 *)(r10 - 8) = 0      def slot 1 (4-byte write kills slot 1)
+ *   2: r0 = *(u64 *)(r10 - 8)     use slots 0,1
+ *   3: r0 = 0
+ *   4: exit
+ *
+ * At insn 1, the 4-byte write defines slot 1. Slot 0 still flows
+ * backward from insn 2's read: live_stack_before = 0x1.
+ */
+SEC("socket")
+__log_level(2)
+__msg("1: (62) *(u32 *)(r10 -8) = 0         ; def: fp0-8h")
+__naked void four_byte_write_kills_slot(void)
+{
+	asm volatile (
+	"*(u64 *)(r10 - 8) = 0;"
+	"*(u32 *)(r10 - 8) = 0;"
+	"r0 = *(u64 *)(r10 - 8);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Test that a write to the upper half of an SPI is dead when only
+ * the lower half is read. This was impossible at SPI granularity
+ * where any read of the SPI kept the entire SPI live.
+ *
+ *   0: *(u32 *)(r10 - 8) = 0      def slot 1 (DEAD: never read)
+ *   1: *(u32 *)(r10 - 4) = 0      def slot 0
+ *   2: r0 = *(u32 *)(r10 - 4)     use slot 0 only
+ *   3: r0 = 0
+ *   4: exit
+ *
+ * At insn 0, nothing is live (0x0). Previously at SPI granularity,
+ * the read at insn 2 would mark the full SPI 0 as live and the
+ * 4-byte writes wouldn't count as def, so insn 0 would have had
+ * SPI 0 live (0x1).
+ */
+SEC("socket")
+__log_level(2)
+__msg("0: (62) *(u32 *)(r10 -8) = 0         ; def: fp0-8h")
+__msg("2: (61) r0 = *(u32 *)(r10 -4)        ; use: fp0-4h")
+__naked void dead_half_spi_write(void)
+{
+	asm volatile (
+	"*(u32 *)(r10 - 8) = 0;"
+	"*(u32 *)(r10 - 4) = 0;"
+	"r0 = *(u32 *)(r10 - 4);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Test that a 4-byte read from the upper half of SPI 0 makes only
+ * slot 1 live (0x2), not the full SPI (0x3).
+ *
+ *   0: *(u64 *)(r10 - 8) = 0      def slots 0,1
+ *   1: r0 = *(u32 *)(r10 - 8)     use slot 1 only (upper half)
+ *   2: r0 = 0
+ *   3: exit
+ *
+ * At insn 1, live_stack_before = 0x2 (slot 1 only).
+ */
+SEC("socket")
+__log_level(2)
+__msg("1: (61) r0 = *(u32 *)(r10 -8)        ; use: fp0-8h")
+__naked void four_byte_read_upper_half(void)
+{
+	asm volatile (
+	"*(u64 *)(r10 - 8) = 0;"
+	"r0 = *(u32 *)(r10 - 8);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Test that a 2-byte write does NOT count as stack_def.
+ * Sub-4-byte writes don't fully cover a 4-byte slot,
+ * so liveness passes through.
+ *
+ *   0: *(u64 *)(r10 - 8) = 0      def slots 0,1
+ *   1: *(u16 *)(r10 - 4) = 0      NOT stack_def (2 < 4 bytes)
+ *   2: r0 = *(u32 *)(r10 - 4)     use slot 0
+ *   3: r0 = 0
+ *   4: exit
+ *
+ * At insn 1, slot 0 still live (0x1) because 2-byte write
+ * didn't kill it.
+ */
+SEC("socket")
+__log_level(2)
+__msg("0: (7a) *(u64 *)(r10 -8) = 0         ; def: fp0-8")
+__msg("1: (6a) *(u16 *)(r10 -4) = 0{{$}}")
+__msg("2: (61) r0 = *(u32 *)(r10 -4)        ; use: fp0-4h")
+__naked void two_byte_write_no_kill(void)
+{
+	asm volatile (
+	"*(u64 *)(r10 - 8) = 0;"
+	"*(u16 *)(r10 - 4) = 0;"
+	"r0 = *(u32 *)(r10 - 4);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Test that a 1-byte write does NOT count as stack_def.
+ *
+ *   0: *(u64 *)(r10 - 8) = 0      def slots 0,1
+ *   1: *(u8 *)(r10 - 4) = 0       NOT stack_def (1 < 4 bytes)
+ *   2: r0 = *(u32 *)(r10 - 4)     use slot 0
+ *   3: r0 = 0
+ *   4: exit
+ *
+ * At insn 1, slot 0 still live (0x1).
+ */
+SEC("socket")
+__log_level(2)
+__msg("0: (7a) *(u64 *)(r10 -8) = 0         ; def: fp0-8")
+__msg("1: (72) *(u8 *)(r10 -4) = 0")
+__msg("2: (61) r0 = *(u32 *)(r10 -4)        ; use: fp0-4h")
+__naked void one_byte_write_no_kill(void)
+{
+	asm volatile (
+	"*(u64 *)(r10 - 8) = 0;"
+	"*(u8 *)(r10 - 4) = 0;"
+	"r0 = *(u32 *)(r10 - 4);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Test stack access beyond fp-256 exercising the second bitmask word.
+ * fp-264 is SPI 32, slots 64-65, which are bits 0-1 of live_stack[1].
+ *
+ *   0: *(u64 *)(r10 - 264) = 0     def slots 64,65
+ *   1: r0 = *(u64 *)(r10 - 264)    use slots 64,65
+ *   2: r0 = 0
+ *   3: exit
+ *
+ * At insn 1, live_stack high word has bits 0,1 set: 0x3:0x0.
+ */
+SEC("socket")
+__log_level(2)
+__msg("1: (79) r0 = *(u64 *)(r10 -264)      ; use: fp0-264")
+__naked void high_stack_second_bitmask_word(void)
+{
+	asm volatile (
+	"*(u64 *)(r10 - 264) = 0;"
+	"r0 = *(u64 *)(r10 - 264);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Test that two separate 4-byte writes to each half of an SPI
+ * together kill liveness for the full SPI.
+ *
+ *   0: *(u32 *)(r10 - 8) = 0      def slot 1 (upper half)
+ *   1: *(u32 *)(r10 - 4) = 0      def slot 0 (lower half)
+ *   2: r0 = *(u64 *)(r10 - 8)     use slots 0,1
+ *   3: r0 = 0
+ *   4: exit
+ *
+ * At insn 0: live_stack_before = 0x0 (both slots killed by insns 0,1).
+ * At insn 1: live_stack_before = 0x2 (slot 1 still live, slot 0 killed here).
+ */
+SEC("socket")
+__log_level(2)
+__msg("0: (62) *(u32 *)(r10 -8) = 0         ; def: fp0-8h")
+__msg("1: (62) *(u32 *)(r10 -4) = 0         ; def: fp0-4h")
+__naked void two_four_byte_writes_kill_full_spi(void)
+{
+	asm volatile (
+	"*(u32 *)(r10 - 8) = 0;"
+	"*(u32 *)(r10 - 4) = 0;"
+	"r0 = *(u64 *)(r10 - 8);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Test that 4-byte writes on both branches kill a slot at the
+ * join point. Previously at SPI granularity, a 4-byte write was
+ * not stack_def, so liveness would flow backward through the
+ * branch that only had a 4-byte write.
+ *
+ *   0: call bpf_get_prandom_u32
+ *   1: if r0 != 0 goto 1f
+ *   2: *(u64 *)(r10 - 8) = 0       path A: def slots 0,1
+ *   3: goto 2f
+ * 1:4: *(u32 *)(r10 - 4) = 0       path B: def slot 0
+ * 2:5: r0 = *(u32 *)(r10 - 4)      use slot 0
+ *   6: r0 = 0
+ *   7: exit
+ *
+ * Both paths define slot 0 before the read. At insn 1 (branch),
+ * live_stack_before = 0x0 because slot 0 is killed on both paths.
+ */
+SEC("socket")
+__log_level(2)
+__msg("1: (55) if r0 != 0x0 goto pc+2")
+__msg("2: (7a) *(u64 *)(r10 -8) = 0         ; def: fp0-8")
+__msg("3: (05) goto pc+1")
+__msg("4: (62) *(u32 *)(r10 -4) = 0         ; def: fp0-4h")
+__msg("5: (61) r0 = *(u32 *)(r10 -4)        ; use: fp0-4h")
+__naked void both_branches_kill_slot(void)
+{
+	asm volatile (
+	"call %[bpf_get_prandom_u32];"
+	"if r0 != 0 goto 1f;"
+	"*(u64 *)(r10 - 8) = 0;"
+	"goto 2f;"
+"1:"
+	"*(u32 *)(r10 - 4) = 0;"
+"2:"
+	"r0 = *(u32 *)(r10 - 4);"
+	"r0 = 0;"
+	"exit;"
+	:: __imm(bpf_get_prandom_u32)
+	: __clobber_all);
+}
+
+/* Soundness: cleaning the dead upper half of an SPI must not
+ * affect the live lower half's type information for pruning.
+ *
+ * Both halves of SPI 0 are written separately. Only the lower
+ * half (slot 0) is used as a 4-byte map key. The upper half
+ * (slot 1) is dead and cleaned to STACK_INVALID.
+ *
+ * Path A: key stays 0 (STACK_ZERO) → non-null array lookup
+ * Path B: key byte turns STACK_MISC → may-null array lookup
+ * Deref without null check: safe for A, unsafe for B.
+ *
+ * If half-SPI cleaning incorrectly corrupted the live half's
+ * type info, path A's cached state could generalize and unsoundly
+ * prune path B.
+ *
+ * Expected: reject (path B unsafe).
+ */
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("R0 invalid mem access 'map_value_or_null'")
+__naked void half_spi_clean_preserves_stack_zero(void)
+{
+	asm volatile (
+	"*(u32 *)(r10 - 4) = 0;"           /* slot 0: STACK_ZERO */
+	"*(u32 *)(r10 - 8) = 0;"           /* slot 1: STACK_ZERO (dead) */
+	"call %[bpf_get_prandom_u32];"
+	"if r0 != 0 goto l_nonconst%=;"
+	"goto l_lookup%=;"
+"l_nonconst%=:"
+	"*(u8 *)(r10 - 4) = r0;"           /* slot 0: STACK_MISC */
+"l_lookup%=:"
+	"r2 = r10;"
+	"r2 += -4;"
+	"r1 = %[array_map_8b] ll;"
+	"call %[bpf_map_lookup_elem];"
+	"r0 = *(u64 *)(r0 + 0);"           /* unsafe if null */
+	"exit;"
+	:
+	: __imm(bpf_get_prandom_u32),
+	  __imm(bpf_map_lookup_elem),
+	  __imm_addr(array_map_8b)
+	: __clobber_all);
+}
+
+/*
+ * Model of scx_lavd's pick_idle_cpu_at_cpdom iat block:
+ * conditional block with helper call and temporary stack spill,
+ * spill dead after merge.
+ *
+ * Path A (fall-through): spill r6 to fp-8 across helper call
+ * Path B (branch taken): skip the block entirely
+ * At merge (insn 6): fp-8 is dead (never read after merge)
+ *
+ * Static liveness marks fp-8 dead at merge. clean_verifier_state()
+ * converts path A's STACK_SPILL to STACK_INVALID. Path B has
+ * STACK_INVALID. stacksafe() matches -> path B pruned -> "6: safe".
+ */
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__success
+__log_level(2)
+__msg("6: safe")
+__naked void dead_spill_at_merge_enables_pruning(void)
+{
+	asm volatile (
+	"call %[bpf_get_prandom_u32];"
+	"r6 = 7;"
+	"if r0 != 0 goto l_skip%=;"
+	/* conditional block: spill, call, reload */
+	"*(u64 *)(r10 - 8) = r6;"
+	"call %[bpf_get_prandom_u32];"
+	"r6 = *(u64 *)(r10 - 8);"
+"l_skip%=:"
+	/* fp-8 dead. Path B pruned here -> "6: safe" */
+	"r0 = r6;"
+	"exit;"
+	:
+	: __imm(bpf_get_prandom_u32)
+	: __clobber_all);
+}
+
+/*
+ * FP-offset tracking loses precision on second ADD, killing all liveness.
+ *
+ * fp_off_insn_xfer() handles "FP itself + negative imm" precisely
+ * (e.g. r6 = r10; r6 += -24 -> slot 5).  But any subsequent ADD/SUB
+ * on a register that already has non-zero spis falls through to
+ * spis_set_all(), because the code only handles the FP-itself case.
+ *
+ * A write through this imprecise register enters the non-zero-spis
+ * branch of set_indirect_stack_access(), which OR's the all-ones
+ * mask into stack_def.  The backward liveness equation
+ *
+ *   stack_in = (stack_out & ~stack_def) | stack_use
+ *
+ * sees ~ALL = 0, killing ALL slot liveness at that instruction.
+ *
+ * At the merge pruning point, live_stack_before is empty.
+ * clean_verifier_state() marks fp-8 as STACK_INVALID.
+ * stacksafe() skips STACK_INVALID (line "continue"), so pruning
+ * succeeds regardless of the current state's fp-8 value.
+ * Path B is pruned, its null deref is never explored.
+ *
+ * Correct behavior: reject (path B dereferences NULL).
+ * Bug behavior: accept (path B pruned away).
+ */
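+/* Worked instance of the equation above at the imprecise store
+ * (illustrative masks): stack_out includes 0x3 (fp-8 = slots 0,1,
+ * read just below), stack_def = ALL, stack_use = 0, so
+ * stack_in = (0x3 & ~ALL) | 0 = 0x0: fp-8's liveness is killed.
+ */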
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("R1 invalid mem access 'scalar'")
+__naked void fp_add_loses_precision_kills_liveness(void)
+{
+	asm volatile (
+	"call %[bpf_get_prandom_u32];"
+	"if r0 != 0 goto l_pathB%=;"
+
+	/* Path A (fall-through, explored first): fp-8 = 0 */
+	"r1 = 0;"
+	"*(u64 *)(r10 - 8) = r1;"
+	"goto l_merge%=;"
+
+"l_pathB%=:"
+	/* Path B (explored second): fp-8 = 42 */
+	"r1 = 42;"
+	"*(u64 *)(r10 - 8) = r1;"
+
+"l_merge%=:"
+	/*
+	 * Create imprecise FP-derived register.
+	 * r6 = r10 - 24 gets precise slot 5.
+	 * r6 += 8 hits the else branch (spis non-zero, delta > 0)
+	 * and sets spis to ALL.  r6 is actually r10-16.
+	 */
+	"r6 = r10;"
+	"r6 += -24;"
+	"r6 += 8;"
+
+	/*
+	 * Write through imprecise r6.  Actually writes to fp-16
+	 * (does NOT touch fp-8), but liveness marks ALL slots
+	 * as stack_def, killing fp-8's liveness.
+	 */
+	"r7 = 0;"
+	"*(u64 *)(r6 + 0) = r7;"
+
+	/* Read fp-8: liveness says dead, but value is needed. */
+	"r2 = *(u64 *)(r10 - 8);"
+	"if r2 == 42 goto l_danger%=;"
+
+	/* r2 != 42 (path A: r2 == 0): safe exit */
+	"r0 = 0;"
+	"exit;"
+
+"l_danger%=:"
+	/* Only reachable from path B (r2 == 42): null deref */
+	"r1 = 0;"
+	"r0 = *(u64 *)(r1 + 0);"
+	"exit;"
+	:
+	: __imm(bpf_get_prandom_u32)
+	: __clobber_all);
+}
+
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("R1 invalid mem access 'scalar'")
+__naked void fp_spill_loses_precision_kills_liveness(void)
+{
+	asm volatile (
+	"call %[bpf_get_prandom_u32];"
+	"if r0 != 0 goto l_pathB%=;"
+
+	"r1 = 0;"
+	"*(u64 *)(r10 - 8) = r1;"
+	"goto l_merge%=;"
+
+"l_pathB%=:"
+	"r1 = 42;"
+	"*(u64 *)(r10 - 8) = r1;"
+
+"l_merge%=:"
+	"r6 = r10;"
+	"r6 += -64;"
+	"*(u64 *)(r10 - 160) = r6;"
+	"r6 = *(u64 *)(r10 - 160);"
+
+	"r7 = 0;"
+	"*(u64 *)(r6 + 0) = r7;"
+
+	"r2 = *(u64 *)(r10 - 8);"
+	"if r2 == 42 goto l_danger%=;"
+
+	"r0 = *(u64 *)(r10 - 56);"
+	"exit;"
+
+"l_danger%=:"
+	"r1 = 0;"
+	"r0 = *(u64 *)(r1 + 0);"
+	"exit;"
+	:
+	: __imm(bpf_get_prandom_u32)
+	: __clobber_all);
+}
+
+/* === Tests for frame-based AT_FP tracking === */
+
+/*
+ * Test 1: conditional_stx_in_subprog
+ * Subprog conditionally writes caller's slot.
+ * Verify slot stays live (backward pass handles conditional def via CFG).
+ *
+ * Main writes fp-8=42, calls cond_writer(fp-8), reads fp-8.
+ * cond_writer only writes on one path → parent_def only on that path.
+ * The backward parent_live correctly keeps fp-8 live at entry
+ * (conditional write doesn't kill liveness at the join).
+ */
+SEC("socket")
+__log_level(2)
+/* fp-8 live at call (callee conditionally writes → slot not killed) */
+__msg("1: (7b) *(u64 *)(r10 -8) = r1        ; def: fp0-8")
+__msg("4: (85) call pc+2{{$}}")
+__msg("5: (79) r0 = *(u64 *)(r10 -8)        ; use: fp0-8")
+__naked void conditional_stx_in_subprog(void)
+{
+	asm volatile (
+	"r1 = 42;"
+	"*(u64 *)(r10 - 8) = r1;"
+	"r1 = r10;"
+	"r1 += -8;"
+	"call cond_writer;"
+	"r0 = *(u64 *)(r10 - 8);"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Conditionally writes to *(r1+0) */
+static __used __naked void cond_writer(void)
+{
+	asm volatile (
+	"r6 = r1;"
+	"call %[bpf_get_prandom_u32];"
+	"if r0 == 0 goto 1f;"
+	"*(u64 *)(r6 + 0) = r0;"
+	"1:"
+	"r0 = 0;"
+	"exit;"
+	:: __imm(bpf_get_prandom_u32)
+	: __clobber_all);
+}
+
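+/*
+ * Test 2: multiple_callsites_different_offsets
+ * The same subprog is called twice with pointers to different caller
+ * slots; the callee's read through its argument is attributed to
+ * fp-16 at the first call site and fp-32 at the second.
+ */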
+SEC("socket")
+__log_level(2)
+__msg("4: (85) call pc+{{.*}}                   ; use: fp0-16")
+__msg("7: (85) call pc+{{.*}}                   ; use: fp0-32")
+__naked void multiple_callsites_different_offsets(void)
+{
+	asm volatile (
+	"*(u64 *)(r10 - 16) = 0;"
+	"*(u64 *)(r10 - 32) = 0;"
+	"r1 = r10;"
+	"r1 += -16;"
+	"call read_first_param;"
+	"r1 = r10;"
+	"r1 += -32;"
+	"call read_first_param;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/*
+ * Test 3: nested_fp_passthrough
+ * main→A→B, main's FP forwarded to B. B accesses main's stack.
+ * Verify liveness propagates through.
+ *
+ * Main passes fp-32 to outer_forwarder, which passes it to inner_reader.
+ * inner_reader reads at arg+0 (= main's fp-32).
+ * parent_live propagates transitively: inner→outer→main.
+ */
+SEC("socket")
+__log_level(2)
+/* At call to outer_forwarder: main's fp-32 (slots 6,7) should be live */
+__msg("6: (85) call pc+{{.*}}                   ; use: fp0-32")
+__naked void nested_fp_passthrough(void)
+{
+	asm volatile (
+	"*(u64 *)(r10 - 8) = 0;"
+	"*(u64 *)(r10 - 16) = 0;"
+	"*(u64 *)(r10 - 24) = 0;"
+	"*(u64 *)(r10 - 32) = 0;"
+	"r1 = r10;"
+	"r1 += -32;"
+	"call outer_forwarder;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Forwards arg to inner_reader */
+static __used __naked void outer_forwarder(void)
+{
+	asm volatile (
+	"call inner_reader;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void inner_reader(void)
+{
+	asm volatile (
+	"r0 = *(u64 *)(r1 + 0);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/*
+ * Test 4: callee_must_write_before_read
+ * Callee unconditionally writes parent slot before reading.
+ * Verify slot is NOT live at call site (parent_def kills it).
+ */
+SEC("socket")
+__log_level(2)
+/* fp-8 NOT live at call: callee writes before reading (parent_def kills it) */
+__msg("2: .12345.... (85) call pc+")
+__naked void callee_must_write_before_read(void)
+{
+	asm volatile (
+	"r1 = r10;"
+	"r1 += -8;"
+	"call write_then_read;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Unconditionally writes *(r1+0), then reads it back */
+static __used __naked void write_then_read(void)
+{
+	asm volatile (
+	"r6 = r1;"
+	"r7 = 99;"
+	"*(u64 *)(r6 + 0) = r7;"
+	"r0 = *(u64 *)(r6 + 0);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/*
+ * Test 5: return_site_liveness_bleeding
+ * Main calls subprog twice. Slot used after one call but not the other.
+ * Context-insensitive: slot conservatively live at both.
+ *
+ * After first call: read fp-8.
+ * After second call: don't read fp-8.
+ * Since parent_live is per-subprog (not per call-site),
+ * fp-8 is live at both call sites.
+ */
+SEC("socket")
+__log_level(2)
+/* Both calls have fp-8 live due to context-insensitive parent_live */
+__msg("3: (85) call pc+{{.*}}                   ; use: fp0-8")
+__msg("7: (85) call pc+{{.*}}                   ; use: fp0-8")
+__naked void return_site_liveness_bleeding(void)
+{
+	asm volatile (
+	"*(u64 *)(r10 - 8) = 0;"
+	"r1 = r10;"
+	"r1 += -8;"
+	"call read_first_param;"
+	"r0 = *(u64 *)(r10 - 8);"
+	"r1 = r10;"
+	"r1 += -8;"
+	"call read_first_param;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
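+/*
+ * bpf_loop callback conditionally reads one slot past its ctx:
+ * ctx = &fp-8, the callback may read *(ctx - 8) = caller fp-16, so
+ * fp-16 must stay live at the bpf_loop call site.
+ */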
+SEC("socket")
+__log_level(2)
+__msg("9: (85) call bpf_loop#181            ; use: fp0-16")
+__naked void callback_conditional_read_beyond_ctx(void)
+{
+	asm volatile (
+	"r1 = 42;"
+	"*(u64 *)(r10 - 8) = r1;"
+	"*(u64 *)(r10 - 16) = r1;"
+	"r1 = 2;"
+	"r2 = cb_cond_read ll;"
+	"r3 = r10;"
+	"r3 += -8;"
+	"r4 = 0;"
+	"call %[bpf_loop];"
+	"r0 = 0;"
+	"exit;"
+	:: __imm(bpf_loop)
+	: __clobber_all);
+}
+
+/* Callback conditionally reads *(ctx - 8) = caller fp-16 */
+static __used __naked void cb_cond_read(void)
+{
+	asm volatile (
+	"r6 = r2;"
+	"call %[bpf_get_prandom_u32];"
+	"if r0 == 0 goto 1f;"
+	"r0 = *(u64 *)(r6 - 8);"
+	"1:"
+	"r0 = 0;"
+	"exit;"
+	:: __imm(bpf_get_prandom_u32)
+	: __clobber_all);
+}
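+
+/* Note on the callback convention assumed by these tests: bpf_loop
+ * invokes the callback with the iteration index in r1 and
+ * callback_ctx in r2, which is why the callbacks above and below
+ * read through r2.
+ */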
+
+SEC("socket")
+__log_level(2)
+__msg("14: (7b) *(u64 *)(r6 -8) = r7         ; def: fp0-16")
+__msg("15: (79) r0 = *(u64 *)(r6 -8)         ; use: fp0-16")
+__naked void callback_write_before_read_kills(void)
+{
+	asm volatile (
+	"r1 = 42;"
+	"*(u64 *)(r10 - 8) = r1;"
+	"*(u64 *)(r10 - 16) = r1;"
+	"r1 = 2;"
+	"r2 = cb_write_read ll;"
+	"r3 = r10;"
+	"r3 += -8;"
+	"r4 = 0;"
+	"call %[bpf_loop];"
+	"r0 = 0;"
+	"exit;"
+	:: __imm(bpf_loop)
+	: __clobber_all);
+}
+
+/* Callback unconditionally writes *(ctx-8), then reads it back.
+ * The write (parent_def) kills liveness before entry.
+ */
+static __used __naked void cb_write_read(void)
+{
+	asm volatile (
+	"r6 = r2;"
+	"r7 = 99;"
+	"*(u64 *)(r6 - 8) = r7;"
+	"r0 = *(u64 *)(r6 - 8);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/*
+ * bpf_loop callback conditionally writes fp-16 then unconditionally
+ * reads it. The conditional write does NOT kill liveness.
+ */
+SEC("socket")
+__log_level(2)
+__msg("9: (85) call bpf_loop#181            ; use: fp0-16")
+__naked void callback_conditional_write_preserves(void)
+{
+	asm volatile (
+	"r1 = 42;"
+	"*(u64 *)(r10 - 8) = r1;"
+	"*(u64 *)(r10 - 16) = r1;"
+	"r1 = 2;"
+	"r2 = cb_cond_write_read ll;"
+	"r3 = r10;"
+	"r3 += -8;"
+	"r4 = 0;"
+	"call %[bpf_loop];"
+	"r0 = 0;"
+	"exit;"
+	:: __imm(bpf_loop)
+	: __clobber_all);
+}
+
+static __used __naked void cb_cond_write_read(void)
+{
+	asm volatile (
+	"r6 = r2;"
+	"call %[bpf_get_prandom_u32];"
+	"if r0 == 0 goto 1f;"
+	"*(u64 *)(r6 - 8) = r0;"
+	"1:"
+	"r0 = *(u64 *)(r6 - 8);"
+	"r0 = 0;"
+	"exit;"
+	:: __imm(bpf_get_prandom_u32)
+	: __clobber_all);
+}
+
+/*
+ * Two bpf_loop calls with the same callback but different ctx pointers.
+ *
+ * First call: ctx=fp-8, second call: ctx=fp-24.
+ */
+SEC("socket")
+__log_level(2)
+__msg(" 8: (85) call bpf_loop{{.*}}            ; use: fp0-8")
+__msg("15: (85) call bpf_loop{{.*}}            ; use: fp0-24")
+__naked void callback_two_calls_different_ctx(void)
+{
+	asm volatile (
+	"*(u64 *)(r10 - 8) = 0;"
+	"*(u64 *)(r10 - 24) = 0;"
+	"r1 = 1;"
+	"r2 = cb_read_ctx ll;"
+	"r3 = r10;"
+	"r3 += -8;"
+	"r4 = 0;"
+	"call %[bpf_loop];"
+	"r1 = 1;"
+	"r2 = cb_read_ctx ll;"
+	"r3 = r10;"
+	"r3 += -24;"
+	"r4 = 0;"
+	"call %[bpf_loop];"
+	"r0 = 0;"
+	"exit;"
+	:: __imm(bpf_loop)
+	: __clobber_all);
+}
+
+/* Callback reads at ctx+0 unconditionally */
+static __used __naked void cb_read_ctx(void)
+{
+	asm volatile (
+	"r0 = *(u64 *)(r2 + 0);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/*
+ * Reproducer for unsound pruning in refined_caller_live_stack().
+ *
+ * Three-level call chain: main → mid_fwd → grandchild_deref.
+ * Main passes &fp-8 to mid_fwd, which forwards R1 to grandchild_deref.
+ * grandchild_deref reads main's fp-8 through the forwarded pointer
+ * and dereferences the result.
+ *
+ * refined_caller_live_stack() applies a callee_offset++ when mid_fwd
+ * (frame 1) is mid-call. This drops the transitive parent_live
+ * contribution at mid_fwd's call instruction — the only place
+ * where grandchild_deref's read of main's fp-8 is recorded.
+ * As a result, main's fp-8 is cleaned to STACK_INVALID at the
+ * pruning point inside grandchild_deref, and path B is
+ * incorrectly pruned against path A.
+ *
+ * Path A: main stores PTR_TO_MAP_VALUE at fp-8
+ * Path B: main stores scalar 42 at fp-8
+ *
+ * Correct behavior: reject (path B dereferences scalar)
+ * Bug behavior: accept (path B pruned against cleaned path A)
+ */
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("R0 invalid mem access 'scalar'")
+__naked void transitive_parent_stack_read_unsound(void)
+{
+	asm volatile (
+	/* Map lookup to get PTR_TO_MAP_VALUE */
+	"r1 = %[map] ll;"
+	"*(u32 *)(r10 - 32) = 0;"
+	"r2 = r10;"
+	"r2 += -32;"
+	"call %[bpf_map_lookup_elem];"
+	"if r0 == 0 goto l_exit%=;"
+	"r6 = r0;"
+	/* Branch: path A (fall-through) explored first */
+	"call %[bpf_get_prandom_u32];"
+	"if r0 != 0 goto l_scalar%=;"
+	/* Path A: fp-8 = PTR_TO_MAP_VALUE */
+	"*(u64 *)(r10 - 8) = r6;"
+	"goto l_merge%=;"
+"l_scalar%=:"
+	/* Path B: fp-8 = scalar 42 */
+	"r1 = 42;"
+	"*(u64 *)(r10 - 8) = r1;"
+"l_merge%=:"
+	/* Pass &fp-8 to mid_fwd → grandchild_deref */
+	"r1 = r10;"
+	"r1 += -8;"
+	"call mid_fwd;"
+	"r0 = 0;"
+	"exit;"
+"l_exit%=:"
+	"r0 = 0;"
+	"exit;"
+	:
+	: __imm(bpf_map_lookup_elem),
+	  __imm(bpf_get_prandom_u32),
+	  __imm_addr(map)
+	: __clobber_all);
+}
+
+/* Forwards R1 (ptr to main's fp-8) to grandchild_deref */
+static __used __naked void mid_fwd(void)
+{
+	asm volatile (
+	"call grandchild_deref;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Reads main's fp-8 through forwarded pointer, dereferences result */
+static __used __naked void grandchild_deref(void)
+{
+	asm volatile (
+	"goto +0;"				/* checkpoint */
+	"goto +0;"				/* checkpoint */
+	/* read main's fp-8: map_ptr (path A) or scalar (path B) */
+	"r0 = *(u64 *)(r1 + 0);"
+	/* dereference: safe for map_ptr, unsafe for scalar */
+	"r0 = *(u64 *)(r0 + 0);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
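+/*
+ * Spilled FP-derived pointers should keep their tracking across a
+ * call. mid_two_fp_threshold stores &own_fp-16 at fp-8 and the
+ * caller-supplied pointer at fp-16, calls a nop subprog while two
+ * FP-derived registers are live, then reloads fp-8 and reads through
+ * it: the use/def log at insns 14/15 must still resolve the accesses
+ * to fp1-8 and fp1-16.
+ */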
+SEC("socket")
+__log_level(2)
+__success
+__msg("14: (79) r1 = *(u64 *)(r10 -8) // r6=fp0-8 r7=fp1-16 fp-8=fp1-16 fp-16=fp0-8")
+__msg("15: (79) r0 = *(u64 *)(r1 +0) // r1=fp1-16 r6=fp0-8 r7=fp1-16 fp-8=fp1-16 fp-16=fp0-8")
+__msg("stack use/def subprog#1 mid_two_fp_threshold (d1,cs2):")
+__msg("14: (79) r1 = *(u64 *)(r10 -8)        ; use: fp1-8")
+__msg("15: (79) r0 = *(u64 *)(r1 +0)         ; use: fp1-16")
+__naked void two_fp_clear_stack_threshold(void)
+{
+	asm volatile (
+	"r1 = r10;"
+	"r1 += -8;"
+	"call mid_two_fp_threshold;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void mid_two_fp_threshold(void)
+{
+	asm volatile (
+	"r6 = r1;"
+	"r7 = r10;"
+	"r7 += -16;"
+	"*(u64 *)(r10 - 8) = r7;"
+	"*(u64 *)(r10 - 16) = r6;"
+	"r1 = r10;"
+	"r1 += -8;"
+	"r2 = r6;"
+	"call inner_nop_fptest;"
+	"r1 = *(u64 *)(r10 - 8);"
+	"r0 = *(u64 *)(r1 + 0);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void inner_nop_fptest(void)
+{
+	asm volatile (
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__success
+__msg("13: (79) r1 = *(u64 *)(r10 -8) // r6=fp0-8 r7=fp1-16 fp-8=fp1-16 fp-16=fp0-8")
+__msg("14: (79) r0 = *(u64 *)(r1 +0) // r1=fp1-16 r6=fp0-8 r7=fp1-16 fp-8=fp1-16 fp-16=fp0-8")
+__msg("stack use/def subprog#1 mid_one_fp_threshold (d1,cs2):")
+__msg("13: (79) r1 = *(u64 *)(r10 -8)        ; use: fp1-8")
+__msg("14: (79) r0 = *(u64 *)(r1 +0)         ; use: fp1-16")
+__naked void one_fp_clear_stack_threshold(void)
+{
+	asm volatile (
+	"r1 = r10;"
+	"r1 += -8;"
+	"call mid_one_fp_threshold;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void mid_one_fp_threshold(void)
+{
+	asm volatile (
+	"r6 = r1;"
+	"r7 = r10;"
+	"r7 += -16;"
+	"*(u64 *)(r10 - 8) = r7;"
+	"*(u64 *)(r10 - 16) = r6;"
+	"r1 = r10;"
+	"r1 += -8;"
+	"call inner_nop_fptest;"
+	"r1 = *(u64 *)(r10 - 8);"
+	"r0 = *(u64 *)(r1 + 0);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/*
+ * Reproducer for unsound pruning when a subprog forwards a parent
+ * stack pointer (AT_PARENT) to a helper with a memory argument.
+ *
+ * set_call_stack_access_at() previously only tracked AT_CURRENT args,
+ * skipping AT_PARENT entirely. This meant helper reads through parent
+ * stack pointers did not set parent_use, letting the slot appear dead
+ * at pruning checkpoints inside the subprog.
+ *
+ * Program shape:
+ *   main:
+ *     *(u32)(fp-4) = 0             key = STACK_ZERO (const 0)
+ *     call bpf_get_prandom_u32
+ *     if r0 != 0 goto clobber      path A (fall-through) first
+ *     goto merge
+ *   clobber:
+ *     *(u8)(fp-4) = r0             path B: key[0] = STACK_MISC
+ *   merge:
+ *     r1 = fp - 4
+ *     call fwd_parent_key_to_helper
+ *     r0 = 0
+ *     exit
+ *
+ *   fwd_parent_key_to_helper(r1 = &caller_fp-4):
+ *     goto +0                      checkpoint
+ *     r2 = r1                      R2 = AT_PARENT ptr to caller fp-4
+ *     r1 = array_map_8b ll         R1 = array map
+ *     call bpf_map_lookup_elem     reads key_size(4) from parent fp-4
+ *     r0 = *(u64 *)(r0 + 0)        deref without null check
+ *     r0 = 0
+ *     exit
+ *
+ * Path A: STACK_ZERO key = const 0 -> array lookup -> PTR_TO_MAP_VALUE
+ *         (non-NULL for in-bounds const key) -> deref OK.
+ * Path B: STACK_MISC key = unknown -> array lookup ->
+ *         PTR_TO_MAP_VALUE_OR_NULL -> deref UNSAFE.
+ *
+ * Bug: AT_PARENT R2 arg to bpf_map_lookup_elem skipped -> parent_use
+ *      not set -> fp-4 cleaned at checkpoint -> STACK_ZERO collapses
+ *      to STACK_INVALID -> path B pruned -> deref never checked.
+ *
+ * Correct verifier behavior: reject (path B deref of map_value_or_null).
+ */
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("R0 invalid mem access 'map_value_or_null'")
+__naked void helper_parent_stack_read_unsound(void)
+{
+	asm volatile (
+	/* key at fp-4: all bytes STACK_ZERO */
+	"*(u32 *)(r10 - 4) = 0;"
+	"call %[bpf_get_prandom_u32];"
+	/* fall-through (path A) explored first */
+	"if r0 != 0 goto l_clobber%=;"
+	/* path A: key stays constant zero */
+	"goto l_merge%=;"
+"l_clobber%=:"
+	/* path B: key[0] becomes STACK_MISC, key no longer const */
+	"*(u8 *)(r10 - 4) = r0;"
+"l_merge%=:"
+	"r1 = r10;"
+	"r1 += -4;"
+	"call fwd_parent_key_to_helper;"
+	"r0 = 0;"
+	"exit;"
+	:
+	: __imm(bpf_get_prandom_u32)
+	: __clobber_all);
+}
+
+/*
+ * Subprog forwards parent stack pointer to bpf_map_lookup_elem as key
+ * on an array map, then dereferences the result without a null check.
+ * R1 = &parent_fp-4 (AT_PARENT in this frame).
+ *
+ * The helper reads key_size(4) bytes from parent stack.  The deref of
+ * R0 reads the map value, NOT parent stack, so record_insn_mem_accesses
+ * does not set parent_use for it.  The ONLY parent stack access is
+ * through the helper's R2 arg.
+ */
+static __used __naked void fwd_parent_key_to_helper(void)
+{
+	asm volatile (
+	"goto +0;"				/* checkpoint */
+	"r2 = r1;"				/* R2 = parent ptr (AT_PARENT) */
+	"r1 = %[array_map_8b] ll;"		/* R1 = array map */
+	"call %[bpf_map_lookup_elem];"		/* reads 4 bytes from parent fp-4 */
+	/* deref without null check: safe for PTR_TO_MAP_VALUE,
+	 * unsafe for PTR_TO_MAP_VALUE_OR_NULL
+	 */
+	"r0 = *(u64 *)(r0 + 0);"
+	"r0 = 0;"
+	"exit;"
+	:
+	: __imm(bpf_map_lookup_elem),
+	  __imm_addr(array_map_8b)
+	: __clobber_all);
+}
+
+/*
+ * Regression test for keeping later helper args after a whole-stack fallback
+ * on an earlier local arg.  The first bpf_snprintf() arg is a local
+ * frame-derived pointer with offset-imprecise tracking (`fp1 ?`), which
+ * conservatively marks the whole local stack live.  The fourth arg still
+ * forwards &parent_fp-8 and must contribute nonlocal_use[0]=0:3.
+ */
+SEC("socket")
+__log_level(2)
+__success
+__msg("call bpf_snprintf{{.*}}        ; use: fp1-8..-512 fp0-8")
+__naked void helper_arg_fallback_keeps_scanning(void)
+{
+	asm volatile (
+	"r1 = 42;"
+	"*(u64 *)(r10 - 8) = r1;"
+	"r1 = r10;"
+	"r1 += -8;"
+	"call helper_snprintf_parent_after_local_fallback;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
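+/* bpf_snprintf(str, str_size, fmt, data, data_len): r4 must point at
+ * an array of u64 values and r5 gives its size in bytes. Here
+ * r4 = &parent_fp-8 and r5 = 8, a single u64 formatted via "%llu".
+ */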
+static __used __naked void helper_snprintf_parent_after_local_fallback(void)
+{
+	asm volatile (
+	"r6 = r1;"				/* save &parent_fp-8 */
+	"call %[bpf_get_prandom_u32];"
+	"r0 &= 8;"
+	"r1 = r10;"
+	"r1 += -16;"
+	"r1 += r0;"				/* local fp, offset-imprecise */
+	"r2 = 8;"
+	"r3 = %[snprintf_u64_fmt] ll;"
+	"r4 = r6;"				/* later arg: parent fp-8 */
+	"r5 = 8;"
+	"call %[bpf_snprintf];"
+	"r0 = 0;"
+	"exit;"
+	:
+	: __imm(bpf_get_prandom_u32),
+	  __imm(bpf_snprintf),
+	  __imm_addr(snprintf_u64_fmt)
+	: __clobber_all);
+}
+
+/*
+ * Test that propagate_callee_ancestor() correctly chains ancestor
+ * liveness across sequential calls within a single frame.
+ *
+ * main → mid_seq_touch → {nop_callee, deref_ancestor}
+ *
+ * mid_seq_touch receives two pointers: R1 = &main_fp-8 (forwarded to
+ * deref_ancestor) and R2 = &main_fp-16 (read directly by mid_seq_touch).
+ * The direct read of fp-16 forces ensure_anc_arrays() to allocate
+ * ancestor_live[0] for mid_seq_touch, so refined_caller_live_stack()
+ * uses the refined path (not the conservative fallback).
+ *
+ * mid_seq_touch calls nop_callee first (no-op, creates a pruning point),
+ * then calls deref_ancestor which reads main's fp-8 and dereferences it.
+ *
+ * propagate_callee_ancestor() propagates deref_ancestor's entry
+ * ancestor_live[0] into mid_seq_touch's anc_use[0] at the call-to-deref
+ * instruction.  mid_seq_touch's backward pass flows this backward so
+ * ancestor_live[0] includes fp-8 at the pruning point between the calls.
+ *
+ * Without propagation, mid_seq_touch's ancestor_live[0] only has fp-16
+ * (from the direct read) — fp-8 is missing.  refined_caller_live_stack()
+ * Term 1 says fp-8 is dead, the verifier cleans it, and path B
+ * (scalar 42) is incorrectly pruned against path A (MAP_VALUE).
+ *
+ * Path A: main stores PTR_TO_MAP_VALUE at fp-8  → deref succeeds
+ * Path B: main stores scalar 42 at fp-8         → deref must fail
+ *
+ * Correct: reject (path B dereferences scalar)
+ */
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure __msg("R0 invalid mem access 'scalar'")
+__naked void propagate_callee_ancestor_chain(void)
+{
+	asm volatile (
+	/* Map lookup to get PTR_TO_MAP_VALUE */
+	"r1 = %[map] ll;"
+	"*(u32 *)(r10 - 32) = 0;"
+	"r2 = r10;"
+	"r2 += -32;"
+	"call %[bpf_map_lookup_elem];"
+	"if r0 == 0 goto l_exit%=;"
+	"r6 = r0;"
+	/* Branch: path A (fall-through) explored first */
+	"call %[bpf_get_prandom_u32];"
+	"if r0 != 0 goto l_scalar%=;"
+	/* Path A: fp-8 = PTR_TO_MAP_VALUE */
+	"*(u64 *)(r10 - 8) = r6;"
+	"goto l_merge%=;"
+"l_scalar%=:"
+	/* Path B: fp-8 = scalar 42 */
+	"r1 = 42;"
+	"*(u64 *)(r10 - 8) = r1;"
+"l_merge%=:"
+	/* fp-16 = dummy value (mid_seq_touch reads it directly) */
+	"r1 = 99;"
+	"*(u64 *)(r10 - 16) = r1;"
+	/* R1 = &fp-8 (for deref_ancestor), R2 = &fp-16 (for mid_seq_touch) */
+	"r1 = r10;"
+	"r1 += -8;"
+	"r2 = r10;"
+	"r2 += -16;"
+	"call mid_seq_touch;"
+	"r0 = 0;"
+	"exit;"
+"l_exit%=:"
+	"r0 = 0;"
+	"exit;"
+	:
+	: __imm(bpf_map_lookup_elem),
+	  __imm(bpf_get_prandom_u32),
+	  __imm_addr(map)
+	: __clobber_all);
+}
+
+/*
+ * R1 = &main_fp-8 (forwarded to deref_ancestor)
+ * R2 = &main_fp-16 (read directly here → allocates ancestor_live[0])
+ *
+ * Reads main's fp-16 to force ancestor_live[0] allocation, then
+ * calls nop_callee (pruning point), then deref_ancestor.
+ */
+static __used __naked void mid_seq_touch(void)
+{
+	asm volatile (
+	"r6 = r1;"			/* save &main_fp-8 in callee-saved */
+	"r0 = *(u64 *)(r2 + 0);"	/* read main's fp-16: triggers anc_use[0] */
+	"call nop_callee;"		/* no-op, creates pruning point after */
+	"r1 = r6;"			/* restore ptr to &main_fp-8 */
+	"call deref_ancestor;"		/* reads main's fp-8, dereferences */
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void nop_callee(void)
+{
+	asm volatile (
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Reads main's fp-8 through forwarded pointer, dereferences result */
+static __used __naked void deref_ancestor(void)
+{
+	asm volatile (
+	"r0 = *(u64 *)(r1 + 0);"	/* read main's fp-8 */
+	"r0 = *(u64 *)(r0 + 0);"	/* deref: safe for map_ptr, unsafe for scalar */
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/*
+ * Test: callee loads an fp-derived pointer from caller's stack, then
+ * reads through it to access another caller stack slot.
+ *
+ * main stores PTR_TO_MAP_VALUE at fp-24, stores &fp-24 (an fp-derived
+ * pointer) at fp-8, passes &fp-8 through mid_fwd_spilled_ptr to
+ * load_ptr_deref_grandchild.  The leaf loads the pointer from main's
+ * fp-8, then reads main's fp-24 through the loaded pointer.
+ *
+ * fill_from_stack() in arg_track_xfer() only handles local-frame
+ * FP-derived loads (src_is_local_fp check requires frame == depth).
+ * When a callee loads from a parent-frame pointer (frame < depth),
+ * the loaded value gets ARG_NONE instead of being recognized as
+ * fp-derived.  Subsequent reads through that loaded pointer are
+ * invisible to liveness — nonlocal_use is never set for fp-24.
+ *
+ * clean_live_states() cleans the current state at every prune point.
+ * Because liveness misses fp-24, refined_caller_live_stack() tells
+ * __clean_func_state() that fp-24 is dead, which destroys the
+ * PTR_TO_MAP_VALUE spill before the grandchild can read it.
+ * The grandchild then reads STACK_INVALID → scalar, and the deref
+ * is rejected with "R0 invalid mem access 'scalar'" — even though
+ * fp-24 is genuinely live and holds a valid map pointer.
+ *
+ * This is a false positive: a valid program incorrectly rejected.
+ */
+SEC("socket")
+__flag(BPF_F_TEST_STATE_FREQ)
+__success
+__naked void spilled_fp_cross_frame_deref(void)
+{
+	asm volatile (
+	/* Map lookup to get PTR_TO_MAP_VALUE */
+	"r1 = %[map] ll;"
+	"*(u32 *)(r10 - 32) = 0;"
+	"r2 = r10;"
+	"r2 += -32;"
+	"call %[bpf_map_lookup_elem];"
+	"if r0 == 0 goto l_exit%=;"
+	/* fp-24 = PTR_TO_MAP_VALUE */
+	"*(u64 *)(r10 - 24) = r0;"
+	/* Store pointer to fp-24 at fp-8 */
+	"r1 = r10;"
+	"r1 += -24;"
+	"*(u64 *)(r10 - 8) = r1;"
+	/* R1 = &fp-8: pointer to the spilled ptr */
+	"r1 = r10;"
+	"r1 += -8;"
+	"call mid_fwd_spilled_ptr;"
+	"r0 = 0;"
+	"exit;"
+"l_exit%=:"
+	"r0 = 0;"
+	"exit;"
+	:
+	: __imm(bpf_map_lookup_elem),
+	  __imm_addr(map)
+	: __clobber_all);
+}
+
+/* Forwards R1 (ptr to main's fp-8, which holds &main_fp-24) to leaf */
+static __used __naked void mid_fwd_spilled_ptr(void)
+{
+	asm volatile (
+	"call load_ptr_deref_grandchild;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/*
+ * R1 = &main_fp-8 (where main stored ptr to fp-24)
+ * Loads the ptr from main's fp-8, reads main's fp-24 through it,
+ * then dereferences the result.
+ */
+static __used __naked void load_ptr_deref_grandchild(void)
+{
+	asm volatile (
+	/* Load ptr from main's fp-8 → r2 = &main_fp-24 */
+	"r2 = *(u64 *)(r1 + 0);"
+	/* Read main's fp-24 through loaded ptr */
+	"r0 = *(u64 *)(r2 + 0);"
+	/* Dereference: safe for map_ptr */
+	"r0 = *(u64 *)(r0 + 0);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/*
+ * Exercise merge_nonlocal_live().
+ *
+ * merge_shared_mid is analyzed twice (once from each wrapper), so the
+ * callsite within merge_shared_mid that calls merge_leaf_read gets its
+ * nonlocal_live info merged twice via merge_nonlocal_live().
+ */
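+/* Call graph exercised (sketch):
+ *   main -> merge_wrapper_a -> merge_shared_mid -> merge_leaf_read
+ *        -> merge_wrapper_b -> merge_shared_mid -> merge_leaf_read
+ */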
+SEC("socket")
+__log_level(2)
+__success
+__msg("14: (85) call pc+2	r1: fp0-16")
+__msg("17: (79) r0 = *(u64 *)(r1 +0) // r1=fp0-16")
+__msg("14: (85) call pc+2	r1: fp0-8")
+__msg("17: (79) r0 = *(u64 *)(r1 +0) // r1=fp0-8")
+__msg("5: (85) call pc+{{.*}}                   ; use: fp0-8 fp0-16")
+__naked void test_merge_nonlocal_live(void)
+{
+	asm volatile (
+	"r1 = 0;"
+	"*(u64 *)(r10 - 8) = r1;"
+	"*(u64 *)(r10 - 16) = r1;"
+	"r1 = r10;"
+	"r1 += -8;"
+	"call merge_wrapper_a;"
+	"r1 = r10;"
+	"r1 += -16;"
+	"call merge_wrapper_b;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void merge_wrapper_a(void)
+{
+	asm volatile (
+	"call merge_shared_mid;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void merge_wrapper_b(void)
+{
+	asm volatile (
+	"call merge_shared_mid;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void merge_shared_mid(void)
+{
+	asm volatile (
+	"call merge_leaf_read;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void merge_leaf_read(void)
+{
+	asm volatile (
+	"r0 = *(u64 *)(r1 + 0);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/* Same bpf_loop instruction calls different callbacks depending on branch. */
+SEC("socket")
+__log_level(2)
+__success
+__msg("call bpf_loop#181            ; use: fp2-8..-512 fp1-8..-512 fp0-8..-512")
+__naked void bpf_loop_two_callbacks(void)
+{
+	asm volatile (
+	"r1 = 0;"
+	"*(u64 *)(r10 - 8) = r1;"
+	"*(u64 *)(r10 - 16) = r1;"
+	"r1 = r10;"
+	"r1 += -8;"
+	"call dyn_wrapper_a;"
+	"r1 = r10;"
+	"r1 += -16;"
+	"call dyn_wrapper_b;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void dyn_wrapper_a(void)
+{
+	asm volatile (
+	"call mid_dynamic_cb;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void dyn_wrapper_b(void)
+{
+	asm volatile (
+	"call mid_dynamic_cb;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void mid_dynamic_cb(void)
+{
+	asm volatile (
+	"r6 = r1;"
+	"call %[bpf_get_prandom_u32];"
+	"if r0 == 0 goto 1f;"
+	"r2 = dyn_cb_a ll;"
+	"goto 2f;"
+	"1:"
+	"r2 = dyn_cb_b ll;"
+	"2:"
+	"r1 = 1;"
+	"r3 = r6;" /* ctx = fp-derived ptr from parent */
+	"r4 = 0;"
+	"call %[bpf_loop];"
+	"r0 = 0;"
+	"exit;"
+	:: __imm(bpf_get_prandom_u32),
+	   __imm(bpf_loop)
+	: __clobber_all);
+}
+
+/* Callback A/B: read parent stack through ctx */
+static __used __naked void dyn_cb_a(void)
+{
+	asm volatile (
+	"r0 = *(u64 *)(r2 + 0);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void dyn_cb_b(void)
+{
+	asm volatile (
+	"r0 = *(u64 *)(r2 + 0);"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+/*
+ * Path A: r0 = map_lookup result (non-FP, ARG_NONE for stack tracking)
+ * Path B: r0 = fp-8 (FP-derived, frame=0, off=-8)
+ * At the join: r0 is not guaranteed to be a frame pointer.
+ */
+SEC("socket")
+__log_level(2)
+__msg("10: (79) r0 = *(u64 *)(r10 -8) // r0=fp0-8|fp0+0")
+__naked void stack_or_non_stack_write(void)
+{
+	asm volatile (
+	/* initial write to fp-8 */
+	"*(u64 *)(r10 - 8) = 0;"
+	/* map lookup to get a non-FP pointer */
+	"r2 = r10;"
+	"r2 += -4;"
+	"r1 = %[map] ll;"
+	"call %[bpf_map_lookup_elem];"
+	/* r0 = map_value (ARG_NONE) */
+	"if r0 != 0 goto 1f;"
+	/* path B: r0 = fp-8 */
+	"r0 = r10;"
+	"r0 += -8;"
+"1:"
+	/* join: the write is not a def for fp[0]-8 */
+	"*(u64 *)(r0 + 0) = 7;"
+	/* read fp-8: should be non-poisoned */
+	"r0 = *(u64 *)(r10 - 8);"
+	"exit;"
+	:
+	: __imm(bpf_map_lookup_elem),
+	  __imm_addr(map)
+	: __clobber_all);
+}
+
+SEC("socket")
+__log_level(2)
+__flag(BPF_F_TEST_STATE_FREQ)
+__msg("subprog#2 write_first_read_second:")
+__msg("17: (7a) *(u64 *)(r1 +0) = 42{{$}}")
+__msg("18: (79) r0 = *(u64 *)(r2 +0) // r1=fp0-8 r2=fp0-16{{$}}")
+__msg("stack use/def subprog#2 write_first_read_second (d2,cs15):")
+__msg("17: (7a) *(u64 *)(r1 +0) = 42{{$}}")
+__msg("18: (79) r0 = *(u64 *)(r2 +0)         ; use: fp0-8 fp0-16")
+__naked void shared_instance_must_write_overwrite(void)
+{
+	asm volatile (
+	"r1 = 1;"
+	"*(u64 *)(r10 - 8) = r1;"
+	"*(u64 *)(r10 - 16) = r1;"
+	/* Call 1: write_first_read_second(&fp[-8], &fp[-16]) */
+	"r1 = r10;"
+	"r1 += -8;"
+	"r2 = r10;"
+	"r2 += -16;"
+	"call forwarding_rw;"
+	/* Call 2: write_first_read_second(&fp[-16], &fp[-8]) */
+	"r1 = r10;"
+	"r1 += -16;"
+	"r2 = r10;"
+	"r2 += -8;"
+	"call forwarding_rw;"
+	"r0 = 0;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void forwarding_rw(void)
+{
+	asm volatile (
+	"call write_first_read_second;"
+	"exit;"
+	::: __clobber_all);
+}
+
+static __used __naked void write_first_read_second(void)
+{
+	asm volatile (
+	"*(u64 *)(r1 + 0) = 42;"
+	"r0 = *(u64 *)(r2 + 0);"
+	"exit;"
+	::: __clobber_all);
+}
+
+#ifdef CAN_USE_LOAD_ACQ_STORE_REL
+
+SEC("socket")
+__log_level(2)
+__success
+__msg("*(u64 *)(r0 +0) = 42         ; def: fp0-16")
+__naked void load_acquire_dont_clear_dst(void)
+{
+	asm volatile (
+	"r0 = r10;"
+	"r0 += -16;"
+	"*(u64 *)(r0 + 0) = r0;"	/* fp[-16] == &fp[-16] */
+	".8byte %[load_acquire_insn];"	/* load_acquire is a special case for BPF_STX, */
+	"r0 = *(u64 *)(r10 - 16);"	/* it shouldn't clear tracking info for */
+	"*(u64 *)(r0 + 0) = 42;"	/* dst register, r0 in this case. */
+	"r0 = 0;"
+	"exit;"
+	:
+	: __imm_insn(load_acquire_insn,
+		     BPF_ATOMIC_OP(BPF_DW, BPF_LOAD_ACQ, BPF_REG_0, BPF_REG_0, 0))
+	: __clobber_all);
+}
+
+#endif /* CAN_USE_LOAD_ACQ_STORE_REL */

-- 
2.53.0
