Re: [PATCH bpf-next 01/18] bpf: Support stack arguments for bpf functions

public inbox for bpf@vger.kernel.org
 help / color / mirror / Atom feed

From: Eduard Zingerman <eddyz87@gmail.com>
To: Yonghong Song <yonghong.song@linux.dev>, bpf@vger.kernel.org
Cc: Alexei Starovoitov <ast@kernel.org>,
	Andrii Nakryiko <andrii@kernel.org>,
	 Daniel Borkmann <daniel@iogearbox.net>,
	"Jose E . Marchesi" <jose.marchesi@oracle.com>,
	kernel-team@fb.com,  Martin KaFai Lau <martin.lau@kernel.org>
Subject: Re: [PATCH bpf-next 01/18] bpf: Support stack arguments for bpf functions
Date: Tue, 28 Apr 2026 07:29:57 -0700	[thread overview]
Message-ID: <7a031b0dcbf54e34d6a6571256b1bb65b5617bcc.camel@gmail.com> (raw)
In-Reply-To: <20260424171438.2034741-1-yonghong.song@linux.dev>

On Fri, 2026-04-24 at 10:14 -0700, Yonghong Song wrote:

[...]

I didn't see this in the patch, hence the question: should or should
not this feature be privileged bpf only?

[...]

> diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
> index d5b4303315dd..2cc349d7fc17 100644
> --- a/include/linux/bpf_verifier.h
> +++ b/include/linux/bpf_verifier.h

[...]

> @@ -508,6 +512,17 @@ struct bpf_verifier_state {
>  	     iter < frame->allocated_stack / BPF_REG_SIZE;		\
>  	     iter++, reg = bpf_get_spilled_reg(iter, frame, mask))
>  
> +#define bpf_get_spilled_stack_arg(slot, frame, mask)                   \
> +	((((slot) < frame->out_stack_arg_depth / BPF_REG_SIZE) &&           \
> +	  (frame->stack_arg_regs[slot].type != NOT_INIT))               \
> +	 ? &frame->stack_arg_regs[slot] : NULL)

can this be a static inline function?

> +
> +/* Iterate over 'frame', setting 'reg' to either NULL or a spilled stack arg. */
> +#define bpf_for_each_spilled_stack_arg(iter, frame, reg, mask)         \
> +	for (iter = 0, reg = bpf_get_spilled_stack_arg(iter, frame, mask); \
> +	     iter < frame->out_stack_arg_depth / BPF_REG_SIZE;              \
> +	     iter++, reg = bpf_get_spilled_stack_arg(iter, frame, mask))
> +
>  #define bpf_for_each_reg_in_vstate_mask(__vst, __state, __reg, __mask, __expr)   \
>  	({                                                               \
>  		struct bpf_verifier_state *___vstate = __vst;            \
> @@ -525,6 +540,11 @@ struct bpf_verifier_state {
>  					continue;                        \
>  				(void)(__expr);                          \
>  			}                                                \
> +			bpf_for_each_spilled_stack_arg(___j, __state, __reg, __mask) { \
> +				if (!__reg)                              \
> +					continue;                        \
> +				(void)(__expr);                          \
> +			}						 \
>  		}                                                        \
>  	})

Tangential nit: I think this macro is getting a bit too complicated,
we might want to introduce some proper reg_state iterator at some
point, e.g.:

  struct ret_iter it = new_reg_iter(state);
  while ((reg = next_reg(&it))) { ... }


>  
> @@ -739,10 +759,13 @@ struct bpf_subprog_info {
>  	bool keep_fastcall_stack: 1;
>  	bool changes_pkt_data: 1;
>  	bool might_sleep: 1;
> -	u8 arg_cnt:3;
> +	u8 arg_cnt:4;
>  
>  	enum priv_stack_mode priv_stack_mode;
> -	struct bpf_subprog_arg_info args[MAX_BPF_FUNC_REG_ARGS];
> +	struct bpf_subprog_arg_info args[MAX_BPF_FUNC_ARGS];
> +	u16 incoming_stack_arg_depth;

Can this be inferred from arg_cnt?
Also, the verifier keeps doing '/ BPF_REG_SIZE' on this number,
would it be more convenient to keep it as count?

> +	u16 stack_arg_depth; /* incoming + max outgoing */
> +	u16 max_out_stack_arg_depth;
>  };
>  
>  struct bpf_verifier_env;
> diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
> index 77af44d8a3ad..cfb35a2decf6 100644
> --- a/kernel/bpf/btf.c
> +++ b/kernel/bpf/btf.c
> @@ -7880,13 +7880,19 @@ int btf_prepare_func_args(struct bpf_verifier_env *env, int subprog)
>  	}
>  	args = (const struct btf_param *)(t + 1);
>  	nargs = btf_type_vlen(t);
> -	if (nargs > MAX_BPF_FUNC_REG_ARGS) {
> -		if (!is_global)
> -			return -EINVAL;
> -		bpf_log(log, "Global function %s() with %d > %d args. Buggy compiler.\n",
> +	if (nargs > MAX_BPF_FUNC_ARGS) {
> +		bpf_log(log, "Function %s() with %d > %d args not supported.\n",
> +			tname, nargs, MAX_BPF_FUNC_ARGS);

Nit: I'd report it as "kernel supports at-most %d parameters for regular functions, while function %s is declared to accept %d parameters"
     just to make the rules a bit more explicit.

> +		return -EINVAL;
> +	}

[...]

> @@ -1378,9 +1382,21 @@ int bpf_fixup_call_args(struct bpf_verifier_env *env)
>  	struct bpf_prog *prog = env->prog;
>  	struct bpf_insn *insn = prog->insnsi;
>  	bool has_kfunc_call = bpf_prog_has_kfunc_call(prog);
> -	int i, depth;
> +	int depth;
>  #endif
> -	int err = 0;
> +	int i, err = 0;
> +
> +	for (i = 0; i < env->subprog_cnt; i++) {
> +		struct bpf_subprog_info *subprog = &env->subprog_info[i];
> +		u16 outgoing = subprog->stack_arg_depth - subprog->incoming_stack_arg_depth;
> +
> +		if (subprog->max_out_stack_arg_depth > outgoing) {
> +			verbose(env,
> +				"func#%d writes stack arg slot at depth %u, but calls only require %u bytes\n",
> +				i, subprog->max_out_stack_arg_depth, outgoing);
> +			return -EINVAL;

Is this an internal error condition?
If it is, maybe use verifier_bug()?

> +		}
> +	}
>  
>  	if (env->prog->jit_requested &&
>  	    !bpf_prog_is_offloaded(env->prog->aux)) {
> diff --git a/kernel/bpf/states.c b/kernel/bpf/states.c
> index 8478d2c6ed5b..3e59d1c3a726 100644
> --- a/kernel/bpf/states.c
> +++ b/kernel/bpf/states.c
> @@ -838,6 +838,34 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
>  	return true;
>  }
>  
> +/*
> + * Compare stack arg slots between old and current states.
> + * Outgoing stack args are path-local state and must agree for pruning.
> + */
> +static bool stack_arg_safe(struct bpf_verifier_env *env, struct bpf_func_state *old,
> +			   struct bpf_func_state *cur, struct bpf_idmap *idmap,
> +			   enum exact_level exact)
> +{
> +	int i, nslots;
> +
> +	nslots = min(old->out_stack_arg_depth, cur->out_stack_arg_depth) / BPF_REG_SIZE;

this is not safe, e.g. it will accept cur with one argument as
equivalent for old with two arguments.

> +	for (i = 0; i < nslots; i++) {
> +		struct bpf_reg_state *old_arg = &old->stack_arg_regs[i];
> +		struct bpf_reg_state *cur_arg = &cur->stack_arg_regs[i];
> +
> +		if (old_arg->type == NOT_INIT && cur_arg->type == NOT_INIT)
> +			continue;
> +
> +		if (exact == EXACT && old_arg->type != cur_arg->type)
> +			return false;
> +
> +		if (!regsafe(env, old_arg, cur_arg, idmap, exact))
> +			return false;
> +	}

regsafe() seem handles NOT_INIT and EXACT in the same way,
I don't think there is a necessity to do the handling explicitly here.

> +
> +	return true;
> +}
> +

[...]

> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index ff6ff1c27517..bcf81692a22b 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -1361,6 +1361,18 @@ static int copy_stack_state(struct bpf_func_state *dst, const struct bpf_func_st
>  		return -ENOMEM;
>  
>  	dst->allocated_stack = src->allocated_stack;
> +
> +	/* copy stack args state */
> +	n = src->out_stack_arg_depth / BPF_REG_SIZE;
> +	if (n) {
> +		dst->stack_arg_regs = copy_array(dst->stack_arg_regs, src->stack_arg_regs, n,
> +						 sizeof(struct bpf_reg_state),
> +						 GFP_KERNEL_ACCOUNT);
> +		if (!dst->stack_arg_regs)
> +			return -ENOMEM;
> +	}
> +
> +	dst->out_stack_arg_depth = src->out_stack_arg_depth;

Given that this is capped by 12, does it make sense to maintain the counter?
It might be simpler to always allocate an array of 12 elements.

>  	return 0;
>  }

[...]

> @@ -4417,6 +4446,109 @@ static int check_stack_write(struct bpf_verifier_env *env,
>  	return err;
>  }
>  
> +/*
> + * Write a value to the outgoing stack arg area.
> + * off is a negative offset from r11 (e.g. -8 for arg6, -16 for arg7).
> + */
> +static int check_stack_arg_write(struct bpf_verifier_env *env, struct bpf_func_state *state,
> +				 int off, int value_regno)
> +{

Nit: Maybe replace value_regno with pointer to a register state?
     Just for consistency.

> +	int max_stack_arg_regs = MAX_BPF_FUNC_ARGS - MAX_BPF_FUNC_REG_ARGS;
> +	struct bpf_subprog_info *subprog = &env->subprog_info[state->subprogno];
> +	int spi = -off / BPF_REG_SIZE - 1;
> +	struct bpf_func_state *cur;
> +	struct bpf_reg_state *arg;
> +	int err;
> +
> +	if (spi >= max_stack_arg_regs) {
> +		verbose(env, "stack arg write offset %d exceeds max %d stack args\n",
> +			off, max_stack_arg_regs);
> +		return -EINVAL;
> +	}
> +
> +	err = grow_stack_arg_slots(env, state, -off);
> +	if (err)
> +		return err;
> +
> +	/* Track the max outgoing stack arg access depth. */
> +	if (-off > subprog->max_out_stack_arg_depth)
> +		subprog->max_out_stack_arg_depth = -off;
> +
> +	cur = env->cur_state->frame[env->cur_state->curframe];
> +	if (value_regno >= 0) {
> +		state->stack_arg_regs[spi] = cur->regs[value_regno];

Nit: there is copy_register_state(), we should either use it here or
drop it and replace with direct assignments everywhere.

> +	} else {
> +		/* BPF_ST: store immediate, treat as scalar */
> +		arg = &state->stack_arg_regs[spi];
> +		arg->type = SCALAR_VALUE;
> +		__mark_reg_known(arg, env->prog->insnsi[env->insn_idx].imm);
> +	}
> +	state->no_stack_arg_load = true;
> +	return 0;
> +}
> +
> +/*
> + * Read a value from the incoming stack arg area.
> + * off is a positive offset from r11 (e.g. +8 for arg6, +16 for arg7).
> + */
> +static int check_stack_arg_read(struct bpf_verifier_env *env, struct bpf_func_state *state,
> +				int off, int dst_regno)
> +{
> +	struct bpf_subprog_info *subprog = &env->subprog_info[state->subprogno];
> +	struct bpf_verifier_state *vstate = env->cur_state;
> +	int spi = off / BPF_REG_SIZE - 1;
> +	struct bpf_func_state *caller, *cur;
> +	struct bpf_reg_state *arg;
> +
> +	if (state->no_stack_arg_load) {
> +		verbose(env, "r11 load must be before any r11 store or call insn\n");
> +		return -EINVAL;
> +	}

I think the error message should be inverted, store should precede the load.
But tbh, I'd drop it altogether, the check right below should be sufficient.

> +
> +	if (off > subprog->incoming_stack_arg_depth) {
> +		verbose(env, "invalid read from stack arg off %d depth %d\n",
> +			off, subprog->incoming_stack_arg_depth);
> +		return -EACCES;
> +	}
> +
> +	caller = vstate->frame[vstate->curframe - 1];
> +	arg = &caller->stack_arg_regs[spi];
> +	cur = vstate->frame[vstate->curframe];
> +
> +	if (is_spillable_regtype(arg->type))
> +		copy_register_state(&cur->regs[dst_regno], arg);
> +	else
> +		mark_reg_unknown(env, cur->regs, dst_regno);

For stack writes we report error in such situations,
should the same be done here?

> +	return 0;
> +}
> +
> +static int check_outgoing_stack_args(struct bpf_verifier_env *env, struct bpf_func_state *caller,
> +				     int nargs)
> +{
> +	int i, spi;
> +
> +	for (i = MAX_BPF_FUNC_REG_ARGS; i < nargs; i++) {
> +		spi = i - MAX_BPF_FUNC_REG_ARGS;
> +		if (spi >= (caller->out_stack_arg_depth / BPF_REG_SIZE) ||
> +		    caller->stack_arg_regs[spi].type == NOT_INIT) {
> +			verbose(env, "stack %s not properly initialized\n",
> +				reg_arg_name(env, argno_from_arg(i + 1)));

Nit: error message is a bit confusing, I'd change it to better reflect the rules, e.g.:
     "function %s expects %d arguments, stack argument %d is not initialized".

> +			return -EFAULT;
> +		}
> +	}
> +
> +	return 0;
> +}

[...]

next prev parent reply	other threads:[~2026-04-28 14:30 UTC|newest]

Thread overview: 50+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-24 17:14 [PATCH bpf-next 00/18] bpf: Support stack arguments for BPF functions and kfuncs Yonghong Song
2026-04-24 17:14 ` [PATCH bpf-next 01/18] bpf: Support stack arguments for bpf functions Yonghong Song
2026-04-24 18:13   ` bot+bpf-ci
2026-04-25  5:09     ` Yonghong Song
2026-04-27 20:40       ` Yonghong Song
2026-04-28 14:29   ` Eduard Zingerman [this message]
2026-04-28 16:47     ` Yonghong Song
2026-04-28 23:50       ` Yonghong Song
2026-04-29  0:28       ` Eduard Zingerman
2026-04-24 17:14 ` [PATCH bpf-next 02/18] bpf: Add precision marking and backtracking for stack argument slots Yonghong Song
2026-04-24 18:00   ` bot+bpf-ci
2026-04-25  5:10     ` Yonghong Song
2026-04-28 16:46   ` Eduard Zingerman
2026-04-28 20:54     ` Yonghong Song
2026-04-24 17:14 ` [PATCH bpf-next 03/18] bpf: Refactor record_call_access() to extract per-arg logic Yonghong Song
2026-04-29  0:51   ` Eduard Zingerman
2026-04-24 17:14 ` [PATCH bpf-next 04/18] bpf: Extend liveness analysis to track stack argument slots Yonghong Song
2026-04-24 18:00   ` bot+bpf-ci
2026-04-25  5:11     ` Yonghong Song
2026-04-29 12:22   ` Eduard Zingerman
2026-04-24 17:14 ` [PATCH bpf-next 05/18] bpf: Reject stack arguments in non-JITed programs Yonghong Song
2026-04-24 18:00   ` bot+bpf-ci
2026-04-29 12:27   ` Eduard Zingerman
2026-04-24 17:15 ` [PATCH bpf-next 06/18] bpf: Prepare architecture JIT support for stack arguments Yonghong Song
2026-04-24 17:48   ` bot+bpf-ci
2026-04-25  5:17     ` Yonghong Song
2026-04-29 12:37   ` Eduard Zingerman
2026-04-24 17:15 ` [PATCH bpf-next 07/18] bpf: Enable r11 based insns Yonghong Song
2026-04-29 12:48   ` Eduard Zingerman
2026-04-24 17:15 ` [PATCH bpf-next 08/18] bpf: Support stack arguments for kfunc calls Yonghong Song
2026-04-24 18:00   ` bot+bpf-ci
2026-04-25  5:19     ` Yonghong Song
2026-04-24 17:15 ` [PATCH bpf-next 09/18] bpf: Reject stack arguments if tail call reachable Yonghong Song
2026-04-24 18:00   ` bot+bpf-ci
2026-04-24 17:15 ` [PATCH bpf-next 10/18] bpf,x86: Implement JIT support for stack arguments Yonghong Song
2026-04-24 18:00   ` bot+bpf-ci
2026-04-25  5:29     ` Yonghong Song
2026-04-24 17:16 ` [PATCH bpf-next 11/18] selftests/bpf: Add tests for BPF function " Yonghong Song
2026-04-24 17:16 ` [PATCH bpf-next 12/18] selftests/bpf: Add tests for stack argument validation Yonghong Song
2026-04-24 17:17 ` [PATCH bpf-next 13/18] selftests/bpf: Add verifier " Yonghong Song
2026-04-24 17:48   ` bot+bpf-ci
2026-04-25  5:33     ` Yonghong Song
2026-04-24 17:17 ` [PATCH bpf-next 14/18] selftests/bpf: Add BTF fixup for __naked subprog parameter names Yonghong Song
2026-04-24 17:17 ` [PATCH bpf-next 15/18] selftests/bpf: Add precision backtracking test for stack arguments Yonghong Song
2026-04-24 17:17 ` [PATCH bpf-next 16/18] bpf, arm64: Map BPF_REG_0 to x8 instead of x7 Yonghong Song
2026-04-24 17:17 ` [PATCH bpf-next 17/18] bpf, arm64: Add JIT support for stack arguments Yonghong Song
2026-04-24 18:00   ` bot+bpf-ci
2026-04-27  9:06     ` Puranjay Mohan
2026-04-27 20:42       ` Yonghong Song
2026-04-24 17:17 ` [PATCH bpf-next 18/18] selftests/bpf: Enable stack argument tests for arm64 Yonghong Song

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=7a031b0dcbf54e34d6a6571256b1bb65b5617bcc.camel@gmail.com \
    --to=eddyz87@gmail.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=jose.marchesi@oracle.com \
    --cc=kernel-team@fb.com \
    --cc=martin.lau@kernel.org \
    --cc=yonghong.song@linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox