public inbox for netdev@vger.kernel.org
 help / color / mirror / Atom feed
From: Eduard Zingerman <eddyz87@gmail.com>
To: Amery Hung <ameryhung@gmail.com>, bpf@vger.kernel.org
Cc: netdev@vger.kernel.org, alexei.starovoitov@gmail.com,
	andrii@kernel.org, daniel@iogearbox.net, memxor@gmail.com,
	martin.lau@kernel.org,  mykyta.yatsenko5@gmail.com,
	kernel-team@meta.com
Subject: Re: [PATCH bpf-next v3 4/9] bpf: Refactor object relationship tracking and fix dynptr UAF bug
Date: Fri, 24 Apr 2026 15:48:17 -0700	[thread overview]
Message-ID: <02dbf08511c72ea0c1a5caa33ed400f4a3e3c3c9.camel@gmail.com> (raw)
In-Reply-To: <20260421221016.2967924-5-ameryhung@gmail.com>

On Tue, 2026-04-21 at 15:10 -0700, Amery Hung wrote:

Tbh, I find current state of affairs with id/ref_obj_id/parent_id hard
to follow. The release_reference() is an improvement, but the means by
which the fields are propagated to bpf_reg_state objects are convoluted.
I wonder if having a separate "object table" in bpf_verifier_state and
having bpf_reg_state->id refer to objects within this table would make
things more straight forward.

A few nits below.

[...]

> diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h

[...]

> @@ -1323,6 +1335,7 @@ struct bpf_dynptr_desc {
>  	enum bpf_dynptr_type type;
>  	u32 id;
>  	u32 ref_obj_id;
> +	u32 parent_id;
>  };

Nit: would be nice to have a comment describing when the above fields
     are populated.

>  
>  struct bpf_kfunc_call_arg_meta {
> @@ -1334,6 +1347,7 @@ struct bpf_kfunc_call_arg_meta {
>  	const char *func_name;
>  	/* Out parameters */
>  	u32 ref_obj_id;
> +	u32 id;

Nit: would be nice to have a comment here too.

[...]

> diff --git a/kernel/bpf/states.c b/kernel/bpf/states.c
> index 8478d2c6ed5b..72bd3bcda5fb 100644
> --- a/kernel/bpf/states.c
> +++ b/kernel/bpf/states.c
> @@ -494,7 +494,8 @@ static bool regs_exact(const struct bpf_reg_state *rold,
>  {
>  	return memcmp(rold, rcur, offsetof(struct bpf_reg_state, id)) == 0 &&
>  	       check_ids(rold->id, rcur->id, idmap) &&
> -	       check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
> +	       check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap) &&
> +	       check_ids(rold->parent_id, rcur->parent_id, idmap);

Nit: these check_ids() become repetitive, maybe add a utility function
     checking id/ref_obj_id/parent_id?

>  }
>  
>  enum exact_level {
> @@ -619,7 +620,8 @@ static bool regsafe(struct bpf_verifier_env *env, struct bpf_reg_state *rold,
>  		       range_within(rold, rcur) &&
>  		       tnum_in(rold->var_off, rcur->var_off) &&
>  		       check_ids(rold->id, rcur->id, idmap) &&
> -		       check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap);
> +		       check_ids(rold->ref_obj_id, rcur->ref_obj_id, idmap) &&
> +		       check_ids(rold->parent_id, rcur->parent_id, idmap);
>  	case PTR_TO_PACKET_META:
>  	case PTR_TO_PACKET:
>  		/* We must have at least as much range as the old ptr
> @@ -799,7 +801,8 @@ static bool stacksafe(struct bpf_verifier_env *env, struct bpf_func_state *old,
>  			cur_reg = &cur->stack[spi].spilled_ptr;
>  			if (old_reg->dynptr.type != cur_reg->dynptr.type ||
>  			    old_reg->dynptr.first_slot != cur_reg->dynptr.first_slot ||
> -			    !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap))
> +			    !check_ids(old_reg->ref_obj_id, cur_reg->ref_obj_id, idmap) ||
> +			    !check_ids(old_reg->parent_id, cur_reg->parent_id, idmap))

Not something changed by the current patch, but still a question:
this path ignores old_reg->id, is it a bug?

>  				return false;
>  			break;
>  		case STACK_ITER:
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 0313b7d5f6c9..908a3af0e7c4 100644

[...]

> @@ -241,6 +241,7 @@ struct bpf_call_arg_meta {
>  	int mem_size;
>  	u64 msize_max_value;
>  	int ref_obj_id;
> +	u32 id;

Nit: please add a comment here as well.

[...]

> @@ -635,11 +636,12 @@ static int destroy_if_dynptr_stack_slot(struct bpf_verifier_env *env,
>  				        struct bpf_func_state *state, int spi);
>  
>  static int mark_stack_slots_dynptr(struct bpf_verifier_env *env, struct bpf_reg_state *reg,
> -				   enum bpf_arg_type arg_type, int insn_idx, int clone_ref_obj_id)
> +				   enum bpf_arg_type arg_type, int insn_idx, int parent_id,
> +				   struct bpf_dynptr_desc *dynptr)

Having both parent_id and dynptr->parent_id as parameters of this
function is very confusing, but I don't have a suggestion on how to
better deal with it.

[...]

> @@ -8489,6 +8444,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
>  			return -EACCES;
>  		}
>  		meta->ref_obj_id = reg->ref_obj_id;
> +		meta->id = reg->id;

Could you please leave a comment here explaining when e.g.
meta->ref_obj_id and meta->id would have same or different ids here?
Maybe a few lines of BPF C code.

>  	}
>  
>  	switch (base_type(arg_type)) {
> @@ -9111,26 +9067,75 @@ static int release_reference_nomark(struct bpf_verifier_state *state, int ref_ob

[...]

> +/* Release id and objects referencing the id iteratively in a DFS manner */
> +static int release_reference(struct bpf_verifier_env *env, int id)
> +{
> +	u32 mask = (1 << STACK_SPILL) | (1 << STACK_DYNPTR);
>  	struct bpf_verifier_state *vstate = env->cur_state;
> +	struct bpf_idmap *idstack = &env->idmap_scratch;
> +	struct bpf_stack_state *stack;
>  	struct bpf_func_state *state;
>  	struct bpf_reg_state *reg;
> -	int err;
> +	int root_id = id, err;
>  
> -	err = release_reference_nomark(vstate, ref_obj_id);
> -	if (err)
> -		return err;
> +	idstack->cnt = 0;
> +	idstack_push(idstack, id);
>  
> -	bpf_for_each_reg_in_vstate(vstate, state, reg, ({
> -		if (reg->ref_obj_id == ref_obj_id)
> -			mark_reg_invalid(env, reg);
> -	}));
> +	if (find_reference_state(vstate, id))
> +		WARN_ON_ONCE(release_reference_nomark(vstate, id));
> +
> +	while ((id = idstack_pop(idstack))) {
> +		bpf_for_each_reg_in_vstate_mask(vstate, state, reg, stack, mask, ({
> +			if (reg->id != id && reg->parent_id != id && reg->ref_obj_id != id)
> +				continue;
> +
> +			if (reg->ref_obj_id && id != root_id) {
> +				struct bpf_reference_state *ref_state;
> +
> +				ref_state = find_reference_state(env->cur_state, reg->ref_obj_id);
> +				verbose(env, "Unreleased reference id=%d alloc_insn=%d when releasing id=%d\n",
> +					ref_state->id, ref_state->insn_idx, root_id);
> +				return -EINVAL;
> +			}
> +
> +			if (reg->id != id) {
> +				err = idstack_push(idstack, reg->id);
> +				if (err)
> +					return err;
> +			}
> +
> +			if (!stack || stack->slot_type[BPF_REG_SIZE - 1] == STACK_SPILL)
> +				mark_reg_invalid(env, reg);
> +			else if (stack->slot_type[BPF_REG_SIZE - 1] == STACK_DYNPTR)
> +				invalidate_dynptr(env, state, stack);

invalidate_dynptr() rewrites to stack slots, can it be the case that
this body of bpf_for_each_reg_in_vstate_mask() is computed for first
and second dynptr stack slots, hence triggering invalidate_dynptr() to
rewrite three slots instead of two?

> +		}));
> +	}
>  
>  	return 0;
>  }

[...]

> @@ -12009,6 +12009,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
>  				return -EFAULT;
>  			}
>  			meta->ref_obj_id = reg->ref_obj_id;
> +			meta->id = reg->id;

And here a comment describing when this happens would be helpful.

[...]

> @@ -12171,15 +12171,10 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
>  				}
>  
>  				dynptr_arg_type |= (unsigned int)get_dynptr_type_flag(parent_type);
> -				clone_ref_obj_id = meta->dynptr.ref_obj_id;
> -				if (dynptr_type_refcounted(parent_type) && !clone_ref_obj_id) {
> -					verifier_bug(env, "missing ref obj id for parent of clone");
> -					return -EFAULT;
> -				}
>  			}
>  
> -			ret = process_dynptr_func(env, regno, insn_idx, dynptr_arg_type, clone_ref_obj_id,
> -						  &meta->dynptr);
> +			ret = process_dynptr_func(env, regno, insn_idx, dynptr_arg_type,
> +						  meta->ref_obj_id ? meta->id : 0, &meta->dynptr);

And an example here as well.

>  			if (ret < 0)
>  				return ret;
>  			break;

[...]

  parent reply	other threads:[~2026-04-24 22:48 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2026-04-21 22:10 [PATCH bpf-next v3 0/9] Refactor verifier object relationship tracking Amery Hung
2026-04-21 22:10 ` [PATCH bpf-next v3 1/9] bpf: Unify dynptr handling in the verifier Amery Hung
2026-04-21 22:52   ` bot+bpf-ci
2026-04-23 14:56   ` Mykyta Yatsenko
2026-04-24  0:04   ` Andrii Nakryiko
2026-04-24  5:34     ` Amery Hung
2026-04-24 16:22       ` Andrii Nakryiko
2026-04-21 22:10 ` [PATCH bpf-next v3 2/9] bpf: Assign reg->id when getting referenced kptr from ctx Amery Hung
2026-04-22 21:46   ` Eduard Zingerman
2026-04-22 22:45     ` Amery Hung
2026-04-22 22:50       ` Eduard Zingerman
2026-04-23 22:46         ` Amery Hung
2026-04-24  0:04   ` Andrii Nakryiko
2026-04-21 22:10 ` [PATCH bpf-next v3 3/9] bpf: Preserve reg->id of pointer objects after null-check Amery Hung
2026-04-21 22:52   ` bot+bpf-ci
2026-04-22 22:46     ` Eduard Zingerman
2026-04-24  0:04   ` Andrii Nakryiko
2026-04-21 22:10 ` [PATCH bpf-next v3 4/9] bpf: Refactor object relationship tracking and fix dynptr UAF bug Amery Hung
2026-04-23 18:19   ` Mykyta Yatsenko
2026-04-23 18:44     ` Amery Hung
2026-04-24  0:04       ` Andrii Nakryiko
2026-04-24  6:46         ` Amery Hung
2026-04-24 22:48   ` Eduard Zingerman [this message]
2026-04-21 22:10 ` [PATCH bpf-next v3 5/9] bpf: Remove redundant dynptr arg check for helper Amery Hung
2026-04-21 22:10 ` [PATCH bpf-next v3 6/9] selftests/bpf: Test creating dynptr from dynptr data and slice Amery Hung
2026-04-21 22:10 ` [PATCH bpf-next v3 7/9] selftests/bpf: Test using dynptr after freeing the underlying object Amery Hung
2026-04-21 22:10 ` [PATCH bpf-next v3 8/9] selftests/bpf: Test using slice after invalidating dynptr clone Amery Hung
2026-04-21 22:10 ` [PATCH bpf-next v3 9/9] selftests/bpf: Test using file dynptr after the reference on file is dropped Amery Hung
2026-04-24  0:04 ` [PATCH bpf-next v3 0/9] Refactor verifier object relationship tracking Andrii Nakryiko
2026-04-24  0:55 ` Alexei Starovoitov
2026-04-24  5:44   ` Amery Hung
2026-04-24 13:55     ` Alexei Starovoitov
2026-04-24 16:19       ` Amery Hung

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=02dbf08511c72ea0c1a5caa33ed400f4a3e3c3c9.camel@gmail.com \
    --to=eddyz87@gmail.com \
    --cc=alexei.starovoitov@gmail.com \
    --cc=ameryhung@gmail.com \
    --cc=andrii@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=kernel-team@meta.com \
    --cc=martin.lau@kernel.org \
    --cc=memxor@gmail.com \
    --cc=mykyta.yatsenko5@gmail.com \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox