BPF List
 help / color / mirror / Atom feed
From: Eduard Zingerman <eddyz87@gmail.com>
To: Amery Hung <ameryhung@gmail.com>, netdev@vger.kernel.org
Cc: bpf@vger.kernel.org, daniel@iogearbox.net, andrii@kernel.org,
	 alexei.starovoitov@gmail.com, martin.lau@kernel.org,
	sinquersw@gmail.com,  toke@redhat.com, jhs@mojatatu.com,
	jiri@resnulli.us, stfomichev@gmail.com,
	 ekarani.silvestre@ccc.ufcg.edu.br, yangpeihao@sjtu.edu.cn,
	 xiyou.wangcong@gmail.com, yepeilin.cs@gmail.com,
	amery.hung@bytedance.com
Subject: Re: [PATCH bpf-next v2 03/14] bpf: Allow struct_ops prog to return referenced kptr
Date: Thu, 23 Jan 2025 01:57:52 -0800	[thread overview]
Message-ID: <37a51a1f055f61911f7a4df9e8072f76412ad136.camel@gmail.com> (raw)
In-Reply-To: <20241220195619.2022866-4-amery.hung@gmail.com>

On Fri, 2024-12-20 at 11:55 -0800, Amery Hung wrote:

[...]

> diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
> index d9e0af00580b..27d4a170df84 100644
> --- a/kernel/bpf/bpf_struct_ops.c
> +++ b/kernel/bpf/bpf_struct_ops.c
> @@ -386,7 +386,7 @@ int bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc,
>  	st_ops_desc->value_type = btf_type_by_id(btf, value_id);
>  
>  	for_each_member(i, t, member) {
> -		const struct btf_type *func_proto;
> +		const struct btf_type *func_proto, *ret_type;
>  
>  		mname = btf_name_by_offset(btf, member->name_off);
>  		if (!*mname) {
> @@ -409,6 +409,16 @@ int bpf_struct_ops_desc_init(struct bpf_struct_ops_desc *st_ops_desc,
>  		if (!func_proto)
>  			continue;
>  
> +		if (func_proto->type) {
> +			ret_type = btf_type_resolve_ptr(btf, func_proto->type, NULL);
> +			if (ret_type && !__btf_type_is_struct(ret_type)) {
> +				pr_warn("func ptr %s in struct %s returns non-struct pointer, which is not supported\n",
> +					mname, st_ops->name);
> +				err = -EOPNOTSUPP;
> +				goto errout;
> +			}
> +		}
> +

This limitation seems unnecessary if reference leaks are only allowed
for parameters marked with __ref.

>  		if (btf_distill_func_proto(log, btf,
>  					   func_proto, mname,
>  					   &st_ops->func_models[i])) {
> diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
> index 26305571e377..0e6a3c4daa7d 100644
> --- a/kernel/bpf/verifier.c
> +++ b/kernel/bpf/verifier.c
> @@ -10707,6 +10707,8 @@ record_func_key(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
>  static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exit)
>  {
>  	struct bpf_verifier_state *state = env->cur_state;
> +	enum bpf_prog_type type = resolve_prog_type(env->prog);
> +	struct bpf_reg_state *reg = reg_state(env, BPF_REG_0);
>  	bool refs_lingering = false;
>  	int i;
>  
> @@ -10716,6 +10718,12 @@ static int check_reference_leak(struct bpf_verifier_env *env, bool exception_exi
>  	for (i = 0; i < state->acquired_refs; i++) {
>  		if (state->refs[i].type != REF_TYPE_PTR)
>  			continue;
> +		/* Allow struct_ops programs to return a referenced kptr back to
> +		 * kernel. Type checks are performed later in check_return_code.
> +		 */
> +		if (type == BPF_PROG_TYPE_STRUCT_OPS && !exception_exit &&
> +		    reg->ref_obj_id == state->refs[i].id)
> +			continue;
>  		verbose(env, "Unreleased reference id=%d alloc_insn=%d\n",
>  			state->refs[i].id, state->refs[i].insn_idx);
>  		refs_lingering = true;
> @@ -16320,13 +16328,14 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char
>  	const char *exit_ctx = "At program exit";
>  	struct tnum enforce_attach_type_range = tnum_unknown;
>  	const struct bpf_prog *prog = env->prog;
> -	struct bpf_reg_state *reg;
> +	struct bpf_reg_state *reg = reg_state(env, regno);
>  	struct bpf_retval_range range = retval_range(0, 1);
>  	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
>  	int err;
>  	struct bpf_func_state *frame = env->cur_state->frame[0];
>  	const bool is_subprog = frame->subprogno;
>  	bool return_32bit = false;
> +	const struct btf_type *reg_type, *ret_type = NULL;
>  
>  	/* LSM and struct_ops func-ptr's return type could be "void" */
>  	if (!is_subprog || frame->in_exception_callback_fn) {
> @@ -16335,10 +16344,26 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char
>  			if (prog->expected_attach_type == BPF_LSM_CGROUP)
>  				/* See below, can be 0 or 0-1 depending on hook. */
>  				break;
> -			fallthrough;
> +			if (!prog->aux->attach_func_proto->type)
> +				return 0;
> +			break;
>  		case BPF_PROG_TYPE_STRUCT_OPS:
>  			if (!prog->aux->attach_func_proto->type)
>  				return 0;
> +
> +			if (frame->in_exception_callback_fn)
> +				break;
> +
> +			/* Allow a struct_ops program to return a referenced kptr if it
> +			 * matches the operator's return type and is in its unmodified
> +			 * form. A scalar zero (i.e., a null pointer) is also allowed.
> +			 */
> +			reg_type = reg->btf ? btf_type_by_id(reg->btf, reg->btf_id) : NULL;
> +			ret_type = btf_type_resolve_ptr(prog->aux->attach_btf,
> +							prog->aux->attach_func_proto->type,
> +							NULL);

This does not enforce the kernel provenance of the pointer.
See my comment for the next patch for an example.

I think such a return should only be allowed for parameters marked with
the __ref suffix. If so, the pointer provenance check would just compare
the reg->ref_obj_id value with the known ids of the __ref arguments.

> +			if (ret_type && ret_type == reg_type && reg->ref_obj_id)
> +				return __check_ptr_off_reg(env, reg, regno, false);
>  			break;
>  		default:
>  			break;
> @@ -16360,8 +16385,6 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char
>  		return -EACCES;
>  	}
>  
> -	reg = cur_regs(env) + regno;
> -
>  	if (frame->in_async_callback_fn) {
>  		/* enforce return zero from async callbacks like timer */
>  		exit_ctx = "At async callback return";
> @@ -16460,6 +16483,11 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char
>  	case BPF_PROG_TYPE_NETFILTER:
>  		range = retval_range(NF_DROP, NF_ACCEPT);
>  		break;
> +	case BPF_PROG_TYPE_STRUCT_OPS:
> +		if (!ret_type)
> +			return 0;
> +		range = retval_range(0, 0);
> +		break;
>  	case BPF_PROG_TYPE_EXT:
>  		/* freplace program can return anything as its return value
>  		 * depends on the to-be-replaced kernel func or bpf program.



  parent reply	other threads:[~2025-01-23  9:57 UTC|newest]

Thread overview: 34+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-12-20 19:55 [PATCH bpf-next v2 00/14] bpf qdisc Amery Hung
2024-12-20 19:55 ` [PATCH bpf-next v2 01/14] bpf: Support getting referenced kptr from struct_ops argument Amery Hung
2025-01-23  9:57   ` Eduard Zingerman
2025-01-23 19:41     ` Amery Hung
2024-12-20 19:55 ` [PATCH bpf-next v2 02/14] selftests/bpf: Test referenced kptr arguments of struct_ops programs Amery Hung
2025-01-23  9:57   ` Eduard Zingerman
2025-01-24  0:04     ` Amery Hung
2024-12-20 19:55 ` [PATCH bpf-next v2 03/14] bpf: Allow struct_ops prog to return referenced kptr Amery Hung
2025-01-15 15:25   ` Ming Lei
2025-01-23  9:57   ` Eduard Zingerman [this message]
2025-01-23 18:19     ` Eduard Zingerman
2024-12-20 19:55 ` [PATCH bpf-next v2 04/14] selftests/bpf: Test returning referenced kptr from struct_ops programs Amery Hung
2025-01-23  9:58   ` Eduard Zingerman
2024-12-20 19:55 ` [PATCH bpf-next v2 05/14] bpf: net_sched: Support implementation of Qdisc_ops in bpf Amery Hung
2025-01-09 15:00   ` Amery Hung
2025-01-10  0:28   ` Martin KaFai Lau
2025-01-10  1:20   ` Jakub Kicinski
2024-12-20 19:55 ` [PATCH bpf-next v2 06/14] bpf: net_sched: Add basic bpf qdisc kfuncs Amery Hung
2025-01-10  0:24   ` Martin KaFai Lau
2025-01-10 18:00     ` Amery Hung
2024-12-20 19:55 ` [PATCH bpf-next v2 07/14] bpf: Search and add kfuncs in struct_ops prologue and epilogue Amery Hung
2024-12-20 19:55 ` [PATCH bpf-next v2 08/14] bpf: net_sched: Add a qdisc watchdog timer Amery Hung
2025-01-09  0:20   ` Martin KaFai Lau
2025-01-09 15:00     ` Amery Hung
2024-12-20 19:55 ` [PATCH bpf-next v2 09/14] bpf: net_sched: Support updating bstats Amery Hung
2024-12-20 19:55 ` [PATCH bpf-next v2 10/14] bpf: net_sched: Support updating qstats Amery Hung
2024-12-20 19:55 ` [PATCH bpf-next v2 11/14] bpf: net_sched: Allow writing to more Qdisc members Amery Hung
2024-12-20 19:55 ` [PATCH bpf-next v2 12/14] libbpf: Support creating and destroying qdisc Amery Hung
2024-12-20 19:55 ` [PATCH bpf-next v2 13/14] selftests: Add a basic fifo qdisc test Amery Hung
2025-01-10  0:05   ` Martin KaFai Lau
2024-12-20 19:55 ` [PATCH bpf-next v2 14/14] selftests: Add a bpf fq qdisc to selftest Amery Hung
2025-01-09 23:36   ` Martin KaFai Lau
2025-01-02 17:29 ` [PATCH bpf-next v2 00/14] bpf qdisc Toke Høiland-Jørgensen
2025-01-10  1:43 ` Martin KaFai Lau

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=37a51a1f055f61911f7a4df9e8072f76412ad136.camel@gmail.com \
    --to=eddyz87@gmail.com \
    --cc=alexei.starovoitov@gmail.com \
    --cc=amery.hung@bytedance.com \
    --cc=ameryhung@gmail.com \
    --cc=andrii@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=ekarani.silvestre@ccc.ufcg.edu.br \
    --cc=jhs@mojatatu.com \
    --cc=jiri@resnulli.us \
    --cc=martin.lau@kernel.org \
    --cc=netdev@vger.kernel.org \
    --cc=sinquersw@gmail.com \
    --cc=stfomichev@gmail.com \
    --cc=toke@redhat.com \
    --cc=xiyou.wangcong@gmail.com \
    --cc=yangpeihao@sjtu.edu.cn \
    --cc=yepeilin.cs@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox