netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Amery Hung <ameryhung@gmail.com>
To: netdev@vger.kernel.org
Cc: bpf@vger.kernel.org, daniel@iogearbox.net, andrii@kernel.org,
	alexei.starovoitov@gmail.com, martin.lau@kernel.org,
	kuba@kernel.org, edumazet@google.com, xiyou.wangcong@gmail.com,
	cong.wang@bytedance.com, jhs@mojatatu.com, sinquersw@gmail.com,
	toke@redhat.com, jiri@resnulli.us, stfomichev@gmail.com,
	ekarani.silvestre@ccc.ufcg.edu.br, yangpeihao@sjtu.edu.cn,
	yepeilin.cs@gmail.com, ameryhung@gmail.com, ming.lei@redhat.com,
	kernel-team@meta.com
Subject: [PATCH bpf-next v3 02/18] bpf: Support getting referenced kptr from struct_ops argument
Date: Fri, 31 Jan 2025 11:28:41 -0800	[thread overview]
Message-ID: <20250131192912.133796-3-ameryhung@gmail.com> (raw)
In-Reply-To: <20250131192912.133796-1-ameryhung@gmail.com>

From: Amery Hung <amery.hung@bytedance.com>

Allows struct_ops programs to acqurie referenced kptrs from arguments
by directly reading the argument.

The verifier will acquire a reference for struct_ops a argument tagged
with "__ref" in the stub function in the beginning of the main program.
The user will be able to access the referenced kptr directly by reading
the context as long as it has not been released by the program.

This new mechanism to acquire referenced kptr (compared to the existing
"kfunc with KF_ACQUIRE") is introduced for ergonomic and semantic reasons.
In the first use case, Qdisc_ops, an skb is passed to .enqueue in the
first argument. This mechanism provides a natural way for users to get a
referenced kptr in the .enqueue struct_ops programs and makes sure that a
qdisc will always enqueue or drop the skb.

Signed-off-by: Amery Hung <amery.hung@bytedance.com>
---
 include/linux/bpf.h         |  3 +++
 kernel/bpf/bpf_struct_ops.c | 26 ++++++++++++++++++++------
 kernel/bpf/btf.c            |  1 +
 kernel/bpf/verifier.c       | 35 ++++++++++++++++++++++++++++++++---
 4 files changed, 56 insertions(+), 9 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f4df39e8c735..15164787ce7f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -968,6 +968,7 @@ struct bpf_insn_access_aux {
 		struct {
 			struct btf *btf;
 			u32 btf_id;
+			u32 ref_obj_id;
 		};
 	};
 	struct bpf_verifier_log *log; /* for verbose logs */
@@ -1481,6 +1482,8 @@ struct bpf_ctx_arg_aux {
 	enum bpf_reg_type reg_type;
 	struct btf *btf;
 	u32 btf_id;
+	u32 ref_obj_id;
+	bool refcounted;
 };
 
 struct btf_mod_pair {
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 9b7f3b9c5262..68df8d8b6db3 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -146,6 +146,7 @@ void bpf_struct_ops_image_free(void *image)
 }
 
 #define MAYBE_NULL_SUFFIX "__nullable"
+#define REFCOUNTED_SUFFIX "__ref"
 
 /* Prepare argument info for every nullable argument of a member of a
  * struct_ops type.
@@ -174,11 +175,13 @@ static int prepare_arg_info(struct btf *btf,
 			    struct bpf_struct_ops_arg_info *arg_info)
 {
 	const struct btf_type *stub_func_proto, *pointed_type;
+	bool is_nullable = false, is_refcounted = false;
 	const struct btf_param *stub_args, *args;
 	struct bpf_ctx_arg_aux *info, *info_buf;
 	u32 nargs, arg_no, info_cnt = 0;
 	char ksym[KSYM_SYMBOL_LEN];
 	const char *stub_fname;
+	const char *suffix;
 	s32 stub_func_id;
 	u32 arg_btf_id;
 	int offset;
@@ -223,12 +226,19 @@ static int prepare_arg_info(struct btf *btf,
 	info = info_buf;
 	for (arg_no = 0; arg_no < nargs; arg_no++) {
 		/* Skip arguments that is not suffixed with
-		 * "__nullable".
+		 * "__nullable or __ref".
 		 */
-		if (!btf_param_match_suffix(btf, &stub_args[arg_no],
-					    MAYBE_NULL_SUFFIX))
+		is_nullable = btf_param_match_suffix(btf, &stub_args[arg_no],
+						     MAYBE_NULL_SUFFIX);
+		is_refcounted = btf_param_match_suffix(btf, &stub_args[arg_no],
+						       REFCOUNTED_SUFFIX);
+		if (!is_nullable && !is_refcounted)
 			continue;
 
+		if (is_nullable)
+			suffix = MAYBE_NULL_SUFFIX;
+		else if (is_refcounted)
+			suffix = REFCOUNTED_SUFFIX;
 		/* Should be a pointer to struct */
 		pointed_type = btf_type_resolve_ptr(btf,
 						    args[arg_no].type,
@@ -236,7 +246,7 @@ static int prepare_arg_info(struct btf *btf,
 		if (!pointed_type ||
 		    !btf_type_is_struct(pointed_type)) {
 			pr_warn("stub function %s has %s tagging to an unsupported type\n",
-				stub_fname, MAYBE_NULL_SUFFIX);
+				stub_fname, suffix);
 			goto err_out;
 		}
 
@@ -254,11 +264,15 @@ static int prepare_arg_info(struct btf *btf,
 		}
 
 		/* Fill the information of the new argument */
-		info->reg_type =
-			PTR_TRUSTED | PTR_TO_BTF_ID | PTR_MAYBE_NULL;
 		info->btf_id = arg_btf_id;
 		info->btf = btf;
 		info->offset = offset;
+		if (is_nullable) {
+			info->reg_type = PTR_TRUSTED | PTR_TO_BTF_ID | PTR_MAYBE_NULL;
+		} else if (is_refcounted) {
+			info->reg_type = PTR_TRUSTED | PTR_TO_BTF_ID;
+			info->refcounted = true;
+		}
 
 		info++;
 		info_cnt++;
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index 9de6acddd479..fd3470fbd144 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -6677,6 +6677,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 			info->reg_type = ctx_arg_info->reg_type;
 			info->btf = ctx_arg_info->btf ? : btf_vmlinux;
 			info->btf_id = ctx_arg_info->btf_id;
+			info->ref_obj_id = ctx_arg_info->refcounted ? ctx_arg_info->ref_obj_id : 0;
 			return true;
 		}
 	}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a41ba019780f..a0f51903e977 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1543,6 +1543,17 @@ static void release_reference_state(struct bpf_verifier_state *state, int idx)
 	return;
 }
 
+static bool find_reference_state(struct bpf_verifier_state *state, int ptr_id)
+{
+	int i;
+
+	for (i = 0; i < state->acquired_refs; i++)
+		if (state->refs[i].id == ptr_id)
+			return true;
+
+	return false;
+}
+
 static int release_lock_state(struct bpf_verifier_state *state, int type, int id, void *ptr)
 {
 	int i;
@@ -5981,7 +5992,8 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
 /* check access to 'struct bpf_context' fields.  Supports fixed offsets only */
 static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off, int size,
 			    enum bpf_access_type t, enum bpf_reg_type *reg_type,
-			    struct btf **btf, u32 *btf_id, bool *is_retval, bool is_ldsx)
+			    struct btf **btf, u32 *btf_id, bool *is_retval, bool is_ldsx,
+			    u32 *ref_obj_id)
 {
 	struct bpf_insn_access_aux info = {
 		.reg_type = *reg_type,
@@ -6003,8 +6015,16 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
 		*is_retval = info.is_retval;
 
 		if (base_type(*reg_type) == PTR_TO_BTF_ID) {
+			if (info.ref_obj_id &&
+			    !find_reference_state(env->cur_state, info.ref_obj_id)) {
+				verbose(env, "invalid bpf_context access off=%d. Reference may already be released\n",
+					off);
+				return -EACCES;
+			}
+
 			*btf = info.btf;
 			*btf_id = info.btf_id;
+			*ref_obj_id = info.ref_obj_id;
 		} else {
 			env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
 		}
@@ -7367,7 +7387,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 		struct bpf_retval_range range;
 		enum bpf_reg_type reg_type = SCALAR_VALUE;
 		struct btf *btf = NULL;
-		u32 btf_id = 0;
+		u32 btf_id = 0, ref_obj_id = 0;
 
 		if (t == BPF_WRITE && value_regno >= 0 &&
 		    is_pointer_value(env, value_regno)) {
@@ -7380,7 +7400,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 			return err;
 
 		err = check_ctx_access(env, insn_idx, off, size, t, &reg_type, &btf,
-				       &btf_id, &is_retval, is_ldsx);
+				       &btf_id, &is_retval, is_ldsx, &ref_obj_id);
 		if (err)
 			verbose_linfo(env, insn_idx, "; ");
 		if (!err && t == BPF_READ && value_regno >= 0) {
@@ -7411,6 +7431,7 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 				if (base_type(reg_type) == PTR_TO_BTF_ID) {
 					regs[value_regno].btf = btf;
 					regs[value_regno].btf_id = btf_id;
+					regs[value_regno].ref_obj_id = ref_obj_id;
 				}
 			}
 			regs[value_regno].type = reg_type;
@@ -22148,6 +22169,7 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog)
 {
 	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
 	struct bpf_subprog_info *sub = subprog_info(env, subprog);
+	struct bpf_prog_aux *aux = env->prog->aux;
 	struct bpf_verifier_state *state;
 	struct bpf_reg_state *regs;
 	int ret, i;
@@ -22255,6 +22277,13 @@ static int do_check_common(struct bpf_verifier_env *env, int subprog)
 		mark_reg_known_zero(env, regs, BPF_REG_1);
 	}
 
+	/* Acquire references for struct_ops program arguments tagged with "__ref" */
+	if (!subprog && env->prog->type == BPF_PROG_TYPE_STRUCT_OPS) {
+		for (i = 0; i < aux->ctx_arg_info_size; i++)
+			aux->ctx_arg_info[i].ref_obj_id = aux->ctx_arg_info[i].refcounted ?
+							  acquire_reference(env, 0) : 0;
+	}
+
 	ret = do_check(env);
 out:
 	/* check for NULL is necessary, since cur_state can be freed inside
-- 
2.47.1


  parent reply	other threads:[~2025-01-31 19:29 UTC|newest]

Thread overview: 33+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-01-31 19:28 [PATCH bpf-next v3 00/18] bpf qdisc Amery Hung
2025-01-31 19:28 ` [PATCH bpf-next v3 01/18] bpf: Make every prog keep a copy of ctx_arg_info Amery Hung
2025-02-04  5:57   ` Eduard Zingerman
2025-01-31 19:28 ` Amery Hung [this message]
2025-02-04  5:57   ` [PATCH bpf-next v3 02/18] bpf: Support getting referenced kptr from struct_ops argument Eduard Zingerman
2025-01-31 19:28 ` [PATCH bpf-next v3 03/18] selftests/bpf: Test referenced kptr arguments of struct_ops programs Amery Hung
2025-02-04  5:58   ` Eduard Zingerman
2025-01-31 19:28 ` [PATCH bpf-next v3 04/18] bpf: Allow struct_ops prog to return referenced kptr Amery Hung
2025-02-04  5:58   ` Eduard Zingerman
2025-01-31 19:28 ` [PATCH bpf-next v3 05/18] selftests/bpf: Test returning referenced kptr from struct_ops programs Amery Hung
2025-02-04  5:58   ` Eduard Zingerman
2025-01-31 19:28 ` [PATCH bpf-next v3 06/18] bpf: Prepare to reuse get_ctx_arg_idx Amery Hung
2025-01-31 19:28 ` [PATCH bpf-next v3 07/18] bpf: Generalize finding member offset of struct_ops prog Amery Hung
2025-02-04  5:59   ` Eduard Zingerman
2025-01-31 19:28 ` [PATCH bpf-next v3 08/18] bpf: net_sched: Support implementation of Qdisc_ops in bpf Amery Hung
2025-02-04 22:18   ` Jakub Kicinski
2025-02-04 23:21     ` Amery Hung
2025-02-05  1:27       ` Jakub Kicinski
2025-02-05  4:13         ` Amery Hung
2025-01-31 19:28 ` [PATCH bpf-next v3 09/18] bpf: net_sched: Add basic bpf qdisc kfuncs Amery Hung
2025-01-31 19:28 ` [PATCH bpf-next v3 10/18] bpf: Search and add kfuncs in struct_ops prologue and epilogue Amery Hung
2025-02-04  5:59   ` Eduard Zingerman
2025-01-31 19:28 ` [PATCH bpf-next v3 11/18] bpf: net_sched: Add a qdisc watchdog timer Amery Hung
2025-01-31 19:28 ` [PATCH bpf-next v3 12/18] bpf: net_sched: Support updating bstats Amery Hung
2025-01-31 19:28 ` [PATCH bpf-next v3 13/18] bpf: net_sched: Support updating qstats Amery Hung
2025-01-31 19:28 ` [PATCH bpf-next v3 14/18] bpf: net_sched: Allow writing to more Qdisc members Amery Hung
2025-01-31 19:28 ` [PATCH bpf-next v3 15/18] libbpf: Support creating and destroying qdisc Amery Hung
2025-01-31 19:28 ` [PATCH bpf-next v3 16/18] selftests/bpf: Add a basic fifo qdisc test Amery Hung
2025-01-31 19:28 ` [PATCH bpf-next v3 17/18] selftests/bpf: Add a bpf fq qdisc to selftest Amery Hung
2025-01-31 19:28 ` [PATCH bpf-next v3 18/18] selftests/bpf: Test attaching bpf qdisc to mq and non root Amery Hung
2025-02-01  0:45   ` Amery Hung
2025-02-04  5:58   ` Eduard Zingerman
2025-02-04 18:17     ` Amery Hung

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20250131192912.133796-3-ameryhung@gmail.com \
    --to=ameryhung@gmail.com \
    --cc=alexei.starovoitov@gmail.com \
    --cc=andrii@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=cong.wang@bytedance.com \
    --cc=daniel@iogearbox.net \
    --cc=edumazet@google.com \
    --cc=ekarani.silvestre@ccc.ufcg.edu.br \
    --cc=jhs@mojatatu.com \
    --cc=jiri@resnulli.us \
    --cc=kernel-team@meta.com \
    --cc=kuba@kernel.org \
    --cc=martin.lau@kernel.org \
    --cc=ming.lei@redhat.com \
    --cc=netdev@vger.kernel.org \
    --cc=sinquersw@gmail.com \
    --cc=stfomichev@gmail.com \
    --cc=toke@redhat.com \
    --cc=xiyou.wangcong@gmail.com \
    --cc=yangpeihao@sjtu.edu.cn \
    --cc=yepeilin.cs@gmail.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).