From: Yonghong Song <yhs@fb.com>
To: <bpf@vger.kernel.org>
Cc: Alexei Starovoitov <ast@kernel.org>,
Andrii Nakryiko <andrii@kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>, <kernel-team@fb.com>,
Martin KaFai Lau <martin.lau@kernel.org>
Subject: [PATCH bpf-next] bpf: Handle MEM_RCU type properly
Date: Mon, 28 Nov 2022 18:37:13 -0800 [thread overview]
Message-ID: <20221129023713.2216451-1-yhs@fb.com> (raw)
Commit 9bb00b2895cb ("bpf: Add kfunc bpf_rcu_read_lock/unlock()")
introduced MEM_RCU and bpf_rcu_read_lock/unlock() support. In that
commit, a rcu pointer is tagged with both MEM_RCU and PTR_TRUSTED
so that it can be passed into kfuncs or helpers as an argument.
Martin raised a good question in [1] such that the rcu pointer,
although being able to accessing the object, might have reference
count of 0. This might cause a problem if the rcu pointer is passed
to a kfunc which expects trusted arguments where ref count should
be greater than 0.
So this patch tries to fix this problem by tagging rcu pointer with
MEM_RCU only. Special acquire functions are needed to try to
acquire a reference with the consideration that the original rcu
pointer ref count could be 0. This special acquire function's
argument needs to be KF_RCU, a new introduced kfunc flag. In
verifier, KF_RCU will require the actual argument register type
to be MEM_RCU.
[1] https://lore.kernel.org/bpf/ac70f574-4023-664e-b711-e0d3b18117fd@linux.dev/
Fixes: 9bb00b2895cb ("bpf: Add kfunc bpf_rcu_read_lock/unlock()")
Signed-off-by: Yonghong Song <yhs@fb.com>
---
include/linux/bpf_verifier.h | 2 +-
include/linux/btf.h | 1 +
kernel/bpf/helpers.c | 14 ++++++++
kernel/bpf/verifier.c | 36 +++++++++++++------
.../selftests/bpf/prog_tests/cgrp_kfunc.c | 4 +--
.../selftests/bpf/prog_tests/task_kfunc.c | 4 +--
.../selftests/bpf/progs/rcu_read_lock.c | 7 ++--
7 files changed, 50 insertions(+), 18 deletions(-)
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index c05aa6e1f6f5..6f192dd9025e 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -683,7 +683,7 @@ static inline bool bpf_prog_check_recur(const struct bpf_prog *prog)
}
}
-#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | MEM_RCU | PTR_TRUSTED)
+#define BPF_REG_TRUSTED_MODIFIERS (MEM_ALLOC | PTR_TRUSTED)
static inline bool bpf_type_has_unsafe_modifiers(u32 type)
{
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 9ed00077db6e..cbd6e4096f8c 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -70,6 +70,7 @@
#define KF_TRUSTED_ARGS (1 << 4) /* kfunc only takes trusted pointer arguments */
#define KF_SLEEPABLE (1 << 5) /* kfunc may sleep */
#define KF_DESTRUCTIVE (1 << 6) /* kfunc performs destructive actions */
+#define KF_RCU (1 << 7) /* kfunc only takes rcu pointer arguments */
/*
* Return the name of the passed struct, if exists, or halt the build if for
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index a5a511430f2a..46fbe027f3b6 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -1837,6 +1837,19 @@ struct task_struct *bpf_task_acquire(struct task_struct *p)
return p;
}
+/**
+ * bpf_task_acquire_rcu - Acquire a reference to a rcu task object. A task
+ * acquired by this kfunc which is not stored in a map as a kptr, must be
+ * released by calling bpf_task_release().
+ * @p: The task on which a reference is being acquired or NULL.
+ */
+struct task_struct *bpf_task_acquire_rcu(struct task_struct *p)
+{
+ if (!refcount_inc_not_zero(&p->rcu_users))
+ return NULL;
+ return p;
+}
+
/**
* bpf_task_kptr_get - Acquire a reference on a struct task_struct kptr. A task
* kptr acquired by this kfunc which is not subsequently stored in a map, must
@@ -2013,6 +2026,7 @@ BTF_ID_FLAGS(func, bpf_list_push_back)
BTF_ID_FLAGS(func, bpf_list_pop_front, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_list_pop_back, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_acquire, KF_ACQUIRE | KF_TRUSTED_ARGS)
+BTF_ID_FLAGS(func, bpf_task_acquire_rcu, KF_ACQUIRE | KF_RCU | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_kptr_get, KF_ACQUIRE | KF_KPTR_GET | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_task_release, KF_RELEASE)
#ifdef CONFIG_CGROUPS
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 6599d25dae38..dda89f4d62a7 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -4275,7 +4275,7 @@ static bool is_trusted_reg(const struct bpf_reg_state *reg)
return true;
/* If a register is not referenced, it is trusted if it has the
- * MEM_ALLOC, MEM_RCU or PTR_TRUSTED type modifiers, and no others. Some of the
+ * MEM_ALLOC or PTR_TRUSTED type modifiers, and no others. Some of the
* other type modifiers may be safe, but we elect to take an opt-in
* approach here as some (e.g. PTR_UNTRUSTED and PTR_MAYBE_NULL) are
* not.
@@ -4287,6 +4287,11 @@ static bool is_trusted_reg(const struct bpf_reg_state *reg)
!bpf_type_has_unsafe_modifiers(reg->type);
}
+static bool is_rcu_reg(const struct bpf_reg_state *reg)
+{
+ return reg->type & MEM_RCU;
+}
+
static int check_pkt_ptr_alignment(struct bpf_verifier_env *env,
const struct bpf_reg_state *reg,
int off, int size, bool strict)
@@ -4775,12 +4780,10 @@ static int check_ptr_to_btf_access(struct bpf_verifier_env *env,
/* Mark value register as MEM_RCU only if it is protected by
* bpf_rcu_read_lock() and the ptr reg is trusted. MEM_RCU
* itself can already indicate trustedness inside the rcu
- * read lock region. Also mark it as PTR_TRUSTED.
+ * read lock region.
*/
if (!env->cur_state->active_rcu_lock || !is_trusted_reg(reg))
flag &= ~MEM_RCU;
- else
- flag |= PTR_TRUSTED;
} else if (reg->type & MEM_RCU) {
/* ptr (reg) is marked as MEM_RCU, but the struct field is not tagged
* with __rcu. Mark the flag as PTR_UNTRUSTED conservatively.
@@ -5945,7 +5948,7 @@ static const struct bpf_reg_types btf_ptr_types = {
.types = {
PTR_TO_BTF_ID,
PTR_TO_BTF_ID | PTR_TRUSTED,
- PTR_TO_BTF_ID | MEM_RCU | PTR_TRUSTED,
+ PTR_TO_BTF_ID | MEM_RCU,
},
};
static const struct bpf_reg_types percpu_btf_ptr_types = {
@@ -6124,7 +6127,7 @@ int check_func_arg_reg_off(struct bpf_verifier_env *env,
case PTR_TO_BTF_ID:
case PTR_TO_BTF_ID | MEM_ALLOC:
case PTR_TO_BTF_ID | PTR_TRUSTED:
- case PTR_TO_BTF_ID | MEM_RCU | PTR_TRUSTED:
+ case PTR_TO_BTF_ID | MEM_RCU:
case PTR_TO_BTF_ID | MEM_ALLOC | PTR_TRUSTED:
/* When referenced PTR_TO_BTF_ID is passed to release function,
* it's fixed offset must be 0. In the other cases, fixed offset
@@ -8022,6 +8025,11 @@ static bool is_kfunc_destructive(struct bpf_kfunc_call_arg_meta *meta)
return meta->kfunc_flags & KF_DESTRUCTIVE;
}
+static bool is_kfunc_rcu(struct bpf_kfunc_call_arg_meta *meta)
+{
+ return meta->kfunc_flags & KF_RCU;
+}
+
static bool is_kfunc_arg_kptr_get(struct bpf_kfunc_call_arg_meta *meta, int arg)
{
return arg == 0 && (meta->kfunc_flags & KF_KPTR_GET);
@@ -8706,13 +8714,19 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
switch (kf_arg_type) {
case KF_ARG_PTR_TO_ALLOC_BTF_ID:
case KF_ARG_PTR_TO_BTF_ID:
- if (!is_kfunc_trusted_args(meta))
+ if (!is_kfunc_trusted_args(meta) && !is_kfunc_rcu(meta))
break;
- if (!is_trusted_reg(reg)) {
- verbose(env, "R%d must be referenced or trusted\n", regno);
+ if (!is_trusted_reg(reg) && !is_rcu_reg(reg)) {
+ verbose(env, "R%d must be referenced, trusted or rcu\n", regno);
return -EINVAL;
}
+
+ if (is_kfunc_rcu(meta) != is_rcu_reg(reg)) {
+ verbose(env, "R%d does not match kf arg rcu tagging\n", regno);
+ return -EINVAL;
+ }
+
fallthrough;
case KF_ARG_PTR_TO_CTX:
/* Trusted arguments have the same offset checks as release arguments */
@@ -8823,7 +8837,7 @@ static int check_kfunc_args(struct bpf_verifier_env *env, struct bpf_kfunc_call_
case KF_ARG_PTR_TO_BTF_ID:
/* Only base_type is checked, further checks are done here */
if ((base_type(reg->type) != PTR_TO_BTF_ID ||
- bpf_type_has_unsafe_modifiers(reg->type)) &&
+ (bpf_type_has_unsafe_modifiers(reg->type) && !is_rcu_reg(reg))) &&
!reg2btf_ids[base_type(reg->type)]) {
verbose(env, "arg#%d is %s ", i, reg_type_str(env, reg->type));
verbose(env, "expected %s or socket\n",
@@ -8938,7 +8952,7 @@ static int check_kfunc_call(struct bpf_verifier_env *env, struct bpf_insn *insn,
} else if (rcu_unlock) {
bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
if (reg->type & MEM_RCU) {
- reg->type &= ~(MEM_RCU | PTR_TRUSTED);
+ reg->type &= ~MEM_RCU;
reg->type |= PTR_UNTRUSTED;
}
}));
diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
index 973f0c5af965..5fbd9edd2c4c 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
@@ -93,10 +93,10 @@ static struct {
const char *prog_name;
const char *expected_err_msg;
} failure_tests[] = {
- {"cgrp_kfunc_acquire_untrusted", "R1 must be referenced or trusted"},
+ {"cgrp_kfunc_acquire_untrusted", "R1 must be referenced, trusted or rcu"},
{"cgrp_kfunc_acquire_fp", "arg#0 pointer type STRUCT cgroup must point"},
{"cgrp_kfunc_acquire_unsafe_kretprobe", "reg type unsupported for arg#0 function"},
- {"cgrp_kfunc_acquire_trusted_walked", "R1 must be referenced or trusted"},
+ {"cgrp_kfunc_acquire_trusted_walked", "R1 must be referenced, trusted or rcu"},
{"cgrp_kfunc_acquire_null", "arg#0 pointer type STRUCT cgroup must point"},
{"cgrp_kfunc_acquire_unreleased", "Unreleased reference"},
{"cgrp_kfunc_get_non_kptr_param", "arg#0 expected pointer to map value"},
diff --git a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c
index ffd8ef4303c8..80708c073de6 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c
@@ -87,10 +87,10 @@ static struct {
const char *prog_name;
const char *expected_err_msg;
} failure_tests[] = {
- {"task_kfunc_acquire_untrusted", "R1 must be referenced or trusted"},
+ {"task_kfunc_acquire_untrusted", "R1 must be referenced, trusted or rcu"},
{"task_kfunc_acquire_fp", "arg#0 pointer type STRUCT task_struct must point"},
{"task_kfunc_acquire_unsafe_kretprobe", "reg type unsupported for arg#0 function"},
- {"task_kfunc_acquire_trusted_walked", "R1 must be referenced or trusted"},
+ {"task_kfunc_acquire_trusted_walked", "R1 must be referenced, trusted or rcu"},
{"task_kfunc_acquire_null", "arg#0 pointer type STRUCT task_struct must point"},
{"task_kfunc_acquire_unreleased", "Unreleased reference"},
{"task_kfunc_get_non_kptr_param", "arg#0 expected pointer to map value"},
diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
index 94a970076b98..3035cd080ec4 100644
--- a/tools/testing/selftests/bpf/progs/rcu_read_lock.c
+++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
@@ -23,7 +23,7 @@ struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
void bpf_key_put(struct bpf_key *key) __ksym;
void bpf_rcu_read_lock(void) __ksym;
void bpf_rcu_read_unlock(void) __ksym;
-struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym;
+struct task_struct *bpf_task_acquire_rcu(struct task_struct *p) __ksym;
void bpf_task_release(struct task_struct *p) __ksym;
SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
@@ -135,8 +135,11 @@ int task_acquire(void *ctx)
bpf_rcu_read_lock();
real_parent = task->real_parent;
/* acquire a reference which can be used outside rcu read lock region */
- real_parent = bpf_task_acquire(real_parent);
+ real_parent = bpf_task_acquire_rcu(real_parent);
bpf_rcu_read_unlock();
+ if (!real_parent)
+ return 0;
+
(void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
bpf_task_release(real_parent);
return 0;
--
2.30.2
next reply other threads:[~2022-11-29 2:37 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-11-29 2:37 Yonghong Song [this message]
2022-11-30 0:11 ` [PATCH bpf-next] bpf: Handle MEM_RCU type properly Yonghong Song
2022-12-01 7:05 ` Martin KaFai Lau
2022-12-01 17:47 ` Yonghong Song
2022-12-01 22:05 ` Martin KaFai Lau
2022-12-02 0:08 ` Yonghong Song
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20221129023713.2216451-1-yhs@fb.com \
--to=yhs@fb.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=kernel-team@fb.com \
--cc=martin.lau@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox