From: Delyan Kratunov <delyank@fb.com>
To: "daniel@iogearbox.net" <daniel@iogearbox.net>,
"ast@kernel.org" <ast@kernel.org>,
"andrii@kernel.org" <andrii@kernel.org>,
"bpf@vger.kernel.org" <bpf@vger.kernel.org>
Subject: [PATCH RFC bpf-next 1/3] bpf: allow maps to hold bpf_delayed_work fields
Date: Mon, 11 Jul 2022 21:48:13 +0000 [thread overview]
Message-ID: <37859ca03aaaba23f60288de044a3a10d52a79b4.1657576063.git.delyank@fb.com> (raw)
In-Reply-To: <cover.1657576063.git.delyank@fb.com>
Similarly to bpf_timer, bpf_delayed_work represents a callback that will
be executed at a later time, in a different execution context.
Its treatment in maps is practically the same as that of timers (to a
degree that perhaps calls for refactoring), except that releasing the
work does not need to release any resources - we will wait for pending
executions in the program destruction path.
Signed-off-by: Delyan Kratunov <delyank@fb.com>
---
include/linux/bpf.h | 9 ++++++++-
include/linux/btf.h | 1 +
include/uapi/linux/bpf.h | 8 ++++++++
kernel/bpf/btf.c | 21 +++++++++++++++++++++
kernel/bpf/syscall.c | 24 ++++++++++++++++++++++--
kernel/bpf/verifier.c | 9 +++++++++
tools/include/uapi/linux/bpf.h | 8 ++++++++
7 files changed, 77 insertions(+), 3 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 0edd7d2c0064..ad9d2cfb0411 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -164,7 +164,8 @@ enum {
BPF_MAP_VALUE_OFF_MAX = 8,
BPF_MAP_OFF_ARR_MAX = BPF_MAP_VALUE_OFF_MAX +
1 + /* for bpf_spin_lock */
- 1, /* for bpf_timer */
+ 1 + /* for bpf_timer */
+ 1, /* for bpf_delayed_work */
};
enum bpf_kptr_type {
@@ -212,6 +213,7 @@ struct bpf_map {
int spin_lock_off; /* >=0 valid offset, <0 error */
struct bpf_map_value_off *kptr_off_tab;
int timer_off; /* >=0 valid offset, <0 error */
+ int delayed_work_off; /* >=0 valid offset, <0 error */
u32 id;
int numa_node;
u32 btf_key_type_id;
@@ -256,6 +258,11 @@ static inline bool map_value_has_timer(const struct bpf_map *map)
return map->timer_off >= 0;
}
+static inline bool map_value_has_delayed_work(const struct bpf_map *map)
+{
+ return map->delayed_work_off >= 0;
+}
+
static inline bool map_value_has_kptrs(const struct bpf_map *map)
{
return !IS_ERR_OR_NULL(map->kptr_off_tab);
diff --git a/include/linux/btf.h b/include/linux/btf.h
index 1bfed7fa0428..2b8f473a6aa0 100644
--- a/include/linux/btf.h
+++ b/include/linux/btf.h
@@ -132,6 +132,7 @@ bool btf_member_is_reg_int(const struct btf *btf, const struct btf_type *s,
u32 expected_offset, u32 expected_size);
int btf_find_spin_lock(const struct btf *btf, const struct btf_type *t);
int btf_find_timer(const struct btf *btf, const struct btf_type *t);
+int btf_find_delayed_work(const struct btf *btf, const struct btf_type *t);
struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf,
const struct btf_type *t);
bool btf_type_is_void(const struct btf_type *t);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e81362891596..d68fc4f472f1 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -6691,6 +6691,14 @@ struct bpf_dynptr {
__u64 :64;
} __attribute__((aligned(8)));
+struct bpf_delayed_work {
+ __u64 :64;
+ __u64 :64;
+ __u64 :64;
+ __u64 :64;
+ __u64 :64;
+} __attribute__((aligned(8)));
+
struct bpf_sysctl {
__u32 write; /* Sysctl is being read (= 0) or written (= 1).
* Allows 1,2,4-byte read, but no write.
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index f08037c31dd7..e4ab52cc25fe 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3196,6 +3196,7 @@ enum btf_field_type {
BTF_FIELD_SPIN_LOCK,
BTF_FIELD_TIMER,
BTF_FIELD_KPTR,
+ BTF_FIELD_DELAYED_WORK,
};
enum {
@@ -3283,6 +3284,7 @@ static int btf_find_struct_field(const struct btf *btf, const struct btf_type *t
switch (field_type) {
case BTF_FIELD_SPIN_LOCK:
case BTF_FIELD_TIMER:
+ case BTF_FIELD_DELAYED_WORK:
ret = btf_find_struct(btf, member_type, off, sz,
idx < info_cnt ? &info[idx] : &tmp);
if (ret < 0)
@@ -3333,6 +3335,7 @@ static int btf_find_datasec_var(const struct btf *btf, const struct btf_type *t,
switch (field_type) {
case BTF_FIELD_SPIN_LOCK:
case BTF_FIELD_TIMER:
+ case BTF_FIELD_DELAYED_WORK:
ret = btf_find_struct(btf, var_type, off, sz,
idx < info_cnt ? &info[idx] : &tmp);
if (ret < 0)
@@ -3375,6 +3378,11 @@ static int btf_find_field(const struct btf *btf, const struct btf_type *t,
sz = sizeof(struct bpf_timer);
align = __alignof__(struct bpf_timer);
break;
+ case BTF_FIELD_DELAYED_WORK:
+ name = "bpf_delayed_work";
+ sz = sizeof(struct bpf_delayed_work);
+ align = __alignof__(struct bpf_delayed_work);
+ break;
case BTF_FIELD_KPTR:
name = NULL;
sz = sizeof(u64);
@@ -3421,6 +3429,19 @@ int btf_find_timer(const struct btf *btf, const struct btf_type *t)
return info.off;
}
+int btf_find_delayed_work(const struct btf *btf, const struct btf_type *t)
+{
+ struct btf_field_info info;
+ int ret;
+
+ ret = btf_find_field(btf, t, BTF_FIELD_DELAYED_WORK, &info, 1);
+ if (ret < 0)
+ return ret;
+ if (!ret)
+ return -ENOENT;
+ return info.off;
+}
+
struct bpf_map_value_off *btf_parse_kptrs(const struct btf *btf,
const struct btf_type *t)
{
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 7d5af5b99f0d..041972305344 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -914,10 +914,11 @@ static int bpf_map_alloc_off_arr(struct bpf_map *map)
bool has_spin_lock = map_value_has_spin_lock(map);
bool has_timer = map_value_has_timer(map);
bool has_kptrs = map_value_has_kptrs(map);
+ bool has_delayed_work = map_value_has_delayed_work(map);
struct bpf_map_off_arr *off_arr;
u32 i;
- if (!has_spin_lock && !has_timer && !has_kptrs) {
+ if (!has_spin_lock && !has_timer && !has_kptrs && !has_delayed_work) {
map->off_arr = NULL;
return 0;
}
@@ -953,6 +954,13 @@ static int bpf_map_alloc_off_arr(struct bpf_map *map)
}
off_arr->cnt += tab->nr_off;
}
+ if (has_delayed_work) {
+ i = off_arr->cnt;
+
+ off_arr->field_off[i] = map->delayed_work_off;
+ off_arr->field_sz[i] = sizeof(struct bpf_delayed_work);
+ off_arr->cnt++;
+ }
if (off_arr->cnt == 1)
return 0;
@@ -1014,6 +1022,16 @@ static int map_check_btf(struct bpf_map *map, const struct btf *btf,
return -EOPNOTSUPP;
}
+ map->delayed_work_off = btf_find_delayed_work(btf, value_type);
+ if (map_value_has_delayed_work(map)) {
+ if (map->map_flags & BPF_F_RDONLY_PROG)
+ return -EACCES;
+ if (map->map_type != BPF_MAP_TYPE_HASH &&
+ map->map_type != BPF_MAP_TYPE_LRU_HASH &&
+ map->map_type != BPF_MAP_TYPE_ARRAY)
+ return -EOPNOTSUPP;
+ }
+
map->kptr_off_tab = btf_parse_kptrs(btf, value_type);
if (map_value_has_kptrs(map)) {
if (!bpf_capable()) {
@@ -1095,6 +1113,7 @@ static int map_create(union bpf_attr *attr)
map->spin_lock_off = -EINVAL;
map->timer_off = -EINVAL;
+ map->delayed_work_off = -EINVAL;
if (attr->btf_key_type_id || attr->btf_value_type_id ||
/* Even the map's value is a kernel's struct,
* the bpf_prog.o must have BTF to begin with
@@ -1863,7 +1882,8 @@ static int map_freeze(const union bpf_attr *attr)
return PTR_ERR(map);
if (map->map_type == BPF_MAP_TYPE_STRUCT_OPS ||
- map_value_has_timer(map) || map_value_has_kptrs(map)) {
+ map_value_has_timer(map) || map_value_has_kptrs(map) ||
+ map_value_has_delayed_work(map)) {
fdput(f);
return -ENOTSUPP;
}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 2859901ffbe3..9fd311b7a1ff 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3817,6 +3817,15 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
return -EACCES;
}
}
+ if (map_value_has_delayed_work(map) && src == ACCESS_DIRECT) {
+ u32 t = map->delayed_work_off;
+
+ if (reg->smin_value + off < t + sizeof(struct bpf_delayed_work) &&
+ t < reg->umax_value + off + size) {
+ verbose(env, "bpf_delayed_work cannot be accessed directly by load/store regno=%d off=%d\n", regno, off);
+ return -EACCES;
+ }
+ }
if (map_value_has_kptrs(map)) {
struct bpf_map_value_off *tab = map->kptr_off_tab;
int i;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index e81362891596..d68fc4f472f1 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -6691,6 +6691,14 @@ struct bpf_dynptr {
__u64 :64;
} __attribute__((aligned(8)));
+struct bpf_delayed_work {
+ __u64 :64;
+ __u64 :64;
+ __u64 :64;
+ __u64 :64;
+ __u64 :64;
+} __attribute__((aligned(8)));
+
struct bpf_sysctl {
__u32 write; /* Sysctl is being read (= 0) or written (= 1).
* Allows 1,2,4-byte read, but no write.
--
2.36.1
next prev parent reply other threads:[~2022-07-11 21:48 UTC|newest]
Thread overview: 13+ messages / expand[flat|nested] mbox.gz Atom feed top
2022-07-11 21:48 [PATCH RFC bpf-next 0/3] Execution context callbacks Delyan Kratunov
2022-07-11 21:48 ` [PATCH RFC bpf-next 2/3] bpf: add delayed_work mechanism Delyan Kratunov
2022-07-11 21:48 ` Delyan Kratunov [this message]
2022-07-14 4:23 ` [PATCH RFC bpf-next 1/3] bpf: allow maps to hold bpf_delayed_work fields Andrii Nakryiko
2022-07-11 21:48 ` [PATCH RFC bpf-next 3/3] selftests: delayed_work tests Delyan Kratunov
2022-07-12 18:07 ` [PATCH RFC bpf-next 0/3] Execution context callbacks sdf
2022-07-12 18:42 ` Delyan Kratunov
2022-07-12 22:51 ` sdf
2022-07-15 1:51 ` Alexei Starovoitov
2022-07-15 18:28 ` Delyan Kratunov
2022-07-19 19:02 ` Alexei Starovoitov
2022-07-19 22:12 ` Delyan Kratunov
2022-07-20 0:54 ` Alexei Starovoitov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=37859ca03aaaba23f60288de044a3a10d52a79b4.1657576063.git.delyank@fb.com \
--to=delyank@fb.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox