From: Yonghong Song <yhs@fb.com>
To: Andrii Nakryiko <andriin@fb.com>, <bpf@vger.kernel.org>,
Martin KaFai Lau <kafai@fb.com>, <netdev@vger.kernel.org>
Cc: Alexei Starovoitov <ast@fb.com>,
Daniel Borkmann <daniel@iogearbox.net>, <kernel-team@fb.com>
Subject: [PATCH bpf-next v1 07/19] bpf: create anonymous bpf iterator
Date: Mon, 27 Apr 2020 13:12:42 -0700 [thread overview]
Message-ID: <20200427201242.2995160-1-yhs@fb.com> (raw)
In-Reply-To: <20200427201235.2994549-1-yhs@fb.com>
A new bpf command BPF_ITER_CREATE is added.
The anonymous bpf iterator is seq_file based.
The seq_file private data are referenced by targets.
The bpf_iter infrastructure allocates additional space
at seq_file->private, after the space used by targets,
to store some metadata, e.g.,
prog: prog to run
session_id: a unique id for each opened seq_file
seq_num: how many times bpf programs are queried in this session
has_last: indicate whether or not bpf_prog has been called after
all valid objects have been processed
A map between file and prog/link is established to help
fops->release(). When fops->release() is called, the bpf program
cannot be located from the inode and file alone, since the target
seq_priv_size is not available. This map helps retrieve the prog
whose reference count needs to be decremented.
Signed-off-by: Yonghong Song <yhs@fb.com>
---
include/linux/bpf.h | 3 +
include/uapi/linux/bpf.h | 6 ++
kernel/bpf/bpf_iter.c | 162 ++++++++++++++++++++++++++++++++-
kernel/bpf/syscall.c | 27 ++++++
tools/include/uapi/linux/bpf.h | 6 ++
5 files changed, 203 insertions(+), 1 deletion(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 4fc39d9b5cd0..0f0cafc65a04 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1112,6 +1112,8 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd);
int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
int bpf_obj_get_user(const char __user *pathname, int flags);
+#define BPF_DUMP_SEQ_NET_PRIVATE BIT(0)
+
struct bpf_iter_reg {
const char *target;
const char *target_func_name;
@@ -1133,6 +1135,7 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx);
int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
int bpf_iter_link_replace(struct bpf_link *link, struct bpf_prog *old_prog,
struct bpf_prog *new_prog);
+int bpf_iter_new_fd(struct bpf_link *link);
int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f39b9fec37ab..576651110d16 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -113,6 +113,7 @@ enum bpf_cmd {
BPF_MAP_DELETE_BATCH,
BPF_LINK_CREATE,
BPF_LINK_UPDATE,
+ BPF_ITER_CREATE,
};
enum bpf_map_type {
@@ -590,6 +591,11 @@ union bpf_attr {
__u32 old_prog_fd;
} link_update;
+ struct { /* struct used by BPF_ITER_CREATE command */
+ __u32 link_fd;
+ __u32 flags;
+ } iter_create;
+
} __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index fc1ce5ee5c3f..1f4e778d1814 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2020 Facebook */
#include <linux/fs.h>
+#include <linux/anon_inodes.h>
#include <linux/filter.h>
#include <linux/bpf.h>
@@ -19,6 +20,19 @@ struct bpf_iter_link {
struct bpf_iter_target_info *tinfo;
};
+struct extra_priv_data { /* bpf_iter metadata stored after the target's seq_file private data */
+ struct bpf_prog *prog; /* prog returned by bpf_iter_get_prog() */
+ u64 session_id; /* taken from the global session_id counter in init_seq_file() */
+ u64 seq_num; /* incremented on each successful bpf_iter_get_prog() call */
+ bool has_last; /* set from is_last; once true, bpf_iter_get_prog() returns NULL */
+};
+
+struct anon_file_prog_assoc { /* one <file, prog> entry on the anon_iter_info list */
+ struct list_head list; /* linkage into anon_iter_info */
+ struct file *file; /* anon iterator file, used as the lookup key at release */
+ struct bpf_prog *prog; /* prog whose reference is dropped at release */
+};
+
static struct list_head targets;
static struct mutex targets_mutex;
static bool bpf_iter_inited = false;
@@ -26,6 +40,50 @@ static bool bpf_iter_inited = false;
/* protect bpf_iter_link.link->prog update */
static struct mutex bpf_iter_mutex;
+/* In the anon seq_file release function, the prog cannot be
+ * retrieved because the target seq_priv_size is not available.
+ * Keep a list of <anon_file, prog> mappings so that the prog
+ * can be released properly at file release time.
+ */
+static struct list_head anon_iter_info;
+static struct mutex anon_iter_info_mutex;
+
+/* incremented on every opened seq_file */
+static atomic64_t session_id;
+
+/* Size of the seq_file private area: the target's own data, padded to
+ * an 8-byte boundary, followed by struct extra_priv_data.
+ */
+static u32 get_total_priv_dsize(u32 old_size)
+{
+	u32 meta_offset = roundup(old_size, 8);
+
+	return meta_offset + sizeof(struct extra_priv_data);
+}
+
+/* Locate struct extra_priv_data, which sits at the first 8-byte aligned
+ * offset past the target's private data.
+ */
+static void *get_extra_priv_dptr(void *old_ptr, u32 old_size)
+{
+	u32 meta_offset = roundup(old_size, 8);
+
+	return old_ptr + meta_offset;
+}
+
+/* ->release() for anonymous bpf iterator files.
+ *
+ * Drops the prog reference recorded for @file on the anon_iter_info
+ * list, then releases the seq_file state.  Returns 0 if no seq_file
+ * was ever attached to @file, otherwise the result of
+ * seq_release_private().
+ */
+static int anon_iter_release(struct inode *inode, struct file *file)
+{
+	struct anon_file_prog_assoc *finfo;
+
+	mutex_lock(&anon_iter_info_mutex);
+	list_for_each_entry(finfo, &anon_iter_info, list) {
+		if (finfo->file == file) {
+			bpf_prog_put(finfo->prog);
+			/* the walk stops right after the removal, so the
+			 * non-_safe iterator is sufficient here
+			 */
+			list_del(&finfo->list);
+			kfree(finfo);
+			break;
+		}
+	}
+	mutex_unlock(&anon_iter_info_mutex);
+
+	/* bpf_iter_new_fd() creates the file with NULL private data and
+	 * calls fput() when prepare_seq_file() fails.  In that case no
+	 * seq_file was attached and seq_release_private() would
+	 * dereference a NULL pointer.
+	 */
+	if (!file->private_data)
+		return 0;
+
+	return seq_release_private(inode, file);
+}
+
+static const struct file_operations anon_bpf_iter_fops = { /* also compared against f_op in bpf_iter_get_prog() to recognize anon iterator files */
+ .read = seq_read,
+ .release = anon_iter_release, /* drops the prog reference taken in prepare_seq_file() */
+};
+
int bpf_iter_reg_target(struct bpf_iter_reg *reg_info)
{
struct bpf_iter_target_info *tinfo;
@@ -37,6 +95,8 @@ int bpf_iter_reg_target(struct bpf_iter_reg *reg_info)
INIT_LIST_HEAD(&targets);
mutex_init(&targets_mutex);
mutex_init(&bpf_iter_mutex);
+ INIT_LIST_HEAD(&anon_iter_info);
+ mutex_init(&anon_iter_info_mutex);
bpf_iter_inited = true;
}
@@ -61,7 +121,20 @@ int bpf_iter_reg_target(struct bpf_iter_reg *reg_info)
struct bpf_prog *bpf_iter_get_prog(struct seq_file *seq, u32 priv_data_size,
u64 *session_id, u64 *seq_num, bool is_last)
{ /* Return the prog stored for this seq_file and fill *session_id / *seq_num, or NULL when there is nothing to run */
- return NULL;
+ struct extra_priv_data *extra_data;
+
+ if (seq->file->f_op != &anon_bpf_iter_fops) /* not an anon iterator file: no prog to run */
+ return NULL;
+
+ extra_data = get_extra_priv_dptr(seq->private, priv_data_size); /* metadata follows the target's private data */
+ if (extra_data->has_last) /* an earlier call already passed is_last == true */
+ return NULL;
+
+ *session_id = extra_data->session_id;
+ *seq_num = extra_data->seq_num++; /* report the current count, then advance it */
+ extra_data->has_last = is_last;
+
+ return extra_data->prog;
}
int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
@@ -150,3 +223,90 @@ int bpf_iter_link_replace(struct bpf_link *link, struct bpf_prog *old_prog,
mutex_unlock(&bpf_iter_mutex);
return ret;
}
+
+/* Fill in the freshly allocated seq_file private area for a new iterator
+ * session: the net namespace for targets that ask for it, then the
+ * bpf_iter metadata that follows the target's own data.
+ */
+static void init_seq_file(void *priv_data, struct bpf_iter_target_info *tinfo,
+			  struct bpf_prog *prog)
+{
+	struct extra_priv_data *meta =
+		get_extra_priv_dptr(priv_data, tinfo->seq_priv_size);
+
+	if (tinfo->target_feature & BPF_DUMP_SEQ_NET_PRIVATE) {
+		struct seq_net_private *net_priv = priv_data;
+
+		/* NOTE(review): no reference on net_ns is visibly taken
+		 * here - confirm the namespace outlives the open file.
+		 */
+		set_seq_net_private(net_priv, current->nsproxy->net_ns);
+	}
+
+	meta->prog = prog;
+	meta->session_id = atomic64_add_return(1, &session_id);
+	meta->seq_num = 0;
+	meta->has_last = false;
+}
+
+/* Attach seq_file state for a new iterator session to @file.
+ *
+ * Takes a reference on the link's current prog, allocates the seq_file
+ * private area (target data plus bpf_iter metadata) and records the
+ * <file, prog> pair so that anon_iter_release() can drop the prog
+ * reference later.
+ *
+ * Returns 0 on success or -ENOMEM on allocation failure.
+ */
+static int prepare_seq_file(struct file *file, struct bpf_iter_link *link)
+{
+	struct anon_file_prog_assoc *finfo;
+	struct bpf_iter_target_info *tinfo;
+	struct bpf_prog *prog;
+	u32 total_priv_dsize;
+	void *priv_data;
+
+	finfo = kmalloc(sizeof(*finfo), GFP_USER | __GFP_NOWARN);
+	if (!finfo)
+		return -ENOMEM;
+
+	/* bpf_iter_mutex protects link->link.prog against a concurrent update */
+	mutex_lock(&bpf_iter_mutex);
+	prog = link->link.prog;
+	bpf_prog_inc(prog);
+	mutex_unlock(&bpf_iter_mutex);
+
+	tinfo = link->tinfo;
+	total_priv_dsize = get_total_priv_dsize(tinfo->seq_priv_size);
+	priv_data = __seq_open_private(file, tinfo->seq_ops, total_priv_dsize);
+	if (!priv_data) {
+		/* The link's prog may have been replaced since the mutex
+		 * was dropped, so this can be the last reference.  Use
+		 * bpf_prog_put(), which frees the prog on the final put,
+		 * instead of bpf_prog_sub(), which only decrements.
+		 */
+		bpf_prog_put(prog);
+		kfree(finfo);
+		return -ENOMEM;
+	}
+
+	init_seq_file(priv_data, tinfo, prog);
+
+	finfo->file = file;
+	finfo->prog = prog;
+
+	mutex_lock(&anon_iter_info_mutex);
+	list_add(&finfo->list, &anon_iter_info);
+	mutex_unlock(&anon_iter_info_mutex);
+	return 0;
+}
+
+/* Create an anonymous bpf iterator file for @link and install it on a
+ * new O_CLOEXEC file descriptor.
+ *
+ * Returns the new fd on success, -EINVAL if @link is not a bpf_iter
+ * link, or the error from fd allocation, anon inode file creation or
+ * seq_file preparation.
+ */
+int bpf_iter_new_fd(struct bpf_link *link)
+{
+	struct bpf_iter_link *iter_link;
+	struct file *anon_file;
+	int fd, ret;
+
+	if (link->ops != &bpf_iter_link_lops)
+		return -EINVAL;
+
+	fd = get_unused_fd_flags(O_CLOEXEC);
+	if (fd < 0)
+		return fd;
+
+	anon_file = anon_inode_getfile("bpf_iter", &anon_bpf_iter_fops,
+				       NULL, O_CLOEXEC);
+	if (IS_ERR(anon_file)) {
+		ret = PTR_ERR(anon_file);
+		put_unused_fd(fd);
+		return ret;
+	}
+
+	iter_link = container_of(link, struct bpf_iter_link, link);
+	ret = prepare_seq_file(anon_file, iter_link);
+	if (ret) {
+		fput(anon_file);
+		put_unused_fd(fd);
+		return ret;
+	}
+
+	/* publish the fd only after the file is fully set up */
+	fd_install(fd, anon_file);
+	return fd;
+}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index b7af4f006f2e..458f7000887a 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3696,6 +3696,30 @@ static int link_update(union bpf_attr *attr)
return ret;
}
+#define BPF_ITER_CREATE_LAST_FIELD iter_create.flags
+
+/* BPF_ITER_CREATE: create an anonymous iterator fd from a bpf_iter link fd.
+ *
+ * Returns the new fd on success, -EINVAL for malformed attributes or
+ * non-zero flags, or the error from link lookup / iterator creation.
+ */
+static int bpf_iter_create(union bpf_attr *attr)
+{
+	struct bpf_link *link;
+	int err;
+
+	if (CHECK_ATTR(BPF_ITER_CREATE))
+		return -EINVAL;
+
+	if (attr->iter_create.flags)
+		return -EINVAL;
+
+	link = bpf_link_get_from_fd(attr->iter_create.link_fd);
+	if (IS_ERR(link))
+		return PTR_ERR(link);
+
+	err = bpf_iter_new_fd(link);
+	/* The new iterator file holds its own prog reference and keeps no
+	 * pointer to the link, so the reference taken by
+	 * bpf_link_get_from_fd() must be dropped on success as well as on
+	 * failure.
+	 */
+	bpf_link_put(link);
+
+	return err;
+}
+
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
union bpf_attr attr;
@@ -3813,6 +3837,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_LINK_UPDATE:
err = link_update(&attr);
break;
+ case BPF_ITER_CREATE:
+ err = bpf_iter_create(&attr);
+ break;
default:
err = -EINVAL;
break;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f39b9fec37ab..576651110d16 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -113,6 +113,7 @@ enum bpf_cmd {
BPF_MAP_DELETE_BATCH,
BPF_LINK_CREATE,
BPF_LINK_UPDATE,
+ BPF_ITER_CREATE,
};
enum bpf_map_type {
@@ -590,6 +591,11 @@ union bpf_attr {
__u32 old_prog_fd;
} link_update;
+ struct { /* struct used by BPF_ITER_CREATE command */
+ __u32 link_fd;
+ __u32 flags;
+ } iter_create;
+
} __attribute__((aligned(8)));
/* The description below is an attempt at providing documentation to eBPF
--
2.24.1
next prev parent reply other threads:[~2020-04-27 20:14 UTC|newest]
Thread overview: 85+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-04-27 20:12 [PATCH bpf-next v1 00/19] bpf: implement bpf iterator for kernel data Yonghong Song
2020-04-27 20:12 ` [PATCH bpf-next v1 01/19] net: refactor net assignment for seq_net_private structure Yonghong Song
2020-04-29 5:38 ` Andrii Nakryiko
2020-04-27 20:12 ` [PATCH bpf-next v1 02/19] bpf: implement an interface to register bpf_iter targets Yonghong Song
2020-04-28 16:20 ` Martin KaFai Lau
2020-04-28 16:50 ` Yonghong Song
2020-04-27 20:12 ` [PATCH bpf-next v1 03/19] bpf: add bpf_map iterator Yonghong Song
2020-04-29 0:37 ` Martin KaFai Lau
2020-04-29 0:48 ` Alexei Starovoitov
2020-04-29 1:15 ` Yonghong Song
2020-04-29 2:44 ` Alexei Starovoitov
2020-04-29 5:09 ` Yonghong Song
2020-04-29 6:08 ` Andrii Nakryiko
2020-04-29 6:20 ` Yonghong Song
2020-04-29 6:30 ` Alexei Starovoitov
2020-04-29 6:40 ` Andrii Nakryiko
2020-04-29 6:44 ` Yonghong Song
2020-04-29 15:34 ` Alexei Starovoitov
2020-04-29 18:14 ` Yonghong Song
2020-04-29 19:19 ` Andrii Nakryiko
2020-04-29 20:15 ` Yonghong Song
2020-04-30 3:06 ` Alexei Starovoitov
2020-04-30 4:01 ` Yonghong Song
2020-04-29 6:34 ` Martin KaFai Lau
2020-04-29 6:51 ` Yonghong Song
2020-04-29 19:25 ` Andrii Nakryiko
2020-04-29 1:02 ` Yonghong Song
2020-04-29 6:04 ` Andrii Nakryiko
2020-04-27 20:12 ` [PATCH bpf-next v1 04/19] bpf: allow loading of a bpf_iter program Yonghong Song
2020-04-29 0:54 ` Martin KaFai Lau
2020-04-29 1:27 ` Yonghong Song
2020-04-27 20:12 ` [PATCH bpf-next v1 05/19] bpf: support bpf tracing/iter programs for BPF_LINK_CREATE Yonghong Song
2020-04-29 1:17 ` [Potential Spoof] " Martin KaFai Lau
2020-04-29 6:25 ` Andrii Nakryiko
2020-04-27 20:12 ` [PATCH bpf-next v1 06/19] bpf: support bpf tracing/iter programs for BPF_LINK_UPDATE Yonghong Song
2020-04-29 1:32 ` Martin KaFai Lau
2020-04-29 5:04 ` Yonghong Song
2020-04-29 5:58 ` Martin KaFai Lau
2020-04-29 6:32 ` Andrii Nakryiko
2020-04-29 6:41 ` Martin KaFai Lau
2020-04-27 20:12 ` Yonghong Song [this message]
2020-04-29 5:39 ` [PATCH bpf-next v1 07/19] bpf: create anonymous bpf iterator Martin KaFai Lau
2020-04-29 6:56 ` Andrii Nakryiko
2020-04-29 7:06 ` Yonghong Song
2020-04-29 18:16 ` Andrii Nakryiko
2020-04-29 18:46 ` Martin KaFai Lau
2020-04-29 19:20 ` Yonghong Song
2020-04-29 20:50 ` Martin KaFai Lau
2020-04-29 20:54 ` Yonghong Song
2020-04-29 19:39 ` Andrii Nakryiko
2020-04-27 20:12 ` [PATCH bpf-next v1 08/19] bpf: create file " Yonghong Song
2020-04-29 20:40 ` Andrii Nakryiko
2020-04-30 18:02 ` Yonghong Song
2020-04-27 20:12 ` [PATCH bpf-next v1 09/19] bpf: add PTR_TO_BTF_ID_OR_NULL support Yonghong Song
2020-04-29 20:46 ` Andrii Nakryiko
2020-04-29 20:51 ` Yonghong Song
2020-04-27 20:12 ` [PATCH bpf-next v1 10/19] bpf: add netlink and ipv6_route targets Yonghong Song
2020-04-28 19:49 ` kbuild test robot
2020-04-28 19:49 ` kbuild test robot
2020-04-28 19:50 ` [RFC PATCH] bpf: __bpf_iter__netlink() can be static kbuild test robot
2020-04-28 19:50 ` kbuild test robot
2020-04-27 20:12 ` [PATCH bpf-next v1 11/19] bpf: add task and task/file targets Yonghong Song
2020-04-30 2:08 ` Andrii Nakryiko
2020-05-01 17:23 ` Yonghong Song
2020-05-01 19:01 ` Andrii Nakryiko
2020-04-27 20:12 ` [PATCH bpf-next v1 12/19] bpf: add bpf_seq_printf and bpf_seq_write helpers Yonghong Song
2020-04-28 6:02 ` kbuild test robot
2020-04-28 6:02 ` kbuild test robot
2020-04-28 16:35 ` Yonghong Song
2020-04-28 16:35 ` Yonghong Song
2020-04-30 20:06 ` Andrii Nakryiko
2020-04-27 20:12 ` [PATCH bpf-next v1 13/19] bpf: handle spilled PTR_TO_BTF_ID properly when checking stack_boundary Yonghong Song
2020-04-27 20:12 ` [PATCH bpf-next v1 14/19] bpf: support variable length array in tracing programs Yonghong Song
2020-04-30 20:04 ` Andrii Nakryiko
2020-04-27 20:12 ` [PATCH bpf-next v1 15/19] tools/libbpf: add bpf_iter support Yonghong Song
2020-04-30 1:41 ` Andrii Nakryiko
2020-05-02 7:17 ` Yonghong Song
2020-04-27 20:12 ` [PATCH bpf-next v1 16/19] tools/bpftool: add bpf_iter support for bptool Yonghong Song
2020-04-28 9:27 ` Quentin Monnet
2020-04-28 17:35 ` Yonghong Song
2020-04-29 8:37 ` Quentin Monnet
2020-04-27 20:12 ` [PATCH bpf-next v1 17/19] tools/bpf: selftests: add iterator programs for ipv6_route and netlink Yonghong Song
2020-04-30 2:12 ` Andrii Nakryiko
2020-04-27 20:12 ` [PATCH bpf-next v1 18/19] tools/bpf: selftests: add iter progs for bpf_map/task/task_file Yonghong Song
2020-04-27 20:12 ` [PATCH bpf-next v1 19/19] tools/bpf: selftests: add bpf_iter selftests Yonghong Song
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200427201242.2995160-1-yhs@fb.com \
--to=yhs@fb.com \
--cc=andriin@fb.com \
--cc=ast@fb.com \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=kafai@fb.com \
--cc=kernel-team@fb.com \
--cc=netdev@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.