From: Lorenz Bauer <lmb@cloudflare.com>
To: ast@kernel.org, yhs@fb.com, daniel@iogearbox.net,
jakub@cloudflare.com, john.fastabend@gmail.com
Cc: bpf@vger.kernel.org, kernel-team@cloudflare.com,
Lorenz Bauer <lmb@cloudflare.com>
Subject: [PATCH bpf-next 1/3] net: Allow iterating sockmap and sockhash
Date: Fri, 28 Aug 2020 10:48:32 +0100 [thread overview]
Message-ID: <20200828094834.23290-2-lmb@cloudflare.com> (raw)
In-Reply-To: <20200828094834.23290-1-lmb@cloudflare.com>
Add bpf_iter support for sockmap / sockhash, based on the bpf_sk_storage and
hashtable implementation. sockmap and sockhash share the same iteration
context: a pointer to an arbitrary key and a pointer to a socket. Both
pointers may be NULL, and so BPF has to perform a NULL check before accessing
them. Technically it's not possible for sockhash iteration to yield a NULL
socket, but we ignore this to be able to use a single iteration point.
Iteration will visit all keys that remain unmodified during the lifetime of
the iterator. It may or may not visit newly added ones.
Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
---
net/core/sock_map.c | 283 ++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 283 insertions(+)
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index d6c6e1e312fc..31c4332f06e4 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -703,6 +703,116 @@ const struct bpf_func_proto bpf_msg_redirect_map_proto = {
.arg4_type = ARG_ANYTHING,
};
+struct sock_map_seq_info {
+ struct bpf_map *map;
+ struct sock *sk;
+ u32 index;
+};
+
+struct bpf_iter__sockmap {
+ __bpf_md_ptr(struct bpf_iter_meta *, meta);
+ __bpf_md_ptr(struct bpf_map *, map);
+ __bpf_md_ptr(void *, key);
+ __bpf_md_ptr(struct bpf_sock *, sk);
+};
+
+DEFINE_BPF_ITER_FUNC(sockmap, struct bpf_iter_meta *meta,
+ struct bpf_map *map, void *key,
+ struct sock *sk)
+
+static void *sock_map_seq_lookup_elem(struct sock_map_seq_info *info)
+{
+ if (unlikely(info->index >= info->map->max_entries))
+ return NULL;
+
+ info->sk = __sock_map_lookup_elem(info->map, info->index);
+ if (!info->sk || !sk_fullsock(info->sk))
+ info->sk = NULL;
+
+ /* continue iterating */
+ return info;
+}
+
+static void *sock_map_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ struct sock_map_seq_info *info = seq->private;
+
+ if (*pos == 0)
+ ++*pos;
+
+ /* pairs with sock_map_seq_stop */
+ rcu_read_lock();
+ return sock_map_seq_lookup_elem(info);
+}
+
+static void *sock_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct sock_map_seq_info *info = seq->private;
+
+ ++*pos;
+ ++info->index;
+
+ return sock_map_seq_lookup_elem(info);
+}
+
+static int __sock_map_seq_show(struct seq_file *seq, void *v)
+{
+ struct sock_map_seq_info *info = seq->private;
+ struct bpf_iter__sockmap ctx = {};
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, !v);
+ if (!prog)
+ return 0;
+
+ ctx.meta = &meta;
+ ctx.map = info->map;
+ if (v) {
+ ctx.key = &info->index;
+ ctx.sk = (struct bpf_sock *)info->sk;
+ }
+
+ return bpf_iter_run_prog(prog, &ctx);
+}
+
+static int sock_map_seq_show(struct seq_file *seq, void *v)
+{
+ return __sock_map_seq_show(seq, v);
+}
+
+static void sock_map_seq_stop(struct seq_file *seq, void *v)
+{
+ if (!v)
+ (void)__sock_map_seq_show(seq, NULL);
+
+ /* pairs with sock_map_seq_start */
+ rcu_read_unlock();
+}
+
+static const struct seq_operations sock_map_seq_ops = {
+ .start = sock_map_seq_start,
+ .next = sock_map_seq_next,
+ .stop = sock_map_seq_stop,
+ .show = sock_map_seq_show,
+};
+
+static int sock_map_init_seq_private(void *priv_data,
+ struct bpf_iter_aux_info *aux)
+{
+ struct sock_map_seq_info *info = priv_data;
+
+ info->map = aux->map;
+ return 0;
+}
+
+static const struct bpf_iter_seq_info sock_map_iter_seq_info = {
+ .seq_ops = &sock_map_seq_ops,
+ .init_seq_private = sock_map_init_seq_private,
+ .seq_priv_size = sizeof(struct sock_map_seq_info),
+};
+
static int sock_map_btf_id;
const struct bpf_map_ops sock_map_ops = {
.map_alloc = sock_map_alloc,
@@ -716,6 +826,7 @@ const struct bpf_map_ops sock_map_ops = {
.map_check_btf = map_check_no_btf,
.map_btf_name = "bpf_stab",
.map_btf_id = &sock_map_btf_id,
+ .iter_seq_info = &sock_map_iter_seq_info,
};
struct bpf_shtab_elem {
@@ -1198,6 +1309,120 @@ const struct bpf_func_proto bpf_msg_redirect_hash_proto = {
.arg4_type = ARG_ANYTHING,
};
+struct sock_hash_seq_info {
+ struct bpf_map *map;
+ struct bpf_shtab *htab;
+ u32 bucket_id;
+};
+
+static void *sock_hash_seq_find_next(struct sock_hash_seq_info *info,
+ struct bpf_shtab_elem *prev_elem)
+{
+ const struct bpf_shtab *htab = info->htab;
+ struct bpf_shtab_bucket *bucket;
+ struct bpf_shtab_elem *elem;
+ struct hlist_node *node;
+
+ /* try to find next elem in the same bucket */
+ if (prev_elem) {
+ node = rcu_dereference_raw(hlist_next_rcu(&prev_elem->node));
+ elem = hlist_entry_safe(node, struct bpf_shtab_elem, node);
+ if (elem)
+ return elem;
+
+ /* no more elements, continue in the next bucket */
+ info->bucket_id++;
+ }
+
+ for (; info->bucket_id < htab->buckets_num; info->bucket_id++) {
+ bucket = &htab->buckets[info->bucket_id];
+ node = rcu_dereference_raw(hlist_first_rcu(&bucket->head));
+ elem = hlist_entry_safe(node, struct bpf_shtab_elem, node);
+ if (elem)
+ return elem;
+ }
+
+ return NULL;
+}
+
+static void *sock_hash_seq_start(struct seq_file *seq, loff_t *pos)
+{
+ struct sock_hash_seq_info *info = seq->private;
+
+ if (*pos == 0)
+ ++*pos;
+
+ /* pairs with sock_hash_seq_stop */
+ rcu_read_lock();
+ return sock_hash_seq_find_next(info, NULL);
+}
+
+static void *sock_hash_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+ struct sock_hash_seq_info *info = seq->private;
+
+ ++*pos;
+ return sock_hash_seq_find_next(info, v);
+}
+
+static int __sock_hash_seq_show(struct seq_file *seq, struct bpf_shtab_elem *elem)
+{
+ struct sock_hash_seq_info *info = seq->private;
+ struct bpf_iter__sockmap ctx = {};
+ struct bpf_iter_meta meta;
+ struct bpf_prog *prog;
+
+ meta.seq = seq;
+ prog = bpf_iter_get_info(&meta, !elem);
+ if (!prog)
+ return 0;
+
+ ctx.meta = &meta;
+ ctx.map = info->map;
+ if (elem) {
+ ctx.key = elem->key;
+ ctx.sk = (struct bpf_sock *)elem->sk;
+ }
+
+ return bpf_iter_run_prog(prog, &ctx);
+}
+
+static int sock_hash_seq_show(struct seq_file *seq, void *v)
+{
+ return __sock_hash_seq_show(seq, v);
+}
+
+static void sock_hash_seq_stop(struct seq_file *seq, void *v)
+{
+ if (!v)
+ (void)__sock_hash_seq_show(seq, NULL);
+
+ /* pairs with sock_hash_seq_start */
+ rcu_read_unlock();
+}
+
+static const struct seq_operations sock_hash_seq_ops = {
+ .start = sock_hash_seq_start,
+ .next = sock_hash_seq_next,
+ .stop = sock_hash_seq_stop,
+ .show = sock_hash_seq_show,
+};
+
+static int sock_hash_init_seq_private(void *priv_data,
+ struct bpf_iter_aux_info *aux)
+{
+ struct sock_hash_seq_info *info = priv_data;
+
+ info->map = aux->map;
+ return 0;
+}
+
+static const struct bpf_iter_seq_info sock_hash_iter_seq_info = {
+ .seq_ops = &sock_hash_seq_ops,
+ .init_seq_private = sock_hash_init_seq_private,
+ .seq_priv_size = sizeof(struct sock_hash_seq_info),
+};
+
static int sock_hash_map_btf_id;
const struct bpf_map_ops sock_hash_ops = {
.map_alloc = sock_hash_alloc,
@@ -1211,6 +1436,7 @@ const struct bpf_map_ops sock_hash_ops = {
.map_check_btf = map_check_no_btf,
.map_btf_name = "bpf_shtab",
.map_btf_id = &sock_hash_map_btf_id,
+ .iter_seq_info = &sock_hash_iter_seq_info,
};
static struct sk_psock_progs *sock_map_progs(struct bpf_map *map)
@@ -1321,3 +1547,60 @@ void sock_map_close(struct sock *sk, long timeout)
release_sock(sk);
saved_close(sk, timeout);
}
+
+static int sock_map_iter_attach_target(struct bpf_prog *prog,
+ union bpf_iter_link_info *linfo,
+ struct bpf_iter_aux_info *aux)
+{
+ struct bpf_map *map;
+ int err = -EINVAL;
+
+ if (!linfo->map.map_fd)
+ return -EBADF;
+
+ map = bpf_map_get_with_uref(linfo->map.map_fd);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+
+ if (map->map_type != BPF_MAP_TYPE_SOCKMAP)
+ goto put_map;
+
+ if (prog->aux->max_rdonly_access > map->key_size) {
+ err = -EACCES;
+ goto put_map;
+ }
+
+ aux->map = map;
+ return 0;
+
+put_map:
+ bpf_map_put_with_uref(map);
+ return err;
+}
+
+static void sock_map_iter_detach_target(struct bpf_iter_aux_info *aux)
+{
+ bpf_map_put_with_uref(aux->map);
+}
+
+static struct bpf_iter_reg sock_map_iter_reg = {
+ .target = "sockmap",
+ .attach_target = sock_map_iter_attach_target,
+ .detach_target = sock_map_iter_detach_target,
+ .show_fdinfo = bpf_iter_map_show_fdinfo,
+ .fill_link_info = bpf_iter_map_fill_link_info,
+ .ctx_arg_info_size = 2,
+ .ctx_arg_info = {
+ { offsetof(struct bpf_iter__sockmap, key),
+ PTR_TO_RDONLY_BUF_OR_NULL },
+ { offsetof(struct bpf_iter__sockmap, sk),
+ PTR_TO_SOCKET_OR_NULL },
+ },
+ .seq_info = &sock_map_iter_seq_info,
+};
+
+static int __init bpf_sockmap_iter_init(void)
+{
+ return bpf_iter_reg_target(&sock_map_iter_reg);
+}
+late_initcall(bpf_sockmap_iter_init);
--
2.25.1
next prev parent reply other threads:[~2020-08-28 9:49 UTC|newest]
Thread overview: 11+ messages / expand[flat|nested] mbox.gz Atom feed top
2020-08-28 9:48 [PATCH bpf-next 0/3] Sockmap iterator Lorenz Bauer
2020-08-28 9:48 ` Lorenz Bauer [this message]
2020-08-31 10:03 ` [PATCH bpf-next 1/3] net: Allow iterating sockmap and sockhash Jakub Sitnicki
2020-09-01 8:59 ` Lorenz Bauer
2020-08-28 9:48 ` [PATCH bpf-next 2/3] selftests: bpf: Add helper to compare socket cookies Lorenz Bauer
2020-08-28 10:50 ` Jakub Sitnicki
2020-08-28 15:48 ` Lorenz Bauer
2020-08-28 11:28 ` Jakub Sitnicki
2020-08-28 9:48 ` [PATCH bpf-next 3/3] selftests: bpf: Test copying a sockmap via bpf_iter Lorenz Bauer
2020-08-31 10:58 ` Jakub Sitnicki
2020-09-01 9:20 ` Lorenz Bauer
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20200828094834.23290-2-lmb@cloudflare.com \
--to=lmb@cloudflare.com \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=jakub@cloudflare.com \
--cc=john.fastabend@gmail.com \
--cc=kernel-team@cloudflare.com \
--cc=yhs@fb.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox