From: Alexei Starovoitov <ast@plumgrid.com>
To: "David S. Miller" <davem@davemloft.net>
Cc: Ingo Molnar <mingo@kernel.org>,
Linus Torvalds <torvalds@linux-foundation.org>,
Andy Lutomirski <luto@amacapital.net>,
Daniel Borkmann <dborkman@redhat.com>,
Hannes Frederic Sowa <hannes@stressinduktion.org>,
Chema Gonzalez <chema@google.com>,
Eric Dumazet <edumazet@google.com>,
Peter Zijlstra <a.p.zijlstra@chello.nl>,
Pablo Neira Ayuso <pablo@netfilter.org>,
"H. Peter Anvin" <hpa@zytor.com>,
Andrew Morton <akpm@linux-foundation.org>,
Kees Cook <keescook@chromium.org>,
linux-api@vger.kernel.org, netdev@vger.kernel.org,
linux-kernel@vger.kernel.org
Subject: [PATCH v14 net-next 03/11] bpf: add lookup/update/delete/iterate methods to BPF maps
Date: Sun, 21 Sep 2014 17:06:43 -0700 [thread overview]
Message-ID: <1411344411-3824-4-git-send-email-ast@plumgrid.com> (raw)
In-Reply-To: <1411344411-3824-1-git-send-email-ast@plumgrid.com>
'maps' is a generic storage of different types for sharing data between kernel
and userspace.
The maps are accessed from user space via BPF syscall, which has commands:
- create a map with given type and attributes
fd = bpf(BPF_MAP_CREATE, union bpf_attr *attr, u32 size)
returns fd or negative error
- lookup key in a given map referenced by fd
err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)
using attr->map_fd, attr->key, attr->value
returns zero and stores found elem into value or negative error
- create or update key/value pair in a given map
err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)
using attr->map_fd, attr->key, attr->value
returns zero or negative error
- find and delete element by key in a given map
err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)
using attr->map_fd, attr->key
- iterate map elements (based on input key return next_key)
err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size)
using attr->map_fd, attr->key, attr->next_key
- close(fd) deletes the map
Signed-off-by: Alexei Starovoitov <ast@plumgrid.com>
---
include/linux/bpf.h | 8 ++
include/uapi/linux/bpf.h | 38 ++++++++
kernel/bpf/syscall.c | 235 ++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 281 insertions(+)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 48014a71f0fe..2887f3f9da59 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -9,6 +9,7 @@
#include <uapi/linux/bpf.h>
#include <linux/workqueue.h>
+#include <linux/file.h>
struct bpf_map;
@@ -17,6 +18,12 @@ struct bpf_map_ops {
/* funcs callable from userspace (via syscall) */
struct bpf_map *(*map_alloc)(union bpf_attr *attr);
void (*map_free)(struct bpf_map *);
+ int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);
+
+ /* funcs callable from userspace and from eBPF programs */
+ void *(*map_lookup_elem)(struct bpf_map *map, void *key);
+ int (*map_update_elem)(struct bpf_map *map, void *key, void *value);
+ int (*map_delete_elem)(struct bpf_map *map, void *key);
};
struct bpf_map {
@@ -37,5 +44,6 @@ struct bpf_map_type_list {
void bpf_register_map_type(struct bpf_map_type_list *tl);
void bpf_map_put(struct bpf_map *map);
+struct bpf_map *bpf_map_get(struct fd f);
#endif /* _LINUX_BPF_H */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f58a10f9670c..395cabd2ca0a 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -70,6 +70,35 @@ enum bpf_cmd {
* map is deleted when fd is closed
*/
BPF_MAP_CREATE,
+
+ /* lookup key in a given map
+ * err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)
+ * Using attr->map_fd, attr->key, attr->value
+ * returns zero and stores found elem into value
+ * or negative error
+ */
+ BPF_MAP_LOOKUP_ELEM,
+
+ /* create or update key/value pair in a given map
+ * err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)
+ * Using attr->map_fd, attr->key, attr->value
+ * returns zero or negative error
+ */
+ BPF_MAP_UPDATE_ELEM,
+
+ /* find and delete elem by key in a given map
+ * err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)
+ * Using attr->map_fd, attr->key
+ * returns zero or negative error
+ */
+ BPF_MAP_DELETE_ELEM,
+
+ /* lookup key in a given map and return next key
+ * err = bpf(BPF_MAP_GET_NEXT_KEY, union bpf_attr *attr, u32 size)
+ * Using attr->map_fd, attr->key, attr->next_key
+ * returns zero and stores next key or negative error
+ */
+ BPF_MAP_GET_NEXT_KEY,
};
enum bpf_map_type {
@@ -83,6 +112,15 @@ union bpf_attr {
__u32 value_size; /* size of value in bytes */
__u32 max_entries; /* max number of entries in a map */
};
+
+ struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */
+ __u32 map_fd;
+ __aligned_u64 key;
+ union {
+ __aligned_u64 value;
+ __aligned_u64 next_key;
+ };
+ };
} __attribute__((aligned(8)));
#endif /* _UAPI__LINUX_BPF_H__ */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 428a0e23adc0..f94349ecaf61 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -13,6 +13,7 @@
#include <linux/syscalls.h>
#include <linux/slab.h>
#include <linux/anon_inodes.h>
+#include <linux/file.h>
static LIST_HEAD(bpf_map_types);
@@ -111,6 +112,228 @@ free_map:
return err;
}
+/* if error is returned, fd is released.
+ * On success caller should complete fd access with matching fdput()
+ */
+struct bpf_map *bpf_map_get(struct fd f)
+{
+ struct bpf_map *map;
+
+ if (!f.file)
+ return ERR_PTR(-EBADF);
+
+ if (f.file->f_op != &bpf_map_fops) {
+ fdput(f);
+ return ERR_PTR(-EINVAL);
+ }
+
+ map = f.file->private_data;
+
+ return map;
+}
+
+/* helper to convert user pointers passed inside __aligned_u64 fields */
+static void __user *u64_to_ptr(__u64 val)
+{
+ return (void __user *) (unsigned long) val;
+}
+
+/* last field in 'union bpf_attr' used by this command */
+#define BPF_MAP_LOOKUP_ELEM_LAST_FIELD value
+
+static int map_lookup_elem(union bpf_attr *attr)
+{
+ void __user *ukey = u64_to_ptr(attr->key);
+ void __user *uvalue = u64_to_ptr(attr->value);
+ int ufd = attr->map_fd;
+ struct fd f = fdget(ufd);
+ struct bpf_map *map;
+ void *key, *value;
+ int err;
+
+ if (CHECK_ATTR(BPF_MAP_LOOKUP_ELEM))
+ return -EINVAL;
+
+ map = bpf_map_get(f);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+
+ err = -ENOMEM;
+ key = kmalloc(map->key_size, GFP_USER);
+ if (!key)
+ goto err_put;
+
+ err = -EFAULT;
+ if (copy_from_user(key, ukey, map->key_size) != 0)
+ goto free_key;
+
+ err = -ESRCH;
+ rcu_read_lock();
+ value = map->ops->map_lookup_elem(map, key);
+ if (!value)
+ goto err_unlock;
+
+ err = -EFAULT;
+ if (copy_to_user(uvalue, value, map->value_size) != 0)
+ goto err_unlock;
+
+ err = 0;
+
+err_unlock:
+ rcu_read_unlock();
+free_key:
+ kfree(key);
+err_put:
+ fdput(f);
+ return err;
+}
+
+#define BPF_MAP_UPDATE_ELEM_LAST_FIELD value
+
+static int map_update_elem(union bpf_attr *attr)
+{
+ void __user *ukey = u64_to_ptr(attr->key);
+ void __user *uvalue = u64_to_ptr(attr->value);
+ int ufd = attr->map_fd;
+ struct fd f = fdget(ufd);
+ struct bpf_map *map;
+ void *key, *value;
+ int err;
+
+ if (CHECK_ATTR(BPF_MAP_UPDATE_ELEM))
+ return -EINVAL;
+
+ map = bpf_map_get(f);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+
+ err = -ENOMEM;
+ key = kmalloc(map->key_size, GFP_USER);
+ if (!key)
+ goto err_put;
+
+ err = -EFAULT;
+ if (copy_from_user(key, ukey, map->key_size) != 0)
+ goto free_key;
+
+ err = -ENOMEM;
+ value = kmalloc(map->value_size, GFP_USER);
+ if (!value)
+ goto free_key;
+
+ err = -EFAULT;
+ if (copy_from_user(value, uvalue, map->value_size) != 0)
+ goto free_value;
+
+ /* eBPF program that use maps are running under rcu_read_lock(),
+ * therefore all map accessors rely on this fact, so do the same here
+ */
+ rcu_read_lock();
+ err = map->ops->map_update_elem(map, key, value);
+ rcu_read_unlock();
+
+free_value:
+ kfree(value);
+free_key:
+ kfree(key);
+err_put:
+ fdput(f);
+ return err;
+}
+
+#define BPF_MAP_DELETE_ELEM_LAST_FIELD key
+
+static int map_delete_elem(union bpf_attr *attr)
+{
+ void __user *ukey = u64_to_ptr(attr->key);
+ int ufd = attr->map_fd;
+ struct fd f = fdget(ufd);
+ struct bpf_map *map;
+ void *key;
+ int err;
+
+ if (CHECK_ATTR(BPF_MAP_DELETE_ELEM))
+ return -EINVAL;
+
+ map = bpf_map_get(f);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+
+ err = -ENOMEM;
+ key = kmalloc(map->key_size, GFP_USER);
+ if (!key)
+ goto err_put;
+
+ err = -EFAULT;
+ if (copy_from_user(key, ukey, map->key_size) != 0)
+ goto free_key;
+
+ rcu_read_lock();
+ err = map->ops->map_delete_elem(map, key);
+ rcu_read_unlock();
+
+free_key:
+ kfree(key);
+err_put:
+ fdput(f);
+ return err;
+}
+
+/* last field in 'union bpf_attr' used by this command */
+#define BPF_MAP_GET_NEXT_KEY_LAST_FIELD next_key
+
+static int map_get_next_key(union bpf_attr *attr)
+{
+ void __user *ukey = u64_to_ptr(attr->key);
+ void __user *unext_key = u64_to_ptr(attr->next_key);
+ int ufd = attr->map_fd;
+ struct fd f = fdget(ufd);
+ struct bpf_map *map;
+ void *key, *next_key;
+ int err;
+
+ if (CHECK_ATTR(BPF_MAP_GET_NEXT_KEY))
+ return -EINVAL;
+
+ map = bpf_map_get(f);
+ if (IS_ERR(map))
+ return PTR_ERR(map);
+
+ err = -ENOMEM;
+ key = kmalloc(map->key_size, GFP_USER);
+ if (!key)
+ goto err_put;
+
+ err = -EFAULT;
+ if (copy_from_user(key, ukey, map->key_size) != 0)
+ goto free_key;
+
+ err = -ENOMEM;
+ next_key = kmalloc(map->key_size, GFP_USER);
+ if (!next_key)
+ goto free_key;
+
+ rcu_read_lock();
+ err = map->ops->map_get_next_key(map, key, next_key);
+ rcu_read_unlock();
+ if (err)
+ goto free_next_key;
+
+ err = -EFAULT;
+ if (copy_to_user(unext_key, next_key, map->key_size) != 0)
+ goto free_next_key;
+
+ err = 0;
+
+free_next_key:
+ kfree(next_key);
+free_key:
+ kfree(key);
+err_put:
+ fdput(f);
+ return err;
+}
+
SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
{
union bpf_attr attr = {};
@@ -160,6 +383,18 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
case BPF_MAP_CREATE:
err = map_create(&attr);
break;
+ case BPF_MAP_LOOKUP_ELEM:
+ err = map_lookup_elem(&attr);
+ break;
+ case BPF_MAP_UPDATE_ELEM:
+ err = map_update_elem(&attr);
+ break;
+ case BPF_MAP_DELETE_ELEM:
+ err = map_delete_elem(&attr);
+ break;
+ case BPF_MAP_GET_NEXT_KEY:
+ err = map_get_next_key(&attr);
+ break;
default:
err = -EINVAL;
break;
--
1.7.9.5
next prev parent reply other threads:[~2014-09-22 0:06 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2014-09-22 0:06 [PATCH v14 net-next 00/11] eBPF syscall, verifier, testsuite Alexei Starovoitov
2014-09-22 0:06 ` [PATCH v14 net-next 01/11] bpf: introduce BPF syscall and maps Alexei Starovoitov
2014-09-22 0:06 ` [PATCH v14 net-next 02/11] bpf: enable bpf syscall on x64 and i386 Alexei Starovoitov
2014-09-22 0:06 ` Alexei Starovoitov [this message]
2014-09-22 0:06 ` [PATCH v14 net-next 04/11] bpf: expand BPF syscall with program load/unload Alexei Starovoitov
2014-09-22 0:06 ` [PATCH v14 net-next 05/11] bpf: handle pseudo BPF_CALL insn Alexei Starovoitov
2014-09-22 0:06 ` [PATCH v14 net-next 06/11] bpf: verifier (add docs) Alexei Starovoitov
2014-09-22 0:06 ` [PATCH v14 net-next 07/11] bpf: verifier (add ability to receive verification log) Alexei Starovoitov
2014-09-22 0:06 ` [PATCH v14 net-next 08/11] bpf: handle pseudo BPF_LD_IMM64 insn Alexei Starovoitov
2014-09-22 0:06 ` [PATCH v14 net-next 09/11] bpf: verifier (add branch/goto checks) Alexei Starovoitov
2014-09-22 0:06 ` [PATCH v14 net-next 10/11] bpf: verifier (add verifier core) Alexei Starovoitov
[not found] ` <1411344411-3824-11-git-send-email-ast-uqk4Ao+rVK5Wk0Htik3J/w@public.gmane.org>
2014-09-26 4:40 ` David Miller
[not found] ` <20140926.004024.1179261758092351350.davem-fT/PcQaiUtIeIZ0/mPfg9Q@public.gmane.org>
2014-09-26 5:11 ` Alexei Starovoitov
2014-09-22 0:06 ` [PATCH v14 net-next 11/11] bpf: mini eBPF library, test stubs and verifier testsuite Alexei Starovoitov
[not found] ` <1411344411-3824-1-git-send-email-ast-uqk4Ao+rVK5Wk0Htik3J/w@public.gmane.org>
2014-09-22 19:57 ` [PATCH v14 net-next 00/11] eBPF syscall, verifier, testsuite Daniel Borkmann
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1411344411-3824-4-git-send-email-ast@plumgrid.com \
--to=ast@plumgrid.com \
--cc=a.p.zijlstra@chello.nl \
--cc=akpm@linux-foundation.org \
--cc=chema@google.com \
--cc=davem@davemloft.net \
--cc=dborkman@redhat.com \
--cc=edumazet@google.com \
--cc=hannes@stressinduktion.org \
--cc=hpa@zytor.com \
--cc=keescook@chromium.org \
--cc=linux-api@vger.kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=luto@amacapital.net \
--cc=mingo@kernel.org \
--cc=netdev@vger.kernel.org \
--cc=pablo@netfilter.org \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).