From: Daniel Borkmann <daniel@iogearbox.net>
To: Kaixu Xia <xiakaixu@huawei.com>,
ast@plumgrid.com, davem@davemloft.net, acme@kernel.org,
mingo@redhat.com, a.p.zijlstra@chello.nl,
masami.hiramatsu.pt@hitachi.com, jolsa@kernel.org
Cc: wangnan0@huawei.com, linux-kernel@vger.kernel.org,
pi3orama@163.com, hekuang@huawei.com
Subject: Re: [PATCH v4 1/4] bpf: Make the bpf_prog_array_map more generic
Date: Thu, 30 Jul 2015 01:17:26 +0200 [thread overview]
Message-ID: <55B95F06.4040800@iogearbox.net> (raw)
In-Reply-To: <1438082255-60683-2-git-send-email-xiakaixu@huawei.com>
On 07/28/2015 01:17 PM, Kaixu Xia wrote:
> From: Wang Nan <wangnan0@huawei.com>
>
> According to the comments from Daniel, rewrite part of
> the bpf_prog_array map code and make it more generic.
> So the new perf_event_array map type can reuse most of
> code with bpf_prog_array map and add fewer lines of
> special code.
>
> Tested the samples/bpf/tracex5 after this patch:
> $ sudo ./tracex5
> ...
> dd-1051 [000] d... 26.682903: : mmap
> dd-1051 [000] d... 26.698348: : syscall=102 (one of get/set uid/pid/gid)
> dd-1051 [000] d... 26.703892: : read(fd=0, buf=000000000078c010, size=512)
> dd-1051 [000] d... 26.705847: : write(fd=1, buf=000000000078c010, size=512)
> dd-1051 [000] d... 26.707914: : read(fd=0, buf=000000000078c010, size=512)
> dd-1051 [000] d... 26.710988: : write(fd=1, buf=000000000078c010, size=512)
> dd-1051 [000] d... 26.711865: : read(fd=0, buf=000000000078c010, size=512)
> dd-1051 [000] d... 26.712704: : write(fd=1, buf=000000000078c010, size=512)
> ...
>
> Signed-off-by: Wang Nan <wangnan0@huawei.com>
> Signed-off-by: Kaixu Xia <xiakaixu@huawei.com>
> ---
> include/linux/bpf.h | 6 ++-
> kernel/bpf/arraymap.c | 104 +++++++++++++++++++++++++++++++++++---------------
> kernel/bpf/syscall.c | 4 +-
> 3 files changed, 80 insertions(+), 34 deletions(-)
>
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 4383476..610b730 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -130,6 +130,8 @@ struct bpf_prog_aux {
> };
> };
>
> +struct fd_array_map_ops;
> +
> struct bpf_array {
> struct bpf_map map;
> u32 elem_size;
> @@ -140,15 +142,17 @@ struct bpf_array {
> */
> enum bpf_prog_type owner_prog_type;
> bool owner_jited;
> + const struct fd_array_map_ops* fd_ops;
> union {
> char value[0] __aligned(8);
> + void *ptrs[0] __aligned(8);
> struct bpf_prog *prog[0] __aligned(8);
After your conversion, prog member from the union is not used here anymore
(only as offsetof(...) in JITs). We should probably get rid of it then.
> };
> };
> #define MAX_TAIL_CALL_CNT 32
>
> u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5);
> -void bpf_prog_array_map_clear(struct bpf_map *map);
> +void bpf_fd_array_map_clear(struct bpf_map *map);
> bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp);
> const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
>
> diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
> index cb31229..4784cdc 100644
> --- a/kernel/bpf/arraymap.c
> +++ b/kernel/bpf/arraymap.c
> @@ -150,15 +150,62 @@ static int __init register_array_map(void)
> }
> late_initcall(register_array_map);
>
> -static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
> +struct fd_array_map_ops {
> + void *(*get_ptr)(struct bpf_array *array, int fd);
> + void (*put_ptr)(struct bpf_array *array, void *ptr);
> +};
> +
> +static void *prog_fd_array_get_ptr(struct bpf_array *array, int fd)
> +{
> + struct bpf_prog *prog = bpf_prog_get(fd);
> + if (IS_ERR(prog))
> + return prog;
> +
> + if (!bpf_prog_array_compatible(array, prog)) {
> + bpf_prog_put(prog);
> + return ERR_PTR(-EINVAL);
> + }
> + return prog;
> +}
> +
> +static void prog_fd_array_put_ptr(struct bpf_array *array __maybe_unused,
> + void *ptr)
array member seems not to be used in both implementations. It should then
probably not be part of the API?
> +{
> + struct bpf_prog *prog = (struct bpf_prog *)ptr;
No cast on void * needed.
> +
> + bpf_prog_put_rcu(prog);
> +}
> +
> +static const struct fd_array_map_ops prog_fd_array_map_ops = {
> + .get_ptr = prog_fd_array_get_ptr,
> + .put_ptr = prog_fd_array_put_ptr,
> +};
> +
> +static struct bpf_map *fd_array_map_alloc(union bpf_attr *attr,
> + const struct fd_array_map_ops *ops)
> {
> - /* only bpf_prog file descriptors can be stored in prog_array map */
> + struct bpf_map *map;
> + struct bpf_array *array;
> +
> + /* only file descriptors can be stored in this type of map */
> if (attr->value_size != sizeof(u32))
> return ERR_PTR(-EINVAL);
> - return array_map_alloc(attr);
> +
> + map = array_map_alloc(attr);
> + if (IS_ERR(map))
> + return map;
> +
> + array = container_of(map, struct bpf_array, map);
> + array->fd_ops = ops;
> + return map;
> +}
> +
> +static struct bpf_map *prog_array_map_alloc(union bpf_attr *attr)
> +{
> + return fd_array_map_alloc(attr, &prog_fd_array_map_ops);
> }
>
> -static void prog_array_map_free(struct bpf_map *map)
> +static void fd_array_map_free(struct bpf_map *map)
> {
> struct bpf_array *array = container_of(map, struct bpf_array, map);
> int i;
> @@ -167,21 +214,21 @@ static void prog_array_map_free(struct bpf_map *map)
>
> /* make sure it's empty */
> for (i = 0; i < array->map.max_entries; i++)
> - BUG_ON(array->prog[i] != NULL);
> + BUG_ON(array->ptrs[i] != NULL);
> kvfree(array);
> }
>
> -static void *prog_array_map_lookup_elem(struct bpf_map *map, void *key)
> +static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
> {
> return NULL;
> }
>
> /* only called from syscall */
> -static int prog_array_map_update_elem(struct bpf_map *map, void *key,
> - void *value, u64 map_flags)
> +static int fd_array_map_update_elem(struct bpf_map *map, void *key,
> + void *value, u64 map_flags)
> {
> struct bpf_array *array = container_of(map, struct bpf_array, map);
> - struct bpf_prog *prog, *old_prog;
> + void *new_ptr, *old_ptr;
> u32 index = *(u32 *)key, ufd;
>
> if (map_flags != BPF_ANY)
> @@ -191,34 +238,29 @@ static int prog_array_map_update_elem(struct bpf_map *map, void *key,
> return -E2BIG;
>
> ufd = *(u32 *)value;
> - prog = bpf_prog_get(ufd);
> - if (IS_ERR(prog))
> - return PTR_ERR(prog);
> -
> - if (!bpf_prog_array_compatible(array, prog)) {
> - bpf_prog_put(prog);
> - return -EINVAL;
> - }
> + new_ptr = array->fd_ops->get_ptr(array, ufd);
> + if (IS_ERR(new_ptr))
> + return PTR_ERR(new_ptr);
>
> - old_prog = xchg(array->prog + index, prog);
> - if (old_prog)
> - bpf_prog_put_rcu(old_prog);
> + old_ptr = xchg(array->ptrs + index, new_ptr);
> + if (old_ptr)
> + array->fd_ops->put_ptr(array, old_ptr);
>
> return 0;
> }
>
> -static int prog_array_map_delete_elem(struct bpf_map *map, void *key)
> +static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
> {
> struct bpf_array *array = container_of(map, struct bpf_array, map);
> - struct bpf_prog *old_prog;
> + void *old_ptr;
> u32 index = *(u32 *)key;
>
> if (index >= array->map.max_entries)
> return -E2BIG;
>
> - old_prog = xchg(array->prog + index, NULL);
> - if (old_prog) {
> - bpf_prog_put_rcu(old_prog);
> + old_ptr = xchg(array->ptrs + index, NULL);
> + if (old_ptr) {
> + array->fd_ops->put_ptr(array, old_ptr);
> return 0;
> } else {
> return -ENOENT;
> @@ -226,22 +268,22 @@ static int prog_array_map_delete_elem(struct bpf_map *map, void *key)
> }
>
> /* decrement refcnt of all bpf_progs that are stored in this map */
> -void bpf_prog_array_map_clear(struct bpf_map *map)
> +void bpf_fd_array_map_clear(struct bpf_map *map)
> {
> struct bpf_array *array = container_of(map, struct bpf_array, map);
> int i;
>
> for (i = 0; i < array->map.max_entries; i++)
> - prog_array_map_delete_elem(map, &i);
> + fd_array_map_delete_elem(map, &i);
> }
>
> static const struct bpf_map_ops prog_array_ops = {
> .map_alloc = prog_array_map_alloc,
> - .map_free = prog_array_map_free,
> + .map_free = fd_array_map_free,
> .map_get_next_key = array_map_get_next_key,
> - .map_lookup_elem = prog_array_map_lookup_elem,
> - .map_update_elem = prog_array_map_update_elem,
> - .map_delete_elem = prog_array_map_delete_elem,
> + .map_lookup_elem = fd_array_map_lookup_elem,
> + .map_update_elem = fd_array_map_update_elem,
> + .map_delete_elem = fd_array_map_delete_elem,
I'm wondering if we should move fd_ops actually into bpf_map? Seems like
then both only differ in get_ptr/put_ptr and could also reuse the same
bpf_map_ops structure?
> };
>
> static struct bpf_map_type_list prog_array_type __read_mostly = {
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index a1b14d1..de2dcc2 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -68,11 +68,11 @@ static int bpf_map_release(struct inode *inode, struct file *filp)
> {
> struct bpf_map *map = filp->private_data;
>
> - if (map->map_type == BPF_MAP_TYPE_PROG_ARRAY)
> + if (map->map_type >= BPF_MAP_TYPE_PROG_ARRAY)
> /* prog_array stores refcnt-ed bpf_prog pointers
> * release them all when user space closes prog_array_fd
> */
> - bpf_prog_array_map_clear(map);
> + bpf_fd_array_map_clear(map);
When we are going to add a new map type to the eBPF framework that is not
an fd_array_map thing, this assumption of map->map_type >= BPF_MAP_TYPE_PROG_ARRAY
might not hold then ...
> bpf_map_put(map);
> return 0;
>
next prev parent reply other threads:[~2015-07-29 23:17 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-07-28 11:17 [PATCH v4 0/4] bpf: Introduce the new ability of eBPF programs to access hardware PMU counter Kaixu Xia
2015-07-28 11:17 ` [PATCH v4 1/4] bpf: Make the bpf_prog_array_map more generic Kaixu Xia
2015-07-29 23:17 ` Daniel Borkmann [this message]
2015-07-30 1:44 ` Alexei Starovoitov
2015-07-31 8:50 ` xiakaixu
2015-07-31 15:46 ` Alexei Starovoitov
2015-07-28 11:17 ` [PATCH v4 2/4] bpf: Add new bpf map type to store the pointer to struct perf_event Kaixu Xia
2015-07-29 23:30 ` Daniel Borkmann
2015-07-30 1:45 ` Alexei Starovoitov
2015-07-28 11:17 ` [PATCH v4 3/4] bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU conuter Kaixu Xia
2015-07-29 23:51 ` Daniel Borkmann
2015-07-28 11:17 ` [PATCH v4 4/4] samples/bpf: example of get selected PMU counter value Kaixu Xia
2015-07-29 23:56 ` Daniel Borkmann
2015-07-30 0:08 ` [PATCH v4 0/4] bpf: Introduce the new ability of eBPF programs to access hardware PMU counter Daniel Borkmann
2015-07-30 1:50 ` Alexei Starovoitov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=55B95F06.4040800@iogearbox.net \
--to=daniel@iogearbox.net \
--cc=a.p.zijlstra@chello.nl \
--cc=acme@kernel.org \
--cc=ast@plumgrid.com \
--cc=davem@davemloft.net \
--cc=hekuang@huawei.com \
--cc=jolsa@kernel.org \
--cc=linux-kernel@vger.kernel.org \
--cc=masami.hiramatsu.pt@hitachi.com \
--cc=mingo@redhat.com \
--cc=pi3orama@163.com \
--cc=wangnan0@huawei.com \
--cc=xiakaixu@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.