From mboxrd@z Thu Jan 1 00:00:00 1970 From: Yonghong Song Subject: Re: [PATCH bpf] bpf: fix bpffs non-array map seq_show issue Date: Thu, 9 Aug 2018 10:54:12 -0700 Message-ID: References: <20180809012519.3534824-1-yhs@fb.com> <20180809022530.yprizizentv7frmt@ast-mbp> <9b44c0d6-d236-75ed-abb6-2d945ceb6b81@iogearbox.net> <99a17600-57c8-06fc-c555-83dc8793f174@fb.com> <2adc84c4-2e92-db75-ba27-7e23eb9c95ad@iogearbox.net> Mime-Version: 1.0 Content-Type: text/plain; charset="utf-8"; format=flowed Content-Transfer-Encoding: 8bit Cc: , , , To: Daniel Borkmann , Alexei Starovoitov Return-path: Received: from mx0b-00082601.pphosted.com ([67.231.153.30]:59032 "EHLO mx0a-00082601.pphosted.com" rhost-flags-OK-OK-OK-FAIL) by vger.kernel.org with ESMTP id S1726827AbeHIUUo (ORCPT ); Thu, 9 Aug 2018 16:20:44 -0400 In-Reply-To: <2adc84c4-2e92-db75-ba27-7e23eb9c95ad@iogearbox.net> Content-Language: en-US Sender: netdev-owner@vger.kernel.org List-ID: On 8/9/18 10:02 AM, Daniel Borkmann wrote: > On 08/09/2018 06:55 PM, Yonghong Song wrote: >> On 8/9/18 8:59 AM, Daniel Borkmann wrote: >>> On 08/09/2018 05:15 PM, Yonghong Song wrote: >>>> On 8/9/18 7:24 AM, Daniel Borkmann wrote: >>>>> On 08/09/2018 05:55 AM, Yonghong Song wrote: >>>>>> On 8/8/18 7:25 PM, Alexei Starovoitov wrote: >>>>>>> On Wed, Aug 08, 2018 at 06:25:19PM -0700, Yonghong Song wrote: >>>>>>>> In function map_seq_next() of kernel/bpf/inode.c, >>>>>>>> the first key will be the "0" regardless of the map type. >>>>>>>> This works for array. But for hash type, if it happens >>>>>>>> key "0" is in the map, the bpffs map show will miss >>>>>>>> some items if the key "0" is not the first element of >>>>>>>> the first bucket. >>>>>>>> >>>>>>>> This patch fixed the issue by guaranteeing to get >>>>>>>> the first element, if the seq_show is just started, >>>>>>>> by passing NULL pointer key to map_get_next_key() callback. >>>>>>>> This way, no missing elements will occur for >>>>>>>> bpffs hash table show even if key "0" is in the map. >>>>>> >>>>>> Currently, map_seq_show_elem callback is only implemented >>>>>> for arraymap. So the problem actually is not exposed. >>>>>> >>>>>> The issue is discovered when I tried to implement >>>>>> map_seq_show_elem for hash maps, and I will have followup >>>>>> patches for it. >>> >>> Btw, on that note, I would also prefer if we could decouple >>> BTF from the map_seq_show_elem() as there is really no reason >>> to have it on a per-map. I had a patch below which would enable >>> it for all map types generically, and bpftool works out of the >>> box for it. Also, array doesn't really have to be 'int' type >>> enforced as long as it's some data structure with 4 bytes, it's >>> all fine, so this can be made fully generic (we only eventually >>> care about the match in size). >> >> I agree with a generic map_check_btf as mostly we only care about size >> and this change should enable btftool btf based pretty print for hash/lru_hash tables. > > Yep, agree, the below output from bpftool is from test_xdp_noinline.o > where both work with it. > >>>  From 0a8be27cbc2ac0c6fc2632865b5afe37222a1fc7 Mon Sep 17 00:00:00 2001 >>> Message-Id: <0a8be27cbc2ac0c6fc2632865b5afe37222a1fc7.1533830053.git.daniel@iogearbox.net> >>> From: Daniel Borkmann >>> Date: Thu, 9 Aug 2018 16:50:21 +0200 >>> Subject: [PATCH bpf-next] bpf, btf: enable for all maps >>> >>> # bpftool m dump id 19 >>> [{ >>>          "key": { >>>              "": { >>>                  "vip": 0, >>>                  "vipv6": [] >>>              }, >>>              "port": 0, >>>              "family": 0, >>>              "proto": 0 >>>          }, >>>          "value": { >>>              "flags": 0, >>>              "vip_num": 0 >>>          } >>>      } >>> ] >>> >>> Signed-off-by: Daniel Borkmann >>> --- >>>   include/linux/bpf.h   |  4 +--- >>>   kernel/bpf/arraymap.c | 27 --------------------------- >>>   kernel/bpf/inode.c    |  3 ++- >>>   kernel/bpf/syscall.c  | 24 ++++++++++++++++++++---- >>>   4 files changed, 23 insertions(+), 35 deletions(-) >>> >>> diff --git a/include/linux/bpf.h b/include/linux/bpf.h >>> index cd8790d..91aa4be 100644 >>> --- a/include/linux/bpf.h >>> +++ b/include/linux/bpf.h >>> @@ -48,8 +48,6 @@ struct bpf_map_ops { >>>       u32 (*map_fd_sys_lookup_elem)(void *ptr); >>>       void (*map_seq_show_elem)(struct bpf_map *map, void *key, >>>                     struct seq_file *m); >>> -    int (*map_check_btf)(const struct bpf_map *map, const struct btf *btf, >>> -                 u32 key_type_id, u32 value_type_id); >>>   }; >>> >>>   struct bpf_map { >>> @@ -118,7 +116,7 @@ static inline bool bpf_map_offload_neutral(const struct bpf_map *map) >>> >>>   static inline bool bpf_map_support_seq_show(const struct bpf_map *map) >>>   { >>> -    return map->ops->map_seq_show_elem && map->ops->map_check_btf; >>> +    return map->ops->map_seq_show_elem; >>>   } >>> >>>   extern const struct bpf_map_ops bpf_map_offload_ops; >>> diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c >>> index 2aa55d030..67f0bdf 100644 >>> --- a/kernel/bpf/arraymap.c >>> +++ b/kernel/bpf/arraymap.c >>> @@ -358,32 +358,6 @@ static void array_map_seq_show_elem(struct bpf_map *map, void *key, >>>       rcu_read_unlock(); >>>   } >>> >>> -static int array_map_check_btf(const struct bpf_map *map, const struct btf *btf, >>> -                   u32 btf_key_id, u32 btf_value_id) >>> -{ >>> -    const struct btf_type *key_type, *value_type; >>> -    u32 key_size, value_size; >>> -    u32 int_data; >>> - >>> -    key_type = btf_type_id_size(btf, &btf_key_id, &key_size); >>> -    if (!key_type || BTF_INFO_KIND(key_type->info) != BTF_KIND_INT) >>> -        return -EINVAL; >>> - >>> -    int_data = *(u32 *)(key_type + 1); >>> -    /* bpf array can only take a u32 key.  This check makes >>> -     * sure that the btf matches the attr used during map_create. >>> -     */ >>> -    if (BTF_INT_BITS(int_data) != 32 || key_size != 4 || >>> -        BTF_INT_OFFSET(int_data)) >>> -        return -EINVAL; >>> - >>> -    value_type = btf_type_id_size(btf, &btf_value_id, &value_size); >>> -    if (!value_type || value_size != map->value_size) >>> -        return -EINVAL; >>> - >>> -    return 0; >>> -} >>> - >>>   const struct bpf_map_ops array_map_ops = { >>>       .map_alloc_check = array_map_alloc_check, >>>       .map_alloc = array_map_alloc, >>> @@ -394,7 +368,6 @@ const struct bpf_map_ops array_map_ops = { >>>       .map_delete_elem = array_map_delete_elem, >>>       .map_gen_lookup = array_map_gen_lookup, >>>       .map_seq_show_elem = array_map_seq_show_elem, >>> -    .map_check_btf = array_map_check_btf, >>>   }; >>> >>>   const struct bpf_map_ops percpu_array_map_ops = { >>> diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c >>> index 76efe9a..400f27d 100644 >>> --- a/kernel/bpf/inode.c >>> +++ b/kernel/bpf/inode.c >>> @@ -332,7 +332,8 @@ static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg) >>>       struct bpf_map *map = arg; >>> >>>       return bpf_mkobj_ops(dentry, mode, arg, &bpf_map_iops, >>> -                 map->btf ? &bpffs_map_fops : &bpffs_obj_fops); >>> +                 bpf_map_support_seq_show(map) ? >>> +                 &bpffs_map_fops : &bpffs_obj_fops); >> >> There are an issue here, the condition bpf_map_support_seq_show(map) may not be enough since the map specific implementation assumes availability of btf and proper map key/value btf_type_id's. >> We can either use >>     map->btf && bpf_map_support_seq_show(map) >> condition here, or check map->btf in each individual implementation >> of map_support_seq_show(). > > Good, point, agree. Will fix and cook proper patch later today. Sounds good. i will wait until this gets merged and then resubmit. > Thanks Yonghong! > >>>   } >>> >>>   static struct dentry * >>> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c >>> index 5af4e9e..0b6f6e8 100644 >>> --- a/kernel/bpf/syscall.c >>> +++ b/kernel/bpf/syscall.c >>> @@ -455,6 +455,23 @@ static int bpf_obj_name_cpy(char *dst, const char *src) >>>       return 0; >>>   } >>> >>> +static int map_check_btf(const struct bpf_map *map, const struct btf *btf, >>> +             u32 btf_key_id, u32 btf_value_id) >>> +{ >>> +    const struct btf_type *key_type, *value_type; >>> +    u32 key_size, value_size; >>> + >>> +    key_type = btf_type_id_size(btf, &btf_key_id, &key_size); >>> +    if (!key_type || key_size != map->key_size) >>> +        return -EINVAL; >>> + >>> +    value_type = btf_type_id_size(btf, &btf_value_id, &value_size); >>> +    if (!value_type || value_size != map->value_size) >>> +        return -EINVAL; >>> + >>> +    return 0; >>> +} >>> + >>>   #define BPF_MAP_CREATE_LAST_FIELD btf_value_type_id >>>   /* called via syscall */ >>>   static int map_create(union bpf_attr *attr) >>> @@ -489,8 +506,7 @@ static int map_create(union bpf_attr *attr) >>>       atomic_set(&map->refcnt, 1); >>>       atomic_set(&map->usercnt, 1); >>> >>> -    if (bpf_map_support_seq_show(map) && >>> -        (attr->btf_key_type_id || attr->btf_value_type_id)) { >>> +    if (attr->btf_key_type_id || attr->btf_value_type_id) { >>>           struct btf *btf; >>> >>>           if (!attr->btf_key_type_id || !attr->btf_value_type_id) { >>> @@ -504,8 +520,8 @@ static int map_create(union bpf_attr *attr) >>>               goto free_map_nouncharge; >>>           } >>> >>> -        err = map->ops->map_check_btf(map, btf, attr->btf_key_type_id, >>> -                          attr->btf_value_type_id); >>> +        err = map_check_btf(map, btf, attr->btf_key_type_id, >>> +                    attr->btf_value_type_id); >>>           if (err) { >>>               btf_put(btf); >>>               goto free_map_nouncharge; >>> >