All of lore.kernel.org
 help / color / mirror / Atom feed
From: Yonghong Song <yhs@fb.com>
To: Denis Salopek <denis.salopek@sartura.hr>, <bpf@vger.kernel.org>
Cc: <juraj.vijtiuk@sartura.hr>, <luka.oreskovic@sartura.hr>,
	<luka.perkov@sartura.hr>
Subject: Re: [PATCH v2 bpf-next] bpf: add lookup_and_delete_elem support to hashtab
Date: Mon, 1 Mar 2021 18:02:37 -0800	[thread overview]
Message-ID: <d8104f41-4302-7b68-5bc1-fb014a261e42@fb.com> (raw)
In-Reply-To: <YDtk/vr/lk62L4KP@gmail.com>



On 2/28/21 1:40 AM, Denis Salopek wrote:
> Extend the existing bpf_map_lookup_and_delete_elem() functionality to
> hashtab maps, in addition to stacks and queues.
> Create a new hashtab bpf_map_ops function that does lookup and deletion
> of the element under the same bucket lock and add the created map_ops to
> bpf.h.
> Add the appropriate test cases to 'maps' and 'lru_map' selftests
> accompanied with new test_progs.
> 
> Cc: Juraj Vijtiuk <juraj.vijtiuk@sartura.hr>
> Cc: Luka Oreskovic <luka.oreskovic@sartura.hr>
> Cc: Luka Perkov <luka.perkov@sartura.hr>
> Signed-off-by: Denis Salopek <denis.salopek@sartura.hr>
> ---
> v2: Add functionality for LRU/per-CPU, add test_progs tests.
> ---
>   include/linux/bpf.h                           |   2 +
>   kernel/bpf/hashtab.c                          |  80 +++++
>   kernel/bpf/syscall.c                          |  14 +-
>   .../bpf/prog_tests/lookup_and_delete.c        | 283 ++++++++++++++++++
>   .../bpf/progs/test_lookup_and_delete.c        |  26 ++
>   tools/testing/selftests/bpf/test_lru_map.c    |   8 +
>   tools/testing/selftests/bpf/test_maps.c       |  19 +-
>   7 files changed, 430 insertions(+), 2 deletions(-)
>   create mode 100644 tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
>   create mode 100644 tools/testing/selftests/bpf/progs/test_lookup_and_delete.c
> 
> diff --git a/include/linux/bpf.h b/include/linux/bpf.h
> index 4c730863fa77..0bcc4f89af40 100644
> --- a/include/linux/bpf.h
> +++ b/include/linux/bpf.h
> @@ -67,6 +67,8 @@ struct bpf_map_ops {
>   	void *(*map_lookup_elem_sys_only)(struct bpf_map *map, void *key);
>   	int (*map_lookup_batch)(struct bpf_map *map, const union bpf_attr *attr,
>   				union bpf_attr __user *uattr);
> +	int (*map_lookup_and_delete_elem)(struct bpf_map *map, void *key,
> +					  void *value);
>   	int (*map_lookup_and_delete_batch)(struct bpf_map *map,
>   					   const union bpf_attr *attr,
>   					   union bpf_attr __user *uattr);
> diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
> index 330d721dd2af..8c3334d1b6b3 100644
> --- a/kernel/bpf/hashtab.c
> +++ b/kernel/bpf/hashtab.c
> @@ -1401,6 +1401,82 @@ static void htab_map_seq_show_elem(struct bpf_map *map, void *key,
>   	rcu_read_unlock();
>   }
>   
> +static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
> +					     void *value, bool is_lru_map,
> +					     bool is_percpu)
> +{
> +	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
> +	u32 hash, key_size, value_size;
> +	struct hlist_nulls_head *head;
> +	int cpu, off = 0, ret;
> +	struct htab_elem *l;
> +	unsigned long flags;
> +	void __percpu *pptr;
> +	struct bucket *b;
> +
> +	key_size = map->key_size;
> +	value_size = round_up(map->value_size, 8);
> +
> +	hash = htab_map_hash(key, key_size, htab->hashrnd);
> +	b = __select_bucket(htab, hash);
> +	head = &b->head;
> +
> +	ret = htab_lock_bucket(htab, b, hash, &flags);
> +	if (ret)
> +		return ret;
> +
> +	l = lookup_elem_raw(head, hash, key, key_size);
> +	if (l) {
> +		if (is_percpu) {
> +			pptr = htab_elem_get_ptr(l, key_size);
> +			for_each_possible_cpu(cpu) {
> +				bpf_long_memcpy(value + off,
> +						per_cpu_ptr(pptr, cpu),
> +						value_size);
> +				off += value_size;
> +			}
> +		} else {
> +			copy_map_value(map, value, l->key + round_up(key_size, 8));

For hashtab lookup elem, BPF_F_LOCK flag may be set by user, I think 
hashtab lookup_and_delete_elem should also support this flag, so user
can ensure they always get a lock protected sane value.

We have the following libbpf APIs.

LIBBPF_API int bpf_map_lookup_elem(int fd, const void *key, void *value);
LIBBPF_API int bpf_map_lookup_elem_flags(int fd, const void *key, void 
*value,
                                          __u64 flags);
LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key,
                                               void *value);

Previously, bpf_map_lookup_and_delete_elem only supports queue/stack,
which does not need flags as it does not support BPF_F_LOCK so we
are fine.

Maybe similar to bpf_map_lookup_elem_flags() we add a
bpf_map_lookup_and_delete_elem_flags()? Maybe libbpf v1.0
can consolidate into a better uniform api.

> +		}
> +
> +		hlist_nulls_del_rcu(&l->hash_node);
> +		if (!is_lru_map)
> +			free_htab_elem(htab, l);
> +	} else
> +		ret = -ENOENT;
> +
> +	htab_unlock_bucket(htab, b, hash, flags);
> +
> +	if (is_lru_map && l)
> +		bpf_lru_push_free(&htab->lru, &l->lru_node);
> +
> +	return ret;
> +}
> +
> +static int htab_map_lookup_and_delete_elem(struct bpf_map *map,
> +					   void *key, void *value)
> +{
> +	return __htab_map_lookup_and_delete_elem(map, key, value, false, false);
> +}
> +
> +static int htab_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
> +						  void *key, void *value)
> +{
> +	return __htab_map_lookup_and_delete_elem(map, key, value, false, true);
> +}
> +
> +static int htab_lru_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
> +					       void *value)
> +{
> +	return __htab_map_lookup_and_delete_elem(map, key, value, true, false);
> +}
> +
> +static int htab_lru_percpu_map_lookup_and_delete_elem(struct bpf_map *map,
> +						      void *key, void *value)
> +{
> +	return __htab_map_lookup_and_delete_elem(map, key, value, true, true);
> +}
> +
>   static int
>   __htab_map_lookup_and_delete_batch(struct bpf_map *map,
>   				   const union bpf_attr *attr,
> @@ -1934,6 +2010,7 @@ const struct bpf_map_ops htab_map_ops = {
>   	.map_free = htab_map_free,
>   	.map_get_next_key = htab_map_get_next_key,
>   	.map_lookup_elem = htab_map_lookup_elem,
> +	.map_lookup_and_delete_elem = htab_map_lookup_and_delete_elem,
>   	.map_update_elem = htab_map_update_elem,
>   	.map_delete_elem = htab_map_delete_elem,
>   	.map_gen_lookup = htab_map_gen_lookup,
> @@ -1954,6 +2031,7 @@ const struct bpf_map_ops htab_lru_map_ops = {
>   	.map_free = htab_map_free,
>   	.map_get_next_key = htab_map_get_next_key,
>   	.map_lookup_elem = htab_lru_map_lookup_elem,
> +	.map_lookup_and_delete_elem = htab_lru_map_lookup_and_delete_elem,
>   	.map_lookup_elem_sys_only = htab_lru_map_lookup_elem_sys,
>   	.map_update_elem = htab_lru_map_update_elem,
>   	.map_delete_elem = htab_lru_map_delete_elem,
> @@ -2077,6 +2155,7 @@ const struct bpf_map_ops htab_percpu_map_ops = {
>   	.map_free = htab_map_free,
>   	.map_get_next_key = htab_map_get_next_key,
>   	.map_lookup_elem = htab_percpu_map_lookup_elem,
> +	.map_lookup_and_delete_elem = htab_percpu_map_lookup_and_delete_elem,
>   	.map_update_elem = htab_percpu_map_update_elem,
>   	.map_delete_elem = htab_map_delete_elem,
>   	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
> @@ -2096,6 +2175,7 @@ const struct bpf_map_ops htab_lru_percpu_map_ops = {
>   	.map_free = htab_map_free,
>   	.map_get_next_key = htab_map_get_next_key,
>   	.map_lookup_elem = htab_lru_percpu_map_lookup_elem,
> +	.map_lookup_and_delete_elem = htab_lru_percpu_map_lookup_and_delete_elem,
>   	.map_update_elem = htab_lru_percpu_map_update_elem,
>   	.map_delete_elem = htab_lru_map_delete_elem,
>   	.map_seq_show_elem = htab_percpu_map_seq_show_elem,
> diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
> index c859bc46d06c..2634aa4a2f37 100644
> --- a/kernel/bpf/syscall.c
> +++ b/kernel/bpf/syscall.c
> @@ -1495,7 +1495,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
>   		goto err_put;
>   	}
>   
> -	value_size = map->value_size;
> +	value_size = bpf_map_value_size(map);
>   
>   	err = -ENOMEM;
>   	value = kmalloc(value_size, GFP_USER | __GFP_NOWARN);
> @@ -1505,6 +1505,18 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
>   	if (map->map_type == BPF_MAP_TYPE_QUEUE ||
>   	    map->map_type == BPF_MAP_TYPE_STACK) {
>   		err = map->ops->map_pop_elem(map, value);
> +	} else if (map->map_type == BPF_MAP_TYPE_HASH ||
> +		   map->map_type == BPF_MAP_TYPE_PERCPU_HASH ||
> +		   map->map_type == BPF_MAP_TYPE_LRU_HASH ||
> +		   map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH) {
> +		if (!bpf_map_is_dev_bound(map)) {
> +			bpf_disable_instrumentation();
> +			rcu_read_lock();
> +			err = map->ops->map_lookup_and_delete_elem(map, key, value);
> +			rcu_read_unlock();
> +			bpf_enable_instrumentation();
> +			maybe_wait_bpf_programs(map);

maybe_wait_bpf_programs(map) is mostly for map-in-map.
but I think it is okay to put it here in case in the future
we will support map-in-map here. If maybe_wait_bpf_programs()
get inlined which mostly likely is the case, the compiler
should be able to optimize it away.

> +		}
>   	} else {
>   		err = -ENOTSUPP;
>   	}
> diff --git a/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
> new file mode 100644
> index 000000000000..05123bbcdc1c
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
> @@ -0,0 +1,283 @@
> +// SPDX-License-Identifier: GPL-2.0-only
> +
> +#include <test_progs.h>
> +#include "test_lookup_and_delete.skel.h"
> +
> +#define START_VALUE 1234
> +#define NEW_VALUE 4321
> +#define MAX_ENTRIES 2
> +
> +static int duration;
> +static int nr_cpus;
> +
> +static int fill_values(int map_fd)
> +{
> +	__u64 key, value = START_VALUE;
> +	int err;
> +
> +	for (key = 1; key < MAX_ENTRIES + 1; key++) {
> +		err = bpf_map_update_elem(map_fd, &key, &value, BPF_NOEXIST);
> +		if (CHECK(err, "bpf_map_update_elem", "failed\n"))

You can use
	if (!ASSERT_OK(err, "bpf_map_update_elem"))
to save you from explicit "failed" string.
The same for some later other CHECK usages.

> +			return -1;
> +	}
> +
> +	return 0;
> +}
> +
> +static int fill_values_percpu(int map_fd)
> +{
> +	BPF_DECLARE_PERCPU(__u64, value);
> +	int i, err;
> +	u64 key;
> +
> +	for (i = 0; i < nr_cpus; i++)
> +		bpf_percpu(value, i) = START_VALUE;
> +
> +	for (key = 1; key < MAX_ENTRIES + 1; key++) {
> +		err = bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST);
> +		if (CHECK(err, "bpf_map_update_elem", "failed\n"))
> +			return -1;
> +	}
> +
> +	return 0;
> +}
> +
[...]
> diff --git a/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c b/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c
> new file mode 100644
> index 000000000000..eb19de8bb415
> --- /dev/null
> +++ b/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c
> @@ -0,0 +1,26 @@
> +// SPDX-License-Identifier: GPL-2.0
> +
> +#include "vmlinux.h"
> +#include <bpf/bpf_helpers.h>
> +
> +__u32 set_pid;
> +__u64 set_key;
> +__u64 set_value;

Please add "= 0" to the above declaration to make
it llvm10 friendly.

> +
> +struct {
> +	__uint(type, BPF_MAP_TYPE_HASH);
> +	__uint(max_entries, 2);
> +	__type(key, __u64);
> +	__type(value, __u64);
> +} hash_map SEC(".maps");
> +
> +SEC("tp/syscalls/sys_enter_getpgid")
> +int bpf_lookup_and_delete_test(const void *ctx)
> +{
> +	if (set_pid == bpf_get_current_pid_tgid() >> 32)
> +		bpf_map_update_elem(&hash_map, &set_key, &set_value, BPF_NOEXIST);
> +
> +	return 0;
> +}
> +
> +char _license[] SEC("license") = "GPL";
[...]

  reply	other threads:[~2021-03-02 10:38 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2021-02-28  9:40 [PATCH v2 bpf-next] bpf: add lookup_and_delete_elem support to hashtab Denis Salopek
2021-03-02  2:02 ` Yonghong Song [this message]
2021-03-13  8:54   ` Denis Salopek
2021-03-15  0:08     ` Yonghong Song

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=d8104f41-4302-7b68-5bc1-fb014a261e42@fb.com \
    --to=yhs@fb.com \
    --cc=bpf@vger.kernel.org \
    --cc=denis.salopek@sartura.hr \
    --cc=juraj.vijtiuk@sartura.hr \
    --cc=luka.oreskovic@sartura.hr \
    --cc=luka.perkov@sartura.hr \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.