From: Daniel Borkmann <daniel@iogearbox.net>
To: Ming Lei <tom.leiming@gmail.com>,
	linux-kernel@vger.kernel.org, Alexei Starovoitov <ast@kernel.org>
Cc: "David S. Miller" <davem@davemloft.net>,
	netdev@vger.kernel.org, Ming Lei <ming.lei@canonical.com>
Subject: Re: [PATCH 3/3] bpf: hash: use per-bucket spinlock
Date: Mon, 28 Dec 2015 10:13:05 +0100	[thread overview]
Message-ID: <5680FD21.30108@iogearbox.net> (raw)
In-Reply-To: <1451122294-16524-4-git-send-email-tom.leiming@gmail.com>

On 12/26/2015 10:31 AM, Ming Lei wrote:
> From: Ming Lei <ming.lei@canonical.com>
>
> Both htab_map_update_elem() and htab_map_delete_elem() can be
> called from an eBPF program, and they may be on the kernel hot path,
> so it isn't efficient to use a per-hashtable lock in these two
> helpers.
>
> The per-hashtable spinlock is used just for protecting each bucket's
> hlist, so a per-bucket lock should be enough. This patch converts
> the per-hashtable lock into per-bucket spinlocks, so that contention
> can be decreased significantly.
>
> Signed-off-by: Ming Lei <ming.lei@canonical.com>
> ---
>   kernel/bpf/hashtab.c | 35 +++++++++++++++++++++++++----------
>   1 file changed, 25 insertions(+), 10 deletions(-)
>
> diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
> index d857fcb..66bad7a 100644
> --- a/kernel/bpf/hashtab.c
> +++ b/kernel/bpf/hashtab.c
> @@ -14,9 +14,14 @@
>   #include <linux/filter.h>
>   #include <linux/vmalloc.h>
>
> +struct bucket {
> +	struct hlist_head head;
> +	raw_spinlock_t lock;
> +};
> +
>   struct bpf_htab {
>   	struct bpf_map map;
> -	struct hlist_head *buckets;
> +	struct bucket *buckets;
>   	raw_spinlock_t lock;

Shouldn't the lock member be removed from here?
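
Otherwise the now-unused per-hashtable lock just lingers around. I.e. I would
expect the struct to end up looking roughly like this (only a sketch, remaining
members elided):

	struct bpf_htab {
		struct bpf_map map;
		struct bucket *buckets;
		atomic_t count;	/* number of elements in this hashtable */
		u32 n_buckets;	/* number of hash buckets */
		/* ... */
	};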

>   	atomic_t count;	/* number of elements in this hashtable */
>   	u32 n_buckets;	/* number of hash buckets */
> @@ -88,24 +93,25 @@ static struct bpf_map *htab_map_alloc(union bpf_attr *attr)
>   		/* make sure page count doesn't overflow */
>   		goto free_htab;
>
> -	htab->map.pages = round_up(htab->n_buckets * sizeof(struct hlist_head) +
> +	htab->map.pages = round_up(htab->n_buckets * sizeof(struct bucket) +
>   				   htab->elem_size * htab->map.max_entries,
>   				   PAGE_SIZE) >> PAGE_SHIFT;
>
>   	err = -ENOMEM;
> -	htab->buckets = kmalloc_array(htab->n_buckets, sizeof(struct hlist_head),
> +	htab->buckets = kmalloc_array(htab->n_buckets, sizeof(struct bucket),
>   				      GFP_USER | __GFP_NOWARN);
>
>   	if (!htab->buckets) {
> -		htab->buckets = vmalloc(htab->n_buckets * sizeof(struct hlist_head));
> +		htab->buckets = vmalloc(htab->n_buckets * sizeof(struct bucket));
>   		if (!htab->buckets)
>   			goto free_htab;
>   	}
>
> -	for (i = 0; i < htab->n_buckets; i++)
> -		INIT_HLIST_HEAD(&htab->buckets[i]);
> +	for (i = 0; i < htab->n_buckets; i++) {
> +		INIT_HLIST_HEAD(&htab->buckets[i].head);
> +		raw_spin_lock_init(&htab->buckets[i].lock);
> +	}
>
> -	raw_spin_lock_init(&htab->lock);
>   	atomic_set(&htab->count, 0);
>
>   	return &htab->map;
> @@ -120,11 +126,16 @@ static inline u32 htab_map_hash(const void *key, u32 key_len)
>   	return jhash(key, key_len, 0);
>   }
>
> -static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
> +static inline struct bucket *__select_bucket(struct bpf_htab *htab, u32 hash)
>   {
>   	return &htab->buckets[hash & (htab->n_buckets - 1)];
>   }
>
> +static inline struct hlist_head *select_bucket(struct bpf_htab *htab, u32 hash)
> +{
> +	return &__select_bucket(htab, hash)->head;
> +}
> +
>   static struct htab_elem *lookup_elem_raw(struct hlist_head *head, u32 hash,
>   					 void *key, u32 key_size)
>   {
> @@ -227,6 +238,7 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
>   	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
>   	struct htab_elem *l_new, *l_old;
>   	struct hlist_head *head;
> +	struct bucket *b;
>   	unsigned long flags;
>   	u32 key_size;
>   	int ret;
> @@ -248,7 +260,8 @@ static int htab_map_update_elem(struct bpf_map *map, void *key, void *value,
>   	memcpy(l_new->key + round_up(key_size, 8), value, map->value_size);
>
>   	l_new->hash = htab_map_hash(l_new->key, key_size);
> -	head = select_bucket(htab, l_new->hash);
> +	b = __select_bucket(htab, l_new->hash);
> +	head = &b->head;
>
>   	/* bpf_map_update_elem() can be called in_irq() */
>   	raw_spin_lock_irqsave(&htab->lock, flags);

I am a bit confused on this one, though.

The raw spinlock taken here is still the per-hashtable one (htab->lock); it has not
been converted to the per-bucket lock in this patch, contrary to what the commit
message says. So the patch goes in the right direction but seems incomplete:
shouldn't the above take b->lock instead, for example?
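
Something along these lines is what I would have expected here (just a rough,
untested sketch on top of this patch, reusing the b pointer the patch already
introduces):

	/* bpf_map_update_elem() can be called in_irq() */
	raw_spin_lock_irqsave(&b->lock, flags);

	/* ... existing update logic walking/modifying b->head ... */

	raw_spin_unlock_irqrestore(&b->lock, flags);

With that, the lock member could then be dropped from struct bpf_htab entirely.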

> @@ -299,6 +312,7 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
>   {
>   	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
>   	struct hlist_head *head;
> +	struct bucket *b;
>   	struct htab_elem *l;
>   	unsigned long flags;
>   	u32 hash, key_size;
> @@ -309,7 +323,8 @@ static int htab_map_delete_elem(struct bpf_map *map, void *key)
>   	key_size = map->key_size;
>
>   	hash = htab_map_hash(key, key_size);
> -	head = select_bucket(htab, hash);
> +	b = __select_bucket(htab, hash);
> +	head = &b->head;
>
>   	raw_spin_lock_irqsave(&htab->lock, flags);
>

Same here?
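
The delete path would want the same substitution, e.g. (again only a sketch):

	raw_spin_lock_irqsave(&b->lock, flags);

	l = lookup_elem_raw(head, hash, key, key_size);
	/* ... unlink and free the element if found ... */

	raw_spin_unlock_irqrestore(&b->lock, flags);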

Thanks,
Daniel
