netdev.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Martin KaFai Lau <kafai@fb.com>
To: <netdev@vger.kernel.org>, <linux-kernel@vger.kernel.org>
Cc: FB Kernel Team <kernel-team@fb.com>,
	Alexei Starovoitov <alexei.starovoitov@gmail.com>
Subject: [PATCH net-next 2/4] bpf: bpf_htab: Add BPF_MAP_TYPE_PERCPU_HASH
Date: Thu, 7 Jan 2016 14:35:53 -0800	[thread overview]
Message-ID: <1452206155-1492617-3-git-send-email-kafai@fb.com> (raw)
In-Reply-To: <1452206155-1492617-1-git-send-email-kafai@fb.com>

This patch adds BPFMAP_TYPE_PERCPU_HASH map type and its
htab_map_ops implementation.

Signed-off-by: Martin KaFai Lau <kafai@fb.com>
---
 include/uapi/linux/bpf.h |   1 +
 kernel/bpf/hashtab.c     | 201 ++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 201 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 8bed7f1..e4f8060 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -81,6 +81,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_ARRAY,
 	BPF_MAP_TYPE_PROG_ARRAY,
 	BPF_MAP_TYPE_PERF_EVENT_ARRAY,
+	BPF_MAP_TYPE_PERCPU_HASH,
 };
 
 enum bpf_prog_type {
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index d55df8c..63f2945 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -278,7 +278,7 @@ find_first_elem:
 }
 
 static struct htab_elem_common *htab_elem_common_alloc(struct bpf_htab *htab,
-							void *key)
+						       void *key)
 {
 	struct htab_elem_common *l;
 
@@ -451,9 +451,208 @@ static struct bpf_map_type_list htab_type __read_mostly = {
 	.type = BPF_MAP_TYPE_HASH,
 };
 
+/* each htab_percpu_elem is struct htab_percpu_elem + key  */
+struct htab_percpu_elem {
+	struct htab_elem_common common;
+	void * __percpu value;
+	char key[0] __aligned(8);
+};
+
+static struct htab_percpu_elem *htab_percpu_elem(struct htab_elem_common *l)
+{
+	return (struct htab_percpu_elem *)l;
+}
+
+static void htab_percpu_elem_free(struct htab_percpu_elem *l)
+{
+	free_percpu(l->value);
+	kfree(l);
+}
+
+static void htab_percpu_elem_rcu_free(struct rcu_head *head)
+{
+	struct htab_elem_common *l = container_of(head,
+						  struct htab_elem_common,
+						  rcu);
+
+	htab_percpu_elem_free(htab_percpu_elem(l));
+}
+
+static void htab_percpu_map_flush(struct bpf_htab *htab)
+{
+	int i;
+
+	for (i = 0; i < htab->n_buckets; i++) {
+		struct hlist_head *head = select_bucket(htab, i);
+		struct hlist_node *n;
+		struct htab_elem_common *l;
+
+		hlist_for_each_entry_safe(l, n, head, hash_node) {
+			hlist_del_rcu(&l->hash_node);
+			atomic_dec(&htab->count);
+			htab_percpu_elem_free(htab_percpu_elem(l));
+		}
+	}
+}
+
+/* Called from syscall */
+static struct bpf_map *htab_percpu_map_alloc(union bpf_attr *attr)
+{
+	u32 elem_size = sizeof(struct htab_percpu_elem) +
+		round_up(attr->key_size, 8);
+	u32 elem_value_size = elem_size +
+		num_possible_cpus() * attr->value_size;
+
+	return __htab_map_alloc(attr, elem_size, elem_value_size,
+				offsetof(struct htab_percpu_elem, key),
+				htab_percpu_map_flush);
+}
+
+/* Called from syscall or from eBPF program */
+static int htab_percpu_map_delete_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	struct htab_elem_common *l;
+	struct hlist_head *head;
+	unsigned long flags;
+	u32 hash, key_size;
+	struct bucket *b;
+	int ret = -ENOENT;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	key_size = map->key_size;
+
+	hash = htab_map_hash(key, key_size);
+	b = __select_bucket(htab, hash);
+	head = &b->head;
+
+	raw_spin_lock_irqsave(&b->lock, flags);
+
+	l = lookup_elem_raw(htab, head, hash, key);
+
+	if (l) {
+		hlist_del_rcu(&l->hash_node);
+		atomic_dec(&htab->count);
+		call_rcu(&l->rcu, htab_percpu_elem_rcu_free);
+		ret = 0;
+	}
+
+	raw_spin_unlock_irqrestore(&b->lock, flags);
+	return ret;
+}
+
+/* Called from syscall or eBPF program */
+static void *htab_percpu_map_lookup_elem(struct bpf_map *map, void *key)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	struct htab_elem_common *l;
+
+	l = __htab_map_lookup_elem(htab, key);
+	if (l) {
+		void *value = per_cpu_ptr(htab_percpu_elem(l)->value,
+					  smp_processor_id());
+		return value;
+	}
+
+	return NULL;
+
+}
+
+/* Called from syscall or from eBPF program */
+static int htab_percpu_map_update_elem(struct bpf_map *map, void *key,
+				       void *value, u64 map_flags)
+{
+	struct bpf_htab *htab = container_of(map, struct bpf_htab, map);
+	struct htab_percpu_elem *l_new, *l_old;
+	struct hlist_head *head;
+	struct bucket *b;
+	unsigned long flags;
+	int ret;
+
+	if (map_flags > BPF_EXIST)
+		/* unknown flags */
+		return -EINVAL;
+
+	WARN_ON_ONCE(!rcu_read_lock_held());
+
+	/* allocate new element outside of lock */
+	l_new = htab_percpu_elem(htab_elem_common_alloc(htab, key));
+	if (!l_new)
+		return -ENOMEM;
+
+	l_new->value = __alloc_percpu_gfp(htab->map.value_size, 8,
+					  GFP_ATOMIC | __GFP_NOWARN);
+	if (!l_new->value) {
+		htab_percpu_elem_free(l_new);
+		return -ENOMEM;
+	}
+
+	memcpy(raw_cpu_ptr(l_new->value), value, map->value_size);
+
+	b = __select_bucket(htab, l_new->common.hash);
+	head = &b->head;
+
+	/* bpf_map_update_elem() can be called in_irq() */
+	raw_spin_lock_irqsave(&b->lock, flags);
+
+	l_old = htab_percpu_elem(lookup_elem_raw(htab, head, l_new->common.hash,
+						 key));
+
+	if (!l_old && unlikely(atomic_read(&htab->count) >= map->max_entries)) {
+		/* if elem with this 'key' doesn't exist and we've reached
+		 * max_entries limit, fail insertion of new elem
+		 */
+		ret = -E2BIG;
+		goto err;
+	}
+
+	if (l_old && map_flags == BPF_NOEXIST) {
+		/* elem already exists */
+		ret = -EEXIST;
+		goto err;
+	}
+
+	if (!l_old && map_flags == BPF_EXIST) {
+		/* elem doesn't exist, cannot update it */
+		ret = -ENOENT;
+		goto err;
+	}
+
+	if (l_old) {
+		memcpy(this_cpu_ptr(l_old->value), value, map->value_size);
+	} else {
+		hlist_add_head_rcu(&l_new->common.hash_node, head);
+		atomic_inc(&htab->count);
+	}
+
+	raw_spin_unlock_irqrestore(&b->lock, flags);
+
+	return 0;
+err:
+	raw_spin_unlock_irqrestore(&b->lock, flags);
+	htab_percpu_elem_free(l_new);
+	return ret;
+}
+
+static const struct bpf_map_ops htab_percpu_ops = {
+	.map_alloc = htab_percpu_map_alloc,
+	.map_free = htab_map_free,
+	.map_get_next_key = htab_map_get_next_key,
+	.map_lookup_elem = htab_percpu_map_lookup_elem,
+	.map_update_elem = htab_percpu_map_update_elem,
+	.map_delete_elem = htab_percpu_map_delete_elem,
+};
+
+static struct bpf_map_type_list htab_percpu_type __read_mostly = {
+	.ops = &htab_percpu_ops,
+	.type = BPF_MAP_TYPE_PERCPU_HASH,
+};
+
 static int __init register_htab_map(void)
 {
 	bpf_register_map_type(&htab_type);
+	bpf_register_map_type(&htab_percpu_type);
 	return 0;
 }
 late_initcall(register_htab_map);
-- 
2.5.1

  parent reply	other threads:[~2016-01-07 22:36 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-01-07 22:35 [PATCH net-next 0/4] bpf: bpf_htab: Add BPF_MAP_TYPE_PERCPU_HASH Martin KaFai Lau
2016-01-07 22:35 ` [PATCH net-next 1/4] bpf: bpf_htab: Refactor some htab_elem logic Martin KaFai Lau
2016-01-07 22:35 ` Martin KaFai Lau [this message]
2016-01-09 10:06   ` [PATCH net-next 2/4] bpf: bpf_htab: Add BPF_MAP_TYPE_PERCPU_HASH Ming Lei
2016-01-12  3:11     ` Martin KaFai Lau
2016-01-12  7:44       ` Martin KaFai Lau
2016-01-09 10:33   ` Ming Lei
2016-01-07 22:35 ` [PATCH net-next 3/4] bpf: bpf_htab: Add syscall to iterate percpu value of a key Martin KaFai Lau
2016-01-07 22:35 ` [PATCH net-next 4/4] bpf: bpf_htab: Test for BPF_MAP_TYPE_PERCPU_HASH Martin KaFai Lau
2016-01-08  6:55 ` [PATCH net-next 0/4] bpf: bpf_htab: Add BPF_MAP_TYPE_PERCPU_HASH Ming Lei
2016-01-09  0:44   ` Martin KaFai Lau
2016-01-09  9:39     ` Ming Lei
2016-01-10  2:30       ` Martin KaFai Lau
2016-01-11  2:20         ` Ming Lei
2016-01-11 22:35           ` Martin KaFai Lau
2016-01-12  5:48             ` Ming Lei
2016-01-12  6:00               ` Alexei Starovoitov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1452206155-1492617-3-git-send-email-kafai@fb.com \
    --to=kafai@fb.com \
    --cc=alexei.starovoitov@gmail.com \
    --cc=kernel-team@fb.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=netdev@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).