From: Alexei Starovoitov <ast@fb.com>
To: "David S . Miller" <davem@davemloft.net>
Cc: Daniel Borkmann <daniel@iogearbox.net>,
Daniel Wagner <daniel.wagner@bmw-carit.de>,
Tom Zanussi <tom.zanussi@linux.intel.com>,
Wang Nan <wangnan0@huawei.com>, He Kuang <hekuang@huawei.com>,
Martin KaFai Lau <kafai@fb.com>,
Brendan Gregg <brendan.d.gregg@gmail.com>,
<netdev@vger.kernel.org>, <linux-kernel@vger.kernel.org>,
<kernel-team@fb.com>
Subject: [PATCH v2 net-next 02/12] bpf: introduce percpu_freelist
Date: Mon, 7 Mar 2016 21:57:14 -0800 [thread overview]
Message-ID: <1457416641-306326-3-git-send-email-ast@fb.com> (raw)
In-Reply-To: <1457416641-306326-1-git-send-email-ast@fb.com>
Introduce simple percpu_freelist to keep single list of elements
spread across per-cpu singly linked lists.
/* push element into the list */
void pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *);
/* pop element from the list */
struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *);
The object is pushed to the current cpu list.
Pop first trying to get the object from the current cpu list,
if it's empty goes to the neigbour cpu list.
For bpf program usage pattern the collision rate is very low,
since programs push and pop the objects typically on the same cpu.
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
kernel/bpf/Makefile | 2 +-
kernel/bpf/percpu_freelist.c | 100 +++++++++++++++++++++++++++++++++++++++++++
kernel/bpf/percpu_freelist.h | 31 ++++++++++++++
3 files changed, 132 insertions(+), 1 deletion(-)
create mode 100644 kernel/bpf/percpu_freelist.c
create mode 100644 kernel/bpf/percpu_freelist.h
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 8a932d079c24..eed911d091da 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -1,7 +1,7 @@
obj-y := core.o
obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o
-obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o
+obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o
ifeq ($(CONFIG_PERF_EVENTS),y)
obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
endif
diff --git a/kernel/bpf/percpu_freelist.c b/kernel/bpf/percpu_freelist.c
new file mode 100644
index 000000000000..5c51d1985b51
--- /dev/null
+++ b/kernel/bpf/percpu_freelist.c
@@ -0,0 +1,100 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include "percpu_freelist.h"
+
+int pcpu_freelist_init(struct pcpu_freelist *s)
+{
+ int cpu;
+
+ s->freelist = alloc_percpu(struct pcpu_freelist_head);
+ if (!s->freelist)
+ return -ENOMEM;
+
+ for_each_possible_cpu(cpu) {
+ struct pcpu_freelist_head *head = per_cpu_ptr(s->freelist, cpu);
+
+ raw_spin_lock_init(&head->lock);
+ head->first = NULL;
+ }
+ return 0;
+}
+
+void pcpu_freelist_destroy(struct pcpu_freelist *s)
+{
+ free_percpu(s->freelist);
+}
+
+static inline void __pcpu_freelist_push(struct pcpu_freelist_head *head,
+ struct pcpu_freelist_node *node)
+{
+ raw_spin_lock(&head->lock);
+ node->next = head->first;
+ head->first = node;
+ raw_spin_unlock(&head->lock);
+}
+
+void pcpu_freelist_push(struct pcpu_freelist *s,
+ struct pcpu_freelist_node *node)
+{
+ struct pcpu_freelist_head *head = this_cpu_ptr(s->freelist);
+
+ __pcpu_freelist_push(head, node);
+}
+
+void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size,
+ u32 nr_elems)
+{
+ struct pcpu_freelist_head *head;
+ unsigned long flags;
+ int i, cpu, pcpu_entries;
+
+ pcpu_entries = nr_elems / num_possible_cpus() + 1;
+ i = 0;
+
+ /* disable irq to workaround lockdep false positive
+ * in bpf usage pcpu_freelist_populate() will never race
+ * with pcpu_freelist_push()
+ */
+ local_irq_save(flags);
+ for_each_possible_cpu(cpu) {
+again:
+ head = per_cpu_ptr(s->freelist, cpu);
+ __pcpu_freelist_push(head, buf);
+ i++;
+ buf += elem_size;
+ if (i == nr_elems)
+ break;
+ if (i % pcpu_entries)
+ goto again;
+ }
+ local_irq_restore(flags);
+}
+
+struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *s)
+{
+ struct pcpu_freelist_head *head;
+ struct pcpu_freelist_node *node;
+ int orig_cpu, cpu;
+
+ orig_cpu = cpu = raw_smp_processor_id();
+ while (1) {
+ head = per_cpu_ptr(s->freelist, cpu);
+ raw_spin_lock(&head->lock);
+ node = head->first;
+ if (node) {
+ head->first = node->next;
+ raw_spin_unlock(&head->lock);
+ return node;
+ }
+ raw_spin_unlock(&head->lock);
+ cpu = cpumask_next(cpu, cpu_possible_mask);
+ if (cpu >= nr_cpu_ids)
+ cpu = 0;
+ if (cpu == orig_cpu)
+ return NULL;
+ }
+}
diff --git a/kernel/bpf/percpu_freelist.h b/kernel/bpf/percpu_freelist.h
new file mode 100644
index 000000000000..3049aae8ea1e
--- /dev/null
+++ b/kernel/bpf/percpu_freelist.h
@@ -0,0 +1,31 @@
+/* Copyright (c) 2016 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#ifndef __PERCPU_FREELIST_H__
+#define __PERCPU_FREELIST_H__
+#include <linux/spinlock.h>
+#include <linux/percpu.h>
+
+struct pcpu_freelist_head {
+ struct pcpu_freelist_node *first;
+ raw_spinlock_t lock;
+};
+
+struct pcpu_freelist {
+ struct pcpu_freelist_head __percpu *freelist;
+};
+
+struct pcpu_freelist_node {
+ struct pcpu_freelist_node *next;
+};
+
+void pcpu_freelist_push(struct pcpu_freelist *, struct pcpu_freelist_node *);
+struct pcpu_freelist_node *pcpu_freelist_pop(struct pcpu_freelist *);
+void pcpu_freelist_populate(struct pcpu_freelist *s, void *buf, u32 elem_size,
+ u32 nr_elems);
+int pcpu_freelist_init(struct pcpu_freelist *);
+void pcpu_freelist_destroy(struct pcpu_freelist *s);
+#endif
--
2.8.0.rc1
next prev parent reply other threads:[~2016-03-08 5:57 UTC|newest]
Thread overview: 14+ messages / expand[flat|nested] mbox.gz Atom feed top
2016-03-08 5:57 [PATCH v2 net-next 0/12] bpf: map pre-alloc Alexei Starovoitov
2016-03-08 5:57 ` [PATCH v2 net-next 01/12] bpf: prevent kprobe+bpf deadlocks Alexei Starovoitov
2016-03-08 5:57 ` Alexei Starovoitov [this message]
2016-03-08 5:57 ` [PATCH v2 net-next 03/12] bpf: pre-allocate hash map elements Alexei Starovoitov
2016-03-08 5:57 ` [PATCH v2 net-next 04/12] bpf: check for reserved flag bits in array and stack maps Alexei Starovoitov
2016-03-08 5:57 ` [PATCH v2 net-next 05/12] bpf: convert stackmap to pre-allocation Alexei Starovoitov
2016-03-08 5:57 ` [PATCH v2 net-next 06/12] samples/bpf: make map creation more verbose Alexei Starovoitov
2016-03-08 5:57 ` [PATCH v2 net-next 07/12] samples/bpf: move ksym_search() into library Alexei Starovoitov
2016-03-08 5:57 ` [PATCH v2 net-next 08/12] samples/bpf: add map_flags to bpf loader Alexei Starovoitov
2016-03-08 5:57 ` [PATCH v2 net-next 09/12] samples/bpf: test both pre-alloc and normal maps Alexei Starovoitov
2016-03-08 9:13 ` [PATCH v2 net-next 0/12] bpf: map pre-alloc Daniel Wagner
2016-03-08 16:38 ` Alexei Starovoitov
2016-03-08 20:31 ` David Miller
2016-03-08 23:05 ` Alexei Starovoitov
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1457416641-306326-3-git-send-email-ast@fb.com \
--to=ast@fb.com \
--cc=brendan.d.gregg@gmail.com \
--cc=daniel.wagner@bmw-carit.de \
--cc=daniel@iogearbox.net \
--cc=davem@davemloft.net \
--cc=hekuang@huawei.com \
--cc=kafai@fb.com \
--cc=kernel-team@fb.com \
--cc=linux-kernel@vger.kernel.org \
--cc=netdev@vger.kernel.org \
--cc=tom.zanussi@linux.intel.com \
--cc=wangnan0@huawei.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).