* [RFC PATCH 11/16] bpf: cfg: detect irreducible loop using Eric Stoltz algorithm
From: John Fastabend @ 2018-06-01 9:33 UTC (permalink / raw)
To: alexei.starovoitov, daniel, davem; +Cc: netdev
In-Reply-To: <20180601092646.15353.28269.stgit@john-Precision-Tower-5810>
From: Jiong Wang <jiong.wang@netronome.com>
We don't want to do any further loop bounds analysis for irreducible loop,
so just reject programs containing it.
The current DOM based loop detection can't detect irreducible loop. We'd
use the algorithm given by Eric Stoltz to detect it. The algorithm requires
DOM info.
Algorithm pseudo code:
test_dom(a,b) returns TRUE if a dominates b
push( v ) pushes v onto a reverse topologically-sorted stack
top_sort( entry node )
top_sort( node v ) {
mark_visited( v );
Visit all successors s of v {
if (mark_visited(s) &&
!pushed(s) &&
!test_dom(s, v)) {
Irreducible_Graph = TRUE;
Exit -- no need to continue now!
}
if(!mark_visited(s))
top_sort( s );
}
push( v );
}
Tested by:
int xdp_prog1(struct xdp_md *ctx) {
char *data = (char *)(unsigned long)(ctx->data);
char *data_end = (char *)(unsigned long)(ctx->data_end);
if (data + 60 > (unsigned char *)(unsigned long)ctx->data_end)
return XDP_ABORTED;
char *p = data + 14;
char *mark = *(char **)p;
unsigned long mark2 = *(unsigned long *)(p + 24);
mark += 1;
if (p + 1 > data) {
B:
mark += 2;
if (mark < mark2 * 3)
goto C;
} else if (mark < data) {
C:
mark += 1;
if (mark < 1000)
goto B;
}
return XDP_PASS;
}
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
kernel/bpf/cfg.c | 117 ++++++++++++++++++++++++++++++++++++++++++++++---
kernel/bpf/cfg.h | 5 ++
kernel/bpf/verifier.c | 9 ++++
3 files changed, 123 insertions(+), 8 deletions(-)
diff --git a/kernel/bpf/cfg.c b/kernel/bpf/cfg.c
index 2a9b513..c67541c 100644
--- a/kernel/bpf/cfg.c
+++ b/kernel/bpf/cfg.c
@@ -767,6 +767,109 @@ bool subprog_has_loop(struct cfg_node_allocator *allocator,
return false;
}
+/* We don't want to do any further loop bounds analysis for irreducible loop,
+ * so just reject programs containing it.
+ *
+ * The current DOM based loop detection can't detect irreducible loop. We'd
+ * use the algorithm given by Eric Stoltz to detect it. The algorithm requires
+ * DOM info.
+ *
+ * Algorithm pseudo code:
+ *
+ * test_dom(a,b) returns TRUE if a dominates b
+ * push( v ) pushes v onto a reverse topologically-sorted stack
+ *
+ * top_sort( entry node )
+ *
+ * top_sort( node v ) {
+ * mark_visited( v );
+ * Visit all successors s of v {
+ * if (mark_visited(s) && !pushed(s) && !test_dom(s, v)) {
+ * Irreducible_Graph = TRUE;
+ * Exit -- no need to continue now!
+ * }
+ * if(!mark_visited(s))
+ * top_sort( s );
+ * }
+ * push( v );
+ * }
+ */
+int subprog_has_irreduciable_loop(struct cfg_node_allocator *allocator,
+ struct bpf_subprog_info *subprog)
+{
+ u16 bb_num = subprog->bb_num - 2, sp = 0, *status;
+ void **bb_list = (void **)&subprog->bbs;
+ struct edge_node **stack, *e, *prev_e;
+ int lane_len = BITS_TO_LONGS(bb_num);
+ struct bb_node *entry_bb, *exit_bb;
+ int found = 0;
+
+ stack = kmalloc_array(bb_num, sizeof(struct edge_node *), GFP_KERNEL);
+ if (!stack)
+ return -ENOMEM;
+
+ status = kcalloc(bb_num, sizeof(u16), GFP_KERNEL);
+ if (!status)
+ return -ENOMEM;
+
+ entry_bb = entry_bb(bb_list);
+ exit_bb = exit_bb(bb_list);
+ e = cfg_node_delink(allocator, &entry_bb->e_succs);
+ prev_e = e;
+
+ while (1) {
+ struct bb_node *bb_dst;
+
+ while (e) {
+ bb_dst = e->dst;
+
+ if (bb_dst == exit_bb ||
+ status[bb_dst->idx] == DFS_NODE_EXPLORED) {
+ prev_e = e;
+ e = cfg_node_delink(allocator, &e->link);
+ continue;
+ }
+
+ if (status[bb_dst->idx] == DFS_NODE_EXPLORING) {
+ u16 src_idx = e->src->idx;
+ unsigned long *bb_map;
+
+ bb_map = subprog->dtree + src_idx * lane_len;
+ if (!test_bit(bb_dst->idx, bb_map)) {
+ found = 1;
+ goto free_and_ret;
+ } else {
+ prev_e = e;
+ e = cfg_node_delink(allocator,
+ &e->link);
+ continue;
+ }
+ }
+
+ status[bb_dst->idx] = DFS_NODE_EXPLORING;
+ stack[sp++] = e;
+ /* e should never be NULL as it couldn't be exit_bb. */
+ e = cfg_node_delink(allocator, &bb_dst->e_succs);
+ }
+
+ if (prev_e->src != entry_bb)
+ status[prev_e->src->idx] = DFS_NODE_EXPLORED;
+
+ if (!sp)
+ break;
+
+ e = stack[--sp];
+ prev_e = e;
+ e = cfg_node_delink(allocator, &e->link);
+ }
+
+free_and_ret:
+ kfree(stack);
+ kfree(status);
+
+ return found;
+}
+
static int cmp_subprogs(const void *a, const void *b)
{
return ((struct bpf_subprog_info *)a)->start -
@@ -824,8 +927,6 @@ static void ci_next(struct cfg_node_allocator *allocator,
ci->callee = cfg_node_delink(allocator, &c->link);
}
-#define EXPLORING 1
-#define EXPLORED 2
int cgraph_check_recursive_unreachable(struct bpf_verifier_env *env,
struct cfg_node_allocator *allocator,
struct bpf_subprog_info *subprog)
@@ -844,19 +945,19 @@ int cgraph_check_recursive_unreachable(struct bpf_verifier_env *env,
}
ci.head = subprog->callees;
ci.callee = subprog->callees;
- status[0] = EXPLORING;
+ status[0] = DFS_NODE_EXPLORING;
while (1) {
while (!ci_end_p(&ci)) {
callee = ci.callee;
idx = callee->callee_idx;
- if (status[idx] == EXPLORING) {
+ if (status[idx] == DFS_NODE_EXPLORING) {
bpf_verifier_log_write(env, "cgraph - recursive call\n");
ret = -EINVAL;
goto err_free;
}
- status[idx] = EXPLORING;
+ status[idx] = DFS_NODE_EXPLORING;
if (sp == 64) {
bpf_verifier_log_write(env, "cgraph - call frame too deep\n");
@@ -870,10 +971,10 @@ int cgraph_check_recursive_unreachable(struct bpf_verifier_env *env,
}
if (ci.head)
- status[ci.head->caller_idx] = EXPLORED;
+ status[ci.head->caller_idx] = DFS_NODE_EXPLORED;
else
/* leaf func. */
- status[idx] = EXPLORED;
+ status[idx] = DFS_NODE_EXPLORED;
if (!sp)
break;
@@ -883,7 +984,7 @@ int cgraph_check_recursive_unreachable(struct bpf_verifier_env *env,
}
for (idx = 0; idx < env->subprog_cnt; idx++)
- if (status[idx] != EXPLORED) {
+ if (status[idx] != DFS_NODE_EXPLORED) {
bpf_verifier_log_write(env, "cgraph - unreachable subprog\n");
ret = -EINVAL;
goto err_free;
diff --git a/kernel/bpf/cfg.h b/kernel/bpf/cfg.h
index cb833bd..8363406 100644
--- a/kernel/bpf/cfg.h
+++ b/kernel/bpf/cfg.h
@@ -35,8 +35,13 @@ int subprog_build_dom_info(struct bpf_verifier_env *env,
struct bpf_subprog_info *subprog);
bool subprog_has_loop(struct cfg_node_allocator *allocator,
struct bpf_subprog_info *subprog);
+int subprog_has_irreduciable_loop(struct cfg_node_allocator *allocator,
+ struct bpf_subprog_info *subprog);
int subprog_init_bb(struct cfg_node_allocator *allocator, void **bb_list,
int subprog_start, int subprog_end);
void subprog_free(struct bpf_subprog_info *subprog, int end_idx);
+#define DFS_NODE_EXPLORING 1
+#define DFS_NODE_EXPLORED 2
+
#endif
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 90619da..49895f3 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -913,6 +913,15 @@ static int check_subprogs(struct bpf_verifier_env *env)
&subprog[cur_subprog]);
if (ret < 0)
goto free_nodes;
+ ret = subprog_has_irreduciable_loop(&allocator,
+ &subprog[cur_subprog]);
+ if (ret < 0)
+ goto free_nodes;
+ if (ret > 0) {
+ verbose(env, "cfg - irreduciable loop detected");
+ ret = -EINVAL;
+ goto free_nodes;
+ }
if (subprog_has_loop(&allocator,
&subprog[cur_subprog])) {
verbose(env, "cfg - loop detected");
^ permalink raw reply related
* [RFC PATCH 10/16] bpf: cfg: reduce memory usage by using singly list + compat pointer
From: John Fastabend @ 2018-06-01 9:33 UTC (permalink / raw)
To: alexei.starovoitov, daniel, davem; +Cc: netdev
In-Reply-To: <20180601092646.15353.28269.stgit@john-Precision-Tower-5810>
From: Jiong Wang <jiong.wang@netronome.com>
Because there are going to be quite a few nodes (bb, edge etc) for a
reasonable sized program, the size of cfg nodes matters.
The cfg traversal used at the moment are mostly via edge which contains
pointers to both src and dst basic block. The traversal is not via links
between basic blocks and edge nodes themselves, so link them with doubly
list_head is unnecessary, and is too heavy on 64-bit host as pointer will
take 8-bytes.
This patch reduce memory usage from two ways:
1. use singly linked list to link basic block and edge nodes.
2. introduce a compact pointer representation for pointers generated from
memory pool. If a pointer is from pool, we could represent it using
pool_base + pool_offset,
struct cfg_node_link {
u16 offset;
s8 idx;
};
the compact pointer "cfg_node_link" will only take 3-bytes.
the delink of the pointer will becomes:
static void *cfg_node_delink(struct cfg_node_allocator *allocator,
struct cfg_node_link *link)
{
s8 idx = link->idx;
if (idx == -1)
return NULL;
return allocator->base[idx] + link->offset;
}
For example, basic blocks nodes changed from:
struct bb_node {
struct list_head l;
struct list_head e_prevs;
struct list_head e_succs;
s16 head;
u16 idx;
};
into:
struct bb_node {
struct cfg_node_link link;
struct cfg_node_link e_prevs;
struct cfg_node_link e_succs;
s16 head;
u16 idx;
};
We reduced node size from 52 to 16. We could further reduce node size to 14
if we reshuffle fields of link/e_prevs/e_succs to avoid alignment padding,
but that will with a cost of code readability.
>From benchmarks like test_xdp_noinline, this patch reduce peek memory usage
of new cfg infrastructure by more than 50%.
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
include/linux/bpf_verifier.h | 7 -
kernel/bpf/cfg.c | 503 +++++++++++++++++++++++-------------------
kernel/bpf/cfg.h | 23 +-
kernel/bpf/verifier.c | 33 +--
4 files changed, 312 insertions(+), 254 deletions(-)
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 83ccbc4..07844ff 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -177,8 +177,11 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
struct bpf_subprog_info {
u32 start; /* insn idx of function entry point */
u16 stack_depth; /* max. stack depth used by this function */
- struct list_head callees; /* callees list. */
- struct list_head bbs; /* basic blocks list. */
+ void *callees; /* callees list. */
+ struct {
+ void *entry_bb; /* basic blocks list head. */
+ void *exit_bb; /* basic blocks list tail. */
+ } bbs;
u32 bb_num; /* total basic block num. */
unsigned long *dtree;
bool dtree_avail;
diff --git a/kernel/bpf/cfg.c b/kernel/bpf/cfg.c
index d213289..2a9b513 100644
--- a/kernel/bpf/cfg.c
+++ b/kernel/bpf/cfg.c
@@ -13,54 +13,55 @@
#include "cfg.h"
+/* The size of cfg nodes matters, therefore we try to avoid using doubly linked
+ * list and we try to use base + offset to address node, by this we only need
+ * to keep offset.
+ */
+struct cfg_node_link {
+ u16 offset;
+ s8 idx;
+};
+
+static void *cfg_node_delink(struct cfg_node_allocator *allocator,
+ struct cfg_node_link *link)
+{
+ s8 idx = link->idx;
+
+ if (idx == -1)
+ return NULL;
+
+ return allocator->base[idx] + link->offset;
+}
+
struct cedge_node {
- struct list_head l;
+ struct cfg_node_link link;
u8 caller_idx;
u8 callee_idx;
};
struct edge_node {
- struct list_head l;
+ struct cfg_node_link link;
struct bb_node *src;
struct bb_node *dst;
};
-struct edge_iter {
- struct list_head *list_head;
- struct edge_node *edge;
+struct bb_node {
+ struct cfg_node_link link;
+ struct cfg_node_link e_prevs;
+ struct cfg_node_link e_succs;
+ s16 head;
+ u16 idx;
};
-#define first_edge(e_list) list_first_entry(e_list, struct edge_node, l)
-#define last_edge(e_list) list_last_entry(e_list, struct edge_node, l)
-#define next_edge(e) list_next_entry(e, l)
+#define entry_bb(bb_list) (struct bb_node *)(*bb_list)
+#define exit_bb(bb_list) (struct bb_node *)(*(bb_list + 1))
-static bool ei_end_p(struct edge_iter *ei)
+static struct bb_node *bb_next(struct cfg_node_allocator *allocator,
+ struct bb_node *bb)
{
- return &ei->edge->l == ei->list_head;
+ return (struct bb_node *)cfg_node_delink(allocator, &bb->link);
}
-static void ei_next(struct edge_iter *ei)
-{
- struct edge_node *e = ei->edge;
-
- ei->edge = next_edge(e);
-}
-
-struct bb_node {
- struct list_head l;
- struct list_head e_prevs;
- struct list_head e_succs;
- u16 head;
- u16 idx;
-};
-
-#define bb_prev(bb) list_prev_entry(bb, l)
-#define bb_next(bb) list_next_entry(bb, l)
-#define bb_first(bb_list) list_first_entry(bb_list, struct bb_node, l)
-#define bb_last(bb_list) list_last_entry(bb_list, struct bb_node, l)
-#define entry_bb(bb_list) bb_first(bb_list)
-#define exit_bb(bb_list) bb_last(bb_list)
-
struct dom_info {
u16 *dfs_parent;
u16 *dfs_order;
@@ -82,9 +83,12 @@ struct dom_info {
u16 dfsnum;
};
-struct node_pool {
- struct list_head l;
- void *data;
+struct mem_frag {
+ struct cfg_node_link link;
+ void *p;
+};
+
+struct pool_head {
u32 size;
u32 used;
};
@@ -97,67 +101,103 @@ struct node_pool {
static int cfg_node_allocator_grow(struct cfg_node_allocator *allocator,
int min_grow_size)
{
- int s = min_grow_size;
- struct node_pool *pool;
- void *data;
+ int s = min_grow_size, pool_cnt = allocator->pool_cnt;
+ struct pool_head *pool;
- s += sizeof(struct node_pool);
+ if (pool_cnt >= MAX_POOL_NUM)
+ return -E2BIG;
+
+ s += sizeof(struct pool_head);
s = ALIGN(s, MEM_CHUNK_SIZE);
- data = kzalloc(s, GFP_KERNEL);
- if (!data)
+ if (s > U16_MAX)
+ return -E2BIG;
+
+ pool = kzalloc(s, GFP_KERNEL);
+ if (!pool)
return -ENOMEM;
- pool = (struct node_pool *)data;
- pool->data = pool + 1;
- pool->size = s - sizeof(struct node_pool);
- pool->used = 0;
- allocator->cur_free_pool = pool;
- list_add_tail(&pool->l, &allocator->pools);
+ allocator->base[pool_cnt] = pool;
+ pool->size = s;
+ pool->used = sizeof(struct pool_head);
+ allocator->pool_cnt++;
return 0;
}
-static void *cfg_node_alloc(struct cfg_node_allocator *allocator, int size)
+static int cfg_node_alloc(struct cfg_node_allocator *allocator,
+ struct mem_frag *frag, int size)
{
- struct node_pool *pool = allocator->cur_free_pool;
+ int pool_idx = allocator->pool_cnt - 1;
+ struct pool_head *pool;
void *p;
+ pool = allocator->base[pool_idx];
if (pool->used + size > pool->size) {
int ret = cfg_node_allocator_grow(allocator, size);
if (ret < 0)
- return NULL;
+ return ret;
- pool = allocator->cur_free_pool;
+ pool_idx++;
+ pool = allocator->base[pool_idx];
}
- p = pool->data + pool->used;
+ p = (void *)pool + pool->used;
+ frag->p = p;
+ frag->link.idx = pool_idx;
+ frag->link.offset = pool->used;
pool->used += size;
- return p;
+ return 0;
}
-static struct bb_node *get_single_bb_nodes(struct cfg_node_allocator *allocator)
+static int get_link_nodes(struct cfg_node_allocator *allocator,
+ struct mem_frag *frag, int num, int elem_size)
{
- int size = sizeof(struct bb_node);
+ int i, ret;
+ struct cfg_node_link *link;
- return (struct bb_node *)cfg_node_alloc(allocator, size);
+ ret = cfg_node_alloc(allocator, frag, num * elem_size);
+ if (ret < 0)
+ return ret;
+
+ for (i = 0; i < num; i++) {
+ link = frag->p + i * elem_size;
+ link->idx = -1;
+ }
+
+ return 0;
}
-static struct edge_node *get_edge_nodes(struct cfg_node_allocator *allocator,
- int num)
+static int get_bb_nodes(struct cfg_node_allocator *allocator,
+ struct mem_frag *frag, int num)
{
- int size = num * sizeof(struct edge_node);
+ struct bb_node *bb;
+ int i, ret;
+
+ ret = get_link_nodes(allocator, frag, num, sizeof(struct bb_node));
+ if (ret < 0)
+ return ret;
+
+ bb = frag->p;
+ for (i = 0; i < num; i++) {
+ bb[i].e_prevs.idx = -1;
+ bb[i].e_succs.idx = -1;
+ }
- return (struct edge_node *)cfg_node_alloc(allocator, size);
+ return 0;
}
-static struct cedge_node *
-get_single_cedge_node(struct cfg_node_allocator *allocator)
+static int get_edge_nodes(struct cfg_node_allocator *allocator,
+ struct mem_frag *frag, int num)
{
- int size = sizeof(struct cedge_node);
+ return get_link_nodes(allocator, frag, num, sizeof(struct edge_node));
+}
- return (struct cedge_node *)cfg_node_alloc(allocator, size);
+static int get_single_cedge_node(struct cfg_node_allocator *allocator,
+ struct mem_frag *frag)
+{
+ return get_link_nodes(allocator, frag, 1, sizeof(struct cedge_node));
}
int cfg_node_allocator_init(struct cfg_node_allocator *allocator,
@@ -167,7 +207,8 @@ int cfg_node_allocator_init(struct cfg_node_allocator *allocator,
s += 2 * bb_num_esti * sizeof(struct edge_node);
s += cedge_num_esti * sizeof(struct cedge_node);
- INIT_LIST_HEAD(&allocator->pools);
+
+ allocator->pool_cnt = 0;
ret = cfg_node_allocator_grow(allocator, s);
if (ret < 0)
return ret;
@@ -177,127 +218,123 @@ int cfg_node_allocator_init(struct cfg_node_allocator *allocator,
void cfg_node_allocator_free(struct cfg_node_allocator *allocator)
{
- struct list_head *pools = &allocator->pools;
- struct node_pool *pool, *tmp;
+ int i, cnt = allocator->pool_cnt;
- pool = first_node_pool(pools);
- list_for_each_entry_safe_from(pool, tmp, pools, l) {
- list_del(&pool->l);
- kfree(pool);
- }
+ for (i = 0; i < cnt; i++)
+ kfree(allocator->base[i]);
}
-int subprog_append_bb(struct cfg_node_allocator *allocator,
- struct list_head *bb_list, int head)
+int subprog_append_bb(struct cfg_node_allocator *allocator, void **bb_list,
+ int head)
{
- struct bb_node *new_bb, *bb;
+ struct bb_node *cur = entry_bb(bb_list);
+ struct bb_node *prev = cur;
+ struct mem_frag frag;
+ int ret;
- list_for_each_entry(bb, bb_list, l) {
- if (bb->head == head)
+ while (cur) {
+ if (cur->head == head)
return 0;
- else if (bb->head > head)
+ else if (cur->head > head)
break;
- }
+ prev = cur;
+ cur = cfg_node_delink(allocator, &cur->link);
+ };
- bb = bb_prev(bb);
- new_bb = get_single_bb_nodes(allocator);
- if (!new_bb)
- return -ENOMEM;
+ ret = get_bb_nodes(allocator, &frag, 1);
+ if (ret < 0)
+ return ret;
- INIT_LIST_HEAD(&new_bb->e_prevs);
- INIT_LIST_HEAD(&new_bb->e_succs);
- new_bb->head = head;
- list_add(&new_bb->l, &bb->l);
+ cur = frag.p;
+ cur->head = head;
+ cur->link = prev->link;
+ prev->link = frag.link;
return 0;
}
-int subprog_fini_bb(struct cfg_node_allocator *allocator,
- struct list_head *bb_list, int subprog_end)
+int subprog_init_bb(struct cfg_node_allocator *allocator, void **bb_list,
+ int subprog_start, int subprog_end)
{
- struct bb_node *bb = get_single_bb_nodes(allocator);
-
- if (!bb)
- return -ENOMEM;
- /* entry bb. */
- bb->head = -1;
- INIT_LIST_HEAD(&bb->e_prevs);
- INIT_LIST_HEAD(&bb->e_succs);
- list_add(&bb->l, bb_list);
-
- bb = get_single_bb_nodes(allocator);
- if (!bb)
- return -ENOMEM;
- /* exit bb. */
- bb->head = subprog_end;
- INIT_LIST_HEAD(&bb->e_prevs);
- INIT_LIST_HEAD(&bb->e_succs);
- list_add_tail(&bb->l, bb_list);
-
- return 0;
-}
+ struct bb_node **list_head = (struct bb_node **)bb_list;
+ struct bb_node **list_tail = (struct bb_node **)(bb_list + 1);
+ struct bb_node *entry_bb, *first_bb, *exit_bb;
+ int ret, s = sizeof(struct bb_node);
+ struct mem_frag frag;
-int subprog_init_bb(struct cfg_node_allocator *allocator,
- struct list_head *bb_list, int subprog_start)
-{
- int ret;
-
- INIT_LIST_HEAD(bb_list);
- ret = subprog_append_bb(allocator, bb_list, subprog_start);
+ ret = get_bb_nodes(allocator, &frag, 3);
if (ret < 0)
return ret;
+ entry_bb = frag.p;
+ *list_head = entry_bb;
+ entry_bb->head = -1;
+ first_bb = frag.p + s;
+ first_bb->head = subprog_start;
+ exit_bb = frag.p + 2 * s;
+ exit_bb->head = subprog_end;
+ entry_bb->link.idx = frag.link.idx;
+ entry_bb->link.offset = frag.link.offset + s;
+ first_bb->link.idx = frag.link.idx;
+ first_bb->link.offset = frag.link.offset + 2 * s;
+ *list_tail = exit_bb;
+
return 0;
}
-static struct bb_node *search_bb_with_head(struct list_head *bb_list, int head)
+static struct bb_node *search_bb_with_head(struct cfg_node_allocator *allocator,
+ void **bb_list, int head)
{
- struct bb_node *bb;
+ struct bb_node *cur = entry_bb(bb_list);
- list_for_each_entry(bb, bb_list, l) {
- if (bb->head == head)
- return bb;
- }
+ while (cur) {
+ if (cur->head == head)
+ return cur;
+
+ cur = cfg_node_delink(allocator, &cur->link);
+ };
return NULL;
}
int subprog_add_bb_edges(struct cfg_node_allocator *allocator,
- struct bpf_insn *insns, struct list_head *bb_list)
+ struct bpf_insn *insns, void **bb_list)
{
- struct bb_node *bb, *exit_bb;
+ struct bb_node *bb = entry_bb(bb_list), *exit_bb;
struct edge_node *edge;
- int bb_num;
+ struct mem_frag frag;
+ int ret, bb_num;
- bb = entry_bb(bb_list);
- edge = get_edge_nodes(allocator, 2);
- if (!edge)
- return -ENOMEM;
+ ret = get_edge_nodes(allocator, &frag, 2);
+ if (ret < 0)
+ return ret;
+ edge = frag.p;
edge->src = bb;
- edge->dst = bb_next(bb);
- list_add_tail(&edge->l, &bb->e_succs);
+ edge->dst = bb_next(allocator, bb);
+ bb->e_succs = frag.link;
edge[1].src = edge->src;
edge[1].dst = edge->dst;
- list_add_tail(&edge[1].l, &edge[1].dst->e_prevs);
+ edge->dst->e_prevs = frag.link;
bb->idx = -1;
exit_bb = exit_bb(bb_list);
exit_bb->idx = -2;
- bb = bb_next(bb);
+ bb = edge->dst;
bb_num = 0;
- list_for_each_entry_from(bb, &exit_bb->l, l) {
+ while (bb && bb != exit_bb) {
+ struct bb_node *next_bb = bb_next(allocator, bb);
bool has_fallthrough, only_has_fallthrough;
bool has_branch, only_has_branch;
- struct bb_node *next_bb = bb_next(bb);
int tail = next_bb->head - 1;
struct bpf_insn insn;
u8 code;
bb->idx = bb_num++;
- edge = get_edge_nodes(allocator, 2);
- if (!edge)
- return -ENOMEM;
+ ret = get_edge_nodes(allocator, &frag, 2);
+ if (ret < 0)
+ return ret;
+ edge = frag.p;
edge->src = bb;
edge[1].src = bb;
@@ -322,8 +359,11 @@ int subprog_add_bb_edges(struct cfg_node_allocator *allocator,
next_bb = exit_bb;
edge->dst = next_bb;
edge[1].dst = next_bb;
- list_add_tail(&edge->l, &bb->e_succs);
- list_add_tail(&edge[1].l, &edge[1].dst->e_prevs);
+ edge->link = bb->e_succs;
+ bb->e_succs = frag.link;
+ frag.link.offset += sizeof(struct edge_node);
+ edge[1].link = next_bb->e_prevs;
+ next_bb->e_prevs = frag.link;
edge = NULL;
}
@@ -331,23 +371,29 @@ int subprog_add_bb_edges(struct cfg_node_allocator *allocator,
struct bb_node *tgt;
if (!edge) {
- edge = get_edge_nodes(allocator, 2);
- if (!edge)
- return -ENOMEM;
+ ret = get_edge_nodes(allocator, &frag, 2);
+ if (ret < 0)
+ return ret;
+ edge = frag.p;
edge->src = bb;
edge[1].src = bb;
}
- tgt = search_bb_with_head(bb_list,
+ tgt = search_bb_with_head(allocator, bb_list,
tail + insn.off + 1);
if (!tgt)
return -EINVAL;
edge->dst = tgt;
edge[1].dst = tgt;
- list_add_tail(&edge->l, &bb->e_succs);
- list_add_tail(&edge[1].l, &tgt->e_prevs);
+ edge->link = bb->e_succs;
+ bb->e_succs = frag.link;
+ frag.link.offset += sizeof(struct edge_node);
+ edge[1].link = tgt->e_prevs;
+ tgt->e_prevs = frag.link;
}
+
+ bb = bb_next(allocator, bb);
}
return bb_num + 2;
@@ -451,13 +497,13 @@ static void link(struct dom_info *di, unsigned int v, unsigned int w)
}
}
-static void calc_idoms(struct bpf_subprog_info *subprog, struct dom_info *di,
- bool reverse)
+static void
+calc_idoms(struct cfg_node_allocator *allocator,
+ struct bpf_subprog_info *subprog, struct dom_info *di, bool reverse)
{
u16 entry_bb_fake_idx = subprog->bb_num - 2, idx, w, k, par;
- struct list_head *bb_list = &subprog->bbs;
+ void **bb_list = (void **)&subprog->bbs;
struct bb_node *entry_bb;
- struct edge_iter ei;
if (reverse)
entry_bb = exit_bb(bb_list);
@@ -472,24 +518,21 @@ static void calc_idoms(struct bpf_subprog_info *subprog, struct dom_info *di,
par = di->dfs_parent[idx];
k = idx;
- if (reverse) {
- ei.edge = first_edge(&bb->e_succs);
- ei.list_head = &bb->e_succs;
- } else {
- ei.edge = first_edge(&bb->e_prevs);
- ei.list_head = &bb->e_prevs;
- }
+ if (reverse)
+ e = cfg_node_delink(allocator, &bb->e_succs);
+ else
+ e = cfg_node_delink(allocator, &bb->e_prevs);
- while (!ei_end_p(&ei)) {
+ while (e) {
struct bb_node *b;
u16 k1;
- e = ei.edge;
if (reverse)
b = e->dst;
else
b = e->src;
- ei_next(&ei);
+
+ e = cfg_node_delink(allocator, &e->link);
if (b == entry_bb)
k1 = di->dfs_order[entry_bb_fake_idx];
@@ -525,19 +568,21 @@ static void calc_idoms(struct bpf_subprog_info *subprog, struct dom_info *di,
}
static int
-calc_dfs_tree(struct bpf_verifier_env *env, struct bpf_subprog_info *subprog,
- struct dom_info *di, bool reverse)
+calc_dfs_tree(struct bpf_verifier_env *env,
+ struct cfg_node_allocator *allocator,
+ struct bpf_subprog_info *subprog, struct dom_info *di,
+ bool reverse)
{
u16 bb_num = subprog->bb_num, sp = 0, idx, parent_idx, i;
- struct list_head *bb_list = &subprog->bbs;
+ void **bb_list = (void **)&subprog->bbs;
u16 entry_bb_fake_idx = bb_num - 2;
struct bb_node *entry_bb, *exit_bb;
- struct edge_iter ei, *stack;
- struct edge_node *e;
+ struct edge_node **stack, *e;
di->dfs_order[entry_bb_fake_idx] = di->dfsnum;
- stack = kmalloc_array(bb_num - 1, sizeof(struct edge_iter), GFP_KERNEL);
+ stack = kmalloc_array(bb_num - 1, sizeof(struct edge_node *),
+ GFP_KERNEL);
if (!stack)
return -ENOMEM;
@@ -545,27 +590,24 @@ static void calc_idoms(struct bpf_subprog_info *subprog, struct dom_info *di,
entry_bb = exit_bb(bb_list);
exit_bb = entry_bb(bb_list);
di->dfs_to_bb[di->dfsnum++] = exit_bb;
- ei.edge = first_edge(&entry_bb->e_prevs);
- ei.list_head = &entry_bb->e_prevs;
+ e = cfg_node_delink(allocator, &entry_bb->e_prevs);
} else {
entry_bb = entry_bb(bb_list);
exit_bb = exit_bb(bb_list);
di->dfs_to_bb[di->dfsnum++] = entry_bb;
- ei.edge = first_edge(&entry_bb->e_succs);
- ei.list_head = &entry_bb->e_succs;
+ e = cfg_node_delink(allocator, &entry_bb->e_succs);
}
while (1) {
struct bb_node *bb_dst, *bb_src;
- while (!ei_end_p(&ei)) {
- e = ei.edge;
-
+ while (e) {
if (reverse) {
bb_dst = e->src;
if (bb_dst == exit_bb ||
di->dfs_order[bb_dst->idx]) {
- ei_next(&ei);
+ e = cfg_node_delink(allocator,
+ &e->link);
continue;
}
bb_src = e->dst;
@@ -573,7 +615,8 @@ static void calc_idoms(struct bpf_subprog_info *subprog, struct dom_info *di,
bb_dst = e->dst;
if (bb_dst == exit_bb ||
di->dfs_order[bb_dst->idx]) {
- ei_next(&ei);
+ e = cfg_node_delink(allocator,
+ &e->link);
continue;
}
bb_src = e->src;
@@ -589,21 +632,20 @@ static void calc_idoms(struct bpf_subprog_info *subprog, struct dom_info *di,
di->dfs_to_bb[idx] = bb_dst;
di->dfs_parent[idx] = parent_idx;
- stack[sp++] = ei;
- if (reverse) {
- ei.edge = first_edge(&bb_dst->e_prevs);
- ei.list_head = &bb_dst->e_prevs;
- } else {
- ei.edge = first_edge(&bb_dst->e_succs);
- ei.list_head = &bb_dst->e_succs;
- }
+ stack[sp++] = e;
+ if (reverse)
+ e = cfg_node_delink(allocator,
+ &bb_dst->e_prevs);
+ else
+ e = cfg_node_delink(allocator,
+ &bb_dst->e_succs);
}
if (!sp)
break;
- ei = stack[--sp];
- ei_next(&ei);
+ e = stack[--sp];
+ e = cfg_node_delink(allocator, &e->link);
}
kfree(stack);
@@ -667,6 +709,7 @@ static int idoms_to_doms(struct bpf_subprog_info *subprog, struct dom_info *di)
*/
int subprog_build_dom_info(struct bpf_verifier_env *env,
+ struct cfg_node_allocator *allocator,
struct bpf_subprog_info *subprog)
{
struct dom_info di;
@@ -676,11 +719,11 @@ int subprog_build_dom_info(struct bpf_verifier_env *env,
if (ret < 0)
goto free_dominfo;
- ret = calc_dfs_tree(env, subprog, &di, false);
+ ret = calc_dfs_tree(env, allocator, subprog, &di, false);
if (ret < 0)
goto free_dominfo;
- calc_idoms(subprog, &di, false);
+ calc_idoms(allocator, subprog, &di, false);
ret = idoms_to_doms(subprog, &di);
if (ret < 0)
goto free_dominfo;
@@ -694,25 +737,33 @@ int subprog_build_dom_info(struct bpf_verifier_env *env,
return ret;
}
-bool subprog_has_loop(struct bpf_subprog_info *subprog)
+bool subprog_has_loop(struct cfg_node_allocator *allocator,
+ struct bpf_subprog_info *subprog)
{
int lane_len = BITS_TO_LONGS(subprog->bb_num - 2);
- struct list_head *bb_list = &subprog->bbs;
- struct bb_node *bb, *entry_bb;
+ struct bb_node *bb, *entry_bb, *exit_bb;
+ void **bb_list = (void **)&subprog->bbs;
struct edge_node *e;
entry_bb = entry_bb(bb_list);
- bb = bb_next(entry_bb);
- list_for_each_entry_from(bb, &exit_bb(bb_list)->l, l)
- list_for_each_entry(e, &bb->e_prevs, l) {
+ exit_bb = exit_bb(bb_list);
+ bb = bb_next(allocator, entry_bb);
+ while (bb && bb != exit_bb) {
+ e = cfg_node_delink(allocator, &bb->e_prevs);
+ while (e) {
struct bb_node *latch = e->src;
if (latch != entry_bb &&
test_bit(bb->idx,
subprog->dtree + latch->idx * lane_len))
return true;
+
+ e = cfg_node_delink(allocator, &e->link);
}
+ bb = bb_next(allocator, bb);
+ }
+
return false;
}
@@ -756,37 +807,34 @@ int add_subprog(struct bpf_verifier_env *env, int off)
}
struct callee_iter {
- struct list_head *list_head;
+ struct cedge_node *head;
struct cedge_node *callee;
};
-#define first_callee(c_list) list_first_entry(c_list, struct cedge_node, l)
-#define next_callee(c) list_next_entry(c, l)
-
static bool ci_end_p(struct callee_iter *ci)
{
- return &ci->callee->l == ci->list_head;
+ return !ci->callee;
}
-static void ci_next(struct callee_iter *ci)
+static void ci_next(struct cfg_node_allocator *allocator,
+ struct callee_iter *ci)
{
struct cedge_node *c = ci->callee;
- ci->callee = next_callee(c);
+ ci->callee = cfg_node_delink(allocator, &c->link);
}
#define EXPLORING 1
#define EXPLORED 2
int cgraph_check_recursive_unreachable(struct bpf_verifier_env *env,
+ struct cfg_node_allocator *allocator,
struct bpf_subprog_info *subprog)
{
- struct callee_iter *stack;
+ int sp = 0, idx = 0, ret, *status;
+ struct callee_iter *stack, ci;
struct cedge_node *callee;
- int sp = 0, idx = 0, ret;
- struct callee_iter ci;
- int *status;
- stack = kmalloc_array(128, sizeof(struct callee_iter), GFP_KERNEL);
+ stack = kmalloc_array(64, sizeof(struct callee_iter), GFP_KERNEL);
if (!stack)
return -ENOMEM;
status = kcalloc(env->subprog_cnt, sizeof(int), GFP_KERNEL);
@@ -794,8 +842,8 @@ int cgraph_check_recursive_unreachable(struct bpf_verifier_env *env,
kfree(stack);
return -ENOMEM;
}
- ci.callee = first_callee(&subprog->callees);
- ci.list_head = &subprog->callees;
+ ci.head = subprog->callees;
+ ci.callee = subprog->callees;
status[0] = EXPLORING;
while (1) {
@@ -810,20 +858,19 @@ int cgraph_check_recursive_unreachable(struct bpf_verifier_env *env,
status[idx] = EXPLORING;
- if (sp == 127) {
+ if (sp == 64) {
bpf_verifier_log_write(env, "cgraph - call frame too deep\n");
ret = -EINVAL;
goto err_free;
}
stack[sp++] = ci;
- ci.callee = first_callee(&subprog[idx].callees);
- ci.list_head = &subprog[idx].callees;
+ ci.head = subprog[idx].callees;
+ ci.callee = subprog[idx].callees;
}
- if (!list_empty(ci.list_head))
- status[first_callee(ci.list_head)->caller_idx] =
- EXPLORED;
+ if (ci.head)
+ status[ci.head->caller_idx] = EXPLORED;
else
/* leaf func. */
status[idx] = EXPLORED;
@@ -832,7 +879,7 @@ int cgraph_check_recursive_unreachable(struct bpf_verifier_env *env,
break;
ci = stack[--sp];
- ci_next(&ci);
+ ci_next(allocator, &ci);
}
for (idx = 0; idx < env->subprog_cnt; idx++)
@@ -851,27 +898,37 @@ int cgraph_check_recursive_unreachable(struct bpf_verifier_env *env,
int subprog_append_callee(struct bpf_verifier_env *env,
struct cfg_node_allocator *allocator,
- struct list_head *callees_list,
- int caller_idx, int off)
+ void **callees_list, int caller_idx, int off)
{
- int callee_idx = find_subprog(env, off);
+ int callee_idx = find_subprog(env, off), ret;
struct cedge_node *new_callee, *callee;
+ struct mem_frag frag;
if (callee_idx < 0)
return callee_idx;
- list_for_each_entry(callee, callees_list, l) {
+ callee = (struct cedge_node *)*callees_list;
+ while (callee) {
if (callee->callee_idx == callee_idx)
return 0;
+
+ callee = cfg_node_delink(allocator, &callee->link);
}
- new_callee = get_single_cedge_node(allocator);
- if (!new_callee)
- return -ENOMEM;
+ ret = get_single_cedge_node(allocator, &frag);
+ if (ret < 0)
+ return ret;
+ new_callee = frag.p;
new_callee->caller_idx = caller_idx;
new_callee->callee_idx = callee_idx;
- list_add_tail(&new_callee->l, callees_list);
+ callee = (struct cedge_node *)*callees_list;
+ if (!callee) {
+ *callees_list = new_callee;
+ } else {
+ new_callee->link = callee->link;
+ callee->link = frag.link;
+ }
return 0;
}
diff --git a/kernel/bpf/cfg.h b/kernel/bpf/cfg.h
index 1868587..cb833bd 100644
--- a/kernel/bpf/cfg.h
+++ b/kernel/bpf/cfg.h
@@ -8,9 +8,11 @@
#ifndef __BPF_CFG_H__
#define __BPF_CFG_H__
+#define MAX_POOL_NUM 32
+
struct cfg_node_allocator {
- struct list_head pools;
- struct node_pool *cur_free_pool;
+ void *base[MAX_POOL_NUM];
+ u8 pool_cnt;
};
int add_subprog(struct bpf_verifier_env *env, int off);
@@ -18,22 +20,23 @@ struct cfg_node_allocator {
int cfg_node_allocator_init(struct cfg_node_allocator *allocator,
int bb_num_esti, int cedge_num_esti);
int cgraph_check_recursive_unreachable(struct bpf_verifier_env *env,
+ struct cfg_node_allocator *allocator,
struct bpf_subprog_info *subprog);
int find_subprog(struct bpf_verifier_env *env, int off);
int subprog_add_bb_edges(struct cfg_node_allocator *allocator,
- struct bpf_insn *insns, struct list_head *bb_list);
+ struct bpf_insn *insns, void **bb_list);
int subprog_append_bb(struct cfg_node_allocator *allocator,
- struct list_head *bb_list, int head);
+ void **bb_list, int head);
int subprog_append_callee(struct bpf_verifier_env *env,
struct cfg_node_allocator *allocator,
- struct list_head *bb_list, int caller_idx, int off);
+ void **callees, int caller_idx, int off);
int subprog_build_dom_info(struct bpf_verifier_env *env,
+ struct cfg_node_allocator *allocator,
struct bpf_subprog_info *subprog);
-int subprog_fini_bb(struct cfg_node_allocator *allocator,
- struct list_head *bb_list, int subprog_end);
-bool subprog_has_loop(struct bpf_subprog_info *subprog);
-int subprog_init_bb(struct cfg_node_allocator *allocator,
- struct list_head *bb_list, int subprog_start);
+bool subprog_has_loop(struct cfg_node_allocator *allocator,
+ struct bpf_subprog_info *subprog);
+int subprog_init_bb(struct cfg_node_allocator *allocator, void **bb_list,
+ int subprog_start, int subprog_end);
void subprog_free(struct bpf_subprog_info *subprog, int end_idx);
#endif
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 5a5016d..90619da 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -766,9 +766,9 @@ static int check_subprogs(struct bpf_verifier_env *env)
{
int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
struct bpf_subprog_info *subprog = env->subprog_info;
- struct list_head *cur_bb_list, *cur_callee_list;
struct bpf_insn *insn = env->prog->insnsi;
int cedge_num_esti = 0, bb_num_esti = 3;
+ void **cur_bb_list, **cur_callee_list;
struct cfg_node_allocator allocator;
int insn_cnt = env->prog->len;
u8 code;
@@ -813,24 +813,19 @@ static int check_subprogs(struct bpf_verifier_env *env)
*/
subprog[env->subprog_cnt].start = insn_cnt;
- for (i = 0; i < env->subprog_cnt; i++) {
+ for (i = 0; i < env->subprog_cnt; i++)
if (env->log.level > 1)
verbose(env, "func#%d @%d\n", i, subprog[i].start);
- /* Don't init callees inside add_subprog which will sort the
- * array which breaks list link.
- */
- INIT_LIST_HEAD(&subprog[i].callees);
- }
-
ret = cfg_node_allocator_init(&allocator, bb_num_esti,
cedge_num_esti);
if (ret < 0)
return ret;
subprog_start = subprog[cur_subprog].start;
subprog_end = subprog[cur_subprog + 1].start;
- cur_bb_list = &subprog[cur_subprog].bbs;
- ret = subprog_init_bb(&allocator, cur_bb_list, subprog_start);
+ cur_bb_list = (void **)&subprog[cur_subprog].bbs;
+ ret = subprog_init_bb(&allocator, cur_bb_list, subprog_start,
+ subprog_end);
if (ret < 0)
goto free_nodes;
cur_callee_list = &env->subprog_info[cur_subprog].callees;
@@ -909,20 +904,17 @@ static int check_subprogs(struct bpf_verifier_env *env)
goto free_nodes;
}
- ret = subprog_fini_bb(&allocator, cur_bb_list,
- subprog_end);
- if (ret < 0)
- goto free_nodes;
ret = subprog_add_bb_edges(&allocator, insn,
cur_bb_list);
if (ret < 0)
goto free_nodes;
subprog[cur_subprog].bb_num = ret;
- ret = subprog_build_dom_info(env,
+ ret = subprog_build_dom_info(env, &allocator,
&subprog[cur_subprog]);
if (ret < 0)
goto free_nodes;
- if (subprog_has_loop(&subprog[cur_subprog])) {
+ if (subprog_has_loop(&allocator,
+ &subprog[cur_subprog])) {
verbose(env, "cfg - loop detected");
ret = -EINVAL;
goto free_nodes;
@@ -931,9 +923,11 @@ static int check_subprogs(struct bpf_verifier_env *env)
cur_subprog++;
if (cur_subprog < env->subprog_cnt) {
subprog_end = subprog[cur_subprog + 1].start;
- cur_bb_list = &subprog[cur_subprog].bbs;
+ cur_bb_list =
+ (void **)&subprog[cur_subprog].bbs;
ret = subprog_init_bb(&allocator, cur_bb_list,
- subprog_start);
+ subprog_start,
+ subprog_end);
if (ret < 0)
goto free_nodes;
cur_callee_list = &subprog[cur_subprog].callees;
@@ -941,7 +935,8 @@ static int check_subprogs(struct bpf_verifier_env *env)
}
}
- ret = cgraph_check_recursive_unreachable(env, env->subprog_info);
+ ret = cgraph_check_recursive_unreachable(env, &allocator,
+ env->subprog_info);
if (ret < 0)
goto free_nodes;
^ permalink raw reply related
* [RFC PATCH 09/16] bpf: cfg: reduce k*alloc/free call by using memory pool for allocating nodes
From: John Fastabend @ 2018-06-01 9:33 UTC (permalink / raw)
To: alexei.starovoitov, daniel, davem; +Cc: netdev
In-Reply-To: <20180601092646.15353.28269.stgit@john-Precision-Tower-5810>
From: Jiong Wang <jiong.wang@netronome.com>
During building control flow graph, we need to build basic block nodes
and edge nodes etc. These nodes needs to allocated dynamically as we
don't have pre-scan pass to know how many nodes we need accurately.
It is better to manage their allocation using memory pool which could
reduce calling of *alloc/kfree functions and also could simplify freeing
nodes.
This patch:
- implements a simple memory pool based node allocator.
nodes need dynamic allocation migrated to this allocator.
- estimate node numbers inside subprog detection pass, asking allocator
to do an initial allocation of estimated size (aligned to 2K). The pool
will grow later if space are not enough.
- There is no support on returning memory back to the pool.
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
kernel/bpf/cfg.c | 164 ++++++++++++++++++++++++++++++++++++-------------
kernel/bpf/cfg.h | 21 +++++-
kernel/bpf/verifier.c | 39 +++++++++---
3 files changed, 170 insertions(+), 54 deletions(-)
diff --git a/kernel/bpf/cfg.c b/kernel/bpf/cfg.c
index 56c08e8..d213289 100644
--- a/kernel/bpf/cfg.c
+++ b/kernel/bpf/cfg.c
@@ -82,7 +82,113 @@ struct dom_info {
u16 dfsnum;
};
-int subprog_append_bb(struct list_head *bb_list, int head)
+struct node_pool {
+ struct list_head l;
+ void *data;
+ u32 size;
+ u32 used;
+};
+
+#define first_node_pool(pool_list) \
+ list_first_entry(pool_list, struct node_pool, l)
+
+#define MEM_CHUNK_SIZE (1024)
+
+static int cfg_node_allocator_grow(struct cfg_node_allocator *allocator,
+ int min_grow_size)
+{
+ int s = min_grow_size;
+ struct node_pool *pool;
+ void *data;
+
+ s += sizeof(struct node_pool);
+ s = ALIGN(s, MEM_CHUNK_SIZE);
+ data = kzalloc(s, GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+
+ pool = (struct node_pool *)data;
+ pool->data = pool + 1;
+ pool->size = s - sizeof(struct node_pool);
+ pool->used = 0;
+ allocator->cur_free_pool = pool;
+ list_add_tail(&pool->l, &allocator->pools);
+
+ return 0;
+}
+
+static void *cfg_node_alloc(struct cfg_node_allocator *allocator, int size)
+{
+ struct node_pool *pool = allocator->cur_free_pool;
+ void *p;
+
+ if (pool->used + size > pool->size) {
+ int ret = cfg_node_allocator_grow(allocator, size);
+
+ if (ret < 0)
+ return NULL;
+
+ pool = allocator->cur_free_pool;
+ }
+
+ p = pool->data + pool->used;
+ pool->used += size;
+
+ return p;
+}
+
+static struct bb_node *get_single_bb_nodes(struct cfg_node_allocator *allocator)
+{
+ int size = sizeof(struct bb_node);
+
+ return (struct bb_node *)cfg_node_alloc(allocator, size);
+}
+
+static struct edge_node *get_edge_nodes(struct cfg_node_allocator *allocator,
+ int num)
+{
+ int size = num * sizeof(struct edge_node);
+
+ return (struct edge_node *)cfg_node_alloc(allocator, size);
+}
+
+static struct cedge_node *
+get_single_cedge_node(struct cfg_node_allocator *allocator)
+{
+ int size = sizeof(struct cedge_node);
+
+ return (struct cedge_node *)cfg_node_alloc(allocator, size);
+}
+
+int cfg_node_allocator_init(struct cfg_node_allocator *allocator,
+ int bb_num_esti, int cedge_num_esti)
+{
+ int s = bb_num_esti * sizeof(struct bb_node), ret;
+
+ s += 2 * bb_num_esti * sizeof(struct edge_node);
+ s += cedge_num_esti * sizeof(struct cedge_node);
+ INIT_LIST_HEAD(&allocator->pools);
+ ret = cfg_node_allocator_grow(allocator, s);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+void cfg_node_allocator_free(struct cfg_node_allocator *allocator)
+{
+ struct list_head *pools = &allocator->pools;
+ struct node_pool *pool, *tmp;
+
+ pool = first_node_pool(pools);
+ list_for_each_entry_safe_from(pool, tmp, pools, l) {
+ list_del(&pool->l);
+ kfree(pool);
+ }
+}
+
+int subprog_append_bb(struct cfg_node_allocator *allocator,
+ struct list_head *bb_list, int head)
{
struct bb_node *new_bb, *bb;
@@ -94,7 +200,7 @@ int subprog_append_bb(struct list_head *bb_list, int head)
}
bb = bb_prev(bb);
- new_bb = kzalloc(sizeof(*new_bb), GFP_KERNEL);
+ new_bb = get_single_bb_nodes(allocator);
if (!new_bb)
return -ENOMEM;
@@ -106,9 +212,10 @@ int subprog_append_bb(struct list_head *bb_list, int head)
return 0;
}
-int subprog_fini_bb(struct list_head *bb_list, int subprog_end)
+int subprog_fini_bb(struct cfg_node_allocator *allocator,
+ struct list_head *bb_list, int subprog_end)
{
- struct bb_node *bb = kzalloc(sizeof(*bb), GFP_KERNEL);
+ struct bb_node *bb = get_single_bb_nodes(allocator);
if (!bb)
return -ENOMEM;
@@ -118,7 +225,7 @@ int subprog_fini_bb(struct list_head *bb_list, int subprog_end)
INIT_LIST_HEAD(&bb->e_succs);
list_add(&bb->l, bb_list);
- bb = kzalloc(sizeof(*bb), GFP_KERNEL);
+ bb = get_single_bb_nodes(allocator);
if (!bb)
return -ENOMEM;
/* exit bb. */
@@ -130,12 +237,13 @@ int subprog_fini_bb(struct list_head *bb_list, int subprog_end)
return 0;
}
-int subprog_init_bb(struct list_head *bb_list, int subprog_start)
+int subprog_init_bb(struct cfg_node_allocator *allocator,
+ struct list_head *bb_list, int subprog_start)
{
int ret;
INIT_LIST_HEAD(bb_list);
- ret = subprog_append_bb(bb_list, subprog_start);
+ ret = subprog_append_bb(allocator, bb_list, subprog_start);
if (ret < 0)
return ret;
@@ -154,14 +262,15 @@ static struct bb_node *search_bb_with_head(struct list_head *bb_list, int head)
return NULL;
}
-int subprog_add_bb_edges(struct bpf_insn *insns, struct list_head *bb_list)
+int subprog_add_bb_edges(struct cfg_node_allocator *allocator,
+ struct bpf_insn *insns, struct list_head *bb_list)
{
struct bb_node *bb, *exit_bb;
struct edge_node *edge;
int bb_num;
bb = entry_bb(bb_list);
- edge = kcalloc(2, sizeof(*edge), GFP_KERNEL);
+ edge = get_edge_nodes(allocator, 2);
if (!edge)
return -ENOMEM;
edge->src = bb;
@@ -186,7 +295,7 @@ int subprog_add_bb_edges(struct bpf_insn *insns, struct list_head *bb_list)
bb->idx = bb_num++;
- edge = kcalloc(2, sizeof(*edge), GFP_KERNEL);
+ edge = get_edge_nodes(allocator, 2);
if (!edge)
return -ENOMEM;
edge->src = bb;
@@ -222,7 +331,7 @@ int subprog_add_bb_edges(struct bpf_insn *insns, struct list_head *bb_list)
struct bb_node *tgt;
if (!edge) {
- edge = kcalloc(2, sizeof(*edge), GFP_KERNEL);
+ edge = get_edge_nodes(allocator, 2);
if (!edge)
return -ENOMEM;
edge->src = bb;
@@ -741,6 +850,7 @@ int cgraph_check_recursive_unreachable(struct bpf_verifier_env *env,
}
int subprog_append_callee(struct bpf_verifier_env *env,
+ struct cfg_node_allocator *allocator,
struct list_head *callees_list,
int caller_idx, int off)
{
@@ -755,7 +865,7 @@ int subprog_append_callee(struct bpf_verifier_env *env,
return 0;
}
- new_callee = kzalloc(sizeof(*new_callee), GFP_KERNEL);
+ new_callee = get_single_cedge_node(allocator);
if (!new_callee)
return -ENOMEM;
@@ -766,41 +876,11 @@ int subprog_append_callee(struct bpf_verifier_env *env,
return 0;
}
-static void subprog_free_edge(struct bb_node *bb)
-{
- struct list_head *succs = &bb->e_succs;
- struct edge_node *e, *tmp;
-
- /* prevs and succs are allocated as pair, succs is the start addr. */
- list_for_each_entry_safe(e, tmp, succs, l) {
- list_del(&e->l);
- kfree(e);
- }
-}
-
void subprog_free(struct bpf_subprog_info *subprog, int end_idx)
{
int i = 0;
for (; i <= end_idx; i++) {
- struct list_head *callees = &subprog[i].callees;
- struct list_head *bbs = &subprog[i].bbs;
- struct cedge_node *callee, *tmp_callee;
- struct bb_node *bb, *tmp, *exit;
-
- bb = entry_bb(bbs);
- exit = exit_bb(bbs);
- list_for_each_entry_safe_from(bb, tmp, &exit->l, l) {
- subprog_free_edge(bb);
- list_del(&bb->l);
- kfree(bb);
- }
-
- list_for_each_entry_safe(callee, tmp_callee, callees, l) {
- list_del(&callee->l);
- kfree(callee);
- }
-
if (subprog[i].dtree_avail)
kfree(subprog[i].dtree);
}
diff --git a/kernel/bpf/cfg.h b/kernel/bpf/cfg.h
index 577c22c..1868587 100644
--- a/kernel/bpf/cfg.h
+++ b/kernel/bpf/cfg.h
@@ -8,19 +8,32 @@
#ifndef __BPF_CFG_H__
#define __BPF_CFG_H__
+struct cfg_node_allocator {
+ struct list_head pools;
+ struct node_pool *cur_free_pool;
+};
+
int add_subprog(struct bpf_verifier_env *env, int off);
+void cfg_node_allocator_free(struct cfg_node_allocator *allocator);
+int cfg_node_allocator_init(struct cfg_node_allocator *allocator,
+ int bb_num_esti, int cedge_num_esti);
int cgraph_check_recursive_unreachable(struct bpf_verifier_env *env,
struct bpf_subprog_info *subprog);
int find_subprog(struct bpf_verifier_env *env, int off);
-int subprog_add_bb_edges(struct bpf_insn *insns, struct list_head *bb_list);
-int subprog_append_bb(struct list_head *bb_list, int head);
+int subprog_add_bb_edges(struct cfg_node_allocator *allocator,
+ struct bpf_insn *insns, struct list_head *bb_list);
+int subprog_append_bb(struct cfg_node_allocator *allocator,
+ struct list_head *bb_list, int head);
int subprog_append_callee(struct bpf_verifier_env *env,
+ struct cfg_node_allocator *allocator,
struct list_head *bb_list, int caller_idx, int off);
int subprog_build_dom_info(struct bpf_verifier_env *env,
struct bpf_subprog_info *subprog);
-int subprog_fini_bb(struct list_head *bb_list, int subprog_end);
+int subprog_fini_bb(struct cfg_node_allocator *allocator,
+ struct list_head *bb_list, int subprog_end);
bool subprog_has_loop(struct bpf_subprog_info *subprog);
-int subprog_init_bb(struct list_head *bb_list, int subprog_start);
+int subprog_init_bb(struct cfg_node_allocator *allocator,
+ struct list_head *bb_list, int subprog_start);
void subprog_free(struct bpf_subprog_info *subprog, int end_idx);
#endif
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 338ebc5..5a5016d 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -768,7 +768,10 @@ static int check_subprogs(struct bpf_verifier_env *env)
struct bpf_subprog_info *subprog = env->subprog_info;
struct list_head *cur_bb_list, *cur_callee_list;
struct bpf_insn *insn = env->prog->insnsi;
+ int cedge_num_esti = 0, bb_num_esti = 3;
+ struct cfg_node_allocator allocator;
int insn_cnt = env->prog->len;
+ u8 code;
/* Add entry function. */
ret = add_subprog(env, 0);
@@ -777,8 +780,18 @@ static int check_subprogs(struct bpf_verifier_env *env)
/* determine subprog starts. The end is one before the next starts */
for (i = 0; i < insn_cnt; i++) {
- if (insn[i].code != (BPF_JMP | BPF_CALL))
+ code = insn[i].code;
+ if (BPF_CLASS(code) != BPF_JMP)
+ continue;
+ if (BPF_OP(code) == BPF_EXIT) {
+ if (i + 1 < insn_cnt)
+ bb_num_esti++;
continue;
+ }
+ if (BPF_OP(code) != BPF_CALL) {
+ bb_num_esti += 2;
+ continue;
+ }
if (insn[i].src_reg != BPF_PSEUDO_CALL)
continue;
if (!env->allow_ptr_leaks) {
@@ -789,6 +802,7 @@ static int check_subprogs(struct bpf_verifier_env *env)
verbose(env, "function calls in offloaded programs are not supported yet\n");
return -EINVAL;
}
+ cedge_num_esti++;
ret = add_subprog(env, i + insn[i].imm + 1);
if (ret < 0)
return ret;
@@ -809,10 +823,14 @@ static int check_subprogs(struct bpf_verifier_env *env)
INIT_LIST_HEAD(&subprog[i].callees);
}
+ ret = cfg_node_allocator_init(&allocator, bb_num_esti,
+ cedge_num_esti);
+ if (ret < 0)
+ return ret;
subprog_start = subprog[cur_subprog].start;
subprog_end = subprog[cur_subprog + 1].start;
cur_bb_list = &subprog[cur_subprog].bbs;
- ret = subprog_init_bb(cur_bb_list, subprog_start);
+ ret = subprog_init_bb(&allocator, cur_bb_list, subprog_start);
if (ret < 0)
goto free_nodes;
cur_callee_list = &env->subprog_info[cur_subprog].callees;
@@ -825,7 +843,8 @@ static int check_subprogs(struct bpf_verifier_env *env)
if (BPF_OP(code) == BPF_EXIT) {
if (i + 1 < subprog_end) {
- ret = subprog_append_bb(cur_bb_list, i + 1);
+ ret = subprog_append_bb(&allocator, cur_bb_list,
+ i + 1);
if (ret < 0)
goto free_nodes;
}
@@ -837,6 +856,7 @@ static int check_subprogs(struct bpf_verifier_env *env)
int target = i + insn[i].imm + 1;
ret = subprog_append_callee(env,
+ &allocator,
cur_callee_list,
cur_subprog,
target);
@@ -860,12 +880,12 @@ static int check_subprogs(struct bpf_verifier_env *env)
goto free_nodes;
}
- ret = subprog_append_bb(cur_bb_list, off);
+ ret = subprog_append_bb(&allocator, cur_bb_list, off);
if (ret < 0)
goto free_nodes;
if (i + 1 < subprog_end) {
- ret = subprog_append_bb(cur_bb_list, i + 1);
+ ret = subprog_append_bb(&allocator, cur_bb_list, i + 1);
if (ret < 0)
goto free_nodes;
}
@@ -889,10 +909,12 @@ static int check_subprogs(struct bpf_verifier_env *env)
goto free_nodes;
}
- ret = subprog_fini_bb(cur_bb_list, subprog_end);
+ ret = subprog_fini_bb(&allocator, cur_bb_list,
+ subprog_end);
if (ret < 0)
goto free_nodes;
- ret = subprog_add_bb_edges(insn, cur_bb_list);
+ ret = subprog_add_bb_edges(&allocator, insn,
+ cur_bb_list);
if (ret < 0)
goto free_nodes;
subprog[cur_subprog].bb_num = ret;
@@ -910,7 +932,7 @@ static int check_subprogs(struct bpf_verifier_env *env)
if (cur_subprog < env->subprog_cnt) {
subprog_end = subprog[cur_subprog + 1].start;
cur_bb_list = &subprog[cur_subprog].bbs;
- ret = subprog_init_bb(cur_bb_list,
+ ret = subprog_init_bb(&allocator, cur_bb_list,
subprog_start);
if (ret < 0)
goto free_nodes;
@@ -926,6 +948,7 @@ static int check_subprogs(struct bpf_verifier_env *env)
ret = 0;
free_nodes:
+ cfg_node_allocator_free(&allocator);
subprog_free(subprog, cur_subprog == env->subprog_cnt ?
cur_subprog - 1 : cur_subprog);
return ret;
^ permalink raw reply related
* [RFC PATCH 08/16] bpf: cfg: remove push_insn and check_cfg
From: John Fastabend @ 2018-06-01 9:32 UTC (permalink / raw)
To: alexei.starovoitov, daniel, davem; +Cc: netdev
In-Reply-To: <20180601092646.15353.28269.stgit@john-Precision-Tower-5810>
From: Jiong Wang <jiong.wang@netronome.com>
As we have detected loop and unreachable insns based on domination
information and call graph, there is no need of check_cfg.
This patch removes check_cfg and it's associated push_insn.
state prune heuristic marking as moved to check_subprog.
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
---
kernel/bpf/verifier.c | 228 +++----------------------------------------------
1 file changed, 16 insertions(+), 212 deletions(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index ddba8ad..338ebc5 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -760,6 +760,8 @@ enum reg_arg_type {
DST_OP_NO_MARK /* same as above, check only, don't mark */
};
+#define STATE_LIST_MARK ((struct bpf_verifier_state_list *)-1L)
+
static int check_subprogs(struct bpf_verifier_env *env)
{
int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
@@ -840,8 +842,14 @@ static int check_subprogs(struct bpf_verifier_env *env)
target);
if (ret < 0)
return ret;
+
+ env->explored_states[target] = STATE_LIST_MARK;
+ env->explored_states[i] = STATE_LIST_MARK;
}
+ if (i + 1 < insn_cnt)
+ env->explored_states[i + 1] = STATE_LIST_MARK;
+
goto next;
}
@@ -861,6 +869,13 @@ static int check_subprogs(struct bpf_verifier_env *env)
if (ret < 0)
goto free_nodes;
}
+
+ if (BPF_OP(code) != BPF_JA) {
+ env->explored_states[i] = STATE_LIST_MARK;
+ env->explored_states[off] = STATE_LIST_MARK;
+ } else if (i + 1 < insn_cnt) {
+ env->explored_states[i + 1] = STATE_LIST_MARK;
+ }
next:
if (i == subprog_end - 1) {
/* to avoid fall-through from one subprog into another
@@ -4093,217 +4108,6 @@ static int check_return_code(struct bpf_verifier_env *env)
return 0;
}
-/* non-recursive DFS pseudo code
- * 1 procedure DFS-iterative(G,v):
- * 2 label v as discovered
- * 3 let S be a stack
- * 4 S.push(v)
- * 5 while S is not empty
- * 6 t <- S.pop()
- * 7 if t is what we're looking for:
- * 8 return t
- * 9 for all edges e in G.adjacentEdges(t) do
- * 10 if edge e is already labelled
- * 11 continue with the next edge
- * 12 w <- G.adjacentVertex(t,e)
- * 13 if vertex w is not discovered and not explored
- * 14 label e as tree-edge
- * 15 label w as discovered
- * 16 S.push(w)
- * 17 continue at 5
- * 18 else if vertex w is discovered
- * 19 label e as back-edge
- * 20 else
- * 21 // vertex w is explored
- * 22 label e as forward- or cross-edge
- * 23 label t as explored
- * 24 S.pop()
- *
- * convention:
- * 0x10 - discovered
- * 0x11 - discovered and fall-through edge labelled
- * 0x12 - discovered and fall-through and branch edges labelled
- * 0x20 - explored
- */
-
-enum {
- DISCOVERED = 0x10,
- EXPLORED = 0x20,
- FALLTHROUGH = 1,
- BRANCH = 2,
-};
-
-#define STATE_LIST_MARK ((struct bpf_verifier_state_list *) -1L)
-
-static int *insn_stack; /* stack of insns to process */
-static int cur_stack; /* current stack index */
-static int *insn_state;
-
-/* t, w, e - match pseudo-code above:
- * t - index of current instruction
- * w - next instruction
- * e - edge
- */
-static int push_insn(int t, int w, int e, struct bpf_verifier_env *env)
-{
- if (e == FALLTHROUGH && insn_state[t] >= (DISCOVERED | FALLTHROUGH))
- return 0;
-
- if (e == BRANCH && insn_state[t] >= (DISCOVERED | BRANCH))
- return 0;
-
- if (w < 0 || w >= env->prog->len) {
- verbose(env, "jump out of range from insn %d to %d\n", t, w);
- return -EINVAL;
- }
-
- if (e == BRANCH)
- /* mark branch target for state pruning */
- env->explored_states[w] = STATE_LIST_MARK;
-
- if (insn_state[w] == 0) {
- /* tree-edge */
- insn_state[t] = DISCOVERED | e;
- insn_state[w] = DISCOVERED;
- if (cur_stack >= env->prog->len)
- return -E2BIG;
- insn_stack[cur_stack++] = w;
- return 1;
- } else if ((insn_state[w] & 0xF0) == DISCOVERED) {
- verbose(env, "back-edge from insn %d to %d\n", t, w);
- return -EINVAL;
- } else if (insn_state[w] == EXPLORED) {
- /* forward- or cross-edge */
- insn_state[t] = DISCOVERED | e;
- } else {
- verbose(env, "insn state internal bug\n");
- return -EFAULT;
- }
- return 0;
-}
-
-/* non-recursive depth-first-search to detect loops in BPF program
- * loop == back-edge in directed graph
- */
-static int check_cfg(struct bpf_verifier_env *env)
-{
- struct bpf_insn *insns = env->prog->insnsi;
- int insn_cnt = env->prog->len;
- int ret = 0;
- int i, t;
-
- ret = check_subprogs(env);
- if (ret < 0)
- return ret;
-
- insn_state = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
- if (!insn_state)
- return -ENOMEM;
-
- insn_stack = kcalloc(insn_cnt, sizeof(int), GFP_KERNEL);
- if (!insn_stack) {
- kfree(insn_state);
- return -ENOMEM;
- }
-
- insn_state[0] = DISCOVERED; /* mark 1st insn as discovered */
- insn_stack[0] = 0; /* 0 is the first instruction */
- cur_stack = 1;
-
-peek_stack:
- if (cur_stack == 0)
- goto check_state;
- t = insn_stack[cur_stack - 1];
-
- if (BPF_CLASS(insns[t].code) == BPF_JMP) {
- u8 opcode = BPF_OP(insns[t].code);
-
- if (opcode == BPF_EXIT) {
- goto mark_explored;
- } else if (opcode == BPF_CALL) {
- ret = push_insn(t, t + 1, FALLTHROUGH, env);
- if (ret == 1)
- goto peek_stack;
- else if (ret < 0)
- goto err_free;
- if (t + 1 < insn_cnt)
- env->explored_states[t + 1] = STATE_LIST_MARK;
- if (insns[t].src_reg == BPF_PSEUDO_CALL) {
- env->explored_states[t] = STATE_LIST_MARK;
- ret = push_insn(t, t + insns[t].imm + 1, BRANCH, env);
- if (ret == 1)
- goto peek_stack;
- else if (ret < 0)
- goto err_free;
- }
- } else if (opcode == BPF_JA) {
- if (BPF_SRC(insns[t].code) != BPF_K) {
- ret = -EINVAL;
- goto err_free;
- }
- /* unconditional jump with single edge */
- ret = push_insn(t, t + insns[t].off + 1,
- FALLTHROUGH, env);
- if (ret == 1)
- goto peek_stack;
- else if (ret < 0)
- goto err_free;
- /* tell verifier to check for equivalent states
- * after every call and jump
- */
- if (t + 1 < insn_cnt)
- env->explored_states[t + 1] = STATE_LIST_MARK;
- } else {
- /* conditional jump with two edges */
- env->explored_states[t] = STATE_LIST_MARK;
- ret = push_insn(t, t + 1, FALLTHROUGH, env);
- if (ret == 1)
- goto peek_stack;
- else if (ret < 0)
- goto err_free;
-
- ret = push_insn(t, t + insns[t].off + 1, BRANCH, env);
- if (ret == 1)
- goto peek_stack;
- else if (ret < 0)
- goto err_free;
- }
- } else {
- /* all other non-branch instructions with single
- * fall-through edge
- */
- ret = push_insn(t, t + 1, FALLTHROUGH, env);
- if (ret == 1)
- goto peek_stack;
- else if (ret < 0)
- goto err_free;
- }
-
-mark_explored:
- insn_state[t] = EXPLORED;
- if (cur_stack-- <= 0) {
- verbose(env, "pop stack internal bug\n");
- ret = -EFAULT;
- goto err_free;
- }
- goto peek_stack;
-
-check_state:
- for (i = 0; i < insn_cnt; i++) {
- if (insn_state[i] != EXPLORED) {
- verbose(env, "unreachable insn %d\n", i);
- ret = -EINVAL;
- goto err_free;
- }
- }
- ret = 0; /* cfg looks good */
-
-err_free:
- kfree(insn_state);
- kfree(insn_stack);
- return ret;
-}
-
/* check %cur's range satisfies %old's */
static bool range_within(struct bpf_reg_state *old,
struct bpf_reg_state *cur)
@@ -5914,7 +5718,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr)
env->allow_ptr_leaks = capable(CAP_SYS_ADMIN);
- ret = check_cfg(env);
+ ret = check_subprogs(env);
if (ret < 0)
goto skip_full_check;
^ permalink raw reply related
* [RFC PATCH 07/16] bpf: cfg: build call graph and detect unreachable/recursive call
From: John Fastabend @ 2018-06-01 9:32 UTC (permalink / raw)
To: alexei.starovoitov, daniel, davem; +Cc: netdev
In-Reply-To: <20180601092646.15353.28269.stgit@john-Precision-Tower-5810>
From: Jiong Wang <jiong.wang@netronome.com>
This patch build call graph during insn scan inside check_subprogs.
Then do recursive and unreachable subprog detection using call graph.
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
include/linux/bpf_verifier.h | 1
kernel/bpf/cfg.c | 133 ++++++++++++++++++++++++++++++++++++++++++
kernel/bpf/cfg.h | 4 +
kernel/bpf/verifier.c | 32 +++++++++-
4 files changed, 166 insertions(+), 4 deletions(-)
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 0eb838d..83ccbc4 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -177,6 +177,7 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
struct bpf_subprog_info {
u32 start; /* insn idx of function entry point */
u16 stack_depth; /* max. stack depth used by this function */
+ struct list_head callees; /* callees list. */
struct list_head bbs; /* basic blocks list. */
u32 bb_num; /* total basic block num. */
unsigned long *dtree;
diff --git a/kernel/bpf/cfg.c b/kernel/bpf/cfg.c
index 5175aa7..56c08e8 100644
--- a/kernel/bpf/cfg.c
+++ b/kernel/bpf/cfg.c
@@ -13,6 +13,12 @@
#include "cfg.h"
+struct cedge_node {
+ struct list_head l;
+ u8 caller_idx;
+ u8 callee_idx;
+};
+
struct edge_node {
struct list_head l;
struct bb_node *src;
@@ -640,6 +646,126 @@ int add_subprog(struct bpf_verifier_env *env, int off)
return 0;
}
+struct callee_iter {
+ struct list_head *list_head;
+ struct cedge_node *callee;
+};
+
+#define first_callee(c_list) list_first_entry(c_list, struct cedge_node, l)
+#define next_callee(c) list_next_entry(c, l)
+
+static bool ci_end_p(struct callee_iter *ci)
+{
+ return &ci->callee->l == ci->list_head;
+}
+
+static void ci_next(struct callee_iter *ci)
+{
+ struct cedge_node *c = ci->callee;
+
+ ci->callee = next_callee(c);
+}
+
+#define EXPLORING 1
+#define EXPLORED 2
+int cgraph_check_recursive_unreachable(struct bpf_verifier_env *env,
+ struct bpf_subprog_info *subprog)
+{
+ struct callee_iter *stack;
+ struct cedge_node *callee;
+ int sp = 0, idx = 0, ret;
+ struct callee_iter ci;
+ int *status;
+
+ stack = kmalloc_array(128, sizeof(struct callee_iter), GFP_KERNEL);
+ if (!stack)
+ return -ENOMEM;
+ status = kcalloc(env->subprog_cnt, sizeof(int), GFP_KERNEL);
+ if (!status) {
+ kfree(stack);
+ return -ENOMEM;
+ }
+ ci.callee = first_callee(&subprog->callees);
+ ci.list_head = &subprog->callees;
+ status[0] = EXPLORING;
+
+ while (1) {
+ while (!ci_end_p(&ci)) {
+ callee = ci.callee;
+ idx = callee->callee_idx;
+ if (status[idx] == EXPLORING) {
+ bpf_verifier_log_write(env, "cgraph - recursive call\n");
+ ret = -EINVAL;
+ goto err_free;
+ }
+
+ status[idx] = EXPLORING;
+
+ if (sp == 127) {
+ bpf_verifier_log_write(env, "cgraph - call frame too deep\n");
+ ret = -EINVAL;
+ goto err_free;
+ }
+
+ stack[sp++] = ci;
+ ci.callee = first_callee(&subprog[idx].callees);
+ ci.list_head = &subprog[idx].callees;
+ }
+
+ if (!list_empty(ci.list_head))
+ status[first_callee(ci.list_head)->caller_idx] =
+ EXPLORED;
+ else
+ /* leaf func. */
+ status[idx] = EXPLORED;
+
+ if (!sp)
+ break;
+
+ ci = stack[--sp];
+ ci_next(&ci);
+ }
+
+ for (idx = 0; idx < env->subprog_cnt; idx++)
+ if (status[idx] != EXPLORED) {
+ bpf_verifier_log_write(env, "cgraph - unreachable subprog\n");
+ ret = -EINVAL;
+ goto err_free;
+ }
+
+ ret = 0;
+err_free:
+ kfree(status);
+ kfree(stack);
+ return ret;
+}
+
+int subprog_append_callee(struct bpf_verifier_env *env,
+ struct list_head *callees_list,
+ int caller_idx, int off)
+{
+ int callee_idx = find_subprog(env, off);
+ struct cedge_node *new_callee, *callee;
+
+ if (callee_idx < 0)
+ return callee_idx;
+
+ list_for_each_entry(callee, callees_list, l) {
+ if (callee->callee_idx == callee_idx)
+ return 0;
+ }
+
+ new_callee = kzalloc(sizeof(*new_callee), GFP_KERNEL);
+ if (!new_callee)
+ return -ENOMEM;
+
+ new_callee->caller_idx = caller_idx;
+ new_callee->callee_idx = callee_idx;
+ list_add_tail(&new_callee->l, callees_list);
+
+ return 0;
+}
+
static void subprog_free_edge(struct bb_node *bb)
{
struct list_head *succs = &bb->e_succs;
@@ -657,7 +783,9 @@ void subprog_free(struct bpf_subprog_info *subprog, int end_idx)
int i = 0;
for (; i <= end_idx; i++) {
+ struct list_head *callees = &subprog[i].callees;
struct list_head *bbs = &subprog[i].bbs;
+ struct cedge_node *callee, *tmp_callee;
struct bb_node *bb, *tmp, *exit;
bb = entry_bb(bbs);
@@ -668,6 +796,11 @@ void subprog_free(struct bpf_subprog_info *subprog, int end_idx)
kfree(bb);
}
+ list_for_each_entry_safe(callee, tmp_callee, callees, l) {
+ list_del(&callee->l);
+ kfree(callee);
+ }
+
if (subprog[i].dtree_avail)
kfree(subprog[i].dtree);
}
diff --git a/kernel/bpf/cfg.h b/kernel/bpf/cfg.h
index 57eab9b..577c22c 100644
--- a/kernel/bpf/cfg.h
+++ b/kernel/bpf/cfg.h
@@ -9,9 +9,13 @@
#define __BPF_CFG_H__
int add_subprog(struct bpf_verifier_env *env, int off);
+int cgraph_check_recursive_unreachable(struct bpf_verifier_env *env,
+ struct bpf_subprog_info *subprog);
int find_subprog(struct bpf_verifier_env *env, int off);
int subprog_add_bb_edges(struct bpf_insn *insns, struct list_head *bb_list);
int subprog_append_bb(struct list_head *bb_list, int head);
+int subprog_append_callee(struct bpf_verifier_env *env,
+ struct list_head *bb_list, int caller_idx, int off);
int subprog_build_dom_info(struct bpf_verifier_env *env,
struct bpf_subprog_info *subprog);
int subprog_fini_bb(struct list_head *bb_list, int subprog_end);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 782dd17..ddba8ad 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -764,9 +764,9 @@ static int check_subprogs(struct bpf_verifier_env *env)
{
int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
struct bpf_subprog_info *subprog = env->subprog_info;
+ struct list_head *cur_bb_list, *cur_callee_list;
struct bpf_insn *insn = env->prog->insnsi;
int insn_cnt = env->prog->len;
- struct list_head *cur_bb_list;
/* Add entry function. */
ret = add_subprog(env, 0);
@@ -797,16 +797,23 @@ static int check_subprogs(struct bpf_verifier_env *env)
*/
subprog[env->subprog_cnt].start = insn_cnt;
- if (env->log.level > 1)
- for (i = 0; i < env->subprog_cnt; i++)
+ for (i = 0; i < env->subprog_cnt; i++) {
+ if (env->log.level > 1)
verbose(env, "func#%d @%d\n", i, subprog[i].start);
+ /* Don't init callees inside add_subprog which will sort the
+ * array which breaks list link.
+ */
+ INIT_LIST_HEAD(&subprog[i].callees);
+ }
+
subprog_start = subprog[cur_subprog].start;
subprog_end = subprog[cur_subprog + 1].start;
cur_bb_list = &subprog[cur_subprog].bbs;
ret = subprog_init_bb(cur_bb_list, subprog_start);
if (ret < 0)
goto free_nodes;
+ cur_callee_list = &env->subprog_info[cur_subprog].callees;
/* now check that all jumps are within the same subprog */
for (i = 0; i < insn_cnt; i++) {
u8 code = insn[i].code;
@@ -823,8 +830,20 @@ static int check_subprogs(struct bpf_verifier_env *env)
goto next;
}
- if (BPF_OP(code) == BPF_CALL)
+ if (BPF_OP(code) == BPF_CALL) {
+ if (insn[i].src_reg == BPF_PSEUDO_CALL) {
+ int target = i + insn[i].imm + 1;
+
+ ret = subprog_append_callee(env,
+ cur_callee_list,
+ cur_subprog,
+ target);
+ if (ret < 0)
+ return ret;
+ }
+
goto next;
+ }
off = i + insn[i].off + 1;
if (off < subprog_start || off >= subprog_end) {
@@ -880,10 +899,15 @@ static int check_subprogs(struct bpf_verifier_env *env)
subprog_start);
if (ret < 0)
goto free_nodes;
+ cur_callee_list = &subprog[cur_subprog].callees;
}
}
}
+ ret = cgraph_check_recursive_unreachable(env, env->subprog_info);
+ if (ret < 0)
+ goto free_nodes;
+
ret = 0;
free_nodes:
^ permalink raw reply related
* [RFC PATCH 06/16] bpf: cfg: move find_subprog/add_subprog to cfg.c
From: John Fastabend @ 2018-06-01 9:32 UTC (permalink / raw)
To: alexei.starovoitov, daniel, davem; +Cc: netdev
In-Reply-To: <20180601092646.15353.28269.stgit@john-Precision-Tower-5810>
From: Jiong Wang <jiong.wang@netronome.com>
This patch centre find_subprog and add_subprog to cfg.c.
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
kernel/bpf/cfg.c | 41 +++++++++++++++++++++++++++++++++++++++++
kernel/bpf/cfg.h | 2 ++
kernel/bpf/verifier.c | 42 ------------------------------------------
3 files changed, 43 insertions(+), 42 deletions(-)
diff --git a/kernel/bpf/cfg.c b/kernel/bpf/cfg.c
index 2f0ac00..5175aa7 100644
--- a/kernel/bpf/cfg.c
+++ b/kernel/bpf/cfg.c
@@ -6,8 +6,10 @@
*/
#include <linux/bpf_verifier.h>
+#include <linux/bsearch.h>
#include <linux/list.h>
#include <linux/slab.h>
+#include <linux/sort.h>
#include "cfg.h"
@@ -599,6 +601,45 @@ bool subprog_has_loop(struct bpf_subprog_info *subprog)
return false;
}
+static int cmp_subprogs(const void *a, const void *b)
+{
+ return ((struct bpf_subprog_info *)a)->start -
+ ((struct bpf_subprog_info *)b)->start;
+}
+
+int find_subprog(struct bpf_verifier_env *env, int off)
+{
+ struct bpf_subprog_info *p;
+
+ p = bsearch(&off, env->subprog_info, env->subprog_cnt,
+ sizeof(env->subprog_info[0]), cmp_subprogs);
+ if (!p)
+ return -ENOENT;
+ return p - env->subprog_info;
+}
+
+int add_subprog(struct bpf_verifier_env *env, int off)
+{
+ int insn_cnt = env->prog->len;
+ int ret;
+
+ if (off >= insn_cnt || off < 0) {
+ bpf_verifier_log_write(env, "call to invalid destination\n");
+ return -EINVAL;
+ }
+ ret = find_subprog(env, off);
+ if (ret >= 0)
+ return 0;
+ if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
+ bpf_verifier_log_write(env, "too many subprograms\n");
+ return -E2BIG;
+ }
+ env->subprog_info[env->subprog_cnt++].start = off;
+ sort(env->subprog_info, env->subprog_cnt,
+ sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
+ return 0;
+}
+
static void subprog_free_edge(struct bb_node *bb)
{
struct list_head *succs = &bb->e_succs;
diff --git a/kernel/bpf/cfg.h b/kernel/bpf/cfg.h
index 02729a9..57eab9b 100644
--- a/kernel/bpf/cfg.h
+++ b/kernel/bpf/cfg.h
@@ -8,6 +8,8 @@
#ifndef __BPF_CFG_H__
#define __BPF_CFG_H__
+int add_subprog(struct bpf_verifier_env *env, int off);
+int find_subprog(struct bpf_verifier_env *env, int off);
int subprog_add_bb_edges(struct bpf_insn *insns, struct list_head *bb_list);
int subprog_append_bb(struct list_head *bb_list, int head);
int subprog_build_dom_info(struct bpf_verifier_env *env,
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 29797d2..782dd17 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -20,8 +20,6 @@
#include <linux/file.h>
#include <linux/vmalloc.h>
#include <linux/stringify.h>
-#include <linux/bsearch.h>
-#include <linux/sort.h>
#include <linux/perf_event.h>
#include "cfg.h"
@@ -762,46 +760,6 @@ enum reg_arg_type {
DST_OP_NO_MARK /* same as above, check only, don't mark */
};
-static int cmp_subprogs(const void *a, const void *b)
-{
- return ((struct bpf_subprog_info *)a)->start -
- ((struct bpf_subprog_info *)b)->start;
-}
-
-static int find_subprog(struct bpf_verifier_env *env, int off)
-{
- struct bpf_subprog_info *p;
-
- p = bsearch(&off, env->subprog_info, env->subprog_cnt,
- sizeof(env->subprog_info[0]), cmp_subprogs);
- if (!p)
- return -ENOENT;
- return p - env->subprog_info;
-
-}
-
-static int add_subprog(struct bpf_verifier_env *env, int off)
-{
- int insn_cnt = env->prog->len;
- int ret;
-
- if (off >= insn_cnt || off < 0) {
- verbose(env, "call to invalid destination\n");
- return -EINVAL;
- }
- ret = find_subprog(env, off);
- if (ret >= 0)
- return 0;
- if (env->subprog_cnt >= BPF_MAX_SUBPROGS) {
- verbose(env, "too many subprograms\n");
- return -E2BIG;
- }
- env->subprog_info[env->subprog_cnt++].start = off;
- sort(env->subprog_info, env->subprog_cnt,
- sizeof(env->subprog_info[0]), cmp_subprogs, NULL);
- return 0;
-}
-
static int check_subprogs(struct bpf_verifier_env *env)
{
int i, ret, subprog_start, subprog_end, off, cur_subprog = 0;
^ permalink raw reply related
* [RFC PATCH 05/16] bpf: cfg: detect unreachable basic blocks
From: John Fastabend @ 2018-06-01 9:32 UTC (permalink / raw)
To: alexei.starovoitov, daniel, davem; +Cc: netdev
In-Reply-To: <20180601092646.15353.28269.stgit@john-Precision-Tower-5810>
From: Jiong Wang <jiong.wang@netronome.com>
Do unreachable basic blocks detection as a side-product of the dfs walk
when building domination information.
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
kernel/bpf/cfg.c | 19 ++++++++++++++-----
kernel/bpf/cfg.h | 3 ++-
kernel/bpf/verifier.c | 3 ++-
3 files changed, 18 insertions(+), 7 deletions(-)
diff --git a/kernel/bpf/cfg.c b/kernel/bpf/cfg.c
index 90692e4..2f0ac00 100644
--- a/kernel/bpf/cfg.c
+++ b/kernel/bpf/cfg.c
@@ -407,10 +407,11 @@ static void calc_idoms(struct bpf_subprog_info *subprog, struct dom_info *di,
di->idom[idx] = di->idom[di->idom[idx]];
}
-static int calc_dfs_tree(struct bpf_subprog_info *subprog, struct dom_info *di,
- bool reverse)
+static int
+calc_dfs_tree(struct bpf_verifier_env *env, struct bpf_subprog_info *subprog,
+ struct dom_info *di, bool reverse)
{
- u16 bb_num = subprog->bb_num, sp = 0, idx, parent_idx;
+ u16 bb_num = subprog->bb_num, sp = 0, idx, parent_idx, i;
struct list_head *bb_list = &subprog->bbs;
u16 entry_bb_fake_idx = bb_num - 2;
struct bb_node *entry_bb, *exit_bb;
@@ -490,6 +491,13 @@ static int calc_dfs_tree(struct bpf_subprog_info *subprog, struct dom_info *di,
kfree(stack);
+ for (i = 0; i < bb_num - 2; i++) {
+ if (!di->dfs_order[i]) {
+ bpf_verifier_log_write(env, "cfg - unreachable insn\n");
+ return -EINVAL;
+ }
+ }
+
return 0;
}
@@ -541,7 +549,8 @@ static int idoms_to_doms(struct bpf_subprog_info *subprog, struct dom_info *di)
* The implementation also referenced GNU GCC 3.0.
*/
-int subprog_build_dom_info(struct bpf_subprog_info *subprog)
+int subprog_build_dom_info(struct bpf_verifier_env *env,
+ struct bpf_subprog_info *subprog)
{
struct dom_info di;
int ret;
@@ -550,7 +559,7 @@ int subprog_build_dom_info(struct bpf_subprog_info *subprog)
if (ret < 0)
goto free_dominfo;
- ret = calc_dfs_tree(subprog, &di, false);
+ ret = calc_dfs_tree(env, subprog, &di, false);
if (ret < 0)
goto free_dominfo;
diff --git a/kernel/bpf/cfg.h b/kernel/bpf/cfg.h
index c02c4cf..02729a9 100644
--- a/kernel/bpf/cfg.h
+++ b/kernel/bpf/cfg.h
@@ -10,7 +10,8 @@
int subprog_add_bb_edges(struct bpf_insn *insns, struct list_head *bb_list);
int subprog_append_bb(struct list_head *bb_list, int head);
-int subprog_build_dom_info(struct bpf_subprog_info *subprog);
+int subprog_build_dom_info(struct bpf_verifier_env *env,
+ struct bpf_subprog_info *subprog);
int subprog_fini_bb(struct list_head *bb_list, int subprog_end);
bool subprog_has_loop(struct bpf_subprog_info *subprog);
int subprog_init_bb(struct list_head *bb_list, int subprog_start);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index c349c45..29797d2 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -904,7 +904,8 @@ static int check_subprogs(struct bpf_verifier_env *env)
if (ret < 0)
goto free_nodes;
subprog[cur_subprog].bb_num = ret;
- ret = subprog_build_dom_info(&subprog[cur_subprog]);
+ ret = subprog_build_dom_info(env,
+ &subprog[cur_subprog]);
if (ret < 0)
goto free_nodes;
if (subprog_has_loop(&subprog[cur_subprog])) {
^ permalink raw reply related
* [RFC PATCH 04/16] bpf: cfg: detect loop use domination information
From: John Fastabend @ 2018-06-01 9:32 UTC (permalink / raw)
To: alexei.starovoitov, daniel, davem; +Cc: netdev
In-Reply-To: <20180601092646.15353.28269.stgit@john-Precision-Tower-5810>
From: Jiong Wang <jiong.wang@netronome.com>
If one bb is dominating its predecessor, then there is loop.
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
kernel/bpf/cfg.c | 22 ++++++++++++++++++++++
kernel/bpf/cfg.h | 1 +
kernel/bpf/verifier.c | 8 ++++++++
3 files changed, 31 insertions(+)
diff --git a/kernel/bpf/cfg.c b/kernel/bpf/cfg.c
index b50937a..90692e4 100644
--- a/kernel/bpf/cfg.c
+++ b/kernel/bpf/cfg.c
@@ -568,6 +568,28 @@ int subprog_build_dom_info(struct bpf_subprog_info *subprog)
return ret;
}
+bool subprog_has_loop(struct bpf_subprog_info *subprog)
+{
+ int lane_len = BITS_TO_LONGS(subprog->bb_num - 2);
+ struct list_head *bb_list = &subprog->bbs;
+ struct bb_node *bb, *entry_bb;
+ struct edge_node *e;
+
+ entry_bb = entry_bb(bb_list);
+ bb = bb_next(entry_bb);
+ list_for_each_entry_from(bb, &exit_bb(bb_list)->l, l)
+ list_for_each_entry(e, &bb->e_prevs, l) {
+ struct bb_node *latch = e->src;
+
+ if (latch != entry_bb &&
+ test_bit(bb->idx,
+ subprog->dtree + latch->idx * lane_len))
+ return true;
+ }
+
+ return false;
+}
+
static void subprog_free_edge(struct bb_node *bb)
{
struct list_head *succs = &bb->e_succs;
diff --git a/kernel/bpf/cfg.h b/kernel/bpf/cfg.h
index cbb44f2..c02c4cf 100644
--- a/kernel/bpf/cfg.h
+++ b/kernel/bpf/cfg.h
@@ -12,6 +12,7 @@
int subprog_append_bb(struct list_head *bb_list, int head);
int subprog_build_dom_info(struct bpf_subprog_info *subprog);
int subprog_fini_bb(struct list_head *bb_list, int subprog_end);
+bool subprog_has_loop(struct bpf_subprog_info *subprog);
int subprog_init_bb(struct list_head *bb_list, int subprog_start);
void subprog_free(struct bpf_subprog_info *subprog, int end_idx);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index eccaee4..c349c45 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -904,6 +904,14 @@ static int check_subprogs(struct bpf_verifier_env *env)
if (ret < 0)
goto free_nodes;
subprog[cur_subprog].bb_num = ret;
+ ret = subprog_build_dom_info(&subprog[cur_subprog]);
+ if (ret < 0)
+ goto free_nodes;
+ if (subprog_has_loop(&subprog[cur_subprog])) {
+ verbose(env, "cfg - loop detected");
+ ret = -EINVAL;
+ goto free_nodes;
+ }
subprog_start = subprog_end;
cur_subprog++;
if (cur_subprog < env->subprog_cnt) {
^ permalink raw reply related
* [RFC PATCH 03/16] bpf: cfg: build domination tree using Tarjan algorithm
From: John Fastabend @ 2018-06-01 9:32 UTC (permalink / raw)
To: alexei.starovoitov, daniel, davem; +Cc: netdev
In-Reply-To: <20180601092646.15353.28269.stgit@john-Precision-Tower-5810>
From: Jiong Wang <jiong.wang@netronome.com>
- When building domination information, we need to some array data
structure, and need to index into them using basic block index.
Calculate the basic block index during adding edges.
- The Step-1 in Tarjan algorithm is to assign dfs order number for
each bb in the cfg. We need to iterate on all bbs in reverse dfs order.
- Step-2 and Step-3 in Tarjan algorithm to calculate semi-dominators and
immediate-dominators implicitly and explicitly.
- Step-4, we add a new bitmap field inside subprog_info to keep the
dominator tree there.
- Post-domination information is built but not tested.
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
include/linux/bpf_verifier.h | 3
kernel/bpf/cfg.c | 386 ++++++++++++++++++++++++++++++++++++++++++
kernel/bpf/cfg.h | 3
kernel/bpf/verifier.c | 5 -
4 files changed, 393 insertions(+), 4 deletions(-)
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index c70faf5..0eb838d 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -178,6 +178,9 @@ struct bpf_subprog_info {
u32 start; /* insn idx of function entry point */
u16 stack_depth; /* max. stack depth used by this function */
struct list_head bbs; /* basic blocks list. */
+ u32 bb_num; /* total basic block num. */
+ unsigned long *dtree;
+ bool dtree_avail;
};
/* single container for all structs
diff --git a/kernel/bpf/cfg.c b/kernel/bpf/cfg.c
index 208aac7..b50937a 100644
--- a/kernel/bpf/cfg.c
+++ b/kernel/bpf/cfg.c
@@ -17,11 +17,33 @@ struct edge_node {
struct bb_node *dst;
};
+struct edge_iter {
+ struct list_head *list_head;
+ struct edge_node *edge;
+};
+
+#define first_edge(e_list) list_first_entry(e_list, struct edge_node, l)
+#define last_edge(e_list) list_last_entry(e_list, struct edge_node, l)
+#define next_edge(e) list_next_entry(e, l)
+
+static bool ei_end_p(struct edge_iter *ei)
+{
+ return &ei->edge->l == ei->list_head;
+}
+
+static void ei_next(struct edge_iter *ei)
+{
+ struct edge_node *e = ei->edge;
+
+ ei->edge = next_edge(e);
+}
+
struct bb_node {
struct list_head l;
struct list_head e_prevs;
struct list_head e_succs;
u16 head;
+ u16 idx;
};
#define bb_prev(bb) list_prev_entry(bb, l)
@@ -31,6 +53,27 @@ struct bb_node {
#define entry_bb(bb_list) bb_first(bb_list)
#define exit_bb(bb_list) bb_last(bb_list)
+struct dom_info {
+ u16 *dfs_parent;
+ u16 *dfs_order;
+ struct bb_node **dfs_to_bb;
+ /* immediate-dominator */
+ u16 *idom;
+ /* semi-dominator */
+ u16 *sdom;
+ u16 *bucket;
+ u16 *next_bucket;
+ /* best node during path compression. */
+ u16 *best;
+ /* ancestor along tree edge. */
+ u16 *ancestor;
+ /* size and child are used for tree balancing. */
+ u16 *size;
+ u16 *child;
+
+ u16 dfsnum;
+};
+
int subprog_append_bb(struct list_head *bb_list, int head)
{
struct bb_node *new_bb, *bb;
@@ -107,6 +150,7 @@ int subprog_add_bb_edges(struct bpf_insn *insns, struct list_head *bb_list)
{
struct bb_node *bb, *exit_bb;
struct edge_node *edge;
+ int bb_num;
bb = entry_bb(bb_list);
edge = kcalloc(2, sizeof(*edge), GFP_KERNEL);
@@ -118,9 +162,12 @@ int subprog_add_bb_edges(struct bpf_insn *insns, struct list_head *bb_list)
edge[1].src = edge->src;
edge[1].dst = edge->dst;
list_add_tail(&edge[1].l, &edge[1].dst->e_prevs);
+ bb->idx = -1;
exit_bb = exit_bb(bb_list);
+ exit_bb->idx = -2;
bb = bb_next(bb);
+ bb_num = 0;
list_for_each_entry_from(bb, &exit_bb->l, l) {
bool has_fallthrough, only_has_fallthrough;
bool has_branch, only_has_branch;
@@ -129,6 +176,8 @@ int subprog_add_bb_edges(struct bpf_insn *insns, struct list_head *bb_list)
struct bpf_insn insn;
u8 code;
+ bb->idx = bb_num++;
+
edge = kcalloc(2, sizeof(*edge), GFP_KERNEL);
if (!edge)
return -ENOMEM;
@@ -184,9 +233,341 @@ int subprog_add_bb_edges(struct bpf_insn *insns, struct list_head *bb_list)
}
}
+ return bb_num + 2;
+}
+
+static int init_dom_info(struct bpf_subprog_info *subprog, struct dom_info *di)
+{
+ u16 *p, bb_num, i;
+
+ di->dfs_parent = NULL;
+ di->dfs_to_bb = NULL;
+
+ bb_num = subprog->bb_num;
+ p = kcalloc(10 * bb_num, sizeof(*p), GFP_KERNEL);
+ if (!p)
+ return -ENOMEM;
+
+ di->dfs_parent = p;
+ di->dfs_order = di->dfs_parent + bb_num;
+ di->idom = di->dfs_order + bb_num;
+ di->sdom = di->idom + bb_num;
+ di->bucket = di->sdom + bb_num;
+ di->next_bucket = di->bucket + bb_num;
+ di->best = di->next_bucket + bb_num;
+ di->ancestor = di->best + bb_num;
+ di->size = di->ancestor + bb_num;
+ di->child = di->size + bb_num;
+ di->dfs_to_bb = kcalloc(bb_num, sizeof(struct bb_node *), GFP_KERNEL);
+ di->dfsnum = 1;
+
+ for (i = 0; i < bb_num; i++) {
+ di->size[i] = 1;
+ di->best[i] = i;
+ di->sdom[i] = i;
+ }
+
return 0;
}
+static void compress_path(struct dom_info *di, unsigned int v)
+{
+ unsigned int parent = di->ancestor[v];
+
+ if (di->ancestor[parent]) {
+ compress_path(di, parent);
+
+ if (di->sdom[di->best[parent]] < di->sdom[di->best[v]])
+ di->best[v] = di->best[parent];
+
+ di->ancestor[v] = di->ancestor[parent];
+ }
+}
+
+static unsigned int eval(struct dom_info *di, unsigned int v)
+{
+ unsigned int ancestor = di->ancestor[v];
+
+ /* v is root. */
+ if (!ancestor)
+ return di->best[v];
+
+ /* compress path */
+ compress_path(di, v);
+ ancestor = di->ancestor[v];
+
+ if (di->sdom[di->best[ancestor]] >= di->sdom[di->best[v]])
+ return di->best[v];
+ else
+ return di->best[ancestor];
+}
+
+/* Re-balancing the tree before linking. */
+static void link(struct dom_info *di, unsigned int v, unsigned int w)
+{
+ unsigned int s = w;
+
+ while (di->sdom[di->best[w]] < di->sdom[di->best[di->child[s]]]) {
+ if (di->size[s] + di->size[di->child[di->child[s]]] >=
+ 2 * di->size[di->child[s]]) {
+ di->ancestor[di->child[s]] = s;
+ di->child[s] = di->child[di->child[s]];
+ } else {
+ di->size[di->child[s]] = di->size[s];
+ di->ancestor[s] = di->child[s];
+ s = di->child[s];
+ }
+ }
+
+ di->best[s] = di->best[w];
+ di->size[v] += di->size[w];
+ if (di->size[v] < 2 * di->size[w]) {
+ unsigned int t = s;
+
+ s = di->child[v];
+ di->child[v] = t;
+ }
+
+ while (s) {
+ di->ancestor[s] = v;
+ s = di->child[s];
+ }
+}
+
+static void calc_idoms(struct bpf_subprog_info *subprog, struct dom_info *di,
+ bool reverse)
+{
+ u16 entry_bb_fake_idx = subprog->bb_num - 2, idx, w, k, par;
+ struct list_head *bb_list = &subprog->bbs;
+ struct bb_node *entry_bb;
+ struct edge_iter ei;
+
+ if (reverse)
+ entry_bb = exit_bb(bb_list);
+ else
+ entry_bb = entry_bb(bb_list);
+ idx = di->dfsnum - 1;
+
+ while (idx > 1) {
+ struct bb_node *bb = di->dfs_to_bb[idx];
+ struct edge_node *e;
+
+ par = di->dfs_parent[idx];
+ k = idx;
+
+ if (reverse) {
+ ei.edge = first_edge(&bb->e_succs);
+ ei.list_head = &bb->e_succs;
+ } else {
+ ei.edge = first_edge(&bb->e_prevs);
+ ei.list_head = &bb->e_prevs;
+ }
+
+ while (!ei_end_p(&ei)) {
+ struct bb_node *b;
+ u16 k1;
+
+ e = ei.edge;
+ if (reverse)
+ b = e->dst;
+ else
+ b = e->src;
+ ei_next(&ei);
+
+ if (b == entry_bb)
+ k1 = di->dfs_order[entry_bb_fake_idx];
+ else
+ k1 = di->dfs_order[b->idx];
+
+ if (k1 > idx)
+ k1 = di->sdom[eval(di, k1)];
+ if (k1 < k)
+ k = k1;
+ }
+
+ di->sdom[idx] = k;
+ link(di, par, idx);
+ di->next_bucket[idx] = di->bucket[k];
+ di->bucket[k] = idx;
+
+ for (w = di->bucket[par]; w; w = di->next_bucket[w]) {
+ k = eval(di, w);
+ if (di->sdom[k] < di->sdom[w])
+ di->idom[w] = k;
+ else
+ di->idom[w] = par;
+ }
+ di->bucket[par] = 0;
+ idx--;
+ }
+
+ di->idom[1] = 0;
+ for (idx = 2; idx <= di->dfsnum - 1; idx++)
+ if (di->idom[idx] != di->sdom[idx])
+ di->idom[idx] = di->idom[di->idom[idx]];
+}
+
+static int calc_dfs_tree(struct bpf_subprog_info *subprog, struct dom_info *di,
+ bool reverse)
+{
+ u16 bb_num = subprog->bb_num, sp = 0, idx, parent_idx;
+ struct list_head *bb_list = &subprog->bbs;
+ u16 entry_bb_fake_idx = bb_num - 2;
+ struct bb_node *entry_bb, *exit_bb;
+ struct edge_iter ei, *stack;
+ struct edge_node *e;
+
+ di->dfs_order[entry_bb_fake_idx] = di->dfsnum;
+
+ stack = kmalloc_array(bb_num - 1, sizeof(struct edge_iter), GFP_KERNEL);
+ if (!stack)
+ return -ENOMEM;
+
+ if (reverse) {
+ entry_bb = exit_bb(bb_list);
+ exit_bb = entry_bb(bb_list);
+ di->dfs_to_bb[di->dfsnum++] = exit_bb;
+ ei.edge = first_edge(&entry_bb->e_prevs);
+ ei.list_head = &entry_bb->e_prevs;
+ } else {
+ entry_bb = entry_bb(bb_list);
+ exit_bb = exit_bb(bb_list);
+ di->dfs_to_bb[di->dfsnum++] = entry_bb;
+ ei.edge = first_edge(&entry_bb->e_succs);
+ ei.list_head = &entry_bb->e_succs;
+ }
+
+ while (1) {
+ struct bb_node *bb_dst, *bb_src;
+
+ while (!ei_end_p(&ei)) {
+ e = ei.edge;
+
+ if (reverse) {
+ bb_dst = e->src;
+ if (bb_dst == exit_bb ||
+ di->dfs_order[bb_dst->idx]) {
+ ei_next(&ei);
+ continue;
+ }
+ bb_src = e->dst;
+ } else {
+ bb_dst = e->dst;
+ if (bb_dst == exit_bb ||
+ di->dfs_order[bb_dst->idx]) {
+ ei_next(&ei);
+ continue;
+ }
+ bb_src = e->src;
+ }
+
+ if (bb_src != entry_bb)
+ parent_idx = di->dfs_order[bb_src->idx];
+ else
+ parent_idx = di->dfs_order[entry_bb_fake_idx];
+
+ idx = di->dfsnum++;
+ di->dfs_order[bb_dst->idx] = idx;
+ di->dfs_to_bb[idx] = bb_dst;
+ di->dfs_parent[idx] = parent_idx;
+
+ stack[sp++] = ei;
+ if (reverse) {
+ ei.edge = first_edge(&bb_dst->e_prevs);
+ ei.list_head = &bb_dst->e_prevs;
+ } else {
+ ei.edge = first_edge(&bb_dst->e_succs);
+ ei.list_head = &bb_dst->e_succs;
+ }
+ }
+
+ if (!sp)
+ break;
+
+ ei = stack[--sp];
+ ei_next(&ei);
+ }
+
+ kfree(stack);
+
+ return 0;
+}
+
+static int idoms_to_doms(struct bpf_subprog_info *subprog, struct dom_info *di)
+{
+ int bb_num, i, end_index, bb, bb_idom, lane_len;
+ unsigned long *bitmap;
+
+ bb_num = subprog->bb_num - 2;
+ lane_len = BITS_TO_LONGS(bb_num);
+ bitmap = kcalloc(bb_num, lane_len * sizeof(long), GFP_KERNEL);
+ subprog->dtree = bitmap;
+ if (!subprog->dtree)
+ return -ENOMEM;
+ subprog->dtree_avail = true;
+ end_index = di->dfs_order[bb_num];
+
+ for (i = 1; i <= di->dfsnum - 1; i++) {
+ if (i == end_index)
+ continue;
+
+ bb = di->dfs_to_bb[i]->idx;
+ if (di->idom[i] && di->idom[i] != end_index) {
+ bb_idom = di->dfs_to_bb[di->idom[i]]->idx;
+ bitmap_copy(bitmap + bb * lane_len,
+ bitmap + bb_idom * lane_len, bb_num);
+ }
+
+ bitmap_set(bitmap + bb * lane_len, bb, 1);
+ }
+
+ return 0;
+}
+
+/* Build domination information using Lengauer and Tarjan algorithm.
+ *
+ * 1. dfs on cfg to assign dfs-num for each node(bb).
+ * 2. calculate semi-dominator and calculate immediate dominator when
+ * possible.
+ * 3. calculate immediate dominator not finished during step 2.
+ * 4. build domination bitmap using immediate dominator information.
+ *
+ * See:
+ * A fast algorithm for finding dominators in a flowgraph.
+ * - 1979, by T Lengauer and R Tarjan
+ *
+ * Especially, Appendix B: The Complete Dominators Algorithms.
+ *
+ * The implementation also referenced GNU GCC 3.0.
+ */
+
+int subprog_build_dom_info(struct bpf_subprog_info *subprog)
+{
+ struct dom_info di;
+ int ret;
+
+ ret = init_dom_info(subprog, &di);
+ if (ret < 0)
+ goto free_dominfo;
+
+ ret = calc_dfs_tree(subprog, &di, false);
+ if (ret < 0)
+ goto free_dominfo;
+
+ calc_idoms(subprog, &di, false);
+ ret = idoms_to_doms(subprog, &di);
+ if (ret < 0)
+ goto free_dominfo;
+
+ ret = 0;
+
+free_dominfo:
+ if (di.dfs_parent)
+ kfree(di.dfs_parent);
+
+ return ret;
+}
+
static void subprog_free_edge(struct bb_node *bb)
{
struct list_head *succs = &bb->e_succs;
@@ -199,7 +580,7 @@ static void subprog_free_edge(struct bb_node *bb)
}
}
-void subprog_free_bb(struct bpf_subprog_info *subprog, int end_idx)
+void subprog_free(struct bpf_subprog_info *subprog, int end_idx)
{
int i = 0;
@@ -214,5 +595,8 @@ void subprog_free_bb(struct bpf_subprog_info *subprog, int end_idx)
list_del(&bb->l);
kfree(bb);
}
+
+ if (subprog[i].dtree_avail)
+ kfree(subprog[i].dtree);
}
}
diff --git a/kernel/bpf/cfg.h b/kernel/bpf/cfg.h
index 7a9d5dd..cbb44f2 100644
--- a/kernel/bpf/cfg.h
+++ b/kernel/bpf/cfg.h
@@ -10,8 +10,9 @@
int subprog_add_bb_edges(struct bpf_insn *insns, struct list_head *bb_list);
int subprog_append_bb(struct list_head *bb_list, int head);
+int subprog_build_dom_info(struct bpf_subprog_info *subprog);
int subprog_fini_bb(struct list_head *bb_list, int subprog_end);
int subprog_init_bb(struct list_head *bb_list, int subprog_start);
-void subprog_free_bb(struct bpf_subprog_info *subprog, int end_idx);
+void subprog_free(struct bpf_subprog_info *subprog, int end_idx);
#endif
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index bcac7c8..eccaee4 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -903,6 +903,7 @@ static int check_subprogs(struct bpf_verifier_env *env)
ret = subprog_add_bb_edges(insn, cur_bb_list);
if (ret < 0)
goto free_nodes;
+ subprog[cur_subprog].bb_num = ret;
subprog_start = subprog_end;
cur_subprog++;
if (cur_subprog < env->subprog_cnt) {
@@ -919,8 +920,8 @@ static int check_subprogs(struct bpf_verifier_env *env)
ret = 0;
free_nodes:
- subprog_free_bb(subprog, cur_subprog == env->subprog_cnt ?
- cur_subprog - 1 : cur_subprog);
+ subprog_free(subprog, cur_subprog == env->subprog_cnt ?
+ cur_subprog - 1 : cur_subprog);
return ret;
}
^ permalink raw reply related
* [RFC PATCH 02/16] bpf: cfg: add edges between basic blocks to form CFG
From: John Fastabend @ 2018-06-01 9:32 UTC (permalink / raw)
To: alexei.starovoitov, daniel, davem; +Cc: netdev
In-Reply-To: <20180601092646.15353.28269.stgit@john-Precision-Tower-5810>
From: Jiong Wang <jiong.wang@netronome.com>
This patch add edges between basic blocks. Both edges for predecessors and
successors are added.
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
kernel/bpf/cfg.c | 129 ++++++++++++++++++++++++++++++++++++++++++++++++-
kernel/bpf/cfg.h | 1
kernel/bpf/verifier.c | 3 +
3 files changed, 131 insertions(+), 2 deletions(-)
diff --git a/kernel/bpf/cfg.c b/kernel/bpf/cfg.c
index b1af714..208aac7 100644
--- a/kernel/bpf/cfg.c
+++ b/kernel/bpf/cfg.c
@@ -11,8 +11,16 @@
#include "cfg.h"
+struct edge_node {
+ struct list_head l;
+ struct bb_node *src;
+ struct bb_node *dst;
+};
+
struct bb_node {
struct list_head l;
+ struct list_head e_prevs;
+ struct list_head e_succs;
u16 head;
};
@@ -39,6 +47,8 @@ int subprog_append_bb(struct list_head *bb_list, int head)
if (!new_bb)
return -ENOMEM;
+ INIT_LIST_HEAD(&new_bb->e_prevs);
+ INIT_LIST_HEAD(&new_bb->e_succs);
new_bb->head = head;
list_add(&new_bb->l, &bb->l);
@@ -53,6 +63,8 @@ int subprog_fini_bb(struct list_head *bb_list, int subprog_end)
return -ENOMEM;
/* entry bb. */
bb->head = -1;
+ INIT_LIST_HEAD(&bb->e_prevs);
+ INIT_LIST_HEAD(&bb->e_succs);
list_add(&bb->l, bb_list);
bb = kzalloc(sizeof(*bb), GFP_KERNEL);
@@ -60,6 +72,8 @@ int subprog_fini_bb(struct list_head *bb_list, int subprog_end)
return -ENOMEM;
/* exit bb. */
bb->head = subprog_end;
+ INIT_LIST_HEAD(&bb->e_prevs);
+ INIT_LIST_HEAD(&bb->e_succs);
list_add_tail(&bb->l, bb_list);
return 0;
@@ -77,15 +91,126 @@ int subprog_init_bb(struct list_head *bb_list, int subprog_start)
return 0;
}
+static struct bb_node *search_bb_with_head(struct list_head *bb_list, int head)
+{
+ struct bb_node *bb;
+
+ list_for_each_entry(bb, bb_list, l) {
+ if (bb->head == head)
+ return bb;
+ }
+
+ return NULL;
+}
+
+int subprog_add_bb_edges(struct bpf_insn *insns, struct list_head *bb_list)
+{
+ struct bb_node *bb, *exit_bb;
+ struct edge_node *edge;
+
+ bb = entry_bb(bb_list);
+ edge = kcalloc(2, sizeof(*edge), GFP_KERNEL);
+ if (!edge)
+ return -ENOMEM;
+ edge->src = bb;
+ edge->dst = bb_next(bb);
+ list_add_tail(&edge->l, &bb->e_succs);
+ edge[1].src = edge->src;
+ edge[1].dst = edge->dst;
+ list_add_tail(&edge[1].l, &edge[1].dst->e_prevs);
+
+ exit_bb = exit_bb(bb_list);
+ bb = bb_next(bb);
+ list_for_each_entry_from(bb, &exit_bb->l, l) {
+ bool has_fallthrough, only_has_fallthrough;
+ bool has_branch, only_has_branch;
+ struct bb_node *next_bb = bb_next(bb);
+ int tail = next_bb->head - 1;
+ struct bpf_insn insn;
+ u8 code;
+
+ edge = kcalloc(2, sizeof(*edge), GFP_KERNEL);
+ if (!edge)
+ return -ENOMEM;
+ edge->src = bb;
+ edge[1].src = bb;
+
+ insn = insns[tail];
+ code = insn.code;
+ only_has_fallthrough = BPF_CLASS(code) != BPF_JMP ||
+ BPF_OP(code) == BPF_EXIT;
+ has_fallthrough = only_has_fallthrough ||
+ (BPF_CLASS(code) == BPF_JMP &&
+ BPF_OP(code) != BPF_CALL &&
+ BPF_OP(code) != BPF_JA);
+ only_has_branch = BPF_CLASS(code) == BPF_JMP &&
+ BPF_OP(code) == BPF_JA;
+ has_branch = only_has_branch ||
+ (BPF_CLASS(code) == BPF_JMP &&
+ BPF_OP(code) != BPF_EXIT &&
+ BPF_OP(code) != BPF_CALL);
+
+ if (has_fallthrough) {
+ if (BPF_CLASS(code) == BPF_JMP &&
+ BPF_OP(code) == BPF_EXIT)
+ next_bb = exit_bb;
+ edge->dst = next_bb;
+ edge[1].dst = next_bb;
+ list_add_tail(&edge->l, &bb->e_succs);
+ list_add_tail(&edge[1].l, &edge[1].dst->e_prevs);
+ edge = NULL;
+ }
+
+ if (has_branch) {
+ struct bb_node *tgt;
+
+ if (!edge) {
+ edge = kcalloc(2, sizeof(*edge), GFP_KERNEL);
+ if (!edge)
+ return -ENOMEM;
+ edge->src = bb;
+ edge[1].src = bb;
+ }
+
+ tgt = search_bb_with_head(bb_list,
+ tail + insn.off + 1);
+ if (!tgt)
+ return -EINVAL;
+
+ edge->dst = tgt;
+ edge[1].dst = tgt;
+ list_add_tail(&edge->l, &bb->e_succs);
+ list_add_tail(&edge[1].l, &tgt->e_prevs);
+ }
+ }
+
+ return 0;
+}
+
+static void subprog_free_edge(struct bb_node *bb)
+{
+ struct list_head *succs = &bb->e_succs;
+ struct edge_node *e, *tmp;
+
+ /* prevs and succs are allocated as pair, succs is the start addr. */
+ list_for_each_entry_safe(e, tmp, succs, l) {
+ list_del(&e->l);
+ kfree(e);
+ }
+}
+
void subprog_free_bb(struct bpf_subprog_info *subprog, int end_idx)
{
int i = 0;
for (; i <= end_idx; i++) {
struct list_head *bbs = &subprog[i].bbs;
- struct bb_node *bb, *tmp;
+ struct bb_node *bb, *tmp, *exit;
- list_for_each_entry_safe(bb, tmp, bbs, l) {
+ bb = entry_bb(bbs);
+ exit = exit_bb(bbs);
+ list_for_each_entry_safe_from(bb, tmp, &exit->l, l) {
+ subprog_free_edge(bb);
list_del(&bb->l);
kfree(bb);
}
diff --git a/kernel/bpf/cfg.h b/kernel/bpf/cfg.h
index 4092145..7a9d5dd 100644
--- a/kernel/bpf/cfg.h
+++ b/kernel/bpf/cfg.h
@@ -8,6 +8,7 @@
#ifndef __BPF_CFG_H__
#define __BPF_CFG_H__
+int subprog_add_bb_edges(struct bpf_insn *insns, struct list_head *bb_list);
int subprog_append_bb(struct list_head *bb_list, int head);
int subprog_fini_bb(struct list_head *bb_list, int subprog_end);
int subprog_init_bb(struct list_head *bb_list, int subprog_start);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 523e440..bcac7c8 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -900,6 +900,9 @@ static int check_subprogs(struct bpf_verifier_env *env)
ret = subprog_fini_bb(cur_bb_list, subprog_end);
if (ret < 0)
goto free_nodes;
+ ret = subprog_add_bb_edges(insn, cur_bb_list);
+ if (ret < 0)
+ goto free_nodes;
subprog_start = subprog_end;
cur_subprog++;
if (cur_subprog < env->subprog_cnt) {
^ permalink raw reply related
* [RFC PATCH 01/16] bpf: cfg: partition basic blocks for each subprog
From: John Fastabend @ 2018-06-01 9:32 UTC (permalink / raw)
To: alexei.starovoitov, daniel, davem; +Cc: netdev
In-Reply-To: <20180601092646.15353.28269.stgit@john-Precision-Tower-5810>
From: Jiong Wang <jiong.wang@netronome.com>
"check_subprogs" has partitioned subprogs and is doing insn scan for each
subprog already, this patch extends it to also partition basic blocks (BB)
for each subprog.
- The first insn for each subprog start a BB.
- Branch (both cond and absolute) target start a BB.
- Insn immediately follows branch insn start a BB.
Insn immediately follows exit and within subprog start a BB.
BBs for each subprog are organized as a list in ascending head.
Two special BBs, entry and exit are added as well.
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
---
include/linux/bpf_verifier.h | 1
kernel/bpf/Makefile | 2 -
kernel/bpf/cfg.c | 93 ++++++++++++++++++++++++++++++++++++++++++
kernel/bpf/cfg.h | 16 +++++++
kernel/bpf/verifier.c | 57 +++++++++++++++++++++++---
5 files changed, 162 insertions(+), 7 deletions(-)
create mode 100644 kernel/bpf/cfg.c
create mode 100644 kernel/bpf/cfg.h
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 38b04f5..c70faf5 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -177,6 +177,7 @@ static inline bool bpf_verifier_log_needed(const struct bpf_verifier_log *log)
struct bpf_subprog_info {
u32 start; /* insn idx of function entry point */
u16 stack_depth; /* max. stack depth used by this function */
+ struct list_head bbs; /* basic blocks list. */
};
/* single container for all structs
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index f27f549..3ee9036 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
obj-y := core.o
-obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
+obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o cfg.o
obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
obj-$(CONFIG_BPF_SYSCALL) += disasm.o
obj-$(CONFIG_BPF_SYSCALL) += btf.o
diff --git a/kernel/bpf/cfg.c b/kernel/bpf/cfg.c
new file mode 100644
index 0000000..b1af714
--- /dev/null
+++ b/kernel/bpf/cfg.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2018 Netronome Systems, Inc. */
+/* This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#include <linux/bpf_verifier.h>
+#include <linux/list.h>
+#include <linux/slab.h>
+
+#include "cfg.h"
+
+struct bb_node {
+ struct list_head l;
+ u16 head;
+};
+
+#define bb_prev(bb) list_prev_entry(bb, l)
+#define bb_next(bb) list_next_entry(bb, l)
+#define bb_first(bb_list) list_first_entry(bb_list, struct bb_node, l)
+#define bb_last(bb_list) list_last_entry(bb_list, struct bb_node, l)
+#define entry_bb(bb_list) bb_first(bb_list)
+#define exit_bb(bb_list) bb_last(bb_list)
+
+int subprog_append_bb(struct list_head *bb_list, int head)
+{
+ struct bb_node *new_bb, *bb;
+
+ list_for_each_entry(bb, bb_list, l) {
+ if (bb->head == head)
+ return 0;
+ else if (bb->head > head)
+ break;
+ }
+
+ bb = bb_prev(bb);
+ new_bb = kzalloc(sizeof(*new_bb), GFP_KERNEL);
+ if (!new_bb)
+ return -ENOMEM;
+
+ new_bb->head = head;
+ list_add(&new_bb->l, &bb->l);
+
+ return 0;
+}
+
+int subprog_fini_bb(struct list_head *bb_list, int subprog_end)
+{
+ struct bb_node *bb = kzalloc(sizeof(*bb), GFP_KERNEL);
+
+ if (!bb)
+ return -ENOMEM;
+ /* entry bb. */
+ bb->head = -1;
+ list_add(&bb->l, bb_list);
+
+ bb = kzalloc(sizeof(*bb), GFP_KERNEL);
+ if (!bb)
+ return -ENOMEM;
+ /* exit bb. */
+ bb->head = subprog_end;
+ list_add_tail(&bb->l, bb_list);
+
+ return 0;
+}
+
+int subprog_init_bb(struct list_head *bb_list, int subprog_start)
+{
+ int ret;
+
+ INIT_LIST_HEAD(bb_list);
+ ret = subprog_append_bb(bb_list, subprog_start);
+ if (ret < 0)
+ return ret;
+
+ return 0;
+}
+
+void subprog_free_bb(struct bpf_subprog_info *subprog, int end_idx)
+{
+ int i = 0;
+
+ for (; i <= end_idx; i++) {
+ struct list_head *bbs = &subprog[i].bbs;
+ struct bb_node *bb, *tmp;
+
+ list_for_each_entry_safe(bb, tmp, bbs, l) {
+ list_del(&bb->l);
+ kfree(bb);
+ }
+ }
+}
diff --git a/kernel/bpf/cfg.h b/kernel/bpf/cfg.h
new file mode 100644
index 0000000..4092145
--- /dev/null
+++ b/kernel/bpf/cfg.h
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (C) 2018 Netronome Systems, Inc. */
+/* This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+
+#ifndef __BPF_CFG_H__
+#define __BPF_CFG_H__
+
+int subprog_append_bb(struct list_head *bb_list, int head);
+int subprog_fini_bb(struct list_head *bb_list, int subprog_end);
+int subprog_init_bb(struct list_head *bb_list, int subprog_start);
+void subprog_free_bb(struct bpf_subprog_info *subprog, int end_idx);
+
+#endif
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1fd9667b..523e440 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -24,6 +24,7 @@
#include <linux/sort.h>
#include <linux/perf_event.h>
+#include "cfg.h"
#include "disasm.h"
static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
@@ -807,6 +808,7 @@ static int check_subprogs(struct bpf_verifier_env *env)
struct bpf_subprog_info *subprog = env->subprog_info;
struct bpf_insn *insn = env->prog->insnsi;
int insn_cnt = env->prog->len;
+ struct list_head *cur_bb_list;
/* Add entry function. */
ret = add_subprog(env, 0);
@@ -841,20 +843,46 @@ static int check_subprogs(struct bpf_verifier_env *env)
for (i = 0; i < env->subprog_cnt; i++)
verbose(env, "func#%d @%d\n", i, subprog[i].start);
- /* now check that all jumps are within the same subprog */
subprog_start = subprog[cur_subprog].start;
subprog_end = subprog[cur_subprog + 1].start;
+ cur_bb_list = &subprog[cur_subprog].bbs;
+ ret = subprog_init_bb(cur_bb_list, subprog_start);
+ if (ret < 0)
+ goto free_nodes;
+ /* now check that all jumps are within the same subprog */
for (i = 0; i < insn_cnt; i++) {
u8 code = insn[i].code;
if (BPF_CLASS(code) != BPF_JMP)
goto next;
- if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
+
+ if (BPF_OP(code) == BPF_EXIT) {
+ if (i + 1 < subprog_end) {
+ ret = subprog_append_bb(cur_bb_list, i + 1);
+ if (ret < 0)
+ goto free_nodes;
+ }
+ goto next;
+ }
+
+ if (BPF_OP(code) == BPF_CALL)
goto next;
+
off = i + insn[i].off + 1;
if (off < subprog_start || off >= subprog_end) {
verbose(env, "jump out of range from insn %d to %d\n", i, off);
- return -EINVAL;
+ ret = -EINVAL;
+ goto free_nodes;
+ }
+
+ ret = subprog_append_bb(cur_bb_list, off);
+ if (ret < 0)
+ goto free_nodes;
+
+ if (i + 1 < subprog_end) {
+ ret = subprog_append_bb(cur_bb_list, i + 1);
+ if (ret < 0)
+ goto free_nodes;
}
next:
if (i == subprog_end - 1) {
@@ -865,15 +893,32 @@ static int check_subprogs(struct bpf_verifier_env *env)
if (code != (BPF_JMP | BPF_EXIT) &&
code != (BPF_JMP | BPF_JA)) {
verbose(env, "last insn is not an exit or jmp\n");
- return -EINVAL;
+ ret = -EINVAL;
+ goto free_nodes;
}
+
+ ret = subprog_fini_bb(cur_bb_list, subprog_end);
+ if (ret < 0)
+ goto free_nodes;
subprog_start = subprog_end;
cur_subprog++;
- if (cur_subprog < env->subprog_cnt)
+ if (cur_subprog < env->subprog_cnt) {
subprog_end = subprog[cur_subprog + 1].start;
+ cur_bb_list = &subprog[cur_subprog].bbs;
+ ret = subprog_init_bb(cur_bb_list,
+ subprog_start);
+ if (ret < 0)
+ goto free_nodes;
+ }
}
}
- return 0;
+
+ ret = 0;
+
+free_nodes:
+ subprog_free_bb(subprog, cur_subprog == env->subprog_cnt ?
+ cur_subprog - 1 : cur_subprog);
+ return ret;
}
static
^ permalink raw reply related
* [RFC PATCH 00/16] bpf, bounded loop support work in progress
From: John Fastabend @ 2018-06-01 9:32 UTC (permalink / raw)
To: alexei.starovoitov, daniel, davem; +Cc: netdev
This series is an early preview of ongoing work to support loops
inside BPF verifier. The code is rough in spots (more rough the
further you get into the series) but we have some signs of life!
The following code runs and is verified,
SEC("classifier_tc_loop1")
int _tc_loop(struct __sk_buff *ctx)
{
void *data = (void *)(unsigned long)ctx->data;
void *data_end = (void *)(unsigned long)ctx->data_end;
__u8 i = 0, j = 0, k = 0, *p;
if (data + 2 > data_end)
return TC_ACT_OK;
p = data;
j = p[0];
if (data + 101 > data_end)
return TC_ACT_OK;
if (j < 100)
return TC_ACT_OK;
#pragma nounroll
while (i < j) {
k += p[i];
i++;
}
ctx->mark = k;
return TC_ACT_OK;
}
perhaps even more important many invalid loops are caught and
rejected. There are still a few cases that need to be handled
but largely things are working. And lots of code cleanup,
polishing and improvements sill needed but worth sharing
I think. Even if no one reads the code line by line the commit
messages have the description.
The series is broken into three parts.
Part1: Build the basic infrastruce. Basic Blocks, CFG and DOM are
built. (work done by Jiong).
Part2: Verify loops are bounded and encourage the state pruning
logic to prune as many states as possible.
Part3: Tests. Largely tbd at this point although I've been
doing lots of manual poking around with good/bad/ugly C programs.
Some high level details around design decisions for each part but
please see patch descriptions for more.
First Part1 Jiong chose to use Tarjan algorithm to build DOM.
(We also have an iterative solution if that is prefered but in
theory at least Tarjan has better running time.) Also we have
calculated the pdom as well but it is currently unused.
For all Part2 gory details please see patch,
"bpf: verifier, add initial support to allow bounded loops"
The high level design is as follows,
i. Use DOM to find loops
ii. Find induction variables in loop
iii. Verify termination by comparing induction variable (inc/dec)
against the JMP op. Propagate min/max values into insn aux
data.
iv. At do_check time ensure induction variable is bounded as
required to ensure loop termination. These are a form of
restriction on valid values for the registers.
v. Encourage state pruning by using known bounds from
previous induction step. e.g. if a scalar increments from
0 to 100 we know min/max values that are vali for every
iteration.
vi. Loop state is usually pruned early but if not we can simply
run the loop costing verifier complexity.
There is still lots of work to finish up all the pieces but we wanted
to share the current design.
Thanks,
John
---
Jiong Wang (11):
bpf: cfg: partition basic blocks for each subprog
bpf: cfg: add edges between basic blocks to form CFG
bpf: cfg: build domination tree using Tarjan algorithm
bpf: cfg: detect loop use domination information
bpf: cfg: detect unreachable basic blocks
bpf: cfg: move find_subprog/add_subprog to cfg.c
bpf: cfg: build call graph and detect unreachable/recursive call
bpf: cfg: remove push_insn and check_cfg
bpf: cfg: reduce k*alloc/free call by using memory pool for allocating nodes
bpf: cfg: reduce memory usage by using singly list + compat pointer
bpf: cfg: detect irreducible loop using Eric Stoltz algorithm
John Fastabend (5):
bpf: cfg: pretty print CFG and DOM
bpf: verifier, can ptr range be calculated with scalar ALU op
bpf: verifier, add initial support to allow bounded loops
bpf: verifier, simple loop examples
bpf: tools: dbg patch to turn on debugging and add primitive examples
include/linux/bpf_verifier.h | 28
kernel/bpf/Makefile | 2
kernel/bpf/cfg.c | 2060 ++++++++++++++++++++++++
kernel/bpf/cfg.h | 56 +
kernel/bpf/verifier.c | 499 +++---
samples/bpf/xdp2skb_meta_kern.c | 57 +
tools/lib/bpf/bpf.c | 2
tools/lib/bpf/libbpf.c | 7
tools/testing/selftests/bpf/Makefile | 5
tools/testing/selftests/bpf/test_bad_loop1.c | 47 +
tools/testing/selftests/bpf/test_bad_loop2.c | 45 +
tools/testing/selftests/bpf/test_bad_loop3.c | 47 +
tools/testing/selftests/bpf/test_basic_loop1.c | 44 +
tools/testing/selftests/bpf/test_basic_loop2.c | 48 +
tools/testing/selftests/bpf/test_basic_loop3.c | 51 +
15 files changed, 2711 insertions(+), 287 deletions(-)
create mode 100644 kernel/bpf/cfg.c
create mode 100644 kernel/bpf/cfg.h
create mode 100644 tools/testing/selftests/bpf/test_bad_loop1.c
create mode 100644 tools/testing/selftests/bpf/test_bad_loop2.c
create mode 100644 tools/testing/selftests/bpf/test_bad_loop3.c
create mode 100644 tools/testing/selftests/bpf/test_basic_loop1.c
create mode 100644 tools/testing/selftests/bpf/test_basic_loop2.c
create mode 100644 tools/testing/selftests/bpf/test_basic_loop3.c
^ permalink raw reply
* Re: [PATCH net-next 1/5] net: aquantia: Ethtool based ring size configuration
From: Igor Russkikh @ 2018-06-01 9:19 UTC (permalink / raw)
To: Jakub Kicinski
Cc: David S . Miller, netdev, David Arcari, Pavel Belous,
Anton Mikaev
In-Reply-To: <20180529120403.3f89a394@cakuba>
>> +
>> + spin_lock(&aq_nic->aq_spinlock);
>> +
>> + if (netif_running(ndev))
>> + dev_close(ndev);
>
> I don't think you can hold a spinlock around dev_close()/dev_open()
> calls.
Thanks Jakub, think you are right, will consider changing this lock to mutex.
>> + if (!netif_running(ndev))
>> + err = dev_open(ndev);
>
> Will this not open the device regardless if it was open before or not?
Correct, thanks!
BR, Igor
^ permalink raw reply
* Re: [PATCH 15/19] net: qualcomm: MODULE_DEVICE_TABLE(serdev)
From: Stefan Wahren @ 2018-06-01 8:59 UTC (permalink / raw)
To: Ricardo Ribalda Delgado, linux-kernel, linux-serial
Cc: Lino Sanfilippo, David S . Miller, Rob Herring, Johan Hovold,
netdev
In-Reply-To: <20180529131014.18641-16-ricardo.ribalda@gmail.com>
Hi Ricardo,
Am 29.05.2018 um 15:10 schrieb Ricardo Ribalda Delgado:
> Export serdev table to the module header, allowing module autoload via
> udev/modprobe.
>
> Cc: Lino Sanfilippo <LinoSanfilippo@gmx.de>
> Cc: David S. Miller <davem@davemloft.net>
> Cc: Stefan Wahren <stefan.wahren@i2se.com>
> Cc: Rob Herring <robh@kernel.org>
> Cc: Johan Hovold <johan@kernel.org>
> Cc: netdev@vger.kernel.org
> Signed-off-by: Ricardo Ribalda Delgado <ricardo.ribalda@gmail.com>
> ---
> drivers/net/ethernet/qualcomm/qca_uart.c | 7 +++++++
> 1 file changed, 7 insertions(+)
>
> diff --git a/drivers/net/ethernet/qualcomm/qca_uart.c b/drivers/net/ethernet/qualcomm/qca_uart.c
> index db6068cd7a1f..bb7aed805083 100644
> --- a/drivers/net/ethernet/qualcomm/qca_uart.c
> +++ b/drivers/net/ethernet/qualcomm/qca_uart.c
> @@ -405,6 +405,12 @@ static void qca_uart_remove(struct serdev_device *serdev)
> free_netdev(qca->net_dev);
> }
>
> +static struct serdev_device_id qca_uart_serdev_id[] = {
> + { QCAUART_DRV_NAME, },
i guess the id must be stable, so in case someone changes the driver
name this has unexpected side effects?
Apart from that i'm fine with this patch.
Thanks
Stefan
> + {},
> +};
> +MODULE_DEVICE_TABLE(serdev, qca_uart_serdev_id);
> +
> static struct serdev_device_driver qca_uart_driver = {
> .probe = qca_uart_probe,
> .remove = qca_uart_remove,
> @@ -412,6 +418,7 @@ static struct serdev_device_driver qca_uart_driver = {
> .name = QCAUART_DRV_NAME,
> .of_match_table = of_match_ptr(qca_uart_of_match),
> },
> + .id_table = qca_uart_serdev_id,
> };
>
> module_serdev_device_driver(qca_uart_driver);
^ permalink raw reply
* Re: [PATCH] net: core: improve the tx_hash calculating
From: Sergei Shtylyov @ 2018-06-01 8:58 UTC (permalink / raw)
To: Tonghao Zhang, netdev
In-Reply-To: <1527761641-22034-1-git-send-email-xiangxia.m.yue@gmail.com>
Hello!
On 5/31/2018 1:14 PM, Tonghao Zhang wrote:
> Use the % instead of while, and it may simple code and improve
> the calculating. The real_num_tx_queues has been checked when
> allocating and setting it.
>
> Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
> ---
> net/core/dev.c | 8 +++-----
> 1 file changed, 3 insertions(+), 5 deletions(-)
>
> diff --git a/net/core/dev.c b/net/core/dev.c
> index 1844d9b..edc5b75 100644
> --- a/net/core/dev.c
> +++ b/net/core/dev.c
> @@ -2617,15 +2617,13 @@ void netif_device_attach(struct net_device *dev)
> */
> static u16 skb_tx_hash(const struct net_device *dev, struct sk_buff *skb)
> {
> - u32 hash;
> u16 qoffset = 0;
> u16 qcount = dev->real_num_tx_queues;
>
> if (skb_rx_queue_recorded(skb)) {
> - hash = skb_get_rx_queue(skb);
> - while (unlikely(hash >= qcount))
> - hash -= qcount;
> - return hash;
> + /* When setting the real_num_tx_queues, we make sure
> + * real_num_tx_queues != 0. */
In the networking code, multiline comments should look like:
/* bla
* bla
*/
> + return skb_get_rx_queue(skb) % qcount;
> }
>
> if (dev->num_tc) {
MBR, Sergei
^ permalink raw reply
* Re: linux-next: build failure after merge of the net-next tree
From: Alexei Starovoitov @ 2018-06-01 8:52 UTC (permalink / raw)
To: Stephen Rothwell
Cc: David Miller, Networking, Linux-Next Mailing List,
Linux Kernel Mailing List, Jakub Kicinski, Alexei Starovoitov,
Daniel Borkmann
In-Reply-To: <20180601135943.66b90890@canb.auug.org.au>
On Fri, Jun 01, 2018 at 01:59:43PM +1000, Stephen Rothwell wrote:
> Hi all,
>
> On Thu, 31 May 2018 07:38:55 +1000 Stephen Rothwell <sfr@canb.auug.org.au> wrote:
> >
> > On Tue, 29 May 2018 13:25:48 +1000 Stephen Rothwell <sfr@canb.auug.org.au> wrote:
> > >
> > > After merging the net-next tree, today's linux-next build (x86_64
> > > allmodconfig) failed like this:
> > >
> > > x86_64-linux-ld: unknown architecture of input file `net/bpfilter/bpfilter_umh.o' is incompatible with i386:x86-64 output
> > >
> > > Caused by commit
> > >
> > > d2ba09c17a06 ("net: add skeleton of bpfilter kernel module")
> > >
> > > In my builds, the host is PowerPC 64 LE ...
> > >
> > > I have reverted that commit along with
> > >
> > > 61a552eb487f ("bpfilter: fix build dependency")
> > > 13405468f49d ("bpfilter: don't pass O_CREAT when opening console for debug")
> > >
> > > for today.
> >
> > I am still getting this failure (well, at least yesterday I did).
>
> Still happened today. My guess is that bpfilter_umh needs to be built
> with the kernel compiler (not the host compiler - since ir is meant to
> run on the some machine as the kernel, right?) but will require the
> kernel architecture libc etc
>
>
> I replaced the reverts above with the following:
>
> From: Stephen Rothwell <sfr@canb.auug.org.au>
> Date: Fri, 1 Jun 2018 13:33:28 +1000
> Subject: [PATCH] net: bpfilter: mark as BROKEN for now
>
> This does not build in a cross compile environment
>
> Signed-off-by: Stephen Rothwell <sfr@canb.auug.org.au>
> ---
> net/bpfilter/Kconfig | 1 +
> 1 file changed, 1 insertion(+)
>
> diff --git a/net/bpfilter/Kconfig b/net/bpfilter/Kconfig
> index a948b072c28f..ea4be72fdf6e 100644
> --- a/net/bpfilter/Kconfig
> +++ b/net/bpfilter/Kconfig
> @@ -2,6 +2,7 @@ menuconfig BPFILTER
> bool "BPF based packet filtering framework (BPFILTER)"
> default n
> depends on NET && BPF && INET
> + depends on BROKEN
yeah. that's another option.
Sorry for delay. networking folks are traveling to netconf.
The issue is being discussed here:
https://patchwork.ozlabs.org/patch/921597/
Currently we're thinking how to add a check that hostcc's arch
is equal to cross-cc target arch and ideally equal to arch
of the host where it will be run.
^ permalink raw reply
* Re: [PATCH bpf 1/2] bpf: fix alignment of netns_dev/netns_ino fields in bpf_{map,prog}_info
From: Alexei Starovoitov @ 2018-06-01 8:41 UTC (permalink / raw)
To: Linus Torvalds, Eugene Syromiatnikov, netdev, linux-kernel,
Martin KaFai Lau, Daniel Borkmann, Alexei Starovoitov,
David S. Miller, Jiri Olsa, Ingo Molnar, Lawrence Brakmo,
Andrey Ignatov, Jakub Kicinski, John Fastabend
In-Reply-To: <20180601031210.GA30533@altlinux.org>
On Fri, Jun 01, 2018 at 06:12:10AM +0300, Dmitry V. Levin wrote:
> Hi,
>
> Looks like the ABI bug in bpf_map_info and bpf_prog info introduced
> in 4.16 is going to slip into 4.17, causing extra pain to 32-bit
> userspace. I'm adding Linus to this thread in hope it might help
> to get a fix applied before 4.17 is released.
The issue identified in patch 1 is valid.
These two fields won't be properly accessible by 32-bit user space
on 64-bit kernel.
But the fix in patch 1 is wrong.
The patch 2 is completely unnecessary.
Everyone can also see that the patches are still marked as 'new' in patchworks
meaning that we didn't process them.
At the moment many networking folks are traveling to netconf
and we only had a chance to discuss this set last night.
We'll try to send a fix in coming days.
But regardless whether 4.17 is realesed this sunday or not
we're not going to rush wrong patch in without proper code review
and discussion.
That future patch either will land in 4.17 (if it's dealyed into next sunday)
or it will be sent to stable.
To speed up the situation next time please report the issue that you find
to public netdev mailing list instead of using proprietary distro emails.
Thanks.
> On Wed, May 30, 2018 at 09:18:58PM +0300, Dmitry V. Levin wrote:
> > On Sun, May 27, 2018 at 01:28:42PM +0200, Eugene Syromiatnikov wrote:
> > > Recent introduction of netns_dev/netns_ino to bpf_map_info/bpf_prog info
> > > has broken compat, as offsets of these fields are different in 32-bit
> > > and 64-bit ABIs. One fix (other than implementing compat support in
> > > syscall in order to handle this discrepancy) is to use __aligned_u64
> > > instead of __u64 for these fields.
> > >
> > > Reported-by: Dmitry V. Levin <ldv@altlinux.org>
> > > Fixes: 52775b33bb507 ("bpf: offload: report device information about
> > > offloaded maps")
> > > Fixes: 675fc275a3a2d ("bpf: offload: report device information for
> > > offloaded programs")
> > >
> > > Signed-off-by: Eugene Syromiatnikov <esyr@redhat.com>
> >
> > Reviewed-by: "Dmitry V. Levin" <ldv@altlinux.org>
> > Cc: <stable@vger.kernel.org> # v4.16+
> >
> > Thanks,
> >
> > > ---
> > > include/uapi/linux/bpf.h | 8 ++++----
> > > tools/include/uapi/linux/bpf.h | 8 ++++----
> > > 2 files changed, 8 insertions(+), 8 deletions(-)
> > >
> > > diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
> > > index c5ec897..903010a 100644
> > > --- a/include/uapi/linux/bpf.h
> > > +++ b/include/uapi/linux/bpf.h
> > > @@ -1017,8 +1017,8 @@ struct bpf_prog_info {
> > > __aligned_u64 map_ids;
> > > char name[BPF_OBJ_NAME_LEN];
> > > __u32 ifindex;
> > > - __u64 netns_dev;
> > > - __u64 netns_ino;
> > > + __aligned_u64 netns_dev;
> > > + __aligned_u64 netns_ino;
> > > } __attribute__((aligned(8)));
> > >
> > > struct bpf_map_info {
> > > @@ -1030,8 +1030,8 @@ struct bpf_map_info {
> > > __u32 map_flags;
> > > char name[BPF_OBJ_NAME_LEN];
> > > __u32 ifindex;
> > > - __u64 netns_dev;
> > > - __u64 netns_ino;
> > > + __aligned_u64 netns_dev;
> > > + __aligned_u64 netns_ino;
> > > } __attribute__((aligned(8)));
> > >
> > > /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
> > > diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
> > > index c5ec897..903010a 100644
> > > --- a/tools/include/uapi/linux/bpf.h
> > > +++ b/tools/include/uapi/linux/bpf.h
> > > @@ -1017,8 +1017,8 @@ struct bpf_prog_info {
> > > __aligned_u64 map_ids;
> > > char name[BPF_OBJ_NAME_LEN];
> > > __u32 ifindex;
> > > - __u64 netns_dev;
> > > - __u64 netns_ino;
> > > + __aligned_u64 netns_dev;
> > > + __aligned_u64 netns_ino;
> > > } __attribute__((aligned(8)));
> > >
> > > struct bpf_map_info {
> > > @@ -1030,8 +1030,8 @@ struct bpf_map_info {
> > > __u32 map_flags;
> > > char name[BPF_OBJ_NAME_LEN];
> > > __u32 ifindex;
> > > - __u64 netns_dev;
> > > - __u64 netns_ino;
> > > + __aligned_u64 netns_dev;
> > > + __aligned_u64 netns_ino;
> > > } __attribute__((aligned(8)));
> > >
> > > /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
>
>
> --
> ldv
^ permalink raw reply
* Re: [PATCH net-next] rtnetlink: Fix null-ptr-deref in rtnl_newlink
From: Eric Dumazet @ 2018-06-01 8:26 UTC (permalink / raw)
To: bhole_prashant_q7
Cc: David Miller, Daniel Borkmann, Alexei Starovoitov, Kirill Tkhai,
Florian Westphal, netdev, Kees Cook
In-Reply-To: <20180601081658.6968-1-bhole_prashant_q7@lab.ntt.co.jp>
On Fri, Jun 1, 2018 at 4:18 AM Prashant Bhole
<bhole_prashant_q7@lab.ntt.co.jp> wrote:
>
> In rtnl_newlink(), NULL check is performed on m_ops however member of
> ops is accessed. Fixed by accessing member of m_ops instead of ops.
>
> [ 345.432629] BUG: KASAN: null-ptr-deref in rtnl_newlink+0x400/0x1110
> [ 345.432629] Read of size 4 at addr 0000000000000088 by task ip/986
> [ 345.432629]
> [ 345.432629] CPU: 1 PID: 986 Comm: ip Not tainted 4.17.0-rc6+ #9
> [ 345.432629] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014
> [ 345.432629] Call Trace:
> [ 345.432629] dump_stack+0xc6/0x150
> [ 345.432629] ? dump_stack_print_info.cold.0+0x1b/0x1b
> [ 345.432629] ? kasan_report+0xb4/0x410
> [ 345.432629] kasan_report.cold.4+0x8f/0x91
> [ 345.432629] ? rtnl_newlink+0x400/0x1110
> [ 345.432629] rtnl_newlink+0x400/0x1110
> [...]
>
> Fixes: ccf8dbcd062a ("rtnetlink: Remove VLA usage")
> Signed-off-by: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
> ---
> net/core/rtnetlink.c | 2 +-
> 1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
> index 8ca49a0e13fb..9a1ba2015ad8 100644
> --- a/net/core/rtnetlink.c
> +++ b/net/core/rtnetlink.c
> @@ -2936,7 +2936,7 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
> }
>
> if (m_ops) {
> - if (ops->slave_maxtype > RTNL_SLAVE_MAX_TYPE)
> + if (m_ops->slave_maxtype > RTNL_SLAVE_MAX_TYPE)
> return -EINVAL;
Oh nice
CC Kees Cook.
^ permalink raw reply
* [bpf PATCH] bpf: sockmap, fix crash when ipv6 sock is added
From: John Fastabend @ 2018-06-01 8:21 UTC (permalink / raw)
To: edumazet, ast, daniel; +Cc: netdev
This fixes a crash where we assign tcp_prot to IPv6 sockets instead
of tcpv6_prot.
Previously we overwrote the sk->prot field with tcp_prot even in the
AF_INET6 case. This patch ensures the correct tcp_prot and tcpv6_prot
are used.
Fixes: 174a79ff9515 ("bpf: sockmap with sk redirect support")
Reported-by: syzbot+5c063698bdbfac19f363@syzkaller.appspotmail.com
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Wei Wang <weiwan@google.com>
---
kernel/bpf/sockmap.c | 15 ++++++++++++++-
1 file changed, 14 insertions(+), 1 deletion(-)
diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c
index 95a84b2..d16b5a3 100644
--- a/kernel/bpf/sockmap.c
+++ b/kernel/bpf/sockmap.c
@@ -41,6 +41,7 @@
#include <linux/mm.h>
#include <net/strparser.h>
#include <net/tcp.h>
+#include <net/transp_v6.h>
#include <linux/ptr_ring.h>
#include <net/inet_common.h>
#include <linux/sched/signal.h>
@@ -134,6 +135,8 @@ static bool bpf_tcp_stream_read(const struct sock *sk)
}
static struct proto tcp_bpf_proto;
+static struct proto tcpv6_bpf_proto;
+
static int bpf_tcp_init(struct sock *sk)
{
struct smap_psock *psock;
@@ -154,13 +157,21 @@ static int bpf_tcp_init(struct sock *sk)
psock->sk_proto = sk->sk_prot;
if (psock->bpf_tx_msg) {
+ tcpv6_bpf_proto.sendmsg = bpf_tcp_sendmsg;
+ tcpv6_bpf_proto.sendpage = bpf_tcp_sendpage;
+ tcpv6_bpf_proto.recvmsg = bpf_tcp_recvmsg;
+ tcpv6_bpf_proto.stream_memory_read = bpf_tcp_stream_read;
tcp_bpf_proto.sendmsg = bpf_tcp_sendmsg;
tcp_bpf_proto.sendpage = bpf_tcp_sendpage;
tcp_bpf_proto.recvmsg = bpf_tcp_recvmsg;
tcp_bpf_proto.stream_memory_read = bpf_tcp_stream_read;
}
- sk->sk_prot = &tcp_bpf_proto;
+ if (sk->sk_family == AF_INET6)
+ sk->sk_prot = &tcpv6_bpf_proto;
+ else
+ sk->sk_prot = &tcp_bpf_proto;
+
rcu_read_unlock();
return 0;
}
@@ -1072,6 +1083,8 @@ static int bpf_tcp_ulp_register(void)
{
tcp_bpf_proto = tcp_prot;
tcp_bpf_proto.close = bpf_tcp_close;
+ tcpv6_bpf_proto = tcpv6_prot;
+ tcpv6_bpf_proto.close = bpf_tcp_close;
/* Once BPF TX ULP is registered it is never unregistered. It
* will be in the ULP list for the lifetime of the system. Doing
* duplicate registers is not a problem.
^ permalink raw reply related
* [PATCH iproute2] devlink: don't enforce NETLINK_{CAP,EXT}_ACK sock opts
From: Ivan Vecera @ 2018-06-01 8:18 UTC (permalink / raw)
To: netdev; +Cc: Arkadi Sharshevsky, Stephen Hemminger
Since commit 049c58539f5d ("devlink: mnlg: Add support for extended ack")
devlink requires NETLINK_{CAP,EXT}_ACK. This prevents devlink from
working with older kernels that don't support these features.
host # ./devlink/devlink
Failed to connect to devlink Netlink
Fixes: 049c58539f5d ("devlink: mnlg: Add support for extended ack")
Cc: Arkadi Sharshevsky <arkadis@mellanox.com>
Cc: Stephen Hemminger <stephen@networkplumber.org>
Signed-off-by: Ivan Vecera <ivecera@redhat.com>
---
devlink/mnlg.c | 14 +++-----------
1 file changed, 3 insertions(+), 11 deletions(-)
diff --git a/devlink/mnlg.c b/devlink/mnlg.c
index 3d28453a..c33c90be 100644
--- a/devlink/mnlg.c
+++ b/devlink/mnlg.c
@@ -271,15 +271,9 @@ struct mnlg_socket *mnlg_socket_open(const char *family_name, uint8_t version)
if (!nlg->nl)
goto err_mnl_socket_open;
- err = mnl_socket_setsockopt(nlg->nl, NETLINK_CAP_ACK, &one,
- sizeof(one));
- if (err)
- goto err_mnl_set_ack;
-
- err = mnl_socket_setsockopt(nlg->nl, NETLINK_EXT_ACK, &one,
- sizeof(one));
- if (err)
- goto err_mnl_set_ext_ack;
+ /* Older kernels may no support capped/extended ACK reporting */
+ mnl_socket_setsockopt(nlg->nl, NETLINK_CAP_ACK, &one, sizeof(one));
+ mnl_socket_setsockopt(nlg->nl, NETLINK_EXT_ACK, &one, sizeof(one));
err = mnl_socket_bind(nlg->nl, 0, MNL_SOCKET_AUTOPID);
if (err < 0)
@@ -305,8 +299,6 @@ struct mnlg_socket *mnlg_socket_open(const char *family_name, uint8_t version)
err_mnlg_socket_recv_run:
err_mnlg_socket_send:
err_mnl_socket_bind:
-err_mnl_set_ext_ack:
-err_mnl_set_ack:
mnl_socket_close(nlg->nl);
err_mnl_socket_open:
free(nlg->buf);
--
2.16.1
^ permalink raw reply related
* [PATCH net-next] rtnetlink: Fix null-ptr-deref in rtnl_newlink
From: Prashant Bhole @ 2018-06-01 8:16 UTC (permalink / raw)
To: David S . Miller, Eric Dumazet
Cc: Prashant Bhole, Daniel Borkmann, Alexei Starovoitov, Kirill Tkhai,
Florian Westphal, netdev
In rtnl_newlink(), NULL check is performed on m_ops however member of
ops is accessed. Fixed by accessing member of m_ops instead of ops.
[ 345.432629] BUG: KASAN: null-ptr-deref in rtnl_newlink+0x400/0x1110
[ 345.432629] Read of size 4 at addr 0000000000000088 by task ip/986
[ 345.432629]
[ 345.432629] CPU: 1 PID: 986 Comm: ip Not tainted 4.17.0-rc6+ #9
[ 345.432629] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014
[ 345.432629] Call Trace:
[ 345.432629] dump_stack+0xc6/0x150
[ 345.432629] ? dump_stack_print_info.cold.0+0x1b/0x1b
[ 345.432629] ? kasan_report+0xb4/0x410
[ 345.432629] kasan_report.cold.4+0x8f/0x91
[ 345.432629] ? rtnl_newlink+0x400/0x1110
[ 345.432629] rtnl_newlink+0x400/0x1110
[...]
Fixes: ccf8dbcd062a ("rtnetlink: Remove VLA usage")
Signed-off-by: Prashant Bhole <bhole_prashant_q7@lab.ntt.co.jp>
---
net/core/rtnetlink.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 8ca49a0e13fb..9a1ba2015ad8 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2936,7 +2936,7 @@ static int rtnl_newlink(struct sk_buff *skb, struct nlmsghdr *nlh,
}
if (m_ops) {
- if (ops->slave_maxtype > RTNL_SLAVE_MAX_TYPE)
+ if (m_ops->slave_maxtype > RTNL_SLAVE_MAX_TYPE)
return -EINVAL;
if (m_ops->slave_maxtype &&
--
2.17.0
^ permalink raw reply related
* Re: [PATCH net-next] netfilter: nf_tables: check msg_type before nft_trans_set(trans)
From: Pablo Neira Ayuso @ 2018-06-01 8:14 UTC (permalink / raw)
To: Florian Westphal
Cc: Alexey Kodanev, netfilter-devel, Jozsef Kadlecsik, coreteam,
netdev
In-Reply-To: <20180531190731.rrwayqgnnnd2fasn@breakpoint.cc>
On Thu, May 31, 2018 at 09:07:31PM +0200, Florian Westphal wrote:
> Alexey Kodanev <alexey.kodanev@oracle.com> wrote:
> > The patch moves the "trans->msg_type == NFT_MSG_NEWSET" check before
> > using nft_trans_set(trans). Otherwise we can get out of bounds read.
>
> Indeed, thanks for fixining this.
>
> Acked-by: Florian Westphal <fw@strlen.de>
Also applied to nf.git, thanks!
^ permalink raw reply
* Re: [PATCH bpf-next] bpf: prevent non-IPv4 socket to be added into sock hash
From: John Fastabend @ 2018-06-01 7:56 UTC (permalink / raw)
To: Eric Dumazet; +Cc: Wei Wang, netdev, Willem de Bruijn
In-Reply-To: <CANn89iLocj4Gth3R0+a4ppRhYHV=buWSgfEYLg+zJay25D477g@mail.gmail.com>
On 05/31/2018 06:00 PM, Eric Dumazet wrote:
> On Thu, May 31, 2018 at 7:32 PM John Fastabend <john.fastabend@gmail.com> wrote:
>>
>>
>> Hi Wei,
>>
>> Thanks for the report and fix. It would be better to fix the
>> root cause so that IPv6 works as intended.
>>
>> I'm testing the following now,
>>
>> Author: John Fastabend <john.fastabend@gmail.com>
>> Date: Thu May 31 14:38:59 2018 -0700
>>
>> sockmap: fix crash when ipv6 sock is added by adding support for IPv6
>>
>> Apparently we had a testing escape and missed IPv6. This fixes a crash
>> where we assign tcp_prot to IPv6 sockets instead of tcpv6_prot.
>>
>> Signed-off-by: John Fastabend <john.fastabend@gmail.com>
>>
>
> Hi John
>
> In any case, please forward correct attribution for Wei's work, and
> syzbot 'Reported-by'
Will send update with tags in a moment.
>
> Are you sure you are handling IPv4 mapped in IPv6 sockets as well ?
>
No, will look into it. Although I didn't see any code to handle it
in the ./net/tls case either so if there is some issue with this it
could possibly exist in both ULPs. I guess if ipv4 mapped ipv6
changes prot or callbacks then we could stomp on it.
> Thanks.
>
^ permalink raw reply
* Re: [PATCH v2] netfilter: nfnetlink: Remove VLA usage
From: Pablo Neira Ayuso @ 2018-06-01 7:48 UTC (permalink / raw)
To: Kees Cook
Cc: Jozsef Kadlecsik, Florian Westphal, David S. Miller,
netfilter-devel, coreteam, netdev, linux-kernel
In-Reply-To: <20180530191756.GA39504@beast>
On Wed, May 30, 2018 at 12:17:56PM -0700, Kees Cook wrote:
> In the quest to remove all stack VLA usage from the kernel[1], this
> allocates the maximum size expected for all possible attrs and adds
> sanity-checks at both registration and usage to make sure nothing
> gets out of sync.
>
> [1] https://lkml.kernel.org/r/CA+55aFzCG-zNmZwX4A2FQpadafLfEzK6CC=qPXydAacU1RqZWA@mail.gmail.com
Applied, thanks Kees.
^ permalink raw reply
* Re: [PATCH] netfilter: nft_numgen: fix ptr_ret.cocci warnings
From: Pablo Neira Ayuso @ 2018-06-01 7:46 UTC (permalink / raw)
To: Laura Garcia
Cc: kbuild test robot, kbuild-all, Jozsef Kadlecsik, Florian Westphal,
Netfilter Development Mailing list, coreteam, netdev,
linux-kernel
In-Reply-To: <CAF90-WhsJTVcM+N_7n+4Zf_Knqzxc0UPv4i7bAM6Mvqu-GLHCw@mail.gmail.com>
On Thu, May 24, 2018 at 12:40:04PM +0200, Laura Garcia wrote:
> On Wed, May 23, 2018 at 12:58 PM, kbuild test robot
> <fengguang.wu@intel.com> wrote:
> > From: kbuild test robot <fengguang.wu@intel.com>
> >
> > net/netfilter/nft_numgen.c:117:1-3: WARNING: PTR_ERR_OR_ZERO can be used
> >
> >
> > Use PTR_ERR_OR_ZERO rather than if(IS_ERR(...)) + PTR_ERR
> >
> > Generated by: scripts/coccinelle/api/ptr_ret.cocci
> >
> > Fixes: d734a2888922 ("netfilter: nft_numgen: add map lookups for numgen statements")
> > CC: Laura Garcia Liebana <nevola@gmail.com>
> > Signed-off-by: kbuild test robot <fengguang.wu@intel.com>
>
> Acked-by: Laura Garcia Liebana <nevola@gmail.com>
Applied, thanks.
^ permalink raw reply
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox