* [PATCH bpf-next 05/11] bpf: avoid retpoline for lookup/update/delete calls on maps
From: Daniel Borkmann @ 2018-05-28 0:43 UTC (permalink / raw)
To: ast; +Cc: netdev, Daniel Borkmann
In-Reply-To: <20180528004344.3606-1-daniel@iogearbox.net>
While some of the BPF map lookup helpers provide a ->map_gen_lookup()
callback for inlining the map lookup altogether it is not available
for every map, so the remaining ones have to call bpf_map_lookup_elem()
helper which does a dispatch to map->ops->map_lookup_elem(). In
times of retpolines, this will control and trap speculative execution
rather than letting it do its work for the indirect call and will
therefore cause a slowdown. Likewise, bpf_map_update_elem() and
bpf_map_delete_elem() do not have an inlined version and need to call
into their map->ops->map_update_elem() resp. map->ops->map_delete_elem()
handlers.
Before:
# bpftool p d x i 1
0: (bf) r2 = r10
1: (07) r2 += -8
2: (7a) *(u64 *)(r2 +0) = 0
3: (18) r1 = map[id:1]
5: (85) call __htab_map_lookup_elem#232656
6: (15) if r0 == 0x0 goto pc+4
7: (71) r1 = *(u8 *)(r0 +35)
8: (55) if r1 != 0x0 goto pc+1
9: (72) *(u8 *)(r0 +35) = 1
10: (07) r0 += 56
11: (15) if r0 == 0x0 goto pc+4
12: (bf) r2 = r0
13: (18) r1 = map[id:1]
15: (85) call bpf_map_delete_elem#215008 <-- indirect call via
16: (95) exit helper
After:
# bpftool p d x i 1
0: (bf) r2 = r10
1: (07) r2 += -8
2: (7a) *(u64 *)(r2 +0) = 0
3: (18) r1 = map[id:1]
5: (85) call __htab_map_lookup_elem#233328
6: (15) if r0 == 0x0 goto pc+4
7: (71) r1 = *(u8 *)(r0 +35)
8: (55) if r1 != 0x0 goto pc+1
9: (72) *(u8 *)(r0 +35) = 1
10: (07) r0 += 56
11: (15) if r0 == 0x0 goto pc+4
12: (bf) r2 = r0
13: (18) r1 = map[id:1]
15: (85) call htab_lru_map_delete_elem#238240 <-- direct call
16: (95) exit
In all three lookup/update/delete cases however we can use the actual
address of the map callback directly if we find that there's only a
single path with a map pointer leading to the helper call, meaning
when the map pointer has not been poisoned from verifier side.
Example code can be seen above for the delete case.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
include/linux/filter.h | 3 +++
kernel/bpf/hashtab.c | 12 ++++++---
kernel/bpf/verifier.c | 67 +++++++++++++++++++++++++++++++++++++-------------
3 files changed, 62 insertions(+), 20 deletions(-)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index b443f70..d407ede 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -301,6 +301,9 @@ struct xdp_buff;
/* Function call */
+#define BPF_CAST_CALL(x) \
+ ((u64 (*)(u64, u64, u64, u64, u64))(x))
+
#define BPF_EMIT_CALL(FUNC) \
((struct bpf_insn) { \
.code = BPF_JMP | BPF_CALL, \
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index b76828f..3ca2198 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -503,7 +503,9 @@ static u32 htab_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
struct bpf_insn *insn = insn_buf;
const int ret = BPF_REG_0;
- *insn++ = BPF_EMIT_CALL((u64 (*)(u64, u64, u64, u64, u64))__htab_map_lookup_elem);
+ BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
+ (void *(*)(struct bpf_map *map, void *key))NULL));
+ *insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem));
*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 1);
*insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
offsetof(struct htab_elem, key) +
@@ -530,7 +532,9 @@ static u32 htab_lru_map_gen_lookup(struct bpf_map *map,
const int ret = BPF_REG_0;
const int ref_reg = BPF_REG_1;
- *insn++ = BPF_EMIT_CALL((u64 (*)(u64, u64, u64, u64, u64))__htab_map_lookup_elem);
+ BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
+ (void *(*)(struct bpf_map *map, void *key))NULL));
+ *insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem));
*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 4);
*insn++ = BPF_LDX_MEM(BPF_B, ref_reg, ret,
offsetof(struct htab_elem, lru_node) +
@@ -1369,7 +1373,9 @@ static u32 htab_of_map_gen_lookup(struct bpf_map *map,
struct bpf_insn *insn = insn_buf;
const int ret = BPF_REG_0;
- *insn++ = BPF_EMIT_CALL((u64 (*)(u64, u64, u64, u64, u64))__htab_map_lookup_elem);
+ BUILD_BUG_ON(!__same_type(&__htab_map_lookup_elem,
+ (void *(*)(struct bpf_map *map, void *key))NULL));
+ *insn++ = BPF_EMIT_CALL(BPF_CAST_CALL(__htab_map_lookup_elem));
*insn++ = BPF_JMP_IMM(BPF_JEQ, ret, 0, 2);
*insn++ = BPF_ALU64_IMM(BPF_ADD, ret,
offsetof(struct htab_elem, key) +
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 4f4786e..5684b15 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2421,8 +2421,11 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
struct bpf_insn_aux_data *aux = &env->insn_aux_data[insn_idx];
if (func_id != BPF_FUNC_tail_call &&
- func_id != BPF_FUNC_map_lookup_elem)
+ func_id != BPF_FUNC_map_lookup_elem &&
+ func_id != BPF_FUNC_map_update_elem &&
+ func_id != BPF_FUNC_map_delete_elem)
return 0;
+
if (meta->map_ptr == NULL) {
verbose(env, "kernel subsystem misconfigured verifier\n");
return -EINVAL;
@@ -5586,6 +5589,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
struct bpf_insn *insn = prog->insnsi;
const struct bpf_func_proto *fn;
const int insn_cnt = prog->len;
+ const struct bpf_map_ops *ops;
struct bpf_insn_aux_data *aux;
struct bpf_insn insn_buf[16];
struct bpf_prog *new_prog;
@@ -5715,10 +5719,13 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
}
/* BPF_EMIT_CALL() assumptions in some of the map_gen_lookup
- * handlers are currently limited to 64 bit only.
+ * and other inlining handlers are currently limited to 64 bit
+ * only.
*/
if (prog->jit_requested && BITS_PER_LONG == 64 &&
- insn->imm == BPF_FUNC_map_lookup_elem) {
+ (insn->imm == BPF_FUNC_map_lookup_elem ||
+ insn->imm == BPF_FUNC_map_update_elem ||
+ insn->imm == BPF_FUNC_map_delete_elem)) {
aux = &env->insn_aux_data[i + delta];
if (bpf_map_ptr_poisoned(aux))
goto patch_call_imm;
@@ -5727,23 +5734,49 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
if (!map_ptr->ops->map_gen_lookup)
goto patch_call_imm;
- cnt = map_ptr->ops->map_gen_lookup(map_ptr, insn_buf);
- if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
- verbose(env, "bpf verifier is misconfigured\n");
- return -EINVAL;
- }
+ ops = map_ptr->ops;
+ if (insn->imm == BPF_FUNC_map_lookup_elem &&
+ ops->map_gen_lookup) {
+ cnt = ops->map_gen_lookup(map_ptr, insn_buf);
+ if (cnt == 0 || cnt >= ARRAY_SIZE(insn_buf)) {
+ verbose(env, "bpf verifier is misconfigured\n");
+ return -EINVAL;
+ }
- new_prog = bpf_patch_insn_data(env, i + delta, insn_buf,
- cnt);
- if (!new_prog)
- return -ENOMEM;
+ new_prog = bpf_patch_insn_data(env, i + delta,
+ insn_buf, cnt);
+ if (!new_prog)
+ return -ENOMEM;
- delta += cnt - 1;
+ delta += cnt - 1;
+ env->prog = prog = new_prog;
+ insn = new_prog->insnsi + i + delta;
+ continue;
+ }
- /* keep walking new program and skip insns we just inserted */
- env->prog = prog = new_prog;
- insn = new_prog->insnsi + i + delta;
- continue;
+ BUILD_BUG_ON(!__same_type(ops->map_lookup_elem,
+ (void *(*)(struct bpf_map *map, void *key))NULL));
+ BUILD_BUG_ON(!__same_type(ops->map_delete_elem,
+ (int (*)(struct bpf_map *map, void *key))NULL));
+ BUILD_BUG_ON(!__same_type(ops->map_update_elem,
+ (int (*)(struct bpf_map *map, void *key, void *value,
+ u64 flags))NULL));
+ switch (insn->imm) {
+ case BPF_FUNC_map_lookup_elem:
+ insn->imm = BPF_CAST_CALL(ops->map_lookup_elem) -
+ __bpf_call_base;
+ continue;
+ case BPF_FUNC_map_update_elem:
+ insn->imm = BPF_CAST_CALL(ops->map_update_elem) -
+ __bpf_call_base;
+ continue;
+ case BPF_FUNC_map_delete_elem:
+ insn->imm = BPF_CAST_CALL(ops->map_delete_elem) -
+ __bpf_call_base;
+ continue;
+ }
+
+ goto patch_call_imm;
}
if (insn->imm == BPF_FUNC_redirect_map) {
--
2.9.5
^ permalink raw reply related
* [PATCH bpf-next 06/11] bpf: add bpf_skb_cgroup_id helper
From: Daniel Borkmann @ 2018-05-28 0:43 UTC (permalink / raw)
To: ast; +Cc: netdev, Daniel Borkmann
In-Reply-To: <20180528004344.3606-1-daniel@iogearbox.net>
Add a new bpf_skb_cgroup_id() helper that allows retrieving the
cgroup id from the skb's socket. This is useful in particular to
enable bpf_get_cgroup_classid()-like behavior for cgroup v1 in
cgroup v2 by allowing ID based matching on egress. This can in
particular be used in combination with applying policy e.g. from
map lookups, and also complements the older bpf_skb_under_cgroup()
interface. In user space the cgroup id for a given path can be
retrieved through the f_handle as demonstrated in [0] recently.
[0] https://lkml.org/lkml/2018/5/22/1190
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
include/uapi/linux/bpf.h | 17 ++++++++++++++++-
net/core/filter.c | 29 +++++++++++++++++++++++++++--
2 files changed, 43 insertions(+), 3 deletions(-)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 9b8c6e3..e2853aa 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2004,6 +2004,20 @@ union bpf_attr {
* direct packet access.
* Return
* 0 on success, or a negative error in case of failure.
+ *
+ * uint64_t bpf_skb_cgroup_id(struct sk_buff *skb)
+ * Description
+ * Return the cgroup v2 id of the socket associated with the *skb*.
+ * This is roughly similar to the **bpf_get_cgroup_classid**\ ()
+ * helper for cgroup v1 by providing a tag resp. identifier that
+ * can be matched on or used for map lookups e.g. to implement
+ * policy. The cgroup v2 id of a given path in the hierarchy is
+ * exposed in user space through the f_handle API in order to get
+ * to the same 64-bit id.
+ *
+ * This helper can be used on TC egress path, but not on ingress.
+ * Return
+ * The id is returned or 0 in case the id could not be retrieved.
*/
#define __BPF_FUNC_MAPPER(FN) \
FN(unspec), \
@@ -2082,7 +2096,8 @@ union bpf_attr {
FN(lwt_push_encap), \
FN(lwt_seg6_store_bytes), \
FN(lwt_seg6_adjust_srh), \
- FN(lwt_seg6_action),
+ FN(lwt_seg6_action), \
+ FN(skb_cgroup_id),
/* integer value in 'imm' field of BPF_CALL instruction selects which helper
* function eBPF program intends to call
diff --git a/net/core/filter.c b/net/core/filter.c
index acf1f4f..717c740 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3661,6 +3661,27 @@ static const struct bpf_func_proto bpf_skb_under_cgroup_proto = {
.arg3_type = ARG_ANYTHING,
};
+#ifdef CONFIG_SOCK_CGROUP_DATA
+BPF_CALL_1(bpf_skb_cgroup_id, const struct sk_buff *, skb)
+{
+ struct sock *sk = skb_to_full_sk(skb);
+ struct cgroup *cgrp;
+
+ if (!sk || !sk_fullsock(sk))
+ return 0;
+
+ cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+ return cgrp->kn->id.id;
+}
+
+static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
+ .func = bpf_skb_cgroup_id,
+ .gpl_only = false,
+ .ret_type = RET_INTEGER,
+ .arg1_type = ARG_PTR_TO_CTX,
+};
+#endif
+
static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
unsigned long off, unsigned long len)
{
@@ -4741,12 +4762,16 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
return &bpf_get_socket_cookie_proto;
case BPF_FUNC_get_socket_uid:
return &bpf_get_socket_uid_proto;
+ case BPF_FUNC_fib_lookup:
+ return &bpf_skb_fib_lookup_proto;
#ifdef CONFIG_XFRM
case BPF_FUNC_skb_get_xfrm_state:
return &bpf_skb_get_xfrm_state_proto;
#endif
- case BPF_FUNC_fib_lookup:
- return &bpf_skb_fib_lookup_proto;
+#ifdef CONFIG_SOCK_CGROUP_DATA
+ case BPF_FUNC_skb_cgroup_id:
+ return &bpf_skb_cgroup_id_proto;
+#endif
default:
return bpf_base_func_proto(func_id);
}
--
2.9.5
^ permalink raw reply related
* [PATCH bpf-next 07/11] bpf: make sure to clear unused fields in tunnel/xfrm state fetch
From: Daniel Borkmann @ 2018-05-28 0:43 UTC (permalink / raw)
To: ast; +Cc: netdev, Daniel Borkmann
In-Reply-To: <20180528004344.3606-1-daniel@iogearbox.net>
Since the remaining bits are not filled in struct bpf_tunnel_key
resp. struct bpf_xfrm_state and originate from uninitialized stack
space, we should make sure to clear them before handing control
back to the program.
Also add a padding element to struct bpf_xfrm_state for future use,
similar to what we have in struct bpf_tunnel_key, and clear it as well.
struct bpf_xfrm_state {
__u32 reqid; /* 0 4 */
__u32 spi; /* 4 4 */
__u16 family; /* 8 2 */
/* XXX 2 bytes hole, try to pack */
union {
__u32 remote_ipv4; /* 4 */
__u32 remote_ipv6[4]; /* 16 */
}; /* 12 16 */
/* size: 28, cachelines: 1, members: 4 */
/* sum members: 26, holes: 1, sum holes: 2 */
/* last cacheline: 28 bytes */
};
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
include/uapi/linux/bpf.h | 3 ++-
net/core/filter.c | 6 ++++++
2 files changed, 8 insertions(+), 1 deletion(-)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index e2853aa..7108711 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2214,7 +2214,7 @@ struct bpf_tunnel_key {
};
__u8 tunnel_tos;
__u8 tunnel_ttl;
- __u16 tunnel_ext;
+ __u16 tunnel_ext; /* Padding, future use. */
__u32 tunnel_label;
};
@@ -2225,6 +2225,7 @@ struct bpf_xfrm_state {
__u32 reqid;
__u32 spi; /* Stored in network byte order */
__u16 family;
+ __u16 ext; /* Padding, future use. */
union {
__u32 remote_ipv4; /* Stored in network byte order */
__u32 remote_ipv6[4]; /* Stored in network byte order */
diff --git a/net/core/filter.c b/net/core/filter.c
index 717c740..5ceb5e6 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3445,6 +3445,7 @@ BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key
to->tunnel_id = be64_to_cpu(info->key.tun_id);
to->tunnel_tos = info->key.tos;
to->tunnel_ttl = info->key.ttl;
+ to->tunnel_ext = 0;
if (flags & BPF_F_TUNINFO_IPV6) {
memcpy(to->remote_ipv6, &info->key.u.ipv6.src,
@@ -3452,6 +3453,8 @@ BPF_CALL_4(bpf_skb_get_tunnel_key, struct sk_buff *, skb, struct bpf_tunnel_key
to->tunnel_label = be32_to_cpu(info->key.label);
} else {
to->remote_ipv4 = be32_to_cpu(info->key.u.ipv4.src);
+ memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3);
+ to->tunnel_label = 0;
}
if (unlikely(size != sizeof(struct bpf_tunnel_key)))
@@ -4047,11 +4050,14 @@ BPF_CALL_5(bpf_skb_get_xfrm_state, struct sk_buff *, skb, u32, index,
to->reqid = x->props.reqid;
to->spi = x->id.spi;
to->family = x->props.family;
+ to->ext = 0;
+
if (to->family == AF_INET6) {
memcpy(to->remote_ipv6, x->props.saddr.a6,
sizeof(to->remote_ipv6));
} else {
to->remote_ipv4 = x->props.saddr.a4;
+ memset(&to->remote_ipv6[1], 0, sizeof(__u32) * 3);
}
return 0;
--
2.9.5
^ permalink raw reply related
* [PATCH bpf-next 04/11] bpf: show prog and map id in fdinfo
From: Daniel Borkmann @ 2018-05-28 0:43 UTC (permalink / raw)
To: ast; +Cc: netdev, Daniel Borkmann
In-Reply-To: <20180528004344.3606-1-daniel@iogearbox.net>
It's trivial and straightforward to expose it for scripts that can
then use it along with bpftool in order to inspect an individual
application's used maps and progs. Right now we dump some basic
information in the fdinfo file but with the help of the map/prog
id full introspection becomes possible now.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
kernel/bpf/syscall.c | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 388d4fe..79341e8 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -326,13 +326,15 @@ static void bpf_map_show_fdinfo(struct seq_file *m, struct file *filp)
"value_size:\t%u\n"
"max_entries:\t%u\n"
"map_flags:\t%#x\n"
- "memlock:\t%llu\n",
+ "memlock:\t%llu\n"
+ "map_id:\t%u\n",
map->map_type,
map->key_size,
map->value_size,
map->max_entries,
map->map_flags,
- map->pages * 1ULL << PAGE_SHIFT);
+ map->pages * 1ULL << PAGE_SHIFT,
+ map->id);
if (owner_prog_type) {
seq_printf(m, "owner_prog_type:\t%u\n",
@@ -1069,11 +1071,13 @@ static void bpf_prog_show_fdinfo(struct seq_file *m, struct file *filp)
"prog_type:\t%u\n"
"prog_jited:\t%u\n"
"prog_tag:\t%s\n"
- "memlock:\t%llu\n",
+ "memlock:\t%llu\n"
+ "prog_id:\t%u\n",
prog->type,
prog->jited,
prog_tag,
- prog->pages * 1ULL << PAGE_SHIFT);
+ prog->pages * 1ULL << PAGE_SHIFT,
+ prog->aux->id);
}
#endif
--
2.9.5
^ permalink raw reply related
* [PATCH bpf-next 03/11] bpf: fixup error message from gpl helpers on license mismatch
From: Daniel Borkmann @ 2018-05-28 0:43 UTC (permalink / raw)
To: ast; +Cc: netdev, Daniel Borkmann
In-Reply-To: <20180528004344.3606-1-daniel@iogearbox.net>
Stating 'proprietary program' in the error is just silly since the
program can also be under a different open source license that is
simply not GPL compatible.
Reference: https://twitter.com/majek04/status/998531268039102465
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
kernel/bpf/verifier.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 1fd9667b..4f4786e 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -2462,7 +2462,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
/* eBPF programs must be GPL compatible to use GPL-ed functions */
if (!env->prog->gpl_compatible && fn->gpl_only) {
- verbose(env, "cannot call GPL only function from proprietary program\n");
+ verbose(env, "cannot call GPL-restricted function from non-GPL compatible program\n");
return -EINVAL;
}
--
2.9.5
^ permalink raw reply related
* [PATCH bpf-next 02/11] bpf: add also cbpf long jump test cases with heavy expansion
From: Daniel Borkmann @ 2018-05-28 0:43 UTC (permalink / raw)
To: ast; +Cc: netdev, Daniel Borkmann
In-Reply-To: <20180528004344.3606-1-daniel@iogearbox.net>
We have one triggering on eBPF but let's also add a cBPF example to
make sure we keep tracking them. Also add another cBPF test running
max number of MSH ops.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
lib/test_bpf.c | 63 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 63 insertions(+)
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 317f231..60aedc8 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -356,6 +356,52 @@ static int bpf_fill_maxinsns11(struct bpf_test *self)
return __bpf_fill_ja(self, BPF_MAXINSNS, 68);
}
+static int bpf_fill_maxinsns12(struct bpf_test *self)
+{
+ unsigned int len = BPF_MAXINSNS;
+ struct sock_filter *insn;
+ int i = 0;
+
+ insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL);
+ if (!insn)
+ return -ENOMEM;
+
+ insn[0] = __BPF_JUMP(BPF_JMP | BPF_JA, len - 2, 0, 0);
+
+ for (i = 1; i < len - 1; i++)
+ insn[i] = __BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0);
+
+ insn[len - 1] = __BPF_STMT(BPF_RET | BPF_K, 0xabababab);
+
+ self->u.ptr.insns = insn;
+ self->u.ptr.len = len;
+
+ return 0;
+}
+
+static int bpf_fill_maxinsns13(struct bpf_test *self)
+{
+ unsigned int len = BPF_MAXINSNS;
+ struct sock_filter *insn;
+ int i = 0;
+
+ insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL);
+ if (!insn)
+ return -ENOMEM;
+
+ for (i = 0; i < len - 3; i++)
+ insn[i] = __BPF_STMT(BPF_LDX | BPF_B | BPF_MSH, 0);
+
+ insn[len - 3] = __BPF_STMT(BPF_LD | BPF_IMM, 0xabababab);
+ insn[len - 2] = __BPF_STMT(BPF_ALU | BPF_XOR | BPF_X, 0);
+ insn[len - 1] = __BPF_STMT(BPF_RET | BPF_A, 0);
+
+ self->u.ptr.insns = insn;
+ self->u.ptr.len = len;
+
+ return 0;
+}
+
static int bpf_fill_ja(struct bpf_test *self)
{
/* Hits exactly 11 passes on x86_64 JIT. */
@@ -5290,6 +5336,23 @@ static struct bpf_test tests[] = {
.expected_errcode = -ENOTSUPP,
},
{
+ "BPF_MAXINSNS: jump over MSH",
+ { },
+ CLASSIC | FLAG_EXPECTED_FAIL,
+ { 0xfa, 0xfb, 0xfc, 0xfd, },
+ { { 4, 0xabababab } },
+ .fill_helper = bpf_fill_maxinsns12,
+ .expected_errcode = -EINVAL,
+ },
+ {
+ "BPF_MAXINSNS: exec all MSH",
+ { },
+ CLASSIC,
+ { 0xfa, 0xfb, 0xfc, 0xfd, },
+ { { 4, 0xababab83 } },
+ .fill_helper = bpf_fill_maxinsns13,
+ },
+ {
"BPF_MAXINSNS: ld_abs+get_processor_id",
{ },
CLASSIC,
--
2.9.5
^ permalink raw reply related
* [PATCH bpf-next 01/11] bpf: test case for map pointer poison with calls/branches
From: Daniel Borkmann @ 2018-05-28 0:43 UTC (permalink / raw)
To: ast; +Cc: netdev, Daniel Borkmann
In-Reply-To: <20180528004344.3606-1-daniel@iogearbox.net>
Add several test cases where the same or different map pointers
originate from different paths in the program and execute a map
lookup or tail call at a common location.
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
include/linux/filter.h | 10 ++
tools/include/linux/filter.h | 10 ++
tools/testing/selftests/bpf/test_verifier.c | 185 ++++++++++++++++++++++++----
3 files changed, 178 insertions(+), 27 deletions(-)
diff --git a/include/linux/filter.h b/include/linux/filter.h
index d358d18..b443f70 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -289,6 +289,16 @@ struct xdp_buff;
.off = OFF, \
.imm = 0 })
+/* Relative call */
+
+#define BPF_CALL_REL(TGT) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_CALL, \
+ .dst_reg = 0, \
+ .src_reg = BPF_PSEUDO_CALL, \
+ .off = 0, \
+ .imm = TGT })
+
/* Function call */
#define BPF_EMIT_CALL(FUNC) \
diff --git a/tools/include/linux/filter.h b/tools/include/linux/filter.h
index c5e512d..af55acf 100644
--- a/tools/include/linux/filter.h
+++ b/tools/include/linux/filter.h
@@ -263,6 +263,16 @@
#define BPF_LD_MAP_FD(DST, MAP_FD) \
BPF_LD_IMM64_RAW(DST, BPF_PSEUDO_MAP_FD, MAP_FD)
+/* Relative call */
+
+#define BPF_CALL_REL(TGT) \
+ ((struct bpf_insn) { \
+ .code = BPF_JMP | BPF_CALL, \
+ .dst_reg = 0, \
+ .src_reg = BPF_PSEUDO_CALL, \
+ .off = 0, \
+ .imm = TGT })
+
/* Program exit */
#define BPF_EXIT_INSN() \
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 4b4f015..7cb1d74 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -50,7 +50,7 @@
#define MAX_INSNS BPF_MAXINSNS
#define MAX_FIXUPS 8
-#define MAX_NR_MAPS 4
+#define MAX_NR_MAPS 7
#define POINTER_VALUE 0xcafe4all
#define TEST_DATA_LEN 64
@@ -66,7 +66,9 @@ struct bpf_test {
int fixup_map1[MAX_FIXUPS];
int fixup_map2[MAX_FIXUPS];
int fixup_map3[MAX_FIXUPS];
- int fixup_prog[MAX_FIXUPS];
+ int fixup_map4[MAX_FIXUPS];
+ int fixup_prog1[MAX_FIXUPS];
+ int fixup_prog2[MAX_FIXUPS];
int fixup_map_in_map[MAX_FIXUPS];
const char *errstr;
const char *errstr_unpriv;
@@ -2769,7 +2771,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_prog = { 1 },
+ .fixup_prog1 = { 1 },
.errstr_unpriv = "R3 leaks addr into helper",
.result_unpriv = REJECT,
.result = ACCEPT,
@@ -2856,7 +2858,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- .fixup_prog = { 1 },
+ .fixup_prog1 = { 1 },
.result = ACCEPT,
.retval = 42,
},
@@ -2870,7 +2872,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- .fixup_prog = { 1 },
+ .fixup_prog1 = { 1 },
.result = ACCEPT,
.retval = 41,
},
@@ -2884,7 +2886,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- .fixup_prog = { 1 },
+ .fixup_prog1 = { 1 },
.result = ACCEPT,
.retval = 1,
},
@@ -2898,7 +2900,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 2),
BPF_EXIT_INSN(),
},
- .fixup_prog = { 1 },
+ .fixup_prog1 = { 1 },
.result = ACCEPT,
.retval = 2,
},
@@ -2912,7 +2914,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 2),
BPF_EXIT_INSN(),
},
- .fixup_prog = { 1 },
+ .fixup_prog1 = { 1 },
.result = ACCEPT,
.retval = 2,
},
@@ -2926,7 +2928,7 @@ static struct bpf_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 2),
BPF_EXIT_INSN(),
},
- .fixup_prog = { 2 },
+ .fixup_prog1 = { 2 },
.result = ACCEPT,
.retval = 42,
},
@@ -11682,6 +11684,112 @@ static struct bpf_test tests[] = {
.prog_type = BPF_PROG_TYPE_XDP,
},
{
+ "calls: two calls returning different map pointers for lookup (hash, array)",
+ .insns = {
+ /* main prog */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+ BPF_CALL_REL(11),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_CALL_REL(12),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+ offsetof(struct test_val, foo)),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ /* subprog 1 */
+ BPF_LD_MAP_FD(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* subprog 2 */
+ BPF_LD_MAP_FD(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map2 = { 13 },
+ .fixup_map4 = { 16 },
+ .result = ACCEPT,
+ .retval = 1,
+ },
+ {
+ "calls: two calls returning different map pointers for lookup (hash, map in map)",
+ .insns = {
+ /* main prog */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
+ BPF_CALL_REL(11),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_CALL_REL(12),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0,
+ offsetof(struct test_val, foo)),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ /* subprog 1 */
+ BPF_LD_MAP_FD(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* subprog 2 */
+ BPF_LD_MAP_FD(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_in_map = { 16 },
+ .fixup_map4 = { 13 },
+ .result = REJECT,
+ .errstr = "R0 invalid mem access 'map_ptr'",
+ },
+ {
+ "cond: two branches returning different map pointers for lookup (tail, tail)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 3),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_3, 7),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog1 = { 5 },
+ .fixup_prog2 = { 2 },
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "tail_call abusing map_ptr",
+ .result = ACCEPT,
+ .retval = 42,
+ },
+ {
+ "cond: two branches returning same map pointers for lookup (tail, tail)",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
+ offsetof(struct __sk_buff, mark)),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 3),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 2),
+ BPF_LD_MAP_FD(BPF_REG_2, 0),
+ BPF_MOV64_IMM(BPF_REG_3, 7),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
+ BPF_FUNC_tail_call),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_prog2 = { 2, 5 },
+ .result_unpriv = ACCEPT,
+ .result = ACCEPT,
+ .retval = 42,
+ },
+ {
"search pruning: all branches should be verified (nop operation)",
.insns = {
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
@@ -12162,12 +12270,13 @@ static int probe_filter_length(const struct bpf_insn *fp)
return len + 1;
}
-static int create_map(uint32_t size_value, uint32_t max_elem)
+static int create_map(uint32_t type, uint32_t size_key,
+ uint32_t size_value, uint32_t max_elem)
{
int fd;
- fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(long long),
- size_value, max_elem, BPF_F_NO_PREALLOC);
+ fd = bpf_create_map(type, size_key, size_value, max_elem,
+ type == BPF_MAP_TYPE_HASH ? BPF_F_NO_PREALLOC : 0);
if (fd < 0)
printf("Failed to create hash map '%s'!\n", strerror(errno));
@@ -12200,13 +12309,13 @@ static int create_prog_dummy2(int mfd, int idx)
ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
}
-static int create_prog_array(void)
+static int create_prog_array(uint32_t max_elem, int p1key)
{
- int p1key = 0, p2key = 1;
+ int p2key = 1;
int mfd, p1fd, p2fd;
mfd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int),
- sizeof(int), 4, 0);
+ sizeof(int), max_elem, 0);
if (mfd < 0) {
printf("Failed to create prog array '%s'!\n", strerror(errno));
return -1;
@@ -12261,7 +12370,9 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
int *fixup_map1 = test->fixup_map1;
int *fixup_map2 = test->fixup_map2;
int *fixup_map3 = test->fixup_map3;
- int *fixup_prog = test->fixup_prog;
+ int *fixup_map4 = test->fixup_map4;
+ int *fixup_prog1 = test->fixup_prog1;
+ int *fixup_prog2 = test->fixup_prog2;
int *fixup_map_in_map = test->fixup_map_in_map;
if (test->fill_helper)
@@ -12272,7 +12383,8 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
* that really matters is value size in this case.
*/
if (*fixup_map1) {
- map_fds[0] = create_map(sizeof(long long), 1);
+ map_fds[0] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long),
+ sizeof(long long), 1);
do {
prog[*fixup_map1].imm = map_fds[0];
fixup_map1++;
@@ -12280,7 +12392,8 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
}
if (*fixup_map2) {
- map_fds[1] = create_map(sizeof(struct test_val), 1);
+ map_fds[1] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long),
+ sizeof(struct test_val), 1);
do {
prog[*fixup_map2].imm = map_fds[1];
fixup_map2++;
@@ -12288,25 +12401,43 @@ static void do_test_fixup(struct bpf_test *test, struct bpf_insn *prog,
}
if (*fixup_map3) {
- map_fds[1] = create_map(sizeof(struct other_val), 1);
+ map_fds[2] = create_map(BPF_MAP_TYPE_HASH, sizeof(long long),
+ sizeof(struct other_val), 1);
do {
- prog[*fixup_map3].imm = map_fds[1];
+ prog[*fixup_map3].imm = map_fds[2];
fixup_map3++;
} while (*fixup_map3);
}
- if (*fixup_prog) {
- map_fds[2] = create_prog_array();
+ if (*fixup_map4) {
+ map_fds[3] = create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
+ sizeof(struct test_val), 1);
+ do {
+ prog[*fixup_map4].imm = map_fds[3];
+ fixup_map4++;
+ } while (*fixup_map4);
+ }
+
+ if (*fixup_prog1) {
+ map_fds[4] = create_prog_array(4, 0);
+ do {
+ prog[*fixup_prog1].imm = map_fds[4];
+ fixup_prog1++;
+ } while (*fixup_prog1);
+ }
+
+ if (*fixup_prog2) {
+ map_fds[5] = create_prog_array(8, 7);
do {
- prog[*fixup_prog].imm = map_fds[2];
- fixup_prog++;
- } while (*fixup_prog);
+ prog[*fixup_prog2].imm = map_fds[5];
+ fixup_prog2++;
+ } while (*fixup_prog2);
}
if (*fixup_map_in_map) {
- map_fds[3] = create_map_in_map();
+ map_fds[6] = create_map_in_map();
do {
- prog[*fixup_map_in_map].imm = map_fds[3];
+ prog[*fixup_map_in_map].imm = map_fds[6];
fixup_map_in_map++;
} while (*fixup_map_in_map);
}
--
2.9.5
^ permalink raw reply related
* [PATCH bpf-next 00/11] Misc BPF improvements
From: Daniel Borkmann @ 2018-05-28 0:43 UTC (permalink / raw)
To: ast; +Cc: netdev, Daniel Borkmann
This set adds various patches I still had in my queue, first two
are test cases to provide coverage for the recent two fixes that
went to bpf tree, then a small improvement on the error message
for gpl helpers. Next, we expose prog and map id into fdinfo in
order to allow for inspection of these objects currently used
in applications. Patch after that removes a retpoline call for
map lookup/update/delete helpers. A new helper is added in the
subsequent patch to lookup the skb's socket's cgroup v2 id which
can be used in an efficient way for e.g. lookups on egress side.
Next one is a fix to fully clear state info in tunnel/xfrm helpers.
Given this is full cap_sys_admin from init ns and has same priv
requirements like tracing, bpf-next should be okay. A small bug
fix for bpf_asm follows, and next a fix for context access in
tracing which was recently reported. Lastly, a small update in
the maintainer's file to add patchwork url and missing files.
Daniel Borkmann (11):
bpf: test case for map pointer poison with calls/branches
bpf: add also cbpf long jump test cases with heavy expansion
bpf: fixup error message from gpl helpers on license mismatch
bpf: show prog and map id in fdinfo
bpf: avoid retpoline for lookup/update/delete calls on maps
bpf: add bpf_skb_cgroup_id helper
bpf: make sure to clear unused fields in tunnel/xfrm state fetch
bpf: fix cbpf parser bug for octal numbers
bpf: fix context access in tracing progs on 32 bit archs
bpf: sync bpf uapi header with tools
bpf, doc: add missing patchwork url and libbpf to maintainers
MAINTAINERS | 2 +
include/linux/filter.h | 43 ++++++-
include/uapi/linux/bpf.h | 20 ++-
kernel/bpf/hashtab.c | 12 +-
kernel/bpf/syscall.c | 12 +-
kernel/bpf/verifier.c | 69 ++++++++---
kernel/trace/bpf_trace.c | 10 +-
lib/test_bpf.c | 63 ++++++++++
net/core/filter.c | 35 +++++-
tools/bpf/bpf_exp.l | 2 +-
tools/include/linux/filter.h | 10 ++
tools/include/uapi/linux/bpf.h | 20 ++-
tools/testing/selftests/bpf/test_verifier.c | 185 ++++++++++++++++++++++++----
13 files changed, 416 insertions(+), 67 deletions(-)
--
2.9.5
^ permalink raw reply
* [PATCH] net: qmi_wwan: Add Netgear Aircard 779S
From: Josh Hill @ 2018-05-28 0:10 UTC (permalink / raw)
Cc: joshuajhill, josh, Bjørn Mork, David S. Miller, netdev,
linux-usb, linux-kernel
Add support for Netgear Aircard 779S
Signed-off-by: Josh Hill <josh@joshuajhill.com>
---
drivers/net/usb/qmi_wwan.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 42565dd..0946808 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -1103,6 +1103,7 @@ static const struct usb_device_id products[] = {
{QMI_FIXED_INTF(0x05c6, 0x920d, 5)},
{QMI_QUIRK_SET_DTR(0x05c6, 0x9625, 4)}, /* YUGA CLM920-NC5 */
{QMI_FIXED_INTF(0x0846, 0x68a2, 8)},
+ {QMI_FIXED_INTF(0x0846, 0x68d3, 8)}, /* Netgear Aircard 779S */
{QMI_FIXED_INTF(0x12d1, 0x140c, 1)}, /* Huawei E173 */
{QMI_FIXED_INTF(0x12d1, 0x14ac, 1)}, /* Huawei E1820 */
{QMI_FIXED_INTF(0x1435, 0xd181, 3)}, /* Wistron NeWeb D18Q1 */
--
2.7.4
^ permalink raw reply related
* Re: WARNING in bpf_int_jit_compile
From: Daniel Borkmann @ 2018-05-27 23:17 UTC (permalink / raw)
To: syzbot, ast, davem, hpa, kuznet, linux-kernel, mingo, netdev,
syzkaller-bugs, tglx, x86, yoshfuji
In-Reply-To: <000000000000d29a1c056d188355@google.com>
On 05/26/2018 11:29 AM, syzbot wrote:
> syzbot has found a reproducer for the following crash on:
>
> HEAD commit: 62d18ecfa641 Merge tag 'arm64-fixes' of git://git.kernel.o..
> git tree: upstream
> console output: https://syzkaller.appspot.com/x/log.txt?x=14c6bf57800000
> kernel config: https://syzkaller.appspot.com/x/.config?x=982e2df1b9e60b02
> dashboard link: https://syzkaller.appspot.com/bug?extid=9e762b52dd17e616a7a5
> compiler: gcc (GCC) 8.0.1 20180413 (experimental)
> syzkaller repro:https://syzkaller.appspot.com/x/repro.syz?x=130e42b7800000
>
> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+9e762b52dd17e616a7a5@syzkaller.appspotmail.com
>
> RAX: ffffffffffffffda RBX: 0000000002542914 RCX: 0000000000455a09
> RDX: 0000000000000048 RSI: 0000000020000240 RDI: 0000000000000005
> RBP: 000000000072bea0 R08: 0000000000000000 R09: 0000000000000000
> R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000003
> R13: 0000000000000046 R14: 00000000006f4730 R15: 0000000000000023
> WARNING: CPU: 0 PID: 4752 at include/linux/filter.h:667 bpf_jit_binary_lock_ro include/linux/filter.h:667 [inline]
> WARNING: CPU: 0 PID: 4752 at include/linux/filter.h:667 bpf_int_jit_compile+0xbf7/0xef7 arch/x86/net/bpf_jit_comp.c:1271
> Kernel panic - not syncing: panic_on_warn set ...
>
> CPU: 0 PID: 4752 Comm: syz-executor0 Not tainted 4.17.0-rc6+ #67
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
> Call Trace:
> __dump_stack lib/dump_stack.c:77 [inline]
> dump_stack+0x1b9/0x294 lib/dump_stack.c:113
> panic+0x22f/0x4de kernel/panic.c:184
> __warn.cold.8+0x163/0x1b3 kernel/panic.c:536
> report_bug+0x252/0x2d0 lib/bug.c:186
> fixup_bug arch/x86/kernel/traps.c:178 [inline]
> do_error_trap+0x1de/0x490 arch/x86/kernel/traps.c:296
> do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:315
> invalid_op+0x14/0x20 arch/x86/entry/entry_64.S:992
> RIP: 0010:bpf_jit_binary_lock_ro include/linux/filter.h:667 [inline]
Been looking into this last Friday already. What seems to happen here is that
there's fault injection from inside set_memory_ro(), meaning it will eventually
return an error there, and we throw a WARN_ON_ONCE() to bark that making the
memory read-only didn't work out. I'd prefer to notify the user about
such an issue rather than keeping completely silent about it, so that there's
awareness that read-only protections are not in place / guaranteed.
> RIP: 0010:bpf_int_jit_compile+0xbf7/0xef7 arch/x86/net/bpf_jit_comp.c:1271
> RSP: 0018:ffff8801d85ff920 EFLAGS: 00010293
> RAX: ffff8801d78c40c0 RBX: 0000000000000046 RCX: ffffffff81445d89
> RDX: 0000000000000000 RSI: ffffffff81445d97 RDI: 0000000000000005
> RBP: ffff8801d85ffa40 R08: ffff8801d78c40c0 R09: 0000000000000000
> R10: 0000000000000000 R11: 0000000000000000 R12: ffffc9000194e002
> R13: ffff8801d85ffa18 R14: 00000000fffffff4 R15: 0000000000000003
> bpf_prog_select_runtime+0x131/0x640 kernel/bpf/core.c:1541
> bpf_prog_load+0x16c2/0x2070 kernel/bpf/syscall.c:1333
> __do_sys_bpf kernel/bpf/syscall.c:2073 [inline]
> __se_sys_bpf kernel/bpf/syscall.c:2035 [inline]
> __x64_sys_bpf+0x389/0x4c0 kernel/bpf/syscall.c:2035
> do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287
> entry_SYSCALL_64_after_hwframe+0x49/0xbe
> RIP: 0033:0x455a09
> RSP: 002b:00007ffec3da2868 EFLAGS: 00000246 ORIG_RAX: 0000000000000141
> RAX: ffffffffffffffda RBX: 0000000002542914 RCX: 0000000000455a09
> RDX: 0000000000000048 RSI: 0000000020000240 RDI: 0000000000000005
> RBP: 000000000072bea0 R08: 0000000000000000 R09: 0000000000000000
> R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000003
> R13: 0000000000000046 R14: 00000000006f4730 R15: 0000000000000023
> Dumping ftrace buffer:
> (ftrace buffer empty)
> Kernel Offset: disabled
> Rebooting in 86400 seconds..
>
^ permalink raw reply
* Re: [PATCH v3 bpf-next 0/5] bpf: Hooks for sys_sendmsg
From: Daniel Borkmann @ 2018-05-27 22:59 UTC (permalink / raw)
To: Andrey Ignatov, netdev; +Cc: davem, kafai, ast, kernel-team
In-Reply-To: <e7adf3d3-2d1f-a348-7e4f-3ee3269dfe1c@iogearbox.net>
On 05/28/2018 12:56 AM, Daniel Borkmann wrote:
> On 05/25/2018 07:09 AM, Andrey Ignatov wrote:
>> v2 -> v3:
>> * place BPF logic under static key in udp_sendmsg, udpv6_sendmsg;
>> * rebase.
[...]
>
> Applied to bpf-next, thanks Andrey!
Woops, and I meant of course v4 [0] of the series not v3. ;-)
[0] https://patchwork.ozlabs.org/project/netdev/list/?series=46691&state=*
^ permalink raw reply
* Re: [PATCH v3 bpf-next 0/5] bpf: Hooks for sys_sendmsg
From: Daniel Borkmann @ 2018-05-27 22:56 UTC (permalink / raw)
To: Andrey Ignatov, netdev; +Cc: davem, kafai, ast, kernel-team
In-Reply-To: <cover.1527224903.git.rdna@fb.com>
On 05/25/2018 07:09 AM, Andrey Ignatov wrote:
> v2 -> v3:
> * place BPF logic under static key in udp_sendmsg, udpv6_sendmsg;
> * rebase.
>
> v1 -> v2:
> * return ENOTSUPP if bpf_prog rewrote IPv6-only with IPv4-mapped IPv6;
> * add test for IPv4-mapped IPv6 use-case;
> * fix build for CONFIG_CGROUP_BPF=n;
> * rebase.
>
> This patch set adds BPF hooks for sys_sendmsg similar to existing hooks for
> sys_bind and sys_connect.
>
> Hooks allow overriding the source IP (including the case when it's set via
> cmsg(3)) and destination IP:port for unconnected UDP (slow path). TCP and
> connected UDP (fast path) are not affected. This makes UDP support
> complete: connected UDP is handled by sys_connect hooks, unconnected by
> sys_sendmsg ones.
>
> Similar to sys_connect hooks, sys_sendmsg ones can be used to make system
> calls such as sendmsg(2) and sendto(2) return EPERM.
>
> Please see patch 0001 for more details.
>
>
> Andrey Ignatov (5):
> bpf: Hooks for sys_sendmsg
> bpf: Sync bpf.h to tools/
> libbpf: Support guessing sendmsg{4,6} progs
> selftests/bpf: Prepare test_sock_addr for extension
> selftests/bpf: Selftest for sys_sendmsg hooks
>
> include/linux/bpf-cgroup.h | 23 +-
> include/linux/filter.h | 1 +
> include/uapi/linux/bpf.h | 8 +
> kernel/bpf/cgroup.c | 11 +-
> kernel/bpf/syscall.c | 8 +
> net/core/filter.c | 39 +
> net/ipv4/udp.c | 20 +-
> net/ipv6/udp.c | 24 +
> tools/include/uapi/linux/bpf.h | 8 +
> tools/lib/bpf/libbpf.c | 2 +
> tools/testing/selftests/bpf/Makefile | 2 +-
> tools/testing/selftests/bpf/sendmsg4_prog.c | 49 ++
> tools/testing/selftests/bpf/sendmsg6_prog.c | 60 ++
> tools/testing/selftests/bpf/test_sock_addr.c | 1155 +++++++++++++++++++++-----
> 14 files changed, 1214 insertions(+), 196 deletions(-)
> create mode 100644 tools/testing/selftests/bpf/sendmsg4_prog.c
> create mode 100644 tools/testing/selftests/bpf/sendmsg6_prog.c
>
Applied to bpf-next, thanks Andrey!
^ permalink raw reply
* Re: [PATCH bpf-next v2] selftests/bpf: missing headers test_lwt_seg6local
From: Daniel Borkmann @ 2018-05-27 22:37 UTC (permalink / raw)
To: Mathieu Xhonneux, netdev; +Cc: alexei.starovoitov, ys114321
In-Reply-To: <20180526144408.2829-1-m.xhonneux@gmail.com>
On 05/26/2018 04:44 PM, Mathieu Xhonneux wrote:
> Previous patch "selftests/bpf: test for seg6local End.BPF action" lacks
> some UAPI headers in tools/.
>
> clang -I. -I./include/uapi -I../../../include/uapi -idirafter
> /usr/local/include -idirafter
> /data/users/yhs/work/llvm/build/install/lib/clang/7.0.0/include
> -idirafter /usr/include -Wno-compare-distinct-pointer-types \
> -O2 -target bpf -emit-llvm -c test_lwt_seg6local.c -o - | \
> llc -march=bpf -mcpu=generic -filetype=obj -o
> [...]/net-next/tools/testing/selftests/bpf/test_lwt_seg6local.o
> test_lwt_seg6local.c:4:10: fatal error: 'linux/seg6_local.h' file not found
> ^~~~~~~~~~~~~~~~~~~~
> 1 error generated.
> make: Leaving directory
> `/data/users/yhs/work/net-next/tools/testing/selftests/bpf'
>
> v2: moving the headers to tools/include/uapi/.
>
> Reported-by: Y Song <ys114321@gmail.com>
> Signed-off-by: Mathieu Xhonneux <m.xhonneux@gmail.com>
Applied to bpf-next, thanks Mathieu!
^ permalink raw reply
* Re: [PATCH, net-next 2/2] bpf: avoid -Wmaybe-uninitialized warning
From: Daniel Borkmann @ 2018-05-27 22:37 UTC (permalink / raw)
To: Arnd Bergmann, Alexei Starovoitov
Cc: Yonghong Song, David S. Miller, Song Liu, Martin KaFai Lau,
Chenbo Feng, Jakub Kicinski, netdev, linux-kernel
In-Reply-To: <20180525213331.2115471-2-arnd@arndb.de>
On 05/25/2018 11:33 PM, Arnd Bergmann wrote:
> The stack_map_get_build_id_offset() function is too long for gcc to track
> whether 'work' may or may not be initialized at the end of it, leading
> to a false-positive warning:
>
> kernel/bpf/stackmap.c: In function 'stack_map_get_build_id_offset':
> kernel/bpf/stackmap.c:334:13: error: 'work' may be used uninitialized in this function [-Werror=maybe-uninitialized]
>
> This removes the 'in_nmi_ctx' flag and uses the state of that variable
> itself to see if it got initialized.
>
> Fixes: bae77c5eb5b2 ("bpf: enable stackmap with build_id in nmi context")
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Applied to bpf-next, thanks Arnd!
^ permalink raw reply
* Re: [PATCH, net-next 1/2] bpf: btf: avoid -Wreturn-type warning
From: Daniel Borkmann @ 2018-05-27 22:36 UTC (permalink / raw)
To: Arnd Bergmann, Alexei Starovoitov
Cc: Martin KaFai Lau, Song Liu, netdev, linux-kernel
In-Reply-To: <20180525213331.2115471-1-arnd@arndb.de>
On 05/25/2018 11:33 PM, Arnd Bergmann wrote:
> gcc warns about a noreturn function possibly returning in
> some configurations:
>
> kernel/bpf/btf.c: In function 'env_type_is_resolve_sink':
> kernel/bpf/btf.c:729:1: error: control reaches end of non-void function [-Werror=return-type]
>
> Using BUG() instead of BUG_ON() avoids that warning and otherwise
> does the exact same thing.
>
> Fixes: eb3f595dab40 ("bpf: btf: Validate type reference")
> Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Applied to bpf-next, thanks Arnd!
^ permalink raw reply
* Re: [bpf-next PATCH] bpf: sockhash fix race with bpf_tcp_close and map delete
From: Daniel Borkmann @ 2018-05-27 22:36 UTC (permalink / raw)
To: John Fastabend, ast; +Cc: netdev
In-Reply-To: <20180525173712.4004.70590.stgit@john-Precision-Tower-5810>
On 05/25/2018 07:37 PM, John Fastabend wrote:
> syzbot reported two related splats, a use after free and null
> pointer dereference, when a TCP socket is closed while the map is
> also being removed.
>
> The psock keeps a reference to all map slots that have a reference
> to the sock so that when the sock is closed we can clean up any
> outstanding sock{map|hash} entries. This avoids pinning a sock
> forever if the map owner fails to do proper cleanup. However, the
> result is we have two paths that can free an entry in the map. Even
> the comment in the sock{map|hash} tear down function, sock_hash_free()
> notes this:
>
> At this point no update, lookup or delete operations can happen.
> However, be aware we can still get a socket state event updates,
> and data ready callbacks that reference the psock from sk_user_data.
>
> Both removal paths omitted taking the hash bucket lock resulting
> in the case where we have two references that are in the process
> of being free'd.
>
> Reported-by: syzbot+a761b81c211794fa1072@syzkaller.appspotmail.com
> Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Applied to bpf-next, thanks John!
^ permalink raw reply
* Re: [PATCH bpf-next] libbpf: Install btf.h with libbpf
From: Daniel Borkmann @ 2018-05-27 22:35 UTC (permalink / raw)
To: Andrey Ignatov, netdev; +Cc: kafai, ast, kernel-team
In-Reply-To: <20180525172313.1043567-1-rdna@fb.com>
On 05/25/2018 07:23 PM, Andrey Ignatov wrote:
> install_headers target should contain all headers that are part of
> libbpf. Add missing btf.h
>
> Signed-off-by: Andrey Ignatov <rdna@fb.com>
Applied to bpf-next, thanks Andrey!
^ permalink raw reply
* Proposal
From: Miss Zeliha Omer Faruk @ 2018-05-27 22:29 UTC (permalink / raw)
--
Hello
I have been trying to contact you. Did you get my business proposal?
Best Regards,
Miss.Zeliha ömer faruk
Esentepe Mahallesi Büyükdere
Caddesi Kristal Kule Binasi
No:215
Sisli - Istanbul, Turke
^ permalink raw reply
* Re: aio poll and a new in-kernel poll API V13
From: Al Viro @ 2018-05-27 22:27 UTC (permalink / raw)
To: Christoph Hellwig
Cc: Avi Kivity, linux-aio, linux-fsdevel, netdev, linux-api,
linux-kernel
In-Reply-To: <20180526072306.GA17313@lst.de>
OK, it's in -next now; there are several cleanups I'd put
into vfs.git#work.aio:
aio: all callers of aio_{read,write,fsync,poll} treat 0 and -EIOCBQUEUED the same way
aio_read_events_ring(): make a bit more readable
aio: shift copyin of iocb into io_submit_one()
aio: fold do_io_submit() into callers
Those are *not* on -next yet and if anybody has objections against
any of those, please yell. Individual patches in followups...
--
To unsubscribe, send a message with 'unsubscribe linux-aio' in
the body to majordomo@kvack.org. For more info on Linux AIO,
see: http://www.kvack.org/aio/
Don't email: <a href=mailto:"aart@kvack.org">aart@kvack.org</a>
^ permalink raw reply
* Re: KASAN: use-after-free Read in bpf_tcp_close
From: Daniel Borkmann @ 2018-05-27 22:16 UTC (permalink / raw)
To: syzbot, ast, linux-kernel, netdev, syzkaller-bugs, john.fastabend
In-Reply-To: <000000000000ac9069056d1806c4@google.com>
[ +John ]
On 05/26/2018 10:54 AM, syzbot wrote:
> Hello,
>
> syzbot found the following crash on:
>
> HEAD commit: 3fb48d881dbe Merge branch 'bpf-fib-mtu-check'
> git tree: bpf-next
> console output: https://syzkaller.appspot.com/x/log.txt?x=15fc1977800000
> kernel config: https://syzkaller.appspot.com/x/.config?x=b632d8e2c2ab2c1
> dashboard link: https://syzkaller.appspot.com/bug?extid=fce8f2462c403d02af98
> compiler: gcc (GCC) 8.0.1 20180413 (experimental)
> syzkaller repro:https://syzkaller.appspot.com/x/repro.syz?x=1310c857800000
> C reproducer: https://syzkaller.appspot.com/x/repro.c?x=17de7177800000
>
> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+fce8f2462c403d02af98@syzkaller.appspotmail.com
Should be fixed by: https://patchwork.ozlabs.org/patch/920695/
> ==================================================================
> BUG: KASAN: use-after-free in hlist_del_rcu include/linux/rculist.h:427 [inline]
> BUG: KASAN: use-after-free in bpf_tcp_close+0xd7f/0xf80 kernel/bpf/sockmap.c:271
> Read of size 8 at addr ffff8801c884cf90 by task syz-executor330/11778
>
> CPU: 1 PID: 11778 Comm: syz-executor330 Not tainted 4.17.0-rc4+ #18
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
> Call Trace:
> __dump_stack lib/dump_stack.c:77 [inline]
> dump_stack+0x1b9/0x294 lib/dump_stack.c:113
> print_address_description+0x6c/0x20b mm/kasan/report.c:256
> kasan_report_error mm/kasan/report.c:354 [inline]
> kasan_report.cold.7+0x242/0x2fe mm/kasan/report.c:412
> __asan_report_load8_noabort+0x14/0x20 mm/kasan/report.c:433
> hlist_del_rcu include/linux/rculist.h:427 [inline]
> bpf_tcp_close+0xd7f/0xf80 kernel/bpf/sockmap.c:271
> inet_release+0x104/0x1f0 net/ipv4/af_inet.c:427
> inet6_release+0x50/0x70 net/ipv6/af_inet6.c:459
> sock_release+0x96/0x1b0 net/socket.c:594
> sock_close+0x16/0x20 net/socket.c:1149
> __fput+0x34d/0x890 fs/file_table.c:209
> ____fput+0x15/0x20 fs/file_table.c:243
> task_work_run+0x1e4/0x290 kernel/task_work.c:113
> exit_task_work include/linux/task_work.h:22 [inline]
> do_exit+0x1aee/0x2730 kernel/exit.c:865
> do_group_exit+0x16f/0x430 kernel/exit.c:968
> get_signal+0x886/0x1960 kernel/signal.c:2469
> do_signal+0x98/0x2040 arch/x86/kernel/signal.c:810
> exit_to_usermode_loop+0x28a/0x310 arch/x86/entry/common.c:162
> prepare_exit_to_usermode arch/x86/entry/common.c:196 [inline]
> syscall_return_slowpath arch/x86/entry/common.c:265 [inline]
> do_syscall_64+0x6ac/0x800 arch/x86/entry/common.c:290
> entry_SYSCALL_64_after_hwframe+0x49/0xbe
> RIP: 0033:0x445ed9
> RSP: 002b:00007f0078c0adb8 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca
> RAX: fffffffffffffe00 RBX: 00000000006dbc24 RCX: 0000000000445ed9
> RDX: 0000000000000000 RSI: 0000000000000000 RDI: 00000000006dbc24
> RBP: 00000000006dbc20 R08: 0000000000000000 R09: 0000000000000000
> R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
> R13: 00007ffcd147dbef R14: 00007f0078c0b9c0 R15: 0000000000000007
>
> Allocated by task 11787:
> save_stack+0x43/0xd0 mm/kasan/kasan.c:448
> set_track mm/kasan/kasan.c:460 [inline]
> kasan_kmalloc+0xc4/0xe0 mm/kasan/kasan.c:553
> __do_kmalloc_node mm/slab.c:3682 [inline]
> __kmalloc_node+0x47/0x70 mm/slab.c:3689
> kmalloc_node include/linux/slab.h:554 [inline]
> alloc_sock_hash_elem kernel/bpf/sockmap.c:2114 [inline]
> sock_hash_ctx_update_elem.isra.23+0xa57/0x1560 kernel/bpf/sockmap.c:2245
> sock_hash_update_elem+0x14f/0x2d0 kernel/bpf/sockmap.c:2303
> map_update_elem+0x5c4/0xc90 kernel/bpf/syscall.c:760
> __do_sys_bpf kernel/bpf/syscall.c:2134 [inline]
> __se_sys_bpf kernel/bpf/syscall.c:2105 [inline]
> __x64_sys_bpf+0x32a/0x4f0 kernel/bpf/syscall.c:2105
> do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287
> entry_SYSCALL_64_after_hwframe+0x49/0xbe
>
> Freed by task 8998:
> save_stack+0x43/0xd0 mm/kasan/kasan.c:448
> set_track mm/kasan/kasan.c:460 [inline]
> __kasan_slab_free+0x11a/0x170 mm/kasan/kasan.c:521
> kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528
> __cache_free mm/slab.c:3498 [inline]
> kfree+0xd9/0x260 mm/slab.c:3813
> sock_hash_free+0x24e/0x6e0 kernel/bpf/sockmap.c:2093
> bpf_map_free_deferred+0xba/0xf0 kernel/bpf/syscall.c:259
> process_one_work+0xc1e/0x1b50 kernel/workqueue.c:2145
> worker_thread+0x1cc/0x1440 kernel/workqueue.c:2279
> kthread+0x345/0x410 kernel/kthread.c:238
> ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:412
>
> The buggy address belongs to the object at ffff8801c884cf80
> which belongs to the cache kmalloc-64 of size 64
> The buggy address is located 16 bytes inside of
> 64-byte region [ffff8801c884cf80, ffff8801c884cfc0)
> The buggy address belongs to the page:
> page:ffffea0007221300 count:1 mapcount:0 mapping:ffff8801c884c000 index:0x0
> flags: 0x2fffc0000000100(slab)
> raw: 02fffc0000000100 ffff8801c884c000 0000000000000000 0000000100000020
> raw: ffffea00072e08e0 ffffea0006e99660 ffff8801da800340 0000000000000000
> page dumped because: kasan: bad access detected
>
> Memory state around the buggy address:
> ffff8801c884ce80: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
> ffff8801c884cf00: 00 00 00 00 00 fc fc fc fc fc fc fc fc fc fc fc
>> ffff8801c884cf80: fb fb fb fb fb fb fb fb fc fc fc fc fc fc fc fc
> ^
> ffff8801c884d000: fc fc fc fc fc fc fc fc fb fb fb fb fb fb fb fb
> ffff8801c884d080: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
> ==================================================================
>
>
> ---
> This bug is generated by a bot. It may contain errors.
> See https://goo.gl/tpsmEJ for more information about syzbot.
> syzbot engineers can be reached at syzkaller@googlegroups.com.
>
> syzbot will keep track of this bug report. See:
> https://goo.gl/tpsmEJ#bug-status-tracking for how to communicate with syzbot.
> syzbot can test patches for this bug, for details see:
> https://goo.gl/tpsmEJ#testing-patches
^ permalink raw reply
* Re: general protection fault in bpf_tcp_close
From: Daniel Borkmann @ 2018-05-27 22:15 UTC (permalink / raw)
To: syzbot, ast, linux-kernel, netdev, syzkaller-bugs, john.fastabend
In-Reply-To: <00000000000098e65b056d184a11@google.com>
[ +John ]
On 05/26/2018 11:13 AM, syzbot wrote:
> Hello,
>
> syzbot found the following crash on:
>
> HEAD commit: fd0bfa8d6e04 Merge branch 'bpf-af-xdp-cleanups'
> git tree: bpf-next
> console output: https://syzkaller.appspot.com/x/log.txt?x=11da9427800000
> kernel config: https://syzkaller.appspot.com/x/.config?x=b632d8e2c2ab2c1
> dashboard link: https://syzkaller.appspot.com/bug?extid=0ce137753c78f7b6acc1
> compiler: gcc (GCC) 8.0.1 20180413 (experimental)
>
> Unfortunately, I don't have any reproducer for this crash yet.
>
> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+0ce137753c78f7b6acc1@syzkaller.appspotmail.com
Should be fixed by: https://patchwork.ozlabs.org/patch/920695/
> kasan: CONFIG_KASAN_INLINE enabled
> kasan: GPF could be caused by NULL-ptr deref or user memory access
> general protection fault: 0000 [#1] SMP KASAN
> Dumping ftrace buffer:
> (ftrace buffer empty)
> Modules linked in:
> CPU: 0 PID: 12139 Comm: syz-executor2 Not tainted 4.17.0-rc4+ #17
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
> RIP: 0010:__hlist_del include/linux/list.h:649 [inline]
> RIP: 0010:hlist_del_rcu include/linux/rculist.h:427 [inline]
> RIP: 0010:bpf_tcp_close+0x7d2/0xf80 kernel/bpf/sockmap.c:271
> RSP: 0018:ffff8801a8f8ef70 EFLAGS: 00010a02
> RAX: ffffed00351f1dfd RBX: dffffc0000000000 RCX: dead000000000200
> RDX: 0000000000000000 RSI: 1bd5a00000000040 RDI: ffff8801cb710910
> RBP: ffff8801a8f8f110 R08: ffffed003350ac9d R09: ffffed003350ac9c
> R10: ffffed003350ac9c R11: ffff88019a8564e3 R12: ffff8801cb710380
> R13: ffff8801b17ea6e0 R14: ffff8801cb710398 R15: ffff8801cb710900
> FS: 00007f9890c43700(0000) GS:ffff8801dae00000(0000) knlGS:0000000000000000
> CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> CR2: 00007fde1a668000 CR3: 000000019dca2000 CR4: 00000000001406f0
> DR0: 00000000200001c0 DR1: 00000000200001c0 DR2: 0000000000000000
> DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000600
> Call Trace:
> inet_release+0x104/0x1f0 net/ipv4/af_inet.c:427
> inet6_release+0x50/0x70 net/ipv6/af_inet6.c:459
> sock_release+0x96/0x1b0 net/socket.c:594
> sock_close+0x16/0x20 net/socket.c:1149
> __fput+0x34d/0x890 fs/file_table.c:209
> ____fput+0x15/0x20 fs/file_table.c:243
> task_work_run+0x1e4/0x290 kernel/task_work.c:113
> exit_task_work include/linux/task_work.h:22 [inline]
> do_exit+0x1aee/0x2730 kernel/exit.c:865
> do_group_exit+0x16f/0x430 kernel/exit.c:968
> get_signal+0x886/0x1960 kernel/signal.c:2469
> do_signal+0x98/0x2040 arch/x86/kernel/signal.c:810
> exit_to_usermode_loop+0x28a/0x310 arch/x86/entry/common.c:162
> prepare_exit_to_usermode arch/x86/entry/common.c:196 [inline]
> syscall_return_slowpath arch/x86/entry/common.c:265 [inline]
> do_syscall_64+0x6ac/0x800 arch/x86/entry/common.c:290
> entry_SYSCALL_64_after_hwframe+0x49/0xbe
> RIP: 0033:0x455a09
> RSP: 002b:00007f9890c42ce8 EFLAGS: 00000246 ORIG_RAX: 00000000000000ca
> RAX: fffffffffffffe00 RBX: 000000000072bec8 RCX: 0000000000455a09
> RDX: 0000000000000000 RSI: 0000000000000000 RDI: 000000000072bec8
> RBP: 000000000072bec8 R08: 0000000000000000 R09: 000000000072bea0
> R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000
> R13: 00007ffcb48ac3ff R14: 00007f9890c439c0 R15: 0000000000000000
> Code: ff 48 c1 e9 03 80 3c 19 00 0f 85 a9 05 00 00 49 8b 4f 18 48 8b 85 98 fe ff ff 48 89 ce c6 00 00 48 c1 ee 03 48 89 95 d8 fe ff ff <80> 3c 1e 00 0f 85 c6 05 00 00 48 8b 85 98 fe ff ff 48 85 d2 48
> RIP: __hlist_del include/linux/list.h:649 [inline] RSP: ffff8801a8f8ef70
> RIP: hlist_del_rcu include/linux/rculist.h:427 [inline] RSP: ffff8801a8f8ef70
> RIP: bpf_tcp_close+0x7d2/0xf80 kernel/bpf/sockmap.c:271 RSP: ffff8801a8f8ef70
> ---[ end trace e81227e93c7e7b75 ]---
>
>
> ---
> This bug is generated by a bot. It may contain errors.
> See https://goo.gl/tpsmEJ for more information about syzbot.
> syzbot engineers can be reached at syzkaller@googlegroups.com.
>
> syzbot will keep track of this bug report. See:
> https://goo.gl/tpsmEJ#bug-status-tracking for how to communicate with syzbot.
^ permalink raw reply
* Re: KASAN: use-after-free Write in bpf_tcp_close
From: Daniel Borkmann @ 2018-05-27 22:15 UTC (permalink / raw)
To: syzbot, ast, linux-kernel, netdev, syzkaller-bugs, john.fastabend
In-Reply-To: <000000000000cb4149056d3587f5@google.com>
[ +John ]
On 05/27/2018 10:06 PM, syzbot wrote:
> Hello,
>
> syzbot found the following crash on:
>
> HEAD commit: ff4fb475cea8 Merge branch 'btf-uapi-cleanups'
> git tree: bpf-next
> console output: https://syzkaller.appspot.com/x/log.txt?x=12b3d577800000
> kernel config: https://syzkaller.appspot.com/x/.config?x=b632d8e2c2ab2c1
> dashboard link: https://syzkaller.appspot.com/bug?extid=31025a5f3f7650081204
> compiler: gcc (GCC) 8.0.1 20180413 (experimental)
> syzkaller repro:https://syzkaller.appspot.com/x/repro.syz?x=109a2f37800000
> C reproducer: https://syzkaller.appspot.com/x/repro.c?x=171a727b800000
>
> IMPORTANT: if you fix the bug, please add the following tag to the commit:
> Reported-by: syzbot+31025a5f3f7650081204@syzkaller.appspotmail.com
Should be fixed by: https://patchwork.ozlabs.org/patch/920695/
> ==================================================================
> BUG: KASAN: use-after-free in cmpxchg_size include/asm-generic/atomic-instrumented.h:355 [inline]
> BUG: KASAN: use-after-free in bpf_tcp_close+0x6f5/0xf80 kernel/bpf/sockmap.c:265
> Write of size 8 at addr ffff8801ca277680 by task syz-executor749/9723
>
> CPU: 0 PID: 9723 Comm: syz-executor749 Not tainted 4.17.0-rc4+ #19
> Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011
> Call Trace:
> __dump_stack lib/dump_stack.c:77 [inline]
> dump_stack+0x1b9/0x294 lib/dump_stack.c:113
> print_address_description+0x6c/0x20b mm/kasan/report.c:256
> kasan_report_error mm/kasan/report.c:354 [inline]
> kasan_report.cold.7+0x242/0x2fe mm/kasan/report.c:412
> check_memory_region_inline mm/kasan/kasan.c:260 [inline]
> check_memory_region+0x13e/0x1b0 mm/kasan/kasan.c:267
> kasan_check_write+0x14/0x20 mm/kasan/kasan.c:278
> cmpxchg_size include/asm-generic/atomic-instrumented.h:355 [inline]
> bpf_tcp_close+0x6f5/0xf80 kernel/bpf/sockmap.c:265
> inet_release+0x104/0x1f0 net/ipv4/af_inet.c:427
> inet6_release+0x50/0x70 net/ipv6/af_inet6.c:459
> sock_release+0x96/0x1b0 net/socket.c:594
> sock_close+0x16/0x20 net/socket.c:1149
> __fput+0x34d/0x890 fs/file_table.c:209
> ____fput+0x15/0x20 fs/file_table.c:243
> task_work_run+0x1e4/0x290 kernel/task_work.c:113
> exit_task_work include/linux/task_work.h:22 [inline]
> do_exit+0x1aee/0x2730 kernel/exit.c:865
> do_group_exit+0x16f/0x430 kernel/exit.c:968
> __do_sys_exit_group kernel/exit.c:979 [inline]
> __se_sys_exit_group kernel/exit.c:977 [inline]
> __x64_sys_exit_group+0x3e/0x50 kernel/exit.c:977
> do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287
> entry_SYSCALL_64_after_hwframe+0x49/0xbe
> RIP: 0033:0x440a59
> RSP: 002b:00007ffdadf92488 EFLAGS: 00000206 ORIG_RAX: 00000000000000e7
> RAX: ffffffffffffffda RBX: 0000000000000000 RCX: 0000000000440a59
> RDX: 0000000000440a59 RSI: 0000000000000020 RDI: 0000000000000000
> RBP: 0000000000000000 R08: 00000000004002c8 R09: 0000000000401ea0
> R10: 00000000004002c8 R11: 0000000000000206 R12: 000000000001b5ac
> R13: 0000000000401ea0 R14: 0000000000000000 R15: 0000000000000000
>
> Allocated by task 9723:
> save_stack+0x43/0xd0 mm/kasan/kasan.c:448
> set_track mm/kasan/kasan.c:460 [inline]
> kasan_kmalloc+0xc4/0xe0 mm/kasan/kasan.c:553
> __do_kmalloc_node mm/slab.c:3682 [inline]
> __kmalloc_node+0x47/0x70 mm/slab.c:3689
> kmalloc_node include/linux/slab.h:554 [inline]
> bpf_map_area_alloc+0x3f/0x90 kernel/bpf/syscall.c:144
> sock_map_alloc+0x376/0x410 kernel/bpf/sockmap.c:1555
> find_and_alloc_map kernel/bpf/syscall.c:126 [inline]
> map_create+0x393/0x1010 kernel/bpf/syscall.c:448
> __do_sys_bpf kernel/bpf/syscall.c:2128 [inline]
> __se_sys_bpf kernel/bpf/syscall.c:2105 [inline]
> __x64_sys_bpf+0x300/0x4f0 kernel/bpf/syscall.c:2105
> do_syscall_64+0x1b1/0x800 arch/x86/entry/common.c:287
> entry_SYSCALL_64_after_hwframe+0x49/0xbe
>
> Freed by task 4521:
> save_stack+0x43/0xd0 mm/kasan/kasan.c:448
> set_track mm/kasan/kasan.c:460 [inline]
> __kasan_slab_free+0x11a/0x170 mm/kasan/kasan.c:521
> kasan_slab_free+0xe/0x10 mm/kasan/kasan.c:528
> __cache_free mm/slab.c:3498 [inline]
> kfree+0xd9/0x260 mm/slab.c:3813
> kvfree+0x61/0x70 mm/util.c:440
> bpf_map_area_free+0x15/0x20 kernel/bpf/syscall.c:155
> sock_map_remove_complete kernel/bpf/sockmap.c:1443 [inline]
> sock_map_free+0x408/0x540 kernel/bpf/sockmap.c:1619
> bpf_map_free_deferred+0xba/0xf0 kernel/bpf/syscall.c:259
> process_one_work+0xc1e/0x1b50 kernel/workqueue.c:2145
> worker_thread+0x1cc/0x1440 kernel/workqueue.c:2279
> kthread+0x345/0x410 kernel/kthread.c:238
> ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:412
>
> The buggy address belongs to the object at ffff8801ca277680
> which belongs to the cache kmalloc-1024 of size 1024
> The buggy address is located 0 bytes inside of
> 1024-byte region [ffff8801ca277680, ffff8801ca277a80)
> The buggy address belongs to the page:
> page:ffffea0007289d80 count:1 mapcount:0 mapping:ffff8801ca276000 index:0x0 compound_mapcount: 0
> flags: 0x2fffc0000008100(slab|head)
> raw: 02fffc0000008100 ffff8801ca276000 0000000000000000 0000000100000007
> raw: ffffea0006d12b20 ffffea000763bba0 ffff8801da800ac0 0000000000000000
> page dumped because: kasan: bad access detected
>
> Memory state around the buggy address:
> ffff8801ca277580: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
> ffff8801ca277600: fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc fc
>> ffff8801ca277680: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
> ^
> ffff8801ca277700: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
> ffff8801ca277780: fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb fb
> ==================================================================
>
>
> ---
> This bug is generated by a bot. It may contain errors.
> See https://goo.gl/tpsmEJ for more information about syzbot.
> syzbot engineers can be reached at syzkaller@googlegroups.com.
>
> syzbot will keep track of this bug report. See:
> https://goo.gl/tpsmEJ#bug-status-tracking for how to communicate with syzbot.
> syzbot can test patches for this bug, for details see:
> https://goo.gl/tpsmEJ#testing-patches
^ permalink raw reply
* [PATCH v3 09/11] net: sched: use reference counting action init
From: Vlad Buslov @ 2018-05-27 21:17 UTC (permalink / raw)
To: jiri; +Cc: netdev, jhs, xiyou.wangcong, davem, ast, daniel, kliteyn,
Vlad Buslov
In-Reply-To: <1527455849-22327-1-git-send-email-vladbu@mellanox.com>
Change action API to assume that action init function always takes
reference to action, even when overwriting existing action. This is
necessary because action API continues to use action pointer after init
function is done. At this point action becomes accessible for concurrent
modifications, so user must always hold reference to it.
Implement helper put list function to atomically release list of actions
after action API init code is done using them.
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
---
Changes from V1 to V2:
- Resplit action lookup/release code to prevent memory leaks in
individual patches.
net/sched/act_api.c | 35 +++++++++++++++++------------------
1 file changed, 17 insertions(+), 18 deletions(-)
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index f019f0464cec..eefe8c2fe667 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -627,6 +627,18 @@ static int tcf_action_put(struct tc_action *p)
return __tcf_action_put(p, false);
}
+static void tcf_action_put_lst(struct list_head *actions)
+{
+ struct tc_action *a, *tmp;
+
+ list_for_each_entry_safe(a, tmp, actions, list) {
+ const struct tc_action_ops *ops = a->ops;
+
+ if (tcf_action_put(a))
+ module_put(ops->owner);
+ }
+}
+
int
tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
{
@@ -835,17 +847,6 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
return ERR_PTR(err);
}
-static void cleanup_a(struct list_head *actions, int ovr)
-{
- struct tc_action *a;
-
- if (!ovr)
- return;
-
- list_for_each_entry(a, actions, list)
- refcount_dec(&a->tcfa_refcnt);
-}
-
int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
struct nlattr *est, char *name, int ovr, int bind,
struct list_head *actions, size_t *attr_size,
@@ -874,11 +875,6 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
}
*attr_size = tcf_action_full_attrs_size(sz);
-
- /* Remove the temp refcnt which was necessary to protect against
- * destroying an existing action which was being replaced
- */
- cleanup_a(actions, ovr);
return 0;
err:
@@ -1209,7 +1205,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
return ret;
}
err:
- tcf_action_destroy(&actions, 0);
+ tcf_action_put_lst(&actions);
return ret;
}
@@ -1251,8 +1247,11 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
&attr_size, true, extack);
if (ret)
return ret;
+ ret = tcf_add_notify(net, n, &actions, portid, attr_size, extack);
+ if (ovr)
+ tcf_action_put_lst(&actions);
- return tcf_add_notify(net, n, &actions, portid, attr_size, extack);
+ return ret;
}
static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON;
--
2.7.5
^ permalink raw reply related
* [PATCH v3 11/11] net: sched: change action API to use array of pointers to actions
From: Vlad Buslov @ 2018-05-27 21:17 UTC (permalink / raw)
To: jiri; +Cc: netdev, jhs, xiyou.wangcong, davem, ast, daniel, kliteyn,
Vlad Buslov
In-Reply-To: <1527455849-22327-1-git-send-email-vladbu@mellanox.com>
The act API used a linked list to pass a set of actions to functions. It is
an intrusive data structure that stores list nodes inside the action structure
itself, which means it is not safe to modify such a list concurrently.
However, the action API doesn't use any linked-list-specific operations on
this set of actions, so it can be safely refactored into a plain pointer array.
Refactor action API to use array of pointers to tc_actions instead of
linked list. Change argument 'actions' type of exported action init,
destroy and dump functions.
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
---
include/net/act_api.h | 7 ++---
net/sched/act_api.c | 74 ++++++++++++++++++++++++++++-----------------------
net/sched/cls_api.c | 21 +++++----------
3 files changed, 50 insertions(+), 52 deletions(-)
diff --git a/include/net/act_api.h b/include/net/act_api.h
index cd4547476074..43dfa5e1b3b3 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -168,19 +168,20 @@ static inline int tcf_idr_release(struct tc_action *a, bool bind)
int tcf_register_action(struct tc_action_ops *a, struct pernet_operations *ops);
int tcf_unregister_action(struct tc_action_ops *a,
struct pernet_operations *ops);
-int tcf_action_destroy(struct list_head *actions, int bind);
+int tcf_action_destroy(struct tc_action *actions[], int bind);
int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
int nr_actions, struct tcf_result *res);
int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
struct nlattr *est, char *name, int ovr, int bind,
- struct list_head *actions, size_t *attr_size,
+ struct tc_action *actions[], size_t *attr_size,
bool rtnl_held, struct netlink_ext_ack *extack);
struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
struct nlattr *nla, struct nlattr *est,
char *name, int ovr, int bind,
bool rtnl_held,
struct netlink_ext_ack *extack);
-int tcf_action_dump(struct sk_buff *skb, struct list_head *, int, int);
+int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], int bind,
+ int ref);
int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int);
int tcf_action_copy_stats(struct sk_buff *, struct tc_action *, int);
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 9511502e1cbb..7f904bb84aab 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -657,13 +657,14 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions,
}
EXPORT_SYMBOL(tcf_action_exec);
-int tcf_action_destroy(struct list_head *actions, int bind)
+int tcf_action_destroy(struct tc_action *actions[], int bind)
{
const struct tc_action_ops *ops;
- struct tc_action *a, *tmp;
- int ret = 0;
+ struct tc_action *a;
+ int ret = 0, i;
- list_for_each_entry_safe(a, tmp, actions, list) {
+ for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
+ a = actions[i];
ops = a->ops;
ret = __tcf_idr_release(a, bind, true);
if (ret == ACT_P_DELETED)
@@ -679,11 +680,12 @@ static int tcf_action_put(struct tc_action *p)
return __tcf_action_put(p, false);
}
-static void tcf_action_put_lst(struct list_head *actions)
+static void tcf_action_put_many(struct tc_action *actions[])
{
- struct tc_action *a, *tmp;
+ int i;
- list_for_each_entry_safe(a, tmp, actions, list) {
+ for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
+ struct tc_action *a = actions[i];
const struct tc_action_ops *ops = a->ops;
if (tcf_action_put(a))
@@ -735,14 +737,15 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
}
EXPORT_SYMBOL(tcf_action_dump_1);
-int tcf_action_dump(struct sk_buff *skb, struct list_head *actions,
+int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[],
int bind, int ref)
{
struct tc_action *a;
- int err = -EINVAL;
+ int err = -EINVAL, i;
struct nlattr *nest;
- list_for_each_entry(a, actions, list) {
+ for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
+ a = actions[i];
nest = nla_nest_start(skb, a->order);
if (nest == NULL)
goto nla_put_failure;
@@ -878,10 +881,9 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
if (TC_ACT_EXT_CMP(a->tcfa_action, TC_ACT_GOTO_CHAIN)) {
err = tcf_action_goto_chain_init(a, tp);
if (err) {
- LIST_HEAD(actions);
+ struct tc_action *actions[TCA_ACT_MAX_PRIO] = { a };
- list_add_tail(&a->list, &actions);
- tcf_action_destroy(&actions, bind);
+ tcf_action_destroy(actions, bind);
NL_SET_ERR_MSG(extack, "Failed to init TC action chain");
return ERR_PTR(err);
}
@@ -899,9 +901,11 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
return ERR_PTR(err);
}
+/* Returns the number of initialized actions or a negative error. */
+
int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
struct nlattr *est, char *name, int ovr, int bind,
- struct list_head *actions, size_t *attr_size,
+ struct tc_action *actions[], size_t *attr_size,
bool rtnl_held, struct netlink_ext_ack *extack)
{
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
@@ -923,11 +927,12 @@ int tcf_action_init(struct net *net, struct tcf_proto *tp, struct nlattr *nla,
}
act->order = i;
sz += tcf_action_fill_size(act);
- list_add_tail(&act->list, actions);
+ /* Start from index 0 */
+ actions[i - 1] = act;
}
*attr_size = tcf_action_full_attrs_size(sz);
- return 0;
+ return i - 1;
err:
tcf_action_destroy(actions, bind);
@@ -978,7 +983,7 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p,
return -1;
}
-static int tca_get_fill(struct sk_buff *skb, struct list_head *actions,
+static int tca_get_fill(struct sk_buff *skb, struct tc_action *actions[],
u32 portid, u32 seq, u16 flags, int event, int bind,
int ref)
{
@@ -1014,7 +1019,7 @@ static int tca_get_fill(struct sk_buff *skb, struct list_head *actions,
static int
tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
- struct list_head *actions, int event,
+ struct tc_action *actions[], int event,
struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
@@ -1150,14 +1155,14 @@ static int tca_action_flush(struct net *net, struct nlattr *nla,
return err;
}
-static int tcf_action_delete(struct net *net, struct list_head *actions,
+static int tcf_action_delete(struct net *net, struct tc_action *actions[],
struct netlink_ext_ack *extack)
{
- struct tc_action *a, *tmp;
u32 act_index;
- int ret;
+ int ret, i;
- list_for_each_entry_safe(a, tmp, actions, list) {
+ for (i = 0; i < TCA_ACT_MAX_PRIO && actions[i]; i++) {
+ struct tc_action *a = actions[i];
const struct tc_action_ops *ops = a->ops;
/* Actions can be deleted concurrently so we must save their
@@ -1180,7 +1185,7 @@ static int tcf_action_delete(struct net *net, struct list_head *actions,
}
static int
-tcf_del_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
+tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
{
int ret;
@@ -1221,7 +1226,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
struct nlattr *tb[TCA_ACT_MAX_PRIO + 1];
struct tc_action *act;
size_t attr_size = 0;
- LIST_HEAD(actions);
+ struct tc_action *actions[TCA_ACT_MAX_PRIO] = {};
ret = nla_parse_nested(tb, TCA_ACT_MAX_PRIO, nla, NULL, extack);
if (ret < 0)
@@ -1243,26 +1248,27 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n,
}
act->order = i;
attr_size += tcf_action_fill_size(act);
- list_add_tail(&act->list, &actions);
+ actions[i - 1] = act;
}
attr_size = tcf_action_full_attrs_size(attr_size);
if (event == RTM_GETACTION)
- ret = tcf_get_notify(net, portid, n, &actions, event, extack);
+ ret = tcf_get_notify(net, portid, n, actions, event, extack);
else { /* delete */
- ret = tcf_del_notify(net, n, &actions, portid, attr_size, extack);
+ ret = tcf_del_notify(net, n, actions, portid, attr_size,
+ extack);
if (ret)
goto err;
return ret;
}
err:
- tcf_action_put_lst(&actions);
+ tcf_action_put_many(actions);
return ret;
}
static int
-tcf_add_notify(struct net *net, struct nlmsghdr *n, struct list_head *actions,
+tcf_add_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
u32 portid, size_t attr_size, struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
@@ -1293,15 +1299,15 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
{
size_t attr_size = 0;
int ret = 0;
- LIST_HEAD(actions);
+ struct tc_action *actions[TCA_ACT_MAX_PRIO] = {};
- ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, &actions,
+ ret = tcf_action_init(net, NULL, nla, NULL, NULL, ovr, 0, actions,
&attr_size, true, extack);
- if (ret)
+ if (ret < 0)
return ret;
- ret = tcf_add_notify(net, n, &actions, portid, attr_size, extack);
+ ret = tcf_add_notify(net, n, actions, portid, attr_size, extack);
if (ovr)
- tcf_action_put_lst(&actions);
+ tcf_action_put_many(actions);
return ret;
}
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index dd37d0ae3fce..b2bb6b7b038a 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1415,10 +1415,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
void tcf_exts_destroy(struct tcf_exts *exts)
{
#ifdef CONFIG_NET_CLS_ACT
- LIST_HEAD(actions);
-
- tcf_exts_to_list(exts, &actions);
- tcf_action_destroy(&actions, TCA_ACT_UNBIND);
+ tcf_action_destroy(exts->actions, TCA_ACT_UNBIND);
kfree(exts->actions);
exts->nr_actions = 0;
#endif
@@ -1445,18 +1442,15 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb,
exts->actions[0] = act;
exts->nr_actions = 1;
} else if (exts->action && tb[exts->action]) {
- LIST_HEAD(actions);
- int err, i = 0;
+ int err;
err = tcf_action_init(net, tp, tb[exts->action],
rate_tlv, NULL, ovr, TCA_ACT_BIND,
- &actions, &attr_size, true,
+ exts->actions, &attr_size, true,
extack);
- if (err)
+ if (err < 0)
return err;
- list_for_each_entry(act, &actions, list)
- exts->actions[i++] = act;
- exts->nr_actions = i;
+ exts->nr_actions = err;
}
exts->net = net;
}
@@ -1505,14 +1499,11 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
* tc data even if iproute2 was newer - jhs
*/
if (exts->type != TCA_OLD_COMPAT) {
- LIST_HEAD(actions);
-
nest = nla_nest_start(skb, exts->action);
if (nest == NULL)
goto nla_put_failure;
- tcf_exts_to_list(exts, &actions);
- if (tcf_action_dump(skb, &actions, 0, 0) < 0)
+ if (tcf_action_dump(skb, exts->actions, 0, 0) < 0)
goto nla_put_failure;
nla_nest_end(skb, nest);
} else if (exts->police) {
--
2.7.5
^ permalink raw reply related
* [PATCH v3 06/11] net: sched: add 'delete' function to action ops
From: Vlad Buslov @ 2018-05-27 21:17 UTC (permalink / raw)
To: jiri; +Cc: netdev, jhs, xiyou.wangcong, davem, ast, daniel, kliteyn,
Vlad Buslov
In-Reply-To: <1527455849-22327-1-git-send-email-vladbu@mellanox.com>
Extend action ops with 'delete' function. Each action type implements
its own delete function that doesn't depend on rtnl lock.
Implement delete function that is required to delete actions without
holding rtnl lock. Use action API function that atomically deletes action
only if it is still in action idr. This implementation prevents concurrent
threads from deleting same action twice.
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
---
Changes from V1 to V2:
- Merge action ops delete definition and implementation.
include/net/act_api.h | 1 +
net/sched/act_bpf.c | 8 ++++++++
net/sched/act_connmark.c | 8 ++++++++
net/sched/act_csum.c | 8 ++++++++
net/sched/act_gact.c | 8 ++++++++
net/sched/act_ife.c | 8 ++++++++
net/sched/act_ipt.c | 16 ++++++++++++++++
net/sched/act_mirred.c | 8 ++++++++
net/sched/act_nat.c | 8 ++++++++
net/sched/act_pedit.c | 8 ++++++++
net/sched/act_police.c | 8 ++++++++
net/sched/act_sample.c | 8 ++++++++
net/sched/act_simple.c | 8 ++++++++
net/sched/act_skbedit.c | 8 ++++++++
net/sched/act_skbmod.c | 8 ++++++++
net/sched/act_tunnel_key.c | 8 ++++++++
net/sched/act_vlan.c | 8 ++++++++
17 files changed, 137 insertions(+)
diff --git a/include/net/act_api.h b/include/net/act_api.h
index d94ec6400673..d256e20507b9 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -101,6 +101,7 @@ struct tc_action_ops {
void (*stats_update)(struct tc_action *, u64, u32, u64);
size_t (*get_fill_size)(const struct tc_action *act);
struct net_device *(*get_dev)(const struct tc_action *a);
+ int (*delete)(struct net *net, u32 index);
};
struct tc_action_net {
diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c
index 8ebf40a3506c..7941dd66ff83 100644
--- a/net/sched/act_bpf.c
+++ b/net/sched/act_bpf.c
@@ -388,6 +388,13 @@ static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
+static int tcf_bpf_delete(struct net *net, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, bpf_net_id);
+
+ return tcf_idr_delete_index(tn, index);
+}
+
static struct tc_action_ops act_bpf_ops __read_mostly = {
.kind = "bpf",
.type = TCA_ACT_BPF,
@@ -398,6 +405,7 @@ static struct tc_action_ops act_bpf_ops __read_mostly = {
.init = tcf_bpf_init,
.walk = tcf_bpf_walker,
.lookup = tcf_bpf_search,
+ .delete = tcf_bpf_delete,
.size = sizeof(struct tcf_bpf),
};
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index e3787aa0025a..143c2d3de723 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -193,6 +193,13 @@ static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
+static int tcf_connmark_delete(struct net *net, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, connmark_net_id);
+
+ return tcf_idr_delete_index(tn, index);
+}
+
static struct tc_action_ops act_connmark_ops = {
.kind = "connmark",
.type = TCA_ACT_CONNMARK,
@@ -202,6 +209,7 @@ static struct tc_action_ops act_connmark_ops = {
.init = tcf_connmark_init,
.walk = tcf_connmark_walker,
.lookup = tcf_connmark_search,
+ .delete = tcf_connmark_delete,
.size = sizeof(struct tcf_connmark_info),
};
diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c
index 334261943f9f..3768539340e0 100644
--- a/net/sched/act_csum.c
+++ b/net/sched/act_csum.c
@@ -654,6 +654,13 @@ static size_t tcf_csum_get_fill_size(const struct tc_action *act)
return nla_total_size(sizeof(struct tc_csum));
}
+static int tcf_csum_delete(struct net *net, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, csum_net_id);
+
+ return tcf_idr_delete_index(tn, index);
+}
+
static struct tc_action_ops act_csum_ops = {
.kind = "csum",
.type = TCA_ACT_CSUM,
@@ -665,6 +672,7 @@ static struct tc_action_ops act_csum_ops = {
.walk = tcf_csum_walker,
.lookup = tcf_csum_search,
.get_fill_size = tcf_csum_get_fill_size,
+ .delete = tcf_csum_delete,
.size = sizeof(struct tcf_csum),
};
diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c
index b4dfb2b4addc..a431a711f0dd 100644
--- a/net/sched/act_gact.c
+++ b/net/sched/act_gact.c
@@ -231,6 +231,13 @@ static size_t tcf_gact_get_fill_size(const struct tc_action *act)
return sz;
}
+static int tcf_gact_delete(struct net *net, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, gact_net_id);
+
+ return tcf_idr_delete_index(tn, index);
+}
+
static struct tc_action_ops act_gact_ops = {
.kind = "gact",
.type = TCA_ACT_GACT,
@@ -242,6 +249,7 @@ static struct tc_action_ops act_gact_ops = {
.walk = tcf_gact_walker,
.lookup = tcf_gact_search,
.get_fill_size = tcf_gact_get_fill_size,
+ .delete = tcf_gact_delete,
.size = sizeof(struct tcf_gact),
};
diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c
index 3dccc4e1d378..027c305dcb37 100644
--- a/net/sched/act_ife.c
+++ b/net/sched/act_ife.c
@@ -846,6 +846,13 @@ static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
+static int tcf_ife_delete(struct net *net, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, ife_net_id);
+
+ return tcf_idr_delete_index(tn, index);
+}
+
static struct tc_action_ops act_ife_ops = {
.kind = "ife",
.type = TCA_ACT_IFE,
@@ -856,6 +863,7 @@ static struct tc_action_ops act_ife_ops = {
.init = tcf_ife_init,
.walk = tcf_ife_walker,
.lookup = tcf_ife_search,
+ .delete = tcf_ife_delete,
.size = sizeof(struct tcf_ife_info),
};
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 9c21663a86a6..6c234411c771 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -324,6 +324,13 @@ static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
+static int tcf_ipt_delete(struct net *net, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, ipt_net_id);
+
+ return tcf_idr_delete_index(tn, index);
+}
+
static struct tc_action_ops act_ipt_ops = {
.kind = "ipt",
.type = TCA_ACT_IPT,
@@ -334,6 +341,7 @@ static struct tc_action_ops act_ipt_ops = {
.init = tcf_ipt_init,
.walk = tcf_ipt_walker,
.lookup = tcf_ipt_search,
+ .delete = tcf_ipt_delete,
.size = sizeof(struct tcf_ipt),
};
@@ -374,6 +382,13 @@ static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
+static int tcf_xt_delete(struct net *net, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, xt_net_id);
+
+ return tcf_idr_delete_index(tn, index);
+}
+
static struct tc_action_ops act_xt_ops = {
.kind = "xt",
.type = TCA_ACT_XT,
@@ -384,6 +399,7 @@ static struct tc_action_ops act_xt_ops = {
.init = tcf_xt_init,
.walk = tcf_xt_walker,
.lookup = tcf_xt_search,
+ .delete = tcf_xt_delete,
.size = sizeof(struct tcf_ipt),
};
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 5434f08f2eb7..3d8300bce7e4 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -322,6 +322,13 @@ static struct net_device *tcf_mirred_get_dev(const struct tc_action *a)
return rtnl_dereference(m->tcfm_dev);
}
+static int tcf_mirred_delete(struct net *net, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, mirred_net_id);
+
+ return tcf_idr_delete_index(tn, index);
+}
+
static struct tc_action_ops act_mirred_ops = {
.kind = "mirred",
.type = TCA_ACT_MIRRED,
@@ -335,6 +342,7 @@ static struct tc_action_ops act_mirred_ops = {
.lookup = tcf_mirred_search,
.size = sizeof(struct tcf_mirred),
.get_dev = tcf_mirred_get_dev,
+ .delete = tcf_mirred_delete,
};
static __net_init int mirred_init_net(struct net *net)
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index e6487ad1e4a8..9eb27c89dc46 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -294,6 +294,13 @@ static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
+static int tcf_nat_delete(struct net *net, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, nat_net_id);
+
+ return tcf_idr_delete_index(tn, index);
+}
+
static struct tc_action_ops act_nat_ops = {
.kind = "nat",
.type = TCA_ACT_NAT,
@@ -303,6 +310,7 @@ static struct tc_action_ops act_nat_ops = {
.init = tcf_nat_init,
.walk = tcf_nat_walker,
.lookup = tcf_nat_search,
+ .delete = tcf_nat_delete,
.size = sizeof(struct tcf_nat),
};
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index 7c9a3f24edba..b8857035e3f8 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -436,6 +436,13 @@ static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
+static int tcf_pedit_delete(struct net *net, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, pedit_net_id);
+
+ return tcf_idr_delete_index(tn, index);
+}
+
static struct tc_action_ops act_pedit_ops = {
.kind = "pedit",
.type = TCA_ACT_PEDIT,
@@ -446,6 +453,7 @@ static struct tc_action_ops act_pedit_ops = {
.init = tcf_pedit_init,
.walk = tcf_pedit_walker,
.lookup = tcf_pedit_search,
+ .delete = tcf_pedit_delete,
.size = sizeof(struct tcf_pedit),
};
diff --git a/net/sched/act_police.c b/net/sched/act_police.c
index 0e1c2fb0ebea..c955fb0d4f3f 100644
--- a/net/sched/act_police.c
+++ b/net/sched/act_police.c
@@ -314,6 +314,13 @@ static int tcf_police_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
+static int tcf_police_delete(struct net *net, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, police_net_id);
+
+ return tcf_idr_delete_index(tn, index);
+}
+
MODULE_AUTHOR("Alexey Kuznetsov");
MODULE_DESCRIPTION("Policing actions");
MODULE_LICENSE("GPL");
@@ -327,6 +334,7 @@ static struct tc_action_ops act_police_ops = {
.init = tcf_act_police_init,
.walk = tcf_act_police_walker,
.lookup = tcf_police_search,
+ .delete = tcf_police_delete,
.size = sizeof(struct tcf_police),
};
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index 316fc645595d..6f79d2afcba2 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -220,6 +220,13 @@ static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
+static int tcf_sample_delete(struct net *net, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, sample_net_id);
+
+ return tcf_idr_delete_index(tn, index);
+}
+
static struct tc_action_ops act_sample_ops = {
.kind = "sample",
.type = TCA_ACT_SAMPLE,
@@ -230,6 +237,7 @@ static struct tc_action_ops act_sample_ops = {
.cleanup = tcf_sample_cleanup,
.walk = tcf_sample_walker,
.lookup = tcf_sample_search,
+ .delete = tcf_sample_delete,
.size = sizeof(struct tcf_sample),
};
diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
index 23fa893ea092..b570e7ca7e33 100644
--- a/net/sched/act_simple.c
+++ b/net/sched/act_simple.c
@@ -187,6 +187,13 @@ static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
+static int tcf_simp_delete(struct net *net, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, simp_net_id);
+
+ return tcf_idr_delete_index(tn, index);
+}
+
static struct tc_action_ops act_simp_ops = {
.kind = "simple",
.type = TCA_ACT_SIMP,
@@ -197,6 +204,7 @@ static struct tc_action_ops act_simp_ops = {
.init = tcf_simp_init,
.walk = tcf_simp_walker,
.lookup = tcf_simp_search,
+ .delete = tcf_simp_delete,
.size = sizeof(struct tcf_defact),
};
diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c
index 85ed9d603dc1..dc0cb350aa45 100644
--- a/net/sched/act_skbedit.c
+++ b/net/sched/act_skbedit.c
@@ -226,6 +226,13 @@ static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
+static int tcf_skbedit_delete(struct net *net, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, skbedit_net_id);
+
+ return tcf_idr_delete_index(tn, index);
+}
+
static struct tc_action_ops act_skbedit_ops = {
.kind = "skbedit",
.type = TCA_ACT_SKBEDIT,
@@ -235,6 +242,7 @@ static struct tc_action_ops act_skbedit_ops = {
.init = tcf_skbedit_init,
.walk = tcf_skbedit_walker,
.lookup = tcf_skbedit_search,
+ .delete = tcf_skbedit_delete,
.size = sizeof(struct tcf_skbedit),
};
diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c
index 026d6f58eda1..30be3f767495 100644
--- a/net/sched/act_skbmod.c
+++ b/net/sched/act_skbmod.c
@@ -253,6 +253,13 @@ static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
+static int tcf_skbmod_delete(struct net *net, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, skbmod_net_id);
+
+ return tcf_idr_delete_index(tn, index);
+}
+
static struct tc_action_ops act_skbmod_ops = {
.kind = "skbmod",
.type = TCA_ACT_SKBMOD,
@@ -263,6 +270,7 @@ static struct tc_action_ops act_skbmod_ops = {
.cleanup = tcf_skbmod_cleanup,
.walk = tcf_skbmod_walker,
.lookup = tcf_skbmod_search,
+ .delete = tcf_skbmod_delete,
.size = sizeof(struct tcf_skbmod),
};
diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c
index ed698fcb0e5a..4b7f9a3b47d7 100644
--- a/net/sched/act_tunnel_key.c
+++ b/net/sched/act_tunnel_key.c
@@ -310,6 +310,13 @@ static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
+static int tunnel_key_delete(struct net *net, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, tunnel_key_net_id);
+
+ return tcf_idr_delete_index(tn, index);
+}
+
static struct tc_action_ops act_tunnel_key_ops = {
.kind = "tunnel_key",
.type = TCA_ACT_TUNNEL_KEY,
@@ -320,6 +327,7 @@ static struct tc_action_ops act_tunnel_key_ops = {
.cleanup = tunnel_key_release,
.walk = tunnel_key_walker,
.lookup = tunnel_key_search,
+ .delete = tunnel_key_delete,
.size = sizeof(struct tcf_tunnel_key),
};
diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c
index 72d2d78fb942..b44377c951b6 100644
--- a/net/sched/act_vlan.c
+++ b/net/sched/act_vlan.c
@@ -285,6 +285,13 @@ static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index,
return tcf_idr_search(tn, a, index);
}
+static int tcf_vlan_delete(struct net *net, u32 index)
+{
+ struct tc_action_net *tn = net_generic(net, vlan_net_id);
+
+ return tcf_idr_delete_index(tn, index);
+}
+
static struct tc_action_ops act_vlan_ops = {
.kind = "vlan",
.type = TCA_ACT_VLAN,
@@ -295,6 +302,7 @@ static struct tc_action_ops act_vlan_ops = {
.cleanup = tcf_vlan_cleanup,
.walk = tcf_vlan_walker,
.lookup = tcf_vlan_search,
+ .delete = tcf_vlan_delete,
.size = sizeof(struct tcf_vlan),
};
--
2.7.5
^ permalink raw reply related
page: next (older) | prev (newer) | latest
- recent:[subjects (threaded)|topics (new)|topics (active)]
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox