From: Matt Mullins <mmullins@fb.com>
To: <hall@fb.com>, <mmullins@fb.com>, <ast@kernel.org>,
<bpf@vger.kernel.org>, <netdev@vger.kernel.org>
Cc: <linux-kernel@vger.kernel.org>,
Daniel Borkmann <daniel@iogearbox.net>,
Martin KaFai Lau <kafai@fb.com>, Song Liu <songliubraving@fb.com>,
"Yonghong Song" <yhs@fb.com>,
Steven Rostedt <rostedt@goodmis.org>,
Ingo Molnar <mingo@redhat.com>
Subject: [PATCH bpf-next v3 1/5] bpf: add writable context for raw tracepoints
Date: Fri, 19 Apr 2019 14:04:05 -0700 [thread overview]
Message-ID: <20190419210409.5021-2-mmullins@fb.com> (raw)
In-Reply-To: <20190419210409.5021-1-mmullins@fb.com>
This is an opt-in interface that allows a tracepoint to provide a safe
buffer that can be written from a BPF_PROG_TYPE_RAW_TRACEPOINT program.
The size of the buffer must be a compile-time constant, and is checked
before allowing a BPF program to attach to a tracepoint that uses this
feature.
The pointer to this buffer will be the first argument of tracepoints
that opt in; the buffer is readable by both BPF_PROG_TYPE_RAW_TRACEPOINT
and BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE programs that attach to such a
tracepoint, but the buffer to which it points may only be written by the
latter.
Signed-off-by: Matt Mullins <mmullins@fb.com>
---
include/linux/bpf.h | 2 ++
include/linux/bpf_types.h | 1 +
include/linux/tracepoint-defs.h | 1 +
include/trace/bpf_probe.h | 27 +++++++++++++++++++++++++--
include/uapi/linux/bpf.h | 1 +
kernel/bpf/syscall.c | 8 ++++++--
kernel/bpf/verifier.c | 31 +++++++++++++++++++++++++++++++
kernel/trace/bpf_trace.c | 21 +++++++++++++++++++++
8 files changed, 88 insertions(+), 4 deletions(-)
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index a2132e09dc1c..d3c71fd67476 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -263,6 +263,7 @@ enum bpf_reg_type {
PTR_TO_SOCK_COMMON_OR_NULL, /* reg points to sock_common or NULL */
PTR_TO_TCP_SOCK, /* reg points to struct tcp_sock */
PTR_TO_TCP_SOCK_OR_NULL, /* reg points to struct tcp_sock or NULL */
+ PTR_TO_TP_BUFFER, /* reg points to a writable raw tp's buffer */
};
/* The information passed from prog-specific *_is_valid_access
@@ -352,6 +353,7 @@ struct bpf_prog_aux {
u32 used_map_cnt;
u32 max_ctx_offset;
u32 max_pkt_offset;
+ u32 max_tp_access;
u32 stack_depth;
u32 id;
u32 func_cnt; /* used by non-func prog as the number of func progs */
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 08bf2f1fe553..c766108608cb 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -25,6 +25,7 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe)
BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint)
BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event)
BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT, raw_tracepoint)
+BPF_PROG_TYPE(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, raw_tracepoint_writable)
#endif
#ifdef CONFIG_CGROUP_BPF
BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev)
diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h
index 49ba9cde7e4b..b29950a19205 100644
--- a/include/linux/tracepoint-defs.h
+++ b/include/linux/tracepoint-defs.h
@@ -45,6 +45,7 @@ struct bpf_raw_event_map {
struct tracepoint *tp;
void *bpf_func;
u32 num_args;
+ u32 writable_size;
} __aligned(32);
#endif
diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h
index 505dae0bed80..d6e556c0a085 100644
--- a/include/trace/bpf_probe.h
+++ b/include/trace/bpf_probe.h
@@ -69,8 +69,7 @@ __bpf_trace_##call(void *__data, proto) \
* to make sure that if the tracepoint handling changes, the
* bpf probe will fail to compile unless it too is updated.
*/
-#undef DEFINE_EVENT
-#define DEFINE_EVENT(template, call, proto, args) \
+#define __DEFINE_EVENT(template, call, proto, args, size) \
static inline void bpf_test_probe_##call(void) \
{ \
check_trace_callback_type_##call(__bpf_trace_##template); \
@@ -81,12 +80,36 @@ __bpf_trace_tp_map_##call = { \
.tp = &__tracepoint_##call, \
.bpf_func = (void *)__bpf_trace_##template, \
.num_args = COUNT_ARGS(args), \
+ .writable_size = size, \
};
+#define FIRST(x, ...) x
+
+#undef DEFINE_EVENT_WRITABLE
+#define DEFINE_EVENT_WRITABLE(template, call, proto, args, size) \
+static inline void bpf_test_buffer_##call(void) \
+{ \
+ /* BUILD_BUG_ON() is ignored if the code is completely eliminated, but \
+ * BUILD_BUG_ON_ZERO() uses a different mechanism that is not \
+ * dead-code-eliminated. \
+ */ \
+ FIRST(proto); \
+ (void)BUILD_BUG_ON_ZERO(size != sizeof(*FIRST(args))); \
+} \
+__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), size)
+
+#undef DEFINE_EVENT
+#define DEFINE_EVENT(template, call, proto, args) \
+ __DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), 0)
#undef DEFINE_EVENT_PRINT
#define DEFINE_EVENT_PRINT(template, name, proto, args, print) \
DEFINE_EVENT(template, name, PARAMS(proto), PARAMS(args))
#include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
+
+#undef DEFINE_EVENT_WRITABLE
+#undef __DEFINE_EVENT
+#undef FIRST
+
#endif /* CONFIG_BPF_EVENTS */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 3c38ac9a92a7..c5335d53ce82 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -166,6 +166,7 @@ enum bpf_prog_type {
BPF_PROG_TYPE_LIRC_MODE2,
BPF_PROG_TYPE_SK_REUSEPORT,
BPF_PROG_TYPE_FLOW_DISSECTOR,
+ BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
};
enum bpf_attach_type {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 62f6bced3a3c..27e2f22879a4 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1720,12 +1720,16 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
}
raw_tp->btp = btp;
- prog = bpf_prog_get_type(attr->raw_tracepoint.prog_fd,
- BPF_PROG_TYPE_RAW_TRACEPOINT);
+ prog = bpf_prog_get(attr->raw_tracepoint.prog_fd);
if (IS_ERR(prog)) {
err = PTR_ERR(prog);
goto out_free_tp;
}
+ if (prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT &&
+ prog->type != BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE) {
+ err = -EINVAL;
+ goto out_put_prog;
+ }
err = bpf_probe_register(raw_tp->btp, prog);
if (err)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index ce166a002d16..32f706e99319 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -411,6 +411,7 @@ static const char * const reg_type_str[] = {
[PTR_TO_SOCK_COMMON_OR_NULL] = "sock_common_or_null",
[PTR_TO_TCP_SOCK] = "tcp_sock",
[PTR_TO_TCP_SOCK_OR_NULL] = "tcp_sock_or_null",
+ [PTR_TO_TP_BUFFER] = "tp_buffer",
};
static char slot_type_char[] = {
@@ -1958,6 +1959,32 @@ static int check_ctx_reg(struct bpf_verifier_env *env,
return 0;
}
+static int check_tp_buffer_access(struct bpf_verifier_env *env,
+ const struct bpf_reg_state *reg,
+ int regno, int off, int size)
+{
+ if (off < 0) {
+ verbose(env,
+ "R%d invalid tracepoint buffer access: off=%d, size=%d",
+ regno, off, size);
+ return -EACCES;
+ }
+ if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
+ char tn_buf[48];
+
+ tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
+ verbose(env,
+ "R%d invalid variable buffer offset: off=%d, var_off=%s",
+ regno, off, tn_buf);
+ return -EACCES;
+ }
+ if (off + size > env->prog->aux->max_tp_access)
+ env->prog->aux->max_tp_access = off + size;
+
+ return 0;
+}
+
+
/* truncate register to smaller size (in bytes)
* must be called with size < BPF_REG_SIZE
*/
@@ -2100,6 +2127,10 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
err = check_sock_access(env, insn_idx, regno, off, size, t);
if (!err && value_regno >= 0)
mark_reg_unknown(env, regs, value_regno);
+ } else if (reg->type == PTR_TO_TP_BUFFER) {
+ err = check_tp_buffer_access(env, reg, regno, off, size);
+ if (!err && t == BPF_READ && value_regno >= 0)
+ mark_reg_unknown(env, regs, value_regno);
} else {
verbose(env, "R%d invalid mem access '%s'\n", regno,
reg_type_str[reg->type]);
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index d64c00afceb5..a2dd79dc6871 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -909,6 +909,24 @@ const struct bpf_verifier_ops raw_tracepoint_verifier_ops = {
const struct bpf_prog_ops raw_tracepoint_prog_ops = {
};
+static bool raw_tp_writable_prog_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+{
+ if (off == 0 && size == sizeof(u64))
+ info->reg_type = PTR_TO_TP_BUFFER;
+ return raw_tp_prog_is_valid_access(off, size, type, prog, info);
+}
+
+const struct bpf_verifier_ops raw_tracepoint_writable_verifier_ops = {
+ .get_func_proto = raw_tp_prog_func_proto,
+ .is_valid_access = raw_tp_writable_prog_is_valid_access,
+};
+
+const struct bpf_prog_ops raw_tracepoint_writable_prog_ops = {
+};
+
static bool pe_prog_is_valid_access(int off, int size, enum bpf_access_type type,
const struct bpf_prog *prog,
struct bpf_insn_access_aux *info)
@@ -1198,6 +1216,9 @@ static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *
if (prog->aux->max_ctx_offset > btp->num_args * sizeof(u64))
return -EINVAL;
+ if (prog->aux->max_tp_access > btp->writable_size)
+ return -EINVAL;
+
return tracepoint_probe_register(tp, (void *)btp->bpf_func, prog);
}
--
2.17.1
next prev parent reply other threads:[~2019-04-19 21:05 UTC|newest]
Thread overview: 24+ messages / expand[flat|nested] mbox.gz Atom feed top
2019-04-19 21:04 [PATCH bpf-next v3 0/5] writable contexts for bpf raw tracepoints Matt Mullins
2019-04-19 21:04 ` Matt Mullins [this message]
2019-04-22 18:12 ` [PATCH bpf-next v3 1/5] bpf: add writable context for " Yonghong Song
2019-04-22 19:23 ` Matt Mullins
2019-04-22 21:17 ` Yonghong Song
2019-04-22 23:01 ` Matt Mullins
2019-04-22 23:16 ` Yonghong Song
2019-04-19 21:04 ` [PATCH bpf-next v3 2/5] nbd: trace sending nbd requests Matt Mullins
2019-04-19 21:16 ` Josef Bacik
2019-04-19 21:04 ` [PATCH bpf-next v3 3/5] nbd: add tracepoints for send/receive timing Matt Mullins
2019-04-19 21:16 ` Josef Bacik
2019-04-19 21:04 ` [PATCH bpf-next v3 4/5] tools: sync bpf.h Matt Mullins
2019-04-19 21:04 ` [PATCH bpf-next v3 5/5] selftests: bpf: test writable buffers in raw tps Matt Mullins
2019-04-19 21:04 ` Matt Mullins
2019-04-19 21:04 ` mmullins
2019-04-22 18:32 ` Yonghong Song
2019-04-22 18:32 ` Yonghong Song
2019-04-22 18:32 ` yhs
2019-04-22 19:27 ` Matt Mullins
2019-04-22 19:27 ` Matt Mullins
2019-04-22 19:27 ` mmullins
2019-04-22 21:13 ` Yonghong Song
2019-04-22 21:13 ` Yonghong Song
2019-04-22 21:13 ` yhs
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=20190419210409.5021-2-mmullins@fb.com \
--to=mmullins@fb.com \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=hall@fb.com \
--cc=kafai@fb.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@redhat.com \
--cc=netdev@vger.kernel.org \
--cc=rostedt@goodmis.org \
--cc=songliubraving@fb.com \
--cc=yhs@fb.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.