All of lore.kernel.org
 help / color / mirror / Atom feed
From: Hao Luo <haoluo@google.com>
To: Alexei Starovoitov <ast@kernel.org>,
	Andrii Nakryiko <andrii@kernel.org>,
	Daniel Borkmann <daniel@iogearbox.net>
Cc: Martin KaFai Lau <kafai@fb.com>, Song Liu <songliubraving@fb.com>,
	Yonghong Song <yhs@fb.com>, KP Singh <kpsingh@kernel.org>,
	Shakeel Butt <shakeelb@google.com>,
	Joe Burton <jevburton.kernel@gmail.com>,
	Tejun Heo <tj@kernel.org>,
	joshdon@google.com, sdf@google.com, bpf@vger.kernel.org,
	linux-kernel@vger.kernel.org, Hao Luo <haoluo@google.com>
Subject: [PATCH bpf-next v1 4/9] bpf: Introduce sleepable tracepoints
Date: Fri, 25 Feb 2022 15:43:34 -0800	[thread overview]
Message-ID: <20220225234339.2386398-5-haoluo@google.com> (raw)
In-Reply-To: <20220225234339.2386398-1-haoluo@google.com>

Add a new type of bpf tracepoint: the sleepable tracepoint, which allows
the handler to make calls that may sleep. Programs attached to sleepable
tracepoints may also call a set of syscall helpers (which may sleep).

In the following patches, we will whitelist some tracepoints to be
sleepable.

Signed-off-by: Hao Luo <haoluo@google.com>
---
 include/linux/bpf.h             | 10 +++++++-
 include/linux/tracepoint-defs.h |  1 +
 include/trace/bpf_probe.h       | 22 ++++++++++++++----
 kernel/bpf/syscall.c            | 41 +++++++++++++++++++++++----------
 kernel/trace/bpf_trace.c        |  5 ++++
 5 files changed, 61 insertions(+), 18 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index c36eeced3838..759ade7b24b3 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1810,6 +1810,9 @@ struct bpf_prog *bpf_prog_by_id(u32 id);
 struct bpf_link *bpf_link_by_id(u32 id);
 
 const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id);
+const struct bpf_func_proto *
+tracing_prog_syscall_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog);
+
 void bpf_task_storage_free(struct task_struct *task);
 bool bpf_prog_has_kfunc_call(const struct bpf_prog *prog);
 const struct btf_func_model *
@@ -1822,7 +1825,6 @@ struct bpf_core_ctx {
 
 int bpf_core_apply(struct bpf_core_ctx *ctx, const struct bpf_core_relo *relo,
 		   int relo_idx, void *insn);
-
 #else /* !CONFIG_BPF_SYSCALL */
 static inline struct bpf_prog *bpf_prog_get(u32 ufd)
 {
@@ -2011,6 +2013,12 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 	return NULL;
 }
 
+static inline const struct bpf_func_proto *
+tracing_prog_syscall_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	return NULL; /* stub: no syscall helper protos to offer */
+}
+
 static inline void bpf_task_storage_free(struct task_struct *task)
 {
 }
diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h
index e7c2276be33e..c73c7ab3680e 100644
--- a/include/linux/tracepoint-defs.h
+++ b/include/linux/tracepoint-defs.h
@@ -51,6 +51,7 @@ struct bpf_raw_event_map {
 	void			*bpf_func;
 	u32			num_args;
 	u32			writable_size;
+	u32			sleepable;
 } __aligned(32);
 
 /*
diff --git a/include/trace/bpf_probe.h b/include/trace/bpf_probe.h
index 7660a7846586..4edfc6df2f52 100644
--- a/include/trace/bpf_probe.h
+++ b/include/trace/bpf_probe.h
@@ -88,7 +88,7 @@ __bpf_trace_##call(void *__data, proto)					\
  * to make sure that if the tracepoint handling changes, the
  * bpf probe will fail to compile unless it too is updated.
  */
-#define __DEFINE_EVENT(template, call, proto, args, size)		\
+#define __DEFINE_EVENT(template, call, proto, args, size, sleep)	\
 static inline void bpf_test_probe_##call(void)				\
 {									\
 	check_trace_callback_type_##call(__bpf_trace_##template);	\
@@ -104,6 +104,7 @@ __section("__bpf_raw_tp_map") = {					\
 		.bpf_func	= __bpf_trace_##template,		\
 		.num_args	= COUNT_ARGS(args),			\
 		.writable_size	= size,					\
+		.sleepable	= sleep,				\
 	},								\
 };
 
@@ -123,11 +124,15 @@ static inline void bpf_test_buffer_##call(void)				\
 #undef DEFINE_EVENT_WRITABLE
 #define DEFINE_EVENT_WRITABLE(template, call, proto, args, size) \
 	__CHECK_WRITABLE_BUF_SIZE(call, PARAMS(proto), PARAMS(args), size) \
-	__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), size)
+	__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), size, 0)
+
+#undef DEFINE_EVENT_SLEEPABLE
+#define DEFINE_EVENT_SLEEPABLE(template, call, proto, args)	\
+	__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), 0, 1)
 
 #undef DEFINE_EVENT
 #define DEFINE_EVENT(template, call, proto, args)			\
-	__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), 0)
+	__DEFINE_EVENT(template, call, PARAMS(proto), PARAMS(args), 0, 0)
 
 #undef DEFINE_EVENT_PRINT
 #define DEFINE_EVENT_PRINT(template, name, proto, args, print)	\
@@ -136,19 +141,26 @@ static inline void bpf_test_buffer_##call(void)				\
 #undef DECLARE_TRACE
 #define DECLARE_TRACE(call, proto, args)				\
 	__BPF_DECLARE_TRACE(call, PARAMS(proto), PARAMS(args))		\
-	__DEFINE_EVENT(call, call, PARAMS(proto), PARAMS(args), 0)
+	__DEFINE_EVENT(call, call, PARAMS(proto), PARAMS(args), 0, 0)
 
 #undef DECLARE_TRACE_WRITABLE
 #define DECLARE_TRACE_WRITABLE(call, proto, args, size) \
 	__CHECK_WRITABLE_BUF_SIZE(call, PARAMS(proto), PARAMS(args), size) \
 	__BPF_DECLARE_TRACE(call, PARAMS(proto), PARAMS(args)) \
-	__DEFINE_EVENT(call, call, PARAMS(proto), PARAMS(args), size)
+	__DEFINE_EVENT(call, call, PARAMS(proto), PARAMS(args), size, 0)
+
+#undef DECLARE_TRACE_SLEEPABLE
+#define DECLARE_TRACE_SLEEPABLE(call, proto, args)			\
+	__BPF_DECLARE_TRACE(call, PARAMS(proto), PARAMS(args))		\
+	__DEFINE_EVENT(call, call, PARAMS(proto), PARAMS(args), 0, 1)
 
 #include TRACE_INCLUDE(TRACE_INCLUDE_FILE)
 
 #undef DECLARE_TRACE_WRITABLE
 #undef DEFINE_EVENT_WRITABLE
 #undef __CHECK_WRITABLE_BUF_SIZE
+#undef DECLARE_TRACE_SLEEPABLE
+#undef DEFINE_EVENT_SLEEPABLE
 #undef __DEFINE_EVENT
 #undef FIRST
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 9e6d8d0c8af5..0a12f52fe8a9 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -4827,12 +4827,6 @@ static const struct bpf_func_proto bpf_sys_bpf_proto = {
 	.arg3_type	= ARG_CONST_SIZE,
 };
 
-const struct bpf_func_proto * __weak
-tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
-{
-	return bpf_base_func_proto(func_id);
-}
-
 BPF_CALL_1(bpf_sys_close, u32, fd)
 {
 	/* When bpf program calls this helper there should not be
@@ -5045,24 +5039,47 @@ const struct bpf_func_proto bpf_unlink_proto = {
 	.arg2_type	= ARG_CONST_SIZE_OR_ZERO,
 };
 
-static const struct bpf_func_proto *
-syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+/* Syscall helpers that are also allowed in sleepable tracing prog. */
+const struct bpf_func_proto *
+tracing_prog_syscall_func_proto(enum bpf_func_id func_id,
+				const struct bpf_prog *prog)
 {
 	switch (func_id) {
 	case BPF_FUNC_sys_bpf:
 		return &bpf_sys_bpf_proto;
-	case BPF_FUNC_btf_find_by_name_kind:
-		return &bpf_btf_find_by_name_kind_proto;
 	case BPF_FUNC_sys_close:
 		return &bpf_sys_close_proto;
-	case BPF_FUNC_kallsyms_lookup_name:
-		return &bpf_kallsyms_lookup_name_proto;
 	case BPF_FUNC_mkdir:
 		return &bpf_mkdir_proto;
 	case BPF_FUNC_rmdir:
 		return &bpf_rmdir_proto;
 	case BPF_FUNC_unlink:
 		return &bpf_unlink_proto;
+	default:
+		return NULL;
+	}
+}
+
+const struct bpf_func_proto * __weak
+tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	const struct bpf_func_proto *fn;
+
+	fn = tracing_prog_syscall_func_proto(func_id, prog);
+	if (fn)
+		return fn;
+
+	return bpf_base_func_proto(func_id);
+}
+
+static const struct bpf_func_proto *
+syscall_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
+{
+	switch (func_id) {
+	case BPF_FUNC_btf_find_by_name_kind:
+		return &bpf_btf_find_by_name_kind_proto;
+	case BPF_FUNC_kallsyms_lookup_name:
+		return &bpf_kallsyms_lookup_name_proto;
 	default:
 		return tracing_prog_func_proto(func_id, prog);
 	}
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index a2024ba32a20..c816e0e0d4a0 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1691,6 +1691,8 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		fn = raw_tp_prog_func_proto(func_id, prog);
 		if (!fn && prog->expected_attach_type == BPF_TRACE_ITER)
 			fn = bpf_iter_get_func_proto(func_id, prog);
+		if (!fn && prog->aux->sleepable)
+			fn = tracing_prog_syscall_func_proto(func_id, prog);
 		return fn;
 	}
 }
@@ -2053,6 +2055,9 @@ static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *
 	if (prog->aux->max_tp_access > btp->writable_size)
 		return -EINVAL;
 
+	if (prog->aux->sleepable && !btp->sleepable)
+		return -EPERM;
+
 	return tracepoint_probe_register_may_exist(tp, (void *)btp->bpf_func,
 						   prog);
 }
-- 
2.35.1.574.g5d30c73bfb-goog


  parent reply	other threads:[~2022-02-25 23:44 UTC|newest]

Thread overview: 54+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-02-25 23:43 [PATCH bpf-next v1 0/9] Extend cgroup interface with bpf Hao Luo
2022-02-25 23:43 ` [PATCH bpf-next v1 1/9] bpf: Add mkdir, rmdir, unlink syscalls for prog_bpf_syscall Hao Luo
2022-02-27  5:18   ` Kumar Kartikeya Dwivedi
2022-02-28 22:10     ` Hao Luo
2022-03-02 19:34       ` Alexei Starovoitov
2022-03-03 18:50         ` Hao Luo
2022-03-04 18:37           ` Hao Luo
2022-03-05 23:47             ` Alexei Starovoitov
2022-03-08 21:08               ` Hao Luo
2022-03-02 20:55   ` Yonghong Song
2022-03-03 18:56     ` Hao Luo
2022-03-03 19:13       ` Yonghong Song
2022-03-03 19:15         ` Hao Luo
2022-03-12  3:46   ` Al Viro
2022-03-14 17:07     ` Hao Luo
2022-03-14 23:10       ` Al Viro
2022-03-15 17:27         ` Hao Luo
2022-03-15 18:59           ` Alexei Starovoitov
2022-03-15 19:03             ` Alexei Starovoitov
2022-03-15 19:00           ` Al Viro
2022-03-15 19:47             ` Hao Luo
2022-02-25 23:43 ` [PATCH bpf-next v1 2/9] bpf: Add BPF_OBJ_PIN and BPF_OBJ_GET in the bpf_sys_bpf helper Hao Luo
2022-02-25 23:43 ` [PATCH bpf-next v1 3/9] selftests/bpf: tests mkdir, rmdir, unlink and pin in syscall Hao Luo
2022-02-25 23:43 ` Hao Luo [this message]
2022-03-02 19:41   ` [PATCH bpf-next v1 4/9] bpf: Introduce sleepable tracepoints Alexei Starovoitov
2022-03-03 19:37     ` Hao Luo
2022-03-03 19:59       ` Alexei Starovoitov
2022-03-02 21:23   ` Yonghong Song
2022-03-02 21:30     ` Alexei Starovoitov
2022-03-03  1:08       ` Yonghong Song
2022-03-03  2:29         ` Alexei Starovoitov
2022-03-03 19:43           ` Hao Luo
2022-03-03 20:02             ` Alexei Starovoitov
2022-03-03 20:04               ` Alexei Starovoitov
2022-03-03 22:06                 ` Hao Luo
2022-02-25 23:43 ` [PATCH bpf-next v1 5/9] cgroup: Sleepable cgroup tracepoints Hao Luo
2022-02-25 23:43 ` [PATCH bpf-next v1 6/9] libbpf: Add sleepable tp_btf Hao Luo
2022-02-25 23:43 ` [PATCH bpf-next v1 7/9] bpf: Lift permission check in __sys_bpf when called from kernel Hao Luo
2022-03-02 20:01   ` Alexei Starovoitov
2022-03-03 19:14     ` Hao Luo
2022-02-25 23:43 ` [PATCH bpf-next v1 8/9] bpf: Introduce cgroup iter Hao Luo
2022-02-26  2:32   ` kernel test robot
2022-02-26  2:32   ` kernel test robot
2022-02-26  2:53   ` kernel test robot
2022-03-02 21:59   ` Yonghong Song
2022-03-03 20:02     ` Hao Luo
2022-03-02 22:45   ` Kumar Kartikeya Dwivedi
2022-03-03  2:03     ` Yonghong Song
2022-03-03  3:03       ` Kumar Kartikeya Dwivedi
2022-03-03  4:00         ` Alexei Starovoitov
2022-03-03  7:33         ` Yonghong Song
2022-03-03  8:13           ` Kumar Kartikeya Dwivedi
2022-03-03 21:52           ` Hao Luo
2022-02-25 23:43 ` [PATCH bpf-next v1 9/9] selftests/bpf: Tests using sleepable tracepoints to monitor cgroup events Hao Luo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220225234339.2386398-5-haoluo@google.com \
    --to=haoluo@google.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=jevburton.kernel@gmail.com \
    --cc=joshdon@google.com \
    --cc=kafai@fb.com \
    --cc=kpsingh@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=sdf@google.com \
    --cc=shakeelb@google.com \
    --cc=songliubraving@fb.com \
    --cc=tj@kernel.org \
    --cc=yhs@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.