All of lore.kernel.org
 help / color / mirror / Atom feed
From: Namhyung Kim <namhyung@gmail.com>
To: Alexei Starovoitov <alexei.starovoitov@gmail.com>
Cc: Jiri Olsa <olsajiri@gmail.com>,
	Andrii Nakryiko <andrii.nakryiko@gmail.com>,
	Daniel Borkmann <daniel@iogearbox.net>,
	Alexei Starovoitov <ast@kernel.org>,
	Andrii Nakryiko <andrii@kernel.org>,
	Hao Sun <sunhao.th@gmail.com>, bpf <bpf@vger.kernel.org>,
	Martin KaFai Lau <kafai@fb.com>, Song Liu <songliubraving@fb.com>,
	Yonghong Song <yhs@fb.com>,
	John Fastabend <john.fastabend@gmail.com>,
	KP Singh <kpsingh@chromium.org>,
	Stanislav Fomichev <sdf@google.com>, Hao Luo <haoluo@google.com>
Subject: Re: [PATCH bpf-next] bpf: Restrict attachment of bpf program to some tracepoints
Date: Tue, 6 Dec 2022 18:14:06 -0800	[thread overview]
Message-ID: <Y4/27g8EHQ9F3bDr@google.com> (raw)
In-Reply-To: <CAADnVQLo1JBTg6iquCFj44AEuAhxj-V7a0T1gwejy1oDBDXcbA@mail.gmail.com>

On Tue, Dec 06, 2022 at 12:09:51PM -0800, Alexei Starovoitov wrote:
> On Mon, Dec 5, 2022 at 4:28 AM Jiri Olsa <olsajiri@gmail.com> wrote:
> > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> > index 3bbd3f0c810c..d27b7dc77894 100644
> > --- a/kernel/trace/bpf_trace.c
> > +++ b/kernel/trace/bpf_trace.c
> > @@ -2252,9 +2252,8 @@ void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
> >  }
> >
> >  static __always_inline
> > -void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
> > +void __bpf_trace_prog_run(struct bpf_prog *prog, u64 *args)
> >  {
> > -       cant_sleep();
> >         if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
> >                 bpf_prog_inc_misses_counter(prog);
> >                 goto out;
> > @@ -2266,6 +2265,22 @@ void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
> >         this_cpu_dec(*(prog->active));
> >  }
> >
> > +static __always_inline
> > +void __bpf_trace_run(struct bpf_raw_event_data *data, u64 *args)
> > +{
> > +       struct bpf_prog *prog = data->prog;
> > +
> > +       cant_sleep();
> > +       if (unlikely(!data->recursion))
> > +               return __bpf_trace_prog_run(prog, args);
> > +
> > +       if (unlikely(this_cpu_inc_return(*(data->recursion))))
> > +               goto out;
> > +       __bpf_trace_prog_run(prog, args);
> > +out:
> > +       this_cpu_dec(*(data->recursion));
> > +}
> 
> This is way too much run-time and memory overhead to address
> this corner case. Pls come up with some other approach.
> Sorry I don't have decent suggestions at the moment.
> For now we can simply disallow attaching to contention_begin.
> 

How about this?  It seems to work for me.

Thanks,
Namhyung

---
 include/linux/trace_events.h    | 14 +++++++
 include/linux/tracepoint-defs.h |  5 +++
 kernel/bpf/syscall.c            | 18 ++++++++-
 kernel/trace/bpf_trace.c        | 65 ++++++++++++++++++++++++++++++---
 4 files changed, 95 insertions(+), 7 deletions(-)

diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h
index 20749bd9db71..461468210a77 100644
--- a/include/linux/trace_events.h
+++ b/include/linux/trace_events.h
@@ -742,6 +742,10 @@ void perf_event_detach_bpf_prog(struct perf_event *event);
 int perf_event_query_prog_array(struct perf_event *event, void __user *info);
 int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
 int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog);
+int bpf_probe_register_norecurse(struct bpf_raw_event_map *btp, struct bpf_prog *prog,
+				 struct bpf_raw_event_data *data);
+int bpf_probe_unregister_norecurse(struct bpf_raw_event_map *btp,
+				   struct bpf_raw_event_data *data);
 struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name);
 void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp);
 int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
@@ -775,6 +779,16 @@ static inline int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf
 {
 	return -EOPNOTSUPP;
 }
+static inline int bpf_probe_register_norecurse(struct bpf_raw_event_map *btp, struct bpf_prog *p,
+					       struct bpf_raw_event_data *data)
+{
+	return -EOPNOTSUPP;
+}
+static inline int bpf_probe_unregister_norecurse(struct bpf_raw_event_map *btp,
+						 struct bpf_raw_event_data *data)
+{
+	return -EOPNOTSUPP;
+}
 static inline struct bpf_raw_event_map *bpf_get_raw_tracepoint(const char *name)
 {
 	return NULL;
diff --git a/include/linux/tracepoint-defs.h b/include/linux/tracepoint-defs.h
index e7c2276be33e..e5adfe606888 100644
--- a/include/linux/tracepoint-defs.h
+++ b/include/linux/tracepoint-defs.h
@@ -53,6 +53,11 @@ struct bpf_raw_event_map {
 	u32			writable_size;
 } __aligned(32);
 
+struct bpf_raw_event_data {
+	struct bpf_prog		*prog;
+	int __percpu		*active;
+};
+
 /*
  * If a tracepoint needs to be called from a header file, it is not
  * recommended to call it directly, as tracepoints in header files
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 35972afb6850..a8be9c443306 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3144,14 +3144,24 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog,
 struct bpf_raw_tp_link {
 	struct bpf_link link;
 	struct bpf_raw_event_map *btp;
+	struct bpf_raw_event_data data;
 };
 
+static bool needs_recursion_check(struct bpf_raw_event_map *btp)
+{
+	return !strcmp(btp->tp->name, "contention_begin");
+}
+
 static void bpf_raw_tp_link_release(struct bpf_link *link)
 {
 	struct bpf_raw_tp_link *raw_tp =
 		container_of(link, struct bpf_raw_tp_link, link);
 
-	bpf_probe_unregister(raw_tp->btp, raw_tp->link.prog);
+	if (needs_recursion_check(raw_tp->btp))
+		bpf_probe_unregister_norecurse(raw_tp->btp, &raw_tp->data);
+	else
+		bpf_probe_unregister(raw_tp->btp, raw_tp->link.prog);
+
 	bpf_put_raw_tracepoint(raw_tp->btp);
 }
 
@@ -3348,7 +3358,11 @@ static int bpf_raw_tp_link_attach(struct bpf_prog *prog,
 		goto out_put_btp;
 	}
 
-	err = bpf_probe_register(link->btp, prog);
+	if (needs_recursion_check(link->btp))
+		err = bpf_probe_register_norecurse(link->btp, prog, &link->data);
+	else
+		err = bpf_probe_register(link->btp, prog);
+
 	if (err) {
 		bpf_link_cleanup(&link_primer);
 		goto out_put_btp;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 3bbd3f0c810c..edbfeff029aa 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2297,7 +2297,20 @@ void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
 		REPEAT(x, COPY, __DL_SEM, __SEQ_0_11);			\
 		__bpf_trace_run(prog, args);				\
 	}								\
-	EXPORT_SYMBOL_GPL(bpf_trace_run##x)
+	EXPORT_SYMBOL_GPL(bpf_trace_run##x);				\
+									\
+	static void bpf_trace_run_norecurse##x(struct bpf_raw_event_data *data,	\
+			      REPEAT(x, SARG, __DL_COM, __SEQ_0_11))	\
+	{								\
+		u64 args[x];						\
+		if (unlikely(this_cpu_inc_return(*(data->active)) != 1)) \
+			goto out;					\
+		REPEAT(x, COPY, __DL_SEM, __SEQ_0_11);			\
+		__bpf_trace_run(data->prog, args);			\
+	out:								\
+		this_cpu_dec(*(data->active));				\
+	}
+
 BPF_TRACE_DEFN_x(1);
 BPF_TRACE_DEFN_x(2);
 BPF_TRACE_DEFN_x(3);
@@ -2311,7 +2324,23 @@ BPF_TRACE_DEFN_x(10);
 BPF_TRACE_DEFN_x(11);
 BPF_TRACE_DEFN_x(12);
 
-static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
+static void *bpf_trace_norecurse_funcs[12] = {
+	(void *)bpf_trace_run_norecurse1,
+	(void *)bpf_trace_run_norecurse2,
+	(void *)bpf_trace_run_norecurse3,
+	(void *)bpf_trace_run_norecurse4,
+	(void *)bpf_trace_run_norecurse5,
+	(void *)bpf_trace_run_norecurse6,
+	(void *)bpf_trace_run_norecurse7,
+	(void *)bpf_trace_run_norecurse8,
+	(void *)bpf_trace_run_norecurse9,
+	(void *)bpf_trace_run_norecurse10,
+	(void *)bpf_trace_run_norecurse11,
+	(void *)bpf_trace_run_norecurse12,
+};
+
+static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog,
+				void *func, void *data)
 {
 	struct tracepoint *tp = btp->tp;
 
@@ -2325,13 +2354,12 @@ static int __bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *
 	if (prog->aux->max_tp_access > btp->writable_size)
 		return -EINVAL;
 
-	return tracepoint_probe_register_may_exist(tp, (void *)btp->bpf_func,
-						   prog);
+	return tracepoint_probe_register_may_exist(tp, func, data);
 }
 
 int bpf_probe_register(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
 {
-	return __bpf_probe_register(btp, prog);
+	return __bpf_probe_register(btp, prog, btp->bpf_func, prog);
 }
 
 int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
@@ -2339,6 +2367,33 @@ int bpf_probe_unregister(struct bpf_raw_event_map *btp, struct bpf_prog *prog)
 	return tracepoint_probe_unregister(btp->tp, (void *)btp->bpf_func, prog);
 }
 
+int bpf_probe_register_norecurse(struct bpf_raw_event_map *btp, struct bpf_prog *prog,
+				 struct bpf_raw_event_data *data)
+{
+	void *bpf_func;
+
+	data->active = alloc_percpu_gfp(int, GFP_KERNEL);
+	if (!data->active)
+		return -ENOMEM;
+
+	data->prog = prog;
+	bpf_func = bpf_trace_norecurse_funcs[btp->num_args];
+	return __bpf_probe_register(btp, prog, bpf_func, data);
+}
+
+int bpf_probe_unregister_norecurse(struct bpf_raw_event_map *btp,
+				   struct bpf_raw_event_data *data)
+{
+	int err;
+	void *bpf_func;
+
+	bpf_func = bpf_trace_norecurse_funcs[btp->num_args];
+	err = tracepoint_probe_unregister(btp->tp, bpf_func, data);
+	free_percpu(data->active);
+
+	return err;
+}
+
 int bpf_get_perf_event_info(const struct perf_event *event, u32 *prog_id,
 			    u32 *fd_type, const char **buf,
 			    u64 *probe_offset, u64 *probe_addr)
-- 
2.39.0.rc0.267.gcb52ba06e7-goog


  reply	other threads:[~2022-12-07  2:14 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-11-21 21:31 [PATCH bpf-next] bpf: Restrict attachment of bpf program to some tracepoints Jiri Olsa
2022-11-24  0:41 ` Daniel Borkmann
2022-11-24  9:42   ` Jiri Olsa
2022-11-24 17:17     ` Alexei Starovoitov
2022-11-25  9:35       ` Jiri Olsa
2022-11-30 23:29         ` Andrii Nakryiko
2022-12-03 17:58           ` Namhyung Kim
2022-12-05 12:28             ` Jiri Olsa
2022-12-06  4:00               ` Namhyung Kim
2022-12-06  8:14                 ` Jiri Olsa
2022-12-06 18:20                   ` Namhyung Kim
2022-12-06 20:09               ` Alexei Starovoitov
2022-12-07  2:14                 ` Namhyung Kim [this message]
2022-12-07  5:23                   ` Hao Sun
2022-12-07 22:58                     ` Namhyung Kim
2022-12-07  8:18                   ` Jiri Olsa
2022-12-07 19:08                     ` Namhyung Kim
2022-12-08  6:15                       ` Namhyung Kim
2022-12-08 12:04                         ` Jiri Olsa
2022-12-04 21:44           ` Jiri Olsa
2022-12-07 13:39             ` Jiri Olsa
2022-12-07 19:10               ` Alexei Starovoitov
2022-12-08  2:47               ` Hao Sun
2022-12-03 17:42       ` Namhyung Kim

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Y4/27g8EHQ9F3bDr@google.com \
    --to=namhyung@gmail.com \
    --cc=alexei.starovoitov@gmail.com \
    --cc=andrii.nakryiko@gmail.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=haoluo@google.com \
    --cc=john.fastabend@gmail.com \
    --cc=kafai@fb.com \
    --cc=kpsingh@chromium.org \
    --cc=olsajiri@gmail.com \
    --cc=sdf@google.com \
    --cc=songliubraving@fb.com \
    --cc=sunhao.th@gmail.com \
    --cc=yhs@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.