linux-trace-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Yafang Shao <laoar.shao@gmail.com>
To: ast@kernel.org, daniel@iogearbox.net, andrii@kernel.org,
	kafai@fb.com, songliubraving@fb.com, yhs@fb.com,
	john.fastabend@gmail.com, kpsingh@kernel.org, sdf@google.com,
	haoluo@google.com, jolsa@kernel.org, rostedt@goodmis.org,
	mhiramat@kernel.org
Cc: bpf@vger.kernel.org, linux-trace-kernel@vger.kernel.org,
	linux-kernel@vger.kernel.org, Yafang Shao <laoar.shao@gmail.com>
Subject: [PATCH bpf-next 5/6] bpf: Improve tracing recursion prevention mechanism
Date: Mon, 17 Apr 2023 15:47:36 +0000	[thread overview]
Message-ID: <20230417154737.12740-6-laoar.shao@gmail.com> (raw)
In-Reply-To: <20230417154737.12740-1-laoar.shao@gmail.com>

Currently we use prog->active to prevent tracing recursion, but it has
some downsides:

- It can't identify different contexts
  That is, if a process context is interrupted by an irq context and
  the irq context runs the same code path, it will be considered as
  recursion. For example,
    normal:
      this_cpu_inc_return(*(prog->active)) == 1 <- OK

      irq:
        this_cpu_inc_return(*(prog->active)) == 1 <- FAIL!
        [ Considered as recursion ]

- It has to maintain a percpu area
  A percpu area will be allocated for each prog when the prog is loaded
  and be freed when the prog is destroyed.

Let's replace it with the generic tracing recursion prevention mechanism,
which works in any context. In the above example, the irq context
is no longer considered as recursion:
  normal:
    test_recursion_try_acquire() <- OK

    softirq:
      test_recursion_try_acquire() <- OK

      irq:
        test_recursion_try_acquire() <- OK

Note that a single recursion in process context is currently allowed
due to the TRACE_CTX_TRANSITION workaround, which can be fixed in the
future. That is, the behavior below is currently expected:
  normal:
    test_recursion_try_acquire() <- OK
    [ recursion happens ]        <- one single recursion is allowed
    test_recursion_try_acquire() <- OK
    [ recursion happens ]
    test_recursion_try_acquire() <- RECURSION!

Signed-off-by: Yafang Shao <laoar.shao@gmail.com>
---
 include/linux/bpf.h      |  2 +-
 kernel/bpf/core.c        | 10 ----------
 kernel/bpf/trampoline.c  | 44 +++++++++++++++++++++++++++++++++-----------
 kernel/trace/bpf_trace.c | 12 +++++++-----
 4 files changed, 41 insertions(+), 27 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 18b592f..c42ff90 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1467,7 +1467,6 @@ struct bpf_prog {
 	u32			jited_len;	/* Size of jited insns in bytes */
 	u8			tag[BPF_TAG_SIZE];
 	struct bpf_prog_stats __percpu *stats;
-	int __percpu		*active;
 	unsigned int		(*bpf_func)(const void *ctx,
 					    const struct bpf_insn *insn);
 	struct bpf_prog_aux	*aux;		/* Auxiliary fields */
@@ -1813,6 +1812,7 @@ struct bpf_tramp_run_ctx {
 	struct bpf_run_ctx run_ctx;
 	u64 bpf_cookie;
 	struct bpf_run_ctx *saved_run_ctx;
+	int recursion_bit;
 };
 
 static inline struct bpf_run_ctx *bpf_set_run_ctx(struct bpf_run_ctx *new_ctx)
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 7421487..0942ab2 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -103,12 +103,6 @@ struct bpf_prog *bpf_prog_alloc_no_stats(unsigned int size, gfp_t gfp_extra_flag
 		vfree(fp);
 		return NULL;
 	}
-	fp->active = alloc_percpu_gfp(int, bpf_memcg_flags(GFP_KERNEL | gfp_extra_flags));
-	if (!fp->active) {
-		vfree(fp);
-		kfree(aux);
-		return NULL;
-	}
 
 	fp->pages = size / PAGE_SIZE;
 	fp->aux = aux;
@@ -138,7 +132,6 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags)
 
 	prog->stats = alloc_percpu_gfp(struct bpf_prog_stats, gfp_flags);
 	if (!prog->stats) {
-		free_percpu(prog->active);
 		kfree(prog->aux);
 		vfree(prog);
 		return NULL;
@@ -256,7 +249,6 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size,
 		 */
 		fp_old->aux = NULL;
 		fp_old->stats = NULL;
-		fp_old->active = NULL;
 		__bpf_prog_free(fp_old);
 	}
 
@@ -272,7 +264,6 @@ void __bpf_prog_free(struct bpf_prog *fp)
 		kfree(fp->aux);
 	}
 	free_percpu(fp->stats);
-	free_percpu(fp->active);
 	vfree(fp);
 }
 
@@ -1385,7 +1376,6 @@ static void bpf_prog_clone_free(struct bpf_prog *fp)
 	 */
 	fp->aux = NULL;
 	fp->stats = NULL;
-	fp->active = NULL;
 	__bpf_prog_free(fp);
 }
 
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index f61d513..3df39a5 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -842,15 +842,21 @@ static __always_inline u64 notrace bpf_prog_start_time(void)
 static u64 notrace __bpf_prog_enter_recur(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx)
 	__acquires(RCU)
 {
-	rcu_read_lock();
-	migrate_disable();
-
-	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
+	int bit;
 
-	if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
+	rcu_read_lock();
+	bit = test_recursion_try_acquire(_THIS_IP_, _RET_IP_);
+	run_ctx->recursion_bit = bit;
+	if (bit < 0) {
+		preempt_disable_notrace();
 		bpf_prog_inc_misses_counter(prog);
+		preempt_enable_notrace();
 		return 0;
 	}
+
+	migrate_disable();
+
+	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
 	return bpf_prog_start_time();
 }
 
@@ -880,11 +886,16 @@ static void notrace __bpf_prog_exit_recur(struct bpf_prog *prog, u64 start,
 					  struct bpf_tramp_run_ctx *run_ctx)
 	__releases(RCU)
 {
+	if (run_ctx->recursion_bit < 0)
+		goto out;
+
 	bpf_reset_run_ctx(run_ctx->saved_run_ctx);
 
 	update_prog_stats(prog, start);
-	this_cpu_dec(*(prog->active));
 	migrate_enable();
+	test_recursion_release(run_ctx->recursion_bit);
+
+out:
 	rcu_read_unlock();
 }
 
@@ -916,15 +927,21 @@ static void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start,
 u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
 					     struct bpf_tramp_run_ctx *run_ctx)
 {
-	rcu_read_lock_trace();
-	migrate_disable();
-	might_fault();
+	int bit;
 
-	if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
+	rcu_read_lock_trace();
+	bit = test_recursion_try_acquire(_THIS_IP_, _RET_IP_);
+	run_ctx->recursion_bit = bit;
+	if (bit < 0) {
+		preempt_disable_notrace();
 		bpf_prog_inc_misses_counter(prog);
+		preempt_enable_notrace();
 		return 0;
 	}
 
+	migrate_disable();
+	might_fault();
+
 	run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx);
 
 	return bpf_prog_start_time();
@@ -933,11 +950,16 @@ u64 notrace __bpf_prog_enter_sleepable_recur(struct bpf_prog *prog,
 void notrace __bpf_prog_exit_sleepable_recur(struct bpf_prog *prog, u64 start,
 					     struct bpf_tramp_run_ctx *run_ctx)
 {
+	if (run_ctx->recursion_bit < 0)
+		goto out;
+
 	bpf_reset_run_ctx(run_ctx->saved_run_ctx);
 
 	update_prog_stats(prog, start);
-	this_cpu_dec(*(prog->active));
 	migrate_enable();
+	test_recursion_release(run_ctx->recursion_bit);
+
+out:
 	rcu_read_unlock_trace();
 }
 
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index bcf91bc..bb9a4c9 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -2250,16 +2250,18 @@ void bpf_put_raw_tracepoint(struct bpf_raw_event_map *btp)
 static __always_inline
 void __bpf_trace_run(struct bpf_prog *prog, u64 *args)
 {
-	cant_sleep();
-	if (unlikely(this_cpu_inc_return(*(prog->active)) != 1)) {
+	int bit;
+
+	bit = test_recursion_try_acquire(_THIS_IP_, _RET_IP_);
+	if (bit < 0) {
 		bpf_prog_inc_misses_counter(prog);
-		goto out;
+		return;
 	}
+	cant_sleep();
 	rcu_read_lock();
 	(void) bpf_prog_run(prog, args);
 	rcu_read_unlock();
-out:
-	this_cpu_dec(*(prog->active));
+	test_recursion_release(bit);
 }
 
 #define UNPACK(...)			__VA_ARGS__
-- 
1.8.3.1


  parent reply	other threads:[~2023-04-17 15:48 UTC|newest]

Thread overview: 25+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2023-04-17 15:47 [PATCH bpf-next 0/6] bpf: Tracing recursion prevention mechanism improvement Yafang Shao
2023-04-17 15:47 ` [PATCH bpf-next 1/6] bpf: Add __rcu_read_{lock,unlock} into btf id deny list Yafang Shao
2023-04-17 15:47 ` [PATCH bpf-next 2/6] tracing: Add generic test_recursion_try_acquire() Yafang Shao
2023-04-20  6:51   ` Masami Hiramatsu
2023-04-17 15:47 ` [PATCH bpf-next 3/6] tracing: Add the comment for allowing one single recursion in process context Yafang Shao
2023-04-17 15:47 ` [PATCH bpf-next 4/6] selftests/bpf: Allow one single recursion in fentry recursion test Yafang Shao
2023-04-17 15:47 ` Yafang Shao [this message]
2023-04-17 20:14   ` [PATCH bpf-next 5/6] bpf: Improve tracing recursion prevention mechanism Alexei Starovoitov
2023-04-18  1:49     ` Yafang Shao
2023-04-18 15:38       ` Alexei Starovoitov
2023-04-19 11:46         ` Yafang Shao
     [not found]           ` <CAADnVQ+FO-+1OALTtgVkcpH3Adc6xS9qjzORyq2vwVtwY2UoxQ@mail.gmail.com>
2023-04-24 21:40             ` Steven Rostedt
2023-04-27  9:57               ` Yafang Shao
2023-04-27 12:15                 ` Yafang Shao
2023-04-27 12:35                   ` Yafang Shao
2023-04-17 23:29   ` kernel test robot
2023-04-27 13:26   ` Steven Rostedt
2023-04-27 14:22     ` Yafang Shao
2023-04-27 15:18       ` Steven Rostedt
2023-04-27 15:23         ` Yafang Shao
2023-04-27 15:36           ` Steven Rostedt
2023-04-27 15:39             ` Alexei Starovoitov
2023-04-27 15:43               ` Yafang Shao
2023-04-27 15:46                 ` Steven Rostedt
2023-04-17 15:47 ` [PATCH bpf-next 6/6] bpf: Remove some denied functions from the btf id deny list Yafang Shao

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20230417154737.12740-6-laoar.shao@gmail.com \
    --to=laoar.shao@gmail.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=daniel@iogearbox.net \
    --cc=haoluo@google.com \
    --cc=john.fastabend@gmail.com \
    --cc=jolsa@kernel.org \
    --cc=kafai@fb.com \
    --cc=kpsingh@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-trace-kernel@vger.kernel.org \
    --cc=mhiramat@kernel.org \
    --cc=rostedt@goodmis.org \
    --cc=sdf@google.com \
    --cc=songliubraving@fb.com \
    --cc=yhs@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).