linux-perf-users.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Namhyung Kim <namhyung@kernel.org>
To: Arnaldo Carvalho de Melo <acme@kernel.org>, Jiri Olsa <jolsa@kernel.org>
Cc: Ingo Molnar <mingo@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	LKML <linux-kernel@vger.kernel.org>,
	Andi Kleen <ak@linux.intel.com>, Ian Rogers <irogers@google.com>,
	Song Liu <songliubraving@fb.com>, Hao Luo <haoluo@google.com>,
	Milian Wolff <milian.wolff@kdab.com>,
	bpf@vger.kernel.org, linux-perf-users@vger.kernel.org,
	Blake Jones <blakejones@google.com>
Subject: [PATCH 4/4] perf record: Handle argument change in sched_switch
Date: Fri,  6 May 2022 13:16:27 -0700	[thread overview]
Message-ID: <20220506201627.85598-5-namhyung@kernel.org> (raw)
In-Reply-To: <20220506201627.85598-1-namhyung@kernel.org>

Recently sched_switch tracepoint added a new argument for prev_state,
but it's hard to handle the change in a BPF program.  Instead, we can
check the function prototype in BTF before loading the program.

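Thus I add a has_prev_state flag to the BPF program's read-only data and
set it based on the BTF info, so that the single tracepoint handler reads
its arguments from the right positions.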

Signed-off-by: Namhyung Kim <namhyung@kernel.org>
---
 tools/perf/util/bpf_off_cpu.c          | 28 +++++++++++++++
 tools/perf/util/bpf_skel/off_cpu.bpf.c | 48 ++++++++++++++++++++------
 2 files changed, 65 insertions(+), 11 deletions(-)

diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c
index 89f36229041d..31343db68ed3 100644
--- a/tools/perf/util/bpf_off_cpu.c
+++ b/tools/perf/util/bpf_off_cpu.c
@@ -86,6 +86,33 @@ static void off_cpu_finish(void *arg __maybe_unused)
 	off_cpu_bpf__destroy(skel);
 }
 
+/* recent kernel added prev_state arg, check the BTF func proto to detect it */
+static void check_sched_switch_args(void)
+{
+	const struct btf *btf = bpf_object__btf(skel->obj);
+	const struct btf_type *t1, *t2, *t3;
+	u32 type_id;
+
+	/* tracepoint prototypes are exported as 'btf_trace_<event>' typedefs */
+	type_id = btf__find_by_name_kind(btf, "btf_trace_sched_switch",
+					 BTF_KIND_TYPEDEF);
+	if ((s32)type_id < 0)
+		return;
+
+	t1 = btf__type_by_id(btf, type_id);
+	if (t1 == NULL)
+		return;
+	t2 = btf__type_by_id(btf, t1->type);
+	if (t2 == NULL || !btf_is_ptr(t2))
+		return;
+	t3 = btf__type_by_id(btf, t2->type);
+	/* btf_trace func proto has one more argument for the context */
+	if (t3 && btf_is_func_proto(t3) && btf_vlen(t3) == 5) {
+		/* new format: pass prev_state as 2nd arg */
+		skel->rodata->has_prev_state = true;
+	}
+}
+
 int off_cpu_prepare(struct evlist *evlist, struct target *target)
 {
 	int err, fd, i;
@@ -114,6 +141,7 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target)
 	}
 
 	set_max_rlimit();
+	check_sched_switch_args();
 
 	err = off_cpu_bpf__load(skel);
 	if (err) {
diff --git a/tools/perf/util/bpf_skel/off_cpu.bpf.c b/tools/perf/util/bpf_skel/off_cpu.bpf.c
index c35106b9e20b..98eaba95924f 100644
--- a/tools/perf/util/bpf_skel/off_cpu.bpf.c
+++ b/tools/perf/util/bpf_skel/off_cpu.bpf.c
@@ -72,6 +72,8 @@ int enabled = 0;
 int has_cpu = 0;
 int has_task = 0;
 
+const volatile bool has_prev_state = false; /* set by perf via rodata before load */
+
 /*
  * Old kernel used to call it task_struct->state and now it's '__state'.
  * Use BPF CO-RE "ignored suffix rule" to deal with it like below:
@@ -121,22 +123,13 @@ static inline int can_record(struct task_struct *t, int state)
 	return 1;
 }
 
-SEC("tp_btf/sched_switch")
-int on_switch(u64 *ctx)
+static int off_cpu_stat(u64 *ctx, struct task_struct *prev,
+			struct task_struct *next, int state)
 {
 	__u64 ts;
-	int state;
 	__u32 stack_id;
-	struct task_struct *prev, *next;
 	struct tstamp_data *pelem;
 
-	if (!enabled)
-		return 0;
-
-	prev = (struct task_struct *)ctx[1];
-	next = (struct task_struct *)ctx[2];
-	state = get_task_state(prev);
-
 	ts = bpf_ktime_get_ns();
 
 	if (!can_record(prev, state))
@@ -180,4 +173,37 @@ int on_switch(u64 *ctx)
 	return 0;
 }
 
+SEC("tp_btf/sched_switch")
+int on_switch(u64 *ctx)
+{
+	struct task_struct *from, *to;
+
+	if (!enabled)
+		return 0;
+
+	/*
+	 * ctx[] holds the tracepoint arguments in TP_PROTO order.
+	 *
+	 * On older kernels:
+	 *   TP_PROTO(bool preempt, struct task_struct *prev,
+	 *            struct task_struct *next)
+	 */
+	if (!has_prev_state) {
+		from = (struct task_struct *)ctx[1];
+		to = (struct task_struct *)ctx[2];
+		/* no state argument: read it from the previous task */
+		return off_cpu_stat(ctx, from, to, get_task_state(from));
+	}
+
+	/*
+	 * For v5.18+:
+	 *   TP_PROTO(bool preempt, int prev_state,
+	 *            struct task_struct *prev,
+	 *            struct task_struct *next)
+	 */
+	from = (struct task_struct *)ctx[2];
+	to = (struct task_struct *)ctx[3];
+	return off_cpu_stat(ctx, from, to, (int)ctx[1]);
+}
+
 char LICENSE[] SEC("license") = "Dual BSD/GPL";
-- 
2.36.0.512.ge40c2bad7a-goog


  parent reply	other threads:[~2022-05-06 20:16 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2022-05-06 20:16 [RFC 0/4] perf record: Implement off-cpu profiling with BPF (v2) Namhyung Kim
2022-05-06 20:16 ` [PATCH 1/4] perf report: Do not extend sample type of bpf-output event Namhyung Kim
2022-05-10 16:49   ` Arnaldo Carvalho de Melo
2022-05-12  6:00     ` Namhyung Kim
2022-05-06 20:16 ` [PATCH 2/4] perf record: Enable off-cpu analysis with BPF Namhyung Kim
2022-05-10 17:02   ` Arnaldo Carvalho de Melo
2022-05-12  6:13     ` Namhyung Kim
2022-05-06 20:16 ` [PATCH 3/4] perf record: Implement basic filtering for off-cpu Namhyung Kim
2022-05-06 20:16 ` Namhyung Kim [this message]
  -- strict thread matches above, loose matches on Subject: below --
2022-04-22 15:05 [RFC RESEND 0/4] perf record: Implement off-cpu profiling with BPF (v1) Namhyung Kim
2022-04-22 15:05 ` [PATCH 4/4] perf record: Handle argument change in sched_switch Namhyung Kim
2022-04-26 23:55   ` Andrii Nakryiko
2022-04-27 18:14     ` Namhyung Kim
2022-04-27 19:26       ` Andrii Nakryiko
2022-04-28 23:58         ` Namhyung Kim
2022-05-07  0:14           ` Andrii Nakryiko
2022-04-22  5:33 [RFC 0/4] perf record: Implement off-cpu profiling with BPF (v1) Namhyung Kim
2022-04-22  5:34 ` [PATCH 4/4] perf record: Handle argument change in sched_switch Namhyung Kim

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20220506201627.85598-5-namhyung@kernel.org \
    --to=namhyung@kernel.org \
    --cc=acme@kernel.org \
    --cc=ak@linux.intel.com \
    --cc=blakejones@google.com \
    --cc=bpf@vger.kernel.org \
    --cc=haoluo@google.com \
    --cc=irogers@google.com \
    --cc=jolsa@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=milian.wolff@kdab.com \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    --cc=songliubraving@fb.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).