From: Namhyung Kim <namhyung@kernel.org>
To: Howard Chu <howardchu95@gmail.com>
Cc: peterz@infradead.org, mingo@redhat.com, acme@kernel.org,
mark.rutland@arm.com, alexander.shishkin@linux.intel.com,
jolsa@kernel.org, irogers@google.com, adrian.hunter@intel.com,
kan.liang@linux.intel.com, linux-perf-users@vger.kernel.org,
linux-kernel@vger.kernel.org
Subject: Re: [PATCH v6 5/8] perf record --off-cpu: Dump total off-cpu time at the end.
Date: Sun, 29 Sep 2024 23:39:29 -0700 [thread overview]
Message-ID: <ZvpHocNuQRBehUWp@google.com> (raw)
In-Reply-To: <20240927202736.767941-6-howardchu95@gmail.com>
On Fri, Sep 27, 2024 at 01:27:33PM -0700, Howard Chu wrote:
> By setting a placeholder sample_type and then writing real data into
> raw_data, we mimic the direct sample method to write data at the end.
>
> Note that some data serve only as placeholders and will be overwritten
> by the data in raw_data. Additionally, since the IP will be updated in
> evsel__parse_sample(), there is no need to handle it in off_cpu_write().
>
> Suggested-by: Namhyung Kim <namhyung@kernel.org>
> Signed-off-by: Howard Chu <howardchu95@gmail.com>
> ---
> tools/perf/util/bpf_off_cpu.c | 116 +++++++++++++++++++++-------------
> 1 file changed, 72 insertions(+), 44 deletions(-)
>
> diff --git a/tools/perf/util/bpf_off_cpu.c b/tools/perf/util/bpf_off_cpu.c
> index f7233a09ec77..2a1cfd7e0b09 100644
> --- a/tools/perf/util/bpf_off_cpu.c
> +++ b/tools/perf/util/bpf_off_cpu.c
> @@ -138,12 +138,19 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
> int ncpus = 1, ntasks = 1, ncgrps = 1;
> struct strlist *pid_slist = NULL;
> struct str_node *pos;
> + struct evsel *evsel;
>
> if (off_cpu_config(evlist) < 0) {
> pr_err("Failed to config off-cpu BPF event\n");
> return -1;
> }
>
> + evsel = evlist__find_evsel_by_str(evlist, OFFCPU_EVENT);
> + if (evsel == NULL) {
> + pr_err("%s evsel not found\n", OFFCPU_EVENT);
> + return -1 ;
> + }
> +
> skel = off_cpu_bpf__open();
> if (!skel) {
> pr_err("Failed to open off-cpu BPF skeleton\n");
> @@ -259,7 +266,6 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
> }
>
> if (evlist__first(evlist)->cgrp) {
> - struct evsel *evsel;
> u8 val = 1;
>
> fd = bpf_map__fd(skel->maps.cgroup_filter);
> @@ -280,6 +286,7 @@ int off_cpu_prepare(struct evlist *evlist, struct target *target,
> }
> }
>
> + skel->bss->sample_type = OFFCPU_EMBEDDED_SAMPLE_TYPES;
Hmm.. you set the sample_type unconditionally which means the embedded
data in the raw area has the fixed format. Then I think you don't need
the sample_type variable at all.
> skel->bss->offcpu_thresh = opts->off_cpu_thresh * 1000;
>
> err = off_cpu_bpf__attach(skel);
> @@ -305,7 +312,8 @@ int off_cpu_write(struct perf_session *session)
> {
> int bytes = 0, size;
> int fd, stack;
> - u64 sample_type, val, sid = 0;
> + u32 raw_size;
> + u64 sample_type_off_cpu, sample_type_bpf_output, val, sid = 0, tstamp = OFF_CPU_TIMESTAMP;
> struct evsel *evsel;
> struct perf_data_file *file = &session->data->file;
> struct off_cpu_key prev, key;
> @@ -315,7 +323,6 @@ int off_cpu_write(struct perf_session *session)
> .misc = PERF_RECORD_MISC_USER,
> },
> };
> - u64 tstamp = OFF_CPU_TIMESTAMP;
>
> skel->bss->enabled = 0;
>
> @@ -325,15 +332,10 @@ int off_cpu_write(struct perf_session *session)
> return 0;
> }
>
> - sample_type = evsel->core.attr.sample_type;
> -
> - if (sample_type & ~OFFCPU_SAMPLE_TYPES) {
> - pr_err("not supported sample type: %llx\n",
> - (unsigned long long)sample_type);
> - return -1;
> - }
> + sample_type_off_cpu = OFFCPU_EMBEDDED_SAMPLE_TYPES;
> + sample_type_bpf_output = evsel->core.attr.sample_type;
>
> - if (sample_type & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) {
> + if (sample_type_bpf_output & (PERF_SAMPLE_ID | PERF_SAMPLE_IDENTIFIER)) {
> if (evsel->core.id)
> sid = evsel->core.id[0];
> }
> @@ -344,49 +346,75 @@ int off_cpu_write(struct perf_session *session)
>
> while (!bpf_map_get_next_key(fd, &prev, &key)) {
> int n = 1; /* start from perf_event_header */
> - int ip_pos = -1;
> + int i = 0; /* raw data index */
>
> bpf_map_lookup_elem(fd, &key, &val);
>
> - if (sample_type & PERF_SAMPLE_IDENTIFIER)
> + /*
> + * Zero-fill some of these fields first, they will be overwritten by the dummy
> + * embedded data (in raw_data) below, when parsing the samples. And because embedded
> + * data is in BPF output, perf script -F without bpf-output field will not work
> + * properly.
> + */
> + if (sample_type_bpf_output & PERF_SAMPLE_IDENTIFIER)
> data.array[n++] = sid;
> - if (sample_type & PERF_SAMPLE_IP) {
> - ip_pos = n;
> - data.array[n++] = 0; /* will be updated */
> - }
> - if (sample_type & PERF_SAMPLE_TID)
> - data.array[n++] = (u64)key.pid << 32 | key.tgid;
> - if (sample_type & PERF_SAMPLE_TIME)
> - data.array[n++] = tstamp;
> - if (sample_type & PERF_SAMPLE_ID)
> - data.array[n++] = sid;
> - if (sample_type & PERF_SAMPLE_CPU)
> + if (sample_type_bpf_output & PERF_SAMPLE_IP)
> data.array[n++] = 0;
> - if (sample_type & PERF_SAMPLE_PERIOD)
> - data.array[n++] = val;
> - if (sample_type & PERF_SAMPLE_CALLCHAIN) {
> - int len = 0;
> -
> - /* data.array[n] is callchain->nr (updated later) */
> - data.array[n + 1] = PERF_CONTEXT_USER;
> - data.array[n + 2] = 0;
> + if (sample_type_bpf_output & PERF_SAMPLE_TID)
> + data.array[n++] = 0;
I'm not sure if it works correctly. You haven't set the callchain
length and 'n' is not updated.
> + if (sample_type_bpf_output & PERF_SAMPLE_TIME)
> + data.array[n++] = tstamp; /* we won't overwrite time */
> + if (sample_type_bpf_output & PERF_SAMPLE_CPU)
> + data.array[n++] = 0;
> + if (sample_type_bpf_output & PERF_SAMPLE_PERIOD)
> + data.array[n++] = 0;
> + if (sample_type_bpf_output & PERF_SAMPLE_RAW) {
> + /*
> + * the format of raw data is as follows:
> + *
> + * [ size ][ data ]
> + * [ data ]
> + * [ data ]
> + * [ data ]
> + * [ data ][ empty]
> + *
> + */
> + if (sample_type_off_cpu & PERF_SAMPLE_TID)
As I said, you can get rid of the sample_type_off_cpu check if it's
always true.
Thanks,
Namhyung
> + off_cpu_raw_data[i++] = (u64)key.pid << 32 | key.tgid;
> + if (sample_type_off_cpu & PERF_SAMPLE_PERIOD)
> + off_cpu_raw_data[i++] = val;
> + if (sample_type_off_cpu & PERF_SAMPLE_CALLCHAIN) {
> + int len = 0;
> +
> + /* off_cpu_raw_data[n] is callchain->nr (updated later) */
> + off_cpu_raw_data[i + 1] = PERF_CONTEXT_USER;
> + off_cpu_raw_data[i + 2] = 0;
> +
> + bpf_map_lookup_elem(stack, &key.stack_id, &off_cpu_raw_data[i + 2]);
> + while (off_cpu_raw_data[i + 2 + len])
> + len++;
> +
> + /* update length of callchain */
> + off_cpu_raw_data[i] = len + 1;
> +
> + /* calculate sample callchain off_cpu_raw_data length */
> + i += len + 2;
> + }
> + if (sample_type_off_cpu & PERF_SAMPLE_CGROUP)
> + off_cpu_raw_data[i++] = key.cgroup_id;
>
> - bpf_map_lookup_elem(stack, &key.stack_id, &data.array[n + 2]);
> - while (data.array[n + 2 + len])
> - len++;
> + raw_size = i * sizeof(u64) + sizeof(u32); /* 4 empty bytes for alignment */
>
> - /* update length of callchain */
> - data.array[n] = len + 1;
> + /* raw_size */
> + memcpy((void *)data.array + n * sizeof(u64), &raw_size, sizeof(raw_size));
>
> - /* update sample ip with the first callchain entry */
> - if (ip_pos >= 0)
> - data.array[ip_pos] = data.array[n + 2];
> + /* raw_data */
> + memcpy((void *)data.array + n * sizeof(u64) + sizeof(u32), off_cpu_raw_data, i * sizeof(u64));
>
> - /* calculate sample callchain data array length */
> - n += len + 2;
> + n += i + 1;
> }
> - if (sample_type & PERF_SAMPLE_CGROUP)
> - data.array[n++] = key.cgroup_id;
> + if (sample_type_bpf_output & PERF_SAMPLE_CGROUP)
> + data.array[n++] = 0;
>
> size = n * sizeof(u64);
> data.hdr.size = size;
> --
> 2.43.0
>
next prev parent reply other threads:[~2024-09-30 6:39 UTC|newest]
Thread overview: 15+ messages / expand[flat|nested] mbox.gz Atom feed top
2024-09-27 20:27 [PATCH v6 0/8] perf record --off-cpu: Dump off-cpu samples directly Howard Chu
2024-09-27 20:27 ` [PATCH v6 1/8] perf evsel: Set off-cpu BPF output to system-wide Howard Chu
2024-09-30 5:37 ` Namhyung Kim
2024-09-27 20:27 ` [PATCH v6 2/8] perf record --off-cpu: Add --off-cpu-thresh Howard Chu
2024-09-30 5:40 ` Namhyung Kim
2024-09-27 20:27 ` [PATCH v6 3/8] perf record --off-cpu: Parse offcpu-time event Howard Chu
2024-09-30 5:51 ` Namhyung Kim
2024-09-27 20:27 ` [PATCH v6 4/8] perf record off-cpu: Dump direct off-cpu samples in BPF Howard Chu
2024-09-30 6:23 ` Namhyung Kim
2024-09-27 20:27 ` [PATCH v6 5/8] perf record --off-cpu: Dump total off-cpu time at the end Howard Chu
2024-09-30 6:39 ` Namhyung Kim [this message]
2024-09-27 20:27 ` [PATCH v6 6/8] perf evsel: Delete unnecessary = 0 Howard Chu
2024-09-27 20:27 ` [PATCH v6 7/8] perf record --off-cpu: Parse BPF output embedded data Howard Chu
2024-09-30 6:51 ` Namhyung Kim
2024-09-27 20:27 ` [PATCH v6 8/8] perf test: Add direct off-cpu dumping test Howard Chu
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=ZvpHocNuQRBehUWp@google.com \
--to=namhyung@kernel.org \
--cc=acme@kernel.org \
--cc=adrian.hunter@intel.com \
--cc=alexander.shishkin@linux.intel.com \
--cc=howardchu95@gmail.com \
--cc=irogers@google.com \
--cc=jolsa@kernel.org \
--cc=kan.liang@linux.intel.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-perf-users@vger.kernel.org \
--cc=mark.rutland@arm.com \
--cc=mingo@redhat.com \
--cc=peterz@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.