* [PATCH bpf-next v3] selftests/bpf: emit top frequent code lines in veristat
@ 2024-09-30 23:15 Mykyta Yatsenko
2024-10-01 17:38 ` Andrii Nakryiko
2024-10-01 17:40 ` patchwork-bot+netdevbpf
0 siblings, 2 replies; 3+ messages in thread
From: Mykyta Yatsenko @ 2024-09-30 23:15 UTC (permalink / raw)
To: bpf, ast, andrii, daniel, kafai, kernel-team; +Cc: Mykyta Yatsenko
From: Mykyta Yatsenko <yatsenko@meta.com>
Production BPF programs are increasing in number of instructions and states
to the point, where optimising verification process for them is necessary
to avoid running into instruction limit. Authors of those BPF programs
need to analyze verifier output, for example, collecting the most
frequent source code lines to understand which part of the program has
the biggest verification cost.
This patch introduces `--top-src-lines` flag in veristat.
`--top-src-lines=N` makes veristat output N the most popular sorce code
lines, parsed from verification log.
An example of output:
```
sudo ./veristat --top-src-lines=2 bpf_flow.bpf.o
Processing 'bpf_flow.bpf.o'...
Top source lines (_dissect):
4: (bpf_helpers.h:161) asm volatile("r1 = %[ctx]\n\t"
4: (bpf_flow.c:155) if (iph && iph->ihl == 5 &&
...
```
Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
---
tools/testing/selftests/bpf/veristat.c | 127 ++++++++++++++++++++++++-
1 file changed, 125 insertions(+), 2 deletions(-)
diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
index 1ec5c4c47235..977c5eca56f7 100644
--- a/tools/testing/selftests/bpf/veristat.c
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -179,6 +179,7 @@ static struct env {
int files_skipped;
int progs_processed;
int progs_skipped;
+ int top_src_lines;
} env;
static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
@@ -206,6 +207,7 @@ const char argp_program_doc[] =
enum {
OPT_LOG_FIXED = 1000,
OPT_LOG_SIZE = 1001,
+ OPT_TOP_SRC_LINES = 1002,
};
static const struct argp_option opts[] = {
@@ -228,6 +230,7 @@ static const struct argp_option opts[] = {
"Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" },
{ "test-reg-invariants", 'r', NULL, 0,
"Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" },
+ { "top-src-lines", OPT_TOP_SRC_LINES, "N", 0, "Emit N most frequent source code lines" },
{},
};
@@ -337,6 +340,14 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
return -ENOMEM;
env.filename_cnt++;
break;
+ case OPT_TOP_SRC_LINES:
+ errno = 0;
+ env.top_src_lines = strtol(arg, NULL, 10);
+ if (errno) {
+ fprintf(stderr, "invalid top lines N specifier: %s\n", arg);
+ argp_usage(state);
+ }
+ break;
default:
return ARGP_ERR_UNKNOWN;
}
@@ -854,6 +865,115 @@ static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats *
return 0;
}
+struct line_cnt {
+ char *line;
+ int cnt;
+};
+
+static int str_cmp(const void *a, const void *b)
+{
+ const char **str1 = (const char **)a;
+ const char **str2 = (const char **)b;
+
+ return strcmp(*str1, *str2);
+}
+
+static int line_cnt_cmp(const void *a, const void *b)
+{
+ const struct line_cnt *a_cnt = (const struct line_cnt *)a;
+ const struct line_cnt *b_cnt = (const struct line_cnt *)b;
+
+ return b_cnt->cnt - a_cnt->cnt;
+}
+
+static int print_top_src_lines(char * const buf, size_t buf_sz, const char *prog_name)
+{
+ int lines_cap = 0;
+ int lines_size = 0;
+ char **lines = NULL;
+ char *line = NULL;
+ char *state;
+ struct line_cnt *freq = NULL;
+ struct line_cnt *cur;
+ int unique_lines;
+ int err = 0;
+ int i;
+
+ while ((line = strtok_r(line ? NULL : buf, "\n", &state))) {
+ if (strncmp(line, "; ", 2) != 0)
+ continue;
+ line += 2;
+
+ if (lines_size == lines_cap) {
+ char **tmp;
+
+ lines_cap = max(16, lines_cap * 2);
+ tmp = realloc(lines, lines_cap * sizeof(*tmp));
+ if (!tmp) {
+ err = -ENOMEM;
+ goto cleanup;
+ }
+ lines = tmp;
+ }
+ lines[lines_size] = line;
+ lines_size++;
+ }
+
+ if (lines_size == 0)
+ goto cleanup;
+
+ qsort(lines, lines_size, sizeof(*lines), str_cmp);
+
+ freq = calloc(lines_size, sizeof(*freq));
+ if (!freq) {
+ err = -ENOMEM;
+ goto cleanup;
+ }
+
+ cur = freq;
+ cur->line = lines[0];
+ cur->cnt = 1;
+ for (i = 1; i < lines_size; ++i) {
+ if (strcmp(lines[i], cur->line) != 0) {
+ cur++;
+ cur->line = lines[i];
+ cur->cnt = 0;
+ }
+ cur->cnt++;
+ }
+ unique_lines = cur - freq + 1;
+
+ qsort(freq, unique_lines, sizeof(struct line_cnt), line_cnt_cmp);
+
+ printf("Top source lines (%s):\n", prog_name);
+ for (i = 0; i < min(unique_lines, env.top_src_lines); ++i) {
+ const char *src_code = freq[i].line;
+ const char *src_line = NULL;
+ char *split = strrchr(freq[i].line, '@');
+
+ if (split) {
+ src_line = split + 1;
+
+ while (*src_line && isspace(*src_line))
+ src_line++;
+
+ while (split > src_code && isspace(*split))
+ split--;
+ *split = '\0';
+ }
+
+ if (src_line)
+ printf("%5d: (%s)\t%s\n", freq[i].cnt, src_line, src_code);
+ else
+ printf("%5d: %s\n", freq[i].cnt, src_code);
+ }
+
+cleanup:
+ free(freq);
+ free(lines);
+ return err;
+}
+
static int guess_prog_type_by_ctx_name(const char *ctx_name,
enum bpf_prog_type *prog_type,
enum bpf_attach_type *attach_type)
@@ -1009,13 +1129,14 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
stats = &env.prog_stats[env.prog_stat_cnt++];
memset(stats, 0, sizeof(*stats));
- if (env.verbose) {
+ if (env.verbose || env.top_src_lines > 0) {
buf_sz = env.log_size ? env.log_size : 16 * 1024 * 1024;
buf = malloc(buf_sz);
if (!buf)
return -ENOMEM;
/* ensure we always request stats */
- log_level = env.log_level | 4 | (env.log_fixed ? 8 : 0);
+ log_level = (env.top_src_lines > 0 && env.log_level == 0 ? 2 : env.log_level)
+ | 4 | (env.log_fixed ? 8 : 0);
} else {
buf = verif_log_buf;
buf_sz = sizeof(verif_log_buf);
@@ -1048,6 +1169,8 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
filename, prog_name, stats->stats[DURATION],
err ? "failure" : "success", buf);
}
+ if (env.top_src_lines > 0)
+ print_top_src_lines(buf, buf_sz, stats->prog_name);
if (verif_log_buf != buf)
free(buf);
--
2.46.2
^ permalink raw reply related [flat|nested] 3+ messages in thread* Re: [PATCH bpf-next v3] selftests/bpf: emit top frequent code lines in veristat
2024-09-30 23:15 [PATCH bpf-next v3] selftests/bpf: emit top frequent code lines in veristat Mykyta Yatsenko
@ 2024-10-01 17:38 ` Andrii Nakryiko
2024-10-01 17:40 ` patchwork-bot+netdevbpf
1 sibling, 0 replies; 3+ messages in thread
From: Andrii Nakryiko @ 2024-10-01 17:38 UTC (permalink / raw)
To: Mykyta Yatsenko
Cc: bpf, ast, andrii, daniel, kafai, kernel-team, Mykyta Yatsenko
On Mon, Sep 30, 2024 at 4:15 PM Mykyta Yatsenko
<mykyta.yatsenko5@gmail.com> wrote:
>
> From: Mykyta Yatsenko <yatsenko@meta.com>
>
> Production BPF programs are increasing in number of instructions and states
> to the point, where optimising verification process for them is necessary
> to avoid running into instruction limit. Authors of those BPF programs
> need to analyze verifier output, for example, collecting the most
> frequent source code lines to understand which part of the program has
> the biggest verification cost.
>
> This patch introduces `--top-src-lines` flag in veristat.
> `--top-src-lines=N` makes veristat output N the most popular sorce code
> lines, parsed from verification log.
>
> An example of output:
> ```
> sudo ./veristat --top-src-lines=2 bpf_flow.bpf.o
> Processing 'bpf_flow.bpf.o'...
> Top source lines (_dissect):
> 4: (bpf_helpers.h:161) asm volatile("r1 = %[ctx]\n\t"
> 4: (bpf_flow.c:155) if (iph && iph->ihl == 5 &&
> ...
> ```
>
> Signed-off-by: Mykyta Yatsenko <yatsenko@meta.com>
> ---
> tools/testing/selftests/bpf/veristat.c | 127 ++++++++++++++++++++++++-
> 1 file changed, 125 insertions(+), 2 deletions(-)
>
LGTM, I played with it locally, works nicely, thanks!
I did a few tweaks before applying, see below for details.
> diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
> index 1ec5c4c47235..977c5eca56f7 100644
> --- a/tools/testing/selftests/bpf/veristat.c
> +++ b/tools/testing/selftests/bpf/veristat.c
> @@ -179,6 +179,7 @@ static struct env {
> int files_skipped;
> int progs_processed;
> int progs_skipped;
> + int top_src_lines;
> } env;
>
> static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
> @@ -206,6 +207,7 @@ const char argp_program_doc[] =
> enum {
> OPT_LOG_FIXED = 1000,
> OPT_LOG_SIZE = 1001,
> + OPT_TOP_SRC_LINES = 1002,
> };
>
> static const struct argp_option opts[] = {
> @@ -228,6 +230,7 @@ static const struct argp_option opts[] = {
> "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" },
> { "test-reg-invariants", 'r', NULL, 0,
> "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" },
> + { "top-src-lines", OPT_TOP_SRC_LINES, "N", 0, "Emit N most frequent source code lines" },
I added 'S' as a short name, I found --top-src-lines too much pain to type :)
> {},
> };
>
> @@ -337,6 +340,14 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
> return -ENOMEM;
> env.filename_cnt++;
> break;
> + case OPT_TOP_SRC_LINES:
> + errno = 0;
> + env.top_src_lines = strtol(arg, NULL, 10);
> + if (errno) {
> + fprintf(stderr, "invalid top lines N specifier: %s\n", arg);
> + argp_usage(state);
> + }
> + break;
> default:
> return ARGP_ERR_UNKNOWN;
> }
> @@ -854,6 +865,115 @@ static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats *
> return 0;
> }
>
> +struct line_cnt {
> + char *line;
> + int cnt;
> +};
> +
> +static int str_cmp(const void *a, const void *b)
> +{
> + const char **str1 = (const char **)a;
> + const char **str2 = (const char **)b;
> +
> + return strcmp(*str1, *str2);
> +}
> +
> +static int line_cnt_cmp(const void *a, const void *b)
> +{
> + const struct line_cnt *a_cnt = (const struct line_cnt *)a;
> + const struct line_cnt *b_cnt = (const struct line_cnt *)b;
> +
> + return b_cnt->cnt - a_cnt->cnt;
for ordering stability, I added a fallback to line_cnt->line string
comparison, if counts are equal
> +}
> +
[...]
> + printf("Top source lines (%s):\n", prog_name);
> + for (i = 0; i < min(unique_lines, env.top_src_lines); ++i) {
> + const char *src_code = freq[i].line;
> + const char *src_line = NULL;
> + char *split = strrchr(freq[i].line, '@');
> +
> + if (split) {
> + src_line = split + 1;
> +
> + while (*src_line && isspace(*src_line))
> + src_line++;
> +
> + while (split > src_code && isspace(*split))
> + split--;
> + *split = '\0';
> + }
> +
> + if (src_line)
> + printf("%5d: (%s)\t%s\n", freq[i].cnt, src_line, src_code);
> + else
> + printf("%5d: %s\n", freq[i].cnt, src_code);
> + }
> +
added printf("\n") to visually separate the output a bit
> +cleanup:
> + free(freq);
> + free(lines);
> + return err;
> +}
> +
> static int guess_prog_type_by_ctx_name(const char *ctx_name,
> enum bpf_prog_type *prog_type,
> enum bpf_attach_type *attach_type)
> @@ -1009,13 +1129,14 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
> stats = &env.prog_stats[env.prog_stat_cnt++];
> memset(stats, 0, sizeof(*stats));
>
> - if (env.verbose) {
> + if (env.verbose || env.top_src_lines > 0) {
> buf_sz = env.log_size ? env.log_size : 16 * 1024 * 1024;
> buf = malloc(buf_sz);
> if (!buf)
> return -ENOMEM;
> /* ensure we always request stats */
> - log_level = env.log_level | 4 | (env.log_fixed ? 8 : 0);
> + log_level = (env.top_src_lines > 0 && env.log_level == 0 ? 2 : env.log_level)
> + | 4 | (env.log_fixed ? 8 : 0);
this felt a bit too crowded, so I switched this to one extra if, and
left original log_level assignment untouched
> } else {
> buf = verif_log_buf;
> buf_sz = sizeof(verif_log_buf);
> @@ -1048,6 +1169,8 @@ static int process_prog(const char *filename, struct bpf_object *obj, struct bpf
> filename, prog_name, stats->stats[DURATION],
> err ? "failure" : "success", buf);
> }
> + if (env.top_src_lines > 0)
> + print_top_src_lines(buf, buf_sz, stats->prog_name);
>
> if (verif_log_buf != buf)
> free(buf);
> --
> 2.46.2
>
^ permalink raw reply [flat|nested] 3+ messages in thread* Re: [PATCH bpf-next v3] selftests/bpf: emit top frequent code lines in veristat
2024-09-30 23:15 [PATCH bpf-next v3] selftests/bpf: emit top frequent code lines in veristat Mykyta Yatsenko
2024-10-01 17:38 ` Andrii Nakryiko
@ 2024-10-01 17:40 ` patchwork-bot+netdevbpf
1 sibling, 0 replies; 3+ messages in thread
From: patchwork-bot+netdevbpf @ 2024-10-01 17:40 UTC (permalink / raw)
To: Mykyta Yatsenko; +Cc: bpf, ast, andrii, daniel, kafai, kernel-team, yatsenko
Hello:
This patch was applied to bpf/bpf-next.git (master)
by Andrii Nakryiko <andrii@kernel.org>:
On Tue, 1 Oct 2024 00:15:22 +0100 you wrote:
> From: Mykyta Yatsenko <yatsenko@meta.com>
>
> Production BPF programs are increasing in number of instructions and states
> to the point, where optimising verification process for them is necessary
> to avoid running into instruction limit. Authors of those BPF programs
> need to analyze verifier output, for example, collecting the most
> frequent source code lines to understand which part of the program has
> the biggest verification cost.
>
> [...]
Here is the summary with links:
- [bpf-next,v3] selftests/bpf: emit top frequent code lines in veristat
https://git.kernel.org/bpf/bpf-next/c/9502a7de5a61
You are awesome, thank you!
--
Deet-doot-dot, I am a bot.
https://korg.docs.kernel.org/patchwork/pwbot.html
^ permalink raw reply [flat|nested] 3+ messages in thread
end of thread, other threads:[~2024-10-01 17:40 UTC | newest]
Thread overview: 3+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2024-09-30 23:15 [PATCH bpf-next v3] selftests/bpf: emit top frequent code lines in veristat Mykyta Yatsenko
2024-10-01 17:38 ` Andrii Nakryiko
2024-10-01 17:40 ` patchwork-bot+netdevbpf
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox