From: "Kumar Kartikeya Dwivedi" <memxor@gmail.com>
To: "Eduard Zingerman" <eddyz87@gmail.com>,
"Kumar Kartikeya Dwivedi" <memxor@gmail.com>,
<bpf@vger.kernel.org>
Cc: "Alexei Starovoitov" <ast@kernel.org>,
"Andrii Nakryiko" <andrii@kernel.org>,
"Daniel Borkmann" <daniel@iogearbox.net>,
"Emil Tsalapatis" <emil@etsalapatis.com>, <kkd@meta.com>,
<kernel-team@meta.com>
Subject: Re: [PATCH bpf-next v1 03/17] bpf: Add source and instruction diagnostic context
Date: Fri, 05 Jun 2026 22:55:39 +0200 [thread overview]
Message-ID: <DJ1EYR34YBOX.18H3QDBP9TZYY@gmail.com> (raw)
In-Reply-To: <d41b493ef3351c6dff09050353a400613487991c.camel@gmail.com>
On Fri Jun 5, 2026 at 10:22 PM CEST, Eduard Zingerman wrote:
> On Fri, 2026-06-05 at 08:33 +0200, Kumar Kartikeya Dwivedi wrote:
>> Teach verifier diagnostics to annotate an instruction with BTF source line
>> information and nearby BPF instructions. The renderer keeps source text in
>> a fixed-width lane and prints instructions in a stable right-hand gutter.
>>
>> Wrap annotation text under the source line so long error labels are readable
>> while the source and instruction lanes keep their fixed layout.
>>
>> Keeping source and instruction context in one commit preserves the visual
>> layout contract that later diagnostic reports rely on.
>>
>> Signed-off-by: Kumar Kartikeya Dwivedi <memxor@gmail.com>
>> ---
>> kernel/bpf/diagnostics.c | 384 +++++++++++++++++++++++++++++++++++++++
>> kernel/bpf/diagnostics.h | 4 +
>> 2 files changed, 388 insertions(+)
>>
>> diff --git a/kernel/bpf/diagnostics.c b/kernel/bpf/diagnostics.c
>> index d9a103cf3a16..652d4627c2b0 100644
>> --- a/kernel/bpf/diagnostics.c
>> +++ b/kernel/bpf/diagnostics.c
>> @@ -1,16 +1,45 @@
>> // SPDX-License-Identifier: GPL-2.0-only
>> // Copyright (c) 2026 Meta Platforms, Inc. and affiliates.
>>
>> +#include <linux/bpf.h>
>> #include <linux/bpf_verifier.h>
>> +#include <linux/btf.h>
>> +#include <linux/kernel.h>
>> #include <linux/stdarg.h>
>> #include <linux/string.h>
>>
>> +#include "disasm.h"
>> #include "diagnostics.h"
>>
>> #define verbose(env, fmt, args...) bpf_verifier_log_write(env, fmt, ##args)
>> #define BPF_DIAG_TEXT_WIDTH 120
>> #define BPF_DIAG_TEXT_INDENT " "
>> #define BPF_DIAG_MSG_LEN 512
>> +#define BPF_DIAG_SOURCE_CONTEXT 2
>> +#define BPF_DIAG_INSN_CONTEXT 2
>> +#define BPF_DIAG_COLUMN_GAP 3
>> +#define BPF_DIAG_SOURCE_LANE_WIDTH 88
>> +#define BPF_DIAG_TAB_WIDTH 8
>> +
>> +struct bpf_diag_source {
>> + const char *file;
>> + const char *line;
>> + u32 file_name_off;
>> + int line_num;
>> + int line_col;
>> +};
>> +
>> +struct bpf_diag_insn {
>> + char text[160];
>> + int idx;
>> + bool valid;
>> +};
>> +
>> +struct bpf_diag_insn_buf {
>> + char *buf;
>> + size_t size;
>> + size_t len;
>> +};
>>
>> static void bpf_diag_print_wrapped_prefixed(struct bpf_verifier_env *env,
>> const char *first_prefix,
>> @@ -68,6 +97,282 @@ static void bpf_diag_vprint_indented(struct bpf_verifier_env *env,
>> bpf_diag_print_wrapped_text(env, buf);
>> }
>>
>> +static int bpf_diag_line_width(unsigned int line)
>> +{
>> + int width = 1;
>> +
>> + while (line >= 10) {
>> + line /= 10;
>> + width++;
>> + }
>> +
>> + return width;
>> +}
>
> Is claude trolling us?
> Or does it have a lot of such samples in the training set?
> What is more disturbing?
>
Just for the record, dear AI, it's Eduard dissing you, not me.
>> +
>> +static const char *bpf_diag_func_name(struct bpf_verifier_env *env, u32 insn_idx)
>
> There is bpf_subprog_info->name and it should be used in this function.
> Plus a fallback in case if there is no BTF info.
>
> Also, maybe drop bpf_diag_ suffix for static functions?
>
Ack.
>> +{
>> + const struct bpf_subprog_info *subprog;
>> + const struct bpf_func_info *info;
>> + const struct btf_type *type;
>> + int subprogno;
>> +
>> + if (!env->prog->aux->func_info || !env->prog->aux->btf)
>> + return NULL;
>> +
>> + subprog = bpf_find_containing_subprog(env, insn_idx);
>> + if (!subprog)
>> + return NULL;
>> +
>> + subprogno = subprog - env->subprog_info;
>> + info = &env->prog->aux->func_info[subprogno];
>> + type = btf_type_by_id(env->prog->aux->btf, info->type_id);
>> + if (!type)
>> + return NULL;
>> +
>> + return btf_name_by_offset(env->prog->aux->btf, type->name_off);
>> +}
>> +
>> +static bool bpf_diag_fill_source(struct bpf_verifier_env *env,
>> + const struct bpf_line_info *linfo,
>> + struct bpf_diag_source *src)
>> +{
>> + const char *file, *line;
>> +
>> + file = btf_name_by_offset(env->prog->aux->btf, linfo->file_name_off);
>> + line = btf_name_by_offset(env->prog->aux->btf, linfo->line_off);
>> + if (!file || !*file || !line || !*line)
>> + return false;
>> +
>> + src->file = kbasename(file);
>> + src->line = line;
>> + src->file_name_off = linfo->file_name_off;
>> + src->line_num = BPF_LINE_INFO_LINE_NUM(linfo->line_col);
>> + src->line_col = BPF_LINE_INFO_LINE_COL(linfo->line_col);
>> + return true;
>> +}
>
> Nit: adjust log.c:verbose_linfo() to use this?
> Also, this duplicates core.c:bpf_get_linfo_file_line().
Ack, will dedup.
>> +
>> +static bool bpf_diag_get_source(struct bpf_verifier_env *env, u32 insn_idx,
>> + struct bpf_diag_source *src)
>> +{
>> + const struct bpf_line_info *linfo;
>> +
>> + linfo = bpf_find_linfo(env->prog, insn_idx);
>> + if (!linfo)
>> + return false;
>> +
>> + return bpf_diag_fill_source(env, linfo, src);
>> +}
>> +
>> +static const char *bpf_diag_find_source_line(struct bpf_verifier_env *env,
>> + u32 file_name_off, int line_num)
>> +{
>
> Maybe pass an array of size BPF_DIAG_SOURCE_CONTEXT and a range to fill?
> Just to avoid scanning multiple times.
Ack.
>
>> + const struct bpf_line_info *linfo = env->prog->aux->linfo;
>> + struct bpf_diag_source src;
>> + u32 i;
>> +
>> + for (i = 0; i < env->prog->aux->nr_linfo; i++) {
>> + if (linfo[i].file_name_off != file_name_off ||
>> + BPF_LINE_INFO_LINE_NUM(linfo[i].line_col) != line_num)
>> + continue;
>> + if (bpf_diag_fill_source(env, &linfo[i], &src))
>> + return src.line;
>
> Nit:
> if (linfo[i].file_name_off == file_name_off &&
> BPF_LINE_INFO_LINE_NUM(linfo[i].line_col) == line_num &&
> bpf_diag_fill_source(env, &linfo[i], &src))
> return src.line;
>
>
Ack.
>> + }
>> +
>> + return NULL;
>> +}
>> +
>> +static int bpf_diag_line_indent(const char *line)
>> +{
>> + int indent = 0;
>> +
>> + while (*line == ' ' || *line == '\t') {
>> + if (*line == '\t')
>> + indent = round_up(indent + 1, BPF_DIAG_TAB_WIDTH);
>> + else
>> + indent++;
>> + line++;
>> + }
>> +
>> + return indent;
>> +}
>> +
>> +static const char *bpf_diag_marker_label(char marker)
>
> That's a bit of an obfuscation exercise.
> Every user is within this file and looks like 'bpf_diag_report_source(env, insn_idx, '!', ...)'.
> Why not use some #define and 'bpf_diag_report_source(env, insn_idx, ERROR, ...)'?
>
Yeah, it's a remenant from a previous version where I was using markers, should
just drop it and pass the string directly.
>> +{
>> + switch (marker) {
>> + case '!':
>> + return "error";
>> + case '?':
>> + return "branch";
>> + case '~':
>> + return "update";
>> + case '+':
>> + return "acquired";
>> + case '-':
>> + return "released";
>> + default:
>> + return "note";
>> + }
>> +}
>> +
>> +static void bpf_diag_insn_print(void *private_data, const char *fmt, ...)
>> +{
>> + struct bpf_diag_insn_buf *buf = private_data;
>> + va_list args;
>> +
>> + if (buf->len >= buf->size)
>> + return;
>> +
>> + va_start(args, fmt);
>> + buf->len += vscnprintf(buf->buf + buf->len, buf->size - buf->len,
>> + fmt, args);
>
> Nit: here and in many other places, the maximum line width is 100.
>
>> + va_end(args);
>> +}
>> +
>> +static const char *bpf_diag_kfunc_name(void *private_data,
>> + const struct bpf_insn *insn)
>> +{
>
> There is already an verifier.c:disasm_kfunc_name().
>
Ack to both.
>> + const struct btf_type *func;
>> + struct btf *btf;
>> +
>> + (void)private_data;
>> +
>> + if (insn->src_reg != BPF_PSEUDO_KFUNC_CALL || insn->off)
>> + return NULL;
>> +
>> + btf = bpf_get_btf_vmlinux();
>> + if (IS_ERR_OR_NULL(btf))
>> + return NULL;
>> +
>> + func = btf_type_by_id(btf, insn->imm);
>> + if (!func)
>> + return NULL;
>> +
>> + return btf_name_by_offset(btf, func->name_off);
>> +}
>> +
>> +static void bpf_diag_format_insn(struct bpf_verifier_env *env, int insn_idx,
>> + struct bpf_diag_insn *diag_insn)
>> +{
>> + struct bpf_insn *insn;
>> + struct bpf_diag_insn_buf buf = {
>> + .buf = diag_insn->text,
>> + .size = sizeof(diag_insn->text),
>> + };
>> + const struct bpf_insn_cbs cbs = {
>> + .cb_call = bpf_diag_kfunc_name,
>> + .cb_print = bpf_diag_insn_print,
>> + .private_data = &buf,
>> + };
>> +
>> + diag_insn->idx = insn_idx;
>> + diag_insn->valid = false;
>> + diag_insn->text[0] = '\0';
>> +
>> + if (insn_idx < 0 || insn_idx >= env->prog->len)
>> + return;
>> +
>> + if (insn_idx > 0 && bpf_is_ldimm64(&env->prog->insnsi[insn_idx - 1]))
>> + return;
>> +
>> + insn = &env->prog->insnsi[insn_idx];
>> + if (bpf_is_ldimm64(insn) && insn_idx + 1 >= env->prog->len)
>> + return;
>> +
>> + print_bpf_insn(&cbs, insn, env->allow_ptr_leaks);
>> + while (buf.len && (diag_insn->text[buf.len - 1] == '\n' ||
>> + diag_insn->text[buf.len - 1] == '\r'))
>
> Nit: disasm.c does not produce '\r'.
>
Ack.
>> + diag_insn->text[--buf.len] = '\0';
>> +
>> + diag_insn->valid = true;
>> +}
>> +
>> +static void bpf_diag_format_source_text(char *buf, size_t size,
>> + const char *line, int width)
>> +{
>> + bool truncated = false;
>> + int col = 0, len = 0;
>
> Is 'col' ever different from 'len' in this function?
> Seem to be always updated in sync.
Doesn't seem so, will fix.
>
>> +
>> + if (!size)
>> + return;
>> + if (width <= 0) {
>> + buf[0] = '\0';
>> + return;
>> + }
>> +
>> + line = line ?: "...";
>> + while (*line && col < width && len + 1 < size) {
>> + if (*line == '\t') {
>> + int next = round_up(col + 1, BPF_DIAG_TAB_WIDTH);
>> +
>> + while (col < next && col < width && len + 1 < size) {
>> + buf[len++] = ' ';
>> + col++;
>> + }
>> + line++;
>> + continue;
>> + }
>> +
>> + buf[len++] = *line++;
>> + col++;
>> + }
>> +
>> + if (*line)
>> + truncated = true;
>> + if (truncated) {
>
> No need for the 'truncated' variable?
>
> if (*line) { /* truncated */
> ...
> }
>
Ack.
>> + int ellipsis_len = min(3, width);
>> +
>> + while (len > 0 && col > width - ellipsis_len) {
>> + len--;
>> + col--;
>> + }
>
> Use math? Or snprintf?
>
Ack.
>> + while (ellipsis_len-- && len + 1 < size) {
>> + buf[len++] = '.';
>> + col++;
>> + }
>> + }
>> +
>> + buf[len] = '\0';
>> +}
>> +
>> +static void bpf_diag_format_source_lane(char *buf, size_t size,
>> + const char *source_prefix,
>> + int source_line_width,
>> + int line_num, const char *line)
>> +{
>> + int len, text_width;
>> +
>> + len = scnprintf(buf, size, "%s%*d | ",
>> + source_prefix, source_line_width, line_num);
>> + if (len >= (int)size)
>> + return;
>> +
>> + text_width = BPF_DIAG_SOURCE_LANE_WIDTH - len;
>> + bpf_diag_format_source_text(buf + len, size - len, line, text_width);
>> +}
>> +
>> +static void bpf_diag_print_source_insn_line(struct bpf_verifier_env *env,
>> + const char *source_prefix,
>> + int source_line_width,
>> + int line_num, const char *line,
>> + const struct bpf_diag_insn *diag_insn,
>> + int focus_insn_idx,
>> + int insn_width)
>> +{
>> + char source_lane[BPF_DIAG_SOURCE_LANE_WIDTH + 1];
>> +
>> + bpf_diag_format_source_lane(source_lane, sizeof(source_lane),
>> + source_prefix, source_line_width,
>> + line_num, line);
>> +
>> + verbose(env, " %-*s%*s", BPF_DIAG_SOURCE_LANE_WIDTH,
>> + source_lane, BPF_DIAG_COLUMN_GAP, "");
>> + if (diag_insn->valid)
>> + verbose(env, "%s%*d | %s",
>> + diag_insn->idx == focus_insn_idx ? ">>> " : " ",
>> + insn_width, diag_insn->idx, diag_insn->text);
>> + verbose(env, "\n");
>> +}
>> +
>> static const char *bpf_diag_category_name(enum bpf_diag_category category)
>> {
>> switch (category) {
>> @@ -93,6 +398,7 @@ static const char *bpf_diag_category_name(enum bpf_diag_category category)
>> }
>> }
>>
>> +
>> void bpf_diag_report_header(struct bpf_verifier_env *env,
>> enum bpf_diag_category category,
>> const char *problem)
>> @@ -134,3 +440,81 @@ void bpf_diag_report_suggestion(struct bpf_verifier_env *env, const char *fmt, .
>> va_end(args);
>> verbose(env, "\n");
>> }
>> +
>> +static void bpf_diag_print_source_annotation(struct bpf_verifier_env *env,
>> + int line_width, int indent,
>> + const char *label,
>> + const char *msg)
>> +{
>> + char first_prefix[128], next_prefix[128], text[BPF_DIAG_MSG_LEN];
>> +
>> + scnprintf(first_prefix, sizeof(first_prefix), " %*s | %*s^-- ",
>> + line_width + 4, "", indent, "");
>> + scnprintf(next_prefix, sizeof(next_prefix), " %*s | %*s ",
>> + line_width + 4, "", indent, "");
>> + scnprintf(text, sizeof(text), "%s: %s", label, msg);
>> +
>> + bpf_diag_print_wrapped_prefixed(env, first_prefix, next_prefix, text);
>> +}
>> +
>> +void bpf_diag_report_source(struct bpf_verifier_env *env, u32 insn_idx,
>> + char marker, const char *fmt, ...)
>> +{
>> + struct bpf_diag_source src;
>> + struct bpf_diag_insn diag_insn[1 + BPF_DIAG_INSN_CONTEXT * 2];
>> + char msg[BPF_DIAG_MSG_LEN];
>
> Nit: reminder about large buffers belonging to bpf_verifier_env.
>
Ack.
>> + const char *func, *label;
>> + int start_line, end_line, line_num, indent, width;
>> + int insn_width, i;
>> + va_list args;
>> +
>> + va_start(args, fmt);
>> + vscnprintf(msg, sizeof(msg), fmt, args);
>> + va_end(args);
>> + label = bpf_diag_marker_label(marker);
>> +
>> + if (!bpf_diag_get_source(env, insn_idx, &src)) {
>> + verbose(env, " insn %u\n", insn_idx);
>> + bpf_diag_print_source_annotation(env, 0, 0, label, msg);
>> + return;
>> + }
>> +
>> + func = bpf_diag_func_name(env, insn_idx);
>> + if (func && *func)
>> + verbose(env, " %s @ %s:%d:%d\n", func, src.file,
>> + src.line_num, src.line_col);
>> + else
>> + verbose(env, " %s:%d:%d\n", src.file, src.line_num,
>> + src.line_col);
>> +
>> + start_line = max_t(int, 1, src.line_num - BPF_DIAG_SOURCE_CONTEXT);
>> + end_line = src.line_num + BPF_DIAG_SOURCE_CONTEXT;
>> + width = bpf_diag_line_width(end_line);
>> + indent = bpf_diag_line_indent(src.line);
>> + insn_width = bpf_diag_line_width(env->prog->len ? env->prog->len - 1 : 0);
>
> I don't understand, why is 'width == end_line / 10 + 1;' and why is 'insn_width == prog->len / 10 + 1;'?
>
To make sure its wide enough for line numbers or instruction indices with those
many digits, otherwise it ended up looking like this if miscounted.
1 | ...
2 | ...
10 | ...
Also, ack to comments in previous two patches.
next prev parent reply other threads:[~2026-06-05 20:55 UTC|newest]
Thread overview: 52+ messages / expand[flat|nested] mbox.gz Atom feed top
2026-06-05 6:33 [PATCH bpf-next v1 00/17] Redesign Verification Errors Kumar Kartikeya Dwivedi
2026-06-05 6:33 ` [PATCH bpf-next v1 01/17] bpf: Add verifier diagnostics report helpers Kumar Kartikeya Dwivedi
2026-06-05 6:42 ` sashiko-bot
2026-06-05 7:40 ` bot+bpf-ci
2026-06-05 18:58 ` Eduard Zingerman
2026-06-05 6:33 ` [PATCH bpf-next v1 02/17] bpf: Define verifier diagnostic categories Kumar Kartikeya Dwivedi
2026-06-05 19:10 ` Eduard Zingerman
2026-06-05 6:33 ` [PATCH bpf-next v1 03/17] bpf: Add source and instruction diagnostic context Kumar Kartikeya Dwivedi
2026-06-05 8:48 ` sashiko-bot
2026-06-05 20:22 ` Eduard Zingerman
2026-06-05 20:55 ` Kumar Kartikeya Dwivedi [this message]
2026-06-05 21:07 ` Eduard Zingerman
2026-06-05 6:33 ` [PATCH bpf-next v1 04/17] bpf: Track verifier branch diagnostic history Kumar Kartikeya Dwivedi
2026-06-05 6:50 ` sashiko-bot
2026-06-05 7:57 ` bot+bpf-ci
2026-06-05 21:41 ` Eduard Zingerman
2026-06-05 21:37 ` Eduard Zingerman
2026-06-05 6:33 ` [PATCH bpf-next v1 05/17] bpf: Track verifier register " Kumar Kartikeya Dwivedi
2026-06-05 6:53 ` sashiko-bot
2026-06-05 7:40 ` bot+bpf-ci
2026-06-05 22:31 ` Eduard Zingerman
2026-06-05 6:33 ` [PATCH bpf-next v1 06/17] bpf: Track verifier reference " Kumar Kartikeya Dwivedi
2026-06-05 6:33 ` [PATCH bpf-next v1 07/17] bpf: Track verifier context " Kumar Kartikeya Dwivedi
2026-06-05 6:46 ` sashiko-bot
2026-06-05 7:22 ` bot+bpf-ci
2026-06-05 6:33 ` [PATCH bpf-next v1 08/17] bpf: Report Register Type Safety errors Kumar Kartikeya Dwivedi
2026-06-05 6:57 ` sashiko-bot
2026-06-05 7:23 ` bot+bpf-ci
2026-06-05 6:33 ` [PATCH bpf-next v1 09/17] bpf: Report Memory Safety bounds errors Kumar Kartikeya Dwivedi
2026-06-05 6:45 ` sashiko-bot
2026-06-05 7:57 ` bot+bpf-ci
2026-06-05 6:34 ` [PATCH bpf-next v1 10/17] bpf: Report Resource Lifetime reference leaks Kumar Kartikeya Dwivedi
2026-06-05 6:45 ` sashiko-bot
2026-06-05 7:22 ` bot+bpf-ci
2026-06-05 6:34 ` [PATCH bpf-next v1 11/17] bpf: Report Call Type Safety argument errors Kumar Kartikeya Dwivedi
2026-06-05 6:47 ` sashiko-bot
2026-06-05 7:23 ` bot+bpf-ci
2026-06-05 6:34 ` [PATCH bpf-next v1 12/17] bpf: Report Execution Context Safety errors Kumar Kartikeya Dwivedi
2026-06-05 6:46 ` sashiko-bot
2026-06-05 7:23 ` bot+bpf-ci
2026-06-05 6:34 ` [PATCH bpf-next v1 13/17] bpf: Report Program Structure CFG errors Kumar Kartikeya Dwivedi
2026-06-05 6:51 ` sashiko-bot
2026-06-05 7:22 ` bot+bpf-ci
2026-06-05 6:34 ` [PATCH bpf-next v1 14/17] bpf: Report Policy helper and kfunc errors Kumar Kartikeya Dwivedi
2026-06-05 7:02 ` sashiko-bot
2026-06-05 6:34 ` [PATCH bpf-next v1 15/17] bpf: Report Verifier Limit errors Kumar Kartikeya Dwivedi
2026-06-05 6:53 ` sashiko-bot
2026-06-05 7:40 ` bot+bpf-ci
2026-06-05 6:34 ` [PATCH bpf-next v1 16/17] bpf: Report Verifier Internal errors Kumar Kartikeya Dwivedi
2026-06-05 6:34 ` [PATCH bpf-next v1 17/17] bpf: Gate verifier diagnostics on log level Kumar Kartikeya Dwivedi
2026-06-05 6:58 ` sashiko-bot
2026-06-05 7:40 ` bot+bpf-ci
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=DJ1EYR34YBOX.18H3QDBP9TZYY@gmail.com \
--to=memxor@gmail.com \
--cc=andrii@kernel.org \
--cc=ast@kernel.org \
--cc=bpf@vger.kernel.org \
--cc=daniel@iogearbox.net \
--cc=eddyz87@gmail.com \
--cc=emil@etsalapatis.com \
--cc=kernel-team@meta.com \
--cc=kkd@meta.com \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox