Re: [PATCH bpf-next v4 1/2] perf: Refactor get_perf_callchain

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Tao Chen <chen.dylane@linux.dev>
To: bot+bpf-ci@kernel.org, peterz@infradead.org, mingo@redhat.com,
	acme@kernel.org, namhyung@kernel.org, mark.rutland@arm.com,
	alexander.shishkin@linux.intel.com, jolsa@kernel.org,
	irogers@google.com, adrian.hunter@intel.com,
	kan.liang@linux.intel.com, song@kernel.org, ast@kernel.org,
	daniel@iogearbox.net, andrii@kernel.org, martin.lau@linux.dev,
	eddyz87@gmail.com, yonghong.song@linux.dev,
	john.fastabend@gmail.com, kpsingh@kernel.org, sdf@fomichev.me,
	haoluo@google.com
Cc: linux-perf-users@vger.kernel.org, linux-kernel@vger.kernel.org,
	bpf@vger.kernel.org, martin.lau@kernel.org, clm@meta.com,
	ihor.solodrai@linux.dev
Subject: Re: [PATCH bpf-next v4 1/2] perf: Refactor get_perf_callchain
Date: Thu, 30 Oct 2025 10:36:46 +0800	[thread overview]
Message-ID: <2dbebe3a-5b96-4076-a3e3-00ae8de990d8@linux.dev> (raw)
In-Reply-To: <a48e281e6912037490270da40f79e45c1ad1e430c8c1cc84c48306106b06113a@mail.kernel.org>

在 2025/10/29 01:09, bot+bpf-ci@kernel.org 写道:
> 
> ```
> commit 59cfc61f83ad4ec998f4242ca000c554e3001b47
> Author: Tao Chen <chen.dylane@linux.dev>
> 
> perf: Refactor get_perf_callchain
> 
> This patch refactors get_perf_callchain() by extracting logic into new
> helper functions to provide more flexible stack-sampling APIs for BPF.
> 
>> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
>> index fd1d91017..14a382cad 100644
>> --- a/include/linux/perf_event.h
>> +++ b/include/linux/perf_event.h
>> @@ -67,6 +67,7 @@ struct perf_callchain_entry_ctx {
>>   	u32				nr;
>>   	short				contexts;
>>   	bool				contexts_maxed;
>> +	bool				add_mark;
>>   };
> 
> [ ... ]
> 
>> @@ -1718,9 +1719,17 @@ DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
>>
>>   extern void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs);
>>   extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs);
>> +
>> +extern void __init_perf_callchain_ctx(struct perf_callchain_entry_ctx *ctx,
>> +				      struct perf_callchain_entry *entry,
>> +				      u32 max_stack, bool add_mark);
>> +
>> +extern void __get_perf_callchain_kernel(struct perf_callchain_entry_ctx *ctx, struct pt_regs *regs);
>> +extern void __get_perf_callchain_user(struct perf_callchain_entry_ctx *ctx, struct pt_regs *regs);
>> +
>>   extern struct perf_callchain_entry *
>>   get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
>> -		   u32 max_stack, bool crosstask, bool add_mark);
>> +		   u32 max_stack, bool crosstask);
> 
>> diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
>> index 4d53cdd13..e28b35c7e 100644
>> --- a/kernel/bpf/stackmap.c
>> +++ b/kernel/bpf/stackmap.c
>> @@ -315,7 +315,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
>>   		max_depth = sysctl_perf_event_max_stack;
>>
>>   	trace = get_perf_callchain(regs, kernel, user, max_depth,
>> -				   false, false);
>> +				   false);
>                                         ^^^^^ removed last parameter
> 
> Previously bpf_get_stackid() passed add_mark=false to get_perf_callchain().
> Now the parameter is gone, but get_perf_callchain() hardcodes add_mark=true
> when calling __init_perf_callchain_ctx(). This means BPF stack traces will
> now include PERF_CONTEXT_KERNEL/PERF_CONTEXT_USER marker values when they
> didn't before.
> 

The entire modification involves changes to two subsystems. To make the 
review easier, the get_perf_callchain has been simply adapted here. In 
the next patch, BPF will no longer use get_perf_callchain, and add_mark 
will be set to false.

> The markers get added by __get_perf_callchain_kernel() and
> __get_perf_callchain_user() when ctx->add_mark is true. These markers are
> sentinel values (-128 for KERNEL, -512 for USER) that appear in the trace
> IP array alongside actual instruction pointers.
> 
>>
>>   	if (unlikely(!trace))
>>   		/* couldn't fetch the stack trace */
>> @@ -452,7 +452,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
>>   		trace = get_callchain_entry_for_task(task, max_depth);
>>   	else
>>   		trace = get_perf_callchain(regs, kernel, user, max_depth,
>> -					   crosstask, false);
>> +					   crosstask);
>                                                     ^^^^^ same issue here
> 
> Same problem in __bpf_get_stack() which was also passing add_mark=false.
> 
> This changes BPF stack map behavior in user-visible ways:
> 
> 1) bpf_get_stackid() hashes the entire trace->ip[] array including any
>     markers, so identical stacks will now produce different stack IDs.
> 
> 2) bpf_get_stack() copies trace->ip[] directly to userspace buffers,
>     so BPF programs will see marker values mixed with instruction pointers.
> 
> Can BPF tools handle these unexpected marker values? The commit message
> says "The next patch will modify the BPF part" but doesn't explain this
> intermediate behavior change.
> 
>> diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
>> index 808c0d7a3..2c36e4906 100644
>> --- a/kernel/events/callchain.c
>> +++ b/kernel/events/callchain.c
> 
> [ ... ]
> 
>>   struct perf_callchain_entry *
>>   get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
>> -		   u32 max_stack, bool crosstask, bool add_mark)
>> +		   u32 max_stack, bool crosstask)
>>   {
>>   	struct perf_callchain_entry *entry;
>>   	struct perf_callchain_entry_ctx ctx;
>> -	int rctx, start_entry_idx;
>> +	int rctx;
>>
>>   	/* crosstask is not supported for user stacks */
>>   	if (crosstask && user && !kernel)
>> @@ -232,34 +273,14 @@ get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
>>   	if (!entry)
>>   		return NULL;
>>
>> -	ctx.entry		= entry;
>> -	ctx.max_stack		= max_stack;
>> -	ctx.nr			= entry->nr = 0;
>> -	ctx.contexts		= 0;
>> -	ctx.contexts_maxed	= false;
>> +	__init_perf_callchain_ctx(&ctx, entry, max_stack, true);
>                                                                  ^^^^
> Here's where add_mark is hardcoded to true for all callers. The old code
> allowed callers to control this via parameter. BPF was passing false,
> perf_callchain() was passing true. Now both get true unconditionally.
> 
> [ ... ]
> 
>> diff --git a/kernel/events/core.c b/kernel/events/core.c
>> index 7541f6f85..eb0f11059 100644
>> --- a/kernel/events/core.c
>> +++ b/kernel/events/core.c
>> @@ -8218,7 +8218,7 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs)
>>   		return &__empty_callchain;
>>
>>   	callchain = get_perf_callchain(regs, kernel, user,
>> -				       max_stack, crosstask, true);
>> +				       max_stack, crosstask);
>>   	return callchain ?: &__empty_callchain;
>>   }
> 
> 
> ```
> 
> ---
> AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
> See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
> 
> In-Reply-To-Subject: `perf: Refactor get_perf_callchain`
> CI run summary: https://github.com/kernel-patches/bpf/actions/runs/18882141979


-- 
Best Regards
Tao Chen

next prev parent reply	other threads:[~2025-10-30  2:37 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-28 16:25 [PATCH bpf-next v4 0/2] Pass external callchain entry to get_perf_callchain Tao Chen
2025-10-28 16:25 ` [PATCH bpf-next v4 1/2] perf: Refactor get_perf_callchain Tao Chen
2025-10-28 17:09   ` bot+bpf-ci
2025-10-30  2:36     ` Tao Chen [this message]
2025-11-05 20:45   ` Yonghong Song
2025-11-06  3:28     ` Tao Chen
2025-10-28 16:25 ` [PATCH bpf-next v4 2/2] bpf: Hold the perf callchain entry until used completely Tao Chen
2025-11-05 22:16   ` Yonghong Song
2025-11-06  5:12     ` Tao Chen
2025-11-06  6:20       ` Yonghong Song
2025-11-06  7:08         ` Tao Chen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=2dbebe3a-5b96-4076-a3e3-00ae8de990d8@linux.dev \
    --to=chen.dylane@linux.dev \
    --cc=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bot+bpf-ci@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=clm@meta.com \
    --cc=daniel@iogearbox.net \
    --cc=eddyz87@gmail.com \
    --cc=haoluo@google.com \
    --cc=ihor.solodrai@linux.dev \
    --cc=irogers@google.com \
    --cc=john.fastabend@gmail.com \
    --cc=jolsa@kernel.org \
    --cc=kan.liang@linux.intel.com \
    --cc=kpsingh@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=martin.lau@kernel.org \
    --cc=martin.lau@linux.dev \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    --cc=sdf@fomichev.me \
    --cc=song@kernel.org \
    --cc=yonghong.song@linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.