public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Tao Chen <chen.dylane@linux.dev>
To: bot+bpf-ci@kernel.org, peterz@infradead.org, mingo@redhat.com,
	acme@kernel.org, namhyung@kernel.org, mark.rutland@arm.com,
	alexander.shishkin@linux.intel.com, jolsa@kernel.org,
	irogers@google.com, adrian.hunter@intel.com,
	kan.liang@linux.intel.com, song@kernel.org, ast@kernel.org,
	daniel@iogearbox.net, andrii@kernel.org, martin.lau@linux.dev,
	eddyz87@gmail.com, yonghong.song@linux.dev,
	john.fastabend@gmail.com, kpsingh@kernel.org, sdf@fomichev.me,
	haoluo@google.com
Cc: linux-perf-users@vger.kernel.org, linux-kernel@vger.kernel.org,
	bpf@vger.kernel.org, martin.lau@kernel.org, clm@meta.com,
	ihor.solodrai@linux.dev
Subject: Re: [PATCH bpf-next v4 1/2] perf: Refactor get_perf_callchain
Date: Thu, 30 Oct 2025 10:36:46 +0800	[thread overview]
Message-ID: <2dbebe3a-5b96-4076-a3e3-00ae8de990d8@linux.dev> (raw)
In-Reply-To: <a48e281e6912037490270da40f79e45c1ad1e430c8c1cc84c48306106b06113a@mail.kernel.org>

在 2025/10/29 01:09, bot+bpf-ci@kernel.org 写道:
> 
> ```
> commit 59cfc61f83ad4ec998f4242ca000c554e3001b47
> Author: Tao Chen <chen.dylane@linux.dev>
> 
> perf: Refactor get_perf_callchain
> 
> This patch refactors get_perf_callchain() by extracting logic into new
> helper functions to provide more flexible stack-sampling APIs for BPF.
> 
>> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
>> index fd1d91017..14a382cad 100644
>> --- a/include/linux/perf_event.h
>> +++ b/include/linux/perf_event.h
>> @@ -67,6 +67,7 @@ struct perf_callchain_entry_ctx {
>>   	u32				nr;
>>   	short				contexts;
>>   	bool				contexts_maxed;
>> +	bool				add_mark;
>>   };
> 
> [ ... ]
> 
>> @@ -1718,9 +1719,17 @@ DECLARE_PER_CPU(struct perf_callchain_entry, perf_callchain_entry);
>>
>>   extern void perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs);
>>   extern void perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs);
>> +
>> +extern void __init_perf_callchain_ctx(struct perf_callchain_entry_ctx *ctx,
>> +				      struct perf_callchain_entry *entry,
>> +				      u32 max_stack, bool add_mark);
>> +
>> +extern void __get_perf_callchain_kernel(struct perf_callchain_entry_ctx *ctx, struct pt_regs *regs);
>> +extern void __get_perf_callchain_user(struct perf_callchain_entry_ctx *ctx, struct pt_regs *regs);
>> +
>>   extern struct perf_callchain_entry *
>>   get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
>> -		   u32 max_stack, bool crosstask, bool add_mark);
>> +		   u32 max_stack, bool crosstask);
> 
>> diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
>> index 4d53cdd13..e28b35c7e 100644
>> --- a/kernel/bpf/stackmap.c
>> +++ b/kernel/bpf/stackmap.c
>> @@ -315,7 +315,7 @@ BPF_CALL_3(bpf_get_stackid, struct pt_regs *, regs, struct bpf_map *, map,
>>   		max_depth = sysctl_perf_event_max_stack;
>>
>>   	trace = get_perf_callchain(regs, kernel, user, max_depth,
>> -				   false, false);
>> +				   false);
>                                         ^^^^^ removed last parameter
> 
> Previously bpf_get_stackid() passed add_mark=false to get_perf_callchain().
> Now the parameter is gone, but get_perf_callchain() hardcodes add_mark=true
> when calling __init_perf_callchain_ctx(). This means BPF stack traces will
> now include PERF_CONTEXT_KERNEL/PERF_CONTEXT_USER marker values when they
> didn't before.
> 

The overall change touches two subsystems, so to keep this patch easier
to review, get_perf_callchain() is only minimally adapted here: add_mark
is temporarily hardcoded to true for all callers. In the next patch, BPF
stops using get_perf_callchain() entirely and calls the new helpers
directly with add_mark set to false, restoring the previous BPF behavior.

> The markers get added by __get_perf_callchain_kernel() and
> __get_perf_callchain_user() when ctx->add_mark is true. These markers are
> sentinel values (-128 for KERNEL, -512 for USER) that appear in the trace
> IP array alongside actual instruction pointers.
> 
>>
>>   	if (unlikely(!trace))
>>   		/* couldn't fetch the stack trace */
>> @@ -452,7 +452,7 @@ static long __bpf_get_stack(struct pt_regs *regs, struct task_struct *task,
>>   		trace = get_callchain_entry_for_task(task, max_depth);
>>   	else
>>   		trace = get_perf_callchain(regs, kernel, user, max_depth,
>> -					   crosstask, false);
>> +					   crosstask);
>                                                     ^^^^^ same issue here
> 
> Same problem in __bpf_get_stack() which was also passing add_mark=false.
> 
> This changes BPF stack map behavior in user-visible ways:
> 
> 1) bpf_get_stackid() hashes the entire trace->ip[] array including any
>     markers, so identical stacks will now produce different stack IDs.
> 
> 2) bpf_get_stack() copies trace->ip[] directly to userspace buffers,
>     so BPF programs will see marker values mixed with instruction pointers.
> 
> Can BPF tools handle these unexpected marker values? The commit message
> says "The next patch will modify the BPF part" but doesn't explain this
> intermediate behavior change.
> 
>> diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
>> index 808c0d7a3..2c36e4906 100644
>> --- a/kernel/events/callchain.c
>> +++ b/kernel/events/callchain.c
> 
> [ ... ]
> 
>>   struct perf_callchain_entry *
>>   get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
>> -		   u32 max_stack, bool crosstask, bool add_mark)
>> +		   u32 max_stack, bool crosstask)
>>   {
>>   	struct perf_callchain_entry *entry;
>>   	struct perf_callchain_entry_ctx ctx;
>> -	int rctx, start_entry_idx;
>> +	int rctx;
>>
>>   	/* crosstask is not supported for user stacks */
>>   	if (crosstask && user && !kernel)
>> @@ -232,34 +273,14 @@ get_perf_callchain(struct pt_regs *regs, bool kernel, bool user,
>>   	if (!entry)
>>   		return NULL;
>>
>> -	ctx.entry		= entry;
>> -	ctx.max_stack		= max_stack;
>> -	ctx.nr			= entry->nr = 0;
>> -	ctx.contexts		= 0;
>> -	ctx.contexts_maxed	= false;
>> +	__init_perf_callchain_ctx(&ctx, entry, max_stack, true);
>                                                                  ^^^^
> Here's where add_mark is hardcoded to true for all callers. The old code
> allowed callers to control this via parameter. BPF was passing false,
> perf_callchain() was passing true. Now both get true unconditionally.
> 
> [ ... ]
> 
>> diff --git a/kernel/events/core.c b/kernel/events/core.c
>> index 7541f6f85..eb0f11059 100644
>> --- a/kernel/events/core.c
>> +++ b/kernel/events/core.c
>> @@ -8218,7 +8218,7 @@ perf_callchain(struct perf_event *event, struct pt_regs *regs)
>>   		return &__empty_callchain;
>>
>>   	callchain = get_perf_callchain(regs, kernel, user,
>> -				       max_stack, crosstask, true);
>> +				       max_stack, crosstask);
>>   	return callchain ?: &__empty_callchain;
>>   }
> 
> 
> ```
> 
> ---
> AI reviewed your patch. Please fix the bug or email reply why it's not a bug.
> See: https://github.com/kernel-patches/vmtest/blob/master/ci/claude/README.md
> 
> In-Reply-To-Subject: `perf: Refactor get_perf_callchain`
> CI run summary: https://github.com/kernel-patches/bpf/actions/runs/18882141979


-- 
Best Regards
Tao Chen

  reply	other threads:[~2025-10-30  2:37 UTC|newest]

Thread overview: 11+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-10-28 16:25 [PATCH bpf-next v4 0/2] Pass external callchain entry to get_perf_callchain Tao Chen
2025-10-28 16:25 ` [PATCH bpf-next v4 1/2] perf: Refactor get_perf_callchain Tao Chen
2025-10-28 17:09   ` bot+bpf-ci
2025-10-30  2:36     ` Tao Chen [this message]
2025-11-05 20:45   ` Yonghong Song
2025-11-06  3:28     ` Tao Chen
2025-10-28 16:25 ` [PATCH bpf-next v4 2/2] bpf: Hold the perf callchain entry until used completely Tao Chen
2025-11-05 22:16   ` Yonghong Song
2025-11-06  5:12     ` Tao Chen
2025-11-06  6:20       ` Yonghong Song
2025-11-06  7:08         ` Tao Chen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=2dbebe3a-5b96-4076-a3e3-00ae8de990d8@linux.dev \
    --to=chen.dylane@linux.dev \
    --cc=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=andrii@kernel.org \
    --cc=ast@kernel.org \
    --cc=bot+bpf-ci@kernel.org \
    --cc=bpf@vger.kernel.org \
    --cc=clm@meta.com \
    --cc=daniel@iogearbox.net \
    --cc=eddyz87@gmail.com \
    --cc=haoluo@google.com \
    --cc=ihor.solodrai@linux.dev \
    --cc=irogers@google.com \
    --cc=john.fastabend@gmail.com \
    --cc=jolsa@kernel.org \
    --cc=kan.liang@linux.intel.com \
    --cc=kpsingh@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=martin.lau@kernel.org \
    --cc=martin.lau@linux.dev \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    --cc=sdf@fomichev.me \
    --cc=song@kernel.org \
    --cc=yonghong.song@linux.dev \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox