Re: [PATCH v2 2/4] perf lock: Retrieve owner callstack in bpf program

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Namhyung Kim <namhyung@kernel.org>
To: Chun-Tse Shao <ctshao@google.com>
Cc: linux-kernel@vger.kernel.org, peterz@infradead.org,
	mingo@redhat.com, acme@kernel.org, mark.rutland@arm.com,
	alexander.shishkin@linux.intel.com, jolsa@kernel.org,
	irogers@google.com, adrian.hunter@intel.com,
	kan.liang@linux.intel.com, linux-perf-users@vger.kernel.org,
	bpf@vger.kernel.org
Subject: Re: [PATCH v2 2/4] perf lock: Retrieve owner callstack in bpf program
Date: Mon, 13 Jan 2025 19:35:17 -0800	[thread overview]
Message-ID: <Z4XbdVKyXgjUqZcP@google.com> (raw)
In-Reply-To: <20250113052220.2105645-3-ctshao@google.com>

On Sun, Jan 12, 2025 at 09:20:15PM -0800, Chun-Tse Shao wrote:
> Tracing owner callstack in `contention_begin()` and `contention_end()`,
> and storing in `owner_lock_stat` bpf map.
> 
> Signed-off-by: Chun-Tse Shao <ctshao@google.com>
> ---
>  .../perf/util/bpf_skel/lock_contention.bpf.c  | 152 +++++++++++++++++-
>  1 file changed, 151 insertions(+), 1 deletion(-)
> 
> diff --git a/tools/perf/util/bpf_skel/lock_contention.bpf.c b/tools/perf/util/bpf_skel/lock_contention.bpf.c
> index 05da19fdab23..3f47fbfa237c 100644
> --- a/tools/perf/util/bpf_skel/lock_contention.bpf.c
> +++ b/tools/perf/util/bpf_skel/lock_contention.bpf.c
> @@ -7,6 +7,7 @@
>  #include <asm-generic/errno-base.h>
>  
>  #include "lock_data.h"
> +#include <time.h>
>  
>  /* for collect_lock_syms().  4096 was rejected by the verifier */
>  #define MAX_CPUS  1024
> @@ -178,6 +179,9 @@ int data_fail;
>  int task_map_full;
>  int data_map_full;
>  
> +struct task_struct *bpf_task_from_pid(s32 pid) __ksym;
> +void bpf_task_release(struct task_struct *p) __ksym;

To support old (ancient?) kernels, you can declare them as __weak and
check if one of them is defined and ignore owner stacks on them.  Also
you can check them in user space and turn off the option before loading.

> +
>  static inline __u64 get_current_cgroup_id(void)
>  {
>  	struct task_struct *task;
> @@ -407,6 +411,60 @@ int contention_begin(u64 *ctx)
>  	pelem->flags = (__u32)ctx[1];
>  
>  	if (needs_callstack) {
> +		u32 i = 0;
> +		int owner_pid;
> +		unsigned long *entries;
> +		struct task_struct *task;
> +		cotd *data;
> +
> +		if (!lock_owner)
> +			goto contention_begin_skip_owner_callstack;

Can be it 'skip_owner'?

> +
> +		task = get_lock_owner(pelem->lock, pelem->flags);
> +		if (!task)
> +			goto contention_begin_skip_owner_callstack;
> +
> +		owner_pid = BPF_CORE_READ(task, pid);
> +
> +		entries = bpf_map_lookup_elem(&owner_stacks_entries, &i);
> +		if (!entries)
> +			goto contention_begin_skip_owner_callstack;
> +		for (i = 0; i < max_stack; i++)
> +			entries[i] = 0x0;
> +
> +		task = bpf_task_from_pid(owner_pid);
> +		if (task) {
> +			bpf_get_task_stack(task, entries,
> +					   max_stack * sizeof(unsigned long),
> +					   0);
> +			bpf_task_release(task);
> +		}
> +
> +		data = bpf_map_lookup_elem(&contention_owner_tracing,
> +					   &(pelem->lock));

No need for parenthesis.

> +
> +		// Contention just happens, or corner case `lock` is owned by
> +		// process not `owner_pid`.
> +		if (!data || data->pid != owner_pid) {
> +			cotd first = {
> +				.pid = owner_pid,
> +				.timestamp = pelem->timestamp,
> +				.count = 1,
> +			};
> +			bpf_map_update_elem(&contention_owner_tracing,
> +					    &(pelem->lock), &first, BPF_ANY);
> +			bpf_map_update_elem(&contention_owner_stacks,
> +					    &(pelem->lock), entries, BPF_ANY);

Hmm.. it just discard the old owner data if it comes from a new owner?
Why not save the data into the result for the old lock/callstack?


> +		}
> +		// Contention is going on and new waiter joins.
> +		else {
> +			__sync_fetch_and_add(&data->count, 1);
> +			// TODO: Since for owner the callstack would change at
> +			// different time, We should check and report if the
> +			// callstack is different with the recorded one in
> +			// `contention_owner_stacks`.
> +		}
> +contention_begin_skip_owner_callstack:
>  		pelem->stack_id = bpf_get_stackid(ctx, &stacks,
>  						  BPF_F_FAST_STACK_CMP | stack_skip);
>  		if (pelem->stack_id < 0)
> @@ -443,6 +501,7 @@ int contention_end(u64 *ctx)
>  	struct tstamp_data *pelem;
>  	struct contention_key key = {};
>  	struct contention_data *data;
> +	__u64 timestamp;
>  	__u64 duration;
>  	bool need_delete = false;
>  
> @@ -469,12 +528,103 @@ int contention_end(u64 *ctx)
>  			return 0;
>  		need_delete = true;
>  	}
> -	duration = bpf_ktime_get_ns() - pelem->timestamp;
> +	timestamp = bpf_ktime_get_ns();
> +	duration = timestamp - pelem->timestamp;
>  	if ((__s64)duration < 0) {
>  		__sync_fetch_and_add(&time_fail, 1);
>  		goto out;
>  	}
>  
> +	if (needs_callstack && lock_owner) {
> +		u64 owner_contention_time;
> +		unsigned long *owner_stack;
> +		struct contention_data *cdata;
> +		cotd *otdata;
> +
> +		otdata = bpf_map_lookup_elem(&contention_owner_tracing,
> +					     &(pelem->lock));
> +		owner_stack = bpf_map_lookup_elem(&contention_owner_stacks,
> +						  &(pelem->lock));
> +		if (!otdata || !owner_stack)
> +			goto contention_end_skip_owner_callstack;
> +
> +		owner_contention_time = timestamp - otdata->timestamp;
> +
> +		// Update `owner_lock_stat` if `owner_stack` is
> +		// available.
> +		if (owner_stack[0] != 0x0) {
> +			cdata = bpf_map_lookup_elem(&owner_lock_stat,
> +						    owner_stack);
> +			if (!cdata) {
> +				struct contention_data first = {
> +					.total_time = owner_contention_time,
> +					.max_time = owner_contention_time,
> +					.min_time = owner_contention_time,
> +					.count = 1,
> +					.flags = pelem->flags,
> +				};
> +				bpf_map_update_elem(&owner_lock_stat,
> +						    owner_stack, &first,
> +						    BPF_ANY);
> +			} else {
> +				__sync_fetch_and_add(&cdata->total_time,
> +						     owner_contention_time);
> +				__sync_fetch_and_add(&cdata->count, 1);
> +
> +				/* FIXME: need atomic operations */
> +				if (cdata->max_time < owner_contention_time)
> +					cdata->max_time = owner_contention_time;
> +				if (cdata->min_time > owner_contention_time)
> +					cdata->min_time = owner_contention_time;
> +			}
> +		}
> +
> +		//  No contention is going on, delete `lock` in
> +		//  `contention_owner_tracing` and
> +		//  `contention_owner_stacks`
> +		if (otdata->count <= 1) {
> +			bpf_map_delete_elem(&contention_owner_tracing,
> +					    &(pelem->lock));
> +			bpf_map_delete_elem(&contention_owner_stacks,
> +					    &(pelem->lock));
> +		}
> +		// Contention is still going on, with a new owner
> +		// (current task). `otdata` should be updated accordingly.
> +		else {
> +			(otdata->count)--;

No need for parenthesis, and it needs to be atomic dec.

> +
> +			// If ctx[1] is not 0, the current task terminates lock
> +			// waiting without acquiring it. Owner is not changed.

Please add a comment that ctx[1] has the return code of the lock
function.  Maybe it's better to use a local variable.

Also I think you need to say about the normal case too.  Returing 0
means the waiter now gets the lock and becomes a new owner.  So it needs
to update the owner information.


> +			if (ctx[1] == 0) {
> +				u32 i = 0;
> +				unsigned long *entries = bpf_map_lookup_elem(
> +					&owner_stacks_entries, &i);
> +				if (entries) {
> +					for (i = 0; i < (u32)max_stack; i++)
> +						entries[i] = 0x0;
> +
> +					bpf_get_task_stack(
> +						bpf_get_current_task_btf(),

Same as bpf_get_stack(), right?

> +						entries,
> +						max_stack *
> +							sizeof(unsigned long),
> +						0);
> +					bpf_map_update_elem(
> +						&contention_owner_stacks,
> +						&(pelem->lock), entries,
> +						BPF_ANY);

Please factor out the code if it indents too much.  Or you can use goto
or something to reduce the indentation level.

  if (ret != 0)
  	goto skip_update;

  ...

  if (entries == NULL)
  	goto skip_stack;

  ...

Thanks,
Namhyung

> +				}
> +
> +				otdata->pid = pid;
> +				otdata->timestamp = timestamp;
> +			}
> +
> +			bpf_map_update_elem(&contention_owner_tracing,
> +					    &(pelem->lock), otdata, BPF_ANY);
> +		}
> +	}
> +contention_end_skip_owner_callstack:
> +
>  	switch (aggr_mode) {
>  	case LOCK_AGGR_CALLER:
>  		key.stack_id = pelem->stack_id;
> -- 
> 2.47.1.688.g23fc6f90ad-goog
>

next prev parent reply	other threads:[~2025-01-14  3:35 UTC|newest]

Thread overview: 13+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2025-01-13  5:20 [PATCH v2 0/4] Tracing contention lock owner call stack Chun-Tse Shao
2025-01-13  5:20 ` [PATCH v2 1/4] perf lock: Add bpf maps for owner stack tracing Chun-Tse Shao
2025-01-14  3:05   ` Namhyung Kim
2025-01-21 22:02     ` Chun-Tse Shao
2025-01-24  0:34       ` Namhyung Kim
2025-01-13  5:20 ` [PATCH v2 2/4] perf lock: Retrieve owner callstack in bpf program Chun-Tse Shao
2025-01-14  3:35   ` Namhyung Kim [this message]
2025-01-21 22:35     ` Chun-Tse Shao
2025-01-24  0:39       ` Namhyung Kim
2025-01-13  5:20 ` [PATCH v2 3/4] perf lock: Make rb_tree helper functions generic Chun-Tse Shao
2025-01-13  5:20 ` [PATCH v2 4/4] perf lock: Report owner stack in usermode Chun-Tse Shao
2025-01-14  3:56   ` Namhyung Kim
2025-01-29  0:26 ` [PATCH v2 0/4] Tracing contention lock owner call stack Chun-Tse Shao

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=Z4XbdVKyXgjUqZcP@google.com \
    --to=namhyung@kernel.org \
    --cc=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=bpf@vger.kernel.org \
    --cc=ctshao@google.com \
    --cc=irogers@google.com \
    --cc=jolsa@kernel.org \
    --cc=kan.liang@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.