public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Namhyung Kim <namhyung@kernel.org>
To: Ravi Bangoria <ravi.bangoria@amd.com>
Cc: peterz@infradead.org, mingo@redhat.com, acme@kernel.org,
	irogers@google.com, swapnil.sapkal@amd.com, yu.c.chen@intel.com,
	mark.rutland@arm.com, alexander.shishkin@linux.intel.com,
	jolsa@kernel.org, rostedt@goodmis.org,
	vincent.guittot@linaro.org, bristot@redhat.com,
	adrian.hunter@intel.com, james.clark@arm.com,
	kan.liang@linux.intel.com, gautham.shenoy@amd.com,
	kprateek.nayak@amd.com, juri.lelli@redhat.com,
	yangjihong@bytedance.com, void@manifault.com, tj@kernel.org,
	linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org,
	santosh.shukla@amd.com, ananth.narayan@amd.com,
	sandipan.das@amd.com
Subject: Re: [PATCH 2/5] perf sched stats: Add record and rawdump support
Date: Wed, 25 Sep 2024 23:12:38 -0700	[thread overview]
Message-ID: <ZvT7VkSUpNFKfqge@google.com> (raw)
In-Reply-To: <20240916164722.1838-3-ravi.bangoria@amd.com>

On Mon, Sep 16, 2024 at 04:47:19PM +0000, Ravi Bangoria wrote:
> From: Swapnil Sapkal <swapnil.sapkal@amd.com>
> 
> Define new, perf tool only, sample types and their layouts. Add logic
> to parse /proc/schedstat, convert it to perf sample format and save
> samples to perf.data file with `perf sched stats record` command. Also
> add logic to read perf.data file, interpret schedstat samples and
> print rawdump of samples with `perf script -D`.
> 
> Note that, /proc/schedstat file output is standardized with version
> number. The patch supports v15 but older or newer version can be added
> easily.
> 
> Signed-off-by: Swapnil Sapkal <swapnil.sapkal@amd.com>
> Co-developed-by: Ravi Bangoria <ravi.bangoria@amd.com>
> Signed-off-by: Ravi Bangoria <ravi.bangoria@amd.com>
> ---
>  tools/lib/perf/Documentation/libperf.txt      |   2 +
>  tools/lib/perf/Makefile                       |   2 +-
>  tools/lib/perf/include/perf/event.h           |  42 +++
>  .../lib/perf/include/perf/schedstat-cpu-v15.h |  13 +
>  .../perf/include/perf/schedstat-domain-v15.h  |  40 +++
>  tools/perf/builtin-inject.c                   |   2 +
>  tools/perf/builtin-sched.c                    | 222 +++++++++++++++-
>  tools/perf/util/event.c                       |  98 +++++++
>  tools/perf/util/event.h                       |   2 +
>  tools/perf/util/session.c                     |  20 ++
>  tools/perf/util/synthetic-events.c            | 249 ++++++++++++++++++
>  tools/perf/util/synthetic-events.h            |   3 +
>  tools/perf/util/tool.c                        |  20 ++
>  tools/perf/util/tool.h                        |   4 +-
>  14 files changed, 716 insertions(+), 3 deletions(-)
>  create mode 100644 tools/lib/perf/include/perf/schedstat-cpu-v15.h
>  create mode 100644 tools/lib/perf/include/perf/schedstat-domain-v15.h
> 
> diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt
> index fcfb9499ef9c..39c78682ad2e 100644
> --- a/tools/lib/perf/Documentation/libperf.txt
> +++ b/tools/lib/perf/Documentation/libperf.txt
> @@ -211,6 +211,8 @@ SYNOPSIS
>    struct perf_record_time_conv;
>    struct perf_record_header_feature;
>    struct perf_record_compressed;
> +  struct perf_record_schedstat_cpu;
> +  struct perf_record_schedstat_domain;
>  --
>  
>  DESCRIPTION
> diff --git a/tools/lib/perf/Makefile b/tools/lib/perf/Makefile
> index 3a9b2140aa04..ebbfea891a6a 100644
> --- a/tools/lib/perf/Makefile
> +++ b/tools/lib/perf/Makefile
> @@ -187,7 +187,7 @@ install_lib: libs
>  		$(call do_install_mkdir,$(libdir_SQ)); \
>  		cp -fpR $(LIBPERF_ALL) $(DESTDIR)$(libdir_SQ)
>  
> -HDRS := bpf_perf.h core.h cpumap.h threadmap.h evlist.h evsel.h event.h mmap.h
> +HDRS := bpf_perf.h core.h cpumap.h threadmap.h evlist.h evsel.h event.h mmap.h schedstat-cpu-v15.h schedstat-domain-v15.h
>  INTERNAL_HDRS := cpumap.h evlist.h evsel.h lib.h mmap.h rc_check.h threadmap.h xyarray.h
>  
>  INSTALL_HDRS_PFX := $(DESTDIR)$(prefix)/include/perf
> diff --git a/tools/lib/perf/include/perf/event.h b/tools/lib/perf/include/perf/event.h
> index 37bb7771d914..35be296d68d5 100644
> --- a/tools/lib/perf/include/perf/event.h
> +++ b/tools/lib/perf/include/perf/event.h
> @@ -457,6 +457,44 @@ struct perf_record_compressed {
>  	char			 data[];
>  };
>  
> +struct perf_record_schedstat_cpu_v15 {
> +#define CPU_FIELD(_type, _name, _ver)		_type _name;
> +#include "schedstat-cpu-v15.h"
> +#undef CPU_FIELD
> +};
> +
> +struct perf_record_schedstat_cpu {
> +	struct perf_event_header header;
> +	__u16			 version;
> +	__u64			 timestamp;
> +	__u32			 cpu;

Can you change the layout to minimize the paddings?  Probably better to
add an explicit field for unused bits.


> +	union {
> +		struct perf_record_schedstat_cpu_v15 v15;
> +	};
> +};
> +
> +struct perf_record_schedstat_domain_v15 {
> +#define DOMAIN_FIELD(_type, _name, _ver)	_type _name;
> +#include "schedstat-domain-v15.h"
> +#undef DOMAIN_FIELD
> +};
> +
> +#define DOMAIN_NAME_LEN		16
> +
> +struct perf_record_schedstat_domain {
> +	struct perf_event_header header;
> +	__u16			 version;
> +	__u64			 timestamp;
> +	__u32			 cpu;
> +	__u16			 domain;

Ditto.

> +	char			 name[DOMAIN_NAME_LEN];
> +	union {
> +		struct perf_record_schedstat_domain_v15 v15;
> +	};
> +	__u16			 nr_cpus;
> +	__u8			 cpu_mask[];
> +};
> +
>  enum perf_user_event_type { /* above any possible kernel type */
>  	PERF_RECORD_USER_TYPE_START		= 64,
>  	PERF_RECORD_HEADER_ATTR			= 64,
> @@ -478,6 +516,8 @@ enum perf_user_event_type { /* above any possible kernel type */
>  	PERF_RECORD_HEADER_FEATURE		= 80,
>  	PERF_RECORD_COMPRESSED			= 81,
>  	PERF_RECORD_FINISHED_INIT		= 82,
> +	PERF_RECORD_SCHEDSTAT_CPU		= 83,
> +	PERF_RECORD_SCHEDSTAT_DOMAIN		= 84,
>  	PERF_RECORD_HEADER_MAX
>  };
>  
> @@ -518,6 +558,8 @@ union perf_event {
>  	struct perf_record_time_conv		time_conv;
>  	struct perf_record_header_feature	feat;
>  	struct perf_record_compressed		pack;
> +	struct perf_record_schedstat_cpu	schedstat_cpu;
> +	struct perf_record_schedstat_domain	schedstat_domain;
>  };
>  
>  #endif /* __LIBPERF_EVENT_H */
> diff --git a/tools/lib/perf/include/perf/schedstat-cpu-v15.h b/tools/lib/perf/include/perf/schedstat-cpu-v15.h
> new file mode 100644
> index 000000000000..8e4355ee3705
> --- /dev/null
> +++ b/tools/lib/perf/include/perf/schedstat-cpu-v15.h
> @@ -0,0 +1,13 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifdef CPU_FIELD
> +CPU_FIELD(__u32, yld_count, v15)
> +CPU_FIELD(__u32, array_exp, v15)
> +CPU_FIELD(__u32, sched_count, v15)
> +CPU_FIELD(__u32, sched_goidle, v15)
> +CPU_FIELD(__u32, ttwu_count, v15)
> +CPU_FIELD(__u32, ttwu_local, v15)
> +CPU_FIELD(__u64, rq_cpu_time, v15)
> +CPU_FIELD(__u64, run_delay, v15)
> +CPU_FIELD(__u64, pcount, v15)
> +#endif

Can we have a single schedstat.h containing both CPU fields and domain
fields?  You might require users to define the macro always and get rid
of the ifdef condition here.

Also is there any macro magic to handle the version number?  I think you
can have the number only (15; without 'v') and compare with input if
needed..

Thanks,
Namhyung


> diff --git a/tools/lib/perf/include/perf/schedstat-domain-v15.h b/tools/lib/perf/include/perf/schedstat-domain-v15.h
> new file mode 100644
> index 000000000000..422e713d617a
> --- /dev/null
> +++ b/tools/lib/perf/include/perf/schedstat-domain-v15.h
> @@ -0,0 +1,40 @@
> +/* SPDX-License-Identifier: GPL-2.0 */
> +
> +#ifdef DOMAIN_FIELD
> +DOMAIN_FIELD(__u32, idle_lb_count, v15)
> +DOMAIN_FIELD(__u32, idle_lb_balanced, v15)
> +DOMAIN_FIELD(__u32, idle_lb_failed, v15)
> +DOMAIN_FIELD(__u32, idle_lb_imbalance, v15)
> +DOMAIN_FIELD(__u32, idle_lb_gained, v15)
> +DOMAIN_FIELD(__u32, idle_lb_hot_gained, v15)
> +DOMAIN_FIELD(__u32, idle_lb_nobusyq, v15)
> +DOMAIN_FIELD(__u32, idle_lb_nobusyg, v15)
> +DOMAIN_FIELD(__u32, busy_lb_count, v15)
> +DOMAIN_FIELD(__u32, busy_lb_balanced, v15)
> +DOMAIN_FIELD(__u32, busy_lb_failed, v15)
> +DOMAIN_FIELD(__u32, busy_lb_imbalance, v15)
> +DOMAIN_FIELD(__u32, busy_lb_gained, v15)
> +DOMAIN_FIELD(__u32, busy_lb_hot_gained, v15)
> +DOMAIN_FIELD(__u32, busy_lb_nobusyq, v15)
> +DOMAIN_FIELD(__u32, busy_lb_nobusyg, v15)
> +DOMAIN_FIELD(__u32, newidle_lb_count, v15)
> +DOMAIN_FIELD(__u32, newidle_lb_balanced, v15)
> +DOMAIN_FIELD(__u32, newidle_lb_failed, v15)
> +DOMAIN_FIELD(__u32, newidle_lb_imbalance, v15)
> +DOMAIN_FIELD(__u32, newidle_lb_gained, v15)
> +DOMAIN_FIELD(__u32, newidle_lb_hot_gained, v15)
> +DOMAIN_FIELD(__u32, newidle_lb_nobusyq, v15)
> +DOMAIN_FIELD(__u32, newidle_lb_nobusyg, v15)
> +DOMAIN_FIELD(__u32, alb_count, v15)
> +DOMAIN_FIELD(__u32, alb_failed, v15)
> +DOMAIN_FIELD(__u32, alb_pushed, v15)
> +DOMAIN_FIELD(__u32, sbe_count, v15)
> +DOMAIN_FIELD(__u32, sbe_balanced, v15)
> +DOMAIN_FIELD(__u32, sbe_pushed, v15)
> +DOMAIN_FIELD(__u32, sbf_count, v15)
> +DOMAIN_FIELD(__u32, sbf_balanced, v15)
> +DOMAIN_FIELD(__u32, sbf_pushed, v15)
> +DOMAIN_FIELD(__u32, ttwu_wake_remote, v15)
> +DOMAIN_FIELD(__u32, ttwu_move_affine, v15)
> +DOMAIN_FIELD(__u32, ttwu_move_balance, v15)
> +#endif /* DOMAIN_FIELD */

  parent reply	other threads:[~2024-09-26  6:12 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2024-09-16 16:47 [PATCH 0/5] perf sched: Introduce stats tool Ravi Bangoria
2024-09-16 16:47 ` [PATCH 1/5] sched/stats: Print domain name in /proc/schedstat Ravi Bangoria
2024-09-16 16:47 ` [PATCH 2/5] perf sched stats: Add record and rawdump support Ravi Bangoria
2024-09-17 10:35   ` James Clark
2024-09-18  8:52     ` Sapkal, Swapnil
2024-09-26  6:12   ` Namhyung Kim [this message]
2024-09-27 11:04     ` Sapkal, Swapnil
2024-09-16 16:47 ` [PATCH 3/5] perf sched stats: Add schedstat v16 support Ravi Bangoria
2024-09-26  6:14   ` Namhyung Kim
2024-09-27 11:08     ` Sapkal, Swapnil
2024-09-16 16:47 ` [PATCH 4/5] perf sched stats: Add support for report subcommand Ravi Bangoria
2024-09-16 16:47 ` [PATCH 5/5] perf sched stats: Add support for live mode Ravi Bangoria
2024-09-17 10:35 ` [PATCH 0/5] perf sched: Introduce stats tool James Clark
2024-09-18 13:19   ` Sapkal, Swapnil
2024-09-17 10:57 ` Madadi Vineeth Reddy
2024-09-18  8:43   ` Sapkal, Swapnil
2024-09-18  8:45   ` Sapkal, Swapnil

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=ZvT7VkSUpNFKfqge@google.com \
    --to=namhyung@kernel.org \
    --cc=acme@kernel.org \
    --cc=adrian.hunter@intel.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=ananth.narayan@amd.com \
    --cc=bristot@redhat.com \
    --cc=gautham.shenoy@amd.com \
    --cc=irogers@google.com \
    --cc=james.clark@arm.com \
    --cc=jolsa@kernel.org \
    --cc=juri.lelli@redhat.com \
    --cc=kan.liang@linux.intel.com \
    --cc=kprateek.nayak@amd.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mark.rutland@arm.com \
    --cc=mingo@redhat.com \
    --cc=peterz@infradead.org \
    --cc=ravi.bangoria@amd.com \
    --cc=rostedt@goodmis.org \
    --cc=sandipan.das@amd.com \
    --cc=santosh.shukla@amd.com \
    --cc=swapnil.sapkal@amd.com \
    --cc=tj@kernel.org \
    --cc=vincent.guittot@linaro.org \
    --cc=void@manifault.com \
    --cc=yangjihong@bytedance.com \
    --cc=yu.c.chen@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox