* [PATCH 1/8] perf stat record: Keep sample_type 0 for pipe session
2015-12-22 15:43 [PATCHv7 00/25] perf stat: Add scripting support Jiri Olsa
@ 2015-12-22 15:43 ` Jiri Olsa
2015-12-22 15:43 ` [PATCH 2/8] perf script: Process cpu/threads maps Jiri Olsa
` (7 subsequent siblings)
8 siblings, 0 replies; 17+ messages in thread
From: Jiri Olsa @ 2015-12-22 15:43 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: lkml, David Ahern, Ingo Molnar, Namhyung Kim, Peter Zijlstra,
Liang, Kan
For pipe sessions we need to keep sample_type zero,
because script's perf_evsel__check_attr is triggered
by sample_type != 0, and the check would fail on a
stat session.
I was tempted to keep it zero unconditionally, but doing
so only for the pipe session is sufficient. In a perf.data
session we are guarded by the HEADER_STAT feature.
Link: http://lkml.kernel.org/n/tip-77k0kpd1c9beill47ceo8fp3@git.kernel.org
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
tools/perf/builtin-stat.c | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 9805e03ab163..7f568244662b 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -184,11 +184,18 @@ static int create_perf_stat_counter(struct perf_evsel *evsel)
* like tracepoints. Clear it up for counting.
*/
attr->sample_period = 0;
+
/*
* But set sample_type to PERF_SAMPLE_IDENTIFIER, which should be harmless
* while avoiding that older tools show confusing messages.
+ *
+ * However for pipe sessions we need to keep it zero,
+ * because script's perf_evsel__check_attr is triggered
+ * by attr->sample_type != 0, and we can't run it on
+ * stat sessions.
*/
- attr->sample_type = PERF_SAMPLE_IDENTIFIER;
+ if (!(STAT_RECORD && perf_stat.file.is_pipe))
+ attr->sample_type = PERF_SAMPLE_IDENTIFIER;
/*
* Disabling all counters initially, they will be enabled
--
2.4.3
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 2/8] perf script: Process cpu/threads maps
2015-12-22 15:43 [PATCHv7 00/25] perf stat: Add scripting support Jiri Olsa
2015-12-22 15:43 ` [PATCH 1/8] perf stat record: Keep sample_type 0 for pipe session Jiri Olsa
@ 2015-12-22 15:43 ` Jiri Olsa
2015-12-22 15:43 ` [PATCH 3/8] perf script: Process stat config event Jiri Olsa
` (6 subsequent siblings)
8 siblings, 0 replies; 17+ messages in thread
From: Jiri Olsa @ 2015-12-22 15:43 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: lkml, David Ahern, Ingo Molnar, Namhyung Kim, Peter Zijlstra,
Liang, Kan
Add processing of the cpu/thread map events and configure
the session's evlist with these maps.
Tested-by: Kan Liang <kan.liang@intel.com>
Link: http://lkml.kernel.org/n/tip-s3txa1u2qv3bi8uspp4hi7al@git.kernel.org
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
tools/perf/builtin-script.c | 67 +++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 67 insertions(+)
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index bcc3542d9df5..aa6d7cf87dab 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -18,7 +18,11 @@
#include "util/sort.h"
#include "util/data.h"
#include "util/auxtrace.h"
+#include "util/cpumap.h"
+#include "util/thread_map.h"
+#include "util/stat.h"
#include <linux/bitmap.h>
+#include "asm/bug.h"
static char const *script_name;
static char const *generate_script_lang;
@@ -606,6 +610,9 @@ struct perf_script {
bool show_task_events;
bool show_mmap_events;
bool show_switch_events;
+ bool allocated;
+ struct cpu_map *cpus;
+ struct thread_map *threads;
};
static void process_event(struct perf_script *script __maybe_unused, union perf_event *event,
@@ -1682,6 +1689,63 @@ static void script__setup_sample_type(struct perf_script *script)
}
}
+static int set_maps(struct perf_script *script)
+{
+ struct perf_evlist *evlist = script->session->evlist;
+
+ if (!script->cpus || !script->threads)
+ return 0;
+
+ if (WARN_ONCE(script->allocated, "stats double allocation\n"))
+ return -EINVAL;
+
+ perf_evlist__set_maps(evlist, script->cpus, script->threads);
+
+ if (perf_evlist__alloc_stats(evlist, true))
+ return -ENOMEM;
+
+ script->allocated = true;
+ return 0;
+}
+
+static
+int process_thread_map_event(struct perf_tool *tool,
+ union perf_event *event,
+ struct perf_session *session __maybe_unused)
+{
+ struct perf_script *script = container_of(tool, struct perf_script, tool);
+
+ if (script->threads) {
+ pr_warning("Extra thread map event, ignoring.\n");
+ return 0;
+ }
+
+ script->threads = thread_map__new_event(&event->thread_map);
+ if (!script->threads)
+ return -ENOMEM;
+
+ return set_maps(script);
+}
+
+static
+int process_cpu_map_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_session *session __maybe_unused)
+{
+ struct perf_script *script = container_of(tool, struct perf_script, tool);
+
+ if (script->cpus) {
+ pr_warning("Extra cpu map event, ignoring.\n");
+ return 0;
+ }
+
+ script->cpus = cpu_map__new_data(&event->cpu_map.data);
+ if (!script->cpus)
+ return -ENOMEM;
+
+ return set_maps(script);
+}
+
int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
{
bool show_full_info = false;
@@ -1710,6 +1774,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
.auxtrace_info = perf_event__process_auxtrace_info,
.auxtrace = perf_event__process_auxtrace,
.auxtrace_error = perf_event__process_auxtrace_error,
+ .thread_map = process_thread_map_event,
+ .cpu_map = process_cpu_map_event,
.ordered_events = true,
.ordering_requires_timestamps = true,
},
@@ -2063,6 +2129,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
flush_scripting();
out_delete:
+ perf_evlist__free_stats(session->evlist);
perf_session__delete(session);
if (script_started)
--
2.4.3
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 3/8] perf script: Process stat config event
2015-12-22 15:43 [PATCHv7 00/25] perf stat: Add scripting support Jiri Olsa
2015-12-22 15:43 ` [PATCH 1/8] perf stat record: Keep sample_type 0 for pipe session Jiri Olsa
2015-12-22 15:43 ` [PATCH 2/8] perf script: Process cpu/threads maps Jiri Olsa
@ 2015-12-22 15:43 ` Jiri Olsa
2015-12-22 15:43 ` [PATCH 4/8] perf script: Add process_stat/process_stat_interval scripting interface Jiri Olsa
` (5 subsequent siblings)
8 siblings, 0 replies; 17+ messages in thread
From: Jiri Olsa @ 2015-12-22 15:43 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: lkml, David Ahern, Ingo Molnar, Namhyung Kim, Peter Zijlstra,
Liang, Kan
Add processing of the stat config event and initialize
the stat_config object.
Tested-by: Kan Liang <kan.liang@intel.com>
Link: http://lkml.kernel.org/n/tip-1m1s8mahzq38foo32qb7p2a5@git.kernel.org
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
tools/perf/builtin-script.c | 10 ++++++++++
1 file changed, 10 insertions(+)
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index aa6d7cf87dab..a90bc0b81e70 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -36,6 +36,7 @@ static bool print_flags;
static bool nanosecs;
static const char *cpu_list;
static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
+static struct perf_stat_config stat_config;
unsigned int scripting_max_stack = PERF_MAX_STACK_DEPTH;
@@ -1689,6 +1690,14 @@ static void script__setup_sample_type(struct perf_script *script)
}
}
+static int process_stat_config_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_session *session __maybe_unused)
+{
+ perf_event__read_stat_config(&stat_config, &event->stat_config);
+ return 0;
+}
+
static int set_maps(struct perf_script *script)
{
struct perf_evlist *evlist = script->session->evlist;
@@ -1774,6 +1783,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
.auxtrace_info = perf_event__process_auxtrace_info,
.auxtrace = perf_event__process_auxtrace,
.auxtrace_error = perf_event__process_auxtrace_error,
+ .stat_config = process_stat_config_event,
.thread_map = process_thread_map_event,
.cpu_map = process_cpu_map_event,
.ordered_events = true,
--
2.4.3
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 4/8] perf script: Add process_stat/process_stat_interval scripting interface
2015-12-22 15:43 [PATCHv7 00/25] perf stat: Add scripting support Jiri Olsa
` (2 preceding siblings ...)
2015-12-22 15:43 ` [PATCH 3/8] perf script: Process stat config event Jiri Olsa
@ 2015-12-22 15:43 ` Jiri Olsa
2015-12-22 15:43 ` [PATCH 5/8] perf script: Add stat default handlers Jiri Olsa
` (4 subsequent siblings)
8 siblings, 0 replies; 17+ messages in thread
From: Jiri Olsa @ 2015-12-22 15:43 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: lkml, David Ahern, Ingo Molnar, Namhyung Kim, Peter Zijlstra,
Liang, Kan
Python and perl scripting code will define those
callbacks and get stat data.
Tested-by: Kan Liang <kan.liang@intel.com>
Link: http://lkml.kernel.org/n/tip-6802z3siu5f59wdsmhaym3wm@git.kernel.org
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
tools/perf/util/trace-event.h | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h
index b85ee55cca0c..0ebc9dab2c7c 100644
--- a/tools/perf/util/trace-event.h
+++ b/tools/perf/util/trace-event.h
@@ -65,6 +65,7 @@ int tracing_data_put(struct tracing_data *tdata);
struct addr_location;
struct perf_session;
+struct perf_stat_config;
struct scripting_ops {
const char *name;
@@ -75,6 +76,9 @@ struct scripting_ops {
struct perf_sample *sample,
struct perf_evsel *evsel,
struct addr_location *al);
+ void (*process_stat) (struct perf_stat_config *config,
+ struct perf_evsel *evsel, u64 time);
+ void (*process_stat_interval) (u64 time);
int (*generate_script) (struct pevent *pevent, const char *outfile);
};
--
2.4.3
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 5/8] perf script: Add stat default handlers
2015-12-22 15:43 [PATCHv7 00/25] perf stat: Add scripting support Jiri Olsa
` (3 preceding siblings ...)
2015-12-22 15:43 ` [PATCH 4/8] perf script: Add process_stat/process_stat_interval scripting interface Jiri Olsa
@ 2015-12-22 15:43 ` Jiri Olsa
2015-12-23 13:40 ` Namhyung Kim
2015-12-22 15:43 ` [PATCH 6/8] perf script: Display stat events by default Jiri Olsa
` (3 subsequent siblings)
8 siblings, 1 reply; 17+ messages in thread
From: Jiri Olsa @ 2015-12-22 15:43 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: lkml, David Ahern, Ingo Molnar, Namhyung Kim, Peter Zijlstra,
Liang, Kan
Implement struct scripting_ops::(process_stat|process_stat_interval)
handlers - calling scripting handlers from stat events handlers.
Tested-by: Kan Liang <kan.liang@intel.com>
Link: http://lkml.kernel.org/n/tip-3iu6vmvuur1bntbnh43v3bib@git.kernel.org
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
tools/perf/builtin-script.c | 33 +++++++++++++++++++++++++++++++++
1 file changed, 33 insertions(+)
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index a90bc0b81e70..656541eff151 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -221,6 +221,9 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
struct perf_event_attr *attr = &evsel->attr;
bool allow_user_set;
+ if (perf_header__has_feat(&session->header, HEADER_STAT))
+ return 0;
+
allow_user_set = perf_header__has_feat(&session->header,
HEADER_AUXTRACE);
@@ -674,6 +677,18 @@ static void process_event(struct perf_script *script __maybe_unused, union perf_
static struct scripting_ops *scripting_ops;
+static void process_stat(struct perf_evsel *counter, u64 time)
+{
+ if (scripting_ops)
+ scripting_ops->process_stat(&stat_config, counter, time);
+}
+
+static void process_stat_interval(u64 time)
+{
+ if (scripting_ops)
+ scripting_ops->process_stat_interval(time);
+}
+
static void setup_scripting(void)
{
setup_perl_scripting();
@@ -1690,6 +1705,22 @@ static void script__setup_sample_type(struct perf_script *script)
}
}
+static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
+ union perf_event *event,
+ struct perf_session *session)
+{
+ struct stat_round_event *round = &event->stat_round;
+ struct perf_evsel *counter;
+
+ evlist__for_each(session->evlist, counter) {
+ perf_stat_process_counter(&stat_config, counter);
+ process_stat(counter, round->time);
+ }
+
+ process_stat_interval(round->time);
+ return 0;
+}
+
static int process_stat_config_event(struct perf_tool *tool __maybe_unused,
union perf_event *event,
struct perf_session *session __maybe_unused)
@@ -1783,6 +1814,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
.auxtrace_info = perf_event__process_auxtrace_info,
.auxtrace = perf_event__process_auxtrace,
.auxtrace_error = perf_event__process_auxtrace_error,
+ .stat = perf_event__process_stat_event,
+ .stat_round = process_stat_round_event,
.stat_config = process_stat_config_event,
.thread_map = process_thread_map_event,
.cpu_map = process_cpu_map_event,
--
2.4.3
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH 5/8] perf script: Add stat default handlers
2015-12-22 15:43 ` [PATCH 5/8] perf script: Add stat default handlers Jiri Olsa
@ 2015-12-23 13:40 ` Namhyung Kim
2015-12-23 16:01 ` Jiri Olsa
0 siblings, 1 reply; 17+ messages in thread
From: Namhyung Kim @ 2015-12-23 13:40 UTC (permalink / raw)
To: Jiri Olsa
Cc: Arnaldo Carvalho de Melo, lkml, David Ahern, Ingo Molnar,
Peter Zijlstra, Liang, Kan
On Tue, Dec 22, 2015 at 04:43:31PM +0100, Jiri Olsa wrote:
> Implement struct scripting_ops::(process_stat|process_stat_interval)
> handlers - calling scripting handlers from stat events handlers.
>
> Tested-by: Kan Liang <kan.liang@intel.com>
> Link: http://lkml.kernel.org/n/tip-3iu6vmvuur1bntbnh43v3bib@git.kernel.org
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> ---
> tools/perf/builtin-script.c | 33 +++++++++++++++++++++++++++++++++
> 1 file changed, 33 insertions(+)
>
> diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
> index a90bc0b81e70..656541eff151 100644
> --- a/tools/perf/builtin-script.c
> +++ b/tools/perf/builtin-script.c
> @@ -221,6 +221,9 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
> struct perf_event_attr *attr = &evsel->attr;
> bool allow_user_set;
>
> + if (perf_header__has_feat(&session->header, HEADER_STAT))
> + return 0;
> +
> allow_user_set = perf_header__has_feat(&session->header,
> HEADER_AUXTRACE);
>
> @@ -674,6 +677,18 @@ static void process_event(struct perf_script *script __maybe_unused, union perf_
>
> static struct scripting_ops *scripting_ops;
>
> +static void process_stat(struct perf_evsel *counter, u64 time)
> +{
> + if (scripting_ops)
> + scripting_ops->process_stat(&stat_config, counter, time);
Shouldn't it be
if (scripting_ops && scripting_ops->process_stat)
? It seems you only set it for python in patch 7..
> +}
> +
> +static void process_stat_interval(u64 time)
> +{
> + if (scripting_ops)
> + scripting_ops->process_stat_interval(time);
Ditto.
Thanks,
Namhyung
> +}
> +
> static void setup_scripting(void)
> {
> setup_perl_scripting();
> @@ -1690,6 +1705,22 @@ static void script__setup_sample_type(struct perf_script *script)
> }
> }
>
> +static int process_stat_round_event(struct perf_tool *tool __maybe_unused,
> + union perf_event *event,
> + struct perf_session *session)
> +{
> + struct stat_round_event *round = &event->stat_round;
> + struct perf_evsel *counter;
> +
> + evlist__for_each(session->evlist, counter) {
> + perf_stat_process_counter(&stat_config, counter);
> + process_stat(counter, round->time);
> + }
> +
> + process_stat_interval(round->time);
> + return 0;
> +}
> +
> static int process_stat_config_event(struct perf_tool *tool __maybe_unused,
> union perf_event *event,
> struct perf_session *session __maybe_unused)
> @@ -1783,6 +1814,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused)
> .auxtrace_info = perf_event__process_auxtrace_info,
> .auxtrace = perf_event__process_auxtrace,
> .auxtrace_error = perf_event__process_auxtrace_error,
> + .stat = perf_event__process_stat_event,
> + .stat_round = process_stat_round_event,
> .stat_config = process_stat_config_event,
> .thread_map = process_thread_map_event,
> .cpu_map = process_cpu_map_event,
> --
> 2.4.3
>
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH 5/8] perf script: Add stat default handlers
2015-12-23 13:40 ` Namhyung Kim
@ 2015-12-23 16:01 ` Jiri Olsa
0 siblings, 0 replies; 17+ messages in thread
From: Jiri Olsa @ 2015-12-23 16:01 UTC (permalink / raw)
To: Namhyung Kim
Cc: Jiri Olsa, Arnaldo Carvalho de Melo, lkml, David Ahern,
Ingo Molnar, Peter Zijlstra, Liang, Kan
On Wed, Dec 23, 2015 at 10:40:38PM +0900, Namhyung Kim wrote:
> On Tue, Dec 22, 2015 at 04:43:31PM +0100, Jiri Olsa wrote:
> > Implement struct scripting_ops::(process_stat|process_stat_interval)
> > handlers - calling scripting handlers from stat events handlers.
> >
> > Tested-by: Kan Liang <kan.liang@intel.com>
> > Link: http://lkml.kernel.org/n/tip-3iu6vmvuur1bntbnh43v3bib@git.kernel.org
> > Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> > ---
> > tools/perf/builtin-script.c | 33 +++++++++++++++++++++++++++++++++
> > 1 file changed, 33 insertions(+)
> >
> > diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
> > index a90bc0b81e70..656541eff151 100644
> > --- a/tools/perf/builtin-script.c
> > +++ b/tools/perf/builtin-script.c
> > @@ -221,6 +221,9 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel,
> > struct perf_event_attr *attr = &evsel->attr;
> > bool allow_user_set;
> >
> > + if (perf_header__has_feat(&session->header, HEADER_STAT))
> > + return 0;
> > +
> > allow_user_set = perf_header__has_feat(&session->header,
> > HEADER_AUXTRACE);
> >
> > @@ -674,6 +677,18 @@ static void process_event(struct perf_script *script __maybe_unused, union perf_
> >
> > static struct scripting_ops *scripting_ops;
> >
> > +static void process_stat(struct perf_evsel *counter, u64 time)
> > +{
> > + if (scripting_ops)
> > + scripting_ops->process_stat(&stat_config, counter, time);
>
> Shouldn't it be
>
> if (scripting_ops && scripting_ops->process_stat)
>
> ? It seems you only set it for python in patch 7..
right.. forgot about perl completely..
thanks,
jirka
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 6/8] perf script: Display stat events by default
2015-12-22 15:43 [PATCHv7 00/25] perf stat: Add scripting support Jiri Olsa
` (4 preceding siblings ...)
2015-12-22 15:43 ` [PATCH 5/8] perf script: Add stat default handlers Jiri Olsa
@ 2015-12-22 15:43 ` Jiri Olsa
2015-12-22 15:43 ` [PATCH 7/8] perf script: Add python support for stat events Jiri Olsa
` (2 subsequent siblings)
8 siblings, 0 replies; 17+ messages in thread
From: Jiri Olsa @ 2015-12-22 15:43 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: lkml, David Ahern, Ingo Molnar, Namhyung Kim, Peter Zijlstra,
Liang, Kan
If no script is specified for stat data, display
stat events in raw form.
$ perf stat record ls
SNIP
Performance counter stats for 'ls':
0.851585 task-clock (msec) # 0.717 CPUs utilized
0 context-switches # 0.000 K/sec
0 cpu-migrations # 0.000 K/sec
114 page-faults # 0.134 M/sec
2,620,918 cycles # 3.078 GHz
<not supported> stalled-cycles-frontend
<not supported> stalled-cycles-backend
2,714,111 instructions # 1.04 insns per cycle
542,434 branches # 636.970 M/sec
15,946 branch-misses # 2.94% of all branches
0.001186954 seconds time elapsed
$ perf script
CPU THREAD VAL ENA RUN TIME EVENT
-1 26185 851585 851585 851585 1186954 task-clock
-1 26185 0 851585 851585 1186954 context-switches
-1 26185 0 851585 851585 1186954 cpu-migrations
-1 26185 114 851585 851585 1186954 page-faults
-1 26185 2620918 853340 853340 1186954 cycles
-1 26185 0 0 0 1186954 stalled-cycles-frontend
-1 26185 0 0 0 1186954 stalled-cycles-backend
-1 26185 2714111 853340 853340 1186954 instructions
-1 26185 542434 853340 853340 1186954 branches
-1 26185 15946 853340 853340 1186954 branch-misses
Tested-by: Kan Liang <kan.liang@intel.com>
Link: http://lkml.kernel.org/n/tip-ph7bpnetmskvmietfwllf6i6@git.kernel.org
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
tools/perf/builtin-script.c | 36 ++++++++++++++++++++++++++++++++++++
1 file changed, 36 insertions(+)
diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c
index 656541eff151..bf734ad7165f 100644
--- a/tools/perf/builtin-script.c
+++ b/tools/perf/builtin-script.c
@@ -677,10 +677,46 @@ static void process_event(struct perf_script *script __maybe_unused, union perf_
static struct scripting_ops *scripting_ops;
+static void __process_stat(struct perf_evsel *counter, u64 time)
+{
+ int nthreads = thread_map__nr(counter->threads);
+ int ncpus = perf_evsel__nr_cpus(counter);
+ int cpu, thread;
+ static int header_printed;
+
+ if (counter->system_wide)
+ nthreads = 1;
+
+ if (!header_printed) {
+ printf("%3s %8s %15s %15s %15s %15s %s\n",
+ "CPU", "THREAD", "VAL", "ENA", "RUN", "TIME", "EVENT");
+ header_printed = 1;
+ }
+
+ for (thread = 0; thread < nthreads; thread++) {
+ for (cpu = 0; cpu < ncpus; cpu++) {
+ struct perf_counts_values *counts;
+
+ counts = perf_counts(counter->counts, cpu, thread);
+
+ printf("%3d %8d %15" PRIu64 " %15" PRIu64 " %15" PRIu64 " %15" PRIu64 " %s\n",
+ counter->cpus->map[cpu],
+ thread_map__pid(counter->threads, thread),
+ counts->val,
+ counts->ena,
+ counts->run,
+ time,
+ perf_evsel__name(counter));
+ }
+ }
+}
+
static void process_stat(struct perf_evsel *counter, u64 time)
{
if (scripting_ops)
scripting_ops->process_stat(&stat_config, counter, time);
+ else
+ __process_stat(counter, time);
}
static void process_stat_interval(u64 time)
--
2.4.3
^ permalink raw reply related [flat|nested] 17+ messages in thread
* [PATCH 7/8] perf script: Add python support for stat events
2015-12-22 15:43 [PATCHv7 00/25] perf stat: Add scripting support Jiri Olsa
` (5 preceding siblings ...)
2015-12-22 15:43 ` [PATCH 6/8] perf script: Display stat events by default Jiri Olsa
@ 2015-12-22 15:43 ` Jiri Olsa
2015-12-23 13:42 ` Namhyung Kim
2015-12-22 15:43 ` [PATCH 8/8] perf script: Add stat-cpi.py script Jiri Olsa
2015-12-22 15:53 ` [PATCHv7 00/25] perf stat: Add scripting support Jiri Olsa
8 siblings, 1 reply; 17+ messages in thread
From: Jiri Olsa @ 2015-12-22 15:43 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: lkml, David Ahern, Ingo Molnar, Namhyung Kim, Peter Zijlstra,
Liang, Kan
Add support to get stat events data in perf python scripts.
The python script shall implement the following
new interface to process stat data:
def stat__<event_name>_[<modifier>](cpu, thread, time, val, ena, run):
- is called for every stat event for the given counter;
if the user monitors 'cycles,instructions:u', the following
callbacks should be defined:
def stat__cycles(cpu, thread, time, val, ena, run):
def stat__instructions_u(cpu, thread, time, val, ena, run):
def stat__interval(time):
- is called for every interval with its time;
in non-interval mode it's called after the last
stat event with the total measured time in ns
The rest of the current interface stays untouched..
Please check the example CPI metrics script in the following
patch, with command line examples in its changelog.
Tested-by: Kan Liang <kan.liang@intel.com>
Link: http://lkml.kernel.org/n/tip-jojiaelyckrw6040wqc06q1j@git.kernel.org
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
.../util/scripting-engines/trace-event-python.c | 114 +++++++++++++++++++--
1 file changed, 108 insertions(+), 6 deletions(-)
diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
index a8e825fca42a..8436eb23eb16 100644
--- a/tools/perf/util/scripting-engines/trace-event-python.c
+++ b/tools/perf/util/scripting-engines/trace-event-python.c
@@ -41,6 +41,9 @@
#include "../thread-stack.h"
#include "../trace-event.h"
#include "../machine.h"
+#include "thread_map.h"
+#include "cpumap.h"
+#include "stat.h"
PyMODINIT_FUNC initperf_trace_context(void);
@@ -859,6 +862,103 @@ static void python_process_event(union perf_event *event,
}
}
+static void get_handler_name(char *str, size_t size,
+ struct perf_evsel *evsel)
+{
+ char *p = str;
+
+ scnprintf(str, size, "stat__%s", perf_evsel__name(evsel));
+
+ while ((p = strchr(p, ':'))) {
+ *p = '_';
+ p++;
+ }
+}
+
+static void
+process_stat(struct perf_evsel *counter, int cpu, int thread, u64 time,
+ struct perf_counts_values *count)
+{
+ PyObject *handler, *t;
+ static char handler_name[256];
+ int n = 0;
+
+ t = PyTuple_New(MAX_FIELDS);
+ if (!t)
+ Py_FatalError("couldn't create Python tuple");
+
+ get_handler_name(handler_name, sizeof(handler_name),
+ counter);
+
+ handler = get_handler(handler_name);
+ if (!handler) {
+ pr_debug("can't find python handler %s\n", handler_name);
+ return;
+ }
+
+ PyTuple_SetItem(t, n++, PyInt_FromLong(cpu));
+ PyTuple_SetItem(t, n++, PyInt_FromLong(thread));
+ PyTuple_SetItem(t, n++, PyLong_FromLong(time));
+ PyTuple_SetItem(t, n++, PyLong_FromLong(count->val));
+ PyTuple_SetItem(t, n++, PyLong_FromLong(count->ena));
+ PyTuple_SetItem(t, n++, PyLong_FromLong(count->run));
+
+ if (_PyTuple_Resize(&t, n) == -1)
+ Py_FatalError("error resizing Python tuple");
+
+ call_object(handler, t, handler_name);
+
+ Py_DECREF(t);
+}
+
+static void python_process_stat(struct perf_stat_config *config,
+ struct perf_evsel *counter, u64 time)
+{
+ struct thread_map *threads = counter->threads;
+ struct cpu_map *cpus = counter->cpus;
+ int cpu, thread;
+
+ if (config->aggr_mode == AGGR_GLOBAL) {
+ process_stat(counter, -1, -1, time,
+ &counter->counts->aggr);
+ return;
+ }
+
+ for (thread = 0; thread < threads->nr; thread++) {
+ for (cpu = 0; cpu < cpus->nr; cpu++) {
+ process_stat(counter, cpus->map[cpu],
+ thread_map__pid(threads, thread), time,
+ perf_counts(counter->counts, cpu, thread));
+ }
+ }
+}
+
+static void python_process_stat_interval(u64 time)
+{
+ PyObject *handler, *t;
+ static const char handler_name[] = "stat__interval";
+ int n = 0;
+
+ t = PyTuple_New(MAX_FIELDS);
+ if (!t)
+ Py_FatalError("couldn't create Python tuple");
+
+ handler = get_handler(handler_name);
+ if (!handler) {
+ pr_debug("can't find python handler %s\n", handler_name);
+ return;
+ }
+
+ PyTuple_SetItem(t, n++, PyLong_FromLong(time));
+
+ if (_PyTuple_Resize(&t, n) == -1)
+ Py_FatalError("error resizing Python tuple");
+
+ call_object(handler, t, handler_name);
+
+ Py_DECREF(t);
+}
+
static int run_start_sub(void)
{
main_module = PyImport_AddModule("__main__");
@@ -1201,10 +1301,12 @@ static int python_generate_script(struct pevent *pevent, const char *outfile)
}
struct scripting_ops python_scripting_ops = {
- .name = "Python",
- .start_script = python_start_script,
- .flush_script = python_flush_script,
- .stop_script = python_stop_script,
- .process_event = python_process_event,
- .generate_script = python_generate_script,
+ .name = "Python",
+ .start_script = python_start_script,
+ .flush_script = python_flush_script,
+ .stop_script = python_stop_script,
+ .process_event = python_process_event,
+ .process_stat = python_process_stat,
+ .process_stat_interval = python_process_stat_interval,
+ .generate_script = python_generate_script,
};
--
2.4.3
^ permalink raw reply related [flat|nested] 17+ messages in thread
* Re: [PATCH 7/8] perf script: Add python support for stat events
2015-12-22 15:43 ` [PATCH 7/8] perf script: Add python support for stat events Jiri Olsa
@ 2015-12-23 13:42 ` Namhyung Kim
2015-12-23 16:04 ` Jiri Olsa
0 siblings, 1 reply; 17+ messages in thread
From: Namhyung Kim @ 2015-12-23 13:42 UTC (permalink / raw)
To: Jiri Olsa
Cc: Arnaldo Carvalho de Melo, lkml, David Ahern, Ingo Molnar,
Peter Zijlstra, Liang, Kan
On Tue, Dec 22, 2015 at 04:43:33PM +0100, Jiri Olsa wrote:
> Add support to get stat events data in perf python scripts.
>
> The python script shall implement following
> new interface to process stat data:
>
> def stat__<event_name>_[<modifier>](cpu, thread, time, val, ena, run):
>
> - is called for every stat event for given counter,
> if user monitors 'cycles,instructions:u" following
> callbacks should be defined:
>
> def stat__cycles(cpu, thread, time, val, ena, run):
> def stat__instructions_u(cpu, thread, time, val, ena, run):
>
> def stat__interval(time):
>
> - is called for every interval with its time,
> in non interval mode it's called after last
> stat event with total measured time in ns
>
> The rest of the current interface stays untouched..
>
> Please check example CPI metrics script in following patch
> with command line examples in changelogs.
>
> Tested-by: Kan Liang <kan.liang@intel.com>
> Link: http://lkml.kernel.org/n/tip-jojiaelyckrw6040wqc06q1j@git.kernel.org
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> ---
> .../util/scripting-engines/trace-event-python.c | 114 +++++++++++++++++++--
> 1 file changed, 108 insertions(+), 6 deletions(-)
>
> diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c
> index a8e825fca42a..8436eb23eb16 100644
> --- a/tools/perf/util/scripting-engines/trace-event-python.c
> +++ b/tools/perf/util/scripting-engines/trace-event-python.c
> @@ -41,6 +41,9 @@
> #include "../thread-stack.h"
> #include "../trace-event.h"
> #include "../machine.h"
> +#include "thread_map.h"
> +#include "cpumap.h"
> +#include "stat.h"
>
> PyMODINIT_FUNC initperf_trace_context(void);
>
> @@ -859,6 +862,103 @@ static void python_process_event(union perf_event *event,
> }
> }
>
> +static void get_handler_name(char *str, size_t size,
> + struct perf_evsel *evsel)
> +{
> + char *p = str;
> +
> + scnprintf(str, size, "stat__%s", perf_evsel__name(evsel));
> +
> + while ((p = strchr(p, ':'))) {
> + *p = '_';
> + p++;
> + }
> +}
> +
> +static void
> +process_stat(struct perf_evsel *counter, int cpu, int thread, u64 time,
> + struct perf_counts_values *count)
> +{
> + PyObject *handler, *t;
> + static char handler_name[256];
> + int n = 0;
> +
> + t = PyTuple_New(MAX_FIELDS);
> + if (!t)
> + Py_FatalError("couldn't create Python tuple");
> +
> + get_handler_name(handler_name, sizeof(handler_name),
> + counter);
> +
> + handler = get_handler(handler_name);
> + if (!handler) {
> + pr_debug("can't find python handler %s\n", handler_name);
> + return;
> + }
> +
> + PyTuple_SetItem(t, n++, PyInt_FromLong(cpu));
> + PyTuple_SetItem(t, n++, PyInt_FromLong(thread));
> + PyTuple_SetItem(t, n++, PyLong_FromLong(time));
> + PyTuple_SetItem(t, n++, PyLong_FromLong(count->val));
> + PyTuple_SetItem(t, n++, PyLong_FromLong(count->ena));
> + PyTuple_SetItem(t, n++, PyLong_FromLong(count->run));
What about 32-bit systems? It seems the PyLong_FromLong() takes long
but the counts are u64.
> +
> + if (_PyTuple_Resize(&t, n) == -1)
> + Py_FatalError("error resizing Python tuple");
> +
> + call_object(handler, t, handler_name);
> +
> + Py_DECREF(t);
> +}
> +
> +static void python_process_stat(struct perf_stat_config *config,
> + struct perf_evsel *counter, u64 time)
> +{
> + struct thread_map *threads = counter->threads;
> + struct cpu_map *cpus = counter->cpus;
> + int cpu, thread;
> +
> + if (config->aggr_mode == AGGR_GLOBAL) {
> + process_stat(counter, -1, -1, time,
> + &counter->counts->aggr);
> + return;
> + }
> +
> + for (thread = 0; thread < threads->nr; thread++) {
> + for (cpu = 0; cpu < cpus->nr; cpu++) {
> + process_stat(counter, cpus->map[cpu],
> + thread_map__pid(threads, thread), time,
> + perf_counts(counter->counts, cpu, thread));
> + }
> + }
> +}
> +
> +static void python_process_stat_interval(u64 time)
> +{
> + PyObject *handler, *t;
> + static const char handler_name[] = "stat__interval";
> + int n = 0;
> +
> + t = PyTuple_New(MAX_FIELDS);
> + if (!t)
> + Py_FatalError("couldn't create Python tuple");
> +
> + handler = get_handler(handler_name);
> + if (!handler) {
> + pr_debug("can't find python handler %s\n", handler_name);
> + return;
> + }
> +
> + PyTuple_SetItem(t, n++, PyLong_FromLong(time));
Ditto.
Thanks,
Namhyung
> +
> + if (_PyTuple_Resize(&t, n) == -1)
> + Py_FatalError("error resizing Python tuple");
> +
> + call_object(handler, t, handler_name);
> +
> + Py_DECREF(t);
> +}
> +
> static int run_start_sub(void)
> {
> main_module = PyImport_AddModule("__main__");
> @@ -1201,10 +1301,12 @@ static int python_generate_script(struct pevent *pevent, const char *outfile)
> }
>
> struct scripting_ops python_scripting_ops = {
> - .name = "Python",
> - .start_script = python_start_script,
> - .flush_script = python_flush_script,
> - .stop_script = python_stop_script,
> - .process_event = python_process_event,
> - .generate_script = python_generate_script,
> + .name = "Python",
> + .start_script = python_start_script,
> + .flush_script = python_flush_script,
> + .stop_script = python_stop_script,
> + .process_event = python_process_event,
> + .process_stat = python_process_stat,
> + .process_stat_interval = python_process_stat_interval,
> + .generate_script = python_generate_script,
> };
> --
> 2.4.3
>
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCH 7/8] perf script: Add python support for stat events
2015-12-23 13:42 ` Namhyung Kim
@ 2015-12-23 16:04 ` Jiri Olsa
0 siblings, 0 replies; 17+ messages in thread
From: Jiri Olsa @ 2015-12-23 16:04 UTC (permalink / raw)
To: Namhyung Kim
Cc: Jiri Olsa, Arnaldo Carvalho de Melo, lkml, David Ahern,
Ingo Molnar, Peter Zijlstra, Liang, Kan
On Wed, Dec 23, 2015 at 10:42:47PM +0900, Namhyung Kim wrote:
SNIP
> > +static void
> > +process_stat(struct perf_evsel *counter, int cpu, int thread, u64 time,
> > + struct perf_counts_values *count)
> > +{
> > + PyObject *handler, *t;
> > + static char handler_name[256];
> > + int n = 0;
> > +
> > + t = PyTuple_New(MAX_FIELDS);
> > + if (!t)
> > + Py_FatalError("couldn't create Python tuple");
> > +
> > + get_handler_name(handler_name, sizeof(handler_name),
> > + counter);
> > +
> > + handler = get_handler(handler_name);
> > + if (!handler) {
> > + pr_debug("can't find python handler %s\n", handler_name);
> > + return;
> > + }
> > +
> > + PyTuple_SetItem(t, n++, PyInt_FromLong(cpu));
> > + PyTuple_SetItem(t, n++, PyInt_FromLong(thread));
> > + PyTuple_SetItem(t, n++, PyLong_FromLong(time));
> > + PyTuple_SetItem(t, n++, PyLong_FromLong(count->val));
> > + PyTuple_SetItem(t, n++, PyLong_FromLong(count->ena));
> > + PyTuple_SetItem(t, n++, PyLong_FromLong(count->run));
>
> What about 32-bit systems? It seems the PyLong_FromLong() takes long
> but the counts are u64.
hum, if it does I think we'll have problems in other
parts of python code.. I'll check, thanks
jirka
^ permalink raw reply [flat|nested] 17+ messages in thread
* [PATCH 8/8] perf script: Add stat-cpi.py script
2015-12-22 15:43 [PATCHv7 00/25] perf stat: Add scripting support Jiri Olsa
` (6 preceding siblings ...)
2015-12-22 15:43 ` [PATCH 7/8] perf script: Add python support for stat events Jiri Olsa
@ 2015-12-22 15:43 ` Jiri Olsa
2015-12-23 13:44 ` Namhyung Kim
2015-12-22 15:53 ` [PATCHv7 00/25] perf stat: Add scripting support Jiri Olsa
8 siblings, 1 reply; 17+ messages in thread
From: Jiri Olsa @ 2015-12-22 15:43 UTC (permalink / raw)
To: Arnaldo Carvalho de Melo
Cc: lkml, David Ahern, Ingo Molnar, Namhyung Kim, Peter Zijlstra,
Liang, Kan
Adding stat-cpi.py as an example of how to do stat scripting.
It computes the CPI metrics from cycles and instructions
events.
Following stat record/report/script combinations could be used:
- get CPI for given workload
$ perf stat -e cycles,instructions record ls
SNIP
Performance counter stats for 'ls':
2,904,431 cycles
3,346,878 instructions # 1.15 insns per cycle
0.001782686 seconds time elapsed
$ perf script -s ./scripts/python/stat-cpi.py
0.001783: cpu -1, thread -1 -> cpi 0.867803 (2904431/3346878)
$ perf stat -e cycles,instructions record ls | perf script -s ./scripts/python/stat-cpi.py
SNIP
0.001730: cpu -1, thread -1 -> cpi 0.869026 (2928292/3369627)
- get CPI systemwide:
$ perf stat -e cycles,instructions -a -I 1000 record sleep 3
# time counts unit events
1.000158618 594,274,711 cycles (100.00%)
1.000158618 441,898,250 instructions
2.000350973 567,649,705 cycles (100.00%)
2.000350973 432,669,206 instructions
3.000559210 561,940,430 cycles (100.00%)
3.000559210 420,403,465 instructions
3.000670798 780,105 cycles (100.00%)
3.000670798 326,516 instructions
$ perf script -s ./scripts/python/stat-cpi.py
1.000159: cpu -1, thread -1 -> cpi 1.344823 (594274711/441898250)
2.000351: cpu -1, thread -1 -> cpi 1.311972 (567649705/432669206)
3.000559: cpu -1, thread -1 -> cpi 1.336669 (561940430/420403465)
3.000671: cpu -1, thread -1 -> cpi 2.389178 (780105/326516)
$ perf stat -e cycles,instructions -a -I 1000 record sleep 3 | perf script -s ./scripts/python/stat-cpi.py
1.000202: cpu -1, thread -1 -> cpi 1.035091 (940778881/908885530)
2.000392: cpu -1, thread -1 -> cpi 1.442600 (627493992/434974455)
3.000545: cpu -1, thread -1 -> cpi 1.353612 (741463930/547766890)
3.000622: cpu -1, thread -1 -> cpi 2.642110 (784083/296764)
Tested-by: Kan Liang <kan.liang@intel.com>
Link: http://lkml.kernel.org/n/tip-15vwwb4yea15wzz6bqbxdpc0@git.kernel.org
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
tools/perf/scripts/python/stat-cpi.py | 77 +++++++++++++++++++++++++++++++++++
1 file changed, 77 insertions(+)
create mode 100644 tools/perf/scripts/python/stat-cpi.py
diff --git a/tools/perf/scripts/python/stat-cpi.py b/tools/perf/scripts/python/stat-cpi.py
new file mode 100644
index 000000000000..32689a83b6e6
--- /dev/null
+++ b/tools/perf/scripts/python/stat-cpi.py
@@ -0,0 +1,77 @@
+#!/bin/python
+
+data = {}
+times = []
+threads = []
+cpus = []
+
+def get_key(time, event, cpu, thread):
+ return "%d-%s-%d-%d" % (time, event, cpu, thread)
+
+def store_key(time, cpu, thread):
+ if (time not in times):
+ times.append(time)
+
+ if (cpu not in cpus):
+ cpus.append(cpu)
+
+ if (thread not in threads):
+ threads.append(thread)
+
+def store(time, event, cpu, thread, val, ena, run):
+ #print "event %s cpu %d, thread %d, time %d, val %d, ena %d, run %d" % \
+ # (event, cpu, thread, time, val, ena, run)
+
+ store_key(time, cpu, thread)
+ key = get_key(time, event, cpu, thread)
+ data[key] = [ val, ena, run]
+
+def get(time, event, cpu, thread):
+ key = get_key(time, event, cpu, thread)
+ return data[key][0]
+
+def stat__cycles_k(cpu, thread, time, val, ena, run):
+ store(time, "cycles", cpu, thread, val, ena, run);
+
+def stat__instructions_k(cpu, thread, time, val, ena, run):
+ store(time, "instructions", cpu, thread, val, ena, run);
+
+def stat__cycles_u(cpu, thread, time, val, ena, run):
+ store(time, "cycles", cpu, thread, val, ena, run);
+
+def stat__instructions_u(cpu, thread, time, val, ena, run):
+ store(time, "instructions", cpu, thread, val, ena, run);
+
+def stat__cycles(cpu, thread, time, val, ena, run):
+ store(time, "cycles", cpu, thread, val, ena, run);
+
+def stat__instructions(cpu, thread, time, val, ena, run):
+ store(time, "instructions", cpu, thread, val, ena, run);
+
+def stat__interval(time):
+ for cpu in cpus:
+ for thread in threads:
+ cyc = get(time, "cycles", cpu, thread)
+ ins = get(time, "instructions", cpu, thread)
+ cpi = 0
+
+ if ins != 0:
+ cpi = cyc/float(ins)
+
+ print "%15f: cpu %d, thread %d -> cpi %f (%d/%d)" % (time/(float(1000000000)), cpu, thread, cpi, cyc, ins)
+
+def trace_end():
+ pass
+# XXX trace_end callback could be used as an alternative place
+# to compute same values as in the script above:
+#
+# for time in times:
+# for cpu in cpus:
+# for thread in threads:
+# cyc = get(time, "cycles", cpu, thread)
+# ins = get(time, "instructions", cpu, thread)
+#
+# if ins != 0:
+# cpi = cyc/float(ins)
+#
+# print "time %.9f, cpu %d, thread %d -> cpi %f" % (time/(float(1000000000)), cpu, thread, cpi)
--
2.4.3
^ permalink raw reply related [flat|nested] 17+ messages in thread* Re: [PATCH 8/8] perf script: Add stat-cpi.py script
2015-12-22 15:43 ` [PATCH 8/8] perf script: Add stat-cpi.py script Jiri Olsa
@ 2015-12-23 13:44 ` Namhyung Kim
2015-12-23 16:04 ` Jiri Olsa
0 siblings, 1 reply; 17+ messages in thread
From: Namhyung Kim @ 2015-12-23 13:44 UTC (permalink / raw)
To: Jiri Olsa
Cc: Arnaldo Carvalho de Melo, lkml, David Ahern, Ingo Molnar,
Peter Zijlstra, Liang, Kan
On Tue, Dec 22, 2015 at 04:43:34PM +0100, Jiri Olsa wrote:
> Adding stat-cpi.py as an example of how to do stat scripting.
> It computes the CPI metrics from cycles and instructions
> events.
>
> Following stat record/report/script combinations could be used:
>
> - get CPI for given workload
>
> $ perf stat -e cycles,instructions record ls
>
> SNIP
>
> Performance counter stats for 'ls':
>
> 2,904,431 cycles
> 3,346,878 instructions # 1.15 insns per cycle
>
> 0.001782686 seconds time elapsed
>
> $ perf script -s ./scripts/python/stat-cpi.py
> 0.001783: cpu -1, thread -1 -> cpi 0.867803 (2904431/3346878)
>
> $ perf stat -e cycles,instructions record ls | perf script -s ./scripts/python/stat-cpi.py
>
> SNIP
>
> 0.001730: cpu -1, thread -1 -> cpi 0.869026 (2928292/3369627)
>
> - get CPI systemwide:
>
> $ perf stat -e cycles,instructions -a -I 1000 record sleep 3
> # time counts unit events
> 1.000158618 594,274,711 cycles (100.00%)
> 1.000158618 441,898,250 instructions
> 2.000350973 567,649,705 cycles (100.00%)
> 2.000350973 432,669,206 instructions
> 3.000559210 561,940,430 cycles (100.00%)
> 3.000559210 420,403,465 instructions
> 3.000670798 780,105 cycles (100.00%)
> 3.000670798 326,516 instructions
>
> $ perf script -s ./scripts/python/stat-cpi.py
> 1.000159: cpu -1, thread -1 -> cpi 1.344823 (594274711/441898250)
> 2.000351: cpu -1, thread -1 -> cpi 1.311972 (567649705/432669206)
> 3.000559: cpu -1, thread -1 -> cpi 1.336669 (561940430/420403465)
> 3.000671: cpu -1, thread -1 -> cpi 2.389178 (780105/326516)
>
> $ perf stat -e cycles,instructions -a -I 1000 record sleep 3 | perf script -s ./scripts/python/stat-cpi.py
> 1.000202: cpu -1, thread -1 -> cpi 1.035091 (940778881/908885530)
> 2.000392: cpu -1, thread -1 -> cpi 1.442600 (627493992/434974455)
> 3.000545: cpu -1, thread -1 -> cpi 1.353612 (741463930/547766890)
> 3.000622: cpu -1, thread -1 -> cpi 2.642110 (784083/296764)
>
> Tested-by: Kan Liang <kan.liang@intel.com>
> Link: http://lkml.kernel.org/n/tip-15vwwb4yea15wzz6bqbxdpc0@git.kernel.org
> Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> ---
> tools/perf/scripts/python/stat-cpi.py | 77 +++++++++++++++++++++++++++++++++++
> 1 file changed, 77 insertions(+)
> create mode 100644 tools/perf/scripts/python/stat-cpi.py
>
> diff --git a/tools/perf/scripts/python/stat-cpi.py b/tools/perf/scripts/python/stat-cpi.py
> new file mode 100644
> index 000000000000..32689a83b6e6
> --- /dev/null
> +++ b/tools/perf/scripts/python/stat-cpi.py
> @@ -0,0 +1,77 @@
> +#!/bin/python
It's an unusual place IMHO. Wouldn't it be '#!/usr/bin/env python'?
Thanks,
Namhyung
> +
> +data = {}
> +times = []
> +threads = []
> +cpus = []
> +
> +def get_key(time, event, cpu, thread):
> + return "%d-%s-%d-%d" % (time, event, cpu, thread)
> +
> +def store_key(time, cpu, thread):
> + if (time not in times):
> + times.append(time)
> +
> + if (cpu not in cpus):
> + cpus.append(cpu)
> +
> + if (thread not in threads):
> + threads.append(thread)
> +
> +def store(time, event, cpu, thread, val, ena, run):
> + #print "event %s cpu %d, thread %d, time %d, val %d, ena %d, run %d" % \
> + # (event, cpu, thread, time, val, ena, run)
> +
> + store_key(time, cpu, thread)
> + key = get_key(time, event, cpu, thread)
> + data[key] = [ val, ena, run]
> +
> +def get(time, event, cpu, thread):
> + key = get_key(time, event, cpu, thread)
> + return data[key][0]
> +
> +def stat__cycles_k(cpu, thread, time, val, ena, run):
> + store(time, "cycles", cpu, thread, val, ena, run);
> +
> +def stat__instructions_k(cpu, thread, time, val, ena, run):
> + store(time, "instructions", cpu, thread, val, ena, run);
> +
> +def stat__cycles_u(cpu, thread, time, val, ena, run):
> + store(time, "cycles", cpu, thread, val, ena, run);
> +
> +def stat__instructions_u(cpu, thread, time, val, ena, run):
> + store(time, "instructions", cpu, thread, val, ena, run);
> +
> +def stat__cycles(cpu, thread, time, val, ena, run):
> + store(time, "cycles", cpu, thread, val, ena, run);
> +
> +def stat__instructions(cpu, thread, time, val, ena, run):
> + store(time, "instructions", cpu, thread, val, ena, run);
> +
> +def stat__interval(time):
> + for cpu in cpus:
> + for thread in threads:
> + cyc = get(time, "cycles", cpu, thread)
> + ins = get(time, "instructions", cpu, thread)
> + cpi = 0
> +
> + if ins != 0:
> + cpi = cyc/float(ins)
> +
> + print "%15f: cpu %d, thread %d -> cpi %f (%d/%d)" % (time/(float(1000000000)), cpu, thread, cpi, cyc, ins)
> +
> +def trace_end():
> + pass
> +# XXX trace_end callback could be used as an alternative place
> +# to compute same values as in the script above:
> +#
> +# for time in times:
> +# for cpu in cpus:
> +# for thread in threads:
> +# cyc = get(time, "cycles", cpu, thread)
> +# ins = get(time, "instructions", cpu, thread)
> +#
> +# if ins != 0:
> +# cpi = cyc/float(ins)
> +#
> +# print "time %.9f, cpu %d, thread %d -> cpi %f" % (time/(float(1000000000)), cpu, thread, cpi)
> --
> 2.4.3
>
^ permalink raw reply [flat|nested] 17+ messages in thread* Re: [PATCH 8/8] perf script: Add stat-cpi.py script
2015-12-23 13:44 ` Namhyung Kim
@ 2015-12-23 16:04 ` Jiri Olsa
0 siblings, 0 replies; 17+ messages in thread
From: Jiri Olsa @ 2015-12-23 16:04 UTC (permalink / raw)
To: Namhyung Kim
Cc: Jiri Olsa, Arnaldo Carvalho de Melo, lkml, David Ahern,
Ingo Molnar, Peter Zijlstra, Liang, Kan
On Wed, Dec 23, 2015 at 10:44:20PM +0900, Namhyung Kim wrote:
SNIP
> > $ perf stat -e cycles,instructions -a -I 1000 record sleep 3 | perf script -s ./scripts/python/stat-cpi.py
> > 1.000202: cpu -1, thread -1 -> cpi 1.035091 (940778881/908885530)
> > 2.000392: cpu -1, thread -1 -> cpi 1.442600 (627493992/434974455)
> > 3.000545: cpu -1, thread -1 -> cpi 1.353612 (741463930/547766890)
> > 3.000622: cpu -1, thread -1 -> cpi 2.642110 (784083/296764)
> >
> > Tested-by: Kan Liang <kan.liang@intel.com>
> > Link: http://lkml.kernel.org/n/tip-15vwwb4yea15wzz6bqbxdpc0@git.kernel.org
> > Signed-off-by: Jiri Olsa <jolsa@kernel.org>
> > ---
> > tools/perf/scripts/python/stat-cpi.py | 77 +++++++++++++++++++++++++++++++++++
> > 1 file changed, 77 insertions(+)
> > create mode 100644 tools/perf/scripts/python/stat-cpi.py
> >
> > diff --git a/tools/perf/scripts/python/stat-cpi.py b/tools/perf/scripts/python/stat-cpi.py
> > new file mode 100644
> > index 000000000000..32689a83b6e6
> > --- /dev/null
> > +++ b/tools/perf/scripts/python/stat-cpi.py
> > @@ -0,0 +1,77 @@
> > +#!/bin/python
>
> > It's an unusual place IMHO. Wouldn't it be '#!/usr/bin/env python'?
yep, I'll change that
thanks,
jirka
^ permalink raw reply [flat|nested] 17+ messages in thread
* Re: [PATCHv7 00/25] perf stat: Add scripting support
2015-12-22 15:43 [PATCHv7 00/25] perf stat: Add scripting support Jiri Olsa
` (7 preceding siblings ...)
2015-12-22 15:43 ` [PATCH 8/8] perf script: Add stat-cpi.py script Jiri Olsa
@ 2015-12-22 15:53 ` Jiri Olsa
8 siblings, 0 replies; 17+ messages in thread
From: Jiri Olsa @ 2015-12-22 15:53 UTC (permalink / raw)
To: Jiri Olsa
Cc: Arnaldo Carvalho de Melo, Andi Kleen, Ulrich Drepper, Will Deacon,
Stephane Eranian, Don Zickus, lkml, David Ahern, Ingo Molnar,
Namhyung Kim, Peter Zijlstra, Liang, Kan
should be 0/8 ;-) sry
jirka
On Tue, Dec 22, 2015 at 04:43:26PM +0100, Jiri Olsa wrote:
> hi,
> sending another version of stat scripting.
>
> v7 changes:
> - perf stat record/report patches already taken,
> posting the rest of the scripting support
> - rebased to latest Arnaldo's perf/core
>
> v6 changes:
> - several patches from v4 already taken
> - perf stat record can now place 'record' keyword
> anywhere within stat options
> - placed STAT feature checking earlier into record
> patches so commands processing perf.data recognize
> stat data and skip sample_type checking
> - rebased on Arnaldo's perf/stat
> - added Tested-by: Kan Liang <kan.liang@intel.com>
>
> v5 changes:
> - several patches from v4 already taken
> - using u16 for cpu number in cpu_map_event
> - renamed PERF_RECORD_HEADER_ATTR_UPDATE to PERF_RECORD_EVENT_UPDATE
> - moved low-hanging fruit patches to the start of the patchset
> - patchset tested by Kan Liang, thanks!
>
> v4 changes:
> - added attr update event for event's cpumask
> - forbid aggregation on task workloads
> - some minor reorders and changelog fixes
>
> v3 changes:
> - added attr update event to handle unit,scale,name for event
> it fixed the uncore_imc_1/cas_count_read/ record/report
> - perf report -D now displays stat related events
> - some minor and changelog fixes
>
> v2 changes:
> - rebased to latest Arnaldo's perf/core
> - patches 1 to 11 already merged in
> - added --per-core/--per-socket/-A options for perf stat report
> command to allow custom aggregation in stat report, please
> check new examples below
> - couple changelogs changes
>
> The initial attempt defined its own formula lang and allowed
> triggering user's script on the end of the stat command:
> http://marc.info/?l=linux-kernel&m=136742146322273&w=2
>
> This patchset abandons the idea of new formula language
> and rather adds support to:
> - store stat data into perf.data file
> - add python support to process stat events
>
> Basically it allows storing stat data in perf.data and
> post-processing it with python scripts, in a similar way to
> what we do for sampling data.
>
> The stat data are stored in new stat, stat-round, stat-config user events.
> stat - stored for each read syscall of the counter
> stat round - stored for each interval or end of the command invocation
> stat config - stores all the config information needed to process data
> so report tool could restore the same output as record
>
> The python script can now define 'stat__<eventname>_<modifier>' functions
> to get stat events data and 'stat__interval' to get stat-round data.
>
> See CPI script example in scripts/python/stat-cpi.py.
>
> Also available in:
> git://git.kernel.org/pub/scm/linux/kernel/git/jolsa/perf.git
> perf/stat_script
>
> thanks,
> jirka
>
> Examples:
>
> - To record data for command stat workload:
>
> $ perf stat record kill
> ...
>
> Performance counter stats for 'kill':
>
> 0.372007 task-clock (msec) # 0.613 CPUs utilized
> 3 context-switches # 0.008 M/sec
> 0 cpu-migrations # 0.000 K/sec
> 62 page-faults # 0.167 M/sec
> 1,129,973 cycles # 3.038 GHz
> <not supported> stalled-cycles-frontend
> <not supported> stalled-cycles-backend
> 813,313 instructions # 0.72 insns per cycle
> 166,161 branches # 446.661 M/sec
> 8,747 branch-misses # 5.26% of all branches
>
> 0.000607287 seconds time elapsed
>
> - To report perf stat data:
>
> $ perf stat report
>
> Performance counter stats for '/home/jolsa/bin/perf stat record kill':
>
> 0.372007 task-clock (msec) # inf CPUs utilized
> 3 context-switches # 0.008 M/sec
> 0 cpu-migrations # 0.000 K/sec
> 62 page-faults # 0.167 M/sec
> 1,129,973 cycles # 3.038 GHz
> <not supported> stalled-cycles-frontend
> <not supported> stalled-cycles-backend
> 813,313 instructions # 0.72 insns per cycle
> 166,161 branches # 446.661 M/sec
> 8,747 branch-misses # 5.26% of all branches
>
> 0.000000000 seconds time elapsed
>
> - To store system-wide period stat data:
>
> $ perf stat -e cycles:u,instructions:u -a -I 1000 record
> # time counts unit events
> 1.000265471 462,311,482 cycles:u (100.00%)
> 1.000265471 590,037,440 instructions:u
> 2.000483453 722,532,336 cycles:u (100.00%)
> 2.000483453 848,678,197 instructions:u
> 3.000759876 75,990,880 cycles:u (100.00%)
> 3.000759876 86,187,813 instructions:u
> ^C 3.213960893 85,329,533 cycles:u (100.00%)
> 3.213960893 135,954,296 instructions:u
>
> - To report perf stat data:
>
> $ perf stat report
> # time counts unit events
> 1.000265471 462,311,482 cycles:u (100.00%)
> 1.000265471 590,037,440 instructions:u
> 2.000483453 722,532,336 cycles:u (100.00%)
> 2.000483453 848,678,197 instructions:u
> 3.000759876 75,990,880 cycles:u (100.00%)
> 3.000759876 86,187,813 instructions:u
> 3.213960893 85,329,533 cycles:u (100.00%)
> 3.213960893 135,954,296 instructions:u
>
> - To run stat-cpi.py script over perf.data:
>
> $ perf script -s scripts/python/stat-cpi.py
> 1.000265: cpu -1, thread -1 -> cpi 0.783529 (462311482/590037440)
> 2.000483: cpu -1, thread -1 -> cpi 0.851362 (722532336/848678197)
> 3.000760: cpu -1, thread -1 -> cpi 0.881689 (75990880/86187813)
> 3.213961: cpu -1, thread -1 -> cpi 0.627634 (85329533/135954296)
>
> - To pipe data from stat to stat-cpi script:
>
> $ perf stat -e cycles:u,instructions:u -A -C 0 -I 1000 record | perf script -s scripts/python/stat-cpi.py
> 1.000192: cpu 0, thread -1 -> cpi 0.739535 (23921908/32347236)
> 2.000376: cpu 0, thread -1 -> cpi 1.663482 (2519340/1514498)
> 3.000621: cpu 0, thread -1 -> cpi 1.396308 (16162767/11575362)
> 4.000700: cpu 0, thread -1 -> cpi 1.092246 (20077258/18381624)
> 5.000867: cpu 0, thread -1 -> cpi 0.473816 (45157586/95306156)
> 6.001034: cpu 0, thread -1 -> cpi 0.532792 (43701668/82023818)
> 7.001195: cpu 0, thread -1 -> cpi 1.122059 (29890042/26638561)
>
> - Raw script stat data output:
>
> $ perf stat -e cycles:u,instructions:u -A -C 0 -I 1000 record | perf --no-pager script
> CPU THREAD VAL ENA RUN TIME EVENT
> 0 -1 12302059 1000811347 1000810712 1000198821 cycles:u
> 0 -1 2565362 1000823218 1000823218 1000198821 instructions:u
> 0 -1 14453353 1000812704 1000812704 2000382283 cycles:u
> 0 -1 4600932 1000799342 1000799342 2000382283 instructions:u
> 0 -1 15245106 1000774425 1000774425 3000538255 cycles:u
> 0 -1 2624324 1000769310 1000769310 3000538255 instructions:u
>
> - To display different aggregation in report:
>
> $ perf stat -e cycles -a -I 1000 record sleep 3
> # time counts unit events
> 1.000223609 703,427,617 cycles
> 2.000443651 609,975,307 cycles
> 3.000569616 668,479,597 cycles
> 3.000735323 1,155,816 cycles
>
> $ perf stat report
> # time counts unit events
> 1.000223609 703,427,617 cycles
> 2.000443651 609,975,307 cycles
> 3.000569616 668,479,597 cycles
> 3.000735323 1,155,816 cycles
>
> $ perf stat report --per-core
> # time core cpus counts unit events
> 1.000223609 S0-C0 2 327,612,412 cycles
> 1.000223609 S0-C1 2 375,815,205 cycles
> 2.000443651 S0-C0 2 287,462,177 cycles
> 2.000443651 S0-C1 2 322,513,130 cycles
> 3.000569616 S0-C0 2 271,571,908 cycles
> 3.000569616 S0-C1 2 396,907,689 cycles
> 3.000735323 S0-C0 2 694,977 cycles
> 3.000735323 S0-C1 2 460,839 cycles
>
> $ perf stat report --per-socket
> # time socket cpus counts unit events
> 1.000223609 S0 4 703,427,617 cycles
> 2.000443651 S0 4 609,975,307 cycles
> 3.000569616 S0 4 668,479,597 cycles
> 3.000735323 S0 4 1,155,816 cycles
>
> $ perf stat report -A
> # time CPU counts unit events
> 1.000223609 CPU0 205,431,505 cycles
> 1.000223609 CPU1 122,180,907 cycles
> 1.000223609 CPU2 176,649,682 cycles
> 1.000223609 CPU3 199,165,523 cycles
> 2.000443651 CPU0 148,447,922 cycles
> 2.000443651 CPU1 139,014,255 cycles
> 2.000443651 CPU2 204,436,559 cycles
> 2.000443651 CPU3 118,076,571 cycles
> 3.000569616 CPU0 149,788,954 cycles
> 3.000569616 CPU1 121,782,954 cycles
> 3.000569616 CPU2 247,277,700 cycles
> 3.000569616 CPU3 149,629,989 cycles
> 3.000735323 CPU0 269,675 cycles
> 3.000735323 CPU1 425,302 cycles
> 3.000735323 CPU2 364,169 cycles
> 3.000735323 CPU3 96,670 cycles
>
>
> Cc: Andi Kleen <andi@firstfloor.org>
> Cc: Ulrich Drepper <drepper@gmail.com>
> Cc: Will Deacon <will.deacon@arm.com>
> Cc: Stephane Eranian <eranian@google.com>
> Cc: Don Zickus <dzickus@redhat.com>
> Tested-by: Kan Liang <kan.liang@intel.com>
> ---
> Jiri Olsa (8):
> perf stat record: Keep sample_type 0 for pipe session
> perf script: Process cpu/threads maps
> perf script: Process stat config event
> perf script: Add process_stat/process_stat_interval scripting interface
> perf script: Add stat default handlers
> perf script: Display stat events by default
> perf script: Add python support for stat events
> perf script: Add stat-cpi.py script
>
> tools/perf/builtin-script.c | 146 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
> tools/perf/builtin-stat.c | 9 ++++-
> tools/perf/scripts/python/stat-cpi.py | 77 ++++++++++++++++++++++++++++++++++++++++
> tools/perf/util/scripting-engines/trace-event-python.c | 114 +++++++++++++++++++++++++++++++++++++++++++++++++++++++----
> tools/perf/util/trace-event.h | 4 +++
> 5 files changed, 343 insertions(+), 7 deletions(-)
> create mode 100644 tools/perf/scripts/python/stat-cpi.py
^ permalink raw reply [flat|nested] 17+ messages in thread