From: Jiri Olsa <jolsa@kernel.org>
To: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: lkml <linux-kernel@vger.kernel.org>,
Adrian Hunter <adrian.hunter@intel.com>,
Andi Kleen <ak@linux.intel.com>, David Ahern <dsahern@gmail.com>,
Ingo Molnar <mingo@kernel.org>,
Namhyung Kim <namhyung@kernel.org>,
Peter Zijlstra <a.p.zijlstra@chello.nl>,
Stephane Eranian <eranian@google.com>
Subject: [PATCH 30/30] perf stat: Introduce --per-task option
Date: Sun, 14 Jun 2015 10:19:45 +0200 [thread overview]
Message-ID: <1434269985-521-31-git-send-email-jolsa@kernel.org> (raw)
In-Reply-To: <1434269985-521-1-git-send-email-jolsa@kernel.org>
Currently the values of all tasks given via the -p option PID arguments
get aggregated and printed as single values.
Adding the --per-task option to print values per task.
$ perf stat -e cycles,instructions --per-task -p 25388,25442 -a
^C
Performance counter stats for process id '25388,25442':
cat-25388 100,122 cycles
vim-25442 4,167,876 cycles
cat-25388 20,080 instructions
vim-25442 3,232,735 instructions
6.057130572 seconds time elapsed
Also works under interval mode:
$ perf stat -e cycles,instructions --per-task -p 25388,25442 -a -I 1000
# time task counts unit events
1.000190002 cat-25388 54,196 cycles
1.000190002 vim-25442 0 cycles
1.000190002 cat-25388 11,660 instructions
1.000190002 vim-25442 0 instructions
2.000742579 cat-25388 55,247 cycles
2.000742579 vim-25442 1,752,951 cycles
2.000742579 cat-25388 11,872 instructions
2.000742579 vim-25442 1,238,512 instructions
3.001057871 cat-25388 0 cycles
3.001057871 vim-25442 0 cycles
3.001057871 cat-25388 0 instructions
3.001057871 vim-25442 0 instructions
^C 3.242519732 cat-25388 0 cycles
3.242519732 vim-25442 0 cycles
3.242519732 cat-25388 0 instructions
3.242519732 vim-25442 0 instructions
It works only with the -t and -p options; otherwise the following
error is printed:
$ perf stat -e cycles --per-task -I 1000 ls
The --per-task option is only available when monitoring tasks via -p -t options.
-p, --pid <pid> stat events on existing process id
-t, --tid <tid> stat events on existing thread id
Link: http://lkml.kernel.org/n/tip-0v0ixd9k7o9z1u8hqngm1coe@git.kernel.org
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
tools/perf/Documentation/perf-stat.txt | 3 ++
tools/perf/builtin-stat.c | 67 +++++++++++++++++++++++++++++++++-
tools/perf/util/stat.h | 1 +
3 files changed, 69 insertions(+), 2 deletions(-)
diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 04e150d83e7d..b83cc5bbfa9a 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -144,6 +144,9 @@ is a useful mode to detect imbalance between physical cores. To enable this mod
use --per-core in addition to -a. (system-wide). The output includes the
core number and the number of online logical processors on that physical processor.
+--per-task::
+Aggregate counts per monitored thread (-t option) or process (-p option).
+
-D msecs::
--delay msecs::
After starting the program, wait msecs before measuring. This is useful to
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index cefc905343f7..c202c5d629bf 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -237,6 +237,7 @@ process_counter_values(struct perf_evsel *evsel, int cpu, int thread,
count = &zero;
switch (aggr_mode) {
+ case AGGR_TASK:
case AGGR_CORE:
case AGGR_SOCKET:
case AGGR_NONE:
@@ -608,6 +609,13 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr)
csv_output ? 0 : -4,
perf_evsel__cpus(evsel)->map[id], csv_sep);
break;
+ case AGGR_TASK:
+ fprintf(output, "%s-%*d%s",
+ thread_map__comm(evsel->threads, id),
+ csv_output ? 0 : -8,
+ thread_map__pid(evsel->threads, id),
+ csv_sep);
+ break;
case AGGR_GLOBAL:
default:
break;
@@ -756,6 +764,40 @@ static void print_aggr(char *prefix)
}
}
+static void print_aggr_task(struct perf_evsel *counter, char *prefix)
+{
+ int nthreads = thread_map__nr(counter->threads);
+ int ncpus = cpu_map__nr(counter->cpus);
+ int cpu, thread;
+ double uval;
+
+ for (thread = 0; thread < nthreads; thread++) {
+ u64 ena = 0, run = 0, val = 0;
+
+ for (cpu = 0; cpu < ncpus; cpu++) {
+ val += perf_counts(counter->counts, cpu, thread)->val;
+ ena += perf_counts(counter->counts, cpu, thread)->ena;
+ run += perf_counts(counter->counts, cpu, thread)->run;
+ }
+
+ if (prefix)
+ fprintf(output, "%s", prefix);
+
+ uval = val * counter->scale;
+
+ if (nsec_counter(counter))
+ nsec_printout(thread, 0, counter, uval);
+ else
+ abs_printout(thread, 0, counter, uval);
+
+ if (!csv_output)
+ print_noise(counter, 1.0);
+
+ print_running(run, ena);
+ fputc('\n', output);
+ }
+}
+
/*
* Print out the results of a single counter:
* aggregated counts in system-wide mode
@@ -882,6 +924,9 @@ static void print_interval(char *prefix, struct timespec *ts)
case AGGR_NONE:
fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit");
break;
+ case AGGR_TASK:
+ fprintf(output, "# time task counts %*s events\n", unit_width, "unit");
+ break;
case AGGR_GLOBAL:
default:
fprintf(output, "# time counts %*s events\n", unit_width, "unit");
@@ -950,6 +995,10 @@ static void print_counters(struct timespec *ts, int argc, const char **argv)
case AGGR_SOCKET:
print_aggr(prefix);
break;
+ case AGGR_TASK:
+ evlist__for_each(evsel_list, counter)
+ print_aggr_task(counter, prefix);
+ break;
case AGGR_GLOBAL:
evlist__for_each(evsel_list, counter)
print_counter_aggr(counter, prefix);
@@ -1037,6 +1086,7 @@ static int perf_stat_init_aggr_mode(void)
break;
case AGGR_NONE:
case AGGR_GLOBAL:
+ case AGGR_TASK:
default:
break;
}
@@ -1261,6 +1311,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
"aggregate counts per processor socket", AGGR_SOCKET),
OPT_SET_UINT(0, "per-core", &aggr_mode,
"aggregate counts per physical processor core", AGGR_CORE),
+ OPT_SET_UINT(0, "per-task", &aggr_mode,
+ "aggregate counts per task", AGGR_TASK),
OPT_UINTEGER('D', "delay", &initial_delay,
"ms to wait before starting measurement after program start"),
OPT_END()
@@ -1352,8 +1404,19 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
run_count = 1;
}
- /* no_aggr, cgroup are for system-wide only */
- if ((aggr_mode != AGGR_GLOBAL || nr_cgroups) &&
+ if ((aggr_mode == AGGR_TASK) && !target__has_task(&target)) {
+ fprintf(stderr, "The --per-task option is only available "
+ "when monitoring tasks via -p -t options.\n");
+ parse_options_usage(NULL, options, "p", 1);
+ parse_options_usage(NULL, options, "t", 1);
+ goto out;
+ }
+
+ /*
+ * no_aggr, cgroup are for system-wide only
+ * --per-task is aggregated per task, we don't mix it with cpu mode
+ */
+ if (((aggr_mode != AGGR_GLOBAL && aggr_mode != AGGR_TASK) || nr_cgroups) &&
!target__has_cpu(&target)) {
fprintf(stderr, "both cgroup and no-aggregation "
"modes only available in system-wide mode\n");
diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h
index 870256735a77..deb9a0faccf7 100644
--- a/tools/perf/util/stat.h
+++ b/tools/perf/util/stat.h
@@ -30,6 +30,7 @@ enum aggr_mode {
AGGR_GLOBAL,
AGGR_SOCKET,
AGGR_CORE,
+ AGGR_TASK,
};
struct perf_counts_values {
--
1.9.3
prev parent reply other threads:[~2015-06-14 8:23 UTC|newest]
Thread overview: 45+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-06-14 8:19 [PATCHv2 00/30] perf stat: Introduce --per-task option Jiri Olsa
2015-06-14 8:19 ` [PATCH 01/30] perf tools: Introduce xyarray__reset function Jiri Olsa
2015-06-18 8:13 ` [tip:perf/core] " tip-bot for Jiri Olsa
2015-06-14 8:19 ` [PATCH 02/30] perf tools: Add thread_map__(alloc|realloc) helpers Jiri Olsa
2015-06-18 8:14 ` [tip:perf/core] " tip-bot for Jiri Olsa
2015-06-14 8:19 ` [PATCH 03/30] perf tools: Change thread_map::map into struct Jiri Olsa
2015-06-15 19:04 ` Arnaldo Carvalho de Melo
2015-06-15 21:33 ` Jiri Olsa
2015-06-14 8:19 ` [PATCH 04/30] perf tools: Add comm string into struct thread_map Jiri Olsa
2015-06-15 19:17 ` Arnaldo Carvalho de Melo
2015-06-15 21:55 ` Jiri Olsa
2015-06-14 8:19 ` [PATCH 05/30] perf tests: Add thread_map object tests Jiri Olsa
2015-06-14 8:19 ` [PATCH 06/30] perf tools: Add reference counting for cpu_map object Jiri Olsa
2015-06-14 8:19 ` [PATCH 07/30] perf tools: Add reference counting for thread_map object Jiri Olsa
2015-06-15 19:25 ` Arnaldo Carvalho de Melo
2015-06-15 21:28 ` Jiri Olsa
2015-06-14 8:19 ` [PATCH 08/30] perf tools: Propagate cpu maps through the evlist Jiri Olsa
2015-06-15 19:34 ` Arnaldo Carvalho de Melo
2015-06-15 21:29 ` Jiri Olsa
2015-06-14 8:19 ` [PATCH 09/30] perf tools: Propagate thread " Jiri Olsa
2015-06-14 8:19 ` [PATCH 10/30] perf tools: Make perf_evsel__(nr_)cpus generic Jiri Olsa
2015-06-14 8:19 ` [PATCH 11/30] perf tools: Move perf_evsel__(alloc|free|reset)_counts into stat object Jiri Olsa
2015-06-15 20:13 ` Arnaldo Carvalho de Melo
2015-06-18 8:14 ` [tip:perf/core] perf tools: Move perf_evsel__(alloc|free|reset) _counts " tip-bot for Jiri Olsa
2015-06-14 8:19 ` [PATCH 12/30] perf stat: Introduce perf_counts__(alloc|free|reset) functions Jiri Olsa
2015-06-15 20:16 ` Arnaldo Carvalho de Melo
2015-06-18 8:14 ` [tip:perf/core] perf stat: Introduce perf_counts__( new|delete|reset) functions tip-bot for Jiri Olsa
2015-06-14 8:19 ` [PATCH 13/30] perf stat: Introduce perf_counts function Jiri Olsa
2015-06-14 8:19 ` [PATCH 14/30] perf stat: Use xyarray for cpu evsel counts Jiri Olsa
2015-06-14 8:19 ` [PATCH 15/30] perf stat: Make stats work over the thread dimension Jiri Olsa
2015-06-14 8:19 ` [PATCH 16/30] perf stat: Rename struct perf_counts::cpu member to values Jiri Olsa
2015-06-14 8:19 ` [PATCH 17/30] perf stat: Move perf_evsel__(alloc|free|reset)_stat_priv into stat object Jiri Olsa
2015-06-14 8:19 ` [PATCH 18/30] perf stat: Move perf_evsel__(alloc|free)_prev_raw_counts " Jiri Olsa
2015-06-14 8:19 ` [PATCH 19/30] perf stat: Move perf_evlist__(alloc|free)_stats into evlist object Jiri Olsa
2015-06-14 8:19 ` [PATCH 20/30] perf stat: Introduce perf_evsel__alloc_stats function Jiri Olsa
2015-06-14 8:19 ` [PATCH 21/30] perf stat: Introduce perf_evsel__read function Jiri Olsa
2015-06-14 8:19 ` [PATCH 22/30] perf stat: Introduce read_counters function Jiri Olsa
2015-06-14 8:19 ` [PATCH 23/30] perf stat: Separate counters reading and processing Jiri Olsa
2015-06-14 8:19 ` [PATCH 24/30] perf stat: Move zero_per_pkg into counter process code Jiri Olsa
2015-06-14 8:19 ` [PATCH 25/30] perf stat: Move perf_stat initialization " Jiri Olsa
2015-06-14 8:19 ` [PATCH 26/30] perf stat: Remove perf_evsel__read_cb function Jiri Olsa
2015-06-14 8:19 ` [PATCH 27/30] perf stat: Rename print_interval to process_interval Jiri Olsa
2015-06-14 8:19 ` [PATCH 28/30] perf stat: Using init_stats instead of memset Jiri Olsa
2015-06-14 8:19 ` [PATCH 29/30] perf stat: Introduce print_counters function Jiri Olsa
2015-06-14 8:19 ` Jiri Olsa [this message]
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1434269985-521-31-git-send-email-jolsa@kernel.org \
--to=jolsa@kernel.org \
--cc=a.p.zijlstra@chello.nl \
--cc=acme@kernel.org \
--cc=adrian.hunter@intel.com \
--cc=ak@linux.intel.com \
--cc=dsahern@gmail.com \
--cc=eranian@google.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=namhyung@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox