All of lore.kernel.org
 help / color / mirror / Atom feed
From: Arnaldo Carvalho de Melo <acme@kernel.org>
To: Ingo Molnar <mingo@kernel.org>
Cc: linux-kernel@vger.kernel.org, Andi Kleen <ak@linux.intel.com>,
	Arnaldo Carvalho de Melo <acme@redhat.com>
Subject: [PATCH 09/20] perf stat: Collapse identically named events
Date: Fri, 24 Mar 2017 11:57:20 -0300	[thread overview]
Message-ID: <20170324145731.29350-10-acme@kernel.org> (raw)
In-Reply-To: <20170324145731.29350-1-acme@kernel.org>

From: Andi Kleen <ak@linux.intel.com>

The uncore PMU has a lot of duplicated PMUs for different subsystems.
When expanding an uncore alias we usually end up with a large
number of identically named aliases, which makes perf stat
output difficult to read.

Automatically sum them up in perf stat, unless --no-merge is specified.

This can be default because only the uncores generally have duplicated
aliases. Other PMUs have unique names.

Before:

  % perf stat --no-merge -a -e unc_c_llc_lookup.any sleep 1

  Performance counter stats for 'system wide':

           694,976 Bytes unc_c_llc_lookup.any
           706,304 Bytes unc_c_llc_lookup.any
           956,608 Bytes unc_c_llc_lookup.any
           782,720 Bytes unc_c_llc_lookup.any
           605,696 Bytes unc_c_llc_lookup.any
           442,816 Bytes unc_c_llc_lookup.any
           659,328 Bytes unc_c_llc_lookup.any
           509,312 Bytes unc_c_llc_lookup.any
           263,936 Bytes unc_c_llc_lookup.any
           592,448 Bytes unc_c_llc_lookup.any
           672,448 Bytes unc_c_llc_lookup.any
           608,640 Bytes unc_c_llc_lookup.any
           641,024 Bytes unc_c_llc_lookup.any
           856,896 Bytes unc_c_llc_lookup.any
           808,832 Bytes unc_c_llc_lookup.any
           684,864 Bytes unc_c_llc_lookup.any
           710,464 Bytes unc_c_llc_lookup.any
           538,304 Bytes unc_c_llc_lookup.any

       1.002577660 seconds time elapsed

After:

  % perf stat -a -e unc_c_llc_lookup.any sleep 1

  Performance counter stats for 'system wide':

         2,685,120 Bytes unc_c_llc_lookup.any

       1.002648032 seconds time elapsed

v2: Split collect_aliases. Rename alias flag.
v3: Make sure unsupported/not counted is always printed.
v4: Factor out callback change into separate patch.
v5: Move check for bad results here
    Move merged check into collect_data

Signed-off-by: Andi Kleen <ak@linux.intel.com>
Acked-by: Jiri Olsa <jolsa@kernel.org>
Link: http://lkml.kernel.org/r/20170320201711.14142-3-andi@firstfloor.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-stat.txt |  3 +++
 tools/perf/builtin-stat.c              | 38 ++++++++++++++++++++++++++++++----
 tools/perf/util/evsel.h                |  1 +
 3 files changed, 38 insertions(+), 4 deletions(-)

diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 978548138624..bd0e4417f2be 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -236,6 +236,9 @@ To interpret the results it is usually needed to know on which
 CPUs the workload runs on. If needed the CPUs can be forced using
 taskset.
 
+--no-merge::
+Do not merge results from same PMUs.
+
 EXAMPLES
 --------
 
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 5c13a0f40adc..a4da10a506dd 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -140,6 +140,7 @@ static unsigned int		unit_width			= 4; /* strlen("unit") */
 static bool			forever				= false;
 static bool			metric_only			= false;
 static bool			force_metric_only		= false;
+static bool			no_merge			= false;
 static struct timespec		ref_time;
 static struct cpu_map		*aggr_map;
 static aggr_get_id_t		aggr_get_id;
@@ -1182,12 +1183,37 @@ static void aggr_update_shadow(void)
 	}
 }
 
-static void collect_data(struct perf_evsel *counter,
+static void collect_all_aliases(struct perf_evsel *counter,
 			    void (*cb)(struct perf_evsel *counter, void *data,
 				       bool first),
 			    void *data)
 {
+	struct perf_evsel *alias;
+
+	alias = list_prepare_entry(counter, &(evsel_list->entries), node);
+	list_for_each_entry_continue (alias, &evsel_list->entries, node) {
+		if (strcmp(perf_evsel__name(alias), perf_evsel__name(counter)) ||
+		    alias->scale != counter->scale ||
+		    alias->cgrp != counter->cgrp ||
+		    strcmp(alias->unit, counter->unit) ||
+		    nsec_counter(alias) != nsec_counter(counter))
+			break;
+		alias->merged_stat = true;
+		cb(alias, data, false);
+	}
+}
+
+static bool collect_data(struct perf_evsel *counter,
+			    void (*cb)(struct perf_evsel *counter, void *data,
+				       bool first),
+			    void *data)
+{
+	if (counter->merged_stat)
+		return false;
 	cb(counter, data, true);
+	if (!no_merge)
+		collect_all_aliases(counter, cb, data);
+	return true;
 }
 
 struct aggr_data {
@@ -1245,7 +1271,8 @@ static void print_aggr(char *prefix)
 		evlist__for_each_entry(evsel_list, counter) {
 			ad.val = ad.ena = ad.run = 0;
 			ad.nr = 0;
-			collect_data(counter, aggr_cb, &ad);
+			if (!collect_data(counter, aggr_cb, &ad))
+				continue;
 			nr = ad.nr;
 			ena = ad.ena;
 			run = ad.run;
@@ -1318,7 +1345,8 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
 	double uval;
 	struct caggr_data cd = { .avg = 0.0 };
 
-	collect_data(counter, counter_aggr_cb, &cd);
+	if (!collect_data(counter, counter_aggr_cb, &cd))
+		return;
 
 	if (prefix && !metric_only)
 		fprintf(output, "%s", prefix);
@@ -1353,7 +1381,8 @@ static void print_counter(struct perf_evsel *counter, char *prefix)
 	for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
 		struct aggr_data ad = { .cpu = cpu };
 
-		collect_data(counter, counter_cb, &ad);
+		if (!collect_data(counter, counter_cb, &ad))
+			return;
 		val = ad.val;
 		ena = ad.ena;
 		run = ad.run;
@@ -1701,6 +1730,7 @@ static const struct option stat_options[] = {
 		    "list of cpus to monitor in system-wide"),
 	OPT_SET_UINT('A', "no-aggr", &stat_config.aggr_mode,
 		    "disable CPU count aggregation", AGGR_NONE),
+	OPT_BOOLEAN(0, "no-merge", &no_merge, "Do not merge identical named events"),
 	OPT_STRING('x', "field-separator", &csv_sep, "separator",
 		   "print counts with custom separator"),
 	OPT_CALLBACK('G', "cgroup", &evsel_list, "name",
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 06ef6f29efa1..bd2e9b112d49 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -131,6 +131,7 @@ struct perf_evsel {
 	bool			cmdline_group_boundary;
 	struct list_head	config_terms;
 	int			bpf_fd;
+	bool			merged_stat;
 };
 
 union u64_swap {
-- 
2.9.3

  parent reply	other threads:[~2017-03-24 15:46 UTC|newest]

Thread overview: 24+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-03-24 14:57 [GIT PULL 00/20] perf/core improvements and fixes Arnaldo Carvalho de Melo
2017-03-24 14:57 ` Arnaldo Carvalho de Melo
2017-03-24 14:57 ` [PATCH 01/20] perf probe: Change MAX_CMDLEN Arnaldo Carvalho de Melo
2017-03-24 14:57 ` [PATCH 02/20] perf probe: Return errno when not hitting any event Arnaldo Carvalho de Melo
2017-03-24 14:57 ` [PATCH 03/20] perf sdt: Add scanning of sdt probes arguments Arnaldo Carvalho de Melo
2017-03-24 14:57 ` [PATCH 04/20] perf probe: Add sdt probes arguments into the uprobe cmd string Arnaldo Carvalho de Melo
2017-03-24 14:57 ` [PATCH 05/20] perf sdt x86: Add renaming logic for rNN and other registers Arnaldo Carvalho de Melo
2017-03-24 14:57 ` [PATCH 06/20] perf annotate: More exactly grep -v of the objdump command Arnaldo Carvalho de Melo
2017-03-24 14:57 ` [PATCH 07/20] perf annotate: Add comment clarifying how the source code line is parsed Arnaldo Carvalho de Melo
2017-03-24 14:57 ` [PATCH 08/20] perf stat: Factor out callback for collecting event values Arnaldo Carvalho de Melo
2017-03-24 14:57 ` Arnaldo Carvalho de Melo [this message]
2017-03-24 14:57 ` [PATCH 10/20] perf stat: Handle partially bad results with merging Arnaldo Carvalho de Melo
2017-03-24 14:57 ` [PATCH 11/20] perf tools: Factor out PMU matching in parser Arnaldo Carvalho de Melo
2017-03-24 14:57 ` [PATCH 12/20] perf pmu: Expand PMU events by prefix match Arnaldo Carvalho de Melo
2017-03-24 14:57 ` [PATCH 13/20] perf pmu: Special case uncore_ prefix Arnaldo Carvalho de Melo
2017-03-24 14:57 ` [PATCH 14/20] perf tools: Add a simple expression parser for JSON Arnaldo Carvalho de Melo
2017-03-24 14:57 ` [PATCH 15/20] perf vendor events intel: Update Intel uncore JSON event files Arnaldo Carvalho de Melo
2017-03-24 14:57 ` [PATCH 16/20] perf pmu: Support MetricExpr header in JSON event list Arnaldo Carvalho de Melo
2017-03-24 14:57 ` [PATCH 17/20] perf stat: Output JSON MetricExpr metric Arnaldo Carvalho de Melo
2017-03-24 14:57 ` [PATCH 18/20] perf list: Support printing MetricExpr with --debug Arnaldo Carvalho de Melo
2017-03-24 14:57 ` [PATCH 19/20] perf pmu: Add support for MetricName JSON attribute Arnaldo Carvalho de Melo
2017-03-24 14:57 ` [PATCH 20/20] perf list: Move extra details printing to new option Arnaldo Carvalho de Melo
2017-03-24 18:39 ` [GIT PULL 00/20] perf/core improvements and fixes Ingo Molnar
2017-03-24 18:39   ` Ingo Molnar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170324145731.29350-10-acme@kernel.org \
    --to=acme@kernel.org \
    --cc=acme@redhat.com \
    --cc=ak@linux.intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.