linux-perf-users.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Arnaldo Carvalho de Melo <acme@kernel.org>
To: Ingo Molnar <mingo@kernel.org>
Cc: linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org,
	Jiri Olsa <jolsa@kernel.org>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Andi Kleen <andi@firstfloor.org>, David Ahern <dsahern@gmail.com>,
	Namhyung Kim <namhyung@kernel.org>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Arnaldo Carvalho de Melo <acme@redhat.com>
Subject: [PATCH 03/15] perf stat: Use group read for event groups
Date: Fri, 28 Jul 2017 17:00:09 -0300	[thread overview]
Message-ID: <20170728200021.11389-4-acme@kernel.org> (raw)
In-Reply-To: <20170728200021.11389-1-acme@kernel.org>

From: Jiri Olsa <jolsa@kernel.org>

Make perf stat use group read if there are groups defined. The group
read will get the values for all members of a group within a single
syscall instead of calling the read syscall for every event.

We can see considerably fewer kernel cycles spent on a single group
read than on reading each event separately, as with the following perf
stat command:

  # perf stat -e {cycles,instructions} -I 10 -a sleep 1

Monitored with "perf stat -r 5 -e '{cycles:u,cycles:k}'"

Before:

        24,325,676      cycles:u
       297,040,775      cycles:k

       1.038554134 seconds time elapsed

After:
        25,034,418      cycles:u
       158,256,395      cycles:k

       1.036864497 seconds time elapsed

The perf_evsel__open fallback changes contributed by Andi Kleen.

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Andi Kleen <andi@firstfloor.org>
Cc: David Ahern <dsahern@gmail.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/20170726120206.9099-4-jolsa@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/builtin-stat.c | 30 +++++++++++++++++++++++++++---
 tools/perf/util/counts.h  |  1 +
 tools/perf/util/evsel.c   | 10 ++++++++++
 3 files changed, 38 insertions(+), 3 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 48ac53b199fc..866da7aa54bf 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -213,10 +213,20 @@ static void perf_stat__reset_stats(void)
 static int create_perf_stat_counter(struct perf_evsel *evsel)
 {
 	struct perf_event_attr *attr = &evsel->attr;
+	struct perf_evsel *leader = evsel->leader;
 
-	if (stat_config.scale)
+	if (stat_config.scale) {
 		attr->read_format = PERF_FORMAT_TOTAL_TIME_ENABLED |
 				    PERF_FORMAT_TOTAL_TIME_RUNNING;
+	}
+
+	/*
+	 * The event is part of a non-trivial group, let's enable
+	 * the group read (for leader) and ID retrieval for all
+	 * members.
+	 */
+	if (leader->nr_members > 1)
+		attr->read_format |= PERF_FORMAT_ID|PERF_FORMAT_GROUP;
 
 	attr->inherit = !no_inherit;
 
@@ -333,13 +343,21 @@ static int read_counter(struct perf_evsel *counter)
 			struct perf_counts_values *count;
 
 			count = perf_counts(counter->counts, cpu, thread);
-			if (perf_evsel__read(counter, cpu, thread, count)) {
+
+			/*
+			 * The leader's group read loads data into its group members
+			 * (via perf_evsel__read_counter) and sets their count->loaded.
+			 */
+			if (!count->loaded &&
+			    perf_evsel__read_counter(counter, cpu, thread)) {
 				counter->counts->scaled = -1;
 				perf_counts(counter->counts, cpu, thread)->ena = 0;
 				perf_counts(counter->counts, cpu, thread)->run = 0;
 				return -1;
 			}
 
+			count->loaded = false;
+
 			if (STAT_RECORD) {
 				if (perf_evsel__write_stat_event(counter, cpu, thread, count)) {
 					pr_err("failed to write stat event\n");
@@ -559,6 +577,11 @@ static int store_counter_ids(struct perf_evsel *counter)
 	return __store_counter_ids(counter, cpus, threads);
 }
 
+static bool perf_evsel__should_store_id(struct perf_evsel *counter)
+{
+	return STAT_RECORD || counter->attr.read_format & PERF_FORMAT_ID;
+}
+
 static int __run_perf_stat(int argc, const char **argv)
 {
 	int interval = stat_config.interval;
@@ -631,7 +654,8 @@ static int __run_perf_stat(int argc, const char **argv)
 		if (l > unit_width)
 			unit_width = l;
 
-		if (STAT_RECORD && store_counter_ids(counter))
+		if (perf_evsel__should_store_id(counter) &&
+		    store_counter_ids(counter))
 			return -1;
 	}
 
diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h
index 34d8baaf558a..cb45a6aecf9d 100644
--- a/tools/perf/util/counts.h
+++ b/tools/perf/util/counts.h
@@ -12,6 +12,7 @@ struct perf_counts_values {
 		};
 		u64 values[3];
 	};
+	bool	loaded;
 };
 
 struct perf_counts {
diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c
index 89aecf3a35c7..3735c9e0080d 100644
--- a/tools/perf/util/evsel.c
+++ b/tools/perf/util/evsel.c
@@ -49,6 +49,7 @@ static struct {
 	bool clockid_wrong;
 	bool lbr_flags;
 	bool write_backward;
+	bool group_read;
 } perf_missing_features;
 
 static clockid_t clockid;
@@ -1321,6 +1322,7 @@ perf_evsel__set_count(struct perf_evsel *counter, int cpu, int thread,
 	count->val    = val;
 	count->ena    = ena;
 	count->run    = run;
+	count->loaded = true;
 }
 
 static int
@@ -1677,6 +1679,8 @@ int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
 	if (perf_missing_features.lbr_flags)
 		evsel->attr.branch_sample_type &= ~(PERF_SAMPLE_BRANCH_NO_FLAGS |
 				     PERF_SAMPLE_BRANCH_NO_CYCLES);
+	if (perf_missing_features.group_read && evsel->attr.inherit)
+		evsel->attr.read_format &= ~(PERF_FORMAT_GROUP|PERF_FORMAT_ID);
 retry_sample_id:
 	if (perf_missing_features.sample_id_all)
 		evsel->attr.sample_id_all = 0;
@@ -1832,6 +1836,12 @@ int perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus,
 		perf_missing_features.lbr_flags = true;
 		pr_debug2("switching off branch sample type no (cycles/flags)\n");
 		goto fallback_missing_features;
+	} else if (!perf_missing_features.group_read &&
+		    evsel->attr.inherit &&
+		   (evsel->attr.read_format & PERF_FORMAT_GROUP)) {
+		perf_missing_features.group_read = true;
+		pr_debug2("switching off group read\n");
+		goto fallback_missing_features;
 	}
 out_close:
 	do {
-- 
2.9.4

  parent reply	other threads:[~2017-07-28 20:00 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2017-07-28 20:00 [GIT PULL 00/15] perf/core improvements and fixes Arnaldo Carvalho de Melo
2017-07-28 20:00 ` [PATCH 01/15] perf tools: Add perf_evsel__read_size function Arnaldo Carvalho de Melo
2017-07-28 20:00 ` [PATCH 02/15] perf evsel: Add read_counter() Arnaldo Carvalho de Melo
2017-07-28 20:00 ` Arnaldo Carvalho de Melo [this message]
2017-07-28 20:00 ` [PATCH 04/15] perf annotate: Do not overwrite perf_sample->weight Arnaldo Carvalho de Melo
2017-07-28 20:00 ` [PATCH 05/15] perf sort: Use default sort if evlist is empty Arnaldo Carvalho de Melo
2017-07-28 20:00 ` [PATCH 06/15] perf annotate stdio: Set enough columns for --show-total-period Arnaldo Carvalho de Melo
2017-07-28 20:00 ` [PATCH 07/15] perf annotate: Fix storing per line sym_hist_entry Arnaldo Carvalho de Melo
2017-07-28 20:00 ` [PATCH 08/15] perf annotate TUI: Use sym_hist_entry in disasm_line_samples Arnaldo Carvalho de Melo
2017-07-28 20:00 ` [PATCH 09/15] perf annotate TUI: Fix --show-total-period Arnaldo Carvalho de Melo
2017-07-28 20:00 ` [PATCH 10/15] perf annotate TUI: Clarify calculation of column header widths Arnaldo Carvalho de Melo
2017-07-28 20:00 ` [PATCH 11/15] perf annotate TUI: Fix column header when toggling period/percent Arnaldo Carvalho de Melo
2017-07-28 20:00 ` [PATCH 12/15] perf annotate TUI: Set appropriate column width for period/percent Arnaldo Carvalho de Melo
2017-07-28 20:00 ` [PATCH 13/15] perf data: Add callchain to CTF conversion Arnaldo Carvalho de Melo
2017-07-28 20:00 ` [PATCH 14/15] perf data: Add mmap[2] events " Arnaldo Carvalho de Melo
2017-07-28 20:00 ` [PATCH 15/15] perf data: Add doc when no conversion support compiled Arnaldo Carvalho de Melo
2017-07-30  9:31 ` [GIT PULL 00/15] perf/core improvements and fixes Ingo Molnar
2017-07-30  9:37 ` [PATCH] perf build: Clarify header version warning message Ingo Molnar
2017-07-30  9:51 ` perf build: Clarify open-coded " Ingo Molnar
2017-07-30  9:52 ` tools/include: Sync kernel ABI headers with tooling headers Ingo Molnar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20170728200021.11389-4-acme@kernel.org \
    --to=acme@kernel.org \
    --cc=a.p.zijlstra@chello.nl \
    --cc=acme@redhat.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=andi@firstfloor.org \
    --cc=dsahern@gmail.com \
    --cc=jolsa@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=namhyung@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).