linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Arnaldo Carvalho de Melo <acme@infradead.org>
To: Ingo Molnar <mingo@kernel.org>
Cc: linux-kernel@vger.kernel.org,
	Stephane Eranian <eranian@google.com>,
	Andi Kleen <ak@linux.intel.com>, Ingo Molnar <mingo@elte.hu>,
	Jiri Olsa <jolsa@redhat.com>, Namhyung Kim <namhyung.kim@lge.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Arnaldo Carvalho de Melo <acme@redhat.com>
Subject: [PATCH 10/17] perf stat: Add per processor socket count aggregation
Date: Wed,  6 Feb 2013 18:44:12 -0300	[thread overview]
Message-ID: <1360187059-12661-11-git-send-email-acme@infradead.org> (raw)
In-Reply-To: <1360187059-12661-1-git-send-email-acme@infradead.org>

From: Stephane Eranian <eranian@google.com>

This patch adds per-processor socket count aggregation for system-wide
mode measurements. This is a useful mode to detect imbalance between
sockets.

To enable this mode, use --aggr-socket in addition
to -a. (system-wide).

The output includes the socket number and the number of online
processors on that socket. This is useful to gauge the amount of
aggregation.

 # ./perf stat -I 1000 -a --aggr-socket -e cycles sleep 2
 #           time socket cpus             counts events
      1.000097680 S0        4          5,788,785 cycles
      2.000379943 S0        4         27,361,546 cycles
      2.001167808 S0        4            818,275 cycles

Signed-off-by: Stephane Eranian <eranian@google.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Ingo Molnar <mingo@elte.hu>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung.kim@lge.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1360161962-9675-3-git-send-email-eranian@google.com
[ committer note: Added missing man page entry based on above comments ]
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/Documentation/perf-stat.txt |   9 ++-
 tools/perf/builtin-stat.c              | 126 ++++++++++++++++++++++++++++++---
 2 files changed, 123 insertions(+), 12 deletions(-)

diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 5289da3..faf4f4f 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -116,9 +116,16 @@ perf stat --repeat 10 --null --sync --pre 'make -s O=defconfig-build/clean' -- m
 
 -I msecs::
 --interval-print msecs::
-	print count deltas every N milliseconds (minimum: 100ms)
+	Print count deltas every N milliseconds (minimum: 100ms)
 	example: perf stat -I 1000 -e cycles -a sleep 5
 
+--aggr-socket::
+Aggregate counts per processor socket for system-wide mode measurements.  This
+is a useful mode to detect imbalance between sockets.  To enable this mode,
+use --aggr-socket in addition to -a. (system-wide).  The output includes the
+socket number and the number of online processors on that socket. This is
+useful to gauge the amount of aggregation.
+
 EXAMPLES
 --------
 
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 0368a10..9984876 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -68,6 +68,7 @@
 static void print_stat(int argc, const char **argv);
 static void print_counter_aggr(struct perf_evsel *counter, char *prefix);
 static void print_counter(struct perf_evsel *counter, char *prefix);
+static void print_aggr_socket(char *prefix);
 
 static struct perf_evlist	*evsel_list;
 
@@ -79,6 +80,7 @@ static int			run_count			=  1;
 static bool			no_inherit			= false;
 static bool			scale				=  true;
 static bool			no_aggr				= false;
+static bool			aggr_socket			= false;
 static pid_t			child_pid			= -1;
 static bool			null_run			=  false;
 static int			detailed_run			=  0;
@@ -93,6 +95,7 @@ static const char		*post_cmd			= NULL;
 static bool			sync_run			= false;
 static unsigned int		interval			= 0;
 static struct timespec		ref_time;
+static struct cpu_map		*sock_map;
 
 static volatile int done = 0;
 
@@ -312,7 +315,9 @@ static void print_interval(void)
 	sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep);
 
 	if (num_print_interval == 0 && !csv_output) {
-		if (no_aggr)
+		if (aggr_socket)
+			fprintf(output, "#           time socket cpus             counts events\n");
+		else if (no_aggr)
 			fprintf(output, "#           time CPU                 counts events\n");
 		else
 			fprintf(output, "#           time             counts events\n");
@@ -321,7 +326,9 @@ static void print_interval(void)
 	if (++num_print_interval == 25)
 		num_print_interval = 0;
 
-	if (no_aggr) {
+	if (aggr_socket)
+		print_aggr_socket(prefix);
+	else if (no_aggr) {
 		list_for_each_entry(counter, &evsel_list->entries, node)
 			print_counter(counter, prefix);
 	} else {
@@ -349,6 +356,12 @@ static int __run_perf_stat(int argc __maybe_unused, const char **argv)
 		ts.tv_nsec = 0;
 	}
 
+	if (aggr_socket
+	    && cpu_map__build_socket_map(evsel_list->cpus, &sock_map)) {
+		perror("cannot build socket map");
+		return -1;
+	}
+
 	if (forks && (pipe(child_ready_pipe) < 0 || pipe(go_pipe) < 0)) {
 		perror("failed to create pipes");
 		return -1;
@@ -529,13 +542,21 @@ static void print_noise(struct perf_evsel *evsel, double avg)
 	print_noise_pct(stddev_stats(&ps->res_stats[0]), avg);
 }
 
-static void nsec_printout(int cpu, struct perf_evsel *evsel, double avg)
+static void nsec_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
 {
 	double msecs = avg / 1e6;
 	char cpustr[16] = { '\0', };
 	const char *fmt = csv_output ? "%s%.6f%s%s" : "%s%18.6f%s%-25s";
 
-	if (no_aggr)
+	if (aggr_socket)
+		sprintf(cpustr, "S%*d%s%*d%s",
+			csv_output ? 0 : -5,
+			cpu,
+			csv_sep,
+			csv_output ? 0 : 4,
+			nr,
+			csv_sep);
+	else if (no_aggr)
 		sprintf(cpustr, "CPU%*d%s",
 			csv_output ? 0 : -4,
 			perf_evsel__cpus(evsel)->map[cpu], csv_sep);
@@ -734,7 +755,7 @@ static void print_ll_cache_misses(int cpu,
 	fprintf(output, " of all LL-cache hits   ");
 }
 
-static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
+static void abs_printout(int cpu, int nr, struct perf_evsel *evsel, double avg)
 {
 	double total, ratio = 0.0;
 	char cpustr[16] = { '\0', };
@@ -747,7 +768,15 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
 	else
 		fmt = "%s%18.0f%s%-25s";
 
-	if (no_aggr)
+	if (aggr_socket)
+		sprintf(cpustr, "S%*d%s%*d%s",
+			csv_output ? 0 : -5,
+			cpu,
+			csv_sep,
+			csv_output ? 0 : 4,
+			nr,
+			csv_sep);
+	else if (no_aggr)
 		sprintf(cpustr, "CPU%*d%s",
 			csv_output ? 0 : -4,
 			perf_evsel__cpus(evsel)->map[cpu], csv_sep);
@@ -853,6 +882,70 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
 	}
 }
 
+static void print_aggr_socket(char *prefix)
+{
+	struct perf_evsel *counter;
+	u64 ena, run, val;
+	int cpu, s, s2, sock, nr;
+
+	if (!sock_map)
+		return;
+
+	for (s = 0; s < sock_map->nr; s++) {
+		sock = cpu_map__socket(sock_map, s);
+		list_for_each_entry(counter, &evsel_list->entries, node) {
+			val = ena = run = 0;
+			nr = 0;
+			for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) {
+				s2 = cpu_map__get_socket(evsel_list->cpus, cpu);
+				if (s2 != sock)
+					continue;
+				val += counter->counts->cpu[cpu].val;
+				ena += counter->counts->cpu[cpu].ena;
+				run += counter->counts->cpu[cpu].run;
+				nr++;
+			}
+			if (prefix)
+				fprintf(output, "%s", prefix);
+
+			if (run == 0 || ena == 0) {
+				fprintf(output, "S%*d%s%*d%s%*s%s%*s",
+					csv_output ? 0 : -5,
+					s,
+					csv_sep,
+					csv_output ? 0 : 4,
+					nr,
+					csv_sep,
+					csv_output ? 0 : 18,
+					counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED,
+					csv_sep,
+					csv_output ? 0 : -24,
+					perf_evsel__name(counter));
+				if (counter->cgrp)
+					fprintf(output, "%s%s",
+						csv_sep, counter->cgrp->name);
+
+				fputc('\n', output);
+				continue;
+			}
+
+			if (nsec_counter(counter))
+				nsec_printout(sock, nr, counter, val);
+			else
+				abs_printout(sock, nr, counter, val);
+
+			if (!csv_output) {
+				print_noise(counter, 1.0);
+
+				if (run != ena)
+					fprintf(output, "  (%.2f%%)",
+						100.0 * run / ena);
+			}
+			fputc('\n', output);
+		}
+	}
+}
+
 /*
  * Print out the results of a single counter:
  * aggregated counts in system-wide mode
@@ -882,9 +975,9 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix)
 	}
 
 	if (nsec_counter(counter))
-		nsec_printout(-1, counter, avg);
+		nsec_printout(-1, 0, counter, avg);
 	else
-		abs_printout(-1, counter, avg);
+		abs_printout(-1, 0, counter, avg);
 
 	print_noise(counter, avg);
 
@@ -940,9 +1033,9 @@ static void print_counter(struct perf_evsel *counter, char *prefix)
 		}
 
 		if (nsec_counter(counter))
-			nsec_printout(cpu, counter, val);
+			nsec_printout(cpu, 0, counter, val);
 		else
-			abs_printout(cpu, counter, val);
+			abs_printout(cpu, 0, counter, val);
 
 		if (!csv_output) {
 			print_noise(counter, 1.0);
@@ -980,7 +1073,9 @@ static void print_stat(int argc, const char **argv)
 		fprintf(output, ":\n\n");
 	}
 
-	if (no_aggr) {
+	if (aggr_socket)
+		print_aggr_socket(NULL);
+	else if (no_aggr) {
 		list_for_each_entry(counter, &evsel_list->entries, node)
 			print_counter(counter, NULL);
 	} else {
@@ -1228,6 +1323,7 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 			"command to run after to the measured command"),
 	OPT_UINTEGER('I', "interval-print", &interval,
 		    "print counts at regular interval in ms (>= 100)"),
+	OPT_BOOLEAN(0, "aggr-socket", &aggr_socket, "aggregate counts per processor socket"),
 	OPT_END()
 	};
 	const char * const stat_usage[] = {
@@ -1314,6 +1410,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 		usage_with_options(stat_usage, options);
 	}
 
+	if (aggr_socket) {
+		if (!perf_target__has_cpu(&target)) {
+			fprintf(stderr, "--aggr-socket only available in system-wide mode (-a)\n");
+			usage_with_options(stat_usage, options);
+		}
+		no_aggr = true;
+	}
+
 	if (add_default_attributes())
 		goto out;
 
-- 
1.8.1.1.361.gec3ae6e


  parent reply	other threads:[~2013-02-06 21:45 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2013-02-06 21:44 [GIT PULL 00/17] perf/core improvements and fixes Arnaldo Carvalho de Melo
2013-02-06 21:44 ` [PATCH 01/17] perf hists browser: Add option for runtime switching perf data file Arnaldo Carvalho de Melo
2013-02-06 21:44 ` [PATCH 02/17] perf report: Enable the runtime switching of " Arnaldo Carvalho de Melo
2013-02-06 21:44 ` [PATCH 03/17] perf evlist: Fix set event list leader Arnaldo Carvalho de Melo
2013-02-06 21:44 ` [PATCH 04/17] perf tools: Check for flex and bison before continuing building Arnaldo Carvalho de Melo
2013-02-06 21:44 ` [PATCH 05/17] perf sort: Drop ip_[lr] arguments from _sort__sym_cmp() Arnaldo Carvalho de Melo
2013-02-06 21:44 ` [PATCH 06/17] perf sort: Make setup_sorting returns an error code Arnaldo Carvalho de Melo
2013-02-06 21:44 ` [PATCH 07/17] perf sort: Check return value of strdup() Arnaldo Carvalho de Melo
2013-02-06 21:44 ` [PATCH 08/17] perf evlist: Make event_copy local to mmaps Arnaldo Carvalho de Melo
2013-02-06 21:44 ` [PATCH 09/17] perf tools: Add cpu_map processor socket level functions Arnaldo Carvalho de Melo
2013-02-06 21:44 ` Arnaldo Carvalho de Melo [this message]
2013-02-06 21:44 ` [PATCH 11/17] perf hists browser: Add support to display whole group data for raw columns Arnaldo Carvalho de Melo
2013-02-06 21:44 ` [PATCH 12/17] perf perl scripts: Fix SIGALRM and pipe read race for rwtop Arnaldo Carvalho de Melo
2013-02-06 21:44 ` [PATCH 13/17] perf tools: Fix perf_evsel::exclude_GH handling Arnaldo Carvalho de Melo
2013-02-06 21:44 ` [PATCH 14/17] perf tests: Adding automated parsing tests for group :GH modifiers Arnaldo Carvalho de Melo
2013-02-06 21:44 ` [PATCH 15/17] perf tools: Fix calloc argument ordering Arnaldo Carvalho de Melo
2013-02-06 21:44 ` [PATCH 16/17] perf evlist: Pass the event_group info via perf_attr_details Arnaldo Carvalho de Melo
2013-02-06 21:44 ` [PATCH 17/17] perf python: Link with sysfs.o Arnaldo Carvalho de Melo
2013-02-06 21:51 ` [GIT PULL 00/17] perf/core improvements and fixes Ingo Molnar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1360187059-12661-11-git-send-email-acme@infradead.org \
    --to=acme@infradead.org \
    --cc=acme@redhat.com \
    --cc=ak@linux.intel.com \
    --cc=eranian@google.com \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=mingo@kernel.org \
    --cc=namhyung.kim@lge.com \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).