All of lore.kernel.org
 help / color / mirror / Atom feed
From: Andi Kleen <andi@firstfloor.org>
To: linux-kernel@vger.kernel.org
Cc: acme@redhat.com, peterz@infradead.org, jolsa@redhat.com,
	eranian@google.com, mingo@kernel.org,
	Andi Kleen <ak@linux.intel.com>
Subject: [PATCH 29/33] perf, tools: Add perf stat --transaction v2
Date: Fri, 26 Oct 2012 13:30:11 -0700	[thread overview]
Message-ID: <1351283415-13170-30-git-send-email-andi@firstfloor.org> (raw)
In-Reply-To: <1351283415-13170-1-git-send-email-andi@firstfloor.org>

From: Andi Kleen <ak@linux.intel.com>

Add support to perf stat to print the basic transactional execution statistics:
Total cycles, Cycles in Transaction, Cycles in aborted transsactions
using the intx and intx_checkpoint qualifiers.
Transaction Starts and Elision Starts, to compute the average transaction length.

This is a reasonable overview over the success of the transactions.

Enable with a new --transaction / -T option.

This requires measuring these events in a group, since they depend on each
other.

This is implemented by using TM sysfs events exported by the kernel

v2: Only print the extended statistics when the option is enabled.
This avoids negative output when the user specifies the -T events
in separate groups.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 tools/perf/Documentation/perf-stat.txt |    3 +
 tools/perf/builtin-stat.c              |  101 +++++++++++++++++++++++++++++++-
 tools/perf/util/evsel.h                |    6 ++
 3 files changed, 107 insertions(+), 3 deletions(-)

diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 2fa173b..653bdbd 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -108,7 +108,10 @@ with it.  --append may be used here.  Examples:
      3>results  perf stat --log-fd 3          -- $cmd
      3>>results perf stat --log-fd 3 --append -- $cmd
 
+-T::
+--transaction::
 
+Print statistics of transactional execution if supported.
 
 EXAMPLES
 --------
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 93b9011..a451490 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -64,6 +64,29 @@
 #define CNTR_NOT_SUPPORTED	"<not supported>"
 #define CNTR_NOT_COUNTED	"<not counted>"
 
+static const char *transaction_attrs[] = {
+	"task-clock",
+	"{"
+	"instructions,"
+	"cycles,"
+	"cpu/cycles-t/,"
+	"cpu/cycles-ct/,"
+	"cpu/tx-start/,"
+	"cpu/el-start/"
+	"}"
+};
+
+/* must match the transaction_attrs above */
+enum {
+	T_TASK_CLOCK,
+	T_INSTRUCTIONS,
+	T_CYCLES,
+	T_CYCLES_INTX,
+	T_CYCLES_INTX_CP,
+	T_TRANSACTION_START,
+	T_ELISION_START
+};
+
 static struct perf_evlist	*evsel_list;
 
 static struct perf_target	target = {
@@ -77,6 +100,7 @@ static bool			no_aggr				= false;
 static pid_t			child_pid			= -1;
 static bool			null_run			=  false;
 static int			detailed_run			=  0;
+static bool			transaction_run			=  false;
 static bool			big_num				=  true;
 static int			big_num_opt			=  -1;
 static const char		*csv_sep			= NULL;
@@ -123,7 +147,11 @@ static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
 static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
 static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
 static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
+static struct stats runtime_cycles_intx_stats[MAX_NR_CPUS];
+static struct stats runtime_cycles_intxcp_stats[MAX_NR_CPUS];
 static struct stats walltime_nsecs_stats;
+static struct stats runtime_transaction_stats[MAX_NR_CPUS];
+static struct stats runtime_elision_stats[MAX_NR_CPUS];
 
 static int create_perf_stat_counter(struct perf_evsel *evsel,
 				    struct perf_evsel *first)
@@ -183,6 +211,18 @@ static inline int nsec_counter(struct perf_evsel *evsel)
 	return 0;
 }
 
+static struct perf_evsel *nth_evsel(int n)
+{
+	struct perf_evsel *ev;
+	int j;
+
+	j = 0;
+	list_for_each_entry (ev, &evsel_list->entries, node)
+		if (j++ == n)
+			return ev;
+	return NULL;
+}
+
 /*
  * Update various tracking values we maintain to print
  * more semantic information such as miss/hit ratios,
@@ -194,8 +234,14 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
 		update_stats(&runtime_nsecs_stats[0], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
 		update_stats(&runtime_cycles_stats[0], count[0]);
-	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
-		update_stats(&runtime_stalled_cycles_front_stats[0], count[0]);
+	else if (perf_evsel__cmp(counter, nth_evsel(T_CYCLES_INTX)))
+		update_stats(&runtime_cycles_intx_stats[0], count[0]);
+	else if (perf_evsel__cmp(counter, nth_evsel(T_CYCLES_INTX_CP)))
+		update_stats(&runtime_cycles_intxcp_stats[0], count[0]);
+	else if (perf_evsel__cmp(counter, nth_evsel(T_TRANSACTION_START)))
+		update_stats(&runtime_transaction_stats[0], count[0]);
+	else if (perf_evsel__cmp(counter, nth_evsel(T_ELISION_START)))
+		update_stats(&runtime_elision_stats[0], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
 		update_stats(&runtime_stalled_cycles_back_stats[0], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
@@ -633,7 +679,7 @@ static void print_ll_cache_misses(int cpu,
 
 static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
 {
-	double total, ratio = 0.0;
+	double total, ratio = 0.0, total2;
 	char cpustr[16] = { '\0', };
 	const char *fmt;
 
@@ -733,6 +779,41 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
 			ratio = 1.0 * avg / total;
 
 		fprintf(output, " # %8.3f GHz                    ", ratio);
+	} else if (perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_INTX)) && 
+		   transaction_run) {
+		total = avg_stats(&runtime_cycles_stats[cpu]);
+		if (total)
+			fprintf(output,
+				" #   %5.2f%% transactional cycles   ",
+				100.0 * (avg / total));
+	} else if (perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_INTX_CP)) &&
+		   transaction_run) {
+		total = avg_stats(&runtime_cycles_stats[cpu]);
+		total2 = avg_stats(&runtime_cycles_intx_stats[cpu]);
+		if (total)
+			fprintf(output,
+				" #   %5.2f%% aborted cycles         ",
+				100.0 * ((total2-avg) / total));
+	} else if (perf_evsel__cmp(evsel, nth_evsel(T_TRANSACTION_START)) &&
+		   avg > 0 &&
+		   runtime_cycles_intx_stats[cpu].n != 0 &&
+		   transaction_run) {
+		total = avg_stats(&runtime_cycles_intx_stats[cpu]);
+
+		if (total)
+			ratio = total / avg;
+
+		fprintf(output, " # %8.0f cycles / transaction ", ratio);
+	} else if (perf_evsel__cmp(evsel, nth_evsel(T_ELISION_START)) &&
+		   avg > 0 &&
+		   runtime_cycles_intx_stats[cpu].n != 0 &&
+		   transaction_run) {
+		total = avg_stats(&runtime_cycles_intx_stats[cpu]);
+
+		if (total)
+			ratio = total / avg;
+
+		fprintf(output, " # %8.0f cycles / elision     ", ratio);
 	} else if (runtime_nsecs_stats[cpu].n != 0) {
 		char unit = 'M';
 
@@ -1039,6 +1120,18 @@ static int add_default_attributes(void)
 	if (null_run)
 		return 0;
 
+	if (transaction_run) {
+		unsigned i;
+
+		for (i = 0; i < ARRAY_SIZE(transaction_attrs); i++) {
+			if (parse_events(evsel_list, transaction_attrs[i], 0)) {
+				fprintf(stderr, "Cannot set up transaction events\n");
+				return -1;
+			}
+		}
+		return 0;
+	}
+
 	if (!evsel_list->nr_entries) {
 		if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
 			return -1;
@@ -1114,6 +1207,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused)
 	OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
 	OPT_INTEGER(0, "log-fd", &output_fd,
 		    "log output to fd, instead of stderr"),
+	OPT_BOOLEAN('T', "transaction", &transaction_run,
+		    "hardware transaction statistics"),
 	OPT_END()
 	};
 	const char * const stat_usage[] = {
diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h
index 6f94d6d..418405e 100644
--- a/tools/perf/util/evsel.h
+++ b/tools/perf/util/evsel.h
@@ -158,6 +158,12 @@ static inline bool perf_evsel__match2(struct perf_evsel *e1,
 	       (e1->attr.config == e2->attr.config);
 }
 
+#define perf_evsel__cmp(a, b)			\
+	((a) &&					\
+	 (b) &&					\
+	 (a)->attr.type == (b)->attr.type &&	\
+	 (a)->attr.config == (b)->attr.config)
+
 int __perf_evsel__read_on_cpu(struct perf_evsel *evsel,
 			      int cpu, int thread, bool scale);
 
-- 
1.7.7.6


  parent reply	other threads:[~2012-10-26 20:36 UTC|newest]

Thread overview: 55+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-10-26 20:29 perf PMU support for Haswell v4 Andi Kleen
2012-10-26 20:29 ` [PATCH 01/33] perf, x86: Add PEBSv2 record support Andi Kleen
2012-10-29 10:08   ` Namhyung Kim
2012-10-29 10:13     ` Andi Kleen
2012-10-29 10:23     ` Peter Zijlstra
2012-10-26 20:29 ` [PATCH 02/33] perf, x86: Basic Haswell PMU support v2 Andi Kleen
2012-10-26 20:29 ` [PATCH 03/33] perf, x86: Basic Haswell PEBS support v3 Andi Kleen
2012-10-26 20:29 ` [PATCH 04/33] perf, x86: Support the TSX intx/intx_cp qualifiers v2 Andi Kleen
2012-10-26 20:29 ` [PATCH 05/33] perf, kvm: Support the intx/intx_cp modifiers in KVM arch perfmon emulation v3 Andi Kleen
2012-10-30  9:25   ` Gleb Natapov
2012-10-26 20:29 ` [PATCH 06/33] perf, x86: Support PERF_SAMPLE_ADDR on Haswell Andi Kleen
2012-10-26 20:29 ` [PATCH 07/33] perf, x86: Support Haswell v4 LBR format Andi Kleen
2012-10-26 20:29 ` [PATCH 08/33] perf, x86: Disable LBR recording for unknown LBR_FMT Andi Kleen
2012-10-26 20:29 ` [PATCH 09/33] perf, x86: Support LBR filtering by INTX/NOTX/ABORT v2 Andi Kleen
2012-10-26 20:29 ` [PATCH 10/33] perf, tools: Add abort,notx,intx branch filter options to perf report -j v2 Andi Kleen
2012-10-29 10:19   ` Namhyung Kim
2012-10-26 20:29 ` [PATCH 11/33] perf, tools: Support sorting by intx, abort branch flags Andi Kleen
2012-10-26 20:29 ` [PATCH 12/33] perf, x86: Support full width counting Andi Kleen
2012-10-26 20:29 ` [PATCH 13/33] perf, x86: Avoid checkpointed counters causing excessive TSX aborts v3 Andi Kleen
2012-10-26 20:29 ` [PATCH 14/33] perf, core: Add a concept of a weightened sample Andi Kleen
2012-10-26 20:29 ` [PATCH 15/33] perf, x86: Support weight samples for PEBS Andi Kleen
2012-10-26 20:29 ` [PATCH 16/33] perf, tools: Add support for weight v2 Andi Kleen
2012-10-29 10:44   ` Namhyung Kim
2012-10-29 11:02     ` Andi Kleen
2012-10-26 20:29 ` [PATCH 17/33] perf, tools: Handle XBEGIN like a jump Andi Kleen
2012-10-26 20:30 ` [PATCH 18/33] perf, x86: Support for printing PMU state on spurious PMIs v3 Andi Kleen
2012-10-26 20:30 ` [PATCH 19/33] perf, core: Add generic transaction flags Andi Kleen
2012-10-26 20:30 ` [PATCH 20/33] perf, x86: Add Haswell specific transaction flag reporting Andi Kleen
2012-10-26 20:30 ` [PATCH 21/33] perf, tools: Add support for record transaction flags Andi Kleen
2012-10-29 10:49   ` Namhyung Kim
2012-10-26 20:30 ` [PATCH 22/33] perf, tools: Point --sort documentation to --help Andi Kleen
2012-10-26 20:30 ` [PATCH 23/33] perf, tools: Add browser support for transaction flags Andi Kleen
2012-10-26 20:30 ` [PATCH 24/33] perf, tools: Move parse_events error printing to parse_events_options Andi Kleen
2012-10-27 19:08   ` Jiri Olsa
2012-10-30 11:58   ` [tip:perf/core] perf " tip-bot for Andi Kleen
2012-10-26 20:30 ` [PATCH 25/33] perf, tools: Support events with - in the name Andi Kleen
2012-10-27 19:32   ` Jiri Olsa
2012-10-26 20:30 ` [PATCH 26/33] perf, x86: Report the arch perfmon events in sysfs Andi Kleen
2012-10-26 20:30 ` [PATCH 27/33] tools, perf: Add a precise event qualifier Andi Kleen
2012-10-27 19:35   ` Jiri Olsa
2012-10-28 19:13     ` Andi Kleen
2012-10-28 19:24       ` Jiri Olsa
2012-10-28 20:06         ` Andi Kleen
2012-10-26 20:30 ` [PATCH 28/33] perf, x86: Add Haswell TSX event aliases Andi Kleen
2012-10-26 20:30 ` Andi Kleen [this message]
2012-10-26 20:30 ` [PATCH 30/33] perf, x86: Add a Haswell precise instructions event Andi Kleen
2012-10-26 20:30 ` [PATCH 31/33] perf, tools: Support generic events as pmu event names v2 Andi Kleen
2012-10-27 19:42   ` Jiri Olsa
2012-10-28 19:12     ` Andi Kleen
2012-10-29  9:23       ` Peter Zijlstra
2012-10-26 20:30 ` [PATCH 32/33] perf, tools: Default to cpu// for events v2 Andi Kleen
2012-10-27 20:16   ` Jiri Olsa
2012-10-26 20:30 ` [PATCH 33/33] perf, tools: List kernel supplied event aliases in perf list v2 Andi Kleen
2012-10-27 20:20   ` Jiri Olsa
2012-10-28 19:05     ` Andi Kleen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1351283415-13170-30-git-send-email-andi@firstfloor.org \
    --to=andi@firstfloor.org \
    --cc=acme@redhat.com \
    --cc=ak@linux.intel.com \
    --cc=eranian@google.com \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.