linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Andi Kleen <andi@firstfloor.org>
To: linux-kernel@vger.kernel.org
Cc: x86@kernel.org, a.p.zijlstra@chello.nl, eranian@google.com,
	acme@redhat.com, Andi Kleen <ak@linux.intel.com>
Subject: [PATCH 25/31] perf, tools: Add perf stat --transaction
Date: Thu, 27 Sep 2012 21:31:30 -0700	[thread overview]
Message-ID: <1348806696-31170-26-git-send-email-andi@firstfloor.org> (raw)
In-Reply-To: <1348806696-31170-1-git-send-email-andi@firstfloor.org>

From: Andi Kleen <ak@linux.intel.com>

Add support to perf stat to print the basic transactional execution statistics:
total cycles, cycles in transactions, and cycles in aborted transactions
(using the intx and intx_checkpointed qualifiers), plus transaction starts
and elision starts, which are used to compute the average transaction length.

This gives a reasonable overview of the success of the transactions.

Enable with a new --transaction / -T option.

This requires measuring these events in a group, since they depend on each
other.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 tools/perf/Documentation/perf-stat.txt |    3 +
 tools/perf/builtin-stat.c              |  104 +++++++++++++++++++++++++++++---
 2 files changed, 99 insertions(+), 8 deletions(-)

diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt
index 2fa173b..6e55bd9 100644
--- a/tools/perf/Documentation/perf-stat.txt
+++ b/tools/perf/Documentation/perf-stat.txt
@@ -108,7 +108,10 @@ with it.  --append may be used here.  Examples:
      3>results  perf stat --log-fd 3          -- $cmd
      3>>results perf stat --log-fd 3 --append -- $cmd
 
+-T::
+--transaction::
 
+Print statistics of transactional execution.  Implies --group.
 
 EXAMPLES
 --------
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 861f0ae..2364605 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -64,6 +64,9 @@
 #define CNTR_NOT_SUPPORTED	"<not supported>"
 #define CNTR_NOT_COUNTED	"<not counted>"
 
+#define is_intx(e)		((e)->attr.intx && !(e)->attr.intx_checkpointed)
+#define is_intx_cp(e)		((e)->attr.intx && (e)->attr.intx_checkpointed)
+
 static struct perf_event_attr default_attrs[] = {
 
   { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK		},
@@ -171,7 +174,21 @@ static struct perf_event_attr very_very_detailed_attrs[] = {
 	(PERF_COUNT_HW_CACHE_RESULT_MISS	<< 16)				},
 };
 
+/*
+ * Transactional memory stats (-T)
+ * Must run as a group.
+ */
+static struct perf_event_attr transaction_attrs[] = {
+  { .type = PERF_TYPE_SOFTWARE, .config = PERF_COUNT_SW_TASK_CLOCK		},
 
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_INSTRUCTIONS		},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES		},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, .intx = 1	},
+  { .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES,
+    .intx = 1, .intx_checkpointed = 1 },
+  { .type = PERF_TYPE_HW_TRANSACTION, .config = PERF_COUNT_HW_TRANSACTION_START	},
+  { .type = PERF_TYPE_HW_TRANSACTION, .config = PERF_COUNT_HW_ELISION_START	},
+};
 
 static struct perf_evlist	*evsel_list;
 
@@ -187,6 +204,7 @@ static bool			no_aggr				= false;
 static pid_t			child_pid			= -1;
 static bool			null_run			=  false;
 static int			detailed_run			=  0;
+static bool			transaction_run			=  false;
 static bool			sync_run			=  false;
 static bool			big_num				=  true;
 static int			big_num_opt			=  -1;
@@ -275,7 +293,11 @@ static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
 static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
 static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
 static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
+static struct stats runtime_cycles_intx_stats[MAX_NR_CPUS];
+static struct stats runtime_cycles_intxcp_stats[MAX_NR_CPUS];
 static struct stats walltime_nsecs_stats;
+static struct stats runtime_transaction_stats[MAX_NR_CPUS];
+static struct stats runtime_elision_stats[MAX_NR_CPUS];
 
 static int create_perf_stat_counter(struct perf_evsel *evsel,
 				    struct perf_evsel *first)
@@ -350,10 +372,18 @@ static void update_shadow_stats(struct perf_evsel *counter, u64 *count)
 {
 	if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
 		update_stats(&runtime_nsecs_stats[0], count[0]);
-	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
-		update_stats(&runtime_cycles_stats[0], count[0]);
-	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
-		update_stats(&runtime_stalled_cycles_front_stats[0], count[0]);
+	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) {
+		if (is_intx(counter))
+			update_stats(&runtime_cycles_intx_stats[0], count[0]);
+		else if (is_intx_cp(counter))
+			update_stats(&runtime_cycles_intxcp_stats[0], count[0]);
+		else
+			update_stats(&runtime_cycles_stats[0], count[0]);
+	} else if (perf_evsel__match(counter, HW_TRANSACTION,
+				     HW_TRANSACTION_START))
+		update_stats(&runtime_transaction_stats[0], count[0]);
+	else if (perf_evsel__match(counter, HW_TRANSACTION, HW_ELISION_START))
+		update_stats(&runtime_elision_stats[0], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
 		update_stats(&runtime_stalled_cycles_back_stats[0], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
@@ -774,7 +804,7 @@ static void print_ll_cache_misses(int cpu, struct perf_evsel *evsel __used, doub
 
 static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
 {
-	double total, ratio = 0.0;
+	double total, ratio = 0.0, total2;
 	char cpustr[16] = { '\0', };
 	const char *fmt;
 
@@ -868,12 +898,50 @@ static void abs_printout(int cpu, struct perf_evsel *evsel, double avg)
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) {
 		print_stalled_cycles_backend(cpu, evsel, avg);
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) {
-		total = avg_stats(&runtime_nsecs_stats[cpu]);
+		if (is_intx(evsel)) {
+			total = avg_stats(&runtime_cycles_stats[cpu]);
+			if (total)
+				fprintf(output,
+					" #   %5.2f%% transactional          ",
+					100.0 * (avg / total));
+		} else if (is_intx_cp(evsel)) {
+			total = avg_stats(&runtime_cycles_stats[cpu]);
+			total2 = avg_stats(&runtime_cycles_intx_stats[cpu]);
+			if (total)
+				fprintf(output,
+					" #   %5.2f%% aborted cycles         ",
+					100.0 * ((total2-avg) / total));
+		} else {
+			total = avg_stats(&runtime_nsecs_stats[cpu]);
+
+			if (total)
+				ratio = 1.0 * avg / total;
+
+			fprintf(output, " # %8.3f GHz                    ", 
+					ratio);
+		}
+	} else if (perf_evsel__match(evsel, HW_TRANSACTION,
+				     HW_TRANSACTION_START) &&
+		   avg > 0 &&
+		   runtime_cycles_intx_stats[cpu].n != 0) {
+		total = avg_stats(&runtime_cycles_intx_stats[cpu]);
+
+		if (total)
+			ratio = total / avg;
+
+		fprintf(output, " # %8.0f cycles / transaction ", ratio);
+
+	} else if (perf_evsel__match(evsel, HW_TRANSACTION,
+				      HW_ELISION_START) &&
+		   avg > 0 &&
+		   runtime_cycles_intx_stats[cpu].n != 0) {
+		total = avg_stats(&runtime_cycles_intx_stats[cpu]);
 
 		if (total)
-			ratio = 1.0 * avg / total;
+			ratio = total / avg;
+
+		fprintf(output, " # %8.0f cycles / elision     ", ratio);
 
-		fprintf(output, " # %8.3f GHz                    ", ratio);
 	} else if (runtime_nsecs_stats[cpu].n != 0) {
 		char unit = 'M';
 
@@ -1068,6 +1136,16 @@ static int stat__set_big_num(const struct option *opt __used,
 	return 0;
 }
 
+/* Must force groups for transactions */
+static int stat__parse_transaction(const struct option *opt __used,
+				   const char *str __used,
+				   int unset __used)
+{
+	transaction_run = true;
+	group = true;
+	return 0;
+}
+
 static bool append_file;
 
 static const struct option options[] = {
@@ -1115,6 +1193,9 @@ static const struct option options[] = {
 	OPT_BOOLEAN(0, "append", &append_file, "append to the output file"),
 	OPT_INTEGER(0, "log-fd", &output_fd,
 		    "log output to fd, instead of stderr"),
+	OPT_CALLBACK_NOOPT('T', "transaction", NULL, NULL,
+		     "capture hardware transaction success",
+		     stat__parse_transaction),
 	OPT_END()
 };
 
@@ -1128,6 +1209,13 @@ static int add_default_attributes(void)
 	if (null_run)
 		return 0;
 
+	if (transaction_run) {
+		if (perf_evlist__add_attrs_array(evsel_list, 
+						 transaction_attrs) < 0)
+			return -1;
+		return 0;
+	}
+
 	if (!evsel_list->nr_entries) {
 		if (perf_evlist__add_default_attrs(evsel_list, default_attrs) < 0)
 			return -1;
-- 
1.7.7.6


  parent reply	other threads:[~2012-09-28  4:34 UTC|newest]

Thread overview: 58+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2012-09-28  4:31 perf PMU support for Haswell Andi Kleen
2012-09-28  4:31 ` [PATCH 01/31] perf, x86: Add PEBSv2 record support Andi Kleen
2012-09-28  8:43   ` Peter Zijlstra
2012-09-28  8:54     ` Stephane Eranian
2012-09-28  9:28       ` Peter Zijlstra
2012-09-28 11:33         ` Stephane Eranian
2012-09-28 14:42     ` Andi Kleen
2012-09-28  4:31 ` [PATCH 02/31] perf, x86: Basic Haswell PMU support Andi Kleen
2012-09-28  9:05   ` Peter Zijlstra
2012-09-28 14:58     ` Andi Kleen
     [not found]       ` <CABPqkBQ90Crh+EpRQq0Y+xUvrj5vzrX_=SpJQyR4p8uFR_Hr=Q@mail.gmail.com>
2012-09-28 15:21         ` Peter Zijlstra
2012-09-28 15:23         ` Andi Kleen
2012-09-28  4:31 ` [PATCH 03/31] perf, x86: Basic Haswell PEBS support Andi Kleen
2012-09-28  8:50   ` Peter Zijlstra
2012-09-28  4:31 ` [PATCH 04/31] perf, core: Add generic intx/intx_checkpointed counter modifiers Andi Kleen
2012-09-28  9:02   ` Peter Zijlstra
2012-09-28 11:35     ` Stephane Eranian
2012-09-28 14:53     ` Andi Kleen
2012-09-28 15:19       ` Peter Zijlstra
2012-09-28 15:29         ` Andi Kleen
2012-09-28 15:36           ` Peter Zijlstra
2012-09-28 15:23       ` Peter Zijlstra
2012-09-28 15:37         ` Andi Kleen
2012-09-28  4:31 ` [PATCH 05/31] perf, tools: Add :c,:t event modifiers in perf tools Andi Kleen
2012-09-28  4:31 ` [PATCH 06/31] perf, tools: Add intx/intx_checkpoint to perf script and header printing Andi Kleen
2012-09-28  4:31 ` [PATCH 07/31] perf, x86: Implement the :t and :c qualifiers for Haswell Andi Kleen
2012-09-28  4:31 ` [PATCH 08/31] perf, x86: Report PEBS event in a raw format Andi Kleen
2012-09-28  8:54   ` Peter Zijlstra
2012-09-28  8:57     ` Stephane Eranian
2012-09-28  4:31 ` [PATCH 09/31] perf, kvm: Support :t and :c perf modifiers in KVM arch perfmon emulation Andi Kleen
2012-09-28  4:31 ` [PATCH 10/31] perf, x86: Support PERF_SAMPLE_ADDR on Haswell Andi Kleen
2012-09-28  4:31 ` [PATCH 11/31] perf, x86: Support Haswell v4 LBR format Andi Kleen
2012-09-28  4:31 ` [PATCH 12/31] perf, x86: Disable LBR recording for unknown LBR_FMT Andi Kleen
2012-09-28  4:31 ` [PATCH 13/31] perf, x86: Support LBR filtering by INTX/NOTX/ABORT Andi Kleen
2012-09-28  4:31 ` [PATCH 14/31] perf, tools: Add abort,notx,intx branch filter options to perf report -j Andi Kleen
2012-09-28  4:31 ` [PATCH 15/31] perf, tools: Support sorting by intx, abort branch flags Andi Kleen
2012-09-28  4:31 ` [PATCH 16/31] perf, x86: Support full width counting on Haswell Andi Kleen
2012-09-28  4:31 ` [PATCH 17/31] perf, x86: Avoid checkpointed counters causing excessive TSX aborts Andi Kleen
2012-09-28  4:31 ` [PATCH 18/31] perf, core: Add a concept of a weightened sample Andi Kleen
2012-09-28  9:06   ` Stephane Eranian
2012-09-28 14:57     ` Andi Kleen
2012-09-28 17:09       ` Stephane Eranian
2012-09-28  4:31 ` [PATCH 19/31] perf, x86: Support weight samples for PEBS Andi Kleen
2012-09-28  4:31 ` [PATCH 20/31] perf, tools: Add support for weight Andi Kleen
2012-09-28  4:31 ` [PATCH 21/31] perf, tools: Handle XBEGIN like a jump Andi Kleen
2012-09-28  4:31 ` [PATCH 22/31] perf, core: Define generic hardware transaction events Andi Kleen
2012-09-28  9:33   ` Peter Zijlstra
2012-09-28  4:31 ` [PATCH 23/31] perf, tools: Add support for generic transaction events to perf userspace Andi Kleen
2012-09-28  4:31 ` [PATCH 24/31] perf, x86: Add the Haswell implementation of the generic transaction events Andi Kleen
2012-09-28  4:31 ` Andi Kleen [this message]
2012-09-28  4:31 ` [PATCH 26/31] perf, x86: Support for printing PMU state on spurious PMIs Andi Kleen
2012-09-28  9:36   ` Peter Zijlstra
2012-09-28 11:39     ` Stephane Eranian
2012-09-28  4:31 ` [PATCH 27/31] perf, core: Add generic transaction flags Andi Kleen
2012-09-28  4:31 ` [PATCH 28/31] perf, x86: Add Haswell specific transaction flag reporting Andi Kleen
2012-09-28  4:31 ` [PATCH 29/31] perf, tools: Add support for record transaction flags Andi Kleen
2012-09-28  4:31 ` [PATCH 30/31] perf, tools: Point --sort documentation to --help Andi Kleen
2012-09-28  4:31 ` [PATCH 31/31] perf, tools: Add browser support for transaction flags Andi Kleen

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1348806696-31170-26-git-send-email-andi@firstfloor.org \
    --to=andi@firstfloor.org \
    --cc=a.p.zijlstra@chello.nl \
    --cc=acme@redhat.com \
    --cc=ak@linux.intel.com \
    --cc=eranian@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).