public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Jiri Olsa <jolsa@kernel.org>
To: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: lkml <linux-kernel@vger.kernel.org>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	Paul Mackerras <paulus@samba.org>,
	David Ahern <dsahern@gmail.com>,
	Namhyung Kim <namhyung@kernel.org>,
	Ingo Molnar <mingo@kernel.org>, Andi Kleen <andi@firstfloor.org>,
	William Cohen <wcohen@redhat.com>
Subject: [PATCH 2/6] perf stat: Fix metrics calculation with event qualifiers
Date: Tue,  7 Apr 2015 23:25:15 +0200	[thread overview]
Message-ID: <1428441919-23099-3-git-send-email-jolsa@kernel.org> (raw)
In-Reply-To: <1428441919-23099-1-git-send-email-jolsa@kernel.org>

From: Andi Kleen <ak@linux.intel.com>

Currently in perf IPC and other metrics cannot be directly shown
separately for both user and kernel in a single run. The problem was
that the metrics matching code did not check event qualifiers.

With this patch the following case works correctly.

% perf stat -e cycles:k,cycles:u,instructions:k,instructions:u true

 Performance counter stats for 'true':

           531,718      cycles:k
           203,895      cycles:u
           338,151      instructions:k            #    0.64  insns per cycle
           105,961      instructions:u            #    0.52  insns per cycle

       0.002989739 seconds time elapsed

Previously it would misreport the ratios because they were matching
the wrong value.

The patch is fairly big, but quite mechanical, as it just
adds context indexes everywhere.

Reported-by: William Cohen <wcohen@redhat.com>
Signed-off-by: Andi Kleen <ak@linux.intel.com>
Link: http://lkml.kernel.org/n/tip-pfvwqm9cppgn45m90punnmib@git.kernel.org
Signed-off-by: Jiri Olsa <jolsa@kernel.org>
---
 tools/perf/builtin-stat.c | 129 +++++++++++++++++++++++++++-------------------
 1 file changed, 77 insertions(+), 52 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index f7b8218785f6..0b163ca4034f 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -247,21 +247,35 @@ out_free:
 	return -1;
 }
 
+#define NUM_CTX 3
+
+enum { CTX_USER, CTX_KERNEL, CTX_ALL };
+
 static struct stats runtime_nsecs_stats[MAX_NR_CPUS];
-static struct stats runtime_cycles_stats[MAX_NR_CPUS];
-static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS];
-static struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS];
-static struct stats runtime_branches_stats[MAX_NR_CPUS];
-static struct stats runtime_cacherefs_stats[MAX_NR_CPUS];
-static struct stats runtime_l1_dcache_stats[MAX_NR_CPUS];
-static struct stats runtime_l1_icache_stats[MAX_NR_CPUS];
-static struct stats runtime_ll_cache_stats[MAX_NR_CPUS];
-static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS];
-static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS];
-static struct stats runtime_cycles_in_tx_stats[MAX_NR_CPUS];
+static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS];
 static struct stats walltime_nsecs_stats;
-static struct stats runtime_transaction_stats[MAX_NR_CPUS];
-static struct stats runtime_elision_stats[MAX_NR_CPUS];
+static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS];
+static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS];
+
+static int evsel_context(struct perf_evsel *evsel)
+{
+	if (evsel->attr.exclude_kernel)
+		return CTX_USER;
+	if (evsel->attr.exclude_user)
+		return CTX_KERNEL;
+	/* Handle hypervisor too? */
+	return CTX_ALL;
+}
 
 static void perf_stat__reset_stats(struct perf_evlist *evlist)
 {
@@ -356,37 +370,39 @@ static struct perf_evsel *nth_evsel(int n)
 static void update_shadow_stats(struct perf_evsel *counter, u64 *count,
 				int cpu)
 {
+	int ctx = evsel_context(counter);
+
 	if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK))
 		update_stats(&runtime_nsecs_stats[cpu], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES))
-		update_stats(&runtime_cycles_stats[cpu], count[0]);
+		update_stats(&runtime_cycles_stats[ctx][cpu], count[0]);
 	else if (transaction_run &&
 		 perf_evsel__cmp(counter, nth_evsel(T_CYCLES_IN_TX)))
-		update_stats(&runtime_cycles_in_tx_stats[cpu], count[0]);
+		update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count[0]);
 	else if (transaction_run &&
 		 perf_evsel__cmp(counter, nth_evsel(T_TRANSACTION_START)))
-		update_stats(&runtime_transaction_stats[cpu], count[0]);
+		update_stats(&runtime_transaction_stats[ctx][cpu], count[0]);
 	else if (transaction_run &&
 		 perf_evsel__cmp(counter, nth_evsel(T_ELISION_START)))
-		update_stats(&runtime_elision_stats[cpu], count[0]);
+		update_stats(&runtime_elision_stats[ctx][cpu], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND))
-		update_stats(&runtime_stalled_cycles_front_stats[cpu], count[0]);
+		update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND))
-		update_stats(&runtime_stalled_cycles_back_stats[cpu], count[0]);
+		update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS))
-		update_stats(&runtime_branches_stats[cpu], count[0]);
+		update_stats(&runtime_branches_stats[ctx][cpu], count[0]);
 	else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES))
-		update_stats(&runtime_cacherefs_stats[cpu], count[0]);
+		update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]);
 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D))
-		update_stats(&runtime_l1_dcache_stats[cpu], count[0]);
+		update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]);
 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I))
-		update_stats(&runtime_l1_icache_stats[cpu], count[0]);
+		update_stats(&runtime_l1_icache_stats[ctx][cpu], count[0]);
 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL))
-		update_stats(&runtime_ll_cache_stats[cpu], count[0]);
+		update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]);
 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB))
-		update_stats(&runtime_dtlb_cache_stats[cpu], count[0]);
+		update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]);
 	else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB))
-		update_stats(&runtime_itlb_cache_stats[cpu], count[0]);
+		update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]);
 }
 
 static void zero_per_pkg(struct perf_evsel *counter)
@@ -908,8 +924,9 @@ static void print_stalled_cycles_frontend(int cpu,
 {
 	double total, ratio = 0.0;
 	const char *color;
+	int ctx = evsel_context(evsel);
 
-	total = avg_stats(&runtime_cycles_stats[cpu]);
+	total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -927,8 +944,9 @@ static void print_stalled_cycles_backend(int cpu,
 {
 	double total, ratio = 0.0;
 	const char *color;
+	int ctx = evsel_context(evsel);
 
-	total = avg_stats(&runtime_cycles_stats[cpu]);
+	total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -946,8 +964,9 @@ static void print_branch_misses(int cpu,
 {
 	double total, ratio = 0.0;
 	const char *color;
+	int ctx = evsel_context(evsel);
 
-	total = avg_stats(&runtime_branches_stats[cpu]);
+	total = avg_stats(&runtime_branches_stats[ctx][cpu]);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -965,8 +984,9 @@ static void print_l1_dcache_misses(int cpu,
 {
 	double total, ratio = 0.0;
 	const char *color;
+	int ctx = evsel_context(evsel);
 
-	total = avg_stats(&runtime_l1_dcache_stats[cpu]);
+	total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -984,8 +1004,9 @@ static void print_l1_icache_misses(int cpu,
 {
 	double total, ratio = 0.0;
 	const char *color;
+	int ctx = evsel_context(evsel);
 
-	total = avg_stats(&runtime_l1_icache_stats[cpu]);
+	total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -1003,8 +1024,9 @@ static void print_dtlb_cache_misses(int cpu,
 {
 	double total, ratio = 0.0;
 	const char *color;
+	int ctx = evsel_context(evsel);
 
-	total = avg_stats(&runtime_dtlb_cache_stats[cpu]);
+	total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -1022,8 +1044,9 @@ static void print_itlb_cache_misses(int cpu,
 {
 	double total, ratio = 0.0;
 	const char *color;
+	int ctx = evsel_context(evsel);
 
-	total = avg_stats(&runtime_itlb_cache_stats[cpu]);
+	total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -1041,8 +1064,9 @@ static void print_ll_cache_misses(int cpu,
 {
 	double total, ratio = 0.0;
 	const char *color;
+	int ctx = evsel_context(evsel);
 
-	total = avg_stats(&runtime_ll_cache_stats[cpu]);
+	total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]);
 
 	if (total)
 		ratio = avg / total * 100.0;
@@ -1060,6 +1084,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 	double sc =  evsel->scale;
 	const char *fmt;
 	int cpu = cpu_map__id_to_cpu(id);
+	int ctx = evsel_context(evsel);
 
 	if (csv_output) {
 		fmt = sc != 1.0 ?  "%.2f%s" : "%.0f%s";
@@ -1091,15 +1116,15 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 		return;
 
 	if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) {
-		total = avg_stats(&runtime_cycles_stats[cpu]);
+		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
 		if (total) {
 			ratio = avg / total;
 			fprintf(output, " #   %5.2f  insns per cycle        ", ratio);
 		} else {
 			fprintf(output, "                                   ");
 		}
-		total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]);
-		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu]));
+		total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]);
+		total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu]));
 
 		if (total && avg) {
 			ratio = total / avg;
@@ -1110,46 +1135,46 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 		}
 
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
-			runtime_branches_stats[cpu].n != 0) {
+			runtime_branches_stats[ctx][cpu].n != 0) {
 		print_branch_misses(cpu, evsel, avg);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1D |
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-			runtime_l1_dcache_stats[cpu].n != 0) {
+			runtime_l1_dcache_stats[ctx][cpu].n != 0) {
 		print_l1_dcache_misses(cpu, evsel, avg);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_L1I |
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-			runtime_l1_icache_stats[cpu].n != 0) {
+			runtime_l1_icache_stats[ctx][cpu].n != 0) {
 		print_l1_icache_misses(cpu, evsel, avg);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_DTLB |
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-			runtime_dtlb_cache_stats[cpu].n != 0) {
+			runtime_dtlb_cache_stats[ctx][cpu].n != 0) {
 		print_dtlb_cache_misses(cpu, evsel, avg);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_ITLB |
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-			runtime_itlb_cache_stats[cpu].n != 0) {
+			runtime_itlb_cache_stats[ctx][cpu].n != 0) {
 		print_itlb_cache_misses(cpu, evsel, avg);
 	} else if (
 		evsel->attr.type == PERF_TYPE_HW_CACHE &&
 		evsel->attr.config ==  ( PERF_COUNT_HW_CACHE_LL |
 					((PERF_COUNT_HW_CACHE_OP_READ) << 8) |
 					((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) &&
-			runtime_ll_cache_stats[cpu].n != 0) {
+			runtime_ll_cache_stats[ctx][cpu].n != 0) {
 		print_ll_cache_misses(cpu, evsel, avg);
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) &&
-			runtime_cacherefs_stats[cpu].n != 0) {
-		total = avg_stats(&runtime_cacherefs_stats[cpu]);
+			runtime_cacherefs_stats[ctx][cpu].n != 0) {
+		total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]);
 
 		if (total)
 			ratio = avg * 100 / total;
@@ -1171,15 +1196,15 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 		}
 	} else if (transaction_run &&
 		   perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX))) {
-		total = avg_stats(&runtime_cycles_stats[cpu]);
+		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
 		if (total)
 			fprintf(output,
 				" #   %5.2f%% transactional cycles   ",
 				100.0 * (avg / total));
 	} else if (transaction_run &&
 		   perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX_CP))) {
-		total = avg_stats(&runtime_cycles_stats[cpu]);
-		total2 = avg_stats(&runtime_cycles_in_tx_stats[cpu]);
+		total = avg_stats(&runtime_cycles_stats[ctx][cpu]);
+		total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
 		if (total2 < avg)
 			total2 = avg;
 		if (total)
@@ -1189,8 +1214,8 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 	} else if (transaction_run &&
 		   perf_evsel__cmp(evsel, nth_evsel(T_TRANSACTION_START)) &&
 		   avg > 0 &&
-		   runtime_cycles_in_tx_stats[cpu].n != 0) {
-		total = avg_stats(&runtime_cycles_in_tx_stats[cpu]);
+		   runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
+		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
 
 		if (total)
 			ratio = total / avg;
@@ -1199,8 +1224,8 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg)
 	} else if (transaction_run &&
 		   perf_evsel__cmp(evsel, nth_evsel(T_ELISION_START)) &&
 		   avg > 0 &&
-		   runtime_cycles_in_tx_stats[cpu].n != 0) {
-		total = avg_stats(&runtime_cycles_in_tx_stats[cpu]);
+		   runtime_cycles_in_tx_stats[ctx][cpu].n != 0) {
+		total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]);
 
 		if (total)
 			ratio = total / avg;
-- 
1.9.3


  parent reply	other threads:[~2015-04-07 21:27 UTC|newest]

Thread overview: 19+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-04-07 21:25 [RFC 0/6] perf stat: Metrics calculation fix Jiri Olsa
2015-04-07 21:25 ` [PATCH 1/6] perf tools: Add 'I' event modifier for exclude_idle bit Jiri Olsa
2015-04-08 12:56   ` Arnaldo Carvalho de Melo
2015-04-08 15:15   ` [tip:perf/core] " tip-bot for Jiri Olsa
2015-04-07 21:25 ` Jiri Olsa [this message]
2015-04-08 13:28   ` [PATCH 2/6] perf stat: Fix metrics calculation with event qualifiers Namhyung Kim
2015-04-23 22:15     ` Arnaldo Carvalho de Melo
2015-05-06  3:07   ` [tip:perf/core] " tip-bot for Andi Kleen
2015-04-07 21:25 ` [PATCH 3/6] perf stat: Change metrics context calculation Jiri Olsa
2015-05-06  3:08   ` [tip:perf/core] " tip-bot for Jiri Olsa
2015-04-07 21:25 ` [PATCH 4/6] perf stat: Add metrics support for exclude_hv Jiri Olsa
2015-05-06  3:08   ` [tip:perf/core] " tip-bot for Jiri Olsa
2015-04-07 21:25 ` [PATCH 5/6] perf stat: Add metrics support for exclude_(host|guest) Jiri Olsa
2015-05-06  3:08   ` [tip:perf/core] perf stat: Add metrics support for exclude_( host|guest) tip-bot for Jiri Olsa
2015-04-07 21:25 ` [PATCH 6/6] perf stat: Add metrics support for exclude_idle Jiri Olsa
2015-05-06  3:08   ` [tip:perf/core] " tip-bot for Jiri Olsa
2015-04-07 21:30 ` [RFC 0/6] perf stat: Metrics calculation fix Jiri Olsa
2015-04-18 13:40 ` Jiri Olsa
2015-04-19  3:46 ` Namhyung Kim

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1428441919-23099-3-git-send-email-jolsa@kernel.org \
    --to=jolsa@kernel.org \
    --cc=a.p.zijlstra@chello.nl \
    --cc=acme@kernel.org \
    --cc=andi@firstfloor.org \
    --cc=dsahern@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=namhyung@kernel.org \
    --cc=paulus@samba.org \
    --cc=wcohen@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox