linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Ingo Molnar <mingo@elte.hu>
To: arun@sharma-home.net
Cc: Stephane Eranian <eranian@google.com>,
	Arnaldo Carvalho de Melo <acme@infradead.org>,
	linux-kernel@vger.kernel.org, Andi Kleen <ak@linux.intel.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Lin Ming <ming.m.lin@intel.com>,
	Arnaldo Carvalho de Melo <acme@redhat.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	eranian@gmail.com, Arun Sharma <asharma@fb.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Andrew Morton <akpm@linux-foundation.org>
Subject: [PATCH] perf events: Add stalled cycles generic event - PERF_COUNT_HW_STALLED_CYCLES
Date: Sat, 23 Apr 2011 22:14:09 +0200	[thread overview]
Message-ID: <20110423201409.GA20072@elte.hu> (raw)
In-Reply-To: <20110422203022.GA20573@elte.hu>


* Ingo Molnar <mingo@elte.hu> wrote:

> > [...] If there is an expensive load, you'll see that the load instruction 
> > takes many cycles and you can infer that it's a cache miss.
> > 
> > Questions app developers typically ask me:
> > 
> > * If I fix all my top 5 L3 misses how much faster will my app go?
> 
> This has come up: we could add a 'stalled/idle-cycles' generic event - i.e. 
> cycles spent without performing useful work in the pipelines. (Resource-stall 
> events on Intel CPUs.)

How about something like the patch below?

	Ingo
---
Subject: perf events: Add stalled cycles generic event - PERF_COUNT_HW_STALLED_CYCLES
From: Ingo Molnar <mingo@elte.hu>

The new PERF_COUNT_HW_STALLED_CYCLES event tries to approximate
the cycles in which the CPU does nothing useful because it is
stalled on a cache-miss or some other condition.

Note: this is still incomplete and will only work on Intel Nehalem
      CPUs for now; the intel_perfmon_event_map[] needs to be
      properly split between the major models.

Also update 'perf stat' to print:

           611,527 cycles
           400,553 instructions             # ( 0.7 instructions per cycle )
            77,809 stalled-cycles           # ( 12.7% of all cycles )

        0.000610987  seconds time elapsed

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event_intel.c |    2 ++
 include/linux/perf_event.h             |    1 +
 tools/perf/builtin-stat.c              |   11 +++++++++--
 tools/perf/util/parse-events.c         |    1 +
 tools/perf/util/python.c               |    1 +
 5 files changed, 14 insertions(+), 2 deletions(-)

Index: linux/arch/x86/kernel/cpu/perf_event_intel.c
===================================================================
--- linux.orig/arch/x86/kernel/cpu/perf_event_intel.c
+++ linux/arch/x86/kernel/cpu/perf_event_intel.c
@@ -34,6 +34,8 @@ static const u64 intel_perfmon_event_map
   [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
   [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
   [PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
+  [PERF_COUNT_HW_STALLED_CYCLES]	= 0xffa2, /* 0xff: All reasons, 0xa2: Resource stalls */
+
 };
 
 static struct event_constraint intel_core_event_constraints[] =
Index: linux/include/linux/perf_event.h
===================================================================
--- linux.orig/include/linux/perf_event.h
+++ linux/include/linux/perf_event.h
@@ -52,6 +52,7 @@ enum perf_hw_id {
 	PERF_COUNT_HW_BRANCH_INSTRUCTIONS	= 4,
 	PERF_COUNT_HW_BRANCH_MISSES		= 5,
 	PERF_COUNT_HW_BUS_CYCLES		= 6,
+	PERF_COUNT_HW_STALLED_CYCLES		= 7,
 
 	PERF_COUNT_HW_MAX,			/* non-ABI */
 };
Index: linux/tools/perf/builtin-stat.c
===================================================================
--- linux.orig/tools/perf/builtin-stat.c
+++ linux/tools/perf/builtin-stat.c
@@ -442,7 +442,7 @@ static void abs_printout(int cpu, struct
 		if (total)
 			ratio = avg / total;
 
-		fprintf(stderr, " # %10.3f IPC  ", ratio);
+		fprintf(stderr, " # ( %3.1f instructions per cycle )", ratio);
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
 			runtime_branches_stats[cpu].n != 0) {
 		total = avg_stats(&runtime_branches_stats[cpu]);
@@ -450,7 +450,7 @@ static void abs_printout(int cpu, struct
 		if (total)
 			ratio = avg * 100 / total;
 
-		fprintf(stderr, " # %10.3f %%    ", ratio);
+		fprintf(stderr, " # %10.3f %%", ratio);
 
 	} else if (runtime_nsecs_stats[cpu].n != 0) {
 		total = avg_stats(&runtime_nsecs_stats[cpu]);
@@ -459,6 +459,13 @@ static void abs_printout(int cpu, struct
 			ratio = 1000.0 * avg / total;
 
 		fprintf(stderr, " # %10.3f M/sec", ratio);
+	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES)) {
+		total = avg_stats(&runtime_cycles_stats[cpu]);
+
+		if (total)
+			ratio = avg / total * 100.0;
+
+		fprintf(stderr, " # (%5.1f%% of all cycles )", ratio);
 	}
 }
 
Index: linux/tools/perf/util/parse-events.c
===================================================================
--- linux.orig/tools/perf/util/parse-events.c
+++ linux/tools/perf/util/parse-events.c
@@ -38,6 +38,7 @@ static struct event_symbol event_symbols
   { CHW(BRANCH_INSTRUCTIONS),	"branch-instructions",	"branches"	},
   { CHW(BRANCH_MISSES),		"branch-misses",	""		},
   { CHW(BUS_CYCLES),		"bus-cycles",		""		},
+  { CHW(STALLED_CYCLES),	"stalled-cycles",	""		},
 
   { CSW(CPU_CLOCK),		"cpu-clock",		""		},
   { CSW(TASK_CLOCK),		"task-clock",		""		},
Index: linux/tools/perf/util/python.c
===================================================================
--- linux.orig/tools/perf/util/python.c
+++ linux/tools/perf/util/python.c
@@ -798,6 +798,7 @@ static struct {
 	{ "COUNT_HW_BRANCH_INSTRUCTIONS", PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
 	{ "COUNT_HW_BRANCH_MISSES",	  PERF_COUNT_HW_BRANCH_MISSES },
 	{ "COUNT_HW_BUS_CYCLES",	  PERF_COUNT_HW_BUS_CYCLES },
+	{ "COUNT_HW_STALLED_CYCLES",	  PERF_COUNT_HW_STALLED_CYCLES },
 	{ "COUNT_HW_CACHE_L1D",		  PERF_COUNT_HW_CACHE_L1D },
 	{ "COUNT_HW_CACHE_L1I",		  PERF_COUNT_HW_CACHE_L1I },
 	{ "COUNT_HW_CACHE_LL",	  	  PERF_COUNT_HW_CACHE_LL },

  parent reply	other threads:[~2011-04-23 20:14 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-04-22  8:47 [PATCH 1/1] perf tools: Add missing user space support for config1/config2 Stephane Eranian
2011-04-22  9:23 ` Ingo Molnar
2011-04-22  9:41   ` Stephane Eranian
2011-04-22 10:52     ` [generalized cache events] " Ingo Molnar
2011-04-22 12:04       ` Stephane Eranian
2011-04-22 13:18         ` Ingo Molnar
2011-04-22 20:31           ` Stephane Eranian
2011-04-22 20:47             ` Ingo Molnar
2011-04-23 12:13               ` Stephane Eranian
2011-04-23 12:49                 ` Ingo Molnar
2011-04-22 21:03             ` Ingo Molnar
2011-04-23 12:27               ` Stephane Eranian
2011-04-22 16:51         ` Andi Kleen
2011-04-22 19:57           ` Ingo Molnar
2011-04-26  9:25           ` Peter Zijlstra
2011-04-22 16:50       ` arun
2011-04-22 17:00         ` Andi Kleen
2011-04-22 20:30         ` Ingo Molnar
2011-04-22 20:32           ` Ingo Molnar
2011-04-23  0:03             ` Andi Kleen
2011-04-23  7:50               ` Peter Zijlstra
2011-04-23 12:06                 ` Stephane Eranian
2011-04-23 12:36                   ` Ingo Molnar
2011-04-23 13:16                   ` Peter Zijlstra
2011-04-25 18:48                     ` Stephane Eranian
2011-04-25 19:40                     ` Andi Kleen
2011-04-25 19:55                       ` Ingo Molnar
2011-04-24  2:15                   ` Andi Kleen
2011-04-24  2:19                 ` Andi Kleen
2011-04-25 17:41                   ` Ingo Molnar
2011-04-25 18:00                     ` Dehao Chen
     [not found]                     ` <BANLkTiks31-pMJe4zCKrppsrA1d6KanJFA@mail.gmail.com>
2011-04-25 18:05                       ` Ingo Molnar
2011-04-25 18:39                         ` Stephane Eranian
2011-04-25 19:45                           ` Ingo Molnar
2011-04-23  8:02               ` Ingo Molnar
2011-04-23 20:14           ` Ingo Molnar [this message]
2011-04-24  6:16             ` [PATCH] perf events: Add stalled cycles generic event - PERF_COUNT_HW_STALLED_CYCLES Arun Sharma
2011-04-25 17:37               ` Ingo Molnar
2011-04-26  9:25               ` Peter Zijlstra
2011-04-26 14:00               ` Ingo Molnar
2011-04-27 11:11               ` Ingo Molnar
2011-04-27 14:47                 ` Arun Sharma
2011-04-27 15:48                   ` Ingo Molnar
2011-04-27 16:27                     ` Ingo Molnar
2011-04-27 19:05                       ` Arun Sharma
2011-04-27 19:03                     ` Arun Sharma

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20110423201409.GA20072@elte.hu \
    --to=mingo@elte.hu \
    --cc=a.p.zijlstra@chello.nl \
    --cc=acme@infradead.org \
    --cc=acme@redhat.com \
    --cc=ak@linux.intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=arun@sharma-home.net \
    --cc=asharma@fb.com \
    --cc=eranian@gmail.com \
    --cc=eranian@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=ming.m.lin@intel.com \
    --cc=peterz@infradead.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).