All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ingo Molnar <mingo@elte.hu>
To: arun@sharma-home.net
Cc: Stephane Eranian <eranian@google.com>,
	Arnaldo Carvalho de Melo <acme@infradead.org>,
	linux-kernel@vger.kernel.org, Andi Kleen <ak@linux.intel.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Lin Ming <ming.m.lin@intel.com>,
	Arnaldo Carvalho de Melo <acme@redhat.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	eranian@gmail.com, Arun Sharma <asharma@fb.com>,
	Linus Torvalds <torvalds@linux-foundation.org>,
	Andrew Morton <akpm@linux-foundation.org>
Subject: [PATCH] perf events: Add stalled cycles generic event - PERF_COUNT_HW_STALLED_CYCLES
Date: Sat, 23 Apr 2011 22:14:09 +0200	[thread overview]
Message-ID: <20110423201409.GA20072@elte.hu> (raw)
In-Reply-To: <20110422203022.GA20573@elte.hu>


* Ingo Molnar <mingo@elte.hu> wrote:

> > [...] If there is an expensive load, you'll see that the load instruction 
> > takes many cycles and you can infer that it's a cache miss.
> > 
> > Questions app developers typically ask me:
> > 
> > * If I fix all my top 5 L3 misses how much faster will my app go?
> 
> This has come up: we could add a 'stalled/idle-cycles' generic event - i.e. 
> cycles spent without performing useful work in the pipelines. (Resource-stall 
> events on Intel CPUs.)

How about something like the patch below?

	Ingo
---
Subject: perf events: Add stalled cycles generic event - PERF_COUNT_HW_STALLED_CYCLES
From: Ingo Molnar <mingo@elte.hu>

The new PERF_COUNT_HW_STALLED_CYCLES event tries to approximate
the cycles during which the CPU does nothing useful because it is
stalled on a cache-miss or some other condition.

Note: this is still incomplete and will only work on Intel Nehalem
      CPUs for now; the intel_perfmon_event_map[] needs to be
      properly split between the major models.

Also update 'perf stat' to print:

           611,527 cycles
           400,553 instructions             # ( 0.7 instructions per cycle )
            77,809 stalled-cycles           # ( 12.7% of all cycles )

        0.000610987  seconds time elapsed

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event_intel.c |    2 ++
 include/linux/perf_event.h             |    1 +
 tools/perf/builtin-stat.c              |   11 +++++++++--
 tools/perf/util/parse-events.c         |    1 +
 tools/perf/util/python.c               |    1 +
 5 files changed, 14 insertions(+), 2 deletions(-)

Index: linux/arch/x86/kernel/cpu/perf_event_intel.c
===================================================================
--- linux.orig/arch/x86/kernel/cpu/perf_event_intel.c
+++ linux/arch/x86/kernel/cpu/perf_event_intel.c
@@ -34,6 +34,8 @@ static const u64 intel_perfmon_event_map
   [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
   [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
   [PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
+  [PERF_COUNT_HW_STALLED_CYCLES]	= 0xffa2, /* 0xff: All reasons, 0xa2: Resource stalls */
+
 };
 
 static struct event_constraint intel_core_event_constraints[] =
Index: linux/include/linux/perf_event.h
===================================================================
--- linux.orig/include/linux/perf_event.h
+++ linux/include/linux/perf_event.h
@@ -52,6 +52,7 @@ enum perf_hw_id {
 	PERF_COUNT_HW_BRANCH_INSTRUCTIONS	= 4,
 	PERF_COUNT_HW_BRANCH_MISSES		= 5,
 	PERF_COUNT_HW_BUS_CYCLES		= 6,
+	PERF_COUNT_HW_STALLED_CYCLES		= 7,
 
 	PERF_COUNT_HW_MAX,			/* non-ABI */
 };
Index: linux/tools/perf/builtin-stat.c
===================================================================
--- linux.orig/tools/perf/builtin-stat.c
+++ linux/tools/perf/builtin-stat.c
@@ -442,7 +442,7 @@ static void abs_printout(int cpu, struct
 		if (total)
 			ratio = avg / total;
 
-		fprintf(stderr, " # %10.3f IPC  ", ratio);
+		fprintf(stderr, " # ( %3.1f instructions per cycle )", ratio);
 	} else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) &&
 			runtime_branches_stats[cpu].n != 0) {
 		total = avg_stats(&runtime_branches_stats[cpu]);
@@ -450,7 +450,7 @@ static void abs_printout(int cpu, struct
 		if (total)
 			ratio = avg * 100 / total;
 
-		fprintf(stderr, " # %10.3f %%    ", ratio);
+		fprintf(stderr, " # %10.3f %%", ratio);
 
 	} else if (runtime_nsecs_stats[cpu].n != 0) {
 		total = avg_stats(&runtime_nsecs_stats[cpu]);
@@ -459,6 +459,13 @@ static void abs_printout(int cpu, struct
 			ratio = 1000.0 * avg / total;
 
 		fprintf(stderr, " # %10.3f M/sec", ratio);
+	} else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES)) {
+		total = avg_stats(&runtime_cycles_stats[cpu]);
+
+		if (total)
+			ratio = avg / total * 100.0;
+
+		fprintf(stderr, " # (%5.1f%% of all cycles )", ratio);
 	}
 }
 
Index: linux/tools/perf/util/parse-events.c
===================================================================
--- linux.orig/tools/perf/util/parse-events.c
+++ linux/tools/perf/util/parse-events.c
@@ -38,6 +38,7 @@ static struct event_symbol event_symbols
   { CHW(BRANCH_INSTRUCTIONS),	"branch-instructions",	"branches"	},
   { CHW(BRANCH_MISSES),		"branch-misses",	""		},
   { CHW(BUS_CYCLES),		"bus-cycles",		""		},
+  { CHW(STALLED_CYCLES),	"stalled-cycles",	""		},
 
   { CSW(CPU_CLOCK),		"cpu-clock",		""		},
   { CSW(TASK_CLOCK),		"task-clock",		""		},
Index: linux/tools/perf/util/python.c
===================================================================
--- linux.orig/tools/perf/util/python.c
+++ linux/tools/perf/util/python.c
@@ -798,6 +798,7 @@ static struct {
 	{ "COUNT_HW_BRANCH_INSTRUCTIONS", PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
 	{ "COUNT_HW_BRANCH_MISSES",	  PERF_COUNT_HW_BRANCH_MISSES },
 	{ "COUNT_HW_BUS_CYCLES",	  PERF_COUNT_HW_BUS_CYCLES },
+	{ "COUNT_HW_STALLED_CYCLES",	  PERF_COUNT_HW_STALLED_CYCLES },
 	{ "COUNT_HW_CACHE_L1D",		  PERF_COUNT_HW_CACHE_L1D },
 	{ "COUNT_HW_CACHE_L1I",		  PERF_COUNT_HW_CACHE_L1I },
 	{ "COUNT_HW_CACHE_LL",	  	  PERF_COUNT_HW_CACHE_LL },

  parent reply	other threads:[~2011-04-23 20:14 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-04-22  8:47 [PATCH 1/1] perf tools: Add missing user space support for config1/config2 Stephane Eranian
2011-04-22  9:23 ` Ingo Molnar
2011-04-22  9:41   ` Stephane Eranian
2011-04-22 10:52     ` [generalized cache events] " Ingo Molnar
2011-04-22 12:04       ` Stephane Eranian
2011-04-22 13:18         ` Ingo Molnar
2011-04-22 20:31           ` Stephane Eranian
2011-04-22 20:47             ` Ingo Molnar
2011-04-23 12:13               ` Stephane Eranian
2011-04-23 12:49                 ` Ingo Molnar
2011-04-22 21:03             ` Ingo Molnar
2011-04-23 12:27               ` Stephane Eranian
2011-04-22 16:51         ` Andi Kleen
2011-04-22 19:57           ` Ingo Molnar
2011-04-26  9:25           ` Peter Zijlstra
2011-04-22 16:50       ` arun
2011-04-22 17:00         ` Andi Kleen
2011-04-22 20:30         ` Ingo Molnar
2011-04-22 20:32           ` Ingo Molnar
2011-04-23  0:03             ` Andi Kleen
2011-04-23  7:50               ` Peter Zijlstra
2011-04-23 12:06                 ` Stephane Eranian
2011-04-23 12:36                   ` Ingo Molnar
2011-04-23 13:16                   ` Peter Zijlstra
2011-04-25 18:48                     ` Stephane Eranian
2011-04-25 19:40                     ` Andi Kleen
2011-04-25 19:55                       ` Ingo Molnar
2011-04-24  2:15                   ` Andi Kleen
2011-04-24  2:19                 ` Andi Kleen
2011-04-25 17:41                   ` Ingo Molnar
2011-04-25 18:00                     ` Dehao Chen
     [not found]                     ` <BANLkTiks31-pMJe4zCKrppsrA1d6KanJFA@mail.gmail.com>
2011-04-25 18:05                       ` Ingo Molnar
2011-04-25 18:39                         ` Stephane Eranian
2011-04-25 19:45                           ` Ingo Molnar
2011-04-23  8:02               ` Ingo Molnar
2011-04-23 20:14           ` Ingo Molnar [this message]
2011-04-24  6:16             ` [PATCH] perf events: Add stalled cycles generic event - PERF_COUNT_HW_STALLED_CYCLES Arun Sharma
2011-04-25 17:37               ` Ingo Molnar
2011-04-26  9:25               ` Peter Zijlstra
2011-04-26 14:00               ` Ingo Molnar
2011-04-27 11:11               ` Ingo Molnar
2011-04-27 14:47                 ` Arun Sharma
2011-04-27 15:48                   ` Ingo Molnar
2011-04-27 16:27                     ` Ingo Molnar
2011-04-27 19:05                       ` Arun Sharma
2011-04-27 19:03                     ` Arun Sharma

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20110423201409.GA20072@elte.hu \
    --to=mingo@elte.hu \
    --cc=a.p.zijlstra@chello.nl \
    --cc=acme@infradead.org \
    --cc=acme@redhat.com \
    --cc=ak@linux.intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=arun@sharma-home.net \
    --cc=asharma@fb.com \
    --cc=eranian@gmail.com \
    --cc=eranian@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=ming.m.lin@intel.com \
    --cc=peterz@infradead.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.