All of lore.kernel.org
 help / color / mirror / Atom feed
From: Ingo Molnar <mingo@elte.hu>
To: Arun Sharma <asharma@fb.com>
Cc: arun@sharma-home.net, Stephane Eranian <eranian@google.com>,
	Arnaldo Carvalho de Melo <acme@infradead.org>,
	linux-kernel@vger.kernel.org, Andi Kleen <ak@linux.intel.com>,
	Peter Zijlstra <peterz@infradead.org>,
	Lin Ming <ming.m.lin@intel.com>,
	Arnaldo Carvalho de Melo <acme@redhat.com>,
	Thomas Gleixner <tglx@linutronix.de>,
	Peter Zijlstra <a.p.zijlstra@chello.nl>,
	eranian@gmail.com, Linus Torvalds <torvalds@linux-foundation.org>,
	Andrew Morton <akpm@linux-foundation.org>
Subject: Re: [PATCH] perf events: Add stalled cycles generic event - PERF_COUNT_HW_STALLED_CYCLES
Date: Tue, 26 Apr 2011 16:00:23 +0200	[thread overview]
Message-ID: <20110426140023.GA21083@elte.hu> (raw)
In-Reply-To: <20110424061645.GA12013@radium.snc4.facebook.com>


* Arun Sharma <asharma@fb.com> wrote:

> On Sat, Apr 23, 2011 at 10:14:09PM +0200, Ingo Molnar wrote:
> > 
> > The new PERF_COUNT_HW_STALLED_CYCLES event tries to approximate
> > cycles the CPU does nothing useful, because it is stalled on a
> > cache-miss or some other condition.
> 
> Conceptually looks fine. I'd prefer a more precise name such as:
> PERF_COUNT_EXECUTION_STALLED_CYCLES (to differentiate from frontend or
> retirement stalls).

How about this naming convention:

  PERF_COUNT_HW_STALLED_CYCLES                              # execution
  PERF_COUNT_HW_STALLED_CYCLES_FRONTEND                     # frontend
  PERF_COUNT_HW_STALLED_CYCLES_ICACHE_MISS                  # icache

So STALLED_CYCLES would be the most general metric, the one that shows the real
impact on the application. The other events would then help disambiguate this
metric some more.

Below is the updated patch - this version makes the backend-stalls event
properly per-model (with the Nehalem table filled in).

What do you think?

Thanks,

	Ingo

--------------------->
Subject: perf events: Add stalled cycles generic event - PERF_COUNT_HW_STALLED_CYCLES
From: Ingo Molnar <mingo@elte.hu>
Date: Sun Apr 24 08:18:31 CEST 2011

The new PERF_COUNT_HW_STALLED_CYCLES event tries to approximate
the cycles during which the CPU does nothing useful because it is
stalled on a cache miss or some other condition.

Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event_intel.c |   42 +++++++++++++++++++++++++--------
 include/linux/perf_event.h             |    1 
 tools/perf/util/parse-events.c         |    1 
 tools/perf/util/python.c               |    1 
 4 files changed, 36 insertions(+), 9 deletions(-)

Index: linux/arch/x86/kernel/cpu/perf_event_intel.c
===================================================================
--- linux.orig/arch/x86/kernel/cpu/perf_event_intel.c
+++ linux/arch/x86/kernel/cpu/perf_event_intel.c
@@ -36,6 +36,23 @@ static const u64 intel_perfmon_event_map
   [PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
 };
 
+/*
+ * Other generic events, Nehalem:
+ */
+static const u64 intel_nhm_event_map[] =
+{
+  /* Arch-perfmon events: */
+  [PERF_COUNT_HW_CPU_CYCLES]		= 0x003c,
+  [PERF_COUNT_HW_INSTRUCTIONS]		= 0x00c0,
+  [PERF_COUNT_HW_CACHE_REFERENCES]	= 0x4f2e,
+  [PERF_COUNT_HW_CACHE_MISSES]		= 0x412e,
+  [PERF_COUNT_HW_BRANCH_INSTRUCTIONS]	= 0x00c4,
+  [PERF_COUNT_HW_BRANCH_MISSES]		= 0x00c5,
+  [PERF_COUNT_HW_BUS_CYCLES]		= 0x013c,
+
+  [PERF_COUNT_HW_STALLED_CYCLES]	= 0xffa2, /* 0xff: All reasons, 0xa2: Resource stalls */
+};
+
 static struct event_constraint intel_core_event_constraints[] =
 {
 	INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */
@@ -150,6 +167,12 @@ static u64 intel_pmu_event_map(int hw_ev
 	return intel_perfmon_event_map[hw_event];
 }
 
+static u64 intel_pmu_nhm_event_map(int hw_event)
+{
+	return intel_nhm_event_map[hw_event];
+}
+
+
 static __initconst const u64 snb_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -1400,18 +1423,19 @@ static __init int intel_pmu_init(void)
 	case 26: /* 45 nm nehalem, "Bloomfield" */
 	case 30: /* 45 nm nehalem, "Lynnfield" */
 	case 46: /* 45 nm nehalem-ex, "Beckton" */
-		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
-		       sizeof(hw_cache_event_ids));
-		memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs,
-		       sizeof(hw_cache_extra_regs));
+		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+		memcpy(hw_cache_extra_regs, nehalem_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
 
 		intel_pmu_lbr_init_nhm();
 
-		x86_pmu.event_constraints = intel_nehalem_event_constraints;
-		x86_pmu.pebs_constraints = intel_nehalem_pebs_event_constraints;
-		x86_pmu.percore_constraints = intel_nehalem_percore_constraints;
-		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
-		x86_pmu.extra_regs = intel_nehalem_extra_regs;
+		x86_pmu.event_constraints	= intel_nehalem_event_constraints;
+		x86_pmu.pebs_constraints	= intel_nehalem_pebs_event_constraints;
+		x86_pmu.percore_constraints	= intel_nehalem_percore_constraints;
+		x86_pmu.enable_all		= intel_pmu_nhm_enable_all;
+		x86_pmu.extra_regs		= intel_nehalem_extra_regs;
+		x86_pmu.event_map		= intel_pmu_nhm_event_map;
+		x86_pmu.max_events		= ARRAY_SIZE(intel_nhm_event_map);
+
 		pr_cont("Nehalem events, ");
 		break;
 
Index: linux/include/linux/perf_event.h
===================================================================
--- linux.orig/include/linux/perf_event.h
+++ linux/include/linux/perf_event.h
@@ -52,6 +52,7 @@ enum perf_hw_id {
 	PERF_COUNT_HW_BRANCH_INSTRUCTIONS	= 4,
 	PERF_COUNT_HW_BRANCH_MISSES		= 5,
 	PERF_COUNT_HW_BUS_CYCLES		= 6,
+	PERF_COUNT_HW_STALLED_CYCLES		= 7,
 
 	PERF_COUNT_HW_MAX,			/* non-ABI */
 };
Index: linux/tools/perf/util/parse-events.c
===================================================================
--- linux.orig/tools/perf/util/parse-events.c
+++ linux/tools/perf/util/parse-events.c
@@ -38,6 +38,7 @@ static struct event_symbol event_symbols
   { CHW(BRANCH_INSTRUCTIONS),	"branch-instructions",	"branches"	},
   { CHW(BRANCH_MISSES),		"branch-misses",	""		},
   { CHW(BUS_CYCLES),		"bus-cycles",		""		},
+  { CHW(STALLED_CYCLES),	"stalled-cycles",	""		},
 
   { CSW(CPU_CLOCK),		"cpu-clock",		""		},
   { CSW(TASK_CLOCK),		"task-clock",		""		},
Index: linux/tools/perf/util/python.c
===================================================================
--- linux.orig/tools/perf/util/python.c
+++ linux/tools/perf/util/python.c
@@ -798,6 +798,7 @@ static struct {
 	{ "COUNT_HW_BRANCH_INSTRUCTIONS", PERF_COUNT_HW_BRANCH_INSTRUCTIONS },
 	{ "COUNT_HW_BRANCH_MISSES",	  PERF_COUNT_HW_BRANCH_MISSES },
 	{ "COUNT_HW_BUS_CYCLES",	  PERF_COUNT_HW_BUS_CYCLES },
+	{ "COUNT_HW_STALLED_CYCLES",	  PERF_COUNT_HW_STALLED_CYCLES },
 	{ "COUNT_HW_CACHE_L1D",		  PERF_COUNT_HW_CACHE_L1D },
 	{ "COUNT_HW_CACHE_L1I",		  PERF_COUNT_HW_CACHE_L1I },
 	{ "COUNT_HW_CACHE_LL",	  	  PERF_COUNT_HW_CACHE_LL },

  parent reply	other threads:[~2011-04-26 14:00 UTC|newest]

Thread overview: 46+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2011-04-22  8:47 [PATCH 1/1] perf tools: Add missing user space support for config1/config2 Stephane Eranian
2011-04-22  9:23 ` Ingo Molnar
2011-04-22  9:41   ` Stephane Eranian
2011-04-22 10:52     ` [generalized cache events] " Ingo Molnar
2011-04-22 12:04       ` Stephane Eranian
2011-04-22 13:18         ` Ingo Molnar
2011-04-22 20:31           ` Stephane Eranian
2011-04-22 20:47             ` Ingo Molnar
2011-04-23 12:13               ` Stephane Eranian
2011-04-23 12:49                 ` Ingo Molnar
2011-04-22 21:03             ` Ingo Molnar
2011-04-23 12:27               ` Stephane Eranian
2011-04-22 16:51         ` Andi Kleen
2011-04-22 19:57           ` Ingo Molnar
2011-04-26  9:25           ` Peter Zijlstra
2011-04-22 16:50       ` arun
2011-04-22 17:00         ` Andi Kleen
2011-04-22 20:30         ` Ingo Molnar
2011-04-22 20:32           ` Ingo Molnar
2011-04-23  0:03             ` Andi Kleen
2011-04-23  7:50               ` Peter Zijlstra
2011-04-23 12:06                 ` Stephane Eranian
2011-04-23 12:36                   ` Ingo Molnar
2011-04-23 13:16                   ` Peter Zijlstra
2011-04-25 18:48                     ` Stephane Eranian
2011-04-25 19:40                     ` Andi Kleen
2011-04-25 19:55                       ` Ingo Molnar
2011-04-24  2:15                   ` Andi Kleen
2011-04-24  2:19                 ` Andi Kleen
2011-04-25 17:41                   ` Ingo Molnar
2011-04-25 18:00                     ` Dehao Chen
     [not found]                     ` <BANLkTiks31-pMJe4zCKrppsrA1d6KanJFA@mail.gmail.com>
2011-04-25 18:05                       ` Ingo Molnar
2011-04-25 18:39                         ` Stephane Eranian
2011-04-25 19:45                           ` Ingo Molnar
2011-04-23  8:02               ` Ingo Molnar
2011-04-23 20:14           ` [PATCH] perf events: Add stalled cycles generic event - PERF_COUNT_HW_STALLED_CYCLES Ingo Molnar
2011-04-24  6:16             ` Arun Sharma
2011-04-25 17:37               ` Ingo Molnar
2011-04-26  9:25               ` Peter Zijlstra
2011-04-26 14:00               ` Ingo Molnar [this message]
2011-04-27 11:11               ` Ingo Molnar
2011-04-27 14:47                 ` Arun Sharma
2011-04-27 15:48                   ` Ingo Molnar
2011-04-27 16:27                     ` Ingo Molnar
2011-04-27 19:05                       ` Arun Sharma
2011-04-27 19:03                     ` Arun Sharma

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20110426140023.GA21083@elte.hu \
    --to=mingo@elte.hu \
    --cc=a.p.zijlstra@chello.nl \
    --cc=acme@infradead.org \
    --cc=acme@redhat.com \
    --cc=ak@linux.intel.com \
    --cc=akpm@linux-foundation.org \
    --cc=arun@sharma-home.net \
    --cc=asharma@fb.com \
    --cc=eranian@gmail.com \
    --cc=eranian@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=ming.m.lin@intel.com \
    --cc=peterz@infradead.org \
    --cc=tglx@linutronix.de \
    --cc=torvalds@linux-foundation.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.