public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [GIT-PULL -tip][PATCH 0/6] perf_counter patches
@ 2009-07-01  9:33 Jaswinder Singh Rajput
  2009-07-01  9:35 ` [PATCH 1/6 -tip] perf stat: define MATCH_EVENT for easy attrs checking Jaswinder Singh Rajput
  2009-07-01 11:45 ` [GIT-PULL -tip][PATCH 0/6] perf_counter patches Ingo Molnar
  0 siblings, 2 replies; 41+ messages in thread
From: Jaswinder Singh Rajput @ 2009-07-01  9:33 UTC (permalink / raw)
  To: Ingo Molnar, Thomas Gleixner, Alan Cox, Peter Zijlstra,
	x86 maintainers, LKML

Ingo,

Please pull perf_counter patches :
The following changes since commit 092304de242705abf24edcb0fc7beed4c4276865:
  Ingo Molnar (1):
        Merge branch 'perfcounters/urgent'

are available in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/jaswinder/linux-2.6-tip.git master

Jaswinder Singh Rajput (6):
      perf stat: define MATCH_EVENT for easy attrs checking
      perf stat: treat same behaviour for all CYCLES and CLOCKS
      perf_counter: Add Generalized Hardware vectored co-processor support for AMD
      perf_counter: Add Generalized Hardware interrupt support for AMD
      perf_counter: Add hardware vector events for nehalem
      perf_counter: Add hardware interrupt events for nehalem, core2 and atom

 arch/x86/kernel/cpu/perf_counter.c |   95 ++++++++++++++++++++++++++++++++++++
 include/linux/perf_counter.h       |   27 ++++++++++
 kernel/perf_counter.c              |    2 +
 tools/perf/builtin-stat.c          |   60 ++++++++++++++---------
 tools/perf/util/parse-events.c     |   73 +++++++++++++++++++++++++++
 5 files changed, 233 insertions(+), 24 deletions(-)

Complete diff:
diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index d4cf4ce..4ef1838 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -372,6 +372,42 @@ static const u64 atom_hw_cache_event_ids
  },
 };
 
+/*
+ * Generalized hw vectored co-processor event table
+ */
+
+static u64 __read_mostly hw_vector_event_ids[PERF_COUNT_HW_VECTOR_MAX];
+
+static const u64 nehalem_hw_vector_event_ids[] =
+{
+  [PERF_COUNT_HW_VECTOR_ADD]		= 0x01B1, /* UOPS_EXECUTED.PORT0     */
+  [PERF_COUNT_HW_VECTOR_MULTIPLY]	= 0x0214, /* ARITH.MUL               */
+  [PERF_COUNT_HW_VECTOR_DIVIDE]		= 0x0114, /* ARITH.CYCLES_DIV_BUSY   */
+  [PERF_COUNT_HW_VECTOR_IDLE_CYCLES]	= 0x0,
+  [PERF_COUNT_HW_VECTOR_STALL_CYCLES]	= 0x60A2, /* RESOURCE_STALLS.FPCW|MXCSR*/
+  [PERF_COUNT_HW_VECTOR_OPS]		= 0x0710, /* FP_COMP_OPS_EXE.X87|MMX|SSE_FP*/
+};
+
+/*
+ * Generalized hw interrupt event table
+ */
+
+static u64 __read_mostly hw_interrupt_event_ids[PERF_COUNT_HW_INTERRUPT_MAX];
+
+static const u64 nehalem_hw_interrupt_event_ids[] =
+{
+  [PERF_COUNT_HW_INTERRUPT]		= 0x011D, /* HW_INT.RCV              */
+  [PERF_COUNT_HW_INTERRUPT_MASK]	= 0x021D, /* HW_INT.CYCLES_MASKED    */
+  [PERF_COUNT_HW_INTERRUPT_PENDING_MASK]= 0x041D, /* HW_INT.CYCLES_PENDING_AND_MASKED*/
+};
+
+static const u64 core2_atom_hw_interrupt_event_ids[] =
+{
+  [PERF_COUNT_HW_INTERRUPT]		= 0x00C8, /* HW_INT_RCV              */
+  [PERF_COUNT_HW_INTERRUPT_MASK]	= 0x01C6, /* CYCLES_INT_MASKED.CYCLES_INT_MASKED*/
+  [PERF_COUNT_HW_INTERRUPT_PENDING_MASK]= 0x02C6, /* CYCLES_INT_MASKED.CYCLES_INT_PENDING_AND_MASKED*/
+};
+
 static u64 intel_pmu_raw_event(u64 event)
 {
 #define CORE_EVNTSEL_EVENT_MASK		0x000000FFULL
@@ -481,6 +517,25 @@ static const u64 amd_hw_cache_event_ids
  },
 };
 
+static const u64 amd_hw_vector_event_ids[] =
+{
+  [PERF_COUNT_HW_VECTOR_ADD]		= 0x0100, /* Dispatched FPU Add	     */
+  [PERF_COUNT_HW_VECTOR_MULTIPLY]	= 0x0200, /* Dispatched FPU Multiply */
+  [PERF_COUNT_HW_VECTOR_DIVIDE]		= 0x0400, /* Dispatched FPU Store    */
+  [PERF_COUNT_HW_VECTOR_IDLE_CYCLES]	= 0x0001, /* FPU Empty cycles        */
+  [PERF_COUNT_HW_VECTOR_STALL_CYCLES]	= 0x00D7, /* Dispatch stall for FPU  */
+  [PERF_COUNT_HW_VECTOR_OPS]		= 0x0FCB, /* Retired x87|(MMX & 3Dnow)
+						   |(SSE & SSE2) Instructions */
+};
+
+
+static const u64 amd_hw_interrupt_event_ids[] =
+{
+  [PERF_COUNT_HW_INTERRUPT]		= 0x00CF, /* Interrupts Taken        */
+  [PERF_COUNT_HW_INTERRUPT_MASK]	= 0x00CD, /* Interrupts-Masked Cycles*/
+  [PERF_COUNT_HW_INTERRUPT_PENDING_MASK]= 0x00CE, /* Int Mask+Pending Cycles */
+};
+
 /*
  * AMD Performance Monitor K7 and later.
  */
@@ -659,6 +714,28 @@ set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr)
 	return 0;
 }
 
+static inline int
+set_hw_vector_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr)
+{
+	if (attr->config >= PERF_COUNT_HW_VECTOR_MAX)
+		return -EINVAL;
+
+	hwc->config |= hw_vector_event_ids[attr->config];
+
+	return 0;
+}
+
+static inline int
+set_hw_interrupt_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr)
+{
+	if (attr->config >= PERF_COUNT_HW_INTERRUPT_MAX)
+		return -EINVAL;
+
+	hwc->config |= hw_interrupt_event_ids[attr->config];
+
+	return 0;
+}
+
 /*
  * Setup the hardware configuration for a given attr_type
  */
@@ -716,6 +793,12 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	if (attr->type == PERF_TYPE_HW_CACHE)
 		return set_ext_hw_attr(hwc, attr);
 
+	if (attr->type == PERF_TYPE_HW_VECTOR)
+		return set_hw_vector_attr(hwc, attr);
+
+	if (attr->type == PERF_TYPE_HW_INTERRUPT)
+		return set_hw_interrupt_attr(hwc, attr);
+
 	if (attr->config >= x86_pmu.max_events)
 		return -EINVAL;
 	/*
@@ -1437,6 +1520,8 @@ static int intel_pmu_init(void)
 	case 29: /* six-core 45 nm xeon "Dunnington" */
 		memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
+		memcpy(hw_interrupt_event_ids, core2_atom_hw_interrupt_event_ids,
+		       sizeof(hw_interrupt_event_ids));
 
 		pr_cont("Core2 events, ");
 		break;
@@ -1444,12 +1529,18 @@ static int intel_pmu_init(void)
 	case 26:
 		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
+		memcpy(hw_vector_event_ids, nehalem_hw_vector_event_ids,
+		       sizeof(hw_vector_event_ids));
+		memcpy(hw_interrupt_event_ids, nehalem_hw_interrupt_event_ids,
+		       sizeof(hw_interrupt_event_ids));
 
 		pr_cont("Nehalem/Corei7 events, ");
 		break;
 	case 28:
 		memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
+		memcpy(hw_interrupt_event_ids, core2_atom_hw_interrupt_event_ids,
+		       sizeof(hw_interrupt_event_ids));
 
 		pr_cont("Atom events, ");
 		break;
@@ -1468,6 +1559,10 @@ static int amd_pmu_init(void)
 	/* Events are common for all AMDs */
 	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
 	       sizeof(hw_cache_event_ids));
+	memcpy(hw_vector_event_ids, amd_hw_vector_event_ids,
+	       sizeof(hw_vector_event_ids));
+	memcpy(hw_interrupt_event_ids, amd_hw_interrupt_event_ids,
+	       sizeof(hw_interrupt_event_ids));
 
 	return 0;
 }
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 5e970c7..c7165b9 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -31,6 +31,8 @@ enum perf_type_id {
 	PERF_TYPE_TRACEPOINT			= 2,
 	PERF_TYPE_HW_CACHE			= 3,
 	PERF_TYPE_RAW				= 4,
+	PERF_TYPE_HW_VECTOR			= 5,
+	PERF_TYPE_HW_INTERRUPT			= 6,
 
 	PERF_TYPE_MAX,				/* non-ABI */
 };
@@ -89,6 +91,31 @@ enum perf_hw_cache_op_result_id {
 };
 
 /*
+ * Generalized hardware vectored co-processor counters:
+ */
+enum perf_hw_vector_id {
+	PERF_COUNT_HW_VECTOR_ADD		= 0,
+	PERF_COUNT_HW_VECTOR_MULTIPLY		= 1,
+	PERF_COUNT_HW_VECTOR_DIVIDE		= 2,
+	PERF_COUNT_HW_VECTOR_IDLE_CYCLES	= 3,
+	PERF_COUNT_HW_VECTOR_STALL_CYCLES	= 4,
+	PERF_COUNT_HW_VECTOR_OPS		= 5,
+
+	PERF_COUNT_HW_VECTOR_MAX,		/* non-ABI */
+};
+
+/*
+ * Generalized hardware interrupt counters:
+ */
+enum perf_hw_interrupt_id {
+	PERF_COUNT_HW_INTERRUPT			= 0,
+	PERF_COUNT_HW_INTERRUPT_MASK		= 1,
+	PERF_COUNT_HW_INTERRUPT_PENDING_MASK	= 2,
+
+	PERF_COUNT_HW_INTERRUPT_MAX,		/* non-ABI */
+};
+
+/*
  * Special "software" counters provided by the kernel, even if the hardware
  * does not support performance counters. These counters measure various
  * physical and sw events of the kernel (and allow the profiling of them as
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index d55a50d..7a529a8 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -3838,6 +3838,8 @@ perf_counter_alloc(struct perf_counter_attr *attr,
 	case PERF_TYPE_RAW:
 	case PERF_TYPE_HARDWARE:
 	case PERF_TYPE_HW_CACHE:
+	case PERF_TYPE_HW_VECTOR:
+	case PERF_TYPE_HW_INTERRUPT:
 		pmu = hw_perf_counter_init(counter);
 		break;
 
diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 2e03524..af61c29 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -96,6 +96,10 @@ static u64			walltime_nsecs_noise;
 static u64			runtime_cycles_avg;
 static u64			runtime_cycles_noise;
 
+#define MATCH_EVENT(t, c, counter)			\
+	(attrs[counter].type == PERF_TYPE_##t &&	\
+	 attrs[counter].config == PERF_COUNT_##c)
+
 #define ERR_PERF_OPEN \
 "Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n"
 
@@ -132,13 +136,31 @@ static void create_perf_stat_counter(int counter, int pid)
  */
 static inline int nsec_counter(int counter)
 {
-	if (attrs[counter].type != PERF_TYPE_SOFTWARE)
-		return 0;
+	if (MATCH_EVENT(SOFTWARE, SW_CPU_CLOCK, counter) ||
+	    MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter))
+		return 1;
+
+	return 0;
+}
 
-	if (attrs[counter].config == PERF_COUNT_SW_CPU_CLOCK)
+/*
+ * Does the counter have cycles as a unit?
+ */
+static inline int cycle_counter(int counter)
+{
+	if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter) ||
+	    MATCH_EVENT(HARDWARE, HW_BUS_CYCLES, counter))
 		return 1;
 
-	if (attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK)
+	return 0;
+}
+
+/*
+ * Does the counter have instructions as a unit?
+ */
+static inline int instruction_counter(int counter)
+{
+	if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter))
 		return 1;
 
 	return 0;
@@ -192,11 +214,9 @@ static void read_counter(int counter)
 	/*
 	 * Save the full runtime - to allow normalization during printout:
 	 */
-	if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
-		attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK)
+	if (nsec_counter(counter))
 		runtime_nsecs[run_idx] = count[0];
-	if (attrs[counter].type == PERF_TYPE_HARDWARE &&
-		attrs[counter].config == PERF_COUNT_HW_CPU_CYCLES)
+	else if (cycle_counter(counter))
 		runtime_cycles[run_idx] = count[0];
 }
 
@@ -290,13 +310,10 @@ static void nsec_printout(int counter, u64 *count, u64 *noise)
 
 	fprintf(stderr, " %14.6f  %-24s", msecs, event_name(counter));
 
-	if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
-		attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) {
+	if (nsec_counter(counter) && walltime_nsecs_avg)
+		fprintf(stderr, " # %10.3f CPUs ",
+			(double)count[0] / (double)walltime_nsecs_avg);
 
-		if (walltime_nsecs_avg)
-			fprintf(stderr, " # %10.3f CPUs ",
-				(double)count[0] / (double)walltime_nsecs_avg);
-	}
 	print_noise(count, noise);
 }
 
@@ -304,18 +321,13 @@ static void abs_printout(int counter, u64 *count, u64 *noise)
 {
 	fprintf(stderr, " %14Ld  %-24s", count[0], event_name(counter));
 
-	if (runtime_cycles_avg &&
-		attrs[counter].type == PERF_TYPE_HARDWARE &&
-			attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) {
-
+	if (instruction_counter(counter) && runtime_cycles_avg)
 		fprintf(stderr, " # %10.3f IPC  ",
 			(double)count[0] / (double)runtime_cycles_avg);
-	} else {
-		if (runtime_nsecs_avg) {
-			fprintf(stderr, " # %10.3f M/sec",
-				(double)count[0]/runtime_nsecs_avg*1000.0);
-		}
-	}
+	else if (runtime_nsecs_avg)
+		fprintf(stderr, " # %10.3f M/sec",
+			(double)count[0]/runtime_nsecs_avg*1000.0);
+
 	print_noise(count, noise);
 }
 
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 4d042f1..5ea4c12 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -40,6 +40,25 @@ static struct event_symbol event_symbols[] = {
   { CSW(CPU_MIGRATIONS),	"cpu-migrations",	"migrations"	},
 };
 
+#define CHVECTOR(x) .type = PERF_TYPE_HW_VECTOR, .config = PERF_COUNT_HW_VECTOR_##x
+
+static struct event_symbol vector_event_symbols[] = {
+  { CHVECTOR(ADD),		"vec-adds",		"add"		},
+  { CHVECTOR(MULTIPLY),		"vec-muls",		"multiply"	},
+  { CHVECTOR(DIVIDE),		"vec-divs",		"divide"	},
+  { CHVECTOR(IDLE_CYCLES),	"vec-idle-cycles",	"vec-empty-cycles"},
+  { CHVECTOR(STALL_CYCLES),	"vec-stall-cycles",	"vec-busy-cycles"},
+  { CHVECTOR(OPS),		"vec-ops",		"vec-operations"},
+};
+
+#define CHINT(x) .type = PERF_TYPE_HW_INTERRUPT, .config = PERF_COUNT_HW_##x
+
+static struct event_symbol interrupt_event_symbols[] = {
+  { CHINT(INTERRUPT),		"interrupts",		"interrupt"	},
+  { CHINT(INTERRUPT_MASK),	"int-mask-cycles",	"masked"	},
+  { CHINT(INTERRUPT_PENDING_MASK),"int-pending-mask-cycles",	""	},
+};
+
 #define __PERF_COUNTER_FIELD(config, name) \
 	((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)
 
@@ -172,6 +191,16 @@ char *event_name(int counter)
 		return event_cache_name(cache_type, cache_op, cache_result);
 	}
 
+	case PERF_TYPE_HW_VECTOR:
+		if (config < PERF_COUNT_HW_VECTOR_MAX)
+			return vector_event_symbols[config].symbol;
+		return "unknown-vector";
+
+	case PERF_TYPE_HW_INTERRUPT:
+		if (config < PERF_COUNT_HW_INTERRUPT_MAX)
+			return interrupt_event_symbols[config].symbol;
+		return "unknown-interrupt";
+
 	case PERF_TYPE_SOFTWARE:
 		if (config < PERF_COUNT_SW_MAX)
 			return sw_event_names[config];
@@ -250,6 +279,32 @@ static int check_events(const char *str, unsigned int i)
 	return 0;
 }
 
+static int check_vector_events(const char *str, unsigned int i)
+{
+	if (!strncmp(str, vector_event_symbols[i].symbol,
+		     strlen(vector_event_symbols[i].symbol)))
+		return 1;
+
+	if (strlen(vector_event_symbols[i].alias))
+		if (!strncmp(str, vector_event_symbols[i].alias,
+			     strlen(vector_event_symbols[i].alias)))
+			return 1;
+	return 0;
+}
+
+static int check_interrupt_events(const char *str, unsigned int i)
+{
+	if (!strncmp(str, interrupt_event_symbols[i].symbol,
+		     strlen(interrupt_event_symbols[i].symbol)))
+		return 1;
+
+	if (strlen(interrupt_event_symbols[i].alias))
+		if (!strncmp(str, interrupt_event_symbols[i].alias,
+			     strlen(interrupt_event_symbols[i].alias)))
+			return 1;
+	return 0;
+}
+
 /*
  * Each event can have multiple symbolic names.
  * Symbolic names are (almost) exactly matched.
@@ -297,6 +352,24 @@ static int parse_event_symbols(const char *str, struct perf_counter_attr *attr)
 		}
 	}
 
+	for (i = 0; i < ARRAY_SIZE(vector_event_symbols); i++) {
+		if (check_vector_events(str, i)) {
+			attr->type = vector_event_symbols[i].type;
+			attr->config = vector_event_symbols[i].config;
+
+			return 0;
+		}
+	}
+
+	for (i = 0; i < ARRAY_SIZE(interrupt_event_symbols); i++) {
+		if (check_interrupt_events(str, i)) {
+			attr->type = interrupt_event_symbols[i].type;
+			attr->config = interrupt_event_symbols[i].config;
+
+			return 0;
+		}
+	}
+
 	return parse_generic_hw_symbols(str, attr);
 }
 



^ permalink raw reply related	[flat|nested] 41+ messages in thread

* [PATCH 1/6 -tip] perf stat: define MATCH_EVENT for easy attrs checking
  2009-07-01  9:33 [GIT-PULL -tip][PATCH 0/6] perf_counter patches Jaswinder Singh Rajput
@ 2009-07-01  9:35 ` Jaswinder Singh Rajput
  2009-07-01  9:36   ` [PATCH 2/6 -tip] perf stat: treat same behaviour for all CYCLES and CLOCKS Jaswinder Singh Rajput
  2009-07-01 11:30   ` [tip:perfcounters/urgent] perf stat: Define MATCH_EVENT for easy attr checking tip-bot for Jaswinder Singh Rajput
  2009-07-01 11:45 ` [GIT-PULL -tip][PATCH 0/6] perf_counter patches Ingo Molnar
  1 sibling, 2 replies; 41+ messages in thread
From: Jaswinder Singh Rajput @ 2009-07-01  9:35 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Thomas Gleixner, Peter Zijlstra, x86 maintainers, LKML


MATCH_EVENT is useful:
 1. for multiple attrs checking
 2. avoid repetition of PERF_TYPE_ and PERF_COUNT_ and save space
 3. avoids line breakage

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
---
 tools/perf/builtin-stat.c |   27 ++++++++++-----------------
 1 files changed, 10 insertions(+), 17 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 2e03524..6bf2b80 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -96,6 +96,10 @@ static u64			walltime_nsecs_noise;
 static u64			runtime_cycles_avg;
 static u64			runtime_cycles_noise;
 
+#define MATCH_EVENT(t, c, counter)			\
+	(attrs[counter].type == PERF_TYPE_##t &&	\
+	 attrs[counter].config == PERF_COUNT_##c)
+
 #define ERR_PERF_OPEN \
 "Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n"
 
@@ -132,13 +136,8 @@ static void create_perf_stat_counter(int counter, int pid)
  */
 static inline int nsec_counter(int counter)
 {
-	if (attrs[counter].type != PERF_TYPE_SOFTWARE)
-		return 0;
-
-	if (attrs[counter].config == PERF_COUNT_SW_CPU_CLOCK)
-		return 1;
-
-	if (attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK)
+	if (MATCH_EVENT(SOFTWARE, SW_CPU_CLOCK, counter) ||
+	    MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter))
 		return 1;
 
 	return 0;
@@ -192,11 +191,9 @@ static void read_counter(int counter)
 	/*
 	 * Save the full runtime - to allow normalization during printout:
 	 */
-	if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
-		attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK)
+	if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter))
 		runtime_nsecs[run_idx] = count[0];
-	if (attrs[counter].type == PERF_TYPE_HARDWARE &&
-		attrs[counter].config == PERF_COUNT_HW_CPU_CYCLES)
+	if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter))
 		runtime_cycles[run_idx] = count[0];
 }
 
@@ -290,9 +287,7 @@ static void nsec_printout(int counter, u64 *count, u64 *noise)
 
 	fprintf(stderr, " %14.6f  %-24s", msecs, event_name(counter));
 
-	if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
-		attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) {
-
+	if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) {
 		if (walltime_nsecs_avg)
 			fprintf(stderr, " # %10.3f CPUs ",
 				(double)count[0] / (double)walltime_nsecs_avg);
@@ -305,9 +300,7 @@ static void abs_printout(int counter, u64 *count, u64 *noise)
 	fprintf(stderr, " %14Ld  %-24s", count[0], event_name(counter));
 
 	if (runtime_cycles_avg &&
-		attrs[counter].type == PERF_TYPE_HARDWARE &&
-			attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) {
-
+	    MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) {
 		fprintf(stderr, " # %10.3f IPC  ",
 			(double)count[0] / (double)runtime_cycles_avg);
 	} else {
-- 
1.6.0.6




^ permalink raw reply related	[flat|nested] 41+ messages in thread

* [PATCH 2/6 -tip] perf stat: treat same behaviour for all CYCLES and CLOCKS
  2009-07-01  9:35 ` [PATCH 1/6 -tip] perf stat: define MATCH_EVENT for easy attrs checking Jaswinder Singh Rajput
@ 2009-07-01  9:36   ` Jaswinder Singh Rajput
  2009-07-01  9:37     ` [PATCH 3/6 -tip] perf_counter: Add Generalized Hardware vectored co-processor support for AMD Jaswinder Singh Rajput
  2009-07-01 11:39     ` [PATCH 2/6 -tip] perf stat: treat same behaviour for all CYCLES and CLOCKS Ingo Molnar
  2009-07-01 11:30   ` [tip:perfcounters/urgent] perf stat: Define MATCH_EVENT for easy attr checking tip-bot for Jaswinder Singh Rajput
  1 sibling, 2 replies; 41+ messages in thread
From: Jaswinder Singh Rajput @ 2009-07-01  9:36 UTC (permalink / raw)
  To: Ingo Molnar; +Cc: Thomas Gleixner, Peter Zijlstra, x86 maintainers, LKML


For normalization also added SW_CPU_CLOCK and HW_BUS_CYCLES

For nsec_printout also added SW_CPU_CLOCK

Added helper functions to check counter unit as cycles and instructions

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
---
 tools/perf/builtin-stat.c |   49 +++++++++++++++++++++++++++++++-------------
 1 files changed, 34 insertions(+), 15 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 6bf2b80..af61c29 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -144,6 +144,29 @@ static inline int nsec_counter(int counter)
 }
 
 /*
+ * Does the counter have cycles as a unit?
+ */
+static inline int cycle_counter(int counter)
+{
+	if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter) ||
+	    MATCH_EVENT(HARDWARE, HW_BUS_CYCLES, counter))
+		return 1;
+
+	return 0;
+}
+
+/*
+ * Does the counter have instructions as a unit?
+ */
+static inline int instruction_counter(int counter)
+{
+	if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter))
+		return 1;
+
+	return 0;
+}
+
+/*
  * Read out the results of a single counter:
  */
 static void read_counter(int counter)
@@ -191,9 +214,9 @@ static void read_counter(int counter)
 	/*
 	 * Save the full runtime - to allow normalization during printout:
 	 */
-	if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter))
+	if (nsec_counter(counter))
 		runtime_nsecs[run_idx] = count[0];
-	if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter))
+	else if (cycle_counter(counter))
 		runtime_cycles[run_idx] = count[0];
 }
 
@@ -287,11 +310,10 @@ static void nsec_printout(int counter, u64 *count, u64 *noise)
 
 	fprintf(stderr, " %14.6f  %-24s", msecs, event_name(counter));
 
-	if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) {
-		if (walltime_nsecs_avg)
-			fprintf(stderr, " # %10.3f CPUs ",
-				(double)count[0] / (double)walltime_nsecs_avg);
-	}
+	if (nsec_counter(counter) && walltime_nsecs_avg)
+		fprintf(stderr, " # %10.3f CPUs ",
+			(double)count[0] / (double)walltime_nsecs_avg);
+
 	print_noise(count, noise);
 }
 
@@ -299,16 +321,13 @@ static void abs_printout(int counter, u64 *count, u64 *noise)
 {
 	fprintf(stderr, " %14Ld  %-24s", count[0], event_name(counter));
 
-	if (runtime_cycles_avg &&
-	    MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) {
+	if (instruction_counter(counter) && runtime_cycles_avg)
 		fprintf(stderr, " # %10.3f IPC  ",
 			(double)count[0] / (double)runtime_cycles_avg);
-	} else {
-		if (runtime_nsecs_avg) {
-			fprintf(stderr, " # %10.3f M/sec",
-				(double)count[0]/runtime_nsecs_avg*1000.0);
-		}
-	}
+	else if (runtime_nsecs_avg)
+		fprintf(stderr, " # %10.3f M/sec",
+			(double)count[0]/runtime_nsecs_avg*1000.0);
+
 	print_noise(count, noise);
 }
 
-- 
1.6.0.6




^ permalink raw reply related	[flat|nested] 41+ messages in thread

* [PATCH 3/6 -tip] perf_counter: Add Generalized Hardware vectored co-processor support for AMD
  2009-07-01  9:36   ` [PATCH 2/6 -tip] perf stat: treat same behaviour for all CYCLES and CLOCKS Jaswinder Singh Rajput
@ 2009-07-01  9:37     ` Jaswinder Singh Rajput
  2009-07-01  9:38       ` [PATCH 4/6 -tip] perf_counter: Add Generalized Hardware interrupt " Jaswinder Singh Rajput
  2009-07-01 11:20       ` [PATCH 3/6 -tip] perf_counter: Add Generalized Hardware vectored co-processor " Ingo Molnar
  2009-07-01 11:39     ` [PATCH 2/6 -tip] perf stat: treat same behaviour for all CYCLES and CLOCKS Ingo Molnar
  1 sibling, 2 replies; 41+ messages in thread
From: Jaswinder Singh Rajput @ 2009-07-01  9:37 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Thomas Gleixner, Peter Zijlstra, x86 maintainers, LKML, Alan Cox


$ ./perf stat -e add -e multiply -e divide -e vec-idle-cycles -e vec-stall-cycles -e vec-ops -- ls -lR /usr/include/ > /dev/null

 Performance counter stats for 'ls -lR /usr/include/':

           4218  vec-adds                  (scaled from 66.60%)
           7426  vec-muls                  (scaled from 66.67%)
           5441  vec-divs                  (scaled from 66.29%)
      821982187  vec-idle-cycles           (scaled from 66.45%)
           2681  vec-stall-cycles          (scaled from 67.11%)
           7887  vec-ops                   (scaled from 66.88%)

    0.417614573  seconds time elapsed

$ ./perf stat -e add -e multiply -e divide -e vec-idle-cycles -e vec-stall-cycles -e vec-ops -- /usr/bin/rhythmbox ~jaswinder/Music/singhiskinng.mp3

 Performance counter stats for '/usr/bin/rhythmbox /home/jaswinder/Music/singhiskinng.mp3':

       17552264  vec-adds                  (scaled from 66.28%)
       19715258  vec-muls                  (scaled from 66.63%)
       15862733  vec-divs                  (scaled from 66.82%)
    23735187095  vec-idle-cycles           (scaled from 66.89%)
       11353159  vec-stall-cycles          (scaled from 66.90%)
       36628571  vec-ops                   (scaled from 66.48%)

  298.350012843  seconds time elapsed

$ ./perf stat -e add -e multiply -e divide -e vec-idle-cycles -e vec-stall-cycles -e vec-ops -- /usr/bin/vlc ~jaswinder/Videos/Linus_Torvalds_interview_with_Charlie_Rose_Part_1.flv

 Performance counter stats for '/usr/bin/vlc /home/jaswinder/Videos/Linus_Torvalds_interview_with_Charlie_Rose_Part_1.flv':

    20177177044  vec-adds                  (scaled from 66.63%)
    34101687027  vec-muls                  (scaled from 66.64%)
     3984060862  vec-divs                  (scaled from 66.71%)
    26349684710  vec-idle-cycles           (scaled from 66.65%)
     9052001905  vec-stall-cycles          (scaled from 66.66%)
    76440734242  vec-ops                   (scaled from 66.71%)

  272.523058097  seconds time elapsed

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
---
 arch/x86/kernel/cpu/perf_counter.c |   33 +++++++++++++++++++++++++++++++
 include/linux/perf_counter.h       |   15 ++++++++++++++
 kernel/perf_counter.c              |    1 +
 tools/perf/util/parse-events.c     |   38 ++++++++++++++++++++++++++++++++++++
 4 files changed, 87 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index d4cf4ce..8092200 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -372,6 +372,12 @@ static const u64 atom_hw_cache_event_ids
  },
 };
 
+/*
+ * Generalized hw vectored co-processor event table
+ */
+
+static u64 __read_mostly hw_vector_event_ids[PERF_COUNT_HW_VECTOR_MAX];
+
 static u64 intel_pmu_raw_event(u64 event)
 {
 #define CORE_EVNTSEL_EVENT_MASK		0x000000FFULL
@@ -481,6 +487,17 @@ static const u64 amd_hw_cache_event_ids
  },
 };
 
+static const u64 amd_hw_vector_event_ids[] =
+{
+  [PERF_COUNT_HW_VECTOR_ADD]		= 0x0100, /* Dispatched FPU Add	     */
+  [PERF_COUNT_HW_VECTOR_MULTIPLY]	= 0x0200, /* Dispatched FPU Multiply */
+  [PERF_COUNT_HW_VECTOR_DIVIDE]		= 0x0400, /* Dispatched FPU Store    */
+  [PERF_COUNT_HW_VECTOR_IDLE_CYCLES]	= 0x0001, /* FPU Empty cycles        */
+  [PERF_COUNT_HW_VECTOR_STALL_CYCLES]	= 0x00D7, /* Dispatch stall for FPU  */
+  [PERF_COUNT_HW_VECTOR_OPS]		= 0x0FCB, /* Retired x87|(MMX & 3Dnow)
+						   |(SSE & SSE2) Instructions */
+};
+
 /*
  * AMD Performance Monitor K7 and later.
  */
@@ -659,6 +676,17 @@ set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr)
 	return 0;
 }
 
+static inline int
+set_hw_vector_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr)
+{
+	if (attr->config >= PERF_COUNT_HW_VECTOR_MAX)
+		return -EINVAL;
+
+	hwc->config |= hw_vector_event_ids[attr->config];
+
+	return 0;
+}
+
 /*
  * Setup the hardware configuration for a given attr_type
  */
@@ -716,6 +744,9 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	if (attr->type == PERF_TYPE_HW_CACHE)
 		return set_ext_hw_attr(hwc, attr);
 
+	if (attr->type == PERF_TYPE_HW_VECTOR)
+		return set_hw_vector_attr(hwc, attr);
+
 	if (attr->config >= x86_pmu.max_events)
 		return -EINVAL;
 	/*
@@ -1468,6 +1499,8 @@ static int amd_pmu_init(void)
 	/* Events are common for all AMDs */
 	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
 	       sizeof(hw_cache_event_ids));
+	memcpy(hw_vector_event_ids, amd_hw_vector_event_ids,
+	       sizeof(hw_vector_event_ids));
 
 	return 0;
 }
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 5e970c7..e91b712 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -31,6 +31,7 @@ enum perf_type_id {
 	PERF_TYPE_TRACEPOINT			= 2,
 	PERF_TYPE_HW_CACHE			= 3,
 	PERF_TYPE_RAW				= 4,
+	PERF_TYPE_HW_VECTOR			= 5,
 
 	PERF_TYPE_MAX,				/* non-ABI */
 };
@@ -89,6 +90,20 @@ enum perf_hw_cache_op_result_id {
 };
 
 /*
+ * Generalized hardware vectored co-processor counters:
+ */
+enum perf_hw_vector_id {
+	PERF_COUNT_HW_VECTOR_ADD		= 0,
+	PERF_COUNT_HW_VECTOR_MULTIPLY		= 1,
+	PERF_COUNT_HW_VECTOR_DIVIDE		= 2,
+	PERF_COUNT_HW_VECTOR_IDLE_CYCLES	= 3,
+	PERF_COUNT_HW_VECTOR_STALL_CYCLES	= 4,
+	PERF_COUNT_HW_VECTOR_OPS		= 5,
+
+	PERF_COUNT_HW_VECTOR_MAX,		/* non-ABI */
+};
+
+/*
  * Special "software" counters provided by the kernel, even if the hardware
  * does not support performance counters. These counters measure various
  * physical and sw events of the kernel (and allow the profiling of them as
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index d55a50d..dd3848a 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -3838,6 +3838,7 @@ perf_counter_alloc(struct perf_counter_attr *attr,
 	case PERF_TYPE_RAW:
 	case PERF_TYPE_HARDWARE:
 	case PERF_TYPE_HW_CACHE:
+	case PERF_TYPE_HW_VECTOR:
 		pmu = hw_perf_counter_init(counter);
 		break;
 
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 4d042f1..5e5d17e 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -40,6 +40,17 @@ static struct event_symbol event_symbols[] = {
   { CSW(CPU_MIGRATIONS),	"cpu-migrations",	"migrations"	},
 };
 
+#define CHVECTOR(x) .type = PERF_TYPE_HW_VECTOR, .config = PERF_COUNT_HW_VECTOR_##x
+
+static struct event_symbol vector_event_symbols[] = {
+  { CHVECTOR(ADD),		"vec-adds",		"add"		},
+  { CHVECTOR(MULTIPLY),		"vec-muls",		"multiply"	},
+  { CHVECTOR(DIVIDE),		"vec-divs",		"divide"	},
+  { CHVECTOR(IDLE_CYCLES),	"vec-idle-cycles",	"vec-empty-cycles"},
+  { CHVECTOR(STALL_CYCLES),	"vec-stall-cycles",	"vec-busy-cycles"},
+  { CHVECTOR(OPS),		"vec-ops",		"vec-operations"},
+};
+
 #define __PERF_COUNTER_FIELD(config, name) \
 	((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)
 
@@ -172,6 +183,11 @@ char *event_name(int counter)
 		return event_cache_name(cache_type, cache_op, cache_result);
 	}
 
+	case PERF_TYPE_HW_VECTOR:
+		if (config < PERF_COUNT_HW_VECTOR_MAX)
+			return vector_event_symbols[config].symbol;
+		return "unknown-vector";
+
 	case PERF_TYPE_SOFTWARE:
 		if (config < PERF_COUNT_SW_MAX)
 			return sw_event_names[config];
@@ -250,6 +266,19 @@ static int check_events(const char *str, unsigned int i)
 	return 0;
 }
 
+static int check_vector_events(const char *str, unsigned int i)
+{
+	if (!strncmp(str, vector_event_symbols[i].symbol,
+		     strlen(vector_event_symbols[i].symbol)))
+		return 1;
+
+	if (strlen(vector_event_symbols[i].alias))
+		if (!strncmp(str, vector_event_symbols[i].alias,
+			     strlen(vector_event_symbols[i].alias)))
+			return 1;
+	return 0;
+}
+
 /*
  * Each event can have multiple symbolic names.
  * Symbolic names are (almost) exactly matched.
@@ -297,6 +326,15 @@ static int parse_event_symbols(const char *str, struct perf_counter_attr *attr)
 		}
 	}
 
+	for (i = 0; i < ARRAY_SIZE(vector_event_symbols); i++) {
+		if (check_vector_events(str, i)) {
+			attr->type = vector_event_symbols[i].type;
+			attr->config = vector_event_symbols[i].config;
+
+			return 0;
+		}
+	}
+
 	return parse_generic_hw_symbols(str, attr);
 }
 
-- 
1.6.0.6




^ permalink raw reply related	[flat|nested] 41+ messages in thread

* [PATCH 4/6 -tip] perf_counter: Add Generalized Hardware interrupt support for AMD
  2009-07-01  9:37     ` [PATCH 3/6 -tip] perf_counter: Add Generalized Hardware vectored co-processor support for AMD Jaswinder Singh Rajput
@ 2009-07-01  9:38       ` Jaswinder Singh Rajput
  2009-07-01  9:38         ` [PATCH 5/6 -tip] perf_counter: Add hardware vector events for nehalem Jaswinder Singh Rajput
  2009-07-01 11:24         ` [PATCH 4/6 -tip] perf_counter: Add Generalized Hardware interrupt support for AMD Ingo Molnar
  2009-07-01 11:20       ` [PATCH 3/6 -tip] perf_counter: Add Generalized Hardware vectored co-processor " Ingo Molnar
  1 sibling, 2 replies; 41+ messages in thread
From: Jaswinder Singh Rajput @ 2009-07-01  9:38 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Thomas Gleixner, Peter Zijlstra, x86 maintainers, LKML, Alan Cox


$ ./perf stat -e interrupts -e masked -e int-pending-mask-cycles -- ls -lR /usr/include/ > /dev/null

 Performance counter stats for 'ls -lR /usr/include/':

            377  interrupts
       53429936  int-mask-cycles
           1119  int-pending-mask-cycles

    0.371457539  seconds time elapsed

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
---
 arch/x86/kernel/cpu/perf_counter.c |   30 ++++++++++++++++++++++++++++++
 include/linux/perf_counter.h       |   12 ++++++++++++
 kernel/perf_counter.c              |    1 +
 tools/perf/util/parse-events.c     |   35 +++++++++++++++++++++++++++++++++++
 4 files changed, 78 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 8092200..487df5c 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -378,6 +378,12 @@ static const u64 atom_hw_cache_event_ids
 
 static u64 __read_mostly hw_vector_event_ids[PERF_COUNT_HW_VECTOR_MAX];
 
+/*
+ * Generalized hw interrupt event table
+ */
+
+static u64 __read_mostly hw_interrupt_event_ids[PERF_COUNT_HW_INTERRUPT_MAX];
+
 static u64 intel_pmu_raw_event(u64 event)
 {
 #define CORE_EVNTSEL_EVENT_MASK		0x000000FFULL
@@ -498,6 +504,14 @@ static const u64 amd_hw_vector_event_ids[] =
 						   |SSE & SSE2) Instructions */
 };
 
+
+static const u64 amd_hw_interrupt_event_ids[] =
+{
+  [PERF_COUNT_HW_INTERRUPT]		= 0x00CF, /* Interrupts Taken        */
+  [PERF_COUNT_HW_INTERRUPT_MASK]	= 0x00CD, /* Interrupts-Masked Cycles*/
+  [PERF_COUNT_HW_INTERRUPT_PENDING_MASK]= 0x00CE, /* Int Mask+Pending Cycles */
+};
+
 /*
  * AMD Performance Monitor K7 and later.
  */
@@ -687,6 +701,17 @@ set_hw_vector_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr)
 	return 0;
 }
 
+static inline int
+set_hw_interrupt_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr)
+{
+	if (attr->config >= PERF_COUNT_HW_INTERRUPT_MAX)
+		return -EINVAL;
+
+	hwc->config |= hw_interrupt_event_ids[attr->config];
+
+	return 0;
+}
+
 /*
  * Setup the hardware configuration for a given attr_type
  */
@@ -747,6 +772,9 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	if (attr->type == PERF_TYPE_HW_VECTOR)
 		return set_hw_vector_attr(hwc, attr);
 
+	if (attr->type == PERF_TYPE_HW_INTERRUPT)
+		return set_hw_interrupt_attr(hwc, attr);
+
 	if (attr->config >= x86_pmu.max_events)
 		return -EINVAL;
 	/*
@@ -1501,6 +1529,8 @@ static int amd_pmu_init(void)
 	       sizeof(hw_cache_event_ids));
 	memcpy(hw_vector_event_ids, amd_hw_vector_event_ids,
 	       sizeof(hw_vector_event_ids));
+	memcpy(hw_interrupt_event_ids, amd_hw_interrupt_event_ids,
+	       sizeof(hw_interrupt_event_ids));
 
 	return 0;
 }
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index e91b712..c7165b9 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -32,6 +32,7 @@ enum perf_type_id {
 	PERF_TYPE_HW_CACHE			= 3,
 	PERF_TYPE_RAW				= 4,
 	PERF_TYPE_HW_VECTOR			= 5,
+	PERF_TYPE_HW_INTERRUPT			= 6,
 
 	PERF_TYPE_MAX,				/* non-ABI */
 };
@@ -104,6 +105,17 @@ enum perf_hw_vector_id {
 };
 
 /*
+ * Generalized hardware interrupt counters:
+ */
+enum perf_hw_interrupt_id {
+	PERF_COUNT_HW_INTERRUPT			= 0,
+	PERF_COUNT_HW_INTERRUPT_MASK		= 1,
+	PERF_COUNT_HW_INTERRUPT_PENDING_MASK	= 2,
+
+	PERF_COUNT_HW_INTERRUPT_MAX,		/* non-ABI */
+};
+
+/*
  * Special "software" counters provided by the kernel, even if the hardware
  * does not support performance counters. These counters measure various
  * physical and sw events of the kernel (and allow the profiling of them as
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index dd3848a..7a529a8 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -3839,6 +3839,7 @@ perf_counter_alloc(struct perf_counter_attr *attr,
 	case PERF_TYPE_HARDWARE:
 	case PERF_TYPE_HW_CACHE:
 	case PERF_TYPE_HW_VECTOR:
+	case PERF_TYPE_HW_INTERRUPT:
 		pmu = hw_perf_counter_init(counter);
 		break;
 
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 5e5d17e..5ea4c12 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -51,6 +51,14 @@ static struct event_symbol vector_event_symbols[] = {
   { CHVECTOR(OPS),		"vec-ops",		"vec-operations"},
 };
 
+#define CHINT(x) .type = PERF_TYPE_HW_INTERRUPT, .config = PERF_COUNT_HW_##x
+
+static struct event_symbol interrupt_event_symbols[] = {
+  { CHINT(INTERRUPT),		"interrupts",		"interrupt"	},
+  { CHINT(INTERRUPT_MASK),	"int-mask-cycles",	"masked"	},
+  { CHINT(INTERRUPT_PENDING_MASK),"int-pending-mask-cycles",	""	},
+};
+
 #define __PERF_COUNTER_FIELD(config, name) \
 	((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)
 
@@ -188,6 +196,11 @@ char *event_name(int counter)
 			return vector_event_symbols[config].symbol;
 		return "unknown-vector";
 
+	case PERF_TYPE_HW_INTERRUPT:
+		if (config < PERF_COUNT_HW_INTERRUPT_MAX)
+			return interrupt_event_symbols[config].symbol;
+		return "unknown-interrupt";
+
 	case PERF_TYPE_SOFTWARE:
 		if (config < PERF_COUNT_SW_MAX)
 			return sw_event_names[config];
@@ -279,6 +292,19 @@ static int check_vector_events(const char *str, unsigned int i)
 	return 0;
 }
 
+static int check_interrupt_events(const char *str, unsigned int i)
+{
+	if (!strncmp(str, interrupt_event_symbols[i].symbol,
+		     strlen(interrupt_event_symbols[i].symbol)))
+		return 1;
+
+	if (strlen(interrupt_event_symbols[i].alias))
+		if (!strncmp(str, interrupt_event_symbols[i].alias,
+			     strlen(interrupt_event_symbols[i].alias)))
+			return 1;
+	return 0;
+}
+
 /*
  * Each event can have multiple symbolic names.
  * Symbolic names are (almost) exactly matched.
@@ -335,6 +361,15 @@ static int parse_event_symbols(const char *str, struct perf_counter_attr *attr)
 		}
 	}
 
+	for (i = 0; i < ARRAY_SIZE(interrupt_event_symbols); i++) {
+		if (check_interrupt_events(str, i)) {
+			attr->type = interrupt_event_symbols[i].type;
+			attr->config = interrupt_event_symbols[i].config;
+
+			return 0;
+		}
+	}
+
 	return parse_generic_hw_symbols(str, attr);
 }
 
-- 
1.6.0.6




^ permalink raw reply related	[flat|nested] 41+ messages in thread

* [PATCH 5/6 -tip] perf_counter: Add hardware vector events for nehalem
  2009-07-01  9:38       ` [PATCH 4/6 -tip] perf_counter: Add Generalized Hardware interrupt " Jaswinder Singh Rajput
@ 2009-07-01  9:38         ` Jaswinder Singh Rajput
  2009-07-01  9:40           ` [PATCH 6/6 -tip] perf_counter: Add hardware interrupt events for nehalem, core2 and atom Jaswinder Singh Rajput
  2009-07-01 11:24         ` [PATCH 4/6 -tip] perf_counter: Add Generalized Hardware interrupt support for AMD Ingo Molnar
  1 sibling, 1 reply; 41+ messages in thread
From: Jaswinder Singh Rajput @ 2009-07-01  9:38 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Thomas Gleixner, Peter Zijlstra, x86 maintainers, LKML, Alan Cox


Add hardware vector events for nehalem

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
---
 arch/x86/kernel/cpu/perf_counter.c |   12 ++++++++++++
 1 files changed, 12 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 487df5c..8f05226 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -378,6 +378,16 @@ static const u64 atom_hw_cache_event_ids
 
 static u64 __read_mostly hw_vector_event_ids[PERF_COUNT_HW_VECTOR_MAX];
 
+static const u64 nehalem_hw_vector_event_ids[] =
+{
+  [PERF_COUNT_HW_VECTOR_ADD]		= 0x01B1, /* UOPS_EXECUTED.PORT0     */
+  [PERF_COUNT_HW_VECTOR_MULTIPLY]	= 0x0214, /* ARITH.MUL               */
+  [PERF_COUNT_HW_VECTOR_DIVIDE]		= 0x0114, /* ARITH.CYCLES_DIV_BUSY   */
+  [PERF_COUNT_HW_VECTOR_IDLE_CYCLES]	= 0x0,
+  [PERF_COUNT_HW_VECTOR_STALL_CYCLES]	= 0x60A2, /* RESOURCE_STALLS.FPCW|MXCSR*/
+  [PERF_COUNT_HW_VECTOR_OPS]		= 0x0710, /* FP_COMP_OPS_EXE.X87|MMX|SSE_FP*/
+};
+
 /*
  * Generalized hw interrupt event table
  */
@@ -1503,6 +1513,8 @@ static int intel_pmu_init(void)
 	case 26:
 		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
+		memcpy(hw_vector_event_ids, nehalem_hw_vector_event_ids,
+		       sizeof(hw_vector_event_ids));
 
 		pr_cont("Nehalem/Corei7 events, ");
 		break;
-- 
1.6.0.6




^ permalink raw reply related	[flat|nested] 41+ messages in thread

* [PATCH 6/6 -tip] perf_counter: Add hardware interrupt events for nehalem, core2 and atom
  2009-07-01  9:38         ` [PATCH 5/6 -tip] perf_counter: Add hardware vector events for nehalem Jaswinder Singh Rajput
@ 2009-07-01  9:40           ` Jaswinder Singh Rajput
  0 siblings, 0 replies; 41+ messages in thread
From: Jaswinder Singh Rajput @ 2009-07-01  9:40 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Thomas Gleixner, Peter Zijlstra, x86 maintainers, LKML, Alan Cox


Add hardware interrupt events for nehalem, core2 and atom

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
---
 arch/x86/kernel/cpu/perf_counter.c |   20 ++++++++++++++++++++
 1 files changed, 20 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 8f05226..4ef1838 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -394,6 +394,20 @@ static const u64 nehalem_hw_vector_event_ids[] =
 
 static u64 __read_mostly hw_interrupt_event_ids[PERF_COUNT_HW_INTERRUPT_MAX];
 
+static const u64 nehalem_hw_interrupt_event_ids[] =
+{
+  [PERF_COUNT_HW_INTERRUPT]		= 0x011D, /* HW_INT.RCV              */
+  [PERF_COUNT_HW_INTERRUPT_MASK]	= 0x021D, /* HW_INT.CYCLES_MASKED    */
+  [PERF_COUNT_HW_INTERRUPT_PENDING_MASK]= 0x041D, /* HW_INT.CYCLES_PENDING_AND_MASKED*/
+};
+
+static const u64 core2_atom_hw_interrupt_event_ids[] =
+{
+  [PERF_COUNT_HW_INTERRUPT]		= 0x00C8, /* HW_INT_RCV              */
+  [PERF_COUNT_HW_INTERRUPT_MASK]	= 0x01C6, /* CYCLES_INT_MASKED.CYCLES_INT_MASKED*/
+  [PERF_COUNT_HW_INTERRUPT_PENDING_MASK]= 0x02C6, /* CYCLES_INT_MASKED.CYCLES_INT_PENDING_AND_MASKED*/
+};
+
 static u64 intel_pmu_raw_event(u64 event)
 {
 #define CORE_EVNTSEL_EVENT_MASK		0x000000FFULL
@@ -1506,6 +1520,8 @@ static int intel_pmu_init(void)
 	case 29: /* six-core 45 nm xeon "Dunnington" */
 		memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
+		memcpy(hw_interrupt_event_ids, core2_atom_hw_interrupt_event_ids,
+		       sizeof(hw_interrupt_event_ids));
 
 		pr_cont("Core2 events, ");
 		break;
@@ -1515,12 +1531,16 @@ static int intel_pmu_init(void)
 		       sizeof(hw_cache_event_ids));
 		memcpy(hw_vector_event_ids, nehalem_hw_vector_event_ids,
 		       sizeof(hw_vector_event_ids));
+		memcpy(hw_interrupt_event_ids, nehalem_hw_interrupt_event_ids,
+		       sizeof(hw_interrupt_event_ids));
 
 		pr_cont("Nehalem/Corei7 events, ");
 		break;
 	case 28:
 		memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
+		memcpy(hw_interrupt_event_ids, core2_atom_hw_interrupt_event_ids,
+		       sizeof(hw_interrupt_event_ids));
 
 		pr_cont("Atom events, ");
 		break;
-- 
1.6.0.6




^ permalink raw reply related	[flat|nested] 41+ messages in thread

* Re: [PATCH 3/6 -tip] perf_counter: Add Generalized Hardware vectored co-processor support for AMD
  2009-07-01  9:37     ` [PATCH 3/6 -tip] perf_counter: Add Generalized Hardware vectored co-processor support for AMD Jaswinder Singh Rajput
  2009-07-01  9:38       ` [PATCH 4/6 -tip] perf_counter: Add Generalized Hardware interrupt " Jaswinder Singh Rajput
@ 2009-07-01 11:20       ` Ingo Molnar
  2009-07-01 11:27         ` Ingo Molnar
  1 sibling, 1 reply; 41+ messages in thread
From: Ingo Molnar @ 2009-07-01 11:20 UTC (permalink / raw)
  To: Jaswinder Singh Rajput, Arjan van de Ven, Paul Mackerras,
	Benjamin Herrenschmidt, Anton Blanchard
  Cc: Thomas Gleixner, Peter Zijlstra, x86 maintainers, LKML, Alan Cox


* Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:

> $ ./perf stat -e add -e multiply -e divide -e vec-idle-cycles -e vec-stall-cycles -e vec-ops -- /usr/bin/vlc ~jaswinder/Videos/Linus_Torvalds_interview_with_Charlie_Rose_Part_1.flv
> 
>  Performance counter stats for '/usr/bin/vlc /home/jaswinder/Videos/Linus_Torvalds_interview_with_Charlie_Rose_Part_1.flv':
> 
>     20177177044  vec-adds                  (scaled from 66.63%)
>     34101687027  vec-muls                  (scaled from 66.64%)
>      3984060862  vec-divs                  (scaled from 66.71%)
>     26349684710  vec-idle-cycles           (scaled from 66.65%)
>      9052001905  vec-stall-cycles          (scaled from 66.66%)
>     76440734242  vec-ops                   (scaled from 66.71%)
> 
>   272.523058097  seconds time elapsed

Ok, this looks very nice now - a highly generic and still very 
useful looking categorization of FPU/MMX/SSE related co-processor hw 
events.

I'm still waiting for feedback from Paulus, BenH and Anton, whether 
this kind of generic enumeration fits PowerPC well enough.

I think from a pure logic/math/physics POV this categorization is 
pretty complete: a modern co-processor has three fundamental states 
we are interested in: idle, busy and busy-stalled. It has an 'ops' 
metric that counts instructions, plus the main operations are add, 
mul and div.

Cell is i guess a complication to be solved, as there the various 
vector units have separate decoders and separate thread state. This 
above abstraction only covers the portion of CPU designs where there 
are vector operations in the main ALU decoder stream of instructions.

One thing that might be worth exposing is vectored loads/stores in 
general. But we don't have those in the generic ALU enumeration yet 
and if then it should be done together.

Also, the Nehalem bits need to be tested, i'll try to find time for 
that.

Good stuff.

	Ingo

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 4/6 -tip] perf_counter: Add Generalized Hardware interrupt support for AMD
  2009-07-01  9:38       ` [PATCH 4/6 -tip] perf_counter: Add Generalized Hardware interrupt " Jaswinder Singh Rajput
  2009-07-01  9:38         ` [PATCH 5/6 -tip] perf_counter: Add hardware vector events for nehalem Jaswinder Singh Rajput
@ 2009-07-01 11:24         ` Ingo Molnar
  2009-07-03 12:01           ` Jaswinder Singh Rajput
  1 sibling, 1 reply; 41+ messages in thread
From: Ingo Molnar @ 2009-07-01 11:24 UTC (permalink / raw)
  To: Jaswinder Singh Rajput, Peter Zijlstra, Arjan van de Ven,
	Frédéric Weisbecker, Arnaldo Carvalho de Melo,
	Paul Mackerras, Anton Blanchard
  Cc: Thomas Gleixner, Peter Zijlstra, x86 maintainers, LKML, Alan Cox


* Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:

> 
> $ ./perf stat -e interrupts -e masked -e int-pending-mask-cycles -- ls -lR /usr/include/ > /dev/null
> 
>  Performance counter stats for 'ls -lR /usr/include/':
> 
>             377  interrupts
>        53429936  int-mask-cycles
>            1119  int-pending-mask-cycles
> 
>     0.371457539  seconds time elapsed

Agreed, this is another useful generalization - and the 'cycles 
pending' metrics are not retrievable via any software means.

We could and should probably add a software counter for hardirqs as 
well. That would allow the vector/irqnr information to be passed in, 
and it would allow architectures without irq metrics in the PMU to 
have this counter too.

This way we could profile based on a specific interrupt source only 
- say based on the networking card.

	Ingo

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 3/6 -tip] perf_counter: Add Generalized Hardware vectored co-processor support for AMD
  2009-07-01 11:20       ` [PATCH 3/6 -tip] perf_counter: Add Generalized Hardware vectored co-processor " Ingo Molnar
@ 2009-07-01 11:27         ` Ingo Molnar
  2009-07-01 11:40           ` Jaswinder Singh Rajput
  0 siblings, 1 reply; 41+ messages in thread
From: Ingo Molnar @ 2009-07-01 11:27 UTC (permalink / raw)
  To: Jaswinder Singh Rajput, Arjan van de Ven, Paul Mackerras,
	Benjamin Herrenschmidt, Anton Blanchard
  Cc: Thomas Gleixner, Peter Zijlstra, x86 maintainers, LKML, Alan Cox


* Ingo Molnar <mingo@elte.hu> wrote:

> >  Performance counter stats for '/usr/bin/vlc /home/jaswinder/Videos/Linus_Torvalds_interview_with_Charlie_Rose_Part_1.flv':
> > 
> >     20177177044  vec-adds                  (scaled from 66.63%)
> >     34101687027  vec-muls                  (scaled from 66.64%)
> >      3984060862  vec-divs                  (scaled from 66.71%)
> >     26349684710  vec-idle-cycles           (scaled from 66.65%)
> >      9052001905  vec-stall-cycles          (scaled from 66.66%)
> >     76440734242  vec-ops                   (scaled from 66.71%)
> > 
> >   272.523058097  seconds time elapsed

btw., the 'perf list' bits are missing - any new counter added 
should be listed by 'perf list' as well - otherwise people won't know 
what we have and what to use.

	Ingo

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [tip:perfcounters/urgent] perf stat: Define MATCH_EVENT for easy attr checking
  2009-07-01  9:35 ` [PATCH 1/6 -tip] perf stat: define MATCH_EVENT for easy attrs checking Jaswinder Singh Rajput
  2009-07-01  9:36   ` [PATCH 2/6 -tip] perf stat: treat same behaviour for all CYCLES and CLOCKS Jaswinder Singh Rajput
@ 2009-07-01 11:30   ` tip-bot for Jaswinder Singh Rajput
  1 sibling, 0 replies; 41+ messages in thread
From: tip-bot for Jaswinder Singh Rajput @ 2009-07-01 11:30 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, hpa, mingo, peterz, jaswinder, jaswinderrajput,
	tglx, mingo

Commit-ID:  b9ebdcc0ce1c676ebf5dc4f6df6b440d8fcf88ab
Gitweb:     http://git.kernel.org/tip/b9ebdcc0ce1c676ebf5dc4f6df6b440d8fcf88ab
Author:     Jaswinder Singh Rajput <jaswinder@kernel.org>
AuthorDate: Wed, 1 Jul 2009 15:05:09 +0530
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Wed, 1 Jul 2009 13:28:38 +0200

perf stat: Define MATCH_EVENT for easy attr checking

MATCH_EVENT is useful:

 1. for multiple attrs checking
 2. avoid repetition of PERF_TYPE_ and PERF_COUNT_ and save space
 3. avoids line breakage

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
Cc: Peter Zijlstra <peterz@infradead.org>
LKML-Reference: <1246440909.3403.5.camel@hpdv5.satnam>
Signed-off-by: Ingo Molnar <mingo@elte.hu>


---
 tools/perf/builtin-stat.c |   27 ++++++++++-----------------
 1 files changed, 10 insertions(+), 17 deletions(-)

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index 095a90e..01cc07e 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -96,6 +96,10 @@ static u64			walltime_nsecs_noise;
 static u64			runtime_cycles_avg;
 static u64			runtime_cycles_noise;
 
+#define MATCH_EVENT(t, c, counter)			\
+	(attrs[counter].type == PERF_TYPE_##t &&	\
+	 attrs[counter].config == PERF_COUNT_##c)
+
 #define ERR_PERF_OPEN \
 "Error: counter %d, sys_perf_counter_open() syscall returned with %d (%s)\n"
 
@@ -133,13 +137,8 @@ static void create_perf_stat_counter(int counter, int pid)
  */
 static inline int nsec_counter(int counter)
 {
-	if (attrs[counter].type != PERF_TYPE_SOFTWARE)
-		return 0;
-
-	if (attrs[counter].config == PERF_COUNT_SW_CPU_CLOCK)
-		return 1;
-
-	if (attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK)
+	if (MATCH_EVENT(SOFTWARE, SW_CPU_CLOCK, counter) ||
+	    MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter))
 		return 1;
 
 	return 0;
@@ -194,11 +193,9 @@ static void read_counter(int counter)
 	/*
 	 * Save the full runtime - to allow normalization during printout:
 	 */
-	if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
-		attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK)
+	if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter))
 		runtime_nsecs[run_idx] = count[0];
-	if (attrs[counter].type == PERF_TYPE_HARDWARE &&
-		attrs[counter].config == PERF_COUNT_HW_CPU_CYCLES)
+	if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter))
 		runtime_cycles[run_idx] = count[0];
 }
 
@@ -292,9 +289,7 @@ static void nsec_printout(int counter, u64 *count, u64 *noise)
 
 	fprintf(stderr, " %14.6f  %-24s", msecs, event_name(counter));
 
-	if (attrs[counter].type == PERF_TYPE_SOFTWARE &&
-		attrs[counter].config == PERF_COUNT_SW_TASK_CLOCK) {
-
+	if (MATCH_EVENT(SOFTWARE, SW_TASK_CLOCK, counter)) {
 		if (walltime_nsecs_avg)
 			fprintf(stderr, " # %10.3f CPUs ",
 				(double)count[0] / (double)walltime_nsecs_avg);
@@ -307,9 +302,7 @@ static void abs_printout(int counter, u64 *count, u64 *noise)
 	fprintf(stderr, " %14Ld  %-24s", count[0], event_name(counter));
 
 	if (runtime_cycles_avg &&
-		attrs[counter].type == PERF_TYPE_HARDWARE &&
-			attrs[counter].config == PERF_COUNT_HW_INSTRUCTIONS) {
-
+	    MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter)) {
 		fprintf(stderr, " # %10.3f IPC  ",
 			(double)count[0] / (double)runtime_cycles_avg);
 	} else {

^ permalink raw reply related	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/6 -tip] perf stat: treat same behaviour for all CYCLES and CLOCKS
  2009-07-01  9:36   ` [PATCH 2/6 -tip] perf stat: treat same behaviour for all CYCLES and CLOCKS Jaswinder Singh Rajput
  2009-07-01  9:37     ` [PATCH 3/6 -tip] perf_counter: Add Generalized Hardware vectored co-processor support for AMD Jaswinder Singh Rajput
@ 2009-07-01 11:39     ` Ingo Molnar
  2009-07-03  8:18       ` Paul Mackerras
  1 sibling, 1 reply; 41+ messages in thread
From: Ingo Molnar @ 2009-07-01 11:39 UTC (permalink / raw)
  To: Jaswinder Singh Rajput
  Cc: Thomas Gleixner, Peter Zijlstra, x86 maintainers, LKML


* Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:

> For normalization also added SW_CPU_CLOCK and HW_BUS_CYCLES
> 
> For nsec_printout also added SW_CPU_CLOCK
> 
> Added helper functions to check counter unit as cycles and instructions
> 
> Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
> ---
>  tools/perf/builtin-stat.c |   49 +++++++++++++++++++++++++++++++-------------
>  1 files changed, 34 insertions(+), 15 deletions(-)
> 
> diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
> index 6bf2b80..af61c29 100644
> --- a/tools/perf/builtin-stat.c
> +++ b/tools/perf/builtin-stat.c
> @@ -144,6 +144,29 @@ static inline int nsec_counter(int counter)
>  }
>  
>  /*
> + * Does the counter have cycles as a unit?
> + */
> +static inline int cycle_counter(int counter)
> +{
> +	if (MATCH_EVENT(HARDWARE, HW_CPU_CYCLES, counter) ||
> +	    MATCH_EVENT(HARDWARE, HW_BUS_CYCLES, counter))
> +		return 1;
> +
> +	return 0;
> +}
> +
> +/*
> + * Does the counter have instructions as a unit?
> + */
> +static inline int instruction_counter(int counter)
> +{
> +	if (MATCH_EVENT(HARDWARE, HW_INSTRUCTIONS, counter))
> +		return 1;
> +
> +	return 0;
> +}

This should be done a bit differently. Each event should have a 
'unit type' index in its descriptor table, which links back to 
_another_ event, specifying its unit.

For example:

   (PERF_COUNT_HW_INSTRUCTIONS,     -1                        , "instructions")
   (PERF_COUNT_HW_CACHE_REFERENCES, PERF_COUNT_HW_INSTRUCTIONS)
   (PERF_COUNT_HW_CACHE_MISSES,     PERF_COUNT_HW_INSTRUCTIONS)

'-1' signals an event that has itself as a unit, and a string field 
gives us the pretty-print form of the unit.

The same could be done for other types of events as well, such as 
software events:

   (PERF_COUNT_SW_CPU_CLOCK,        -1                        , "nsecs")
   (PERF_COUNT_SW_TASK_CLOCK,       PERF_COUNT_SW_CPU_CLOCK   )

This way normalization can be fully automated: say if we print out 
PERF_COUNT_HW_CACHE_MISSES, we see that it is in units of 
PERF_COUNT_HW_INSTRUCTIONS so we can print out that unit and can 
normalize it to that metric.

the 'IPC' (Instructions Per Cycle) field is special, and if you are 
interested in this then i think it should be implemented as a 
special 'compound' event: it is represented by the division of two 
events.

( If it's implemented like that then IPC will be printed in a
  separate line, not as part of the instructions line - but that's 
  OK. )

Other 'compound' events might be possible too: for example a new 
cache-hits field could be is cache-refs minus cache-misses.

I.e. the simplest model for 'compound' events would be:

  X = A / B
  X = A - B
  X = A + B

We could list them in the event table, with a flag that specifies 
which arithmetic operation connects two 'atomic' counters.

Then the adding of a new compound event would only be the matter of 
adding one more line to the event table.

Can you see any problems with this approach?

	Ingo

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 3/6 -tip] perf_counter: Add Generalized Hardware vectored co-processor support for AMD
  2009-07-01 11:27         ` Ingo Molnar
@ 2009-07-01 11:40           ` Jaswinder Singh Rajput
  2009-07-01 11:49             ` Ingo Molnar
  0 siblings, 1 reply; 41+ messages in thread
From: Jaswinder Singh Rajput @ 2009-07-01 11:40 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Arjan van de Ven, Paul Mackerras, Benjamin Herrenschmidt,
	Anton Blanchard, Thomas Gleixner, Peter Zijlstra, x86 maintainers,
	LKML, Alan Cox

On Wed, 2009-07-01 at 13:27 +0200, Ingo Molnar wrote:
> * Ingo Molnar <mingo@elte.hu> wrote:
> 
> > >  Performance counter stats for '/usr/bin/vlc /home/jaswinder/Videos/Linus_Torvalds_interview_with_Charlie_Rose_Part_1.flv':
> > > 
> > >     20177177044  vec-adds                  (scaled from 66.63%)
> > >     34101687027  vec-muls                  (scaled from 66.64%)
> > >      3984060862  vec-divs                  (scaled from 66.71%)
> > >     26349684710  vec-idle-cycles           (scaled from 66.65%)
> > >      9052001905  vec-stall-cycles          (scaled from 66.66%)
> > >     76440734242  vec-ops                   (scaled from 66.71%)
> > > 
> > >   272.523058097  seconds time elapsed
> 
> btw., the 'perf list' bits are missing - any new counter added 
> should be listed by 'perf list' as well - otherwise people won't know 
> what we have and what to use.
> 

Even cache is not available for 'perf list'. Should I also resend patch
for adding cache along with vector and interrupt.

Thanks,
--
JSR


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [GIT-PULL -tip][PATCH 0/6] perf_counter patches
  2009-07-01  9:33 [GIT-PULL -tip][PATCH 0/6] perf_counter patches Jaswinder Singh Rajput
  2009-07-01  9:35 ` [PATCH 1/6 -tip] perf stat: define MATCH_EVENT for easy attrs checking Jaswinder Singh Rajput
@ 2009-07-01 11:45 ` Ingo Molnar
  1 sibling, 0 replies; 41+ messages in thread
From: Ingo Molnar @ 2009-07-01 11:45 UTC (permalink / raw)
  To: Jaswinder Singh Rajput
  Cc: Thomas Gleixner, Alan Cox, Peter Zijlstra, x86 maintainers, LKML


* Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:

> Ingo,
> 
> Please pull perf_counter patches :
> The following changes since commit 092304de242705abf24edcb0fc7beed4c4276865:
>   Ingo Molnar (1):
>         Merge branch 'perfcounters/urgent'
> 
> are available in the git repository at:
> 
>   git://git.kernel.org/pub/scm/linux/kernel/git/jaswinder/linux-2.6-tip.git master
> 
> Jaswinder Singh Rajput (6):
>       perf stat: define MATCH_EVENT for easy attrs checking
>       perf stat: treat same behaviour for all CYCLES and CLOCKS
>       perf_counter: Add Generalized Hardware vectored co-processor support for AMD
>       perf_counter: Add Generalized Hardware interrupt support for AMD
>       perf_counter: Add hardware vector events for nehalem
>       perf_counter: Add hardware interrupt events for nehalem, core2 and atom

A patch nit-picking sidenote, please try to use more consistent 
capitalization in commit titles. I fixed the first commit's title
to be:

   b9ebdcc: perf stat: Define MATCH_EVENT for easy attr checking

Also, the way we want to refer to the above Intel CPU models is 
"Corei7/Nehalem, Core2 and Atom".

	Ingo

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 3/6 -tip] perf_counter: Add Generalized Hardware vectored co-processor support for AMD
  2009-07-01 11:40           ` Jaswinder Singh Rajput
@ 2009-07-01 11:49             ` Ingo Molnar
  2009-07-02  9:44               ` [PATCH 1/2 -tip] perf_counter: Add generalized hardware vectored co-processor support for AMD and Intel Corei7/Nehalem Jaswinder Singh Rajput
  0 siblings, 1 reply; 41+ messages in thread
From: Ingo Molnar @ 2009-07-01 11:49 UTC (permalink / raw)
  To: Jaswinder Singh Rajput
  Cc: Arjan van de Ven, Paul Mackerras, Benjamin Herrenschmidt,
	Anton Blanchard, Thomas Gleixner, Peter Zijlstra, x86 maintainers,
	LKML, Alan Cox


* Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:

> On Wed, 2009-07-01 at 13:27 +0200, Ingo Molnar wrote:
> > * Ingo Molnar <mingo@elte.hu> wrote:
> > 
> > > >  Performance counter stats for '/usr/bin/vlc /home/jaswinder/Videos/Linus_Torvalds_interview_with_Charlie_Rose_Part_1.flv':
> > > > 
> > > >     20177177044  vec-adds                  (scaled from 66.63%)
> > > >     34101687027  vec-muls                  (scaled from 66.64%)
> > > >      3984060862  vec-divs                  (scaled from 66.71%)
> > > >     26349684710  vec-idle-cycles           (scaled from 66.65%)
> > > >      9052001905  vec-stall-cycles          (scaled from 66.66%)
> > > >     76440734242  vec-ops                   (scaled from 66.71%)
> > > > 
> > > >   272.523058097  seconds time elapsed
> > 
> > btw., the 'perf list' bits are missing - any new counter added 
> > should be listed by 'perf list' as well - otherwise people won't know 
> > what we have and what to use.
> > 
> 
> Even cache is not available for 'perf list'. Should I also resend 
> patch for adding cache along with vector and interrupt.

I'd suggest for you to send a separate patch for the cache bits 
first (that way it's not held up by other dependencies) - and keep 
the vector and irq bits in their respective patches.

I.e. when we add new generic events, we want to enable it in the 
full tool-space in a single patch.

	Ingo

^ permalink raw reply	[flat|nested] 41+ messages in thread

* [PATCH 1/2 -tip] perf_counter: Add generalized hardware vectored co-processor support for AMD and Intel Corei7/Nehalem
  2009-07-01 11:49             ` Ingo Molnar
@ 2009-07-02  9:44               ` Jaswinder Singh Rajput
  2009-07-02  9:45                 ` [PATCH 2/2 -tip] perf_counter: Add generalized hardware interrupt support for AMD and Intel Corei7/Nehalem, Core2 and Atom Jaswinder Singh Rajput
                                   ` (2 more replies)
  0 siblings, 3 replies; 41+ messages in thread
From: Jaswinder Singh Rajput @ 2009-07-02  9:44 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Arjan van de Ven, Paul Mackerras, Benjamin Herrenschmidt,
	Anton Blanchard, Thomas Gleixner, Peter Zijlstra, x86 maintainers,
	LKML, Alan Cox


This output is from AMD box:

$ ./perf stat -e add -e multiply -e divide -e vec-idle-cycles -e vec-stall-cycles -e vec-ops -- ls -lR /usr/include/ > /dev/null

 Performance counter stats for 'ls -lR /usr/include/':

           4218  vec-adds                  (scaled from 66.60%)
           7426  vec-muls                  (scaled from 66.67%)
           5441  vec-divs                  (scaled from 66.29%)
      821982187  vec-idle-cycles           (scaled from 66.45%)
           2681  vec-stall-cycles          (scaled from 67.11%)
           7887  vec-ops                   (scaled from 66.88%)

    0.417614573  seconds time elapsed

$ ./perf stat -e add -e multiply -e divide -e vec-idle-cycles -e vec-stall-cycles -e vec-ops -- /usr/bin/rhythmbox ~jaswinder/Music/singhiskinng.mp3

 Performance counter stats for '/usr/bin/rhythmbox /home/jaswinder/Music/singhiskinng.mp3':

       17552264  vec-adds                  (scaled from 66.28%)
       19715258  vec-muls                  (scaled from 66.63%)
       15862733  vec-divs                  (scaled from 66.82%)
    23735187095  vec-idle-cycles           (scaled from 66.89%)
       11353159  vec-stall-cycles          (scaled from 66.90%)
       36628571  vec-ops                   (scaled from 66.48%)

  298.350012843  seconds time elapsed

$ ./perf stat -e add -e multiply -e divide -e vec-idle-cycles -e vec-stall-cycles -e vec-ops -- /usr/bin/vlc ~jaswinder/Videos/Linus_Torvalds_interview_with_Charlie_Rose_Part_1.flv

 Performance counter stats for '/usr/bin/vlc /home/jaswinder/Videos/Linus_Torvalds_interview_with_Charlie_Rose_Part_1.flv':

    20177177044  vec-adds                  (scaled from 66.63%)
    34101687027  vec-muls                  (scaled from 66.64%)
     3984060862  vec-divs                  (scaled from 66.71%)
    26349684710  vec-idle-cycles           (scaled from 66.65%)
     9052001905  vec-stall-cycles          (scaled from 66.66%)
    76440734242  vec-ops                   (scaled from 66.71%)

  272.523058097  seconds time elapsed

$ ./perf list shows vector events like :

  vec-adds OR add                          [Hardware vector event]
  vec-muls OR multiply                     [Hardware vector event]
  vec-divs OR divide                       [Hardware vector event]
  vec-idle-cycles OR vec-empty-cycles      [Hardware vector event]
  vec-stall-cycles OR vec-busy-cycles      [Hardware vector event]
  vec-ops OR vec-operations                [Hardware vector event]

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
---
 arch/x86/kernel/cpu/perf_counter.c |   45 +++++++++++++++++++++++++++++
 include/linux/perf_counter.h       |   15 ++++++++++
 kernel/perf_counter.c              |    1 +
 tools/perf/util/parse-events.c     |   55 ++++++++++++++++++++++++++++++++++++
 4 files changed, 116 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 36c3dc7..48f28b7 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -372,6 +372,22 @@ static const u64 atom_hw_cache_event_ids
  },
 };
 
+/*
+ * Generalized hw vectored co-processor event table
+ */
+
+static u64 __read_mostly hw_vector_event_ids[PERF_COUNT_HW_VECTOR_MAX];
+
+static const u64 nehalem_hw_vector_event_ids[] =
+{
+  [PERF_COUNT_HW_VECTOR_ADD]		= 0x01B1, /* UOPS_EXECUTED.PORT0     */
+  [PERF_COUNT_HW_VECTOR_MULTIPLY]	= 0x0214, /* ARITH.MUL               */
+  [PERF_COUNT_HW_VECTOR_DIVIDE]		= 0x0114, /* ARITH.CYCLES_DIV_BUSY   */
+  [PERF_COUNT_HW_VECTOR_IDLE_CYCLES]	= 0x0,
+  [PERF_COUNT_HW_VECTOR_STALL_CYCLES]	= 0x60A2, /* RESOURCE_STALLS.FPCW|MXCSR*/
+  [PERF_COUNT_HW_VECTOR_OPS]		= 0x0710, /* FP_COMP_OPS_EXE.X87|MMX|SSE_FP*/
+};
+
 static u64 intel_pmu_raw_event(u64 event)
 {
 #define CORE_EVNTSEL_EVENT_MASK		0x000000FFULL
@@ -481,6 +497,17 @@ static const u64 amd_hw_cache_event_ids
  },
 };
 
+static const u64 amd_hw_vector_event_ids[] =
+{
+  [PERF_COUNT_HW_VECTOR_ADD]		= 0x0100, /* Dispatched FPU Add	     */
+  [PERF_COUNT_HW_VECTOR_MULTIPLY]	= 0x0200, /* Dispatched FPU Multiply */
+  [PERF_COUNT_HW_VECTOR_DIVIDE]		= 0x0400, /* Dispatched FPU Store    */
+  [PERF_COUNT_HW_VECTOR_IDLE_CYCLES]	= 0x0001, /* FPU Empty cycles        */
+  [PERF_COUNT_HW_VECTOR_STALL_CYCLES]	= 0x00D7, /* Dispatch stall for FPU  */
+  [PERF_COUNT_HW_VECTOR_OPS]		= 0x0FCB, /* Retired x87|(MMX & 3Dnow)
+						   |SSE & SSE2) Instructions */
+};
+
 /*
  * AMD Performance Monitor K7 and later.
  */
@@ -659,6 +686,17 @@ set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr)
 	return 0;
 }
 
+static inline int
+set_hw_vector_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr)
+{
+	if (attr->config >= PERF_COUNT_HW_VECTOR_MAX)
+		return -EINVAL;
+
+	hwc->config |= hw_vector_event_ids[attr->config];
+
+	return 0;
+}
+
 /*
  * Setup the hardware configuration for a given attr_type
  */
@@ -716,6 +754,9 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	if (attr->type == PERF_TYPE_HW_CACHE)
 		return set_ext_hw_attr(hwc, attr);
 
+	if (attr->type == PERF_TYPE_HW_VECTOR)
+		return set_hw_vector_attr(hwc, attr);
+
 	if (attr->config >= x86_pmu.max_events)
 		return -EINVAL;
 	/*
@@ -1444,6 +1485,8 @@ static int intel_pmu_init(void)
 	case 26:
 		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
+		memcpy(hw_vector_event_ids, nehalem_hw_vector_event_ids,
+		       sizeof(hw_vector_event_ids));
 
 		pr_cont("Nehalem/Corei7 events, ");
 		break;
@@ -1468,6 +1511,8 @@ static int amd_pmu_init(void)
 	/* Events are common for all AMDs */
 	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
 	       sizeof(hw_cache_event_ids));
+	memcpy(hw_vector_event_ids, amd_hw_vector_event_ids,
+	       sizeof(hw_vector_event_ids));
 
 	return 0;
 }
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 5e970c7..e91b712 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -31,6 +31,7 @@ enum perf_type_id {
 	PERF_TYPE_TRACEPOINT			= 2,
 	PERF_TYPE_HW_CACHE			= 3,
 	PERF_TYPE_RAW				= 4,
+	PERF_TYPE_HW_VECTOR			= 5,
 
 	PERF_TYPE_MAX,				/* non-ABI */
 };
@@ -89,6 +90,20 @@ enum perf_hw_cache_op_result_id {
 };
 
 /*
+ * Generalized hardware vectored co-processor counters:
+ */
+enum perf_hw_vector_id {
+	PERF_COUNT_HW_VECTOR_ADD		= 0,
+	PERF_COUNT_HW_VECTOR_MULTIPLY		= 1,
+	PERF_COUNT_HW_VECTOR_DIVIDE		= 2,
+	PERF_COUNT_HW_VECTOR_IDLE_CYCLES	= 3,
+	PERF_COUNT_HW_VECTOR_STALL_CYCLES	= 4,
+	PERF_COUNT_HW_VECTOR_OPS		= 5,
+
+	PERF_COUNT_HW_VECTOR_MAX,		/* non-ABI */
+};
+
+/*
  * Special "software" counters provided by the kernel, even if the hardware
  * does not support performance counters. These counters measure various
  * physical and sw events of the kernel (and allow the profiling of them as
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index d55a50d..dd3848a 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -3838,6 +3838,7 @@ perf_counter_alloc(struct perf_counter_attr *attr,
 	case PERF_TYPE_RAW:
 	case PERF_TYPE_HARDWARE:
 	case PERF_TYPE_HW_CACHE:
+	case PERF_TYPE_HW_VECTOR:
 		pmu = hw_perf_counter_init(counter);
 		break;
 
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 5184959..8213dfb 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -40,6 +40,17 @@ static struct event_symbol event_symbols[] = {
   { CSW(CPU_MIGRATIONS),	"cpu-migrations",	"migrations"	},
 };
 
+#define CHVECTOR(x) .type = PERF_TYPE_HW_VECTOR, .config = PERF_COUNT_HW_VECTOR_##x
+
+static struct event_symbol vector_event_symbols[] = {
+  { CHVECTOR(ADD),		"vec-adds",		"add"		},
+  { CHVECTOR(MULTIPLY),		"vec-muls",		"multiply"	},
+  { CHVECTOR(DIVIDE),		"vec-divs",		"divide"	},
+  { CHVECTOR(IDLE_CYCLES),	"vec-idle-cycles",	"vec-empty-cycles"},
+  { CHVECTOR(STALL_CYCLES),	"vec-stall-cycles",	"vec-busy-cycles"},
+  { CHVECTOR(OPS),		"vec-ops",		"vec-operations"},
+};
+
 #define __PERF_COUNTER_FIELD(config, name) \
 	((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)
 
@@ -172,6 +183,11 @@ char *event_name(int counter)
 		return event_cache_name(cache_type, cache_op, cache_result);
 	}
 
+	case PERF_TYPE_HW_VECTOR:
+		if (config < PERF_COUNT_HW_VECTOR_MAX)
+			return vector_event_symbols[config].symbol;
+		return "unknown-vector";
+
 	case PERF_TYPE_SOFTWARE:
 		if (config < PERF_COUNT_SW_MAX)
 			return sw_event_names[config];
@@ -280,6 +296,21 @@ static int check_events(const char *str, unsigned int i)
 	return 0;
 }
 
+static int check_vector_events(const char *str, unsigned int i)
+{
+	int n;
+
+	n = strlen(vector_event_symbols[i].symbol);
+	if (!strncmp(str, vector_event_symbols[i].symbol, n))
+		return n;
+
+	n = strlen(vector_event_symbols[i].alias);
+	if (n)
+		if (!strncmp(str, vector_event_symbols[i].alias, n))
+			return n;
+	return 0;
+}
+
 static int
 parse_symbolic_event(const char **strp, struct perf_counter_attr *attr)
 {
@@ -296,6 +327,17 @@ parse_symbolic_event(const char **strp, struct perf_counter_attr *attr)
 			return 1;
 		}
 	}
+
+	for (i = 0; i < ARRAY_SIZE(vector_event_symbols); i++) {
+		n = check_vector_events(str, i);
+		if (n > 0) {
+			attr->type = vector_event_symbols[i].type;
+			attr->config = vector_event_symbols[i].config;
+			*strp = str + n;
+			return 1;
+		}
+	}
+
 	return 0;
 }
 
@@ -420,6 +462,7 @@ static const char * const event_type_descriptors[] = {
 	"Software event",
 	"Tracepoint event",
 	"Hardware cache event",
+	"Hardware vector event",
 };
 
 /*
@@ -468,6 +511,18 @@ void print_events(void)
 	}
 
 	fprintf(stderr, "\n");
+	syms = vector_event_symbols;
+	type = syms->type;
+	for (i = 0; i < ARRAY_SIZE(vector_event_symbols); i++, syms++) {
+		if (strlen(syms->alias))
+			sprintf(name, "%s OR %s", syms->symbol, syms->alias);
+		else
+			strcpy(name, syms->symbol);
+		fprintf(stderr, "  %-40s [%s]\n", name,
+			event_type_descriptors[type]);
+	}
+
+	fprintf(stderr, "\n");
 	fprintf(stderr, "  %-40s [raw hardware event descriptor]\n",
 		"rNNN");
 	fprintf(stderr, "\n");
-- 
1.6.0.6




^ permalink raw reply related	[flat|nested] 41+ messages in thread

* [PATCH 2/2 -tip] perf_counter: Add generalized hardware interrupt support for AMD and Intel Corei7/Nehalem, Core2 and Atom
  2009-07-02  9:44               ` [PATCH 1/2 -tip] perf_counter: Add generalized hardware vectored co-processor support for AMD and Intel Corei7/Nehalem Jaswinder Singh Rajput
@ 2009-07-02  9:45                 ` Jaswinder Singh Rajput
  2009-07-03 10:33                   ` Ingo Molnar
  2009-07-03  7:38                 ` [PATCH 1/2 -tip] perf_counter: Add generalized hardware vectored co-processor support for AMD and Intel Corei7/Nehalem Jaswinder Singh Rajput
  2009-07-03 10:29                 ` [PATCH 1/2 -tip] perf_counter: Add generalized hardware vectored co-processor support for AMD and Intel Corei7/Nehalem Ingo Molnar
  2 siblings, 1 reply; 41+ messages in thread
From: Jaswinder Singh Rajput @ 2009-07-02  9:45 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Arjan van de Ven, Paul Mackerras, Benjamin Herrenschmidt,
	Anton Blanchard, Thomas Gleixner, Peter Zijlstra, x86 maintainers,
	LKML, Alan Cox


$ ./perf stat -e interrupts -e masked -e int-pending-mask-cycles -- ls -lR /usr/include/ > /dev/null

 Performance counter stats for 'ls -lR /usr/include/':

            377  interrupts
       53429936  int-mask-cycles
           1119  int-pending-mask-cycles

    0.371457539  seconds time elapsed

$ ./perf list shows interrupt events like :

  interrupts OR interrupt                  [Hardware interrupt event]
  int-mask-cycles OR masked                [Hardware interrupt event]
  int-pending-mask-cycles                  [Hardware interrupt event]

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
---
 arch/x86/kernel/cpu/perf_counter.c |   50 +++++++++++++++++++++++++++++++++++
 include/linux/perf_counter.h       |   12 ++++++++
 kernel/perf_counter.c              |    1 +
 tools/perf/util/parse-events.c     |   51 ++++++++++++++++++++++++++++++++++++
 4 files changed, 114 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 48f28b7..43b24ad 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -388,6 +388,26 @@ static const u64 nehalem_hw_vector_event_ids[] =
   [PERF_COUNT_HW_VECTOR_OPS]		= 0x0710, /* FP_COMP_OPS_EXE.X87|MMX|SSE_FP*/
 };
 
+/*
+ * Generalized hw interrupt event table
+ */
+
+static u64 __read_mostly hw_interrupt_event_ids[PERF_COUNT_HW_INTERRUPT_MAX];
+
+static const u64 nehalem_hw_interrupt_event_ids[] =
+{
+  [PERF_COUNT_HW_INTERRUPT]		= 0x011D, /* HW_INT.RCV              */
+  [PERF_COUNT_HW_INTERRUPT_MASK]	= 0x021D, /* HW_INT.CYCLES_MASKED    */
+  [PERF_COUNT_HW_INTERRUPT_PENDING_MASK]= 0x041D, /* HW_INT.CYCLES_PENDING_AND_MASKED*/
+};
+
+static const u64 core2_atom_hw_interrupt_event_ids[] =
+{
+  [PERF_COUNT_HW_INTERRUPT]		= 0x00C8, /* HW_INT_RCV              */
+  [PERF_COUNT_HW_INTERRUPT_MASK]	= 0x01C6, /* CYCLES_INT_MASKED.CYCLES_INT_MASKED*/
+  [PERF_COUNT_HW_INTERRUPT_PENDING_MASK]= 0x02C6, /* CYCLES_INT_MASKED.CYCLES_INT_PENDING_AND_MASKED*/
+};
+
 static u64 intel_pmu_raw_event(u64 event)
 {
 #define CORE_EVNTSEL_EVENT_MASK		0x000000FFULL
@@ -508,6 +528,14 @@ static const u64 amd_hw_vector_event_ids[] =
 						   |SSE & SSE2) Instructions */
 };
 
+
+static const u64 amd_hw_interrupt_event_ids[] =
+{
+  [PERF_COUNT_HW_INTERRUPT]		= 0x00CF, /* Interrupts Taken        */
+  [PERF_COUNT_HW_INTERRUPT_MASK]	= 0x00CD, /* Interrupts-Masked Cycles*/
+  [PERF_COUNT_HW_INTERRUPT_PENDING_MASK]= 0x00CE, /* Int Mask+Pending Cycles */
+};
+
 /*
  * AMD Performance Monitor K7 and later.
  */
@@ -697,6 +725,17 @@ set_hw_vector_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr)
 	return 0;
 }
 
+static inline int
+set_hw_interrupt_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr)
+{
+	if (attr->config >= PERF_COUNT_HW_INTERRUPT_MAX)
+		return -EINVAL;
+
+	hwc->config |= hw_interrupt_event_ids[attr->config];
+
+	return 0;
+}
+
 /*
  * Setup the hardware configuration for a given attr_type
  */
@@ -757,6 +796,9 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	if (attr->type == PERF_TYPE_HW_VECTOR)
 		return set_hw_vector_attr(hwc, attr);
 
+	if (attr->type == PERF_TYPE_HW_INTERRUPT)
+		return set_hw_interrupt_attr(hwc, attr);
+
 	if (attr->config >= x86_pmu.max_events)
 		return -EINVAL;
 	/*
@@ -1478,6 +1520,8 @@ static int intel_pmu_init(void)
 	case 29: /* six-core 45 nm xeon "Dunnington" */
 		memcpy(hw_cache_event_ids, core2_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
+		memcpy(hw_interrupt_event_ids, core2_atom_hw_interrupt_event_ids,
+		       sizeof(hw_interrupt_event_ids));
 
 		pr_cont("Core2 events, ");
 		break;
@@ -1487,12 +1531,16 @@ static int intel_pmu_init(void)
 		       sizeof(hw_cache_event_ids));
 		memcpy(hw_vector_event_ids, nehalem_hw_vector_event_ids,
 		       sizeof(hw_vector_event_ids));
+		memcpy(hw_interrupt_event_ids, nehalem_hw_interrupt_event_ids,
+		       sizeof(hw_interrupt_event_ids));
 
 		pr_cont("Nehalem/Corei7 events, ");
 		break;
 	case 28:
 		memcpy(hw_cache_event_ids, atom_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
+		memcpy(hw_interrupt_event_ids, core2_atom_hw_interrupt_event_ids,
+		       sizeof(hw_interrupt_event_ids));
 
 		pr_cont("Atom events, ");
 		break;
@@ -1513,6 +1561,8 @@ static int amd_pmu_init(void)
 	       sizeof(hw_cache_event_ids));
 	memcpy(hw_vector_event_ids, amd_hw_vector_event_ids,
 	       sizeof(hw_vector_event_ids));
+	memcpy(hw_interrupt_event_ids, amd_hw_interrupt_event_ids,
+	       sizeof(hw_interrupt_event_ids));
 
 	return 0;
 }
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index e91b712..a53081b 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -32,6 +32,7 @@ enum perf_type_id {
 	PERF_TYPE_HW_CACHE			= 3,
 	PERF_TYPE_RAW				= 4,
 	PERF_TYPE_HW_VECTOR			= 5,
+	PERF_TYPE_HW_INTERRUPT			= 6,
 
 	PERF_TYPE_MAX,				/* non-ABI */
 };
@@ -104,6 +105,17 @@ enum perf_hw_vector_id {
 };
 
 /*
+ * Generalized hardware interrupt counters:
+ */
+enum perf_hw_interrupt_id {
+	PERF_COUNT_HW_INTERRUPT			= 0,
+	PERF_COUNT_HW_INTERRUPT_MASK		= 1,
+	PERF_COUNT_HW_INTERRUPT_PENDING_MASK	= 2,
+
+	PERF_COUNT_HW_INTERRUPT_MAX,		/* non-ABI */
+};
+
+/*
  * Special "software" counters provided by the kernel, even if the hardware
  * does not support performance counters. These counters measure various
  * physical and sw events of the kernel (and allow the profiling of them as
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index dd3848a..7a529a8 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -3839,6 +3839,7 @@ perf_counter_alloc(struct perf_counter_attr *attr,
 	case PERF_TYPE_HARDWARE:
 	case PERF_TYPE_HW_CACHE:
 	case PERF_TYPE_HW_VECTOR:
+	case PERF_TYPE_HW_INTERRUPT:
 		pmu = hw_perf_counter_init(counter);
 		break;
 
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 8213dfb..d085b8f 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -51,6 +51,14 @@ static struct event_symbol vector_event_symbols[] = {
   { CHVECTOR(OPS),		"vec-ops",		"vec-operations"},
 };
 
+#define CHINT(x) .type = PERF_TYPE_HW_INTERRUPT, .config = PERF_COUNT_HW_##x
+
+static struct event_symbol interrupt_event_symbols[] = {
+  { CHINT(INTERRUPT),		"interrupts",		"interrupt"	},
+  { CHINT(INTERRUPT_MASK),	"int-mask-cycles",	"masked"	},
+  { CHINT(INTERRUPT_PENDING_MASK),"int-pending-mask-cycles",	""	},
+};
+
 #define __PERF_COUNTER_FIELD(config, name) \
 	((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)
 
@@ -188,6 +196,11 @@ char *event_name(int counter)
 			return vector_event_symbols[config].symbol;
 		return "unknown-vector";
 
+	case PERF_TYPE_HW_INTERRUPT:
+		if (config < PERF_COUNT_HW_INTERRUPT_MAX)
+			return interrupt_event_symbols[config].symbol;
+		return "unknown-interrupt";
+
 	case PERF_TYPE_SOFTWARE:
 		if (config < PERF_COUNT_SW_MAX)
 			return sw_event_names[config];
@@ -311,6 +324,21 @@ static int check_vector_events(const char *str, unsigned int i)
 	return 0;
 }
 
+static int check_interrupt_events(const char *str, unsigned int i)
+{
+	int n;
+
+	n = strlen(interrupt_event_symbols[i].symbol);
+	if (!strncmp(str, interrupt_event_symbols[i].symbol, n))
+		return n;
+
+	n = strlen(interrupt_event_symbols[i].alias);
+	if (n)
+		if (!strncmp(str, interrupt_event_symbols[i].alias, n))
+			return n;
+	return 0;
+}
+
 static int
 parse_symbolic_event(const char **strp, struct perf_counter_attr *attr)
 {
@@ -338,6 +366,16 @@ parse_symbolic_event(const char **strp, struct perf_counter_attr *attr)
 		}
 	}
 
+	for (i = 0; i < ARRAY_SIZE(interrupt_event_symbols); i++) {
+		n = check_interrupt_events(str, i);
+		if (n > 0) {
+			attr->type = interrupt_event_symbols[i].type;
+			attr->config = interrupt_event_symbols[i].config;
+			*strp = str + n;
+			return 1;
+		}
+	}
+
 	return 0;
 }
 
@@ -463,6 +501,7 @@ static const char * const event_type_descriptors[] = {
 	"Tracepoint event",
 	"Hardware cache event",
 	"Hardware vector event",
+	"Hardware interrupt event",
 };
 
 /*
@@ -523,6 +562,18 @@ void print_events(void)
 	}
 
 	fprintf(stderr, "\n");
+	syms = interrupt_event_symbols;
+	type = syms->type;
+	for (i = 0; i < ARRAY_SIZE(interrupt_event_symbols); i++, syms++) {
+		if (strlen(syms->alias))
+			sprintf(name, "%s OR %s", syms->symbol, syms->alias);
+		else
+			strcpy(name, syms->symbol);
+		fprintf(stderr, "  %-40s [%s]\n", name,
+			event_type_descriptors[type]);
+	}
+
+	fprintf(stderr, "\n");
 	fprintf(stderr, "  %-40s [raw hardware event descriptor]\n",
 		"rNNN");
 	fprintf(stderr, "\n");
-- 
1.6.0.6




^ permalink raw reply related	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/2 -tip] perf_counter: Add generalized hardware vectored co-processor support for AMD and Intel Corei7/Nehalem
  2009-07-02  9:44               ` [PATCH 1/2 -tip] perf_counter: Add generalized hardware vectored co-processor support for AMD and Intel Corei7/Nehalem Jaswinder Singh Rajput
  2009-07-02  9:45                 ` [PATCH 2/2 -tip] perf_counter: Add generalized hardware interrupt support for AMD and Intel Corei7/Nehalem, Core2 and Atom Jaswinder Singh Rajput
@ 2009-07-03  7:38                 ` Jaswinder Singh Rajput
  2009-07-03  9:30                   ` Ingo Molnar
  2009-07-03 10:29                 ` [PATCH 1/2 -tip] perf_counter: Add generalized hardware vectored co-processor support for AMD and Intel Corei7/Nehalem Ingo Molnar
  2 siblings, 1 reply; 41+ messages in thread
From: Jaswinder Singh Rajput @ 2009-07-03  7:38 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Arjan van de Ven, Paul Mackerras, Benjamin Herrenschmidt,
	Anton Blanchard, Thomas Gleixner, Peter Zijlstra, x86 maintainers,
	LKML, Alan Cox

Hello Ingo,

On Thu, 2009-07-02 at 15:14 +0530, Jaswinder Singh Rajput wrote:
> This output is from AMD box:
> 
> $ ./perf stat -e add -e multiply -e divide -e vec-idle-cycles -e vec-stall-cycles -e vec-ops -- ls -lR /usr/include/ > /dev/null
> 
>  Performance counter stats for 'ls -lR /usr/include/':
> 
>            4218  vec-adds                  (scaled from 66.60%)
>            7426  vec-muls                  (scaled from 66.67%)
>            5441  vec-divs                  (scaled from 66.29%)
>       821982187  vec-idle-cycles           (scaled from 66.45%)
>            2681  vec-stall-cycles          (scaled from 67.11%)
>            7887  vec-ops                   (scaled from 66.88%)
> 
>     0.417614573  seconds time elapsed
> 
> $ ./perf stat -e add -e multiply -e divide -e vec-idle-cycles -e vec-stall-cycles -e vec-ops -- /usr/bin/rhythmbox ~jaswinder/Music/singhiskinng.mp3
> 
>  Performance counter stats for '/usr/bin/rhythmbox /home/jaswinder/Music/singhiskinng.mp3':
> 
>        17552264  vec-adds                  (scaled from 66.28%)
>        19715258  vec-muls                  (scaled from 66.63%)
>        15862733  vec-divs                  (scaled from 66.82%)
>     23735187095  vec-idle-cycles           (scaled from 66.89%)
>        11353159  vec-stall-cycles          (scaled from 66.90%)
>        36628571  vec-ops                   (scaled from 66.48%)
> 
>   298.350012843  seconds time elapsed
> 
> $ ./perf stat -e add -e multiply -e divide -e vec-idle-cycles -e vec-stall-cycles -e vec-ops -- /usr/bin/vlc ~jaswinder/Videos/Linus_Torvalds_interview_with_Charlie_Rose_Part_1.flv
> 
>  Performance counter stats for '/usr/bin/vlc /home/jaswinder/Videos/Linus_Torvalds_interview_with_Charlie_Rose_Part_1.flv':
> 
>     20177177044  vec-adds                  (scaled from 66.63%)
>     34101687027  vec-muls                  (scaled from 66.64%)
>      3984060862  vec-divs                  (scaled from 66.71%)
>     26349684710  vec-idle-cycles           (scaled from 66.65%)
>      9052001905  vec-stall-cycles          (scaled from 66.66%)
>     76440734242  vec-ops                   (scaled from 66.71%)
> 
>   272.523058097  seconds time elapsed
> 
> $ ./perf list shows vector events like :
> 
>   vec-adds OR add                          [Hardware vector event]
>   vec-muls OR multiply                     [Hardware vector event]
>   vec-divs OR divide                       [Hardware vector event]
>   vec-idle-cycles OR vec-empty-cycles      [Hardware vector event]
>   vec-stall-cycles OR vec-busy-cycles      [Hardware vector event]
>   vec-ops OR vec-operations                [Hardware vector event]
> 
> Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
> ---
>  arch/x86/kernel/cpu/perf_counter.c |   45 +++++++++++++++++++++++++++++
>  include/linux/perf_counter.h       |   15 ++++++++++
>  kernel/perf_counter.c              |    1 +
>  tools/perf/util/parse-events.c     |   55 ++++++++++++++++++++++++++++++++++++
>  4 files changed, 116 insertions(+), 0 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
> index 36c3dc7..48f28b7 100644
> --- a/arch/x86/kernel/cpu/perf_counter.c
> +++ b/arch/x86/kernel/cpu/perf_counter.c
> @@ -372,6 +372,22 @@ static const u64 atom_hw_cache_event_ids
>   },
>  };
>  
> +/*
> + * Generalized hw vectored co-processor event table
> + */
> +
> +static u64 __read_mostly hw_vector_event_ids[PERF_COUNT_HW_VECTOR_MAX];
> +
> +static const u64 nehalem_hw_vector_event_ids[] =
> +{
> +  [PERF_COUNT_HW_VECTOR_ADD]		= 0x01B1, /* UOPS_EXECUTED.PORT0     */
> +  [PERF_COUNT_HW_VECTOR_MULTIPLY]	= 0x0214, /* ARITH.MUL               */
> +  [PERF_COUNT_HW_VECTOR_DIVIDE]		= 0x0114, /* ARITH.CYCLES_DIV_BUSY   */
> +  [PERF_COUNT_HW_VECTOR_IDLE_CYCLES]	= 0x0,
> +  [PERF_COUNT_HW_VECTOR_STALL_CYCLES]	= 0x60A2, /* RESOURCE_STALLS.FPCW|MXCSR*/
> +  [PERF_COUNT_HW_VECTOR_OPS]		= 0x0710, /* FP_COMP_OPS_EXE.X87|MMX|SSE_FP*/
> +};
> +

Have you tested this patch on Intel Corei7/Nehalem?

Thanks,
--
JSR

http://userweb.kernel.org/~jaswinder/




^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/6 -tip] perf stat: treat same behaviour for all CYCLES and CLOCKS
  2009-07-01 11:39     ` [PATCH 2/6 -tip] perf stat: treat same behaviour for all CYCLES and CLOCKS Ingo Molnar
@ 2009-07-03  8:18       ` Paul Mackerras
  2009-07-03  8:27         ` Ingo Molnar
  0 siblings, 1 reply; 41+ messages in thread
From: Paul Mackerras @ 2009-07-03  8:18 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Jaswinder Singh Rajput, Thomas Gleixner, Peter Zijlstra,
	x86 maintainers, LKML

Ingo Molnar writes:

> Other 'compound' events might be possible too: for example a new 
> cache-hits field could be cache-refs minus cache-misses.

Hmmm, on the MPC7450 family there are events for cache-hits and
cache-misses, so there it would be nice to be able to ask for
cache-refs and have it compute cache-hits plus cache-misses.

> I.e. the simplest model for 'compound' events would be:
> 
>   X = A / B
>   X = A - B
>   X = A + B
> 
> We could list them in the event table, with a flag that specifies 
> which arithmetic operation connects two 'atomic' counters.
> 
> Then the adding of a new compound event would only be the matter of 
> adding one more line to the event table.

Sounds nice.  If we do this we should ensure that the two events
get put into one group if possible.

Paul.

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/6 -tip] perf stat: treat same behaviour for all CYCLES and CLOCKS
  2009-07-03  8:18       ` Paul Mackerras
@ 2009-07-03  8:27         ` Ingo Molnar
  0 siblings, 0 replies; 41+ messages in thread
From: Ingo Molnar @ 2009-07-03  8:27 UTC (permalink / raw)
  To: Paul Mackerras
  Cc: Jaswinder Singh Rajput, Thomas Gleixner, Peter Zijlstra,
	x86 maintainers, LKML


* Paul Mackerras <paulus@samba.org> wrote:

> Ingo Molnar writes:
> 
> > Other 'compound' events might be possible too: for example a new 
> > cache-hits field could be cache-refs minus cache-misses.
> 
> Hmmm, on the MPC7450 family there are events for cache-hits and 
> cache-misses, so there it would be nice to be able to ask for 
> cache-refs and have it compute cache-hits plus cache-misses.

Yes. I think the API is structured enough so that user-space knows 
enough about the meaning of the events here. We can certainly 
stipulate this rule:

	refs == hits + misses

And if the kernel returns -ENODEV for a particular component 
user-space can fall back using the other two events.

I.e. this would allow transparent support for all 3 permutations:

	hw has refs and hits
	hw has refs and misses
	hw has hits and misses

For sampling it's a tiny bit tricky but still doable: a compound 
counter could still sample because we handle weighted samples 
throughout the tools and negative weight can be subtracted.

Intuitive annotation output would have to be thought out for this as 
entries/function could go negative statistically.

> > I.e. the simplest model for 'compound' events would be:
> > 
> >   X = A / B
> >   X = A - B
> >   X = A + B
> > 
> > We could list them in the event table, with a flag that 
> > specifies which arithmetic operation connects two 'atomic' 
> > counters.
> > 
> > Then the adding of a new compound event would only be the matter 
> > of adding one more line to the event table.
> 
> Sounds nice.  If we do this we should ensure that the two events 
> get put into one group if possible.

Correct. Are you interested in adding this, so that it fits the 
MPC7450 family perfectly?

	Ingo

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/2 -tip] perf_counter: Add generalized hardware vectored co-processor support for AMD and Intel Corei7/Nehalem
  2009-07-03  7:38                 ` [PATCH 1/2 -tip] perf_counter: Add generalized hardware vectored co-processor support for AMD and Intel Corei7/Nehalem Jaswinder Singh Rajput
@ 2009-07-03  9:30                   ` Ingo Molnar
  2009-07-03 10:10                     ` Jaswinder Singh Rajput
  2009-07-03 12:17                     ` [PATCH 3/3 -tip] perf list: avoid replicating functions Jaswinder Singh Rajput
  0 siblings, 2 replies; 41+ messages in thread
From: Ingo Molnar @ 2009-07-03  9:30 UTC (permalink / raw)
  To: Jaswinder Singh Rajput
  Cc: Arjan van de Ven, Paul Mackerras, Benjamin Herrenschmidt,
	Anton Blanchard, Thomas Gleixner, Peter Zijlstra, x86 maintainers,
	LKML, Alan Cox


* Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:

> > $ ./perf list shows vector events like :
> > 
> >   vec-adds OR add                          [Hardware vector event]
> >   vec-muls OR multiply                     [Hardware vector event]
> >   vec-divs OR divide                       [Hardware vector event]
> >   vec-idle-cycles OR vec-empty-cycles      [Hardware vector event]
> >   vec-stall-cycles OR vec-busy-cycles      [Hardware vector event]
> >   vec-ops OR vec-operations                [Hardware vector event]

btw., why does this printout SHOUT the 'or'? It's certainly not an 
important piece of information. Something like:

   vec-adds | add                          [Hardware vector event]
   vec-muls | multiply                     [Hardware vector event]
   vec-divs | divide                       [Hardware vector event]
   vec-idle-cycles | vec-empty-cycles      [Hardware vector event]
   vec-stall-cycles | vec-busy-cycles      [Hardware vector event]
   vec-ops | vec-operations                [Hardware vector event]

looks better on all levels.

	Ingo

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/2 -tip] perf_counter: Add generalized hardware vectored co-processor support for AMD and Intel Corei7/Nehalem
  2009-07-03  9:30                   ` Ingo Molnar
@ 2009-07-03 10:10                     ` Jaswinder Singh Rajput
  2009-07-03 12:17                     ` [PATCH 3/3 -tip] perf list: avoid replicating functions Jaswinder Singh Rajput
  1 sibling, 0 replies; 41+ messages in thread
From: Jaswinder Singh Rajput @ 2009-07-03 10:10 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Arjan van de Ven, Paul Mackerras, Benjamin Herrenschmidt,
	Anton Blanchard, Thomas Gleixner, Peter Zijlstra, x86 maintainers,
	LKML, Alan Cox

On Fri, 2009-07-03 at 11:30 +0200, Ingo Molnar wrote:
> * Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:
> 
> > > $ ./perf list shows vector events like :
> > > 
> > >   vec-adds OR add                          [Hardware vector event]
> > >   vec-muls OR multiply                     [Hardware vector event]
> > >   vec-divs OR divide                       [Hardware vector event]
> > >   vec-idle-cycles OR vec-empty-cycles      [Hardware vector event]
> > >   vec-stall-cycles OR vec-busy-cycles      [Hardware vector event]
> > >   vec-ops OR vec-operations                [Hardware vector event]
> 
> btw., why does this printout SHOUT the 'or'? It's certainly not an 
> important piece of information. Something like:
> 
>    vec-adds | add                          [Hardware vector event]
>    vec-muls | multiply                     [Hardware vector event]
>    vec-divs | divide                       [Hardware vector event]
>    vec-idle-cycles | vec-empty-cycles      [Hardware vector event]
>    vec-stall-cycles | vec-busy-cycles      [Hardware vector event]
>    vec-ops | vec-operations                [Hardware vector event]
> 
> looks better on all levels.
> 

'OR' is also used for other events.
If this is the only issue, I request you to accept these 2 patches.

I will send an incremental patch which will fix these 'OR's and also
avoid duplicating these functions.

Thanks,
--
JSR

http://userweb.kernel.org/~jaswinder/



^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/2 -tip] perf_counter: Add generalized hardware vectored co-processor support for AMD and Intel Corei7/Nehalem
  2009-07-02  9:44               ` [PATCH 1/2 -tip] perf_counter: Add generalized hardware vectored co-processor support for AMD and Intel Corei7/Nehalem Jaswinder Singh Rajput
  2009-07-02  9:45                 ` [PATCH 2/2 -tip] perf_counter: Add generalized hardware interrupt support for AMD and Intel Corei7/Nehalem, Core2 and Atom Jaswinder Singh Rajput
  2009-07-03  7:38                 ` [PATCH 1/2 -tip] perf_counter: Add generalized hardware vectored co-processor support for AMD and Intel Corei7/Nehalem Jaswinder Singh Rajput
@ 2009-07-03 10:29                 ` Ingo Molnar
  2009-07-03 11:55                   ` Jaswinder Singh Rajput
  2 siblings, 1 reply; 41+ messages in thread
From: Ingo Molnar @ 2009-07-03 10:29 UTC (permalink / raw)
  To: Jaswinder Singh Rajput
  Cc: Arjan van de Ven, Paul Mackerras, Benjamin Herrenschmidt,
	Anton Blanchard, Thomas Gleixner, Peter Zijlstra, x86 maintainers,
	LKML, Alan Cox


* Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:

>  Performance counter stats for '/usr/bin/rhythmbox /home/jaswinder/Music/singhiskinng.mp3':
> 
>        17552264  vec-adds                  (scaled from 66.28%)
>        19715258  vec-muls                  (scaled from 66.63%)
>        15862733  vec-divs                  (scaled from 66.82%)
>     23735187095  vec-idle-cycles           (scaled from 66.89%)
>        11353159  vec-stall-cycles          (scaled from 66.90%)
>        36628571  vec-ops                   (scaled from 66.48%)

Is stall-cycles equivalent to busy-cycles? I.e. do we have this 
general relationship to the cycle event:

	cycles = vec-stall-cycles + vec-idle-cycles

?

	Ingo

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 2/2 -tip] perf_counter: Add generalized hardware interrupt support for AMD and Intel Corei7/Nehalem, Core2 and Atom
  2009-07-02  9:45                 ` [PATCH 2/2 -tip] perf_counter: Add generalized hardware interrupt support for AMD and Intel Corei7/Nehalem, Core2 and Atom Jaswinder Singh Rajput
@ 2009-07-03 10:33                   ` Ingo Molnar
  0 siblings, 0 replies; 41+ messages in thread
From: Ingo Molnar @ 2009-07-03 10:33 UTC (permalink / raw)
  To: Jaswinder Singh Rajput
  Cc: Arjan van de Ven, Paul Mackerras, Benjamin Herrenschmidt,
	Anton Blanchard, Thomas Gleixner, Peter Zijlstra, x86 maintainers,
	LKML, Alan Cox


* Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:

> $ ./perf stat -e interrupts -e masked -e int-pending-mask-cycles -- ls -lR /usr/include/ > /dev/null
> 
>  Performance counter stats for 'ls -lR /usr/include/':
> 
>             377  interrupts
>        53429936  int-mask-cycles
>            1119  int-pending-mask-cycles

What's your take on my review feedback:

> We could and should probably add a software counter for hardirqs 
> as well. That would allow the vector/irqnr information to be 
> passed in, and it would allow architectures without irq metrics in 
> the PMU to have this counter too.
>
> This way we could profile based on a specific interrupt source 
> only - say based on the networking card.

Why did you resend the patch while there was still unanswered review 
feedback?

	Ingo

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/2 -tip] perf_counter: Add generalized hardware vectored co-processor support for AMD and Intel Corei7/Nehalem
  2009-07-03 10:29                 ` [PATCH 1/2 -tip] perf_counter: Add generalized hardware vectored co-processor support for AMD and Intel Corei7/Nehalem Ingo Molnar
@ 2009-07-03 11:55                   ` Jaswinder Singh Rajput
  2009-07-03 12:49                     ` Jaswinder Singh Rajput
  2009-07-04  9:49                     ` Ingo Molnar
  0 siblings, 2 replies; 41+ messages in thread
From: Jaswinder Singh Rajput @ 2009-07-03 11:55 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Arjan van de Ven, Paul Mackerras, Benjamin Herrenschmidt,
	Anton Blanchard, Thomas Gleixner, Peter Zijlstra, x86 maintainers,
	LKML, Alan Cox

On Fri, 2009-07-03 at 12:29 +0200, Ingo Molnar wrote:
> * Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:
> 
> >  Performance counter stats for '/usr/bin/rhythmbox /home/jaswinder/Music/singhiskinng.mp3':
> > 
> >        17552264  vec-adds                  (scaled from 66.28%)
> >        19715258  vec-muls                  (scaled from 66.63%)
> >        15862733  vec-divs                  (scaled from 66.82%)
> >     23735187095  vec-idle-cycles           (scaled from 66.89%)
> >        11353159  vec-stall-cycles          (scaled from 66.90%)
> >        36628571  vec-ops                   (scaled from 66.48%)
> 
> Is stall-cycles equivalent to busy-cycles? 


hmm, normally we can use these terms interchangeably. But they can be
different sometimes.

busy means it is already executing some instructions so it will not take
another instruction.

stall can be busy (executing) or non-executing; maybe it is waiting for
some operands due to a cache miss.


> I.e. do we have this 
> general relationship to the cycle event:
> 
> 	cycles = vec-stall-cycles + vec-idle-cycles
> 
> ?

This patch is already big enough, having 206 lines. Do you want
everything in this patch ;-)

Or we can do these things later on.

Thanks,
--
JSR

http://userweb.kernel.org/~jaswinder/



^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 4/6 -tip] perf_counter: Add Generalized Hardware interrupt support for AMD
  2009-07-01 11:24         ` [PATCH 4/6 -tip] perf_counter: Add Generalized Hardware interrupt support for AMD Ingo Molnar
@ 2009-07-03 12:01           ` Jaswinder Singh Rajput
  2009-07-04 10:22             ` Ingo Molnar
  0 siblings, 1 reply; 41+ messages in thread
From: Jaswinder Singh Rajput @ 2009-07-03 12:01 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Peter Zijlstra, Arjan van de Ven, Frédéric Weisbecker,
	Arnaldo Carvalho de Melo, Paul Mackerras, Anton Blanchard,
	Thomas Gleixner, Peter Zijlstra, x86 maintainers, LKML, Alan Cox

On Wed, 2009-07-01 at 13:24 +0200, Ingo Molnar wrote:
> * Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:
> 
> > 
> > $ ./perf stat -e interrupts -e masked -e int-pending-mask-cycles -- ls -lR /usr/include/ > /dev/null
> > 
> >  Performance counter stats for 'ls -lR /usr/include/':
> > 
> >             377  interrupts
> >        53429936  int-mask-cycles
> >            1119  int-pending-mask-cycles
> > 
> >     0.371457539  seconds time elapsed
> 
> Agreed, this is another useful generalization - and the 'cycles 
> pending' metrics are not retrievable via any software means.
> 
> We could and should probably add a software counter for hardirqs as 
> well. That would allow the vector/irqnr information to be passed in, 
> and it would allow architectures without irq metrics in the PMU to 
> have this counter too.
> 

Please let me know whether the addition of the software counter should be
in this patch, or whether we can do it incrementally after this patch.

Thanks,
--
JSR

http://userweb.kernel.org/~jaswinder/



^ permalink raw reply	[flat|nested] 41+ messages in thread

* [PATCH 3/3 -tip] perf list: avoid replicating functions
  2009-07-03  9:30                   ` Ingo Molnar
  2009-07-03 10:10                     ` Jaswinder Singh Rajput
@ 2009-07-03 12:17                     ` Jaswinder Singh Rajput
  2009-07-04  9:50                       ` Ingo Molnar
  1 sibling, 1 reply; 41+ messages in thread
From: Jaswinder Singh Rajput @ 2009-07-03 12:17 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Arjan van de Ven, Paul Mackerras, Benjamin Herrenschmidt,
	Anton Blanchard, Thomas Gleixner, Peter Zijlstra, x86 maintainers,
	LKML, Alan Cox

On Fri, 2009-07-03 at 11:30 +0200, Ingo Molnar wrote:
> * Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:
> 
> > > $ ./perf list shows vector events like :
> > > 
> > >   vec-adds OR add                          [Hardware vector event]
> > >   vec-muls OR multiply                     [Hardware vector event]
> > >   vec-divs OR divide                       [Hardware vector event]
> > >   vec-idle-cycles OR vec-empty-cycles      [Hardware vector event]
> > >   vec-stall-cycles OR vec-busy-cycles      [Hardware vector event]
> > >   vec-ops OR vec-operations                [Hardware vector event]
> 
> btw., why does this printout SHOUT the 'or'? It's certainly not an 
> important piece of information. Something like:
> 
>    vec-adds | add                          [Hardware vector event]
>    vec-muls | multiply                     [Hardware vector event]
>    vec-divs | divide                       [Hardware vector event]
>    vec-idle-cycles | vec-empty-cycles      [Hardware vector event]
>    vec-stall-cycles | vec-busy-cycles      [Hardware vector event]
>    vec-ops | vec-operations                [Hardware vector event]
> 
> looks better on all levels.
> 

I prepared this patch incrementally on : 
[PATCH 1/2 -tip] perf_counter: Add generalized hardware vectored co-processor support for AMD and Intel Corei7/Nehalem
[PATCH 2/2 -tip] perf_counter: Add generalized hardware interrupt support for AMD and Intel Corei7/Nehalem, Core2 and Atom

[PATCH 3/3] perf list: avoid replicating functions

vector and interrupt can use same function made for hardware and software generic events.

Also replaced 'OR' with '|'

$ ./perf list

List of pre-defined events (to be used in -e):

  cpu-cycles | cycles                      [Hardware event]
  instructions                             [Hardware event]
  cache-references                         [Hardware event]
  cache-misses                             [Hardware event]
  branch-instructions | branches           [Hardware event]
  branch-misses                            [Hardware event]
  bus-cycles                               [Hardware event]

  cpu-clock                                [Software event]
  task-clock                               [Software event]
  page-faults | faults                     [Software event]
  minor-faults                             [Software event]
  major-faults                             [Software event]
  context-switches | cs                    [Software event]
  cpu-migrations | migrations              [Software event]

  L1-d$-loads                              [Hardware cache event]
  L1-d$-load-misses                        [Hardware cache event]
  L1-d$-stores                             [Hardware cache event]
  L1-d$-store-misses                       [Hardware cache event]
  L1-d$-prefetches                         [Hardware cache event]
  L1-d$-prefetch-misses                    [Hardware cache event]
  L1-i$-loads                              [Hardware cache event]
  L1-i$-load-misses                        [Hardware cache event]
  L1-i$-prefetches                         [Hardware cache event]
  L1-i$-prefetch-misses                    [Hardware cache event]
  LLC-loads                                [Hardware cache event]
  LLC-load-misses                          [Hardware cache event]
  LLC-stores                               [Hardware cache event]
  LLC-store-misses                         [Hardware cache event]
  LLC-prefetches                           [Hardware cache event]
  LLC-prefetch-misses                      [Hardware cache event]
  dTLB-loads                               [Hardware cache event]
  dTLB-load-misses                         [Hardware cache event]
  dTLB-stores                              [Hardware cache event]
  dTLB-store-misses                        [Hardware cache event]
  dTLB-prefetches                          [Hardware cache event]
  dTLB-prefetch-misses                     [Hardware cache event]
  iTLB-loads                               [Hardware cache event]
  iTLB-load-misses                         [Hardware cache event]
  branch-loads                             [Hardware cache event]
  branch-load-misses                       [Hardware cache event]

  vec-adds | add                           [Hardware vector event]
  vec-muls | multiply                      [Hardware vector event]
  vec-divs | divide                        [Hardware vector event]
  vec-idle-cycles | vec-empty-cycles       [Hardware vector event]
  vec-stall-cycles | vec-busy-cycles       [Hardware vector event]
  vec-ops | vec-operations                 [Hardware vector event]

  interrupts | interrupt                   [Hardware interrupt event]
  int-mask-cycles | masked                 [Hardware interrupt event]
  int-pending-mask-cycles                  [Hardware interrupt event]

  rNNN                                     [Hardware raw event]

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
---
 tools/perf/util/parse-events.c |   83 +++++++++++++++++-----------------------
 1 files changed, 35 insertions(+), 48 deletions(-)

diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index d085b8f..c2a7dc2 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -494,46 +494,51 @@ int parse_events(const struct option *opt __used, const char *str, int unset __u
 	return 0;
 }
 
-static const char * const event_type_descriptors[] = {
-	"",
+static const char *event_types[PERF_TYPE_MAX] = {
 	"Hardware event",
 	"Software event",
 	"Tracepoint event",
 	"Hardware cache event",
+	"Hardware raw event",
 	"Hardware vector event",
 	"Hardware interrupt event",
 };
 
-/*
- * Print the help text for the event symbols:
- */
-void print_events(void)
+static void print_desc(struct event_symbol *syms, unsigned int size)
 {
-	struct event_symbol *syms = event_symbols;
-	unsigned int i, type, op, prev_type = -1;
+	unsigned int i, type, prev_type = -1;
 	char name[40];
 
-	fprintf(stderr, "\n");
-	fprintf(stderr, "List of pre-defined events (to be used in -e):\n");
-
-	for (i = 0; i < ARRAY_SIZE(event_symbols); i++, syms++) {
-		type = syms->type + 1;
-		if (type > ARRAY_SIZE(event_type_descriptors))
-			type = 0;
-
-		if (type != prev_type)
+	for (i = 0; i < size; i++, syms++) {
+		type = syms->type;
+		if (type != prev_type) {
+			prev_type = type;
 			fprintf(stderr, "\n");
+		}
 
 		if (strlen(syms->alias))
-			sprintf(name, "%s OR %s", syms->symbol, syms->alias);
+			sprintf(name, "%s | %s", syms->symbol, syms->alias);
 		else
 			strcpy(name, syms->symbol);
-		fprintf(stderr, "  %-40s [%s]\n", name,
-			event_type_descriptors[type]);
 
-		prev_type = type;
+		fprintf(stderr, "  %-40s [%s]\n", name, event_types[type]);
 	}
+}
+
+/*
+ * Print the help text for the event symbols:
+ */
+void print_events(void)
+{
+	unsigned int type, op, r;
+
+	fprintf(stderr, "\n");
+	fprintf(stderr, "List of pre-defined events (to be used in -e):\n");
 
+	/* List hardware and software event descriptors */
+	print_desc(event_symbols, ARRAY_SIZE(event_symbols));
+
+	/* List hardware cache event descriptors */
 	fprintf(stderr, "\n");
 	for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) {
 		for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) {
@@ -541,41 +546,23 @@ void print_events(void)
 			if (!is_cache_op_valid(type, op))
 				continue;
 
-			for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) {
+			for (r = 0; r < PERF_COUNT_HW_CACHE_RESULT_MAX; r++) {
 				fprintf(stderr, "  %-40s [%s]\n",
-					event_cache_name(type, op, i),
-					event_type_descriptors[4]);
+					event_cache_name(type, op, r),
+					event_types[PERF_TYPE_HW_CACHE]);
 			}
 		}
 	}
 
-	fprintf(stderr, "\n");
-	syms = vector_event_symbols;
-	type = syms->type;
-	for (i = 0; i < ARRAY_SIZE(vector_event_symbols); i++, syms++) {
-		if (strlen(syms->alias))
-			sprintf(name, "%s OR %s", syms->symbol, syms->alias);
-		else
-			strcpy(name, syms->symbol);
-		fprintf(stderr, "  %-40s [%s]\n", name,
-			event_type_descriptors[type]);
-	}
+	/* List hardware vectored co-processor event descriptors */
+	print_desc(vector_event_symbols, ARRAY_SIZE(vector_event_symbols));
 
-	fprintf(stderr, "\n");
-	syms = interrupt_event_symbols;
-	type = syms->type;
-	for (i = 0; i < ARRAY_SIZE(interrupt_event_symbols); i++, syms++) {
-		if (strlen(syms->alias))
-			sprintf(name, "%s OR %s", syms->symbol, syms->alias);
-		else
-			strcpy(name, syms->symbol);
-		fprintf(stderr, "  %-40s [%s]\n", name,
-			event_type_descriptors[type]);
-	}
+	/* List hardware interrupt event descriptors */
+	print_desc(interrupt_event_symbols, ARRAY_SIZE(interrupt_event_symbols));
 
+	/* List hardware raw event descriptors */
 	fprintf(stderr, "\n");
-	fprintf(stderr, "  %-40s [raw hardware event descriptor]\n",
-		"rNNN");
+	fprintf(stderr, "  %-40s [%s]\n", "rNNN", event_types[PERF_TYPE_RAW]);
 	fprintf(stderr, "\n");
 
 	exit(129);
-- 
1.6.2.5




^ permalink raw reply related	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/2 -tip] perf_counter: Add generalized hardware vectored co-processor support for AMD and Intel Corei7/Nehalem
  2009-07-03 11:55                   ` Jaswinder Singh Rajput
@ 2009-07-03 12:49                     ` Jaswinder Singh Rajput
  2009-07-03 13:25                       ` Jaswinder Singh Rajput
  2009-07-04  9:49                     ` Ingo Molnar
  1 sibling, 1 reply; 41+ messages in thread
From: Jaswinder Singh Rajput @ 2009-07-03 12:49 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Arjan van de Ven, Paul Mackerras, Benjamin Herrenschmidt,
	Anton Blanchard, Thomas Gleixner, Peter Zijlstra, x86 maintainers,
	LKML, Alan Cox

On Fri, 2009-07-03 at 17:25 +0530, Jaswinder Singh Rajput wrote:
> On Fri, 2009-07-03 at 12:29 +0200, Ingo Molnar wrote:
> > * Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:
> > 
> > >  Performance counter stats for '/usr/bin/rhythmbox /home/jaswinder/Music/singhiskinng.mp3':
> > > 
> > >        17552264  vec-adds                  (scaled from 66.28%)
> > >        19715258  vec-muls                  (scaled from 66.63%)
> > >        15862733  vec-divs                  (scaled from 66.82%)
> > >     23735187095  vec-idle-cycles           (scaled from 66.89%)
> > >        11353159  vec-stall-cycles          (scaled from 66.90%)
> > >        36628571  vec-ops                   (scaled from 66.48%)
> > 
> > Is stall-cycles equivalent to busy-cycles? 
> 
> 
> hmm, normally we can use these terms interchangeably. But they can be
> different some times.
> 
> busy means it is already executing some instructions so it will not take
> another instruction.
> 
> stall can be busy(executing) or non-executing may be it is waiting for
> some operands due to cache miss.
> 
> 
> > I.e. do we have this 
> > general relationship to the cycle event:
> > 
> > 	cycles = vec-stall-cycles + vec-idle-cycles
> > 
> > ?

Like on AMD :

    13390918485  vec-adds                  (scaled from 57.07%)
    22465091289  vec-muls                  (scaled from 57.22%)
     2643789384  vec-divs                  (scaled from 57.21%)
    17922784596  vec-idle-cycles           (scaled from 57.23%)
     6402888606  vec-stall-cycles          (scaled from 57.17%)
    55823491597  cycles                    (scaled from 57.05%)
    51035264218  vec-ops                   (scaled from 57.05%)

  187.494664172  seconds time elapsed

vec-idle-cycles + vec-stall-cycles = 24325673202

so cycles = 2.29 * (vec-idle-cycles + vec-stall-cycles)

On AMD I used : EventSelect 0D7h Dispatch Stall for FPU Full
The number of processor cycles the decoder is stalled because the
scheduler for the Floating Point Unit is full. This condition can be
caused by a lack of parallelism in FP-intensive code, or by cache misses
on FP operand loads (which could also show up as EventSelect 0D8h
instead, depending on the nature of the instruction sequences). May
occur simultaneously with certain other stall conditions; see
EventSelect 0D1h

So stall is due to lack of parallelism and cache misses.
If we keep on increasing the size of FP units and cache may at some
point be we can get vec-stall-cycles = zero.

Thanks,
--
JSR

http://userweb.kernel.org/~jaswinder/


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/2 -tip] perf_counter: Add generalized hardware vectored co-processor support for AMD and Intel Corei7/Nehalem
  2009-07-03 12:49                     ` Jaswinder Singh Rajput
@ 2009-07-03 13:25                       ` Jaswinder Singh Rajput
  2009-07-04 10:03                         ` Ingo Molnar
  0 siblings, 1 reply; 41+ messages in thread
From: Jaswinder Singh Rajput @ 2009-07-03 13:25 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Arjan van de Ven, Paul Mackerras, Benjamin Herrenschmidt,
	Anton Blanchard, Thomas Gleixner, Peter Zijlstra, x86 maintainers,
	LKML, Alan Cox

On Fri, 2009-07-03 at 18:19 +0530, Jaswinder Singh Rajput wrote:
> On Fri, 2009-07-03 at 17:25 +0530, Jaswinder Singh Rajput wrote:
> > On Fri, 2009-07-03 at 12:29 +0200, Ingo Molnar wrote:
> > > * Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:
> > > 
> > > >  Performance counter stats for '/usr/bin/rhythmbox /home/jaswinder/Music/singhiskinng.mp3':
> > > > 
> > > >        17552264  vec-adds                  (scaled from 66.28%)
> > > >        19715258  vec-muls                  (scaled from 66.63%)
> > > >        15862733  vec-divs                  (scaled from 66.82%)
> > > >     23735187095  vec-idle-cycles           (scaled from 66.89%)
> > > >        11353159  vec-stall-cycles          (scaled from 66.90%)
> > > >        36628571  vec-ops                   (scaled from 66.48%)
> > > 
> > > Is stall-cycles equivalent to busy-cycles? 
> > 
> > 
> > hmm, normally we can use these terms interchangeably. But they can be
> > different some times.
> > 
> > busy means it is already executing some instructions so it will not take
> > another instruction.
> > 
> > stall can be busy(executing) or non-executing may be it is waiting for
> > some operands due to cache miss.
> > 
> > 
> > > I.e. do we have this 
> > > general relationship to the cycle event:
> > > 
> > > 	cycles = vec-stall-cycles + vec-idle-cycles
> > > 
> > > ?
> 
> Like on AMD :
> 
>     13390918485  vec-adds                  (scaled from 57.07%)
>     22465091289  vec-muls                  (scaled from 57.22%)
>      2643789384  vec-divs                  (scaled from 57.21%)
>     17922784596  vec-idle-cycles           (scaled from 57.23%)
>      6402888606  vec-stall-cycles          (scaled from 57.17%)
>     55823491597  cycles                    (scaled from 57.05%)
>     51035264218  vec-ops                   (scaled from 57.05%)
> 
>   187.494664172  seconds time elapsed
> 
> vec-idle-cycles + vec-stall-cycles = 24325673202
> 
> so cycles = 2.29 * (vec-idle-cycles + vec-stall-cycles)
> 
> On AMD I used : EventSelect 0D7h Dispatch Stall for FPU Full
> The number of processor cycles the decoder is stalled because the
> scheduler for the Floating Point Unit is full. This condition can be
> caused by a lack of parallelism in FP-intensive code, or by cache misses
> on FP operand loads (which could also show up as EventSelect 0D8h
> instead, depending on the nature of the instruction sequences). May
> occur simultaneously with certain other stall conditions; see
> EventSelect 0D1h
> 
> So stall is due to lack of parallelism and cache misses.
> If we keep on increasing the size of FP units and cache may at some
> point be we can get vec-stall-cycles = zero.
> 

I mean, stall is mainly due to lack of parallelism and cache misses.
If we keep increasing the size of the FP units and cache, then the stall
time will keep decreasing (of course it will never be zero ;)

And same thing will be happen for Intel.

So stall is not equal to busy.

Please let me know what is next; should I remove the 'busy' term from the alias?

Thanks,
--
JSR


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/2 -tip] perf_counter: Add generalized hardware vectored co-processor support for AMD and Intel Corei7/Nehalem
  2009-07-03 11:55                   ` Jaswinder Singh Rajput
  2009-07-03 12:49                     ` Jaswinder Singh Rajput
@ 2009-07-04  9:49                     ` Ingo Molnar
  2009-07-04 13:54                       ` Jaswinder Singh Rajput
  1 sibling, 1 reply; 41+ messages in thread
From: Ingo Molnar @ 2009-07-04  9:49 UTC (permalink / raw)
  To: Jaswinder Singh Rajput
  Cc: Arjan van de Ven, Paul Mackerras, Benjamin Herrenschmidt,
	Anton Blanchard, Thomas Gleixner, Peter Zijlstra, x86 maintainers,
	LKML, Alan Cox


* Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:

> On Fri, 2009-07-03 at 12:29 +0200, Ingo Molnar wrote:
> > * Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:
> > 
> > >  Performance counter stats for '/usr/bin/rhythmbox /home/jaswinder/Music/singhiskinng.mp3':
> > > 
> > >        17552264  vec-adds                  (scaled from 66.28%)
> > >        19715258  vec-muls                  (scaled from 66.63%)
> > >        15862733  vec-divs                  (scaled from 66.82%)
> > >     23735187095  vec-idle-cycles           (scaled from 66.89%)
> > >        11353159  vec-stall-cycles          (scaled from 66.90%)
> > >        36628571  vec-ops                   (scaled from 66.48%)
> > 
> > Is stall-cycles equivalent to busy-cycles? 
> 
> 
> hmm, normally we can use these terms interchangeably. But they can 
> be different some times.
> 
> busy means it is already executing some instructions so it will 
> not take another instruction.
> 
> stall can be busy(executing) or non-executing may be it is waiting 
> for some operands due to cache miss.
> 
> 
> > I.e. do we have this 
> > general relationship to the cycle event:
> > 
> > 	cycles = vec-stall-cycles + vec-idle-cycles
> > 
> > ?
> 
> This patch is already big enough, having 206 lines. Do you want 
> everything in this patch ;-)

The question i asked is whether the above relationship is true. You 
can test this by displaying the 'cycles' metric too in your test, 
alongside vec-stall-cycles and vec-idle-cycles. Do the numbers add 
up?

	Ingo

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 3/3 -tip] perf list: avoid replicating functions
  2009-07-03 12:17                     ` [PATCH 3/3 -tip] perf list: avoid replicating functions Jaswinder Singh Rajput
@ 2009-07-04  9:50                       ` Ingo Molnar
  0 siblings, 0 replies; 41+ messages in thread
From: Ingo Molnar @ 2009-07-04  9:50 UTC (permalink / raw)
  To: Jaswinder Singh Rajput
  Cc: Arjan van de Ven, Paul Mackerras, Benjamin Herrenschmidt,
	Anton Blanchard, Thomas Gleixner, Peter Zijlstra, x86 maintainers,
	LKML, Alan Cox


* Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:

> On Fri, 2009-07-03 at 11:30 +0200, Ingo Molnar wrote:
> > * Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:
> > 
> > > > $ ./perf list shows vector events like :
> > > > 
> > > >   vec-adds OR add                          [Hardware vector event]
> > > >   vec-muls OR multiply                     [Hardware vector event]
> > > >   vec-divs OR divide                       [Hardware vector event]
> > > >   vec-idle-cycles OR vec-empty-cycles      [Hardware vector event]
> > > >   vec-stall-cycles OR vec-busy-cycles      [Hardware vector event]
> > > >   vec-ops OR vec-operations                [Hardware vector event]
> > 
> > btw., why does this printout SHOUT the 'or'? It's certainly not an 
> > important piece of information. Something like:
> > 
> >    vec-adds | add                          [Hardware vector event]
> >    vec-muls | multiply                     [Hardware vector event]
> >    vec-divs | divide                       [Hardware vector event]
> >    vec-idle-cycles | vec-empty-cycles      [Hardware vector event]
> >    vec-stall-cycles | vec-busy-cycles      [Hardware vector event]
> >    vec-ops | vec-operations                [Hardware vector event]
> > 
> > looks better on all levels.
> > 
> 
> I prepared this patch incrementally on : 
> [PATCH 1/2 -tip] perf_counter: Add generalized hardware vectored co-processor support for AMD and Intel Corei7/Nehalem
> [PATCH 2/2 -tip] perf_counter: Add generalized hardware interrupt support for AMD and Intel Corei7/Nehalem, Core2 and Atom
> 
> [PATCH 3/3] perf list: avoid replicating functions
> 
> vector and interrupt can use same function made for hardware and 
> software generic events.
> 
> Also replaced 'OR' with '|'

Please submit a clean series of patches instead of a mixture of 
patches plus fixes to patches.

	Ingo

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/2 -tip] perf_counter: Add generalized hardware vectored co-processor support for AMD and Intel Corei7/Nehalem
  2009-07-03 13:25                       ` Jaswinder Singh Rajput
@ 2009-07-04 10:03                         ` Ingo Molnar
  2009-07-04 14:05                           ` Jaswinder Singh Rajput
  0 siblings, 1 reply; 41+ messages in thread
From: Ingo Molnar @ 2009-07-04 10:03 UTC (permalink / raw)
  To: Jaswinder Singh Rajput
  Cc: Arjan van de Ven, Paul Mackerras, Benjamin Herrenschmidt,
	Anton Blanchard, Thomas Gleixner, Peter Zijlstra, x86 maintainers,
	LKML, Alan Cox


* Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:

> On Fri, 2009-07-03 at 18:19 +0530, Jaswinder Singh Rajput wrote:
> > On Fri, 2009-07-03 at 17:25 +0530, Jaswinder Singh Rajput wrote:
> > > On Fri, 2009-07-03 at 12:29 +0200, Ingo Molnar wrote:
> > > > * Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:
> > > > 
> > > > >  Performance counter stats for '/usr/bin/rhythmbox /home/jaswinder/Music/singhiskinng.mp3':
> > > > > 
> > > > >        17552264  vec-adds                  (scaled from 66.28%)
> > > > >        19715258  vec-muls                  (scaled from 66.63%)
> > > > >        15862733  vec-divs                  (scaled from 66.82%)
> > > > >     23735187095  vec-idle-cycles           (scaled from 66.89%)
> > > > >        11353159  vec-stall-cycles          (scaled from 66.90%)
> > > > >        36628571  vec-ops                   (scaled from 66.48%)
> > > > 
> > > > Is stall-cycles equivalent to busy-cycles? 
> > > 
> > > 
> > > hmm, normally we can use these terms interchangeably. But they can be
> > > different some times.
> > > 
> > > busy means it is already executing some instructions so it will not take
> > > another instruction.
> > > 
> > > stall can be busy(executing) or non-executing may be it is waiting for
> > > some operands due to cache miss.
> > > 
> > > 
> > > > I.e. do we have this 
> > > > general relationship to the cycle event:
> > > > 
> > > > 	cycles = vec-stall-cycles + vec-idle-cycles
> > > > 
> > > > ?
> > 
> > Like on AMD :
> > 
> >     13390918485  vec-adds                  (scaled from 57.07%)
> >     22465091289  vec-muls                  (scaled from 57.22%)
> >      2643789384  vec-divs                  (scaled from 57.21%)
> >     17922784596  vec-idle-cycles           (scaled from 57.23%)
> >      6402888606  vec-stall-cycles          (scaled from 57.17%)
> >     55823491597  cycles                    (scaled from 57.05%)
> >     51035264218  vec-ops                   (scaled from 57.05%)
> > 
> >   187.494664172  seconds time elapsed
> > 
> > vec-idle-cycles + vec-stall-cycles = 24325673202
> > 
> > so cycles = 2.29 * (vec-idle-cycles + vec-stall-cycles)

that equation is entirely bogus.

> > 
> > On AMD I used : EventSelect 0D7h Dispatch Stall for FPU Full The 
> > number of processor cycles the decoder is stalled because the 
> > scheduler for the Floating Point Unit is full. This condition 
> > can be caused by a lack of parallelism in FP-intensive code, or 
> > by cache misses on FP operand loads (which could also show up as 
> > EventSelect 0D8h instead, depending on the nature of the 
> > instruction sequences). May occur simultaneously with certain 
> > other stall conditions; see EventSelect 0D1h
> > 
> > So stall is due to lack of parallelism and cache misses. If we 
> > keep on increasing the size of FP units and cache may at some 
> > point be we can get vec-stall-cycles = zero.
> > 
> 
> I mean, So stall is majorly due to lack of parallelism and cache 
> misses. If we keep on increasing the size of FP units and cache 
> then stall time will keep on decreasing (ofcourse it will be never 
> Zero ;)
> 
> And same thing will be happen for Intel.
> 
> So stall is not equal to busy.
> 
> Please let me know what is next, should I remove busy term from 
> alias.

What is needed is for you to understand these events and provide a 
generalization around them that makes sense. Or to declare it 
honestly when you dont.

The numbers simply dont add up:

> >     13390918485  vec-adds                  (scaled from 57.07%)
> >     22465091289  vec-muls                  (scaled from 57.22%)
> >      2643789384  vec-divs                  (scaled from 57.21%)
> >     17922784596  vec-idle-cycles           (scaled from 57.23%)
> >      6402888606  vec-stall-cycles          (scaled from 57.17%)
> >     55823491597  cycles                    (scaled from 57.05%)
> >     51035264218  vec-ops                   (scaled from 57.05%)

vec-idle-cycles + vec-stall-cycles does not add up to cycles - 
because a stall is not an 'interchangeable' term with 'busy' as you 
claimed before, but a special state of the pipeline, a subset of 
busy.

I prefer to apply patches from people who understand what they are 
doing - and more importantly, who express and declare their own 
limits properly when they _dont_ understand something and are 
guessing.

Frankly, your patches dont give me this impression and you are also 
babbling way too much about things you clearly dont understand, and 
thus you hinder the discussions with noise.

It's not bad at all to not understand something (we all are at 
various stages of a big and constantly refreshing learning curves), 
but it's very bad to pretend you understand something while you 
clearly dont. What we need in lkml discussions is an honest laying 
down of facts, opinions and doubts.

Why the heck didnt you say:

 " I dont know much about PMUs or vector units yet, but I have found
   these blurbs in the Intel and AMD docs and what do you think 
   about structuring these events the following way. Someone who 
   knows this stuff should review this first, it is quite likely 
   incomplete. "

	Ingo

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 4/6 -tip] perf_counter: Add Generalized Hardware interrupt support for AMD
  2009-07-03 12:01           ` Jaswinder Singh Rajput
@ 2009-07-04 10:22             ` Ingo Molnar
  2009-07-04 14:17               ` Jaswinder Singh Rajput
  0 siblings, 1 reply; 41+ messages in thread
From: Ingo Molnar @ 2009-07-04 10:22 UTC (permalink / raw)
  To: Jaswinder Singh Rajput
  Cc: Peter Zijlstra, Arjan van de Ven, Frédéric Weisbecker,
	Arnaldo Carvalho de Melo, Paul Mackerras, Anton Blanchard,
	Thomas Gleixner, Peter Zijlstra, x86 maintainers, LKML, Alan Cox


* Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:

> On Wed, 2009-07-01 at 13:24 +0200, Ingo Molnar wrote:
> > * Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:
> > 
> > > 
> > > $ ./perf stat -e interrupts -e masked -e int-pending-mask-cycles -- ls -lR /usr/include/ > /dev/null
> > > 
> > >  Performance counter stats for 'ls -lR /usr/include/':
> > > 
> > >             377  interrupts
> > >        53429936  int-mask-cycles
> > >            1119  int-pending-mask-cycles
> > > 
> > >     0.371457539  seconds time elapsed
> > 
> > Agreed, this is another useful generalization - and the 'cycles 
> > pending' metrics are not retrievable via any software means.
> > 
> > We could and should probably add a software counter for hardirqs 
> > as wel. That would allow the vector/irqnr information to be 
> > passed in, and it would allow architectures without irq metrics 
> > in the PMU to have this counter too.
> > 
> 
> Please let me know that addition of software counter will be in 
> this patch or we can do it incrementally after this patch.

It should be in this series. That way we can cross-check whether the 
soft counts and the hard counts match up and find potential bugs 
that way, etc.

	Ingo

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/2 -tip] perf_counter: Add generalized hardware vectored co-processor support for AMD and Intel Corei7/Nehalem
  2009-07-04  9:49                     ` Ingo Molnar
@ 2009-07-04 13:54                       ` Jaswinder Singh Rajput
  0 siblings, 0 replies; 41+ messages in thread
From: Jaswinder Singh Rajput @ 2009-07-04 13:54 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Arjan van de Ven, Paul Mackerras, Benjamin Herrenschmidt,
	Anton Blanchard, Thomas Gleixner, Peter Zijlstra, x86 maintainers,
	LKML, Alan Cox

On Sat, 2009-07-04 at 11:49 +0200, Ingo Molnar wrote:
> * Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:
> 
> > On Fri, 2009-07-03 at 12:29 +0200, Ingo Molnar wrote:
> > > * Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:
> > > 
> > > >  Performance counter stats for '/usr/bin/rhythmbox /home/jaswinder/Music/singhiskinng.mp3':
> > > > 
> > > >        17552264  vec-adds                  (scaled from 66.28%)
> > > >        19715258  vec-muls                  (scaled from 66.63%)
> > > >        15862733  vec-divs                  (scaled from 66.82%)
> > > >     23735187095  vec-idle-cycles           (scaled from 66.89%)
> > > >        11353159  vec-stall-cycles          (scaled from 66.90%)
> > > >        36628571  vec-ops                   (scaled from 66.48%)
> > > 
> > > Is stall-cycles equivalent to busy-cycles? 
> > 
> > 
> > hmm, normally we can use these terms interchangeably. But they can 
> > be different some times.
> > 
> > busy means it is already executing some instructions so it will 
> > not take another instruction.
> > 
> > stall can be busy(executing) or non-executing may be it is waiting 
> > for some operands due to cache miss.
> > 
> > 
> > > I.e. do we have this 
> > > general relationship to the cycle event:
> > > 
> > > 	cycles = vec-stall-cycles + vec-idle-cycles
> > > 
> > > ?
> > 
> > This patch is already big enough, having 206 lines. Do you want 
> > everything in this patch ;-)
> 
> The question i asked is whether the above relationship is true. You 
> can test this by displaying the 'cycles' metric too in your test, 
> alongside vec-stall-cycles and vec-idle-cycles. Do the numbers add 
> up?
> 

But I do not understand why you asked me about this relationship; you
could also verify it on your side.

What is the point of blocking the patch and going off on another tangent?

I am totally confused: on one side you are saying this patch is useful,
and on the other side you are not applying it.

Please let me know what the problem in this patch is so that I can fix
it, you can apply it, and other people can start benefiting from this
feature.

Thanks,
--
JSR


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 1/2 -tip] perf_counter: Add generalized hardware vectored co-processor support for AMD and Intel Corei7/Nehalem
  2009-07-04 10:03                         ` Ingo Molnar
@ 2009-07-04 14:05                           ` Jaswinder Singh Rajput
  0 siblings, 0 replies; 41+ messages in thread
From: Jaswinder Singh Rajput @ 2009-07-04 14:05 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Arjan van de Ven, Paul Mackerras, Benjamin Herrenschmidt,
	Anton Blanchard, Thomas Gleixner, Peter Zijlstra, x86 maintainers,
	LKML, Alan Cox

On Sat, 2009-07-04 at 12:03 +0200, Ingo Molnar wrote:
> * Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:
> 
> > > > > I.e. do we have this 
> > > > > general relationship to the cycle event:
> > > > > 
> > > > > 	cycles = vec-stall-cycles + vec-idle-cycles
> > > > > 
> > > > > ?
> > > 
> > > Like on AMD :
> > > 
> > >     13390918485  vec-adds                  (scaled from 57.07%)
> > >     22465091289  vec-muls                  (scaled from 57.22%)
> > >      2643789384  vec-divs                  (scaled from 57.21%)
> > >     17922784596  vec-idle-cycles           (scaled from 57.23%)
> > >      6402888606  vec-stall-cycles          (scaled from 57.17%)
> > >     55823491597  cycles                    (scaled from 57.05%)
> > >     51035264218  vec-ops                   (scaled from 57.05%)
> > > 
> > >   187.494664172  seconds time elapsed
> > > 
> > > vec-idle-cycles + vec-stall-cycles = 24325673202
> > > 
> > > so cycles = 2.29 * (vec-idle-cycles + vec-stall-cycles)
> 
> that equation is entirely bogus.
> 

What is bogus? In this case the equation is true, and it depends on the
particular application.

> > > 
> > > On AMD I used : EventSelect 0D7h Dispatch Stall for FPU Full The 
> > > number of processor cycles the decoder is stalled because the 
> > > scheduler for the Floating Point Unit is full. This condition 
> > > can be caused by a lack of parallelism in FP-intensive code, or 
> > > by cache misses on FP operand loads (which could also show up as 
> > > EventSelect 0D8h instead, depending on the nature of the 
> > > instruction sequences). May occur simultaneously with certain 
> > > other stall conditions; see EventSelect 0D1h
> > > 
> > > So stall is due to lack of parallelism and cache misses. If we 
> > > keep on increasing the size of FP units and cache may at some 
> > > point be we can get vec-stall-cycles = zero.
> > > 
> > 
> > I mean, So stall is majorly due to lack of parallelism and cache 
> > misses. If we keep on increasing the size of FP units and cache 
> > then stall time will keep on decreasing (ofcourse it will be never 
> > Zero ;)
> > 
> > And same thing will be happen for Intel.
> > 
> > So stall is not equal to busy.
> > 
> > Please let me know what is next, should I remove busy term from 
> > alias.
> 
> What is needed is for you to understand these events and provide a 
> generalization around them that makes sense. Or to declare it 
> honestly when you dont.
> 

what ??

Tell me where the problem is. Is there any problem in the patch?

> The numbers simply dont add up:
> 
> > >     13390918485  vec-adds                  (scaled from 57.07%)
> > >     22465091289  vec-muls                  (scaled from 57.22%)
> > >      2643789384  vec-divs                  (scaled from 57.21%)
> > >     17922784596  vec-idle-cycles           (scaled from 57.23%)
> > >      6402888606  vec-stall-cycles          (scaled from 57.17%)
> > >     55823491597  cycles                    (scaled from 57.05%)
> > >     51035264218  vec-ops                   (scaled from 57.05%)
> 
> vec-idle-cycles + vec-stall-cycles does not add up to cycles - 
> because a stall is not an 'interchangeable' term with 'busy' as you 
> claimed before, but a special state of the pipeline, a subset of 
> busy.
> 
> I prefer to apply patches from people who understand what they are 
> doing - and more importantly, who express and declare their own 
> limits properly when they _dont_ understand something and are 
> guessing.
> 

What is the problem in understanding? You raised the question, so you
were confused, not me. And you got the clear picture from my points, yet
you are still blaming me?


> Frankly, your patches dont give me this impression and you are also 
> babbling way too much about things you clearly dont understand, and 
> thus you hinder the discussions with noise.
> 
> It's not bad at all to not understand something (we all are at 
> various stages of a big and constantly refreshing learning curves), 
> but it's very bad to pretend you understand something while you 
> clearly dont. What we need in lkml discussions is an honest laying 
> down of facts, opinions and doubts.
> 
> Why the heck didnt you say:
> 
>  " I dont know much about PMUs or vector units yet, but I have found
>    these blurbs in the Intel and AMD docs and what do you think 
>    about structuring these events the following way. Someone who 
>    knows this stuff should review this first, it is quite likely 
>    incomplete. "


Why should I say this? It is you who needs to say this.

I have a clear understanding of why I came up with this patch.

Thanks,
--
JSR


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 4/6 -tip] perf_counter: Add Generalized Hardware interrupt support for AMD
  2009-07-04 10:22             ` Ingo Molnar
@ 2009-07-04 14:17               ` Jaswinder Singh Rajput
  2009-07-05  1:11                 ` Ingo Molnar
  0 siblings, 1 reply; 41+ messages in thread
From: Jaswinder Singh Rajput @ 2009-07-04 14:17 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Peter Zijlstra, Arjan van de Ven, Frédéric Weisbecker,
	Arnaldo Carvalho de Melo, Paul Mackerras, Anton Blanchard,
	Thomas Gleixner, Peter Zijlstra, x86 maintainers, LKML, Alan Cox

On Sat, 2009-07-04 at 12:22 +0200, Ingo Molnar wrote:
> * Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:
> 
> > On Wed, 2009-07-01 at 13:24 +0200, Ingo Molnar wrote:
> > > * Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:
> > > 
> > > > 
> > > > $ ./perf stat -e interrupts -e masked -e int-pending-mask-cycles -- ls -lR /usr/include/ > /dev/null
> > > > 
> > > >  Performance counter stats for 'ls -lR /usr/include/':
> > > > 
> > > >             377  interrupts
> > > >        53429936  int-mask-cycles
> > > >            1119  int-pending-mask-cycles
> > > > 
> > > >     0.371457539  seconds time elapsed
> > > 
> > > Agreed, this is another useful generalization - and the 'cycles 
> > > pending' metrics are not retrievable via any software means.
> > > 
> > > We could and should probably add a software counter for hardirqs 
> > > as wel. That would allow the vector/irqnr information to be 
> > > passed in, and it would allow architectures without irq metrics 
> > > in the PMU to have this counter too.
> > > 
> > 
> > Please let me know that addition of software counter will be in 
> > this patch or we can do it incrementally after this patch.
> 
> It should be in this series. That way we can cross-check whether the 
> soft counts and the hard counts match up and find potential bugs 
> that way, etc.
> 

You want to cross-check performance counter events?

Why did you choose interrupt events? Why did you not raise this point
when the cache events were added?

I do not understand why you keep going on tangents.

If you want to cross-check, then it should be in a different patch;
there should be no requirement to have this in this series, and no point
in blocking this patch on an irrelevant argument.

The only thing I can do is fix the patch, if you point out any problem
in it.

Thanks,
--
JSR


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 4/6 -tip] perf_counter: Add Generalized Hardware interrupt support for AMD
  2009-07-04 14:17               ` Jaswinder Singh Rajput
@ 2009-07-05  1:11                 ` Ingo Molnar
  2009-07-05  4:29                   ` Jaswinder Singh Rajput
  0 siblings, 1 reply; 41+ messages in thread
From: Ingo Molnar @ 2009-07-05  1:11 UTC (permalink / raw)
  To: Jaswinder Singh Rajput
  Cc: Peter Zijlstra, Arjan van de Ven, Frédéric Weisbecker,
	Arnaldo Carvalho de Melo, Paul Mackerras, Anton Blanchard,
	Thomas Gleixner, Peter Zijlstra, x86 maintainers, LKML, Alan Cox


* Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:

> On Sat, 2009-07-04 at 12:22 +0200, Ingo Molnar wrote:
> > * Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:
> > 
> > > On Wed, 2009-07-01 at 13:24 +0200, Ingo Molnar wrote:
> > > > * Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:
> > > > 
> > > > > 
> > > > > $ ./perf stat -e interrupts -e masked -e int-pending-mask-cycles -- ls -lR /usr/include/ > /dev/null
> > > > > 
> > > > >  Performance counter stats for 'ls -lR /usr/include/':
> > > > > 
> > > > >             377  interrupts
> > > > >        53429936  int-mask-cycles
> > > > >            1119  int-pending-mask-cycles
> > > > > 
> > > > >     0.371457539  seconds time elapsed
> > > > 
> > > > Agreed, this is another useful generalization - and the 'cycles 
> > > > pending' metrics are not retrievable via any software means.
> > > > 
> > > > We could and should probably add a software counter for hardirqs 
> > > > as wel. That would allow the vector/irqnr information to be 
> > > > passed in, and it would allow architectures without irq metrics 
> > > > in the PMU to have this counter too.
> > > > 
> > > 
> > > Please let me know that addition of software counter will be 
> > > in this patch or we can do it incrementally after this patch.
> > 
> > It should be in this series. That way we can cross-check whether 
> > the soft counts and the hard counts match up and find potential 
> > bugs that way, etc.
> > 
> 
> You want to cross check performance counter events ?

Yes. The events are also more complete if we add per IRQ source 
counts as well, not just summary counts.

	Ingo

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 4/6 -tip] perf_counter: Add Generalized Hardware interrupt support for AMD
  2009-07-05  1:11                 ` Ingo Molnar
@ 2009-07-05  4:29                   ` Jaswinder Singh Rajput
  2009-07-05  8:04                     ` Ingo Molnar
  0 siblings, 1 reply; 41+ messages in thread
From: Jaswinder Singh Rajput @ 2009-07-05  4:29 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Peter Zijlstra, Arjan van de Ven, Frédéric Weisbecker,
	Arnaldo Carvalho de Melo, Paul Mackerras, Anton Blanchard,
	Thomas Gleixner, Peter Zijlstra, x86 maintainers, LKML, Alan Cox

On Sun, 2009-07-05 at 03:11 +0200, Ingo Molnar wrote:
> * Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:
> 
> > On Sat, 2009-07-04 at 12:22 +0200, Ingo Molnar wrote:
> > > * Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:
> > > 
> > > > On Wed, 2009-07-01 at 13:24 +0200, Ingo Molnar wrote:
> > > > > * Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:
> > > > > 
> > > > > > 
> > > > > > $ ./perf stat -e interrupts -e masked -e int-pending-mask-cycles -- ls -lR /usr/include/ > /dev/null
> > > > > > 
> > > > > >  Performance counter stats for 'ls -lR /usr/include/':
> > > > > > 
> > > > > >             377  interrupts
> > > > > >        53429936  int-mask-cycles
> > > > > >            1119  int-pending-mask-cycles
> > > > > > 
> > > > > >     0.371457539  seconds time elapsed
> > > > > 
> > > > > Agreed, this is another useful generalization - and the 'cycles 
> > > > > pending' metrics are not retrievable via any software means.
> > > > > 
> > > > > We could and should probably add a software counter for hardirqs 
> > > > > as wel. That would allow the vector/irqnr information to be 
> > > > > passed in, and it would allow architectures without irq metrics 
> > > > > in the PMU to have this counter too.
> > > > > 
> > > > 
> > > > Please let me know that addition of software counter will be 
> > > > in this patch or we can do it incrementally after this patch.
> > > 
> > > It should be in this series. That way we can cross-check whether 
> > > the soft counts and the hard counts match up and find potential 
> > > bugs that way, etc.
> > > 
> > 
> > You want to cross check performance counter events ?
> 
> Yes. The events are also more complete if we add per IRQ source 
> counts as well, not just summary counts.
> 

If you ask me about 'complete', I will say :
"No-one is 'complete' except God".

Let me know what you mean by 'complete' and 'more complete'.

This is a hardware performance interrupt event patch.
If you want to add IRQ sources, of course you can add that in another
patch; it is a never-ending task.

I do not understand why you behave like this:

1. Is today the last day of creation?
2. Or will you not collect any further patches?

Of course the answer is "no", so then what is the problem?

Stop this completeness madness. You will never be complete, at least in
this life, no matter what you do.

Thanks,
--
JSR


^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 4/6 -tip] perf_counter: Add Generalized Hardware interrupt support for AMD
  2009-07-05  4:29                   ` Jaswinder Singh Rajput
@ 2009-07-05  8:04                     ` Ingo Molnar
  2009-07-05  9:01                       ` Jaswinder Singh Rajput
  2009-07-05  9:55                       ` Jaswinder Singh Rajput
  0 siblings, 2 replies; 41+ messages in thread
From: Ingo Molnar @ 2009-07-05  8:04 UTC (permalink / raw)
  To: Jaswinder Singh Rajput
  Cc: Peter Zijlstra, Arjan van de Ven, Frédéric Weisbecker,
	Arnaldo Carvalho de Melo, Paul Mackerras, Anton Blanchard,
	Thomas Gleixner, Peter Zijlstra, x86 maintainers, LKML, Alan Cox,
	H. Peter Anvin


* Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:

> On Sun, 2009-07-05 at 03:11 +0200, Ingo Molnar wrote:
> > * Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:
> > 
> > > On Sat, 2009-07-04 at 12:22 +0200, Ingo Molnar wrote:
> > > > * Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:
> > > > 
> > > > > On Wed, 2009-07-01 at 13:24 +0200, Ingo Molnar wrote:
> > > > > > * Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:
> > > > > > 
> > > > > > > 
> > > > > > > $ ./perf stat -e interrupts -e masked -e int-pending-mask-cycles -- ls -lR /usr/include/ > /dev/null
> > > > > > > 
> > > > > > >  Performance counter stats for 'ls -lR /usr/include/':
> > > > > > > 
> > > > > > >             377  interrupts
> > > > > > >        53429936  int-mask-cycles
> > > > > > >            1119  int-pending-mask-cycles
> > > > > > > 
> > > > > > >     0.371457539  seconds time elapsed
> > > > > > 
> > > > > > Agreed, this is another useful generalization - and the 'cycles 
> > > > > > pending' metrics are not retrievable via any software means.
> > > > > > 
> > > > > > We could and should probably add a software counter for hardirqs 
> > > > > > as wel. That would allow the vector/irqnr information to be 
> > > > > > passed in, and it would allow architectures without irq metrics 
> > > > > > in the PMU to have this counter too.
> > > > > > 
> > > > > 
> > > > > Please let me know that addition of software counter will be 
> > > > > in this patch or we can do it incrementally after this patch.
> > > > 
> > > > It should be in this series. That way we can cross-check whether 
> > > > the soft counts and the hard counts match up and find potential 
> > > > bugs that way, etc.
> > > > 
> > > 
> > > You want to cross check performance counter events ?
> > 
> > Yes. The events are also more complete if we add per IRQ source 
> > counts as well, not just summary counts.
> 
> If you ask me about 'complete', I will say : "No-one is 'complete' 
> except God".
> 
> Let me know what you mean by 'complete' and 'more complete'.
> 
> This is a hardware performance interrupt event patch. If you want 
> to add IRQ source, of course you can add it in another patch, it 
> is a never ending task.
> 
> I do not understand why you behave like this :
> 
> 1. Is today the last day of the creation.
> 2. Or you will not collect any further patches.
> 
> Of course answer is "no" then what is the problem with you.
> 
> Stop this complete-ness madness. You will never complete atleast 
> in this life no matter what you will do.

I'm simply not going to apply patches from you for what i consider a 
half-done feature.

	Ingo

^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 4/6 -tip] perf_counter: Add Generalized Hardware interrupt support for AMD
  2009-07-05  8:04                     ` Ingo Molnar
@ 2009-07-05  9:01                       ` Jaswinder Singh Rajput
  2009-07-05  9:55                       ` Jaswinder Singh Rajput
  1 sibling, 0 replies; 41+ messages in thread
From: Jaswinder Singh Rajput @ 2009-07-05  9:01 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Peter Zijlstra, Arjan van de Ven, Frédéric Weisbecker,
	Arnaldo Carvalho de Melo, Paul Mackerras, Anton Blanchard,
	Thomas Gleixner, Peter Zijlstra, x86 maintainers, LKML, Alan Cox,
	H. Peter Anvin

On Sun, 2009-07-05 at 10:04 +0200, Ingo Molnar wrote:
> * Jaswinder Singh Rajput <jaswinder@kernel.org> wrote:
> 

> I'm simply not going to apply patches from you for what i consider a 
> half-done feature.
> 

This is not half-done. There are only 3 hardware interrupt performance
counter events on Intel and AMD, and I have supported all of them.

I have also supported all relevant Intel models and all AMD models.

You are requesting a software counter for hardirqs; I have no problem
supporting it. I also plan to add exceptions through software counters,
but again, that will be a different patch. And there is no point in
blocking this patch, as it will not change even if you add software
counters.

And you are not even telling me the problem in this patch, yet you want
to add more stuff, which is independent of it.

So it is time to reconsider your consideration.

Thanks,
--
JSR



^ permalink raw reply	[flat|nested] 41+ messages in thread

* Re: [PATCH 4/6 -tip] perf_counter: Add Generalized Hardware interrupt support for AMD
  2009-07-05  8:04                     ` Ingo Molnar
  2009-07-05  9:01                       ` Jaswinder Singh Rajput
@ 2009-07-05  9:55                       ` Jaswinder Singh Rajput
  1 sibling, 0 replies; 41+ messages in thread
From: Jaswinder Singh Rajput @ 2009-07-05  9:55 UTC (permalink / raw)
  To: Ingo Molnar
  Cc: Peter Zijlstra, Arjan van de Ven, Frédéric Weisbecker,
	Arnaldo Carvalho de Melo, Paul Mackerras, Anton Blanchard,
	Thomas Gleixner, Peter Zijlstra, x86 maintainers, LKML, Alan Cox,
	H. Peter Anvin

On Sun, 2009-07-05 at 10:04 +0200, Ingo Molnar wrote:

> I'm simply not going to apply patches from you for what i consider a 
> half-done feature.
> 

OK, can you suggest how the output should look, so that I can start
preparing the hardirq patch.

Thanks,
--
JSR


^ permalink raw reply	[flat|nested] 41+ messages in thread

end of thread, other threads:[~2009-07-05  9:56 UTC | newest]

Thread overview: 41+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2009-07-01  9:33 [GIT-PULL -tip][PATCH 0/6] perf_counter patches Jaswinder Singh Rajput
2009-07-01  9:35 ` [PATCH 1/6 -tip] perf stat: define MATCH_EVENT for easy attrs checking Jaswinder Singh Rajput
2009-07-01  9:36   ` [PATCH 2/6 -tip] perf stat: treat same behaviour for all CYCLES and CLOCKS Jaswinder Singh Rajput
2009-07-01  9:37     ` [PATCH 3/6 -tip] perf_counter: Add Generalized Hardware vectored co-processor support for AMD Jaswinder Singh Rajput
2009-07-01  9:38       ` [PATCH 4/6 -tip] perf_counter: Add Generalized Hardware interrupt " Jaswinder Singh Rajput
2009-07-01  9:38         ` [PATCH 5/6 -tip] perf_counter: Add hardware vector events for nehalem Jaswinder Singh Rajput
2009-07-01  9:40           ` [PATCH 6/6 -tip] perf_counter: Add hardware interrupt events for nehalem, core2 and atom Jaswinder Singh Rajput
2009-07-01 11:24         ` [PATCH 4/6 -tip] perf_counter: Add Generalized Hardware interrupt support for AMD Ingo Molnar
2009-07-03 12:01           ` Jaswinder Singh Rajput
2009-07-04 10:22             ` Ingo Molnar
2009-07-04 14:17               ` Jaswinder Singh Rajput
2009-07-05  1:11                 ` Ingo Molnar
2009-07-05  4:29                   ` Jaswinder Singh Rajput
2009-07-05  8:04                     ` Ingo Molnar
2009-07-05  9:01                       ` Jaswinder Singh Rajput
2009-07-05  9:55                       ` Jaswinder Singh Rajput
2009-07-01 11:20       ` [PATCH 3/6 -tip] perf_counter: Add Generalized Hardware vectored co-processor " Ingo Molnar
2009-07-01 11:27         ` Ingo Molnar
2009-07-01 11:40           ` Jaswinder Singh Rajput
2009-07-01 11:49             ` Ingo Molnar
2009-07-02  9:44               ` [PATCH 1/2 -tip] perf_counter: Add generalized hardware vectored co-processor support for AMD and Intel Corei7/Nehalem Jaswinder Singh Rajput
2009-07-02  9:45                 ` [PATCH 2/2 -tip] perf_counter: Add generalized hardware interrupt support for AMD and Intel Corei7/Nehalem, Core2 and Atom Jaswinder Singh Rajput
2009-07-03 10:33                   ` Ingo Molnar
2009-07-03  7:38                 ` [PATCH 1/2 -tip] perf_counter: Add generalized hardware vectored co-processor support for AMD and Intel Corei7/Nehalem Jaswinder Singh Rajput
2009-07-03  9:30                   ` Ingo Molnar
2009-07-03 10:10                     ` Jaswinder Singh Rajput
2009-07-03 12:17                     ` [PATCH 3/3 -tip] perf list: avoid replicating functions Jaswinder Singh Rajput
2009-07-04  9:50                       ` Ingo Molnar
2009-07-03 10:29                 ` [PATCH 1/2 -tip] perf_counter: Add generalized hardware vectored co-processor support for AMD and Intel Corei7/Nehalem Ingo Molnar
2009-07-03 11:55                   ` Jaswinder Singh Rajput
2009-07-03 12:49                     ` Jaswinder Singh Rajput
2009-07-03 13:25                       ` Jaswinder Singh Rajput
2009-07-04 10:03                         ` Ingo Molnar
2009-07-04 14:05                           ` Jaswinder Singh Rajput
2009-07-04  9:49                     ` Ingo Molnar
2009-07-04 13:54                       ` Jaswinder Singh Rajput
2009-07-01 11:39     ` [PATCH 2/6 -tip] perf stat: treat same behaviour for all CYCLES and CLOCKS Ingo Molnar
2009-07-03  8:18       ` Paul Mackerras
2009-07-03  8:27         ` Ingo Molnar
2009-07-01 11:30   ` [tip:perfcounters/urgent] perf stat: Define MATCH_EVENT for easy attr checking tip-bot for Jaswinder Singh Rajput
2009-07-01 11:45 ` [GIT-PULL -tip][PATCH 0/6] perf_counter patches Ingo Molnar

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox