All of lore.kernel.org
 help / color / mirror / Atom feed
From: Jaswinder Singh Rajput <jaswinder@kernel.org>
To: Ingo Molnar <mingo@elte.hu>
Cc: Arjan van de Ven <arjan@infradead.org>,
	Paul Mackerras <paulus@samba.org>,
	Benjamin Herrenschmidt <benh@kernel.crashing.org>,
	Anton Blanchard <anton@samba.org>,
	Thomas Gleixner <tglx@linutronix.de>,
	Peter Zijlstra <peterz@infradead.org>,
	x86 maintainers <x86@kernel.org>,
	LKML <linux-kernel@vger.kernel.org>,
	Alan Cox <alan@lxorguk.ukuu.org.uk>
Subject: [PATCH 1/2 -tip] perf_counter: Add generalized hardware vectored co-processor support for AMD and Intel Corei7/Nehalem
Date: Thu, 02 Jul 2009 15:14:32 +0530	[thread overview]
Message-ID: <1246527872.13659.2.camel@hpdv5.satnam> (raw)
In-Reply-To: <20090701114928.GI15958@elte.hu>


This output is from an AMD box:

$ ./perf stat -e add -e multiply -e divide -e vec-idle-cycles -e vec-stall-cycles -e vec-ops -- ls -lR /usr/include/ > /dev/null

 Performance counter stats for 'ls -lR /usr/include/':

           4218  vec-adds                  (scaled from 66.60%)
           7426  vec-muls                  (scaled from 66.67%)
           5441  vec-divs                  (scaled from 66.29%)
      821982187  vec-idle-cycles           (scaled from 66.45%)
           2681  vec-stall-cycles          (scaled from 67.11%)
           7887  vec-ops                   (scaled from 66.88%)

    0.417614573  seconds time elapsed

$ ./perf stat -e add -e multiply -e divide -e vec-idle-cycles -e vec-stall-cycles -e vec-ops -- /usr/bin/rhythmbox ~jaswinder/Music/singhiskinng.mp3

 Performance counter stats for '/usr/bin/rhythmbox /home/jaswinder/Music/singhiskinng.mp3':

       17552264  vec-adds                  (scaled from 66.28%)
       19715258  vec-muls                  (scaled from 66.63%)
       15862733  vec-divs                  (scaled from 66.82%)
    23735187095  vec-idle-cycles           (scaled from 66.89%)
       11353159  vec-stall-cycles          (scaled from 66.90%)
       36628571  vec-ops                   (scaled from 66.48%)

  298.350012843  seconds time elapsed

$ ./perf stat -e add -e multiply -e divide -e vec-idle-cycles -e vec-stall-cycles -e vec-ops -- /usr/bin/vlc ~jaswinder/Videos/Linus_Torvalds_interview_with_Charlie_Rose_Part_1.flv

 Performance counter stats for '/usr/bin/vlc /home/jaswinder/Videos/Linus_Torvalds_interview_with_Charlie_Rose_Part_1.flv':

    20177177044  vec-adds                  (scaled from 66.63%)
    34101687027  vec-muls                  (scaled from 66.64%)
     3984060862  vec-divs                  (scaled from 66.71%)
    26349684710  vec-idle-cycles           (scaled from 66.65%)
     9052001905  vec-stall-cycles          (scaled from 66.66%)
    76440734242  vec-ops                   (scaled from 66.71%)

  272.523058097  seconds time elapsed

$ ./perf list shows vector events like:

  vec-adds OR add                          [Hardware vector event]
  vec-muls OR multiply                     [Hardware vector event]
  vec-divs OR divide                       [Hardware vector event]
  vec-idle-cycles OR vec-empty-cycles      [Hardware vector event]
  vec-stall-cycles OR vec-busy-cycles      [Hardware vector event]
  vec-ops OR vec-operations                [Hardware vector event]

Signed-off-by: Jaswinder Singh Rajput <jaswinderrajput@gmail.com>
---
 arch/x86/kernel/cpu/perf_counter.c |   45 +++++++++++++++++++++++++++++
 include/linux/perf_counter.h       |   15 ++++++++++
 kernel/perf_counter.c              |    1 +
 tools/perf/util/parse-events.c     |   55 ++++++++++++++++++++++++++++++++++++
 4 files changed, 116 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_counter.c b/arch/x86/kernel/cpu/perf_counter.c
index 36c3dc7..48f28b7 100644
--- a/arch/x86/kernel/cpu/perf_counter.c
+++ b/arch/x86/kernel/cpu/perf_counter.c
@@ -372,6 +372,22 @@ static const u64 atom_hw_cache_event_ids
  },
 };
 
+/*
+ * Generalized hw vectored co-processor event table
+ */
+
+static u64 __read_mostly hw_vector_event_ids[PERF_COUNT_HW_VECTOR_MAX];
+
+static const u64 nehalem_hw_vector_event_ids[] =
+{
+  [PERF_COUNT_HW_VECTOR_ADD]		= 0x01B1, /* UOPS_EXECUTED.PORT0     */
+  [PERF_COUNT_HW_VECTOR_MULTIPLY]	= 0x0214, /* ARITH.MUL               */
+  [PERF_COUNT_HW_VECTOR_DIVIDE]		= 0x0114, /* ARITH.CYCLES_DIV_BUSY   */
+  [PERF_COUNT_HW_VECTOR_IDLE_CYCLES]	= 0x0,
+  [PERF_COUNT_HW_VECTOR_STALL_CYCLES]	= 0x60A2, /* RESOURCE_STALLS.FPCW|MXCSR*/
+  [PERF_COUNT_HW_VECTOR_OPS]		= 0x0710, /* FP_COMP_OPS_EXE.X87|MMX|SSE_FP*/
+};
+
 static u64 intel_pmu_raw_event(u64 event)
 {
 #define CORE_EVNTSEL_EVENT_MASK		0x000000FFULL
@@ -481,6 +497,17 @@ static const u64 amd_hw_cache_event_ids
  },
 };
 
+static const u64 amd_hw_vector_event_ids[] =
+{
+  [PERF_COUNT_HW_VECTOR_ADD]		= 0x0100, /* Dispatched FPU Add	     */
+  [PERF_COUNT_HW_VECTOR_MULTIPLY]	= 0x0200, /* Dispatched FPU Multiply */
+  [PERF_COUNT_HW_VECTOR_DIVIDE]		= 0x0400, /* Dispatched FPU Store    */
+  [PERF_COUNT_HW_VECTOR_IDLE_CYCLES]	= 0x0001, /* FPU Empty cycles        */
+  [PERF_COUNT_HW_VECTOR_STALL_CYCLES]	= 0x00D7, /* Dispatch stall for FPU  */
+  [PERF_COUNT_HW_VECTOR_OPS]		= 0x0FCB, /* Retired x87|(MMX & 3Dnow)
+						   |(SSE & SSE2) Instructions */
+};
+
 /*
  * AMD Performance Monitor K7 and later.
  */
@@ -659,6 +686,17 @@ set_ext_hw_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr)
 	return 0;
 }
 
+static inline int
+set_hw_vector_attr(struct hw_perf_counter *hwc, struct perf_counter_attr *attr)
+{
+	if (attr->config >= PERF_COUNT_HW_VECTOR_MAX)
+		return -EINVAL;
+
+	hwc->config |= hw_vector_event_ids[attr->config];
+
+	return 0;
+}
+
 /*
  * Setup the hardware configuration for a given attr_type
  */
@@ -716,6 +754,9 @@ static int __hw_perf_counter_init(struct perf_counter *counter)
 	if (attr->type == PERF_TYPE_HW_CACHE)
 		return set_ext_hw_attr(hwc, attr);
 
+	if (attr->type == PERF_TYPE_HW_VECTOR)
+		return set_hw_vector_attr(hwc, attr);
+
 	if (attr->config >= x86_pmu.max_events)
 		return -EINVAL;
 	/*
@@ -1444,6 +1485,8 @@ static int intel_pmu_init(void)
 	case 26:
 		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
+		memcpy(hw_vector_event_ids, nehalem_hw_vector_event_ids,
+		       sizeof(hw_vector_event_ids));
 
 		pr_cont("Nehalem/Corei7 events, ");
 		break;
@@ -1468,6 +1511,8 @@ static int amd_pmu_init(void)
 	/* Events are common for all AMDs */
 	memcpy(hw_cache_event_ids, amd_hw_cache_event_ids,
 	       sizeof(hw_cache_event_ids));
+	memcpy(hw_vector_event_ids, amd_hw_vector_event_ids,
+	       sizeof(hw_vector_event_ids));
 
 	return 0;
 }
diff --git a/include/linux/perf_counter.h b/include/linux/perf_counter.h
index 5e970c7..e91b712 100644
--- a/include/linux/perf_counter.h
+++ b/include/linux/perf_counter.h
@@ -31,6 +31,7 @@ enum perf_type_id {
 	PERF_TYPE_TRACEPOINT			= 2,
 	PERF_TYPE_HW_CACHE			= 3,
 	PERF_TYPE_RAW				= 4,
+	PERF_TYPE_HW_VECTOR			= 5,
 
 	PERF_TYPE_MAX,				/* non-ABI */
 };
@@ -89,6 +90,20 @@ enum perf_hw_cache_op_result_id {
 };
 
 /*
+ * Generalized hardware vectored co-processor counters:
+ */
+enum perf_hw_vector_id {
+	PERF_COUNT_HW_VECTOR_ADD		= 0,
+	PERF_COUNT_HW_VECTOR_MULTIPLY		= 1,
+	PERF_COUNT_HW_VECTOR_DIVIDE		= 2,
+	PERF_COUNT_HW_VECTOR_IDLE_CYCLES	= 3,
+	PERF_COUNT_HW_VECTOR_STALL_CYCLES	= 4,
+	PERF_COUNT_HW_VECTOR_OPS		= 5,
+
+	PERF_COUNT_HW_VECTOR_MAX,		/* non-ABI */
+};
+
+/*
  * Special "software" counters provided by the kernel, even if the hardware
  * does not support performance counters. These counters measure various
  * physical and sw events of the kernel (and allow the profiling of them as
diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
index d55a50d..dd3848a 100644
--- a/kernel/perf_counter.c
+++ b/kernel/perf_counter.c
@@ -3838,6 +3838,7 @@ perf_counter_alloc(struct perf_counter_attr *attr,
 	case PERF_TYPE_RAW:
 	case PERF_TYPE_HARDWARE:
 	case PERF_TYPE_HW_CACHE:
+	case PERF_TYPE_HW_VECTOR:
 		pmu = hw_perf_counter_init(counter);
 		break;
 
diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c
index 5184959..8213dfb 100644
--- a/tools/perf/util/parse-events.c
+++ b/tools/perf/util/parse-events.c
@@ -40,6 +40,17 @@ static struct event_symbol event_symbols[] = {
   { CSW(CPU_MIGRATIONS),	"cpu-migrations",	"migrations"	},
 };
 
+#define CHVECTOR(x) .type = PERF_TYPE_HW_VECTOR, .config = PERF_COUNT_HW_VECTOR_##x
+
+static struct event_symbol vector_event_symbols[] = {
+  { CHVECTOR(ADD),		"vec-adds",		"add"		},
+  { CHVECTOR(MULTIPLY),		"vec-muls",		"multiply"	},
+  { CHVECTOR(DIVIDE),		"vec-divs",		"divide"	},
+  { CHVECTOR(IDLE_CYCLES),	"vec-idle-cycles",	"vec-empty-cycles"},
+  { CHVECTOR(STALL_CYCLES),	"vec-stall-cycles",	"vec-busy-cycles"},
+  { CHVECTOR(OPS),		"vec-ops",		"vec-operations"},
+};
+
 #define __PERF_COUNTER_FIELD(config, name) \
 	((config & PERF_COUNTER_##name##_MASK) >> PERF_COUNTER_##name##_SHIFT)
 
@@ -172,6 +183,11 @@ char *event_name(int counter)
 		return event_cache_name(cache_type, cache_op, cache_result);
 	}
 
+	case PERF_TYPE_HW_VECTOR:
+		if (config < PERF_COUNT_HW_VECTOR_MAX)
+			return vector_event_symbols[config].symbol;
+		return "unknown-vector";
+
 	case PERF_TYPE_SOFTWARE:
 		if (config < PERF_COUNT_SW_MAX)
 			return sw_event_names[config];
@@ -280,6 +296,21 @@ static int check_events(const char *str, unsigned int i)
 	return 0;
 }
 
+static int check_vector_events(const char *str, unsigned int i)
+{
+	int n;
+
+	n = strlen(vector_event_symbols[i].symbol);
+	if (!strncmp(str, vector_event_symbols[i].symbol, n))
+		return n;
+
+	n = strlen(vector_event_symbols[i].alias);
+	if (n)
+		if (!strncmp(str, vector_event_symbols[i].alias, n))
+			return n;
+	return 0;
+}
+
 static int
 parse_symbolic_event(const char **strp, struct perf_counter_attr *attr)
 {
@@ -296,6 +327,17 @@ parse_symbolic_event(const char **strp, struct perf_counter_attr *attr)
 			return 1;
 		}
 	}
+
+	for (i = 0; i < ARRAY_SIZE(vector_event_symbols); i++) {
+		n = check_vector_events(str, i);
+		if (n > 0) {
+			attr->type = vector_event_symbols[i].type;
+			attr->config = vector_event_symbols[i].config;
+			*strp = str + n;
+			return 1;
+		}
+	}
+
 	return 0;
 }
 
@@ -420,6 +462,7 @@ static const char * const event_type_descriptors[] = {
 	"Software event",
 	"Tracepoint event",
 	"Hardware cache event",
+	"Hardware vector event",
 };
 
 /*
@@ -468,6 +511,18 @@ void print_events(void)
 	}
 
 	fprintf(stderr, "\n");
+	syms = vector_event_symbols;
+	type = syms->type;
+	for (i = 0; i < ARRAY_SIZE(vector_event_symbols); i++, syms++) {
+		if (strlen(syms->alias))
+			sprintf(name, "%s OR %s", syms->symbol, syms->alias);
+		else
+			strcpy(name, syms->symbol);
+		fprintf(stderr, "  %-40s [%s]\n", name,
+			event_type_descriptors[type]);
+	}
+
+	fprintf(stderr, "\n");
 	fprintf(stderr, "  %-40s [raw hardware event descriptor]\n",
 		"rNNN");
 	fprintf(stderr, "\n");
-- 
1.6.0.6




  reply	other threads:[~2009-07-02  9:45 UTC|newest]

Thread overview: 41+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2009-07-01  9:33 [GIT-PULL -tip][PATCH 0/6] perf_counter patches Jaswinder Singh Rajput
2009-07-01  9:35 ` [PATCH 1/6 -tip] perf stat: define MATCH_EVENT for easy attrs checking Jaswinder Singh Rajput
2009-07-01  9:36   ` [PATCH 2/6 -tip] perf stat: treat same behaviour for all CYCLES and CLOCKS Jaswinder Singh Rajput
2009-07-01  9:37     ` [PATCH 3/6 -tip] perf_counter: Add Generalized Hardware vectored co-processor support for AMD Jaswinder Singh Rajput
2009-07-01  9:38       ` [PATCH 4/6 -tip] perf_counter: Add Generalized Hardware interrupt " Jaswinder Singh Rajput
2009-07-01  9:38         ` [PATCH 5/6 -tip] perf_counter: Add hardware vector events for nehalem Jaswinder Singh Rajput
2009-07-01  9:40           ` [PATCH 6/6 -tip] perf_counter: Add hardware interrupt events for nehalem, core2 and atom Jaswinder Singh Rajput
2009-07-01 11:24         ` [PATCH 4/6 -tip] perf_counter: Add Generalized Hardware interrupt support for AMD Ingo Molnar
2009-07-03 12:01           ` Jaswinder Singh Rajput
2009-07-04 10:22             ` Ingo Molnar
2009-07-04 14:17               ` Jaswinder Singh Rajput
2009-07-05  1:11                 ` Ingo Molnar
2009-07-05  4:29                   ` Jaswinder Singh Rajput
2009-07-05  8:04                     ` Ingo Molnar
2009-07-05  9:01                       ` Jaswinder Singh Rajput
2009-07-05  9:55                       ` Jaswinder Singh Rajput
2009-07-01 11:20       ` [PATCH 3/6 -tip] perf_counter: Add Generalized Hardware vectored co-processor " Ingo Molnar
2009-07-01 11:27         ` Ingo Molnar
2009-07-01 11:40           ` Jaswinder Singh Rajput
2009-07-01 11:49             ` Ingo Molnar
2009-07-02  9:44               ` Jaswinder Singh Rajput [this message]
2009-07-02  9:45                 ` [PATCH 2/2 -tip] perf_counter: Add generalized hardware interrupt support for AMD and Intel Corei7/Nehalem, Core2 and Atom Jaswinder Singh Rajput
2009-07-03 10:33                   ` Ingo Molnar
2009-07-03  7:38                 ` [PATCH 1/2 -tip] perf_counter: Add generalized hardware vectored co-processor support for AMD and Intel Corei7/Nehalem Jaswinder Singh Rajput
2009-07-03  9:30                   ` Ingo Molnar
2009-07-03 10:10                     ` Jaswinder Singh Rajput
2009-07-03 12:17                     ` [PATCH 3/3 -tip] perf list: avoid replicating functions Jaswinder Singh Rajput
2009-07-04  9:50                       ` Ingo Molnar
2009-07-03 10:29                 ` [PATCH 1/2 -tip] perf_counter: Add generalized hardware vectored co-processor support for AMD and Intel Corei7/Nehalem Ingo Molnar
2009-07-03 11:55                   ` Jaswinder Singh Rajput
2009-07-03 12:49                     ` Jaswinder Singh Rajput
2009-07-03 13:25                       ` Jaswinder Singh Rajput
2009-07-04 10:03                         ` Ingo Molnar
2009-07-04 14:05                           ` Jaswinder Singh Rajput
2009-07-04  9:49                     ` Ingo Molnar
2009-07-04 13:54                       ` Jaswinder Singh Rajput
2009-07-01 11:39     ` [PATCH 2/6 -tip] perf stat: treat same behaviour for all CYCLES and CLOCKS Ingo Molnar
2009-07-03  8:18       ` Paul Mackerras
2009-07-03  8:27         ` Ingo Molnar
2009-07-01 11:30   ` [tip:perfcounters/urgent] perf stat: Define MATCH_EVENT for easy attr checking tip-bot for Jaswinder Singh Rajput
2009-07-01 11:45 ` [GIT-PULL -tip][PATCH 0/6] perf_counter patches Ingo Molnar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=1246527872.13659.2.camel@hpdv5.satnam \
    --to=jaswinder@kernel.org \
    --cc=alan@lxorguk.ukuu.org.uk \
    --cc=anton@samba.org \
    --cc=arjan@infradead.org \
    --cc=benh@kernel.crashing.org \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=paulus@samba.org \
    --cc=peterz@infradead.org \
    --cc=tglx@linutronix.de \
    --cc=x86@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.