From: Lin Ming <ming.m.lin@intel.com>
To: Cyrill Gorcunov <gorcunov@gmail.com>, Ingo Molnar <mingo@elte.hu>,
Peter Zijlstra <peterz@infradead.org>
Cc: lkml <linux-kernel@vger.kernel.org>
Subject: [RFC][PATCH 2/2] x86,perf: add cache events in p4 PMU
Date: Thu, 18 Mar 2010 18:33:12 +0800 [thread overview]
Message-ID: <1268908392.13901.128.camel@minggr.sh.intel.com> (raw)
Add cache events in p4 PMU.
Move the HT bit setting code from p4_pmu_event_map to p4_hw_config
so that the cache events also get the HT bit set correctly.
Tested on my P4 desktop; the following 6 cache events work:
L1-dcache-load-misses
LLC-load-misses
dTLB-load-misses
dTLB-store-misses
iTLB-loads
iTLB-load-misses
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
---
arch/x86/include/asm/msr-index.h | 2 +
arch/x86/include/asm/perf_event_p4.h | 10 ++
arch/x86/kernel/cpu/perf_event_p4.c | 153 ++++++++++++++++++++++++++++++++--
3 files changed, 159 insertions(+), 6 deletions(-)
diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index 1cd58cd..aef562c 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -357,6 +357,8 @@
#define MSR_P4_U2L_ESCR0 0x000003b0
#define MSR_P4_U2L_ESCR1 0x000003b1
+#define MSR_P4_PEBS_MATRIX_VERT 0x000003f2
+
/* Intel Core-based CPU performance counters */
#define MSR_CORE_PERF_FIXED_CTR0 0x00000309
#define MSR_CORE_PERF_FIXED_CTR1 0x0000030a
diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h
index 7d3406a..871249c 100644
--- a/arch/x86/include/asm/perf_event_p4.h
+++ b/arch/x86/include/asm/perf_event_p4.h
@@ -708,4 +708,14 @@ enum P4_EVENTS_ATTR {
P4_MAKE_EVENT_ATTR(P4_INSTR_COMPLETED, BOGUS, 1),
};
+enum { /* keys of the cache-event templates: each value is used as an index into p4_templates[] and stored in its .key field */
+ KEY_P4_L1D_OP_READ_RESULT_MISS = 7, /* start at the old UOP_TYPE slot so slots [0]..[6] are not overridden; NOTE(review): presumably == PERF_COUNT_HW_MAX, prefer that constant if it is in scope here */
+ KEY_P4_LL_OP_READ_RESULT_MISS,
+ KEY_P4_DTLB_OP_READ_RESULT_MISS,
+ KEY_P4_DTLB_OP_WRITE_RESULT_MISS,
+ KEY_P4_ITLB_OP_READ_RESULT_ACCESS,
+ KEY_P4_ITLB_OP_READ_RESULT_MISS,
+ KEY_P4_UOP_TYPE, /* replaces the former literal index 7; now the last template slot */
+};
+
#endif /* PERF_EVENT_P4_H */
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index e088010..4513d2b 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -19,6 +19,11 @@ struct p4_event_template {
u64 config; /* packed predefined bits */
int dep; /* upstream dependency event index */
int key; /* index into p4_templates */
+ u64 msr; /*
+ * the high 32 bits set into MSR_IA32_PEBS_ENABLE and
+ * the low 32 bits set into MSR_P4_PEBS_MATRIX_VERT
+ * for cache events
+ */
unsigned int emask; /* ESCR EventMask */
unsigned int escr_msr[2]; /* ESCR MSR for this event */
unsigned int cntr[2]; /* counter index (offset) */
@@ -31,6 +36,67 @@ struct p4_pmu_res {
static DEFINE_PER_CPU(struct p4_pmu_res, p4_pmu_config);
+#define P4_CACHE_EVENT_CONFIG(event, bit) /* ESCR event select | ESCR event mask (event##_##bit) | CCCR ESCR select; fully parenthesized so it composes safely inside larger expressions */ \
+ (p4_config_pack_escr(P4_EVENT_UNPACK_EVENT(event) << P4_EVNTSEL_EVENT_SHIFT) | \
+ p4_config_pack_escr((event##_##bit) << P4_EVNTSEL_EVENTMASK_SHIFT) | \
+ p4_config_pack_cccr(P4_EVENT_UNPACK_SELECTOR(event) << P4_CCCR_ESCR_SELECT_SHIFT))
+
+static __initconst u64 p4_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{ /* [cache][op][result] -> P4_CACHE_EVENT_CONFIG(...) OR-ed with the KEY_P4_* template key; copied into hw_cache_event_ids by p4_pmu_init() */
+ [ C(L1D ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0, /* NOTE(review): 0x0 presumably means "no event for this combination" - confirm against the generic x86 perf code */
+ /* 1stL_cache_load_miss_retired: replay_event config; the key selects the L1D-miss template */
+ [ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS)
+ | KEY_P4_L1D_OP_READ_RESULT_MISS,
+ },
+ },
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ /* 2ndL_cache_load_miss_retired: same replay_event config as L1D, only the template key differs */
+ [ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS)
+ | KEY_P4_LL_OP_READ_RESULT_MISS,
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ /* DTLB_load_miss_retired: same replay_event config, distinguished by the template key */
+ [ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS)
+ | KEY_P4_DTLB_OP_READ_RESULT_MISS,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ /* DTLB_store_miss_retired: same replay_event config, distinguished by the template key */
+ [ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS)
+ | KEY_P4_DTLB_OP_WRITE_RESULT_MISS,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ /* ITLB_reference.HIT: ITLB_reference event with the HIT event-mask bit */
+ [ C(RESULT_ACCESS) ] = P4_CACHE_EVENT_CONFIG(P4_ITLB_REFERENCE, HIT)
+ | KEY_P4_ITLB_OP_READ_RESULT_ACCESS,
+
+ /* ITLB_reference.MISS: ITLB_reference event with the MISS event-mask bit */
+ [ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_ITLB_REFERENCE, MISS)
+ | KEY_P4_ITLB_OP_READ_RESULT_MISS,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1, /* NOTE(review): -1 presumably marks an invalid combination (ITLB write) - confirm against the generic x86 perf code */
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1, /* NOTE(review): as for OP_WRITE, -1 presumably marks ITLB prefetch as invalid - confirm */
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+};
+
/*
* WARN: CCCR1 doesn't have a working enable bit so try to not
* use it if possible
@@ -121,11 +187,77 @@ struct p4_event_template p4_templates[] = {
.escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 },
.cntr = { 0, 2 },
},
- [7] = {
+ [KEY_P4_L1D_OP_READ_RESULT_MISS] = {
+ .opcode = P4_REPLAY_EVENT,
+ .config = 0,
+ .dep = -1,
+ .msr = (u64)(1 << 0 | 1 << 24) << 32 | (1 << 0),
+ .key = KEY_P4_L1D_OP_READ_RESULT_MISS,
+ .emask =
+ P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS),
+ .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 },
+ .cntr = { 16, 17 },
+ },
+ [KEY_P4_LL_OP_READ_RESULT_MISS] = {
+ .opcode = P4_REPLAY_EVENT,
+ .config = 0,
+ .dep = -1,
+ .msr = (u64)(1 << 1 | 1 << 24) << 32 | (1 << 0),
+ .key = KEY_P4_LL_OP_READ_RESULT_MISS,
+ .emask =
+ P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS),
+ .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 },
+ .cntr = { 16, 17 },
+ },
+ [KEY_P4_DTLB_OP_READ_RESULT_MISS] = {
+ .opcode = P4_REPLAY_EVENT,
+ .config = 0,
+ .dep = -1,
+ .msr = (u64)(1 << 2 | 1 << 24) << 32 | (1 << 0),
+ .key = KEY_P4_DTLB_OP_READ_RESULT_MISS,
+ .emask =
+ P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS),
+ .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 },
+ .cntr = { 16, 17 },
+ },
+ [KEY_P4_DTLB_OP_WRITE_RESULT_MISS] = {
+ .opcode = P4_REPLAY_EVENT,
+ .config = 0,
+ .dep = -1,
+ .msr = (u64)(1 << 2 | 1 << 24) << 32 | (1 << 1),
+ .key = KEY_P4_DTLB_OP_WRITE_RESULT_MISS,
+ .emask =
+ P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS),
+ .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 },
+ .cntr = { 16, 17 },
+ },
+ [KEY_P4_ITLB_OP_READ_RESULT_ACCESS] = {
+ .opcode = P4_ITLB_REFERENCE,
+ .config = 0,
+ .dep = -1,
+ .msr = 0,
+ .key = KEY_P4_ITLB_OP_READ_RESULT_ACCESS,
+ .emask =
+ P4_EVENT_ATTR(P4_ITLB_REFERENCE, HIT),
+ .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
+ .cntr = { 0, 2 },
+ },
+ [KEY_P4_ITLB_OP_READ_RESULT_MISS] = {
+ .opcode = P4_ITLB_REFERENCE,
+ .config = 0,
+ .dep = -1,
+ .msr = 0,
+ .key = KEY_P4_ITLB_OP_READ_RESULT_MISS,
+ .emask =
+ P4_EVENT_ATTR(P4_ITLB_REFERENCE, MISS),
+ .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 },
+ .cntr = { 0, 2 },
+ },
+ [KEY_P4_UOP_TYPE] = {
.opcode = P4_UOP_TYPE,
.config = 0,
.dep = -1,
- .key = 7,
+ .key = KEY_P4_UOP_TYPE,
.emask =
P4_EVENT_ATTR(P4_UOP_TYPE, TAGLOADS) |
P4_EVENT_ATTR(P4_UOP_TYPE, TAGSTORES),
@@ -155,10 +287,6 @@ static u64 p4_pmu_event_map(int hw_event)
config |= p4_config_pack_cccr(P4_EVENT_UNPACK_SELECTOR(tpl->opcode) << P4_CCCR_ESCR_SELECT_SHIFT);
config |= p4_config_pack_cccr(hw_event & P4_CCCR_RESERVED);
- /* on HT machine we need a special bit */
- if (p4_ht_active() && p4_ht_thread(raw_smp_processor_id()))
- config = p4_set_ht_bit(config);
-
return config;
}
@@ -211,6 +339,10 @@ static int p4_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc)
/* Count user and OS events unless not requested to */
hwc->config |= p4_config_pack_escr(p4_default_escr_conf(cpu, attr->exclude_kernel,
attr->exclude_user));
+ /* on HT machine we need a special bit */
+ if (p4_ht_active() && p4_ht_thread(cpu))
+ hwc->config = p4_set_ht_bit(hwc->config);
+
return 0;
}
@@ -271,6 +403,12 @@ static void p4_pmu_enable_event(struct perf_event *event)
pr_crit("%s: Wrong index: %d\n", __func__, hwc->idx);
return;
}
+
+ if (tpl->msr) {
+ (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, tpl->msr >> 32);
+ (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, tpl->msr & 0xffffffff);
+ }
+
escr_base = (u64)tpl->escr_msr[thread];
/*
@@ -579,6 +717,9 @@ static __init int p4_pmu_init(void)
return -ENODEV;
}
+ memcpy(hw_cache_event_ids, p4_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+
pr_cont("Netburst events, ");
x86_pmu = p4_pmu;
next reply other threads:[~2010-03-18 10:50 UTC|newest]
Thread overview: 5+ messages / expand[flat|nested] mbox.gz Atom feed top
2010-03-18 10:33 Lin Ming [this message]
2010-03-18 15:56 ` [RFC][PATCH 2/2] x86,perf: add cache events in p4 PMU Ingo Molnar
2010-03-18 16:01 ` Cyrill Gorcunov
2010-03-18 20:59 ` Cyrill Gorcunov
2010-03-18 17:38 ` [tip:perf/core] perf, x86: Add cache events for the Pentium-4 PMU tip-bot for Lin Ming
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1268908392.13901.128.camel@minggr.sh.intel.com \
--to=ming.m.lin@intel.com \
--cc=gorcunov@gmail.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@elte.hu \
--cc=peterz@infradead.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.