* [PATCH v2 -tip] perf: x86, add SandyBridge support
@ 2011-02-28 7:22 Lin Ming
2011-02-28 8:20 ` Stephane Eranian
2011-02-28 9:15 ` Peter Zijlstra
0 siblings, 2 replies; 30+ messages in thread
From: Lin Ming @ 2011-02-28 7:22 UTC (permalink / raw)
To: Peter Zijlstra, Ingo Molnar, Stephane Eranian, Andi Kleen; +Cc: lkml
This patch adds basic SandyBridge support, including hardware cache
events and PEBS events support.
LLC-* hardware cache events don't work for now; they depend on the
offcore patches.
All PEBS events are tested on my SandyBridge machine and work well.
Note that SandyBridge does not support the INSTR_RETIRED.ANY(0x00c0) PEBS
event; instead it supports the INST_RETIRED.PRECDIST(0x01c0) event, and on
PMC1 only.
v1 -> v2:
- add more raw and PEBS events constraints
- use offcore events for LLC-* cache events
- remove the call to Nehalem workaround enable_all function
todo:
- precise store
- precise distribution of instructions retired
Signed-off-by: Lin Ming <ming.m.lin@intel.com>
---
arch/x86/kernel/cpu/perf_event.c | 2 +
arch/x86/kernel/cpu/perf_event_intel.c | 123 +++++++++++++++++++++++++++++
arch/x86/kernel/cpu/perf_event_intel_ds.c | 44 ++++++++++-
3 files changed, 168 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 10bfe24..49d51be 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -148,6 +148,8 @@ struct cpu_hw_events {
*/
#define INTEL_EVENT_CONSTRAINT(c, n) \
EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
+#define INTEL_EVENT_CONSTRAINT2(c, n) \
+ EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
/*
* Constraint on the Event code + UMask + fixed-mask
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 084b383..3085868 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -76,6 +76,19 @@ static struct event_constraint intel_westmere_event_constraints[] =
EVENT_CONSTRAINT_END
};
+static struct event_constraint intel_snb_event_constraints[] =
+{
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
+ INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
+ INTEL_EVENT_CONSTRAINT(0xb7, 0x1), /* OFF_CORE_RESPONSE_0 */
+ INTEL_EVENT_CONSTRAINT(0xbb, 0x8), /* OFF_CORE_RESPONSE_1 */
+ INTEL_EVENT_CONSTRAINT2(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
+ INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
+ EVENT_CONSTRAINT_END
+};
+
static struct event_constraint intel_gen_event_constraints[] =
{
FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -89,6 +102,106 @@ static u64 intel_pmu_event_map(int hw_event)
return intel_perfmon_event_map[hw_event];
}
+static __initconst const u64 snb_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS */
+ [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPLACEMENT */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES */
+ [ C(RESULT_MISS) ] = 0x0851, /* L1D.ALL_M_REPLACEMENT */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x024e, /* HW_PRE_REQ.DL1_MISS */
+ },
+ },
+ [ C(L1I ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(LL ) ] = {
+ /*
+ * TBD: Need Off-core Response Performance Monitoring support
+ */
+ [ C(OP_READ) ] = {
+ /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01bb,
+ },
+ [ C(OP_WRITE) ] = {
+ /* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01bb,
+ },
+ [ C(OP_PREFETCH) ] = {
+ /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
+ [ C(RESULT_ACCESS) ] = 0x01b7,
+ /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
+ [ C(RESULT_MISS) ] = 0x01bb,
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x01d0, /* MEM_UOP_RETIRED.LOADS */
+ [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x02d0, /* MEM_UOP_RETIRED.STORES */
+ [ C(RESULT_MISS) ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT */
+ [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
+ [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+};
+
static __initconst const u64 westmere_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -1062,6 +1175,16 @@ static __init int intel_pmu_init(void)
pr_cont("Westmere events, ");
break;
+ case 42: /* SandyBridge */
+ memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
+ sizeof(hw_cache_event_ids));
+
+ intel_pmu_lbr_init_nhm();
+
+ x86_pmu.event_constraints = intel_snb_event_constraints;
+ pr_cont("SandyBridge events, ");
+ break;
+
default:
/*
* default constraints for v2 and up
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index b7dcd9f..e60f91b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -388,6 +388,42 @@ static struct event_constraint intel_nehalem_pebs_events[] = {
EVENT_CONSTRAINT_END
};
+static struct event_constraint intel_snb_pebs_events[] = {
+ PEBS_EVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
+ PEBS_EVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
+ PEBS_EVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
+ PEBS_EVENT_CONSTRAINT(0x01c4, 0xf), /* BR_INST_RETIRED.CONDITIONAL */
+ PEBS_EVENT_CONSTRAINT(0x02c4, 0xf), /* BR_INST_RETIRED.NEAR_CALL */
+ PEBS_EVENT_CONSTRAINT(0x04c4, 0xf), /* BR_INST_RETIRED.ALL_BRANCHES */
+ PEBS_EVENT_CONSTRAINT(0x08c4, 0xf), /* BR_INST_RETIRED.NEAR_RETURN */
+ PEBS_EVENT_CONSTRAINT(0x10c4, 0xf), /* BR_INST_RETIRED.NOT_TAKEN */
+ PEBS_EVENT_CONSTRAINT(0x20c4, 0xf), /* BR_INST_RETIRED.NEAR_TAKEN */
+ PEBS_EVENT_CONSTRAINT(0x40c4, 0xf), /* BR_INST_RETIRED.FAR_BRANCH */
+ PEBS_EVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */
+ PEBS_EVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
+ PEBS_EVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */
+ PEBS_EVENT_CONSTRAINT(0x10c5, 0xf), /* BR_MISP_RETIRED.NOT_TAKEN */
+ PEBS_EVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.TAKEN */
+ PEBS_EVENT_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
+ PEBS_EVENT_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORE */
+ PEBS_EVENT_CONSTRAINT(0x01d0, 0xf), /* MEM_UOP_RETIRED.LOADS */
+ PEBS_EVENT_CONSTRAINT(0x02d0, 0xf), /* MEM_UOP_RETIRED.STORES */
+ PEBS_EVENT_CONSTRAINT(0x10d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS */
+ PEBS_EVENT_CONSTRAINT(0x20d0, 0xf), /* MEM_UOP_RETIRED.LOCK */
+ PEBS_EVENT_CONSTRAINT(0x40d0, 0xf), /* MEM_UOP_RETIRED.SPLIT */
+ PEBS_EVENT_CONSTRAINT(0x80d0, 0xf), /* MEM_UOP_RETIRED.ALL */
+ PEBS_EVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */
+ PEBS_EVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */
+ PEBS_EVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.LLC_HIT */
+ PEBS_EVENT_CONSTRAINT(0x40d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.HIT_LFB */
+ PEBS_EVENT_CONSTRAINT(0x01d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS */
+ PEBS_EVENT_CONSTRAINT(0x02d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT */
+ PEBS_EVENT_CONSTRAINT(0x04d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM */
+ PEBS_EVENT_CONSTRAINT(0x08d2, 0xf), /* MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_NONE */
+ PEBS_EVENT_CONSTRAINT(0x02d4, 0xf), /* MEM_LOAD_UOPS_MISC_RETIRED.LLC_MISS */
+ EVENT_CONSTRAINT_END
+};
+
static struct event_constraint *
intel_pebs_constraints(struct perf_event *event)
{
@@ -702,7 +738,13 @@ static void intel_ds_init(void)
printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
- x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
+ switch (boot_cpu_data.x86_model) {
+ case 42: /* SandyBridge */
+ x86_pmu.pebs_constraints = intel_snb_pebs_events;
+ break;
+ default:
+ x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
+ }
break;
default:
^ permalink raw reply related [flat|nested] 30+ messages in thread
* Re: [PATCH v2 -tip] perf: x86, add SandyBridge support
2011-02-28 7:22 [PATCH v2 -tip] perf: x86, add SandyBridge support Lin Ming
@ 2011-02-28 8:20 ` Stephane Eranian
2011-02-28 8:51 ` Lin Ming
2011-02-28 9:15 ` Peter Zijlstra
1 sibling, 1 reply; 30+ messages in thread
From: Stephane Eranian @ 2011-02-28 8:20 UTC (permalink / raw)
To: Lin Ming; +Cc: Peter Zijlstra, Ingo Molnar, Andi Kleen, lkml
On Mon, Feb 28, 2011 at 8:22 AM, Lin Ming <ming.m.lin@intel.com> wrote:
> This patch adds basic SandyBridge support, including hardware cache
> events and PEBS events support.
>
> LLC-* hardware cache events don't work for now; they depend on the
> offcore patches.
>
> All PEBS events are tested on my SandyBridge machine and work well.
> Note that SandyBridge does not support INSTR_RETIRED.ANY(0x00c0) PEBS
> event, instead it supports INST_RETIRED.PRECDIST(0x01c0) event and PMC1
> only.
>
> v1 -> v2:
> - add more raw and PEBS events constraints
> - use offcore events for LLC-* cache events
> - remove the call to Nehalem workaround enable_all function
>
> todo:
> - precise store
> - precise distribution of instructions retired
>
> Signed-off-by: Lin Ming <ming.m.lin@intel.com>
> ---
> arch/x86/kernel/cpu/perf_event.c | 2 +
> arch/x86/kernel/cpu/perf_event_intel.c | 123 +++++++++++++++++++++++++++++
> arch/x86/kernel/cpu/perf_event_intel_ds.c | 44 ++++++++++-
> 3 files changed, 168 insertions(+), 1 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> index 10bfe24..49d51be 100644
> --- a/arch/x86/kernel/cpu/perf_event.c
> +++ b/arch/x86/kernel/cpu/perf_event.c
> @@ -148,6 +148,8 @@ struct cpu_hw_events {
> */
> #define INTEL_EVENT_CONSTRAINT(c, n) \
> EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
> +#define INTEL_EVENT_CONSTRAINT2(c, n) \
> + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
>
> /*
> * Constraint on the Event code + UMask + fixed-mask
> diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
> index 084b383..3085868 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> @@ -76,6 +76,19 @@ static struct event_constraint intel_westmere_event_constraints[] =
> EVENT_CONSTRAINT_END
> };
>
> +static struct event_constraint intel_snb_event_constraints[] =
> +{
> + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
> + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
> + /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
> + INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
> + INTEL_EVENT_CONSTRAINT(0xb7, 0x1), /* OFF_CORE_RESPONSE_0 */
> + INTEL_EVENT_CONSTRAINT(0xbb, 0x8), /* OFF_CORE_RESPONSE_1 */
> + INTEL_EVENT_CONSTRAINT2(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
> + INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
> + EVENT_CONSTRAINT_END
> +};
> +
> static struct event_constraint intel_gen_event_constraints[] =
> {
> FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
> @@ -89,6 +102,106 @@ static u64 intel_pmu_event_map(int hw_event)
> return intel_perfmon_event_map[hw_event];
> }
>
> +static __initconst const u64 snb_hw_cache_event_ids
> + [PERF_COUNT_HW_CACHE_MAX]
> + [PERF_COUNT_HW_CACHE_OP_MAX]
> + [PERF_COUNT_HW_CACHE_RESULT_MAX] =
> +{
> + [ C(L1D) ] = {
> + [ C(OP_READ) ] = {
> + [ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS */
> + [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPLACEMENT */
> + },
> + [ C(OP_WRITE) ] = {
> + [ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES */
> + [ C(RESULT_MISS) ] = 0x0851, /* L1D.ALL_M_REPLACEMENT */
> + },
> + [ C(OP_PREFETCH) ] = {
> + [ C(RESULT_ACCESS) ] = 0x0,
> + [ C(RESULT_MISS) ] = 0x024e, /* HW_PRE_REQ.DL1_MISS */
> + },
> + },
> + [ C(L1I ) ] = {
> + [ C(OP_READ) ] = {
> + [ C(RESULT_ACCESS) ] = 0x0,
> + [ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */
> + },
> + [ C(OP_WRITE) ] = {
> + [ C(RESULT_ACCESS) ] = -1,
> + [ C(RESULT_MISS) ] = -1,
> + },
> + [ C(OP_PREFETCH) ] = {
> + [ C(RESULT_ACCESS) ] = 0x0,
> + [ C(RESULT_MISS) ] = 0x0,
> + },
> + },
> + [ C(LL ) ] = {
> + /*
> + * TBD: Need Off-core Response Performance Monitoring support
> + */
> + [ C(OP_READ) ] = {
> + /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
> + [ C(RESULT_ACCESS) ] = 0x01b7,
> + /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
> + [ C(RESULT_MISS) ] = 0x01bb,
> + },
> + [ C(OP_WRITE) ] = {
> + /* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */
> + [ C(RESULT_ACCESS) ] = 0x01b7,
> + /* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */
> + [ C(RESULT_MISS) ] = 0x01bb,
> + },
> + [ C(OP_PREFETCH) ] = {
> + /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
> + [ C(RESULT_ACCESS) ] = 0x01b7,
> + /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
> + [ C(RESULT_MISS) ] = 0x01bb,
> + },
> + },
> + [ C(DTLB) ] = {
> + [ C(OP_READ) ] = {
> + [ C(RESULT_ACCESS) ] = 0x01d0, /* MEM_UOP_RETIRED.LOADS */
> + [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */
> + },
> + [ C(OP_WRITE) ] = {
> + [ C(RESULT_ACCESS) ] = 0x02d0, /* MEM_UOP_RETIRED.STORES */
> + [ C(RESULT_MISS) ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
> + },
> + [ C(OP_PREFETCH) ] = {
> + [ C(RESULT_ACCESS) ] = 0x0,
> + [ C(RESULT_MISS) ] = 0x0,
> + },
> + },
> + [ C(ITLB) ] = {
> + [ C(OP_READ) ] = {
> + [ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT */
> + [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK */
> + },
> + [ C(OP_WRITE) ] = {
> + [ C(RESULT_ACCESS) ] = -1,
> + [ C(RESULT_MISS) ] = -1,
> + },
> + [ C(OP_PREFETCH) ] = {
> + [ C(RESULT_ACCESS) ] = -1,
> + [ C(RESULT_MISS) ] = -1,
> + },
> + },
> + [ C(BPU ) ] = {
> + [ C(OP_READ) ] = {
> + [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
> + [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
> + },
> + [ C(OP_WRITE) ] = {
> + [ C(RESULT_ACCESS) ] = -1,
> + [ C(RESULT_MISS) ] = -1,
> + },
> + [ C(OP_PREFETCH) ] = {
> + [ C(RESULT_ACCESS) ] = -1,
> + [ C(RESULT_MISS) ] = -1,
> + },
> + },
> +};
> +
> static __initconst const u64 westmere_hw_cache_event_ids
> [PERF_COUNT_HW_CACHE_MAX]
> [PERF_COUNT_HW_CACHE_OP_MAX]
> @@ -1062,6 +1175,16 @@ static __init int intel_pmu_init(void)
> pr_cont("Westmere events, ");
> break;
>
> + case 42: /* SandyBridge */
> + memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
> + sizeof(hw_cache_event_ids));
> +
> + intel_pmu_lbr_init_nhm();
> +
> + x86_pmu.event_constraints = intel_snb_event_constraints;
> + pr_cont("SandyBridge events, ");
> + break;
> +
> default:
> /*
> * default constraints for v2 and up
> diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> index b7dcd9f..e60f91b 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> @@ -388,6 +388,42 @@ static struct event_constraint intel_nehalem_pebs_events[] = {
> EVENT_CONSTRAINT_END
> };
>
> +static struct event_constraint intel_snb_pebs_events[] = {
> + PEBS_EVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
> + PEBS_EVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
> + PEBS_EVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
> + PEBS_EVENT_CONSTRAINT(0x01c4, 0xf), /* BR_INST_RETIRED.CONDITIONAL */
> + PEBS_EVENT_CONSTRAINT(0x02c4, 0xf), /* BR_INST_RETIRED.NEAR_CALL */
> + PEBS_EVENT_CONSTRAINT(0x04c4, 0xf), /* BR_INST_RETIRED.ALL_BRANCHES */
> + PEBS_EVENT_CONSTRAINT(0x08c4, 0xf), /* BR_INST_RETIRED.NEAR_RETURN */
> + PEBS_EVENT_CONSTRAINT(0x10c4, 0xf), /* BR_INST_RETIRED.NOT_TAKEN */
> + PEBS_EVENT_CONSTRAINT(0x20c4, 0xf), /* BR_INST_RETIRED.NEAR_TAKEN */
> + PEBS_EVENT_CONSTRAINT(0x40c4, 0xf), /* BR_INST_RETIRED.FAR_BRANCH */
> + PEBS_EVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */
> + PEBS_EVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
> + PEBS_EVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */
> + PEBS_EVENT_CONSTRAINT(0x10c5, 0xf), /* BR_MISP_RETIRED.NOT_TAKEN */
> + PEBS_EVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.TAKEN */
> + PEBS_EVENT_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
> + PEBS_EVENT_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORE */
> + PEBS_EVENT_CONSTRAINT(0x01d0, 0xf), /* MEM_UOP_RETIRED.LOADS */
> + PEBS_EVENT_CONSTRAINT(0x02d0, 0xf), /* MEM_UOP_RETIRED.STORES */
> + PEBS_EVENT_CONSTRAINT(0x10d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS */
> + PEBS_EVENT_CONSTRAINT(0x20d0, 0xf), /* MEM_UOP_RETIRED.LOCK */
> + PEBS_EVENT_CONSTRAINT(0x40d0, 0xf), /* MEM_UOP_RETIRED.SPLIT */
> + PEBS_EVENT_CONSTRAINT(0x80d0, 0xf), /* MEM_UOP_RETIRED.ALL */
Not quite. For event 0xd0, you are not listing the right umask combinations.
The following combinations are supported for event 0xd0:
0x5381d0 snb::MEM_UOP_RETIRED:ANY_LOADS
0x5382d0 snb::MEM_UOP_RETIRED:ANY_STORES
0x5321d0 snb::MEM_UOP_RETIRED:LOCK_LOADS
0x5322d0 snb::MEM_UOP_RETIRED:LOCK_STORES
0x5341d0 snb::MEM_UOP_RETIRED:SPLIT_LOADS
0x5342d0 snb::MEM_UOP_RETIRED:SPLIT_STORES
0x5311d0 snb::MEM_UOP_RETIRED:STLB_MISS_LOADS
0x5312d0 snb::MEM_UOP_RETIRED:STLB_MISS_STORES
In other words, bit 0-3 of the umask cannot be zero.
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 -tip] perf: x86, add SandyBridge support
2011-02-28 8:20 ` Stephane Eranian
@ 2011-02-28 8:51 ` Lin Ming
2011-02-28 9:02 ` Stephane Eranian
2011-02-28 9:08 ` Ingo Molnar
0 siblings, 2 replies; 30+ messages in thread
From: Lin Ming @ 2011-02-28 8:51 UTC (permalink / raw)
To: Stephane Eranian; +Cc: Peter Zijlstra, Ingo Molnar, Andi Kleen, lkml
On Mon, 2011-02-28 at 16:20 +0800, Stephane Eranian wrote:
> On Mon, Feb 28, 2011 at 8:22 AM, Lin Ming <ming.m.lin@intel.com> wrote:
> > This patch adds basic SandyBridge support, including hardware cache
> > events and PEBS events support.
> >
> > LLC-* hardware cache events don't work for now; they depend on the
> > offcore patches.
> >
> > All PEBS events are tested on my SandyBridge machine and work well.
> > Note that SandyBridge does not support INSTR_RETIRED.ANY(0x00c0) PEBS
> > event, instead it supports INST_RETIRED.PRECDIST(0x01c0) event and PMC1
> > only.
> >
> > v1 -> v2:
> > - add more raw and PEBS events constraints
> > - use offcore events for LLC-* cache events
> > - remove the call to Nehalem workaround enable_all function
> >
> > todo:
> > - precise store
> > - precise distribution of instructions retired
> >
> > Signed-off-by: Lin Ming <ming.m.lin@intel.com>
> > ---
> > arch/x86/kernel/cpu/perf_event.c | 2 +
> > arch/x86/kernel/cpu/perf_event_intel.c | 123 +++++++++++++++++++++++++++++
> > arch/x86/kernel/cpu/perf_event_intel_ds.c | 44 ++++++++++-
> > 3 files changed, 168 insertions(+), 1 deletions(-)
> >
> > diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> > index 10bfe24..49d51be 100644
> > --- a/arch/x86/kernel/cpu/perf_event.c
> > +++ b/arch/x86/kernel/cpu/perf_event.c
> > @@ -148,6 +148,8 @@ struct cpu_hw_events {
> > */
> > #define INTEL_EVENT_CONSTRAINT(c, n) \
> > EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
> > +#define INTEL_EVENT_CONSTRAINT2(c, n) \
> > + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
> >
> > /*
> > * Constraint on the Event code + UMask + fixed-mask
> > diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
> > index 084b383..3085868 100644
> > --- a/arch/x86/kernel/cpu/perf_event_intel.c
> > +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> > @@ -76,6 +76,19 @@ static struct event_constraint intel_westmere_event_constraints[] =
> > EVENT_CONSTRAINT_END
> > };
> >
> > +static struct event_constraint intel_snb_event_constraints[] =
> > +{
> > + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
> > + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
> > + /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
> > + INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
> > + INTEL_EVENT_CONSTRAINT(0xb7, 0x1), /* OFF_CORE_RESPONSE_0 */
> > + INTEL_EVENT_CONSTRAINT(0xbb, 0x8), /* OFF_CORE_RESPONSE_1 */
> > + INTEL_EVENT_CONSTRAINT2(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
> > + INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
> > + EVENT_CONSTRAINT_END
> > +};
> > +
> > static struct event_constraint intel_gen_event_constraints[] =
> > {
> > FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
> > @@ -89,6 +102,106 @@ static u64 intel_pmu_event_map(int hw_event)
> > return intel_perfmon_event_map[hw_event];
> > }
> >
> > +static __initconst const u64 snb_hw_cache_event_ids
> > + [PERF_COUNT_HW_CACHE_MAX]
> > + [PERF_COUNT_HW_CACHE_OP_MAX]
> > + [PERF_COUNT_HW_CACHE_RESULT_MAX] =
> > +{
> > + [ C(L1D) ] = {
> > + [ C(OP_READ) ] = {
> > + [ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS */
> > + [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPLACEMENT */
> > + },
> > + [ C(OP_WRITE) ] = {
> > + [ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES */
> > + [ C(RESULT_MISS) ] = 0x0851, /* L1D.ALL_M_REPLACEMENT */
> > + },
> > + [ C(OP_PREFETCH) ] = {
> > + [ C(RESULT_ACCESS) ] = 0x0,
> > + [ C(RESULT_MISS) ] = 0x024e, /* HW_PRE_REQ.DL1_MISS */
> > + },
> > + },
> > + [ C(L1I ) ] = {
> > + [ C(OP_READ) ] = {
> > + [ C(RESULT_ACCESS) ] = 0x0,
> > + [ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */
> > + },
> > + [ C(OP_WRITE) ] = {
> > + [ C(RESULT_ACCESS) ] = -1,
> > + [ C(RESULT_MISS) ] = -1,
> > + },
> > + [ C(OP_PREFETCH) ] = {
> > + [ C(RESULT_ACCESS) ] = 0x0,
> > + [ C(RESULT_MISS) ] = 0x0,
> > + },
> > + },
> > + [ C(LL ) ] = {
> > + /*
> > + * TBD: Need Off-core Response Performance Monitoring support
> > + */
> > + [ C(OP_READ) ] = {
> > + /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
> > + [ C(RESULT_ACCESS) ] = 0x01b7,
> > + /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
> > + [ C(RESULT_MISS) ] = 0x01bb,
> > + },
> > + [ C(OP_WRITE) ] = {
> > + /* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */
> > + [ C(RESULT_ACCESS) ] = 0x01b7,
> > + /* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */
> > + [ C(RESULT_MISS) ] = 0x01bb,
> > + },
> > + [ C(OP_PREFETCH) ] = {
> > + /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
> > + [ C(RESULT_ACCESS) ] = 0x01b7,
> > + /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
> > + [ C(RESULT_MISS) ] = 0x01bb,
> > + },
> > + },
> > + [ C(DTLB) ] = {
> > + [ C(OP_READ) ] = {
> > + [ C(RESULT_ACCESS) ] = 0x01d0, /* MEM_UOP_RETIRED.LOADS */
> > + [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */
> > + },
> > + [ C(OP_WRITE) ] = {
> > + [ C(RESULT_ACCESS) ] = 0x02d0, /* MEM_UOP_RETIRED.STORES */
> > + [ C(RESULT_MISS) ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
> > + },
> > + [ C(OP_PREFETCH) ] = {
> > + [ C(RESULT_ACCESS) ] = 0x0,
> > + [ C(RESULT_MISS) ] = 0x0,
> > + },
> > + },
> > + [ C(ITLB) ] = {
> > + [ C(OP_READ) ] = {
> > + [ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT */
> > + [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK */
> > + },
> > + [ C(OP_WRITE) ] = {
> > + [ C(RESULT_ACCESS) ] = -1,
> > + [ C(RESULT_MISS) ] = -1,
> > + },
> > + [ C(OP_PREFETCH) ] = {
> > + [ C(RESULT_ACCESS) ] = -1,
> > + [ C(RESULT_MISS) ] = -1,
> > + },
> > + },
> > + [ C(BPU ) ] = {
> > + [ C(OP_READ) ] = {
> > + [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
> > + [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
> > + },
> > + [ C(OP_WRITE) ] = {
> > + [ C(RESULT_ACCESS) ] = -1,
> > + [ C(RESULT_MISS) ] = -1,
> > + },
> > + [ C(OP_PREFETCH) ] = {
> > + [ C(RESULT_ACCESS) ] = -1,
> > + [ C(RESULT_MISS) ] = -1,
> > + },
> > + },
> > +};
> > +
> > static __initconst const u64 westmere_hw_cache_event_ids
> > [PERF_COUNT_HW_CACHE_MAX]
> > [PERF_COUNT_HW_CACHE_OP_MAX]
> > @@ -1062,6 +1175,16 @@ static __init int intel_pmu_init(void)
> > pr_cont("Westmere events, ");
> > break;
> >
> > + case 42: /* SandyBridge */
> > + memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
> > + sizeof(hw_cache_event_ids));
> > +
> > + intel_pmu_lbr_init_nhm();
> > +
> > + x86_pmu.event_constraints = intel_snb_event_constraints;
> > + pr_cont("SandyBridge events, ");
> > + break;
> > +
> > default:
> > /*
> > * default constraints for v2 and up
> > diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> > index b7dcd9f..e60f91b 100644
> > --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
> > +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> > @@ -388,6 +388,42 @@ static struct event_constraint intel_nehalem_pebs_events[] = {
> > EVENT_CONSTRAINT_END
> > };
> >
> > +static struct event_constraint intel_snb_pebs_events[] = {
> > + PEBS_EVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
> > + PEBS_EVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
> > + PEBS_EVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
> > + PEBS_EVENT_CONSTRAINT(0x01c4, 0xf), /* BR_INST_RETIRED.CONDITIONAL */
> > + PEBS_EVENT_CONSTRAINT(0x02c4, 0xf), /* BR_INST_RETIRED.NEAR_CALL */
> > + PEBS_EVENT_CONSTRAINT(0x04c4, 0xf), /* BR_INST_RETIRED.ALL_BRANCHES */
> > + PEBS_EVENT_CONSTRAINT(0x08c4, 0xf), /* BR_INST_RETIRED.NEAR_RETURN */
> > + PEBS_EVENT_CONSTRAINT(0x10c4, 0xf), /* BR_INST_RETIRED.NOT_TAKEN */
> > + PEBS_EVENT_CONSTRAINT(0x20c4, 0xf), /* BR_INST_RETIRED.NEAR_TAKEN */
> > + PEBS_EVENT_CONSTRAINT(0x40c4, 0xf), /* BR_INST_RETIRED.FAR_BRANCH */
> > + PEBS_EVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */
> > + PEBS_EVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
> > + PEBS_EVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */
> > + PEBS_EVENT_CONSTRAINT(0x10c5, 0xf), /* BR_MISP_RETIRED.NOT_TAKEN */
> > + PEBS_EVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.TAKEN */
> > + PEBS_EVENT_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
> > + PEBS_EVENT_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORE */
>
> > + PEBS_EVENT_CONSTRAINT(0x01d0, 0xf), /* MEM_UOP_RETIRED.LOADS */
> > + PEBS_EVENT_CONSTRAINT(0x02d0, 0xf), /* MEM_UOP_RETIRED.STORES */
> > + PEBS_EVENT_CONSTRAINT(0x10d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS */
> > + PEBS_EVENT_CONSTRAINT(0x20d0, 0xf), /* MEM_UOP_RETIRED.LOCK */
> > + PEBS_EVENT_CONSTRAINT(0x40d0, 0xf), /* MEM_UOP_RETIRED.SPLIT */
> > + PEBS_EVENT_CONSTRAINT(0x80d0, 0xf), /* MEM_UOP_RETIRED.ALL */
>
> Not quite. For event 0xd0, you are not listing the right umask combinations.
> The following combinations are supported for event 0xd0:
>
> 0x5381d0 snb::MEM_UOP_RETIRED:ANY_LOADS
> 0x5382d0 snb::MEM_UOP_RETIRED:ANY_STORES
> 0x5321d0 snb::MEM_UOP_RETIRED:LOCK_LOADS
> 0x5322d0 snb::MEM_UOP_RETIRED:LOCK_STORES
> 0x5341d0 snb::MEM_UOP_RETIRED:SPLIT_LOADS
> 0x5342d0 snb::MEM_UOP_RETIRED:SPLIT_STORES
> 0x5311d0 snb::MEM_UOP_RETIRED:STLB_MISS_LOADS
> 0x5312d0 snb::MEM_UOP_RETIRED:STLB_MISS_STORES
>
> In other words, bit 0-3 of the umask cannot be zero.
I got the umask from "Table 30-20. PEBS Performance Events for Intel
microarchitecture code name Sandy Bridge".
But "Table A-2. Non-Architectural Performance Events In the
Processor Core for Intel Core Processor 2xxx Series" requires the umask
combinations you show above.
Which one is correct?
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 -tip] perf: x86, add SandyBridge support
2011-02-28 8:51 ` Lin Ming
@ 2011-02-28 9:02 ` Stephane Eranian
2011-02-28 14:03 ` Lin Ming
2011-02-28 9:08 ` Ingo Molnar
1 sibling, 1 reply; 30+ messages in thread
From: Stephane Eranian @ 2011-02-28 9:02 UTC (permalink / raw)
To: Lin Ming; +Cc: Peter Zijlstra, Ingo Molnar, Andi Kleen, lkml
On Mon, Feb 28, 2011 at 9:51 AM, Lin Ming <ming.m.lin@intel.com> wrote:
> On Mon, 2011-02-28 at 16:20 +0800, Stephane Eranian wrote:
>> On Mon, Feb 28, 2011 at 8:22 AM, Lin Ming <ming.m.lin@intel.com> wrote:
>> > This patch adds basic SandyBridge support, including hardware cache
>> > events and PEBS events support.
>> >
>> > LLC-* hareware cache events don't work for now, it depends on the
>> > offcore patches.
>> >
>> > All PEBS events are tested on my SandyBridge machine and work well.
>> > Note that SandyBridge does not support INSTR_RETIRED.ANY(0x00c0) PEBS
>> > event, instead it supports INST_RETIRED.PRECDIST(0x01c0) event and PMC1
>> > only.
>> >
>> > v1 -> v2:
>> > - add more raw and PEBS events constraints
>> > - use offcore events for LLC-* cache events
>> > - remove the call to Nehalem workaround enable_all function
>> >
>> > todo:
>> > - precise store
>> > - precise distribution of instructions retired
>> >
>> > Signed-off-by: Lin Ming <ming.m.lin@intel.com>
>> > ---
>> > arch/x86/kernel/cpu/perf_event.c | 2 +
>> > arch/x86/kernel/cpu/perf_event_intel.c | 123 +++++++++++++++++++++++++++++
>> > arch/x86/kernel/cpu/perf_event_intel_ds.c | 44 ++++++++++-
>> > 3 files changed, 168 insertions(+), 1 deletions(-)
>> >
>> > diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
>> > index 10bfe24..49d51be 100644
>> > --- a/arch/x86/kernel/cpu/perf_event.c
>> > +++ b/arch/x86/kernel/cpu/perf_event.c
>> > @@ -148,6 +148,8 @@ struct cpu_hw_events {
>> > */
>> > #define INTEL_EVENT_CONSTRAINT(c, n) \
>> > EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
>> > +#define INTEL_EVENT_CONSTRAINT2(c, n) \
>> > + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
>> >
>> > /*
>> > * Constraint on the Event code + UMask + fixed-mask
>> > diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
>> > index 084b383..3085868 100644
>> > --- a/arch/x86/kernel/cpu/perf_event_intel.c
>> > +++ b/arch/x86/kernel/cpu/perf_event_intel.c
>> > @@ -76,6 +76,19 @@ static struct event_constraint intel_westmere_event_constraints[] =
>> > EVENT_CONSTRAINT_END
>> > };
>> >
>> > +static struct event_constraint intel_snb_event_constraints[] =
>> > +{
>> > + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
>> > + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
>> > + /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
>> > + INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
>> > + INTEL_EVENT_CONSTRAINT(0xb7, 0x1), /* OFF_CORE_RESPONSE_0 */
>> > + INTEL_EVENT_CONSTRAINT(0xbb, 0x8), /* OFF_CORE_RESPONSE_1 */
>> > + INTEL_EVENT_CONSTRAINT2(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
>> > + INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
>> > + EVENT_CONSTRAINT_END
>> > +};
>> > +
>> > static struct event_constraint intel_gen_event_constraints[] =
>> > {
>> > FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
>> > @@ -89,6 +102,106 @@ static u64 intel_pmu_event_map(int hw_event)
>> > return intel_perfmon_event_map[hw_event];
>> > }
>> >
>> > +static __initconst const u64 snb_hw_cache_event_ids
>> > + [PERF_COUNT_HW_CACHE_MAX]
>> > + [PERF_COUNT_HW_CACHE_OP_MAX]
>> > + [PERF_COUNT_HW_CACHE_RESULT_MAX] =
>> > +{
>> > + [ C(L1D) ] = {
>> > + [ C(OP_READ) ] = {
>> > + [ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS */
>> > + [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPLACEMENT */
>> > + },
>> > + [ C(OP_WRITE) ] = {
>> > + [ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES */
>> > + [ C(RESULT_MISS) ] = 0x0851, /* L1D.ALL_M_REPLACEMENT */
>> > + },
>> > + [ C(OP_PREFETCH) ] = {
>> > + [ C(RESULT_ACCESS) ] = 0x0,
>> > + [ C(RESULT_MISS) ] = 0x024e, /* HW_PRE_REQ.DL1_MISS */
>> > + },
>> > + },
>> > + [ C(L1I ) ] = {
>> > + [ C(OP_READ) ] = {
>> > + [ C(RESULT_ACCESS) ] = 0x0,
>> > + [ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */
>> > + },
>> > + [ C(OP_WRITE) ] = {
>> > + [ C(RESULT_ACCESS) ] = -1,
>> > + [ C(RESULT_MISS) ] = -1,
>> > + },
>> > + [ C(OP_PREFETCH) ] = {
>> > + [ C(RESULT_ACCESS) ] = 0x0,
>> > + [ C(RESULT_MISS) ] = 0x0,
>> > + },
>> > + },
>> > + [ C(LL ) ] = {
>> > + /*
>> > + * TBD: Need Off-core Response Performance Monitoring support
>> > + */
>> > + [ C(OP_READ) ] = {
>> > + /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
>> > + [ C(RESULT_ACCESS) ] = 0x01b7,
>> > + /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
>> > + [ C(RESULT_MISS) ] = 0x01bb,
>> > + },
>> > + [ C(OP_WRITE) ] = {
>> > + /* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */
>> > + [ C(RESULT_ACCESS) ] = 0x01b7,
>> > + /* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */
>> > + [ C(RESULT_MISS) ] = 0x01bb,
>> > + },
>> > + [ C(OP_PREFETCH) ] = {
>> > + /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
>> > + [ C(RESULT_ACCESS) ] = 0x01b7,
>> > + /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
>> > + [ C(RESULT_MISS) ] = 0x01bb,
>> > + },
>> > + },
>> > + [ C(DTLB) ] = {
>> > + [ C(OP_READ) ] = {
>> > + [ C(RESULT_ACCESS) ] = 0x01d0, /* MEM_UOP_RETIRED.LOADS */
>> > + [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */
>> > + },
>> > + [ C(OP_WRITE) ] = {
>> > + [ C(RESULT_ACCESS) ] = 0x02d0, /* MEM_UOP_RETIRED.STORES */
>> > + [ C(RESULT_MISS) ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
>> > + },
>> > + [ C(OP_PREFETCH) ] = {
>> > + [ C(RESULT_ACCESS) ] = 0x0,
>> > + [ C(RESULT_MISS) ] = 0x0,
>> > + },
>> > + },
>> > + [ C(ITLB) ] = {
>> > + [ C(OP_READ) ] = {
>> > + [ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT */
>> > + [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK */
>> > + },
>> > + [ C(OP_WRITE) ] = {
>> > + [ C(RESULT_ACCESS) ] = -1,
>> > + [ C(RESULT_MISS) ] = -1,
>> > + },
>> > + [ C(OP_PREFETCH) ] = {
>> > + [ C(RESULT_ACCESS) ] = -1,
>> > + [ C(RESULT_MISS) ] = -1,
>> > + },
>> > + },
>> > + [ C(BPU ) ] = {
>> > + [ C(OP_READ) ] = {
>> > + [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
>> > + [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
>> > + },
>> > + [ C(OP_WRITE) ] = {
>> > + [ C(RESULT_ACCESS) ] = -1,
>> > + [ C(RESULT_MISS) ] = -1,
>> > + },
>> > + [ C(OP_PREFETCH) ] = {
>> > + [ C(RESULT_ACCESS) ] = -1,
>> > + [ C(RESULT_MISS) ] = -1,
>> > + },
>> > + },
>> > +};
>> > +
>> > static __initconst const u64 westmere_hw_cache_event_ids
>> > [PERF_COUNT_HW_CACHE_MAX]
>> > [PERF_COUNT_HW_CACHE_OP_MAX]
>> > @@ -1062,6 +1175,16 @@ static __init int intel_pmu_init(void)
>> > pr_cont("Westmere events, ");
>> > break;
>> >
>> > + case 42: /* SandyBridge */
>> > + memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
>> > + sizeof(hw_cache_event_ids));
>> > +
>> > + intel_pmu_lbr_init_nhm();
>> > +
>> > + x86_pmu.event_constraints = intel_snb_event_constraints;
>> > + pr_cont("SandyBridge events, ");
>> > + break;
>> > +
>> > default:
>> > /*
>> > * default constraints for v2 and up
>> > diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
>> > index b7dcd9f..e60f91b 100644
>> > --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
>> > +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
>> > @@ -388,6 +388,42 @@ static struct event_constraint intel_nehalem_pebs_events[] = {
>> > EVENT_CONSTRAINT_END
>> > };
>> >
>> > +static struct event_constraint intel_snb_pebs_events[] = {
>> > + PEBS_EVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
>> > + PEBS_EVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
>> > + PEBS_EVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
>> > + PEBS_EVENT_CONSTRAINT(0x01c4, 0xf), /* BR_INST_RETIRED.CONDITIONAL */
>> > + PEBS_EVENT_CONSTRAINT(0x02c4, 0xf), /* BR_INST_RETIRED.NEAR_CALL */
>> > + PEBS_EVENT_CONSTRAINT(0x04c4, 0xf), /* BR_INST_RETIRED.ALL_BRANCHES */
>> > + PEBS_EVENT_CONSTRAINT(0x08c4, 0xf), /* BR_INST_RETIRED.NEAR_RETURN */
>> > + PEBS_EVENT_CONSTRAINT(0x10c4, 0xf), /* BR_INST_RETIRED.NOT_TAKEN */
>> > + PEBS_EVENT_CONSTRAINT(0x20c4, 0xf), /* BR_INST_RETIRED.NEAR_TAKEN */
>> > + PEBS_EVENT_CONSTRAINT(0x40c4, 0xf), /* BR_INST_RETIRED.FAR_BRANCH */
>> > + PEBS_EVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */
>> > + PEBS_EVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
>> > + PEBS_EVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */
>> > + PEBS_EVENT_CONSTRAINT(0x10c5, 0xf), /* BR_MISP_RETIRED.NOT_TAKEN */
>> > + PEBS_EVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.TAKEN */
>> > + PEBS_EVENT_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
>> > + PEBS_EVENT_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORE */
>>
>> > + PEBS_EVENT_CONSTRAINT(0x01d0, 0xf), /* MEM_UOP_RETIRED.LOADS */
>> > + PEBS_EVENT_CONSTRAINT(0x02d0, 0xf), /* MEM_UOP_RETIRED.STORES */
>> > + PEBS_EVENT_CONSTRAINT(0x10d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS */
>> > + PEBS_EVENT_CONSTRAINT(0x20d0, 0xf), /* MEM_UOP_RETIRED.LOCK */
>> > + PEBS_EVENT_CONSTRAINT(0x40d0, 0xf), /* MEM_UOP_RETIRED.SPLIT */
>> > + PEBS_EVENT_CONSTRAINT(0x80d0, 0xf), /* MEM_UOP_RETIRED.ALL */
>>
>> Not quite. For event 0xd0, you are not listing the right umask combinations.
>> The following combinations are supported for event 0xd0:
>>
>> 0x5381d0 snb::MEM_UOP_RETIRED:ANY_LOADS
>> 0x5382d0 snb::MEM_UOP_RETIRED:ANY_STORES
>> 0x5321d0 snb::MEM_UOP_RETIRED:LOCK_LOADS
>> 0x5322d0 snb::MEM_UOP_RETIRED:LOCK_STORES
>> 0x5341d0 snb::MEM_UOP_RETIRED:SPLIT_LOADS
>> 0x5342d0 snb::MEM_UOP_RETIRED:SPLIT_STORES
>> 0x5311d0 snb::MEM_UOP_RETIRED:STLB_MISS_LOADS
>> 0x5312d0 snb::MEM_UOP_RETIRED:STLB_MISS_STORES
>>
>> In other words, bit 0-3 of the umask cannot be zero.
>
> I got the umask from "Table 30-20. PEBS Performance Events for Intel
> microarchitecture code name Sandy Bridge".
>
> But from "Table A-2. Non-Architectural Performance Events In the
> Processor Core for Intel Core Processor 2xxx Series", the combinations
> are needed as you show above.
>
> Which one is correct?
>
I think Table A-2 is correct. Umasks 10h, 20h, 40h, 80h MUST be combined
to collect something meaningful.
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 -tip] perf: x86, add SandyBridge support
2011-02-28 8:51 ` Lin Ming
2011-02-28 9:02 ` Stephane Eranian
@ 2011-02-28 9:08 ` Ingo Molnar
2011-02-28 14:02 ` Lin Ming
1 sibling, 1 reply; 30+ messages in thread
From: Ingo Molnar @ 2011-02-28 9:08 UTC (permalink / raw)
To: Lin Ming; +Cc: Stephane Eranian, Peter Zijlstra, Andi Kleen, lkml
* Lin Ming <ming.m.lin@intel.com> wrote:
> > In other words, bit 0-3 of the umask cannot be zero.
>
> I got the umask from "Table 30-20. PEBS Performance Events for Intel
> microarchitecture code name Sandy Bridge".
>
> But from "Table A-2. Non-Architectural Performance Events In the Processor Core
> for Intel Core Processor 2xxx Series", the combinations are needed as you show
> above.
>
> Which one is correct?
Since you have access to the hardware, could you please test in practice and see
which one is correct?
Thanks,
Ingo
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 -tip] perf: x86, add SandyBridge support
2011-02-28 7:22 [PATCH v2 -tip] perf: x86, add SandyBridge support Lin Ming
2011-02-28 8:20 ` Stephane Eranian
@ 2011-02-28 9:15 ` Peter Zijlstra
2011-02-28 12:25 ` Stephane Eranian
` (3 more replies)
1 sibling, 4 replies; 30+ messages in thread
From: Peter Zijlstra @ 2011-02-28 9:15 UTC (permalink / raw)
To: Lin Ming; +Cc: Ingo Molnar, Stephane Eranian, Andi Kleen, lkml
On Mon, 2011-02-28 at 15:22 +0800, Lin Ming wrote:
> This patch adds basic SandyBridge support, including hardware cache
> events and PEBS events support.
>
> LLC-* hareware cache events don't work for now, it depends on the
> offcore patches.
What's the status of those, Stephane reported some problems last I
remember?
> #define INTEL_EVENT_CONSTRAINT(c, n) \
> EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
> +#define INTEL_EVENT_CONSTRAINT2(c, n) \
> + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
That's a particularly bad name, how about something like
INTEL_UEVENT_CONSTRAINT or somesuch.
> @@ -702,7 +738,13 @@ static void intel_ds_init(void)
> printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
> x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
> x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
> - x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
> + switch (boot_cpu_data.x86_model) {
> + case 42: /* SandyBridge */
> + x86_pmu.pebs_constraints = intel_snb_pebs_events;
> + break;
> + default:
> + x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
> + }
> break;
>
> default:
We already have this massive model switch right after this function,
might as well move the pebs constraint assignment there.
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 -tip] perf: x86, add SandyBridge support
2011-02-28 9:15 ` Peter Zijlstra
@ 2011-02-28 12:25 ` Stephane Eranian
2011-02-28 14:33 ` Lin Ming
2011-02-28 14:21 ` Lin Ming
` (2 subsequent siblings)
3 siblings, 1 reply; 30+ messages in thread
From: Stephane Eranian @ 2011-02-28 12:25 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: Lin Ming, Ingo Molnar, Andi Kleen, lkml
On Mon, Feb 28, 2011 at 10:15 AM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> On Mon, 2011-02-28 at 15:22 +0800, Lin Ming wrote:
>> This patch adds basic SandyBridge support, including hardware cache
>> events and PEBS events support.
>>
>> LLC-* hareware cache events don't work for now, it depends on the
>> offcore patches.
>
> What's the status of those, Stephane reported some problems last I
> remember?
>
>
>> #define INTEL_EVENT_CONSTRAINT(c, n) \
>> EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
>> +#define INTEL_EVENT_CONSTRAINT2(c, n) \
>> + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
>
> That's a particularly bad name, how about something like
>
> INTEL_UEVENT_CONSTRAINT or somesuch.
>
>> @@ -702,7 +738,13 @@ static void intel_ds_init(void)
>> printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
>> x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
>> x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
>> - x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
>> + switch (boot_cpu_data.x86_model) {
>> + case 42: /* SandyBridge */
>> + x86_pmu.pebs_constraints = intel_snb_pebs_events;
>> + break;
>> + default:
>> + x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
>> + }
>> break;
>>
>> default:
>
> We already have this massive model switch right after this function,
> might as well move the pebs constraint assignment there.
>
My PEBS patch was going to clean up this part, though it was using
its own switch
statement.
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 -tip] perf: x86, add SandyBridge support
2011-02-28 9:08 ` Ingo Molnar
@ 2011-02-28 14:02 ` Lin Ming
2011-02-28 14:13 ` Stephane Eranian
0 siblings, 1 reply; 30+ messages in thread
From: Lin Ming @ 2011-02-28 14:02 UTC (permalink / raw)
To: Ingo Molnar; +Cc: Stephane Eranian, Peter Zijlstra, Andi Kleen, lkml
On Mon, 2011-02-28 at 17:08 +0800, Ingo Molnar wrote:
> * Lin Ming <ming.m.lin@intel.com> wrote:
>
> > > In other words, bit 0-3 of the umask cannot be zero.
> >
> > I got the umask from "Table 30-20. PEBS Performance Events for Intel
> > microarchitecture code name Sandy Bridge".
> >
> > But from "Table A-2. Non-Architectural Performance Events In the Processor Core
> > for Intel Core Processor 2xxx Series", the combinations are needed as you show
> > above.
> >
> > Which one is correct?
>
> Since you have access to the hardware, could you please test and see it in practice
> which one is correct?
Stephane is right; the combination is needed.
Sorry, I may have made a mistake when I tested the 0xd0 PEBS events.
I re-tested all PEBS events; now only the 2 events below need more support to
work.
PEBS_EVENT_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
PEBS_EVENT_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORE*/
>
> Thanks,
>
> Ingo
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 -tip] perf: x86, add SandyBridge support
2011-02-28 9:02 ` Stephane Eranian
@ 2011-02-28 14:03 ` Lin Ming
2011-02-28 14:28 ` Lin Ming
0 siblings, 1 reply; 30+ messages in thread
From: Lin Ming @ 2011-02-28 14:03 UTC (permalink / raw)
To: Stephane Eranian; +Cc: Peter Zijlstra, Ingo Molnar, Andi Kleen, lkml
On Mon, 2011-02-28 at 17:02 +0800, Stephane Eranian wrote:
> On Mon, Feb 28, 2011 at 9:51 AM, Lin Ming <ming.m.lin@intel.com> wrote:
> > On Mon, 2011-02-28 at 16:20 +0800, Stephane Eranian wrote:
> >> On Mon, Feb 28, 2011 at 8:22 AM, Lin Ming <ming.m.lin@intel.com> wrote:
> >> > This patch adds basic SandyBridge support, including hardware cache
> >> > events and PEBS events support.
> >> >
> >> > LLC-* hareware cache events don't work for now, it depends on the
> >> > offcore patches.
> >> >
> >> > All PEBS events are tested on my SandyBridge machine and work well.
> >> > Note that SandyBridge does not support INSTR_RETIRED.ANY(0x00c0) PEBS
> >> > event, instead it supports INST_RETIRED.PRECDIST(0x01c0) event and PMC1
> >> > only.
> >> >
> >> > v1 -> v2:
> >> > - add more raw and PEBS events constraints
> >> > - use offcore events for LLC-* cache events
> >> > - remove the call to Nehalem workaround enable_all function
> >> >
> >> > todo:
> >> > - precise store
> >> > - precise distribution of instructions retired
> >> >
> >> > Signed-off-by: Lin Ming <ming.m.lin@intel.com>
> >> > ---
> >> > arch/x86/kernel/cpu/perf_event.c | 2 +
> >> > arch/x86/kernel/cpu/perf_event_intel.c | 123 +++++++++++++++++++++++++++++
> >> > arch/x86/kernel/cpu/perf_event_intel_ds.c | 44 ++++++++++-
> >> > 3 files changed, 168 insertions(+), 1 deletions(-)
> >> >
> >> > diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> >> > index 10bfe24..49d51be 100644
> >> > --- a/arch/x86/kernel/cpu/perf_event.c
> >> > +++ b/arch/x86/kernel/cpu/perf_event.c
> >> > @@ -148,6 +148,8 @@ struct cpu_hw_events {
> >> > */
> >> > #define INTEL_EVENT_CONSTRAINT(c, n) \
> >> > EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
> >> > +#define INTEL_EVENT_CONSTRAINT2(c, n) \
> >> > + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
> >> >
> >> > /*
> >> > * Constraint on the Event code + UMask + fixed-mask
> >> > diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
> >> > index 084b383..3085868 100644
> >> > --- a/arch/x86/kernel/cpu/perf_event_intel.c
> >> > +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> >> > @@ -76,6 +76,19 @@ static struct event_constraint intel_westmere_event_constraints[] =
> >> > EVENT_CONSTRAINT_END
> >> > };
> >> >
> >> > +static struct event_constraint intel_snb_event_constraints[] =
> >> > +{
> >> > + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
> >> > + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
> >> > + /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */
> >> > + INTEL_EVENT_CONSTRAINT(0x48, 0x4), /* L1D_PEND_MISS.PENDING */
> >> > + INTEL_EVENT_CONSTRAINT(0xb7, 0x1), /* OFF_CORE_RESPONSE_0 */
> >> > + INTEL_EVENT_CONSTRAINT(0xbb, 0x8), /* OFF_CORE_RESPONSE_1 */
> >> > + INTEL_EVENT_CONSTRAINT2(0x01c0, 0x2), /* INST_RETIRED.PREC_DIST */
> >> > + INTEL_EVENT_CONSTRAINT(0xcd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
> >> > + EVENT_CONSTRAINT_END
> >> > +};
> >> > +
> >> > static struct event_constraint intel_gen_event_constraints[] =
> >> > {
> >> > FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
> >> > @@ -89,6 +102,106 @@ static u64 intel_pmu_event_map(int hw_event)
> >> > return intel_perfmon_event_map[hw_event];
> >> > }
> >> >
> >> > +static __initconst const u64 snb_hw_cache_event_ids
> >> > + [PERF_COUNT_HW_CACHE_MAX]
> >> > + [PERF_COUNT_HW_CACHE_OP_MAX]
> >> > + [PERF_COUNT_HW_CACHE_RESULT_MAX] =
> >> > +{
> >> > + [ C(L1D) ] = {
> >> > + [ C(OP_READ) ] = {
> >> > + [ C(RESULT_ACCESS) ] = 0xf1d0, /* MEM_UOP_RETIRED.LOADS */
> >> > + [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPLACEMENT */
> >> > + },
> >> > + [ C(OP_WRITE) ] = {
> >> > + [ C(RESULT_ACCESS) ] = 0xf2d0, /* MEM_UOP_RETIRED.STORES */
> >> > + [ C(RESULT_MISS) ] = 0x0851, /* L1D.ALL_M_REPLACEMENT */
> >> > + },
> >> > + [ C(OP_PREFETCH) ] = {
> >> > + [ C(RESULT_ACCESS) ] = 0x0,
> >> > + [ C(RESULT_MISS) ] = 0x024e, /* HW_PRE_REQ.DL1_MISS */
> >> > + },
> >> > + },
> >> > + [ C(L1I ) ] = {
> >> > + [ C(OP_READ) ] = {
> >> > + [ C(RESULT_ACCESS) ] = 0x0,
> >> > + [ C(RESULT_MISS) ] = 0x0280, /* ICACHE.MISSES */
> >> > + },
> >> > + [ C(OP_WRITE) ] = {
> >> > + [ C(RESULT_ACCESS) ] = -1,
> >> > + [ C(RESULT_MISS) ] = -1,
> >> > + },
> >> > + [ C(OP_PREFETCH) ] = {
> >> > + [ C(RESULT_ACCESS) ] = 0x0,
> >> > + [ C(RESULT_MISS) ] = 0x0,
> >> > + },
> >> > + },
> >> > + [ C(LL ) ] = {
> >> > + /*
> >> > + * TBD: Need Off-core Response Performance Monitoring support
> >> > + */
> >> > + [ C(OP_READ) ] = {
> >> > + /* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
> >> > + [ C(RESULT_ACCESS) ] = 0x01b7,
> >> > + /* OFFCORE_RESPONSE_1.ANY_DATA.ANY_LLC_MISS */
> >> > + [ C(RESULT_MISS) ] = 0x01bb,
> >> > + },
> >> > + [ C(OP_WRITE) ] = {
> >> > + /* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */
> >> > + [ C(RESULT_ACCESS) ] = 0x01b7,
> >> > + /* OFFCORE_RESPONSE_1.ANY_RFO.ANY_LLC_MISS */
> >> > + [ C(RESULT_MISS) ] = 0x01bb,
> >> > + },
> >> > + [ C(OP_PREFETCH) ] = {
> >> > + /* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
> >> > + [ C(RESULT_ACCESS) ] = 0x01b7,
> >> > + /* OFFCORE_RESPONSE_1.PREFETCH.ANY_LLC_MISS */
> >> > + [ C(RESULT_MISS) ] = 0x01bb,
> >> > + },
> >> > + },
> >> > + [ C(DTLB) ] = {
> >> > + [ C(OP_READ) ] = {
> >> > + [ C(RESULT_ACCESS) ] = 0x01d0, /* MEM_UOP_RETIRED.LOADS */
> >> > + [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */
> >> > + },
> >> > + [ C(OP_WRITE) ] = {
> >> > + [ C(RESULT_ACCESS) ] = 0x02d0, /* MEM_UOP_RETIRED.STORES */
> >> > + [ C(RESULT_MISS) ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
> >> > + },
> >> > + [ C(OP_PREFETCH) ] = {
> >> > + [ C(RESULT_ACCESS) ] = 0x0,
> >> > + [ C(RESULT_MISS) ] = 0x0,
> >> > + },
> >> > + },
> >> > + [ C(ITLB) ] = {
> >> > + [ C(OP_READ) ] = {
> >> > + [ C(RESULT_ACCESS) ] = 0x1085, /* ITLB_MISSES.STLB_HIT */
> >> > + [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.CAUSES_A_WALK */
> >> > + },
> >> > + [ C(OP_WRITE) ] = {
> >> > + [ C(RESULT_ACCESS) ] = -1,
> >> > + [ C(RESULT_MISS) ] = -1,
> >> > + },
> >> > + [ C(OP_PREFETCH) ] = {
> >> > + [ C(RESULT_ACCESS) ] = -1,
> >> > + [ C(RESULT_MISS) ] = -1,
> >> > + },
> >> > + },
> >> > + [ C(BPU ) ] = {
> >> > + [ C(OP_READ) ] = {
> >> > + [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
> >> > + [ C(RESULT_MISS) ] = 0x00c5, /* BR_MISP_RETIRED.ALL_BRANCHES */
> >> > + },
> >> > + [ C(OP_WRITE) ] = {
> >> > + [ C(RESULT_ACCESS) ] = -1,
> >> > + [ C(RESULT_MISS) ] = -1,
> >> > + },
> >> > + [ C(OP_PREFETCH) ] = {
> >> > + [ C(RESULT_ACCESS) ] = -1,
> >> > + [ C(RESULT_MISS) ] = -1,
> >> > + },
> >> > + },
> >> > +};
> >> > +
> >> > static __initconst const u64 westmere_hw_cache_event_ids
> >> > [PERF_COUNT_HW_CACHE_MAX]
> >> > [PERF_COUNT_HW_CACHE_OP_MAX]
> >> > @@ -1062,6 +1175,16 @@ static __init int intel_pmu_init(void)
> >> > pr_cont("Westmere events, ");
> >> > break;
> >> >
> >> > + case 42: /* SandyBridge */
> >> > + memcpy(hw_cache_event_ids, snb_hw_cache_event_ids,
> >> > + sizeof(hw_cache_event_ids));
> >> > +
> >> > + intel_pmu_lbr_init_nhm();
> >> > +
> >> > + x86_pmu.event_constraints = intel_snb_event_constraints;
> >> > + pr_cont("SandyBridge events, ");
> >> > + break;
> >> > +
> >> > default:
> >> > /*
> >> > * default constraints for v2 and up
> >> > diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> >> > index b7dcd9f..e60f91b 100644
> >> > --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
> >> > +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> >> > @@ -388,6 +388,42 @@ static struct event_constraint intel_nehalem_pebs_events[] = {
> >> > EVENT_CONSTRAINT_END
> >> > };
> >> >
> >> > +static struct event_constraint intel_snb_pebs_events[] = {
> >> > + PEBS_EVENT_CONSTRAINT(0x01c0, 0x2), /* INST_RETIRED.PRECDIST */
> >> > + PEBS_EVENT_CONSTRAINT(0x01c2, 0xf), /* UOPS_RETIRED.ALL */
> >> > + PEBS_EVENT_CONSTRAINT(0x02c2, 0xf), /* UOPS_RETIRED.RETIRE_SLOTS */
> >> > + PEBS_EVENT_CONSTRAINT(0x01c4, 0xf), /* BR_INST_RETIRED.CONDITIONAL */
> >> > + PEBS_EVENT_CONSTRAINT(0x02c4, 0xf), /* BR_INST_RETIRED.NEAR_CALL */
> >> > + PEBS_EVENT_CONSTRAINT(0x04c4, 0xf), /* BR_INST_RETIRED.ALL_BRANCHES */
> >> > + PEBS_EVENT_CONSTRAINT(0x08c4, 0xf), /* BR_INST_RETIRED.NEAR_RETURN */
> >> > + PEBS_EVENT_CONSTRAINT(0x10c4, 0xf), /* BR_INST_RETIRED.NOT_TAKEN */
> >> > + PEBS_EVENT_CONSTRAINT(0x20c4, 0xf), /* BR_INST_RETIRED.NEAR_TAKEN */
> >> > + PEBS_EVENT_CONSTRAINT(0x40c4, 0xf), /* BR_INST_RETIRED.FAR_BRANCH */
> >> > + PEBS_EVENT_CONSTRAINT(0x01c5, 0xf), /* BR_MISP_RETIRED.CONDITIONAL */
> >> > + PEBS_EVENT_CONSTRAINT(0x02c5, 0xf), /* BR_MISP_RETIRED.NEAR_CALL */
> >> > + PEBS_EVENT_CONSTRAINT(0x04c5, 0xf), /* BR_MISP_RETIRED.ALL_BRANCHES */
> >> > + PEBS_EVENT_CONSTRAINT(0x10c5, 0xf), /* BR_MISP_RETIRED.NOT_TAKEN */
> >> > + PEBS_EVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.TAKEN */
> >> > + PEBS_EVENT_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
> >> > + PEBS_EVENT_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORE */
> >>
> >> > + PEBS_EVENT_CONSTRAINT(0x01d0, 0xf), /* MEM_UOP_RETIRED.LOADS */
> >> > + PEBS_EVENT_CONSTRAINT(0x02d0, 0xf), /* MEM_UOP_RETIRED.STORES */
> >> > + PEBS_EVENT_CONSTRAINT(0x10d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS */
> >> > + PEBS_EVENT_CONSTRAINT(0x20d0, 0xf), /* MEM_UOP_RETIRED.LOCK */
> >> > + PEBS_EVENT_CONSTRAINT(0x40d0, 0xf), /* MEM_UOP_RETIRED.SPLIT */
> >> > + PEBS_EVENT_CONSTRAINT(0x80d0, 0xf), /* MEM_UOP_RETIRED.ALL */
> >>
> >> Not quite. For event 0xd0, you are not listing the right umask combinations.
> >> The following combinations are supported for event 0xd0:
> >>
> >> 0x5381d0 snb::MEM_UOP_RETIRED:ANY_LOADS
> >> 0x5382d0 snb::MEM_UOP_RETIRED:ANY_STORES
> >> 0x5321d0 snb::MEM_UOP_RETIRED:LOCK_LOADS
> >> 0x5322d0 snb::MEM_UOP_RETIRED:LOCK_STORES
> >> 0x5341d0 snb::MEM_UOP_RETIRED:SPLIT_LOADS
> >> 0x5342d0 snb::MEM_UOP_RETIRED:SPLIT_STORES
> >> 0x5311d0 snb::MEM_UOP_RETIRED:STLB_MISS_LOADS
> >> 0x5312d0 snb::MEM_UOP_RETIRED:STLB_MISS_STORES
> >>
> >> In other words, bit 0-3 of the umask cannot be zero.
> >
> > I got the umask from "Table 30-20. PEBS Performance Events for Intel
> > microarchitecture code name Sandy Bridge".
> >
> > But from "Table A-2. Non-Architectural Performance Events In the
> > Processor Core for Intel Core Processor 2xxx Series", the combinations
> > are needed as you show above.
> >
> > Which one is correct?
> >
> I think Table A-2 is correct. Umasks 10h, 20h, 40h, 80h MUST be combined
> to collect something meaningful.
Yes, thanks for figuring this out.
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 -tip] perf: x86, add SandyBridge support
2011-02-28 14:02 ` Lin Ming
@ 2011-02-28 14:13 ` Stephane Eranian
0 siblings, 0 replies; 30+ messages in thread
From: Stephane Eranian @ 2011-02-28 14:13 UTC (permalink / raw)
To: Lin Ming; +Cc: Ingo Molnar, Peter Zijlstra, Andi Kleen, lkml
Also for offcore_response_*, the fact that they are marked as PMC3 only is just
a convenience for scheduling. Given both events need an extra MSR, it makes
scheduling easier if you consider offcore_response_0 to work only on one counter.
That guarantees there won't be conflict on that extra MSR. The downside, is that
you cannot measure the event twice if you only want to vary the counter filters
(not the extra MSR), e.g., measure one instance at user level and the other at
kernel level.
On SNB, the extra MSRs are not shared by HT threads anymore (Table B-9).
That means that in the offcore patch, the extra_config logic is needed but not
the mutual exclusion between HT threads.
On Mon, Feb 28, 2011 at 3:02 PM, Lin Ming <ming.m.lin@intel.com> wrote:
> On Mon, 2011-02-28 at 17:08 +0800, Ingo Molnar wrote:
>> * Lin Ming <ming.m.lin@intel.com> wrote:
>>
>> > > In other words, bit 0-3 of the umask cannot be zero.
>> >
>> > I got the umask from "Table 30-20. PEBS Performance Events for Intel
>> > microarchitecture code name Sandy Bridge".
>> >
>> > But from "Table A-2. Non-Architectural Performance Events In the Processor Core
>> > for Intel Core Processor 2xxx Series", the combinations are needed as you show
>> > above.
>> >
>> > Which one is correct?
>>
>> Since you have access to the hardware, could you please test and see it in practice
>> which one is correct?
>
> Stephane is right, need the combination.
> Sorry that I may made mistake when I tested 0xd0 pebs events.
>
> Re-test all PEBS events, now only below 2 events need more support to
> work.
>
> PEBS_EVENT_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
> PEBS_EVENT_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORE*/
>
>>
>> Thanks,
>>
>> Ingo
>
>
>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 -tip] perf: x86, add SandyBridge support
2011-02-28 9:15 ` Peter Zijlstra
2011-02-28 12:25 ` Stephane Eranian
@ 2011-02-28 14:21 ` Lin Ming
2011-02-28 14:24 ` Peter Zijlstra
2011-02-28 14:56 ` Lin Ming
2011-03-01 7:43 ` Stephane Eranian
3 siblings, 1 reply; 30+ messages in thread
From: Lin Ming @ 2011-02-28 14:21 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: Ingo Molnar, Stephane Eranian, Andi Kleen, lkml
On Mon, 2011-02-28 at 17:15 +0800, Peter Zijlstra wrote:
> On Mon, 2011-02-28 at 15:22 +0800, Lin Ming wrote:
> > This patch adds basic SandyBridge support, including hardware cache
> > events and PEBS events support.
> >
> > LLC-* hareware cache events don't work for now, it depends on the
> > offcore patches.
>
> What's the status of those, Stephane reported some problems last I
> remember?
There is an event scheduling issue.
http://marc.info/?l=linux-kernel&m=129842356323752&w=2
I'll look at it.
>
>
> > #define INTEL_EVENT_CONSTRAINT(c, n) \
> > EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
> > +#define INTEL_EVENT_CONSTRAINT2(c, n) \
> > + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
>
> That's a particularly bad name, how about something like
>
> INTEL_UEVENT_CONSTRAINT or somesuch.
OK.
But any case it's duplicated with PEBS_EVENT_CONSTRAINT.
#define PEBS_EVENT_CONSTRAINT(c, n) \
EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
>
> > @@ -702,7 +738,13 @@ static void intel_ds_init(void)
> > printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
> > x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
> > x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
> > - x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
> > + switch (boot_cpu_data.x86_model) {
> > + case 42: /* SandyBridge */
> > + x86_pmu.pebs_constraints = intel_snb_pebs_events;
> > + break;
> > + default:
> > + x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
> > + }
> > break;
> >
> > default:
>
> We already have this massive model switch right after this function,
> might as well move the pebs constraint assignment there.
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 -tip] perf: x86, add SandyBridge support
2011-02-28 14:21 ` Lin Ming
@ 2011-02-28 14:24 ` Peter Zijlstra
2011-02-28 14:45 ` Lin Ming
0 siblings, 1 reply; 30+ messages in thread
From: Peter Zijlstra @ 2011-02-28 14:24 UTC (permalink / raw)
To: Lin Ming; +Cc: Ingo Molnar, Stephane Eranian, Andi Kleen, lkml
On Mon, 2011-02-28 at 22:21 +0800, Lin Ming wrote:
> > > #define INTEL_EVENT_CONSTRAINT(c, n) \
> > > EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
> > > +#define INTEL_EVENT_CONSTRAINT2(c, n) \
> > > + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
> >
> > That's a particularly bad name, how about something like
> >
> > INTEL_UEVENT_CONSTRAINT or somesuch.
>
> OK.
>
> But any case it's duplicated with PEBS_EVENT_CONSTRAINT.
>
> #define PEBS_EVENT_CONSTRAINT(c, n) \
> EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
Ah, indeed, so maybe we can remove PEBS_EVENT_CONSTRAINT and use regular
INTEL_*_CONSTRAINTS there, that could also help for PEBS events where
all umasks are allowed (not sure there are any such things but the SNB
PEBS list was quite large).
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 -tip] perf: x86, add SandyBridge support
2011-02-28 14:03 ` Lin Ming
@ 2011-02-28 14:28 ` Lin Ming
0 siblings, 0 replies; 30+ messages in thread
From: Lin Ming @ 2011-02-28 14:28 UTC (permalink / raw)
To: Stephane Eranian; +Cc: Peter Zijlstra, Ingo Molnar, Andi Kleen, lkml
On Mon, 2011-02-28 at 22:03 +0800, Lin Ming wrote:
> > >>
> > >> Not quite. For event 0xd0, you are not listing the right umask combinations.
> > >> The following combinations are supported for event 0xd0:
> > >>
> > >> 0x5381d0 snb::MEM_UOP_RETIRED:ANY_LOADS
> > >> 0x5382d0 snb::MEM_UOP_RETIRED:ANY_STORES
> > >> 0x5321d0 snb::MEM_UOP_RETIRED:LOCK_LOADS
> > >> 0x5322d0 snb::MEM_UOP_RETIRED:LOCK_STORES
> > >> 0x5341d0 snb::MEM_UOP_RETIRED:SPLIT_LOADS
> > >> 0x5342d0 snb::MEM_UOP_RETIRED:SPLIT_STORES
> > >> 0x5311d0 snb::MEM_UOP_RETIRED:STLB_MISS_LOADS
> > >> 0x5312d0 snb::MEM_UOP_RETIRED:STLB_MISS_STORES
> > >>
> > >> In other words, bit 0-3 of the umask cannot be zero.
> > >
> > > I got the umask from "Table 30-20. PEBS Performance Events for Intel
> > > microarchitecture code name Sandy Bridge".
> > >
> > > But from "Table A-2. Non-Architectural Performance Events In the
> > > Processor Core for Intel Core Processor 2xxx Series", the combinations
> > > are needed as you show above.
> > >
> > > Which one is correct?
> > >
> > I think Table A-2 is correct. Umasks 10h, 20h, 40h, 80h MUST be combined
> > to collect something meaningful.
>
> Yes, thanks for figuring this out.
I also fixed the dTLB-loads/stores events.
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 3085868..66712dd 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -160,11 +160,11 @@ static __initconst const u64 snb_hw_cache_event_ids
},
[ C(DTLB) ] = {
[ C(OP_READ) ] = {
- [ C(RESULT_ACCESS) ] = 0x01d0, /* MEM_UOP_RETIRED.LOADS */
+ [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOP_RETIRED.ANY_LOADS */
[ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.CAUSES_A_WALK */
},
[ C(OP_WRITE) ] = {
- [ C(RESULT_ACCESS) ] = 0x02d0, /* MEM_UOP_RETIRED.STORES */
+ [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOP_RETIRED.ANY_STORES */
[ C(RESULT_MISS) ] = 0x0149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
},
[ C(OP_PREFETCH) ] = {
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index e60f91b..2128755 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -406,12 +406,14 @@ static struct event_constraint intel_snb_pebs_events[] = {
PEBS_EVENT_CONSTRAINT(0x20c5, 0xf), /* BR_MISP_RETIRED.TAKEN */
PEBS_EVENT_CONSTRAINT(0x01cd, 0x8), /* MEM_TRANS_RETIRED.LOAD_LATENCY */
PEBS_EVENT_CONSTRAINT(0x02cd, 0x8), /* MEM_TRANS_RETIRED.PRECISE_STORE */
- PEBS_EVENT_CONSTRAINT(0x01d0, 0xf), /* MEM_UOP_RETIRED.LOADS */
- PEBS_EVENT_CONSTRAINT(0x02d0, 0xf), /* MEM_UOP_RETIRED.STORES */
- PEBS_EVENT_CONSTRAINT(0x10d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS */
- PEBS_EVENT_CONSTRAINT(0x20d0, 0xf), /* MEM_UOP_RETIRED.LOCK */
- PEBS_EVENT_CONSTRAINT(0x40d0, 0xf), /* MEM_UOP_RETIRED.SPLIT */
- PEBS_EVENT_CONSTRAINT(0x80d0, 0xf), /* MEM_UOP_RETIRED.ALL */
+ PEBS_EVENT_CONSTRAINT(0x11d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_LOADS */
+ PEBS_EVENT_CONSTRAINT(0x12d0, 0xf), /* MEM_UOP_RETIRED.STLB_MISS_STORES */
+ PEBS_EVENT_CONSTRAINT(0x21d0, 0xf), /* MEM_UOP_RETIRED.LOCK_LOADS */
+ PEBS_EVENT_CONSTRAINT(0x22d0, 0xf), /* MEM_UOP_RETIRED.LOCK_STORES */
+ PEBS_EVENT_CONSTRAINT(0x41d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_LOADS */
+ PEBS_EVENT_CONSTRAINT(0x42d0, 0xf), /* MEM_UOP_RETIRED.SPLIT_STORES */
+ PEBS_EVENT_CONSTRAINT(0x81d0, 0xf), /* MEM_UOP_RETIRED.ANY_LOADS */
+ PEBS_EVENT_CONSTRAINT(0x82d0, 0xf), /* MEM_UOP_RETIRED.ANY_STORES */
PEBS_EVENT_CONSTRAINT(0x01d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L1_HIT */
PEBS_EVENT_CONSTRAINT(0x02d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.L2_HIT */
PEBS_EVENT_CONSTRAINT(0x04d1, 0xf), /* MEM_LOAD_UOPS_RETIRED.LLC_HIT */
^ permalink raw reply related [flat|nested] 30+ messages in thread
* Re: [PATCH v2 -tip] perf: x86, add SandyBridge support
2011-02-28 12:25 ` Stephane Eranian
@ 2011-02-28 14:33 ` Lin Ming
2011-02-28 14:43 ` Stephane Eranian
0 siblings, 1 reply; 30+ messages in thread
From: Lin Ming @ 2011-02-28 14:33 UTC (permalink / raw)
To: Stephane Eranian; +Cc: Peter Zijlstra, Ingo Molnar, Andi Kleen, lkml
On Mon, 2011-02-28 at 20:25 +0800, Stephane Eranian wrote:
> On Mon, Feb 28, 2011 at 10:15 AM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> > On Mon, 2011-02-28 at 15:22 +0800, Lin Ming wrote:
> >> This patch adds basic SandyBridge support, including hardware cache
> >> events and PEBS events support.
> >>
> >> LLC-* hareware cache events don't work for now, it depends on the
> >> offcore patches.
> >
> > What's the status of those, Stephane reported some problems last I
> > remember?
> >
> >
> >> #define INTEL_EVENT_CONSTRAINT(c, n) \
> >> EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
> >> +#define INTEL_EVENT_CONSTRAINT2(c, n) \
> >> + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
> >
> > That's a particularly bad name, how about something like
> >
> > INTEL_UEVENT_CONSTRAINT or somesuch.
> >
> >> @@ -702,7 +738,13 @@ static void intel_ds_init(void)
> >> printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
> >> x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
> >> x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
> >> - x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
> >> + switch (boot_cpu_data.x86_model) {
> >> + case 42: /* SandyBridge */
> >> + x86_pmu.pebs_constraints = intel_snb_pebs_events;
> >> + break;
> >> + default:
> >> + x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
> >> + }
> >> break;
> >>
> >> default:
> >
> > We already have this massive model switch right after this function,
> > might as well move the pebs constraint assignment there.
> >
> My PEBS patch was going to cleanup this part, though it was using
> it's own switch
> statement.
Did you send out the patch? A link?
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 -tip] perf: x86, add SandyBridge support
2011-02-28 14:33 ` Lin Ming
@ 2011-02-28 14:43 ` Stephane Eranian
2011-02-28 14:52 ` Lin Ming
0 siblings, 1 reply; 30+ messages in thread
From: Stephane Eranian @ 2011-02-28 14:43 UTC (permalink / raw)
To: Lin Ming; +Cc: Peter Zijlstra, Ingo Molnar, Andi Kleen, lkml
On Mon, Feb 28, 2011 at 3:33 PM, Lin Ming <ming.m.lin@intel.com> wrote:
> On Mon, 2011-02-28 at 20:25 +0800, Stephane Eranian wrote:
>> On Mon, Feb 28, 2011 at 10:15 AM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
>> > On Mon, 2011-02-28 at 15:22 +0800, Lin Ming wrote:
>> >> This patch adds basic SandyBridge support, including hardware cache
>> >> events and PEBS events support.
>> >>
>> >> LLC-* hareware cache events don't work for now, it depends on the
>> >> offcore patches.
>> >
>> > What's the status of those, Stephane reported some problems last I
>> > remember?
>> >
>> >
>> >> #define INTEL_EVENT_CONSTRAINT(c, n) \
>> >> EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
>> >> +#define INTEL_EVENT_CONSTRAINT2(c, n) \
>> >> + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
>> >
>> > That's a particularly bad name, how about something like
>> >
>> > INTEL_UEVENT_CONSTRAINT or somesuch.
>> >
>> >> @@ -702,7 +738,13 @@ static void intel_ds_init(void)
>> >> printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
>> >> x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
>> >> x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
>> >> - x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
>> >> + switch (boot_cpu_data.x86_model) {
>> >> + case 42: /* SandyBridge */
>> >> + x86_pmu.pebs_constraints = intel_snb_pebs_events;
>> >> + break;
>> >> + default:
>> >> + x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
>> >> + }
>> >> break;
>> >>
>> >> default:
>> >
>> > We already have this massive model switch right after this function,
>> > might as well move the pebs constraint assignment there.
>> >
>> My PEBS patch was going to cleanup this part, though it was using
>> it's own switch
>> statement.
>
> Did you send out the patch? A link?
>
For what?
>
>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 -tip] perf: x86, add SandyBridge support
2011-02-28 14:24 ` Peter Zijlstra
@ 2011-02-28 14:45 ` Lin Ming
2011-02-28 14:46 ` Stephane Eranian
0 siblings, 1 reply; 30+ messages in thread
From: Lin Ming @ 2011-02-28 14:45 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: Ingo Molnar, Stephane Eranian, Andi Kleen, lkml
On Mon, 2011-02-28 at 22:24 +0800, Peter Zijlstra wrote:
> On Mon, 2011-02-28 at 22:21 +0800, Lin Ming wrote:
> > > > #define INTEL_EVENT_CONSTRAINT(c, n) \
> > > > EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
> > > > +#define INTEL_EVENT_CONSTRAINT2(c, n) \
> > > > + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
> > >
> > > That's a particularly bad name, how about something like
> > >
> > > INTEL_UEVENT_CONSTRAINT or somesuch.
> >
> > OK.
> >
> > But any case it's duplicated with PEBS_EVENT_CONSTRAINT.
> >
> > #define PEBS_EVENT_CONSTRAINT(c, n) \
> > EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
>
> Ah, indeed, so maybe we can remove PEBS_EVENT_CONSTRAINT and use regular
> INTEL_*_CONSTRAINTS there, that could also help for PEBS events where
> all umasks are allowed (not sure there are any such things but the SNB
> PEBS list was quite large).
Yes, there are, for example, BR_INST_RETIRED.*
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 -tip] perf: x86, add SandyBridge support
2011-02-28 14:45 ` Lin Ming
@ 2011-02-28 14:46 ` Stephane Eranian
0 siblings, 0 replies; 30+ messages in thread
From: Stephane Eranian @ 2011-02-28 14:46 UTC (permalink / raw)
To: Lin Ming; +Cc: Peter Zijlstra, Ingo Molnar, Andi Kleen, lkml
On Mon, Feb 28, 2011 at 3:45 PM, Lin Ming <ming.m.lin@intel.com> wrote:
> On Mon, 2011-02-28 at 22:24 +0800, Peter Zijlstra wrote:
>> On Mon, 2011-02-28 at 22:21 +0800, Lin Ming wrote:
>> > > > #define INTEL_EVENT_CONSTRAINT(c, n) \
>> > > > EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
>> > > > +#define INTEL_EVENT_CONSTRAINT2(c, n) \
>> > > > + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
>> > >
>> > > That's a particularly bad name, how about something like
>> > >
>> > > INTEL_UEVENT_CONSTRAINT or somesuch.
>> >
>> > OK.
>> >
>> > But any case it's duplicated with PEBS_EVENT_CONSTRAINT.
>> >
>> > #define PEBS_EVENT_CONSTRAINT(c, n) \
>> > EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
>>
>> Ah, indeed, so maybe we can remove PEBS_EVENT_CONSTRAINT and use regular
>> INTEL_*_CONSTRAINTS there, that could also help for PEBS events where
>> all umasks are allowed (not sure there are any such things but the SNB
>> PEBS list was quite large).
>
> Yes, there are, for example, BR_INST_RETIRED.*
>
I think most of the time all umasks are allowed.
>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 -tip] perf: x86, add SandyBridge support
2011-02-28 14:43 ` Stephane Eranian
@ 2011-02-28 14:52 ` Lin Ming
2011-02-28 14:55 ` Stephane Eranian
0 siblings, 1 reply; 30+ messages in thread
From: Lin Ming @ 2011-02-28 14:52 UTC (permalink / raw)
To: Stephane Eranian; +Cc: Peter Zijlstra, Ingo Molnar, Andi Kleen, lkml
On Mon, 2011-02-28 at 22:43 +0800, Stephane Eranian wrote:
> On Mon, Feb 28, 2011 at 3:33 PM, Lin Ming <ming.m.lin@intel.com> wrote:
> > On Mon, 2011-02-28 at 20:25 +0800, Stephane Eranian wrote:
> >> On Mon, Feb 28, 2011 at 10:15 AM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> >> > On Mon, 2011-02-28 at 15:22 +0800, Lin Ming wrote:
> >> >> This patch adds basic SandyBridge support, including hardware cache
> >> >> events and PEBS events support.
> >> >>
> >> >> LLC-* hareware cache events don't work for now, it depends on the
> >> >> offcore patches.
> >> >
> >> > What's the status of those, Stephane reported some problems last I
> >> > remember?
> >> >
> >> >
> >> >> #define INTEL_EVENT_CONSTRAINT(c, n) \
> >> >> EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
> >> >> +#define INTEL_EVENT_CONSTRAINT2(c, n) \
> >> >> + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
> >> >
> >> > That's a particularly bad name, how about something like
> >> >
> >> > INTEL_UEVENT_CONSTRAINT or somesuch.
> >> >
> >> >> @@ -702,7 +738,13 @@ static void intel_ds_init(void)
> >> >> printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
> >> >> x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
> >> >> x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
> >> >> - x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
> >> >> + switch (boot_cpu_data.x86_model) {
> >> >> + case 42: /* SandyBridge */
> >> >> + x86_pmu.pebs_constraints = intel_snb_pebs_events;
> >> >> + break;
> >> >> + default:
> >> >> + x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
> >> >> + }
> >> >> break;
> >> >>
> >> >> default:
> >> >
> >> > We already have this massive model switch right after this function,
> >> > might as well move the pebs constraint assignment there.
> >> >
> >> My PEBS patch was going to cleanup this part, though it was using
> >> it's own switch
> >> statement.
> >
> > Did you send out the patch? A link?
> >
> For what?
I mean your PEBS patch to do the cleanup.
>
> >
> >
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 -tip] perf: x86, add SandyBridge support
2011-02-28 14:52 ` Lin Ming
@ 2011-02-28 14:55 ` Stephane Eranian
0 siblings, 0 replies; 30+ messages in thread
From: Stephane Eranian @ 2011-02-28 14:55 UTC (permalink / raw)
To: Lin Ming; +Cc: Peter Zijlstra, Ingo Molnar, Andi Kleen, lkml
On Mon, Feb 28, 2011 at 3:52 PM, Lin Ming <ming.m.lin@intel.com> wrote:
> On Mon, 2011-02-28 at 22:43 +0800, Stephane Eranian wrote:
>> On Mon, Feb 28, 2011 at 3:33 PM, Lin Ming <ming.m.lin@intel.com> wrote:
>> > On Mon, 2011-02-28 at 20:25 +0800, Stephane Eranian wrote:
>> >> On Mon, Feb 28, 2011 at 10:15 AM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
>> >> > On Mon, 2011-02-28 at 15:22 +0800, Lin Ming wrote:
>> >> >> This patch adds basic SandyBridge support, including hardware cache
>> >> >> events and PEBS events support.
>> >> >>
>> >> >> LLC-* hareware cache events don't work for now, it depends on the
>> >> >> offcore patches.
>> >> >
>> >> > What's the status of those, Stephane reported some problems last I
>> >> > remember?
>> >> >
>> >> >
>> >> >> #define INTEL_EVENT_CONSTRAINT(c, n) \
>> >> >> EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
>> >> >> +#define INTEL_EVENT_CONSTRAINT2(c, n) \
>> >> >> + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
>> >> >
>> >> > That's a particularly bad name, how about something like
>> >> >
>> >> > INTEL_UEVENT_CONSTRAINT or somesuch.
>> >> >
>> >> >> @@ -702,7 +738,13 @@ static void intel_ds_init(void)
>> >> >> printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
>> >> >> x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
>> >> >> x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
>> >> >> - x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
>> >> >> + switch (boot_cpu_data.x86_model) {
>> >> >> + case 42: /* SandyBridge */
>> >> >> + x86_pmu.pebs_constraints = intel_snb_pebs_events;
>> >> >> + break;
>> >> >> + default:
>> >> >> + x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
>> >> >> + }
>> >> >> break;
>> >> >>
>> >> >> default:
>> >> >
>> >> > We already have this massive model switch right after this function,
>> >> > might as well move the pebs constraint assignment there.
>> >> >
>> >> My PEBS patch was going to cleanup this part, though it was using
>> >> it's own switch
>> >> statement.
>> >
>> > Did you send out the patch? A link?
>> >
>> For what?
>
> I mean your PEBS patch to do the cleanup.
>
I am updating it based on our discussion. Running a few tests and it should
be out today.
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 -tip] perf: x86, add SandyBridge support
2011-02-28 9:15 ` Peter Zijlstra
2011-02-28 12:25 ` Stephane Eranian
2011-02-28 14:21 ` Lin Ming
@ 2011-02-28 14:56 ` Lin Ming
2011-02-28 15:11 ` Peter Zijlstra
2011-03-01 7:43 ` Stephane Eranian
3 siblings, 1 reply; 30+ messages in thread
From: Lin Ming @ 2011-02-28 14:56 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: Ingo Molnar, Stephane Eranian, Andi Kleen, lkml
On Mon, 2011-02-28 at 17:15 +0800, Peter Zijlstra wrote:
> On Mon, 2011-02-28 at 15:22 +0800, Lin Ming wrote:
> > This patch adds basic SandyBridge support, including hardware cache
> > events and PEBS events support.
> >
> > LLC-* hareware cache events don't work for now, it depends on the
> > offcore patches.
>
> What's the status of those, Stephane reported some problems last I
> remember?
>
>
> > #define INTEL_EVENT_CONSTRAINT(c, n) \
> > EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
> > +#define INTEL_EVENT_CONSTRAINT2(c, n) \
> > + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
>
> That's a particularly bad name, how about something like
>
> INTEL_UEVENT_CONSTRAINT or somesuch.
>
> > @@ -702,7 +738,13 @@ static void intel_ds_init(void)
> > printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
> > x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
> > x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
> > - x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
> > + switch (boot_cpu_data.x86_model) {
> > + case 42: /* SandyBridge */
> > + x86_pmu.pebs_constraints = intel_snb_pebs_events;
> > + break;
> > + default:
> > + x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
> > + }
> > break;
> >
> > default:
>
> We already have this massive model switch right after this function,
> might as well move the pebs constraint assignment there.
How about below?
Leave the default pebs constraint assignment in intel_ds_init, and
overwrite it for snb in intel_pmu_init.
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 66712dd..d6edc8a 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1182,6 +1182,7 @@ static __init int intel_pmu_init(void)
intel_pmu_lbr_init_nhm();
x86_pmu.event_constraints = intel_snb_event_constraints;
+ x86_pmu.pebs_constraints = intel_snb_pebs_events;
pr_cont("SandyBridge events, ");
break;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 2128755..3e5530b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -740,13 +740,6 @@ static void intel_ds_init(void)
printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
- switch (boot_cpu_data.x86_model) {
- case 42: /* SandyBridge */
- x86_pmu.pebs_constraints = intel_snb_pebs_events;
- break;
- default:
- x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
- }
break;
default:
^ permalink raw reply related [flat|nested] 30+ messages in thread
* Re: [PATCH v2 -tip] perf: x86, add SandyBridge support
2011-02-28 14:56 ` Lin Ming
@ 2011-02-28 15:11 ` Peter Zijlstra
2011-03-01 0:32 ` Lin Ming
0 siblings, 1 reply; 30+ messages in thread
From: Peter Zijlstra @ 2011-02-28 15:11 UTC (permalink / raw)
To: Lin Ming; +Cc: Ingo Molnar, Stephane Eranian, Andi Kleen, lkml
On Mon, 2011-02-28 at 22:56 +0800, Lin Ming wrote:
> @@ -740,13 +740,6 @@ static void intel_ds_init(void)
> printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
> x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
> x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
> - switch (boot_cpu_data.x86_model) {
> - case 42: /* SandyBridge */
> - x86_pmu.pebs_constraints = intel_snb_pebs_events;
> - break;
> - default:
> - x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
> - }
> break;
it looks like you lost the nhm table assignment
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 -tip] perf: x86, add SandyBridge support
2011-02-28 15:11 ` Peter Zijlstra
@ 2011-03-01 0:32 ` Lin Ming
0 siblings, 0 replies; 30+ messages in thread
From: Lin Ming @ 2011-03-01 0:32 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: Ingo Molnar, Stephane Eranian, Andi Kleen, lkml
On Mon, 2011-02-28 at 23:11 +0800, Peter Zijlstra wrote:
> On Mon, 2011-02-28 at 22:56 +0800, Lin Ming wrote:
> > @@ -740,13 +740,6 @@ static void intel_ds_init(void)
> > printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
> > x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
> > x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
> > - switch (boot_cpu_data.x86_model) {
> > - case 42: /* SandyBridge */
> > - x86_pmu.pebs_constraints = intel_snb_pebs_events;
> > - break;
> > - default:
> > - x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
> > - }
> > break;
>
> it looks like you lost the nhm table assignment
Ah, yes. Will update all in v3 patch.
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 -tip] perf: x86, add SandyBridge support
2011-02-28 9:15 ` Peter Zijlstra
` (2 preceding siblings ...)
2011-02-28 14:56 ` Lin Ming
@ 2011-03-01 7:43 ` Stephane Eranian
2011-03-01 8:21 ` Lin Ming
2011-03-01 8:45 ` Lin Ming
3 siblings, 2 replies; 30+ messages in thread
From: Stephane Eranian @ 2011-03-01 7:43 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: Lin Ming, Ingo Molnar, Andi Kleen, lkml
On Mon, Feb 28, 2011 at 10:15 AM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> On Mon, 2011-02-28 at 15:22 +0800, Lin Ming wrote:
>> This patch adds basic SandyBridge support, including hardware cache
>> events and PEBS events support.
>>
>> LLC-* hareware cache events don't work for now, it depends on the
>> offcore patches.
>
> What's the status of those, Stephane reported some problems last I
> remember?
>
I tried the trick I mentioned and it seems to work.
Something like below with hwc->extra_alloc.
Could probably find a better name for that field.
static struct event_constraint *
intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
{
struct hw_perf_event *hwc = &event->hw;
unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
struct event_constraint *c;
struct intel_percore *pc;
struct er_account *era;
int i;
int free_slot;
int found;
if (!x86_pmu.percore_constraints)
return NULL;
if (hwc->extra_alloc)
return NULL;
for (c = x86_pmu.percore_constraints; c->cmask; c++) {
if (e != c->code)
continue;
/*
* Allocate resource per core.
*/
c = NULL;
pc = cpuc->per_core;
if (!pc)
break;
c = &emptyconstraint;
raw_spin_lock(&pc->lock);
free_slot = -1;
found = 0;
for (i = 0; i < MAX_EXTRA_REGS; i++) {
era = &pc->regs[i];
if (era->ref > 0 && hwc->extra_reg == era->extra_reg) {
/* Allow sharing same config */
if (hwc->extra_config == era->extra_config) {
era->ref++;
cpuc->percore_used = 1;
hwc->extra_alloc = 1;
c = NULL;
}
/* else conflict */
found = 1;
break;
} else if (era->ref == 0 && free_slot == -1)
free_slot = i;
}
if (!found && free_slot != -1) {
era = &pc->regs[free_slot];
era->ref = 1;
era->extra_reg = hwc->extra_reg;
era->extra_config = hwc->extra_config;
cpuc->percore_used = 1;
hwc->extra_alloc = 1;
c = NULL;
}
raw_spin_unlock(&pc->lock);
return c;
}
return NULL;
}
static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
struct perf_event *event)
{
struct extra_reg *er;
struct intel_percore *pc;
struct er_account *era;
struct hw_perf_event *hwc = &event->hw;
int i, allref;
if (!cpuc->percore_used)
return;
for (er = x86_pmu.extra_regs; er->msr; er++) {
if (er->event != (hwc->config & er->config_mask))
continue;
pc = cpuc->per_core;
raw_spin_lock(&pc->lock);
for (i = 0; i < MAX_EXTRA_REGS; i++) {
era = &pc->regs[i];
if (era->ref > 0 &&
era->extra_config == hwc->extra_config &&
era->extra_reg == er->msr) {
era->ref--;
hwc->extra_alloc = 0;
break;
}
}
allref = 0;
for (i = 0; i < MAX_EXTRA_REGS; i++)
allref += pc->regs[i].ref;
if (allref == 0)
cpuc->percore_used = 0;
raw_spin_unlock(&pc->lock);
break;
}
}
>
>> #define INTEL_EVENT_CONSTRAINT(c, n) \
>> EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
>> +#define INTEL_EVENT_CONSTRAINT2(c, n) \
>> + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
>
> That's a particularly bad name, how about something like
>
> INTEL_UEVENT_CONSTRAINT or somesuch.
>
>> @@ -702,7 +738,13 @@ static void intel_ds_init(void)
>> printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
>> x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
>> x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
>> - x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
>> + switch (boot_cpu_data.x86_model) {
>> + case 42: /* SandyBridge */
>> + x86_pmu.pebs_constraints = intel_snb_pebs_events;
>> + break;
>> + default:
>> + x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
>> + }
>> break;
>>
>> default:
>
> We already have this massive model switch right after this function,
> might as well move the pebs constraint assignment there.
>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 -tip] perf: x86, add SandyBridge support
2011-03-01 7:43 ` Stephane Eranian
@ 2011-03-01 8:21 ` Lin Ming
2011-03-01 8:45 ` Lin Ming
1 sibling, 0 replies; 30+ messages in thread
From: Lin Ming @ 2011-03-01 8:21 UTC (permalink / raw)
To: Stephane Eranian; +Cc: Peter Zijlstra, Ingo Molnar, Andi Kleen, lkml
On Tue, 2011-03-01 at 15:43 +0800, Stephane Eranian wrote:
> On Mon, Feb 28, 2011 at 10:15 AM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> > On Mon, 2011-02-28 at 15:22 +0800, Lin Ming wrote:
> >> This patch adds basic SandyBridge support, including hardware cache
> >> events and PEBS events support.
> >>
> >> LLC-* hareware cache events don't work for now, it depends on the
> >> offcore patches.
> >
> > What's the status of those, Stephane reported some problems last I
> > remember?
> >
> I tried the trick I mentioned and it seems to work.
Let me make sure I understand the problem correctly.
perf top -C 0 -e LLC-loads -e LLC-store-misses
LLC-loads and LLC-store-misses events use different extra_config.
1. LLC-loads submitted successfully, era->ref=1
2. The LLC-loads event is resubmitted because of incremental scheduling,
era->ref=2.
3. LLC-store-misses is submitted, but fail because conflict.
Step 2 is wrong, the reference count should not be incremented with the
same event.
Is my understanding right?
Lin Ming
>
> Something like below with hwc->extra_alloc.
> Could probably find a better name for that field.
>
> static struct event_constraint *
> intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
> {
> struct hw_perf_event *hwc = &event->hw;
> unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
> struct event_constraint *c;
> struct intel_percore *pc;
> struct er_account *era;
> int i;
> int free_slot;
> int found;
>
> if (!x86_pmu.percore_constraints)
> return NULL;
>
> if (hwc->extra_alloc)
> return NULL;
>
> for (c = x86_pmu.percore_constraints; c->cmask; c++) {
> if (e != c->code)
> continue;
>
> /*
> * Allocate resource per core.
> */
> c = NULL;
> pc = cpuc->per_core;
> if (!pc)
> break;
> c = &emptyconstraint;
> raw_spin_lock(&pc->lock);
> free_slot = -1;
> found = 0;
> for (i = 0; i < MAX_EXTRA_REGS; i++) {
> era = &pc->regs[i];
> if (era->ref > 0 && hwc->extra_reg == era->extra_reg) {
> /* Allow sharing same config */
> if (hwc->extra_config == era->extra_config) {
> era->ref++;
> cpuc->percore_used = 1;
> hwc->extra_alloc = 1;
> c = NULL;
> }
> /* else conflict */
> found = 1;
> break;
> } else if (era->ref == 0 && free_slot == -1)
> free_slot = i;
> }
> if (!found && free_slot != -1) {
> era = &pc->regs[free_slot];
> era->ref = 1;
> era->extra_reg = hwc->extra_reg;
> era->extra_config = hwc->extra_config;
> cpuc->percore_used = 1;
> hwc->extra_alloc = 1;
> c = NULL;
> }
> raw_spin_unlock(&pc->lock);
> return c;
> }
>
> return NULL;
> }
>
> static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
> struct perf_event *event)
> {
> struct extra_reg *er;
> struct intel_percore *pc;
> struct er_account *era;
> struct hw_perf_event *hwc = &event->hw;
> int i, allref;
>
> if (!cpuc->percore_used)
> return;
>
> for (er = x86_pmu.extra_regs; er->msr; er++) {
> if (er->event != (hwc->config & er->config_mask))
> continue;
>
> pc = cpuc->per_core;
> raw_spin_lock(&pc->lock);
> for (i = 0; i < MAX_EXTRA_REGS; i++) {
> era = &pc->regs[i];
> if (era->ref > 0 &&
> era->extra_config == hwc->extra_config &&
> era->extra_reg == er->msr) {
> era->ref--;
> hwc->extra_alloc = 0;
> break;
> }
> }
> allref = 0;
> for (i = 0; i < MAX_EXTRA_REGS; i++)
> allref += pc->regs[i].ref;
> if (allref == 0)
> cpuc->percore_used = 0;
> raw_spin_unlock(&pc->lock);
> break;
> }
> }
>
> >
> >> #define INTEL_EVENT_CONSTRAINT(c, n) \
> >> EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
> >> +#define INTEL_EVENT_CONSTRAINT2(c, n) \
> >> + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
> >
> > That's a particularly bad name, how about something like
> >
> > INTEL_UEVENT_CONSTRAINT or somesuch.
> >
> >> @@ -702,7 +738,13 @@ static void intel_ds_init(void)
> >> printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
> >> x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
> >> x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
> >> - x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
> >> + switch (boot_cpu_data.x86_model) {
> >> + case 42: /* SandyBridge */
> >> + x86_pmu.pebs_constraints = intel_snb_pebs_events;
> >> + break;
> >> + default:
> >> + x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
> >> + }
> >> break;
> >>
> >> default:
> >
> > We already have this massive model switch right after this function,
> > might as well move the pebs constraint assignment there.
> >
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 -tip] perf: x86, add SandyBridge support
2011-03-01 7:43 ` Stephane Eranian
2011-03-01 8:21 ` Lin Ming
@ 2011-03-01 8:45 ` Lin Ming
2011-03-01 8:57 ` Stephane Eranian
1 sibling, 1 reply; 30+ messages in thread
From: Lin Ming @ 2011-03-01 8:45 UTC (permalink / raw)
To: Stephane Eranian; +Cc: Peter Zijlstra, Ingo Molnar, Andi Kleen, lkml
On Tue, 2011-03-01 at 15:43 +0800, Stephane Eranian wrote:
> On Mon, Feb 28, 2011 at 10:15 AM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> > On Mon, 2011-02-28 at 15:22 +0800, Lin Ming wrote:
> >> This patch adds basic SandyBridge support, including hardware cache
> >> events and PEBS events support.
> >>
> >> LLC-* hareware cache events don't work for now, it depends on the
> >> offcore patches.
> >
> > What's the status of those, Stephane reported some problems last I
> > remember?
> >
> I tried the trick I mentioned and it seems to work.
>
> Something like below with hwc->extra_alloc.
> Could probably find a better name for that field.
Stephane,
I'll integrate the changes below into the offcore patches, OK?
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index f152930..ac1d100 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -906,7 +906,7 @@ intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
int free_slot;
int found;
- if (!x86_pmu.percore_constraints)
+ if (!x86_pmu.percore_constraints || hwc->extra_alloc)
return NULL;
for (c = x86_pmu.percore_constraints; c->cmask; c++) {
@@ -931,6 +931,7 @@ intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
if (hwc->extra_config == era->extra_config) {
era->ref++;
cpuc->percore_used = 1;
+ hwc->extra_alloc = 1;
c = NULL;
}
/* else conflict */
@@ -945,6 +946,7 @@ intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
era->extra_reg = hwc->extra_reg;
era->extra_config = hwc->extra_config;
cpuc->percore_used = 1;
+ hwc->extra_alloc = 1;
c = NULL;
}
raw_spin_unlock(&pc->lock);
@@ -998,6 +1000,7 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
era->extra_config == hwc->extra_config &&
era->extra_reg == er->msr) {
era->ref--;
+ hwc->extra_alloc = 0;
break;
}
}
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f531ce3..dbbf33a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -546,6 +546,7 @@ struct hw_perf_event {
int last_cpu;
unsigned int extra_reg;
u64 extra_config;
+ int extra_alloc;
};
struct { /* software */
struct hrtimer hrtimer;
>
> static struct event_constraint *
> intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
> {
> struct hw_perf_event *hwc = &event->hw;
> unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
> struct event_constraint *c;
> struct intel_percore *pc;
> struct er_account *era;
> int i;
> int free_slot;
> int found;
>
> if (!x86_pmu.percore_constraints)
> return NULL;
>
> if (hwc->extra_alloc)
> return NULL;
>
> for (c = x86_pmu.percore_constraints; c->cmask; c++) {
> if (e != c->code)
> continue;
>
> /*
> * Allocate resource per core.
> */
> c = NULL;
> pc = cpuc->per_core;
> if (!pc)
> break;
> c = &emptyconstraint;
> raw_spin_lock(&pc->lock);
> free_slot = -1;
> found = 0;
> for (i = 0; i < MAX_EXTRA_REGS; i++) {
> era = &pc->regs[i];
> if (era->ref > 0 && hwc->extra_reg == era->extra_reg) {
> /* Allow sharing same config */
> if (hwc->extra_config == era->extra_config) {
> era->ref++;
> cpuc->percore_used = 1;
> hwc->extra_alloc = 1;
> c = NULL;
> }
> /* else conflict */
> found = 1;
> break;
> } else if (era->ref == 0 && free_slot == -1)
> free_slot = i;
> }
> if (!found && free_slot != -1) {
> era = &pc->regs[free_slot];
> era->ref = 1;
> era->extra_reg = hwc->extra_reg;
> era->extra_config = hwc->extra_config;
> cpuc->percore_used = 1;
> hwc->extra_alloc = 1;
> c = NULL;
> }
> raw_spin_unlock(&pc->lock);
> return c;
> }
>
> return NULL;
> }
>
> static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
> struct perf_event *event)
> {
> struct extra_reg *er;
> struct intel_percore *pc;
> struct er_account *era;
> struct hw_perf_event *hwc = &event->hw;
> int i, allref;
>
> if (!cpuc->percore_used)
> return;
>
> for (er = x86_pmu.extra_regs; er->msr; er++) {
> if (er->event != (hwc->config & er->config_mask))
> continue;
>
> pc = cpuc->per_core;
> raw_spin_lock(&pc->lock);
> for (i = 0; i < MAX_EXTRA_REGS; i++) {
> era = &pc->regs[i];
> if (era->ref > 0 &&
> era->extra_config == hwc->extra_config &&
> era->extra_reg == er->msr) {
> era->ref--;
> hwc->extra_alloc = 0;
> break;
> }
> }
> allref = 0;
> for (i = 0; i < MAX_EXTRA_REGS; i++)
> allref += pc->regs[i].ref;
> if (allref == 0)
> cpuc->percore_used = 0;
> raw_spin_unlock(&pc->lock);
> break;
> }
> }
>
> >
> >> #define INTEL_EVENT_CONSTRAINT(c, n) \
> >> EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
> >> +#define INTEL_EVENT_CONSTRAINT2(c, n) \
> >> + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
> >
> > That's a particularly bad name, how about something like
> >
> > INTEL_UEVENT_CONSTRAINT or somesuch.
> >
> >> @@ -702,7 +738,13 @@ static void intel_ds_init(void)
> >> printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
> >> x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
> >> x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
> >> - x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
> >> + switch (boot_cpu_data.x86_model) {
> >> + case 42: /* SandyBridge */
> >> + x86_pmu.pebs_constraints = intel_snb_pebs_events;
> >> + break;
> >> + default:
> >> + x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
> >> + }
> >> break;
> >>
> >> default:
> >
> > We already have this massive model switch right after this function,
> > might as well move the pebs constraint assignment there.
> >
^ permalink raw reply related [flat|nested] 30+ messages in thread
* Re: [PATCH v2 -tip] perf: x86, add SandyBridge support
2011-03-01 8:45 ` Lin Ming
@ 2011-03-01 8:57 ` Stephane Eranian
2011-03-01 9:39 ` Stephane Eranian
0 siblings, 1 reply; 30+ messages in thread
From: Stephane Eranian @ 2011-03-01 8:57 UTC (permalink / raw)
To: Lin Ming; +Cc: Peter Zijlstra, Ingo Molnar, Andi Kleen, lkml
[-- Warning: decoded text below may be mangled, UTF-8 assumed --]
[-- Attachment #1: Type: text/plain; charset=UTF-8, Size: 10281 bytes --]
On Tue, Mar 1, 2011 at 9:45 AM, Lin Ming <ming.m.lin@intel.com> wrote:
> On Tue, 2011-03-01 at 15:43 +0800, Stephane Eranian wrote:
>> On Mon, Feb 28, 2011 at 10:15 AM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
>> > On Mon, 2011-02-28 at 15:22 +0800, Lin Ming wrote:
>> >> This patch adds basic SandyBridge support, including hardware cache
>> >> events and PEBS events support.
>> >>
>> >> LLC-* hardware cache events don't work for now, it depends on the
>> >> offcore patches.
>> >
>> > What's the status of those, Stephane reported some problems last I
>> > remember?
>> >
>> I tried the trick I mentioned and it seems to work.
>>
>> Something like below with hwc->extra_alloc.
>> Could probably find a better name for that field.
>
> Stephane,
>
> I'll integrate below changes to the offcore patches, OK?
>
Let me try one more test on this.
I want to show the case that caused the problem in the first place.
> diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
> index f152930..ac1d100 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> @@ -906,7 +906,7 @@ intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
> Â Â Â Â int free_slot;
> Â Â Â Â int found;
>
> - Â Â Â if (!x86_pmu.percore_constraints)
> + Â Â Â if (!x86_pmu.percore_constraints || hwc->extra_alloc)
> Â Â Â Â Â Â Â Â return NULL;
>
> Â Â Â Â for (c = x86_pmu.percore_constraints; c->cmask; c++) {
> @@ -931,6 +931,7 @@ intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â if (hwc->extra_config == era->extra_config) {
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â era->ref++;
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â cpuc->percore_used = 1;
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â hwc->extra_alloc = 1;
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â c = NULL;
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â }
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â /* else conflict */
> @@ -945,6 +946,7 @@ intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
> Â Â Â Â Â Â Â Â Â Â Â Â era->extra_reg = hwc->extra_reg;
> Â Â Â Â Â Â Â Â Â Â Â Â era->extra_config = hwc->extra_config;
> Â Â Â Â Â Â Â Â Â Â Â Â cpuc->percore_used = 1;
> + Â Â Â Â Â Â Â Â Â Â Â hwc->extra_alloc = 1;
> Â Â Â Â Â Â Â Â Â Â Â Â c = NULL;
> Â Â Â Â Â Â Â Â }
> Â Â Â Â Â Â Â Â raw_spin_unlock(&pc->lock);
> @@ -998,6 +1000,7 @@ static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â era->extra_config == hwc->extra_config &&
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â era->extra_reg == er->msr) {
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â era->ref--;
> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â hwc->extra_alloc = 0;
> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â break;
> Â Â Â Â Â Â Â Â Â Â Â Â }
> Â Â Â Â Â Â Â Â }
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index f531ce3..dbbf33a 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -546,6 +546,7 @@ struct hw_perf_event {
>             int       last_cpu;
>             unsigned int   extra_reg;
> Â Â Â Â Â Â Â Â Â Â Â Â u64 Â Â Â Â Â Â extra_config;
> +            int       extra_alloc;
> Â Â Â Â Â Â Â Â };
> Â Â Â Â Â Â Â Â struct { /* software */
>             struct hrtimer  hrtimer;
>
>
>>
>> static struct event_constraint *
>> intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
>> {
>> Â Â Â Â struct hw_perf_event *hwc = &event->hw;
>> Â Â Â Â unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
>> Â Â Â Â struct event_constraint *c;
>> Â Â Â Â struct intel_percore *pc;
>> Â Â Â Â struct er_account *era;
>> Â Â Â Â int i;
>> Â Â Â Â int free_slot;
>> Â Â Â Â int found;
>>
>> Â Â Â Â if (!x86_pmu.percore_constraints)
>> Â Â Â Â Â Â Â Â return NULL;
>>
>> Â Â Â Â if (hwc->extra_alloc)
>> Â Â Â Â Â Â Â Â return NULL;
>>
>> Â Â Â Â for (c = x86_pmu.percore_constraints; c->cmask; c++) {
>> Â Â Â Â Â Â Â Â if (e != c->code)
>> Â Â Â Â Â Â Â Â Â Â Â Â continue;
>>
>> Â Â Â Â Â Â Â Â /*
>> Â Â Â Â Â Â Â Â Â * Allocate resource per core.
>> Â Â Â Â Â Â Â Â Â */
>> Â Â Â Â Â Â Â Â c = NULL;
>> Â Â Â Â Â Â Â Â pc = cpuc->per_core;
>> Â Â Â Â Â Â Â Â if (!pc)
>> Â Â Â Â Â Â Â Â Â Â Â Â break;
>> Â Â Â Â Â Â Â Â c = &emptyconstraint;
>> Â Â Â Â Â Â Â Â raw_spin_lock(&pc->lock);
>> Â Â Â Â Â Â Â Â free_slot = -1;
>> Â Â Â Â Â Â Â Â found = 0;
>> Â Â Â Â Â Â Â Â for (i = 0; i < MAX_EXTRA_REGS; i++) {
>> Â Â Â Â Â Â Â Â Â Â Â Â era = &pc->regs[i];
>> Â Â Â Â Â Â Â Â Â Â Â Â if (era->ref > 0 && hwc->extra_reg == era->extra_reg) {
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â /* Allow sharing same config */
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â if (hwc->extra_config == era->extra_config) {
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â era->ref++;
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â cpuc->percore_used = 1;
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â hwc->extra_alloc = 1;
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â c = NULL;
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â }
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â /* else conflict */
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â found = 1;
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â break;
>> Â Â Â Â Â Â Â Â Â Â Â Â } else if (era->ref == 0 && free_slot == -1)
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â free_slot = i;
>> Â Â Â Â Â Â Â Â }
>> Â Â Â Â Â Â Â Â if (!found && free_slot != -1) {
>> Â Â Â Â Â Â Â Â Â Â Â Â era = &pc->regs[free_slot];
>> Â Â Â Â Â Â Â Â Â Â Â Â era->ref = 1;
>> Â Â Â Â Â Â Â Â Â Â Â Â era->extra_reg = hwc->extra_reg;
>> Â Â Â Â Â Â Â Â Â Â Â Â era->extra_config = hwc->extra_config;
>> Â Â Â Â Â Â Â Â Â Â Â Â cpuc->percore_used = 1;
>> Â Â Â Â Â Â Â Â Â Â Â Â hwc->extra_alloc = 1;
>> Â Â Â Â Â Â Â Â Â Â Â Â c = NULL;
>> Â Â Â Â Â Â Â Â }
>> Â Â Â Â Â Â Â Â raw_spin_unlock(&pc->lock);
>> Â Â Â Â Â Â Â Â return c;
>> Â Â Â Â }
>>
>> Â Â Â Â return NULL;
>> }
>>
>> static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â struct perf_event *event)
>> {
>> Â Â Â Â struct extra_reg *er;
>> Â Â Â Â struct intel_percore *pc;
>> Â Â Â Â struct er_account *era;
>> Â Â Â Â struct hw_perf_event *hwc = &event->hw;
>> Â Â Â Â int i, allref;
>>
>> Â Â Â Â if (!cpuc->percore_used)
>> Â Â Â Â Â Â Â Â return;
>>
>> Â Â Â Â for (er = x86_pmu.extra_regs; er->msr; er++) {
>> Â Â Â Â Â Â Â Â if (er->event != (hwc->config & er->config_mask))
>> Â Â Â Â Â Â Â Â Â Â Â Â continue;
>>
>> Â Â Â Â Â Â Â Â pc = cpuc->per_core;
>> Â Â Â Â Â Â Â Â raw_spin_lock(&pc->lock);
>> Â Â Â Â Â Â Â Â for (i = 0; i < MAX_EXTRA_REGS; i++) {
>> Â Â Â Â Â Â Â Â Â Â Â Â era = &pc->regs[i];
>> Â Â Â Â Â Â Â Â Â Â Â Â if (era->ref > 0 &&
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â era->extra_config == hwc->extra_config &&
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â era->extra_reg == er->msr) {
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â era->ref--;
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â hwc->extra_alloc = 0;
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â break;
>> Â Â Â Â Â Â Â Â Â Â Â Â }
>> Â Â Â Â Â Â Â Â }
>> Â Â Â Â Â Â Â Â allref = 0;
>> Â Â Â Â Â Â Â Â for (i = 0; i < MAX_EXTRA_REGS; i++)
>> Â Â Â Â Â Â Â Â Â Â Â Â allref += pc->regs[i].ref;
>> Â Â Â Â Â Â Â Â if (allref == 0)
>> Â Â Â Â Â Â Â Â Â Â Â Â cpuc->percore_used = 0;
>> Â Â Â Â Â Â Â Â raw_spin_unlock(&pc->lock);
>> Â Â Â Â Â Â Â Â break;
>> Â Â Â Â }
>> }
>>
>> >
>> >> Â #define INTEL_EVENT_CONSTRAINT(c, n) \
>> >> Â Â Â EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT)
>> >> +#define INTEL_EVENT_CONSTRAINT2(c, n) Â Â Â Â \
>> >> + Â Â EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
>> >
>> > That's a particularly bad name, how about something like
>> >
>> > INTEL_UEVENT_CONSTRAINT or somesuch.
>> >
>> >> @@ -702,7 +738,13 @@ static void intel_ds_init(void)
>> >> Â Â Â Â Â Â Â Â Â Â Â printk(KERN_CONT "PEBS fmt1%c, ", pebs_type);
>> >> Â Â Â Â Â Â Â Â Â Â Â x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm);
>> >> Â Â Â Â Â Â Â Â Â Â Â x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
>> >> - Â Â Â Â Â Â Â Â Â Â x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
>> >> + Â Â Â Â Â Â Â Â Â Â switch (boot_cpu_data.x86_model) {
>> >> + Â Â Â Â Â Â Â Â Â Â case 42: /* SandyBridge */
>> >> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â x86_pmu.pebs_constraints = intel_snb_pebs_events;
>> >> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â break;
>> >> + Â Â Â Â Â Â Â Â Â Â default:
>> >> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â x86_pmu.pebs_constraints = intel_nehalem_pebs_events;
>> >> + Â Â Â Â Â Â Â Â Â Â }
>> >> Â Â Â Â Â Â Â Â Â Â Â break;
>> >>
>> >> Â Â Â Â Â Â Â default:
>> >
>> > We already have this massive model switch right after this function,
>> > might as well move the pebs constraint assignment there.
>> >
>
>
>
ÿôèº{.nÇ+·®+%Ëÿ±éݶ\x17¥wÿº{.nÇ+·¥{±þG«éÿ{ayº\x1dÊÚë,j\a¢f£¢·hïêÿêçz_è®\x03(éÝ¢j"ú\x1a¶^[m§ÿÿ¾\a«þG«éÿ¢¸?¨èÚ&£ø§~á¶iOæ¬z·vØ^\x14\x04\x1a¶^[m§ÿÿÃ\fÿ¶ìÿ¢¸?I¥
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 -tip] perf: x86, add SandyBridge support
2011-03-01 8:57 ` Stephane Eranian
@ 2011-03-01 9:39 ` Stephane Eranian
2011-03-01 15:07 ` Lin Ming
0 siblings, 1 reply; 30+ messages in thread
From: Stephane Eranian @ 2011-03-01 9:39 UTC (permalink / raw)
To: Lin Ming; +Cc: Peter Zijlstra, Ingo Molnar, Andi Kleen, lkml
On Tue, Mar 1, 2011 at 9:57 AM, Stephane Eranian <eranian@google.com> wrote:
> On Tue, Mar 1, 2011 at 9:45 AM, Lin Ming <ming.m.lin@intel.com> wrote:
>> On Tue, 2011-03-01 at 15:43 +0800, Stephane Eranian wrote:
>>> On Mon, Feb 28, 2011 at 10:15 AM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
>>> > On Mon, 2011-02-28 at 15:22 +0800, Lin Ming wrote:
>>> >> This patch adds basic SandyBridge support, including hardware cache
>>> >> events and PEBS events support.
>>> >>
>>> >> LLC-* hardware cache events don't work for now, it depends on the
>>> >> offcore patches.
>>> >
>>> > What's the status of those, Stephane reported some problems last I
>>> > remember?
>>> >
>>> I tried the trick I mentioned and it seems to work.
>>>
>>> Something like below with hwc->extra_alloc.
>>> Could probably find a better name for that field.
>>
>> Stephane,
>>
>> I'll integrate below changes to the offcore patches, OK?
>>
> Let me try one more test on this.
> I want to show the case that caused the problem in the first place.
>
There you go:
$ task -e offcore_response_0:DMND_DATA_RD:local_dram -e
offcore_response_0:DMND_DATA_RD:local_dram noploop 1
Here the two instances of offcore_response are in two different event groups.
I instrumented get/put percore constraint routines. get1 is where you do the
first allocation, get2 is where you do ref++.
The scheduling algorithm will do:
- submit 1st group
- schedule 1st group
- submit 2nd group
- schedule 1st + 2nd group
Which results in the following trace:
[ 109.855713] CPU0 get1 cfg=1301b7 ref=1
[ 109.855717] CPU0 get2 cfg=1301b7 ref=2
[ 109.855718] CPU0 get2 cfg=1301b7 ref=3 <-- this one is bogus
[ 109.856606] CPU0 put cfg=1301b7 ref=2
[ 109.856609] CPU0 put cfg=1301b7 ref=1 <- don't free the resource
[ 109.856616] CPU0 get2 cfg=1301b7 ref=2
[ 109.856619] CPU0 get2 cfg=1301b7 ref=3
[ 109.856622] CPU0 get2 cfg=1301b7 ref=4
[ 110.742151] CPU0 put cfg=1301b7 ref=3
[ 110.742154] CPU0 put cfg=1301b7 ref=2
[ 110.742160] CPU0 get2 cfg=1301b7 ref=3
[ 110.742161] CPU0 get2 cfg=1301b7 ref=4
[ 110.742163] CPU0 get2 cfg=1301b7 ref=5
[ 110.854448] CPU0 put cfg=1301b7 ref=4
[ 110.854450] CPU0 put cfg=1301b7 ref=3
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 -tip] perf: x86, add SandyBridge support
2011-03-01 9:39 ` Stephane Eranian
@ 2011-03-01 15:07 ` Lin Ming
2011-03-01 15:09 ` Stephane Eranian
0 siblings, 1 reply; 30+ messages in thread
From: Lin Ming @ 2011-03-01 15:07 UTC (permalink / raw)
To: Stephane Eranian; +Cc: Peter Zijlstra, Ingo Molnar, Andi Kleen, lkml
On Tue, 2011-03-01 at 17:39 +0800, Stephane Eranian wrote:
> On Tue, Mar 1, 2011 at 9:57 AM, Stephane Eranian <eranian@google.com> wrote:
> > On Tue, Mar 1, 2011 at 9:45 AM, Lin Ming <ming.m.lin@intel.com> wrote:
> >> On Tue, 2011-03-01 at 15:43 +0800, Stephane Eranian wrote:
> >>> On Mon, Feb 28, 2011 at 10:15 AM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> >>> > On Mon, 2011-02-28 at 15:22 +0800, Lin Ming wrote:
> >>> >> This patch adds basic SandyBridge support, including hardware cache
> >>> >> events and PEBS events support.
> >>> >>
> >> >> LLC-* hardware cache events don't work for now, it depends on the
> >>> >> offcore patches.
> >>> >
> >>> > What's the status of those, Stephane reported some problems last I
> >>> > remember?
> >>> >
> >>> I tried the trick I mentioned and it seems to work.
> >>>
> >>> Something like below with hwc->extra_alloc.
> >>> Could probably find a better name for that field.
> >>
> >> Stephane,
> >>
> >> I'll integrate below changes to the offcore patches, OK?
> >>
> > Let me try one more test on this.
> > I want to show the case that caused the problem in the first place.
> >
>
> There you go:
>
> $ task -e offcore_response_0:DMND_DATA_RD:local_dram -e
> offcore_response_0:DMND_DATA_RD:local_dram noploop 1
>
> Here the two instances of offcore_response are in two different event groups.
> I instrumented get/put percore constraint routines. get1 is where you do the
> first allocation, get2 is where you do ref++.
>
> The scheduling algorithm will do:
> - submit 1st group
> - schedule 1st group
> - submit 2nd group
> - schedule 1st + 2nd group
>
> Which results in the following trace:
>
> [ 109.855713] CPU0 get1 cfg=1301b7 ref=1
> [ 109.855717] CPU0 get2 cfg=1301b7 ref=2
> [ 109.855718] CPU0 get2 cfg=1301b7 ref=3 <-- this one is bogus
> [ 109.856606] CPU0 put cfg=1301b7 ref=2
> [ 109.856609] CPU0 put cfg=1301b7 ref=1 <- don't free the resource
Do you mean the issue is still there even with your extra_alloc patch
applied?
>
> [ 109.856616] CPU0 get2 cfg=1301b7 ref=2
> [ 109.856619] CPU0 get2 cfg=1301b7 ref=3
> [ 109.856622] CPU0 get2 cfg=1301b7 ref=4
> [ 110.742151] CPU0 put cfg=1301b7 ref=3
> [ 110.742154] CPU0 put cfg=1301b7 ref=2
>
> [ 110.742160] CPU0 get2 cfg=1301b7 ref=3
> [ 110.742161] CPU0 get2 cfg=1301b7 ref=4
> [ 110.742163] CPU0 get2 cfg=1301b7 ref=5
> [ 110.854448] CPU0 put cfg=1301b7 ref=4
> [ 110.854450] CPU0 put cfg=1301b7 ref=3
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 -tip] perf: x86, add SandyBridge support
2011-03-01 15:07 ` Lin Ming
@ 2011-03-01 15:09 ` Stephane Eranian
2011-03-01 15:18 ` Lin Ming
0 siblings, 1 reply; 30+ messages in thread
From: Stephane Eranian @ 2011-03-01 15:09 UTC (permalink / raw)
To: Lin Ming; +Cc: Peter Zijlstra, Ingo Molnar, Andi Kleen, lkml
On Tue, Mar 1, 2011 at 4:07 PM, Lin Ming <ming.m.lin@intel.com> wrote:
> On Tue, 2011-03-01 at 17:39 +0800, Stephane Eranian wrote:
>> On Tue, Mar 1, 2011 at 9:57 AM, Stephane Eranian <eranian@google.com> wrote:
>> > On Tue, Mar 1, 2011 at 9:45 AM, Lin Ming <ming.m.lin@intel.com> wrote:
>> >> On Tue, 2011-03-01 at 15:43 +0800, Stephane Eranian wrote:
>> >>> On Mon, Feb 28, 2011 at 10:15 AM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
>> >>> > On Mon, 2011-02-28 at 15:22 +0800, Lin Ming wrote:
>> >>> >> This patch adds basic SandyBridge support, including hardware cache
>> >>> >> events and PEBS events support.
>> >>> >>
>> >> >> LLC-* hardware cache events don't work for now, it depends on the
>> >>> >> offcore patches.
>> >>> >
>> >>> > What's the status of those, Stephane reported some problems last I
>> >>> > remember?
>> >>> >
>> >>> I tried the trick I mentioned and it seems to work.
>> >>>
>> >>> Something like below with hwc->extra_alloc.
>> >>> Could probably find a better name for that field.
>> >>
>> >> Stephane,
>> >>
>> >> I'll integrate below changes to the offcore patches, OK?
>> >>
>> > Let me try one more test on this.
> >> > I want to show the case that caused the problem in the first place.
>> >
>>
>> There you go:
>>
>> $ task -e offcore_response_0:DMND_DATA_RD:local_dram -e
>> offcore_response_0:DMND_DATA_RD:local_dram noploop 1
>>
>> Here the two instances of offcore_response are in two different event groups.
>> I instrumented get/put percore constraint routines. get1 is where you do the
>> first allocation, get2 is where you do ref++.
>>
>> The scheduling algorithm will do:
>> - submit 1st group
>> - schedule 1st group
>> - submit 2nd group
>> - schedule 1st + 2nd group
>>
>> Which results in the following trace:
>>
>> [ 109.855713] CPU0 get1 cfg=1301b7 ref=1
>> [ 109.855717] CPU0 get2 cfg=1301b7 ref=2
>> [ 109.855718] CPU0 get2 cfg=1301b7 ref=3 <-- this one is bogus
>> [ 109.856606] CPU0 put cfg=1301b7 ref=2
>> [ 109.856609] CPU0 put cfg=1301b7 ref=1 <- don't free the resource
>
> Do you mean the issue is still there even with your extra_alloc patch
> applied?
>
No, I am showing you what happens without it.
If you try with it, it should work.
>>
>> [ 109.856616] CPU0 get2 cfg=1301b7 ref=2
>> [ 109.856619] CPU0 get2 cfg=1301b7 ref=3
>> [ 109.856622] CPU0 get2 cfg=1301b7 ref=4
>> [ 110.742151] CPU0 put cfg=1301b7 ref=3
>> [ 110.742154] CPU0 put cfg=1301b7 ref=2
>>
>> [ 110.742160] CPU0 get2 cfg=1301b7 ref=3
>> [ 110.742161] CPU0 get2 cfg=1301b7 ref=4
>> [ 110.742163] CPU0 get2 cfg=1301b7 ref=5
>> [ 110.854448] CPU0 put cfg=1301b7 ref=4
>> [ 110.854450] CPU0 put cfg=1301b7 ref=3
>
>
>
^ permalink raw reply [flat|nested] 30+ messages in thread
* Re: [PATCH v2 -tip] perf: x86, add SandyBridge support
2011-03-01 15:09 ` Stephane Eranian
@ 2011-03-01 15:18 ` Lin Ming
0 siblings, 0 replies; 30+ messages in thread
From: Lin Ming @ 2011-03-01 15:18 UTC (permalink / raw)
To: Stephane Eranian; +Cc: Peter Zijlstra, Ingo Molnar, Andi Kleen, lkml
On Tue, 2011-03-01 at 23:09 +0800, Stephane Eranian wrote:
> On Tue, Mar 1, 2011 at 4:07 PM, Lin Ming <ming.m.lin@intel.com> wrote:
> > On Tue, 2011-03-01 at 17:39 +0800, Stephane Eranian wrote:
> >> On Tue, Mar 1, 2011 at 9:57 AM, Stephane Eranian <eranian@google.com> wrote:
> >> > On Tue, Mar 1, 2011 at 9:45 AM, Lin Ming <ming.m.lin@intel.com> wrote:
> >> >> On Tue, 2011-03-01 at 15:43 +0800, Stephane Eranian wrote:
> >> >>> On Mon, Feb 28, 2011 at 10:15 AM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> >> >>> > On Mon, 2011-02-28 at 15:22 +0800, Lin Ming wrote:
> >> >>> >> This patch adds basic SandyBridge support, including hardware cache
> >> >>> >> events and PEBS events support.
> >> >>> >>
> >> >> >> LLC-* hardware cache events don't work for now, it depends on the
> >> >>> >> offcore patches.
> >> >>> >
> >> >>> > What's the status of those, Stephane reported some problems last I
> >> >>> > remember?
> >> >>> >
> >> >>> I tried the trick I mentioned and it seems to work.
> >> >>>
> >> >>> Something like below with hwc->extra_alloc.
> >> >>> Could probably find a better name for that field.
> >> >>
> >> >> Stephane,
> >> >>
> >> >> I'll integrate below changes to the offcore patches, OK?
> >> >>
> >> > Let me try one more test on this.
> >> > I want to show the case that caused the problem in the first place.
> >> >
> >>
> >> There you go:
> >>
> >> $ task -e offcore_response_0:DMND_DATA_RD:local_dram -e
> >> offcore_response_0:DMND_DATA_RD:local_dram noploop 1
> >>
> >> Here the two instances of offcore_response are in two different event groups.
> >> I instrumented get/put percore constraint routines. get1 is where you do the
> >> first allocation, get2 is where you do ref++.
> >>
> >> The scheduling algorithm will do:
> >> - submit 1st group
> >> - schedule 1st group
> >> - submit 2nd group
> >> - schedule 1st + 2nd group
> >>
> >> Which results in the following trace:
> >>
> >> [ 109.855713] CPU0 get1 cfg=1301b7 ref=1
> >> [ 109.855717] CPU0 get2 cfg=1301b7 ref=2
> >> [ 109.855718] CPU0 get2 cfg=1301b7 ref=3 <-- this one is bogus
> >> [ 109.856606] CPU0 put cfg=1301b7 ref=2
> >> [ 109.856609] CPU0 put cfg=1301b7 ref=1 <- don't free the resource
> >
> > Do you mean the issue is still there even with your extra_alloc patch
> > applied?
> >
> No, I am showing you what happens without it.
> If you try with it, it should work.
Got it. I'll send out a new version with all the fixes.
Thanks.
>
> >>
> >> [ 109.856616] CPU0 get2 cfg=1301b7 ref=2
> >> [ 109.856619] CPU0 get2 cfg=1301b7 ref=3
> >> [ 109.856622] CPU0 get2 cfg=1301b7 ref=4
> >> [ 110.742151] CPU0 put cfg=1301b7 ref=3
> >> [ 110.742154] CPU0 put cfg=1301b7 ref=2
> >>
> >> [ 110.742160] CPU0 get2 cfg=1301b7 ref=3
> >> [ 110.742161] CPU0 get2 cfg=1301b7 ref=4
> >> [ 110.742163] CPU0 get2 cfg=1301b7 ref=5
> >> [ 110.854448] CPU0 put cfg=1301b7 ref=4
> >> [ 110.854450] CPU0 put cfg=1301b7 ref=3
> >
> >
> >
^ permalink raw reply [flat|nested] 30+ messages in thread
end of thread, other threads:[~2011-03-01 15:18 UTC | newest]
Thread overview: 30+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2011-02-28 7:22 [PATCH v2 -tip] perf: x86, add SandyBridge support Lin Ming
2011-02-28 8:20 ` Stephane Eranian
2011-02-28 8:51 ` Lin Ming
2011-02-28 9:02 ` Stephane Eranian
2011-02-28 14:03 ` Lin Ming
2011-02-28 14:28 ` Lin Ming
2011-02-28 9:08 ` Ingo Molnar
2011-02-28 14:02 ` Lin Ming
2011-02-28 14:13 ` Stephane Eranian
2011-02-28 9:15 ` Peter Zijlstra
2011-02-28 12:25 ` Stephane Eranian
2011-02-28 14:33 ` Lin Ming
2011-02-28 14:43 ` Stephane Eranian
2011-02-28 14:52 ` Lin Ming
2011-02-28 14:55 ` Stephane Eranian
2011-02-28 14:21 ` Lin Ming
2011-02-28 14:24 ` Peter Zijlstra
2011-02-28 14:45 ` Lin Ming
2011-02-28 14:46 ` Stephane Eranian
2011-02-28 14:56 ` Lin Ming
2011-02-28 15:11 ` Peter Zijlstra
2011-03-01 0:32 ` Lin Ming
2011-03-01 7:43 ` Stephane Eranian
2011-03-01 8:21 ` Lin Ming
2011-03-01 8:45 ` Lin Ming
2011-03-01 8:57 ` Stephane Eranian
2011-03-01 9:39 ` Stephane Eranian
2011-03-01 15:07 ` Lin Ming
2011-03-01 15:09 ` Stephane Eranian
2011-03-01 15:18 ` Lin Ming
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox