* [PATCH 2/3] perf, x86: Add Broadwell core support
2014-08-13 1:45 [PATCH 1/3] perf, x86: Remove incorrect model number from Haswell perf Andi Kleen
@ 2014-08-13 1:45 ` Andi Kleen
2014-08-13 8:00 ` Peter Zijlstra
2014-08-13 1:45 ` [PATCH 3/3] perf, x86: Add INST_RETIRED.PREC_DIST workarounds Andi Kleen
2014-08-13 7:58 ` [PATCH 1/3] perf, x86: Remove incorrect model number from Haswell perf Peter Zijlstra
2 siblings, 1 reply; 6+ messages in thread
From: Andi Kleen @ 2014-08-13 1:45 UTC (permalink / raw)
To: peterz; +Cc: linux-kernel, mingo, eranian, Andi Kleen
From: Andi Kleen <ak@linux.intel.com>
Add Broadwell support for Broadwell Client to perf.
This is very similar to Haswell. It uses a new cache event table,
because there were various changes there.
The constraint list has one new event that needs to be handled over Haswell.
The PEBS event list is the same, so we reuse Haswell's.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
arch/x86/kernel/cpu/perf_event_intel.c | 152 +++++++++++++++++++++++++++++++++
1 file changed, 152 insertions(+)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index ef6c8b7..dd0ea95 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -220,6 +220,15 @@ static struct event_constraint intel_hsw_event_constraints[] = {
EVENT_CONSTRAINT_END
};
+struct event_constraint intel_bdw_event_constraints[] = {
+ FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
+ FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */
+ FIXED_EVENT_CONSTRAINT(0x0300, 2), /* CPU_CLK_UNHALTED.REF */
+ INTEL_UEVENT_CONSTRAINT(0x148, 0x4), /* L1D_PEND_MISS.PENDING */
+ INTEL_EVENT_CONSTRAINT(0xa3, 0x4), /* CYCLE_ACTIVITY.* */
+ EVENT_CONSTRAINT_END
+};
+
static u64 intel_pmu_event_map(int hw_event)
{
return intel_perfmon_event_map[hw_event];
@@ -415,6 +424,126 @@ static __initconst const u64 snb_hw_cache_event_ids
};
+static __initconst const u64 bdw_hw_cache_event_ids
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
+ [ C(RESULT_MISS) ] = 0x151, /* L1D.REPLACEMENT */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(L1I ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x280, /* ICACHE.MISSES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ /* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD */
+ [ C(RESULT_ACCESS) ] = 0x1b7,
+ /* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD|SUPPLIER_NONE|
+ L3_MISS|ANY_SNOOP */
+ [ C(RESULT_MISS) ] = 0x1b7,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x1b7, /* OFFCORE_RESPONSE:ALL_RFO */
+ /* OFFCORE_RESPONSE:ALL_RFO|SUPPLIER_NONE|L3_MISS|ANY_SNOOP */
+ [ C(RESULT_MISS) ] = 0x1b7,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(DTLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x81d0, /* MEM_UOPS_RETIRED.ALL_LOADS */
+ [ C(RESULT_MISS) ] = 0x108, /* DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x82d0, /* MEM_UOPS_RETIRED.ALL_STORES */
+ [ C(RESULT_MISS) ] = 0x149, /* DTLB_STORE_MISSES.MISS_CAUSES_A_WALK */
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+ [ C(ITLB) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0x6085, /* ITLB_MISSES.STLB_HIT */
+ [ C(RESULT_MISS) ] = 0x185, /* ITLB_MISSES.MISS_CAUSES_A_WALK */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+ [ C(BPU ) ] = {
+ [ C(OP_READ) ] = {
+ [ C(RESULT_ACCESS) ] = 0xc4, /* BR_INST_RETIRED.ALL_BRANCHES */
+ [ C(RESULT_MISS) ] = 0xc5, /* BR_MISP_RETIRED.ALL_BRANCHES */
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = -1,
+ [ C(RESULT_MISS) ] = -1,
+ },
+ },
+};
+
+static __initconst const u64 bdw_hw_cache_extra_regs
+ [PERF_COUNT_HW_CACHE_MAX]
+ [PERF_COUNT_HW_CACHE_OP_MAX]
+ [PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(LL ) ] = {
+ [ C(OP_READ) ] = {
+ /* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD */
+ [ C(RESULT_ACCESS) ] = 0x2d5,
+ /* OFFCORE_RESPONSE:ALL_DATA_RD|ALL_CODE_RD|SUPPLIER_NONE|
+ L3_MISS|ANY_SNOOP */
+ [ C(RESULT_MISS) ] = 0x3fbc0202d5,
+ },
+ [ C(OP_WRITE) ] = {
+ [ C(RESULT_ACCESS) ] = 0x122, /* OFFCORE_RESPONSE:ALL_RFO */
+ /* OFFCORE_RESPONSE:ALL_RFO|SUPPLIER_NONE|L3_MISS|ANY_SNOOP */
+ [ C(RESULT_MISS) ] = 0x3fbc020122,
+ },
+ [ C(OP_PREFETCH) ] = {
+ [ C(RESULT_ACCESS) ] = 0x0,
+ [ C(RESULT_MISS) ] = 0x0,
+ },
+ },
+};
+
static __initconst const u64 westmere_hw_cache_event_ids
[PERF_COUNT_HW_CACHE_MAX]
[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -2564,6 +2693,29 @@ __init int intel_pmu_init(void)
pr_cont("Haswell events, ");
break;
+ case 61: /* Broadwell */
+ case 71:
+ case 79:
+ x86_pmu.late_ack = true;
+ memcpy(hw_cache_event_ids, bdw_hw_cache_event_ids, sizeof(hw_cache_event_ids));
+ memcpy(hw_cache_extra_regs, bdw_hw_cache_extra_regs, sizeof(hw_cache_extra_regs));
+
+ intel_pmu_lbr_init_snb();
+
+ x86_pmu.event_constraints = intel_bdw_event_constraints;
+ x86_pmu.pebs_constraints = intel_hsw_pebs_event_constraints;
+ x86_pmu.extra_regs = intel_snbep_extra_regs;
+ x86_pmu.pebs_aliases = intel_pebs_aliases_snb;
+ /* all extra regs are per-cpu when HT is on */
+ x86_pmu.er_flags |= ERF_HAS_RSP_1;
+ x86_pmu.er_flags |= ERF_NO_HT_SHARING;
+
+ x86_pmu.hw_config = hsw_hw_config;
+ x86_pmu.get_event_constraints = hsw_get_event_constraints;
+ x86_pmu.cpu_events = hsw_events_attrs;
+ pr_cont("Broadwell events, ");
+ break;
+
default:
switch (x86_pmu.version) {
case 1:
--
1.9.3
^ permalink raw reply related [flat|nested] 6+ messages in thread* [PATCH 3/3] perf, x86: Add INST_RETIRED.PREC_DIST workarounds
2014-08-13 1:45 [PATCH 1/3] perf, x86: Remove incorrect model number from Haswell perf Andi Kleen
2014-08-13 1:45 ` [PATCH 2/3] perf, x86: Add Broadwell core support Andi Kleen
@ 2014-08-13 1:45 ` Andi Kleen
2014-08-13 8:10 ` Peter Zijlstra
2014-08-13 7:58 ` [PATCH 1/3] perf, x86: Remove incorrect model number from Haswell perf Peter Zijlstra
2 siblings, 1 reply; 6+ messages in thread
From: Andi Kleen @ 2014-08-13 1:45 UTC (permalink / raw)
To: peterz; +Cc: linux-kernel, mingo, eranian, Andi Kleen
From: Andi Kleen <ak@linux.intel.com>
On Broadwell INST_RETIRED.PREC_DIST cannot be used with any period
that doesn't have the lowest 6 bits cleared. And the period
should not be smaller than 128.
Add a new callback to enforce this, and set it for Broadwell.
This is erratum BDM57 and BDM11.
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
arch/x86/kernel/cpu/perf_event.c | 3 +++
arch/x86/kernel/cpu/perf_event.h | 1 +
arch/x86/kernel/cpu/perf_event_intel.c | 18 ++++++++++++++++++
3 files changed, 22 insertions(+)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 0adc5e3..a0adf58 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -980,6 +980,9 @@ int x86_perf_event_set_period(struct perf_event *event)
if (left > x86_pmu.max_period)
left = x86_pmu.max_period;
+ if (x86_pmu.limit_period)
+ left = x86_pmu.limit_period(event, left);
+
per_cpu(pmc_prev_left[idx], smp_processor_id()) = left;
/*
diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
index de81627..a46e391 100644
--- a/arch/x86/kernel/cpu/perf_event.h
+++ b/arch/x86/kernel/cpu/perf_event.h
@@ -456,6 +456,7 @@ struct x86_pmu {
struct x86_pmu_quirk *quirks;
int perfctr_second_write;
bool late_ack;
+ unsigned (*limit_period)(struct perf_event *event, unsigned l);
/*
* sysfs attrs
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index dd0ea95..db131b0 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -2034,6 +2034,23 @@ hsw_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
return c;
}
+/*
+ * Broadwell:
+ * The INST_RETIRED.ALL period always needs to have lowest
+ * 6bits cleared (BDM57). It shall not use a period smaller
+ * than 100 (BDM11). We combine the two to enforce
+ * a min-period of 128.
+ */
+static unsigned bdw_limit_period(struct perf_event *event, unsigned left)
+{
+ if ((event->hw.config & 0xffff) == 0x1c0) {
+ if (left < 128)
+ left = 128;
+ left &= ~0x3fu;
+ }
+ return left;
+}
+
PMU_FORMAT_ATTR(event, "config:0-7" );
PMU_FORMAT_ATTR(umask, "config:8-15" );
PMU_FORMAT_ATTR(edge, "config:18" );
@@ -2713,6 +2730,7 @@ __init int intel_pmu_init(void)
x86_pmu.hw_config = hsw_hw_config;
x86_pmu.get_event_constraints = hsw_get_event_constraints;
x86_pmu.cpu_events = hsw_events_attrs;
+ x86_pmu.limit_period = bdw_limit_period;
pr_cont("Broadwell events, ");
break;
--
1.9.3
^ permalink raw reply related [flat|nested] 6+ messages in thread