* [tip:perf/core] perf, x86: Add Nehelem PMU programming errata workaround
2010-03-26 13:08 [PATCH] perf, x86: Add Nehelem PMU programming errata workaround Peter Zijlstra
@ 2010-03-26 14:33 ` tip-bot for Peter Zijlstra
2010-03-26 14:43 ` [PATCH] " Peter Zijlstra
` (2 subsequent siblings)
3 siblings, 0 replies; 6+ messages in thread
From: tip-bot for Peter Zijlstra @ 2010-03-26 14:33 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, eranian, hpa, mingo, a.p.zijlstra, peterz, tglx,
mingo
Commit-ID: 1ac62cfff252fb668405ef3398a1fa7f4a0d6d15
Gitweb: http://git.kernel.org/tip/1ac62cfff252fb668405ef3398a1fa7f4a0d6d15
Author: Peter Zijlstra <peterz@infradead.org>
AuthorDate: Fri, 26 Mar 2010 14:08:44 +0100
Committer: Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 26 Mar 2010 14:12:44 +0100
perf, x86: Add Nehelem PMU programming errata workaround
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Fri Mar 26 13:59:41 CET 2010
Implement the workaround for Intel Errata AAK100 and AAP53.
Also, remove the Core-i7 name for Nehalem events since there are
also Westmere based i7 chips.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
LKML-Reference: <1269608924.12097.147.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
arch/x86/kernel/cpu/perf_event.c | 8 +++---
arch/x86/kernel/cpu/perf_event_intel.c | 43 +++++++++++++++++++++++++++++---
arch/x86/kernel/cpu/perf_event_p4.c | 2 +-
arch/x86/kernel/cpu/perf_event_p6.c | 2 +-
4 files changed, 45 insertions(+), 10 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index f571f51..6f66d4a 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -184,7 +184,7 @@ struct x86_pmu {
int version;
int (*handle_irq)(struct pt_regs *);
void (*disable_all)(void);
- void (*enable_all)(void);
+ void (*enable_all)(int added);
void (*enable)(struct perf_event *);
void (*disable)(struct perf_event *);
int (*hw_config)(struct perf_event_attr *attr, struct hw_perf_event *hwc);
@@ -576,7 +576,7 @@ void hw_perf_disable(void)
x86_pmu.disable_all();
}
-static void x86_pmu_enable_all(void)
+static void x86_pmu_enable_all(int added)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
int idx;
@@ -784,7 +784,7 @@ void hw_perf_enable(void)
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct perf_event *event;
struct hw_perf_event *hwc;
- int i;
+ int i, added = cpuc->n_added;
if (!x86_pmu_initialized())
return;
@@ -836,7 +836,7 @@ void hw_perf_enable(void)
cpuc->enabled = 1;
barrier();
- x86_pmu.enable_all();
+ x86_pmu.enable_all(added);
}
static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 044b843..676aac2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -483,7 +483,7 @@ static void intel_pmu_disable_all(void)
intel_pmu_lbr_disable_all();
}
-static void intel_pmu_enable_all(void)
+static void intel_pmu_enable_all(int added)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -502,6 +502,40 @@ static void intel_pmu_enable_all(void)
}
}
+/*
+ * Workaround for:
+ * Intel Errata AAK100 (model 26)
+ * Intel Errata AAP53 (model 30)
+ *
+ * These chips need to be 'reset' when adding counters by programming
+ * the magic three (non counting) events 0x4300D2, 0x4300B1 and 0x4300B5
+ * either in sequence on the same PMC or on different PMCs.
+ */
+static void intel_pmu_nhm_enable_all(int added)
+{
+ if (added) {
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ int i;
+
+ wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 0, 0x4300D2);
+ wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x4300B1);
+ wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x4300B5);
+
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x3);
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
+
+ for (i = 0; i < 3; i++) {
+ struct perf_event *event = cpuc->events[i];
+
+ if (!event)
+ continue;
+
+ __x86_pmu_enable_event(&event->hw);
+ }
+ }
+ intel_pmu_enable_all(added);
+}
+
static inline u64 intel_pmu_get_status(void)
{
u64 status;
@@ -658,7 +692,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
intel_pmu_drain_bts_buffer();
status = intel_pmu_get_status();
if (!status) {
- intel_pmu_enable_all();
+ intel_pmu_enable_all(0);
return 0;
}
@@ -707,7 +741,7 @@ again:
goto again;
done:
- intel_pmu_enable_all();
+ intel_pmu_enable_all(0);
return 1;
}
@@ -920,7 +954,8 @@ static __init int intel_pmu_init(void)
intel_pmu_lbr_init_nhm();
x86_pmu.event_constraints = intel_nehalem_event_constraints;
- pr_cont("Nehalem/Corei7 events, ");
+ x86_pmu.enable_all = intel_pmu_nhm_enable_all;
+ pr_cont("Nehalem events, ");
break;
case 28: /* Atom */
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index f8fe069..0d1be36 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -535,7 +535,7 @@ static void p4_pmu_enable_event(struct perf_event *event)
(cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
}
-static void p4_pmu_enable_all(void)
+static void p4_pmu_enable_all(int added)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
int idx;
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index 6ff4d01..877182c 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -66,7 +66,7 @@ static void p6_pmu_disable_all(void)
wrmsrl(MSR_P6_EVNTSEL0, val);
}
-static void p6_pmu_enable_all(void)
+static void p6_pmu_enable_all(int added)
{
unsigned long val;
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH] perf, x86: Add Nehelem PMU programming errata workaround
2010-03-26 13:08 [PATCH] perf, x86: Add Nehelem PMU programming errata workaround Peter Zijlstra
2010-03-26 14:33 ` [tip:perf/core] " tip-bot for Peter Zijlstra
@ 2010-03-26 14:43 ` Peter Zijlstra
2010-03-26 14:48 ` [tip:perf/core] " tip-bot for Peter Zijlstra
2010-03-27 21:27 ` [PATCH] " Andi Kleen
3 siblings, 0 replies; 6+ messages in thread
From: Peter Zijlstra @ 2010-03-26 14:43 UTC (permalink / raw)
To: Ingo Molnar; +Cc: LKML, Stephane Eranian
On Fri, 2010-03-26 at 14:08 +0100, Peter Zijlstra wrote:
> Subject: perf, x86: Add Nehelem PMU programming errata workaround
> From: Peter Zijlstra <a.p.zijlstra@chello.nl>
> Date: Fri Mar 26 13:59:41 CET 2010
>
> Implement the workaround for Intel Errata AAK100 and AAP53.
>
> Also, remove the Core-i7 name for Nehalem events since there are also
> Westmere based i7 chips.
*sigh* it appears the westmere chips suffer this too according to the
new Xeon 5600 docs (BD53) .. I'll update the patch.
^ permalink raw reply [flat|nested] 6+ messages in thread
* [tip:perf/core] perf, x86: Add Nehelem PMU programming errata workaround
2010-03-26 13:08 [PATCH] perf, x86: Add Nehelem PMU programming errata workaround Peter Zijlstra
2010-03-26 14:33 ` [tip:perf/core] " tip-bot for Peter Zijlstra
2010-03-26 14:43 ` [PATCH] " Peter Zijlstra
@ 2010-03-26 14:48 ` tip-bot for Peter Zijlstra
2010-03-27 21:27 ` [PATCH] " Andi Kleen
3 siblings, 0 replies; 6+ messages in thread
From: tip-bot for Peter Zijlstra @ 2010-03-26 14:48 UTC (permalink / raw)
To: linux-tip-commits
Cc: linux-kernel, eranian, hpa, mingo, a.p.zijlstra, peterz, tglx,
mingo
Commit-ID: 11164cd4f6dab326a88bdf27f2f8f7c11977e91a
Gitweb: http://git.kernel.org/tip/11164cd4f6dab326a88bdf27f2f8f7c11977e91a
Author: Peter Zijlstra <peterz@infradead.org>
AuthorDate: Fri, 26 Mar 2010 14:08:44 +0100
Committer: Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 26 Mar 2010 15:47:24 +0100
perf, x86: Add Nehelem PMU programming errata workaround
Implement the workaround for Intel Errata AAK100 and AAP53.
Also, remove the Core-i7 name for Nehalem events since there are
also Westmere based i7 chips.
Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
LKML-Reference: <1269608924.12097.147.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
arch/x86/kernel/cpu/perf_event.c | 8 +++---
arch/x86/kernel/cpu/perf_event_intel.c | 43 +++++++++++++++++++++++++++++---
arch/x86/kernel/cpu/perf_event_p4.c | 2 +-
arch/x86/kernel/cpu/perf_event_p6.c | 2 +-
4 files changed, 45 insertions(+), 10 deletions(-)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index f571f51..6f66d4a 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -184,7 +184,7 @@ struct x86_pmu {
int version;
int (*handle_irq)(struct pt_regs *);
void (*disable_all)(void);
- void (*enable_all)(void);
+ void (*enable_all)(int added);
void (*enable)(struct perf_event *);
void (*disable)(struct perf_event *);
int (*hw_config)(struct perf_event_attr *attr, struct hw_perf_event *hwc);
@@ -576,7 +576,7 @@ void hw_perf_disable(void)
x86_pmu.disable_all();
}
-static void x86_pmu_enable_all(void)
+static void x86_pmu_enable_all(int added)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
int idx;
@@ -784,7 +784,7 @@ void hw_perf_enable(void)
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
struct perf_event *event;
struct hw_perf_event *hwc;
- int i;
+ int i, added = cpuc->n_added;
if (!x86_pmu_initialized())
return;
@@ -836,7 +836,7 @@ void hw_perf_enable(void)
cpuc->enabled = 1;
barrier();
- x86_pmu.enable_all();
+ x86_pmu.enable_all(added);
}
static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index 044b843..676aac2 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -483,7 +483,7 @@ static void intel_pmu_disable_all(void)
intel_pmu_lbr_disable_all();
}
-static void intel_pmu_enable_all(void)
+static void intel_pmu_enable_all(int added)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
@@ -502,6 +502,40 @@ static void intel_pmu_enable_all(void)
}
}
+/*
+ * Workaround for:
+ * Intel Errata AAK100 (model 26)
+ * Intel Errata AAP53 (model 30)
+ *
+ * These chips need to be 'reset' when adding counters by programming
+ * the magic three (non counting) events 0x4300D2, 0x4300B1 and 0x4300B5
+ * either in sequence on the same PMC or on different PMCs.
+ */
+static void intel_pmu_nhm_enable_all(int added)
+{
+ if (added) {
+ struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
+ int i;
+
+ wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 0, 0x4300D2);
+ wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x4300B1);
+ wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x4300B5);
+
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x3);
+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0);
+
+ for (i = 0; i < 3; i++) {
+ struct perf_event *event = cpuc->events[i];
+
+ if (!event)
+ continue;
+
+ __x86_pmu_enable_event(&event->hw);
+ }
+ }
+ intel_pmu_enable_all(added);
+}
+
static inline u64 intel_pmu_get_status(void)
{
u64 status;
@@ -658,7 +692,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs)
intel_pmu_drain_bts_buffer();
status = intel_pmu_get_status();
if (!status) {
- intel_pmu_enable_all();
+ intel_pmu_enable_all(0);
return 0;
}
@@ -707,7 +741,7 @@ again:
goto again;
done:
- intel_pmu_enable_all();
+ intel_pmu_enable_all(0);
return 1;
}
@@ -920,7 +954,8 @@ static __init int intel_pmu_init(void)
intel_pmu_lbr_init_nhm();
x86_pmu.event_constraints = intel_nehalem_event_constraints;
- pr_cont("Nehalem/Corei7 events, ");
+ x86_pmu.enable_all = intel_pmu_nhm_enable_all;
+ pr_cont("Nehalem events, ");
break;
case 28: /* Atom */
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c
index f8fe069..0d1be36 100644
--- a/arch/x86/kernel/cpu/perf_event_p4.c
+++ b/arch/x86/kernel/cpu/perf_event_p4.c
@@ -535,7 +535,7 @@ static void p4_pmu_enable_event(struct perf_event *event)
(cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE);
}
-static void p4_pmu_enable_all(void)
+static void p4_pmu_enable_all(int added)
{
struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
int idx;
diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c
index 6ff4d01..877182c 100644
--- a/arch/x86/kernel/cpu/perf_event_p6.c
+++ b/arch/x86/kernel/cpu/perf_event_p6.c
@@ -66,7 +66,7 @@ static void p6_pmu_disable_all(void)
wrmsrl(MSR_P6_EVNTSEL0, val);
}
-static void p6_pmu_enable_all(void)
+static void p6_pmu_enable_all(int added)
{
unsigned long val;
^ permalink raw reply related [flat|nested] 6+ messages in thread
* Re: [PATCH] perf, x86: Add Nehelem PMU programming errata workaround
2010-03-26 13:08 [PATCH] perf, x86: Add Nehelem PMU programming errata workaround Peter Zijlstra
` (2 preceding siblings ...)
2010-03-26 14:48 ` [tip:perf/core] " tip-bot for Peter Zijlstra
@ 2010-03-27 21:27 ` Andi Kleen
2010-03-28 21:53 ` Stephane Eranian
3 siblings, 1 reply; 6+ messages in thread
From: Andi Kleen @ 2010-03-27 21:27 UTC (permalink / raw)
To: Peter Zijlstra; +Cc: Ingo Molnar, LKML, Stephane Eranian
Peter Zijlstra <peterz@infradead.org> writes:
> Subject: perf, x86: Add Nehelem PMU programming errata workaround
> From: Peter Zijlstra <a.p.zijlstra@chello.nl>
> Date: Fri Mar 26 13:59:41 CET 2010
>
> Implement the workaround for Intel Errata AAK100 and AAP53.
>
> Also, remove the Core-i7 name for Nehalem events since there are also
> Westmere based i7 chips.
Did you actually see this happen?
It looks like this will make the context switch into a perf
enabled process _MUCH_ more expensive, MSR writes are very slow.
-Andi
--
ak@linux.intel.com -- Speaking for myself only.
^ permalink raw reply [flat|nested] 6+ messages in thread* Re: [PATCH] perf, x86: Add Nehelem PMU programming errata workaround
2010-03-27 21:27 ` [PATCH] " Andi Kleen
@ 2010-03-28 21:53 ` Stephane Eranian
0 siblings, 0 replies; 6+ messages in thread
From: Stephane Eranian @ 2010-03-28 21:53 UTC (permalink / raw)
To: Andi Kleen; +Cc: Peter Zijlstra, Ingo Molnar, LKML
On Sat, Mar 27, 2010 at 10:27 PM, Andi Kleen <andi@firstfloor.org> wrote:
> Peter Zijlstra <peterz@infradead.org> writes:
>
>> Subject: perf, x86: Add Nehelem PMU programming errata workaround
>> From: Peter Zijlstra <a.p.zijlstra@chello.nl>
>> Date: Fri Mar 26 13:59:41 CET 2010
>>
>> Implement the workaround for Intel Errata AAK100 and AAP53.
>>
>> Also, remove the Core-i7 name for Nehalem events since there are also
>> Westmere based i7 chips.
>
> Did you actually see this happen?
>
This is the same as AAJ91. At the time, I created a test program and it was
moderately easy to reproduce.
> It looks like this will make the context switch into a perf
> enabled process _MUCH_ more expensive, MSR writes are very slow.
>
Yes, but there is no alternative, I suspect.
^ permalink raw reply [flat|nested] 6+ messages in thread