All of lore.kernel.org
 help / color / mirror / Atom feed
* [PATCH 0/4] perf x86 fixes
@ 2010-01-27 22:07 Peter Zijlstra
  2010-01-27 22:07 ` [PATCH 1/4] perf_events: x86: Fix event constraint masks Peter Zijlstra
                   ` (5 more replies)
  0 siblings, 6 replies; 12+ messages in thread
From: Peter Zijlstra @ 2010-01-27 22:07 UTC (permalink / raw)
  To: mingo, eranian; +Cc: linux-kernel, Peter Zijlstra

These patches make the tmp.perf/core branch work for me. In particular,
the first patch in this series cured a few funnies; the rest is
basically fallout from staring at that code too long.

-- 


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 1/4] perf_events: x86: Fix event constraint masks
  2010-01-27 22:07 [PATCH 0/4] perf x86 fixes Peter Zijlstra
@ 2010-01-27 22:07 ` Peter Zijlstra
  2010-01-29  9:29   ` [tip:perf/core] perf_events, " tip-bot for Peter Zijlstra
  2010-01-27 22:07 ` [PATCH 2/4] perf_events: x86: Clean up hw_perf_*_all() implementation Peter Zijlstra
                   ` (4 subsequent siblings)
  5 siblings, 1 reply; 12+ messages in thread
From: Peter Zijlstra @ 2010-01-27 22:07 UTC (permalink / raw)
  To: mingo, eranian; +Cc: linux-kernel, Peter Zijlstra

[-- Attachment #1: perf-fix-constraint-mask.patch --]
[-- Type: text/plain, Size: 1874 bytes --]

Since constraints are specified on the event number, not on the number
and unit mask, shorten the constraint masks so that we'll actually match
something.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
---
 arch/x86/include/asm/perf_event.h |    2 +-
 arch/x86/kernel/cpu/perf_event.c  |   13 +++++++++----
 2 files changed, 10 insertions(+), 5 deletions(-)

Index: linux-2.6/arch/x86/kernel/cpu/perf_event.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event.c
@@ -100,12 +100,17 @@ struct cpu_hw_events {
 	.weight = HWEIGHT64((u64)(n)),	\
 }
 
-#define INTEL_EVENT_CONSTRAINT(c, n)		EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
-#define FIXED_EVENT_CONSTRAINT(c, n)		EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK)
+#define INTEL_EVENT_CONSTRAINT(c, n)	\
+	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
 
-#define EVENT_CONSTRAINT_END			EVENT_CONSTRAINT(0, 0, 0)
+#define FIXED_EVENT_CONSTRAINT(c, n)	\
+	EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK)
 
-#define for_each_event_constraint(e, c)		for ((e) = (c); (e)->cmask; (e)++)
+#define EVENT_CONSTRAINT_END		\
+	EVENT_CONSTRAINT(0, 0, 0)
+
+#define for_each_event_constraint(e, c)	\
+	for ((e) = (c); (e)->cmask; (e)++)
 
 /*
  * struct x86_pmu - generic x86 pmu
Index: linux-2.6/arch/x86/include/asm/perf_event.h
===================================================================
--- linux-2.6.orig/arch/x86/include/asm/perf_event.h
+++ linux-2.6/arch/x86/include/asm/perf_event.h
@@ -50,7 +50,7 @@
 	 INTEL_ARCH_INV_MASK| \
 	 INTEL_ARCH_EDGE_MASK|\
 	 INTEL_ARCH_UNIT_MASK|\
-	 INTEL_ARCH_EVENT_MASK)
+	 INTEL_ARCH_EVTSEL_MASK)
 
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL		      0x3c
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK		(0x00 << 8)

-- 


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 2/4] perf_events: x86: Clean up hw_perf_*_all() implementation
  2010-01-27 22:07 [PATCH 0/4] perf x86 fixes Peter Zijlstra
  2010-01-27 22:07 ` [PATCH 1/4] perf_events: x86: Fix event constraint masks Peter Zijlstra
@ 2010-01-27 22:07 ` Peter Zijlstra
  2010-01-29  9:30   ` [tip:perf/core] perf_events, " tip-bot for Peter Zijlstra
  2010-01-27 22:07 ` [PATCH 3/4] perf_events: x86: Implement westmere support Peter Zijlstra
                   ` (3 subsequent siblings)
  5 siblings, 1 reply; 12+ messages in thread
From: Peter Zijlstra @ 2010-01-27 22:07 UTC (permalink / raw)
  To: mingo, eranian; +Cc: linux-kernel, Peter Zijlstra

[-- Attachment #1: perf-x86-cleanup-enable-disable-all.patch --]
[-- Type: text/plain, Size: 3092 bytes --]

Put the recursion avoidance code in the generic hook instead of
replicating it in each implementation.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
---
Index: linux-2.6/arch/x86/kernel/cpu/perf_event.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event.c
@@ -1099,15 +1099,8 @@ static int __hw_perf_event_init(struct p
 
 static void p6_pmu_disable_all(void)
 {
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	u64 val;
 
-	if (!cpuc->enabled)
-		return;
-
-	cpuc->enabled = 0;
-	barrier();
-
 	/* p6 only has one enable register */
 	rdmsrl(MSR_P6_EVNTSEL0, val);
 	val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
@@ -1118,12 +1111,6 @@ static void intel_pmu_disable_all(void)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
-	if (!cpuc->enabled)
-		return;
-
-	cpuc->enabled = 0;
-	barrier();
-
 	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
 
 	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
@@ -1135,17 +1122,6 @@ static void amd_pmu_disable_all(void)
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int idx;
 
-	if (!cpuc->enabled)
-		return;
-
-	cpuc->enabled = 0;
-	/*
-	 * ensure we write the disable before we start disabling the
-	 * events proper, so that amd_pmu_enable_event() does the
-	 * right thing.
-	 */
-	barrier();
-
 	for (idx = 0; idx < x86_pmu.num_events; idx++) {
 		u64 val;
 
@@ -1166,23 +1142,20 @@ void hw_perf_disable(void)
 	if (!x86_pmu_initialized())
 		return;
 
-	if (cpuc->enabled)
-		cpuc->n_added = 0;
+	if (!cpuc->enabled)
+		return;
+
+	cpuc->n_added = 0;
+	cpuc->enabled = 0;
+	barrier();
 
 	x86_pmu.disable_all();
 }
 
 static void p6_pmu_enable_all(void)
 {
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	unsigned long val;
 
-	if (cpuc->enabled)
-		return;
-
-	cpuc->enabled = 1;
-	barrier();
-
 	/* p6 only has one enable register */
 	rdmsrl(MSR_P6_EVNTSEL0, val);
 	val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
@@ -1193,12 +1166,6 @@ static void intel_pmu_enable_all(void)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
-	if (cpuc->enabled)
-		return;
-
-	cpuc->enabled = 1;
-	barrier();
-
 	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
 
 	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
@@ -1217,12 +1184,6 @@ static void amd_pmu_enable_all(void)
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int idx;
 
-	if (cpuc->enabled)
-		return;
-
-	cpuc->enabled = 1;
-	barrier();
-
 	for (idx = 0; idx < x86_pmu.num_events; idx++) {
 		struct perf_event *event = cpuc->events[idx];
 		u64 val;
@@ -1417,6 +1378,10 @@ void hw_perf_enable(void)
 
 	if (!x86_pmu_initialized())
 		return;
+
+	if (cpuc->enabled)
+		return;
+
 	if (cpuc->n_added) {
 		/*
 		 * apply assignment obtained either from
@@ -1461,6 +1426,10 @@ void hw_perf_enable(void)
 		cpuc->n_added = 0;
 		perf_events_lapic_init();
 	}
+
+	cpuc->enabled = 1;
+	barrier();
+
 	x86_pmu.enable_all();
 }
 

-- 


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 3/4] perf_events: x86: Implement westmere support
  2010-01-27 22:07 [PATCH 0/4] perf x86 fixes Peter Zijlstra
  2010-01-27 22:07 ` [PATCH 1/4] perf_events: x86: Fix event constraint masks Peter Zijlstra
  2010-01-27 22:07 ` [PATCH 2/4] perf_events: x86: Clean up hw_perf_*_all() implementation Peter Zijlstra
@ 2010-01-27 22:07 ` Peter Zijlstra
  2010-01-29  9:30   ` [tip:perf/core] perf_events, x86: Implement Intel Westmere support tip-bot for Peter Zijlstra
  2010-01-27 22:07 ` [PATCH 4/4] perf_events: x86: Remove spurious counter reset from x86_pmu_enable() Peter Zijlstra
                   ` (2 subsequent siblings)
  5 siblings, 1 reply; 12+ messages in thread
From: Peter Zijlstra @ 2010-01-27 22:07 UTC (permalink / raw)
  To: mingo, eranian; +Cc: linux-kernel, Peter Zijlstra

[-- Attachment #1: perf-westmere.patch --]
[-- Type: text/plain, Size: 6263 bytes --]

The new Intel documentation includes Westmere arch specific event
maps that are significantly different from the Nehalem ones. Add
support for this generation.

Found the CPUID model numbers on Wikipedia.

Also amend some Nehalem constraints, spotted those when looking for
the differences between Nehalem and Westmere.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
---
 arch/x86/kernel/cpu/perf_event.c |  124 ++++++++++++++++++++++++++++++++++++---
 1 file changed, 117 insertions(+), 7 deletions(-)

Index: linux-2.6/arch/x86/kernel/cpu/perf_event.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event.c
@@ -244,18 +244,26 @@ static struct event_constraint intel_cor
 
 static struct event_constraint intel_nehalem_event_constraints[] =
 {
-	FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
-	FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
+	FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
+	FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
 	INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
 	INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
 	INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
 	INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
+	INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
 	INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
-	INTEL_EVENT_CONSTRAINT(0x4c, 0x3), /* LOAD_HIT_PRE */
 	INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
-	INTEL_EVENT_CONSTRAINT(0x52, 0x3), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
-	INTEL_EVENT_CONSTRAINT(0x53, 0x3), /* L1D_CACHE_LOCK_FB_HIT */
-	INTEL_EVENT_CONSTRAINT(0xc5, 0x3), /* CACHE_LOCK_CYCLES */
+	INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
+	EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_westmere_event_constraints[] =
+{
+	FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
+	FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
+	INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
+	INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
+	INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
 	EVENT_CONSTRAINT_END
 };
 
@@ -286,6 +294,97 @@ static u64 __read_mostly hw_cache_event_
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX];
 
+static __initconst u64 westmere_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
+		[ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
+		[ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
+		[ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
+		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
+		[ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
+		[ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
+		[ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
+		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
+		[ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
+		[ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.ANY              */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
+		[ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+};
+
 static __initconst u64 nehalem_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -2430,7 +2529,9 @@ static __init int intel_pmu_init(void)
 		x86_pmu.event_constraints = intel_core_event_constraints;
 		pr_cont("Core2 events, ");
 		break;
-	case 26:
+
+	case 26: /* 45 nm nehalem, "Bloomfield" */
+	case 30: /* 45 nm nehalem, "Lynnfield" */
 		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
@@ -2444,6 +2545,15 @@ static __init int intel_pmu_init(void)
 		x86_pmu.event_constraints = intel_gen_event_constraints;
 		pr_cont("Atom events, ");
 		break;
+
+	case 37: /* 32 nm nehalem, "Clarkdale" */
+	case 44: /* 32 nm nehalem, "Gulftown" */
+		memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+
+		x86_pmu.event_constraints = intel_westmere_event_constraints;
+		pr_cont("Westmere events, ");
+		break;
 	default:
 		/*
 		 * default constraints for v2 and up

-- 


^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 4/4] perf_events: x86: Remove spurious counter reset from x86_pmu_enable()
  2010-01-27 22:07 [PATCH 0/4] perf x86 fixes Peter Zijlstra
                   ` (2 preceding siblings ...)
  2010-01-27 22:07 ` [PATCH 3/4] perf_events: x86: Implement westmere support Peter Zijlstra
@ 2010-01-27 22:07 ` Peter Zijlstra
  2010-01-29  9:30   ` [tip:perf/core] perf_events, " tip-bot for Peter Zijlstra
  2010-01-29  7:55 ` [PATCH 0/4] perf x86 fixes Ingo Molnar
  2010-01-29  8:04 ` [PATCH 5/4] perf_events: Fix sample_period transfer on inherit Peter Zijlstra
  5 siblings, 1 reply; 12+ messages in thread
From: Peter Zijlstra @ 2010-01-27 22:07 UTC (permalink / raw)
  To: mingo, eranian; +Cc: linux-kernel, Peter Zijlstra

[-- Attachment #1: perf-cleanup-enable.patch --]
[-- Type: text/plain, Size: 852 bytes --]

At enable time the counter might still have a ->idx pointing to a
previously occupied location that might now be taken by another event.
Resetting the counter at that location with data from this event will
destroy the other counter's count.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
---
 arch/x86/kernel/cpu/perf_event.c |    3 ---
 1 file changed, 3 deletions(-)

Index: linux-2.6/arch/x86/kernel/cpu/perf_event.c
===================================================================
--- linux-2.6.orig/arch/x86/kernel/cpu/perf_event.c
+++ linux-2.6/arch/x86/kernel/cpu/perf_event.c
@@ -1769,9 +1769,6 @@ static int x86_pmu_enable(struct perf_ev
 	cpuc->n_events = n;
 	cpuc->n_added  = n - n0;
 
-	if (hwc->idx != -1)
-		x86_perf_event_set_period(event, hwc, hwc->idx);
-
 	return 0;
 }
 

-- 


^ permalink raw reply	[flat|nested] 12+ messages in thread

* Re: [PATCH 0/4] perf x86 fixes
  2010-01-27 22:07 [PATCH 0/4] perf x86 fixes Peter Zijlstra
                   ` (3 preceding siblings ...)
  2010-01-27 22:07 ` [PATCH 4/4] perf_events: x86: Remove spurious counter reset from x86_pmu_enable() Peter Zijlstra
@ 2010-01-29  7:55 ` Ingo Molnar
  2010-01-29  8:04 ` [PATCH 5/4] perf_events: Fix sample_period transfer on inherit Peter Zijlstra
  5 siblings, 0 replies; 12+ messages in thread
From: Ingo Molnar @ 2010-01-29  7:55 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: eranian, linux-kernel, Paul Mackerras


* Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:

> These patches make the tmp.perf/core branch work for me, that is, esp.
> the first patch in this series cured a few funnies, the rest is
> basically fallout from staring at that code too long.

Thanks Peter, will have a look at getting the tmp.perf/core bits into 
tip:master today.

	Ingo

^ permalink raw reply	[flat|nested] 12+ messages in thread

* [PATCH 5/4] perf_events: Fix sample_period transfer on inherit
  2010-01-27 22:07 [PATCH 0/4] perf x86 fixes Peter Zijlstra
                   ` (4 preceding siblings ...)
  2010-01-29  7:55 ` [PATCH 0/4] perf x86 fixes Ingo Molnar
@ 2010-01-29  8:04 ` Peter Zijlstra
  2010-01-29  9:30   ` [tip:perf/core] " tip-bot for Peter Zijlstra
  5 siblings, 1 reply; 12+ messages in thread
From: Peter Zijlstra @ 2010-01-29  8:04 UTC (permalink / raw)
  To: mingo; +Cc: eranian, linux-kernel

Subject: perf_events: Fix sample_period transfer on inherit
From: Peter Zijlstra <a.p.zijlstra@chello.nl>
Date: Thu Jan 28 12:34:07 CET 2010

One problem with frequency-driven counters is that we cannot predict
the rate at which they trigger; therefore we have to start them at
period=1, which causes a ramp-up effect. However, if we fail to
propagate the stable state on fork, each new child will have to ramp up
again. This can lead to significant artifacts in sample data.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
---
 kernel/perf_event.c |   11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

Index: linux-2.6/kernel/perf_event.c
===================================================================
--- linux-2.6.orig/kernel/perf_event.c
+++ linux-2.6/kernel/perf_event.c
@@ -5011,8 +5011,15 @@ inherit_event(struct perf_event *parent_
 	else
 		child_event->state = PERF_EVENT_STATE_OFF;
 
-	if (parent_event->attr.freq)
-		child_event->hw.sample_period = parent_event->hw.sample_period;
+	if (parent_event->attr.freq) {
+		u64 sample_period = parent_event->hw.sample_period;
+		struct hw_perf_event *hwc = &child_event->hw;
+
+		hwc->sample_period = sample_period;
+		hwc->last_period   = sample_period;
+
+		atomic64_set(&hwc->period_left, sample_period);
+	}
 
 	child_event->overflow_handler = parent_event->overflow_handler;



^ permalink raw reply	[flat|nested] 12+ messages in thread

* [tip:perf/core] perf_events, x86: Fix event constraint masks
  2010-01-27 22:07 ` [PATCH 1/4] perf_events: x86: Fix event constraint masks Peter Zijlstra
@ 2010-01-29  9:29   ` tip-bot for Peter Zijlstra
  0 siblings, 0 replies; 12+ messages in thread
From: tip-bot for Peter Zijlstra @ 2010-01-29  9:29 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, eranian, hpa, mingo, a.p.zijlstra, tglx, mingo

Commit-ID:  ed8777fc132e589d48a0ba854fdbb5d8203b58e5
Gitweb:     http://git.kernel.org/tip/ed8777fc132e589d48a0ba854fdbb5d8203b58e5
Author:     Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Wed, 27 Jan 2010 23:07:46 +0100
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 29 Jan 2010 09:01:46 +0100

perf_events, x86: Fix event constraint masks

Since constraints are specified on the event number, not on the number
and unit mask, shorten the constraint masks so that we'll
actually match something.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
LKML-Reference: <20100127221121.967610372@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/include/asm/perf_event.h |    2 +-
 arch/x86/kernel/cpu/perf_event.c  |   13 +++++++++----
 2 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index dbc0826..ff5ede1 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -49,7 +49,7 @@
 	 INTEL_ARCH_INV_MASK| \
 	 INTEL_ARCH_EDGE_MASK|\
 	 INTEL_ARCH_UNIT_MASK|\
-	 INTEL_ARCH_EVENT_MASK)
+	 INTEL_ARCH_EVTSEL_MASK)
 
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL		      0x3c
 #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK		(0x00 << 8)
diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 07fa0c2..951213a 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -100,12 +100,17 @@ struct cpu_hw_events {
 	.weight = HWEIGHT64((u64)(n)),	\
 }
 
-#define INTEL_EVENT_CONSTRAINT(c, n)		EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK)
-#define FIXED_EVENT_CONSTRAINT(c, n)		EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK)
+#define INTEL_EVENT_CONSTRAINT(c, n)	\
+	EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK)
 
-#define EVENT_CONSTRAINT_END			EVENT_CONSTRAINT(0, 0, 0)
+#define FIXED_EVENT_CONSTRAINT(c, n)	\
+	EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK)
 
-#define for_each_event_constraint(e, c)		for ((e) = (c); (e)->cmask; (e)++)
+#define EVENT_CONSTRAINT_END		\
+	EVENT_CONSTRAINT(0, 0, 0)
+
+#define for_each_event_constraint(e, c)	\
+	for ((e) = (c); (e)->cmask; (e)++)
 
 /*
  * struct x86_pmu - generic x86 pmu

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [tip:perf/core] perf_events, x86: Clean up hw_perf_*_all() implementation
  2010-01-27 22:07 ` [PATCH 2/4] perf_events: x86: Clean up hw_perf_*_all() implementation Peter Zijlstra
@ 2010-01-29  9:30   ` tip-bot for Peter Zijlstra
  0 siblings, 0 replies; 12+ messages in thread
From: tip-bot for Peter Zijlstra @ 2010-01-29  9:30 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, eranian, hpa, mingo, a.p.zijlstra, tglx, mingo

Commit-ID:  1a6e21f791fe85b40a9ddbafe999ab8ccffc3f78
Gitweb:     http://git.kernel.org/tip/1a6e21f791fe85b40a9ddbafe999ab8ccffc3f78
Author:     Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Wed, 27 Jan 2010 23:07:47 +0100
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 29 Jan 2010 09:01:47 +0100

perf_events, x86: Clean up hw_perf_*_all() implementation

Put the recursion avoidance code in the generic hook instead of
replicating it in each implementation.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
LKML-Reference: <20100127221122.057507285@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c |   59 +++++++++-----------------------------
 1 files changed, 14 insertions(+), 45 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 951213a..cf10839 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1099,15 +1099,8 @@ static int __hw_perf_event_init(struct perf_event *event)
 
 static void p6_pmu_disable_all(void)
 {
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	u64 val;
 
-	if (!cpuc->enabled)
-		return;
-
-	cpuc->enabled = 0;
-	barrier();
-
 	/* p6 only has one enable register */
 	rdmsrl(MSR_P6_EVNTSEL0, val);
 	val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE;
@@ -1118,12 +1111,6 @@ static void intel_pmu_disable_all(void)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
-	if (!cpuc->enabled)
-		return;
-
-	cpuc->enabled = 0;
-	barrier();
-
 	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0);
 
 	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask))
@@ -1135,17 +1122,6 @@ static void amd_pmu_disable_all(void)
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int idx;
 
-	if (!cpuc->enabled)
-		return;
-
-	cpuc->enabled = 0;
-	/*
-	 * ensure we write the disable before we start disabling the
-	 * events proper, so that amd_pmu_enable_event() does the
-	 * right thing.
-	 */
-	barrier();
-
 	for (idx = 0; idx < x86_pmu.num_events; idx++) {
 		u64 val;
 
@@ -1166,23 +1142,20 @@ void hw_perf_disable(void)
 	if (!x86_pmu_initialized())
 		return;
 
-	if (cpuc->enabled)
-		cpuc->n_added = 0;
+	if (!cpuc->enabled)
+		return;
+
+	cpuc->n_added = 0;
+	cpuc->enabled = 0;
+	barrier();
 
 	x86_pmu.disable_all();
 }
 
 static void p6_pmu_enable_all(void)
 {
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	unsigned long val;
 
-	if (cpuc->enabled)
-		return;
-
-	cpuc->enabled = 1;
-	barrier();
-
 	/* p6 only has one enable register */
 	rdmsrl(MSR_P6_EVNTSEL0, val);
 	val |= ARCH_PERFMON_EVENTSEL0_ENABLE;
@@ -1193,12 +1166,6 @@ static void intel_pmu_enable_all(void)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 
-	if (cpuc->enabled)
-		return;
-
-	cpuc->enabled = 1;
-	barrier();
-
 	wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl);
 
 	if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) {
@@ -1217,12 +1184,6 @@ static void amd_pmu_enable_all(void)
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	int idx;
 
-	if (cpuc->enabled)
-		return;
-
-	cpuc->enabled = 1;
-	barrier();
-
 	for (idx = 0; idx < x86_pmu.num_events; idx++) {
 		struct perf_event *event = cpuc->events[idx];
 		u64 val;
@@ -1417,6 +1378,10 @@ void hw_perf_enable(void)
 
 	if (!x86_pmu_initialized())
 		return;
+
+	if (cpuc->enabled)
+		return;
+
 	if (cpuc->n_added) {
 		/*
 		 * apply assignment obtained either from
@@ -1461,6 +1426,10 @@ void hw_perf_enable(void)
 		cpuc->n_added = 0;
 		perf_events_lapic_init();
 	}
+
+	cpuc->enabled = 1;
+	barrier();
+
 	x86_pmu.enable_all();
 }
 

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [tip:perf/core] perf_events, x86: Implement Intel Westmere support
  2010-01-27 22:07 ` [PATCH 3/4] perf_events: x86: Implement westmere support Peter Zijlstra
@ 2010-01-29  9:30   ` tip-bot for Peter Zijlstra
  0 siblings, 0 replies; 12+ messages in thread
From: tip-bot for Peter Zijlstra @ 2010-01-29  9:30 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, eranian, hpa, mingo, arjan, a.p.zijlstra, tglx,
	mingo

Commit-ID:  452a339a976e7f782c786eb3f73080401e2fa3a6
Gitweb:     http://git.kernel.org/tip/452a339a976e7f782c786eb3f73080401e2fa3a6
Author:     Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Wed, 27 Jan 2010 23:07:48 +0100
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 29 Jan 2010 09:01:48 +0100

perf_events, x86: Implement Intel Westmere support

The new Intel documentation includes Westmere arch specific
event maps that are significantly different from the Nehalem
ones. Add support for this generation.

Found the CPUID model numbers on Wikipedia.

Also amend some Nehalem constraints, spotted those when looking
for the differences between Nehalem and Westmere.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Arjan van de Ven <arjan@linux.intel.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Stephane Eranian <eranian@google.com>
LKML-Reference: <20100127221122.151865645@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c |  124 +++++++++++++++++++++++++++++++++++--
 1 files changed, 117 insertions(+), 7 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index cf10839..3fac0bf 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -244,18 +244,26 @@ static struct event_constraint intel_core_event_constraints[] =
 
 static struct event_constraint intel_nehalem_event_constraints[] =
 {
-	FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
-	FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
+	FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
+	FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
 	INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */
 	INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */
 	INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */
 	INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */
+	INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */
 	INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */
-	INTEL_EVENT_CONSTRAINT(0x4c, 0x3), /* LOAD_HIT_PRE */
 	INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
-	INTEL_EVENT_CONSTRAINT(0x52, 0x3), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */
-	INTEL_EVENT_CONSTRAINT(0x53, 0x3), /* L1D_CACHE_LOCK_FB_HIT */
-	INTEL_EVENT_CONSTRAINT(0xc5, 0x3), /* CACHE_LOCK_CYCLES */
+	INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
+	EVENT_CONSTRAINT_END
+};
+
+static struct event_constraint intel_westmere_event_constraints[] =
+{
+	FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */
+	FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */
+	INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */
+	INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */
+	INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */
 	EVENT_CONSTRAINT_END
 };
 
@@ -286,6 +294,97 @@ static u64 __read_mostly hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_OP_MAX]
 				[PERF_COUNT_HW_CACHE_RESULT_MAX];
 
+static __initconst u64 westmere_hw_cache_event_ids
+				[PERF_COUNT_HW_CACHE_MAX]
+				[PERF_COUNT_HW_CACHE_OP_MAX]
+				[PERF_COUNT_HW_CACHE_RESULT_MAX] =
+{
+ [ C(L1D) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
+		[ C(RESULT_MISS)   ] = 0x0151, /* L1D.REPL                     */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
+		[ C(RESULT_MISS)   ] = 0x0251, /* L1D.M_REPL                   */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS        */
+		[ C(RESULT_MISS)   ] = 0x024e, /* L1D_PREFETCH.MISS            */
+	},
+ },
+ [ C(L1I ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS                    */
+		[ C(RESULT_MISS)   ] = 0x0280, /* L1I.MISSES                   */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(LL  ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
+		[ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
+		[ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
+		[ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
+	},
+ },
+ [ C(DTLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS       */
+		[ C(RESULT_MISS)   ] = 0x0108, /* DTLB_LOAD_MISSES.ANY         */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETIRED.STORES      */
+		[ C(RESULT_MISS)   ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS  */
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = 0x0,
+		[ C(RESULT_MISS)   ] = 0x0,
+	},
+ },
+ [ C(ITLB) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P           */
+		[ C(RESULT_MISS)   ] = 0x0185, /* ITLB_MISSES.ANY              */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+ [ C(BPU ) ] = {
+	[ C(OP_READ) ] = {
+		[ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */
+		[ C(RESULT_MISS)   ] = 0x03e8, /* BPU_CLEARS.ANY               */
+	},
+	[ C(OP_WRITE) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+	[ C(OP_PREFETCH) ] = {
+		[ C(RESULT_ACCESS) ] = -1,
+		[ C(RESULT_MISS)   ] = -1,
+	},
+ },
+};
+
 static __initconst u64 nehalem_hw_cache_event_ids
 				[PERF_COUNT_HW_CACHE_MAX]
 				[PERF_COUNT_HW_CACHE_OP_MAX]
@@ -2423,7 +2522,9 @@ static __init int intel_pmu_init(void)
 		x86_pmu.event_constraints = intel_core_event_constraints;
 		pr_cont("Core2 events, ");
 		break;
-	case 26:
+
+	case 26: /* 45 nm nehalem, "Bloomfield" */
+	case 30: /* 45 nm nehalem, "Lynnfield" */
 		memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids,
 		       sizeof(hw_cache_event_ids));
 
@@ -2437,6 +2538,15 @@ static __init int intel_pmu_init(void)
 		x86_pmu.event_constraints = intel_gen_event_constraints;
 		pr_cont("Atom events, ");
 		break;
+
+	case 37: /* 32 nm nehalem, "Clarkdale" */
+	case 44: /* 32 nm nehalem, "Gulftown" */
+		memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids,
+		       sizeof(hw_cache_event_ids));
+
+		x86_pmu.event_constraints = intel_westmere_event_constraints;
+		pr_cont("Westmere events, ");
+		break;
 	default:
 		/*
 		 * default constraints for v2 and up

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [tip:perf/core] perf_events, x86: Remove spurious counter reset from x86_pmu_enable()
  2010-01-27 22:07 ` [PATCH 4/4] perf_events: x86: Remove spurious counter reset from x86_pmu_enable() Peter Zijlstra
@ 2010-01-29  9:30   ` tip-bot for Peter Zijlstra
  0 siblings, 0 replies; 12+ messages in thread
From: tip-bot for Peter Zijlstra @ 2010-01-29  9:30 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, eranian, hpa, mingo, a.p.zijlstra, tglx, mingo

Commit-ID:  18c01f8abff51e4910cc5ffb4b710e8c6eea60c9
Gitweb:     http://git.kernel.org/tip/18c01f8abff51e4910cc5ffb4b710e8c6eea60c9
Author:     Peter Zijlstra <a.p.zijlstra@chello.nl>
AuthorDate: Wed, 27 Jan 2010 23:07:49 +0100
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 29 Jan 2010 09:01:49 +0100

perf_events, x86: Remove spurious counter reset from x86_pmu_enable()

At enable time the counter might still have a ->idx pointing to
a previously occupied location that might now be taken by
another event. Resetting the counter at that location with data
from this event will destroy the other counter's count.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Stephane Eranian <eranian@google.com>
LKML-Reference: <20100127221122.261477183@chello.nl>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 arch/x86/kernel/cpu/perf_event.c |    3 ---
 1 files changed, 0 insertions(+), 3 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 3fac0bf..518eb3e 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -1762,9 +1762,6 @@ static int x86_pmu_enable(struct perf_event *event)
 	cpuc->n_events = n;
 	cpuc->n_added  = n - n0;
 
-	if (hwc->idx != -1)
-		x86_perf_event_set_period(event, hwc, hwc->idx);
-
 	return 0;
 }
 

^ permalink raw reply related	[flat|nested] 12+ messages in thread

* [tip:perf/core] perf_events: Fix sample_period transfer on inherit
  2010-01-29  8:04 ` [PATCH 5/4] perf_events: Fix sample_period transfer on inherit Peter Zijlstra
@ 2010-01-29  9:30   ` tip-bot for Peter Zijlstra
  0 siblings, 0 replies; 12+ messages in thread
From: tip-bot for Peter Zijlstra @ 2010-01-29  9:30 UTC (permalink / raw)
  To: linux-tip-commits
  Cc: linux-kernel, acme, paulus, hpa, mingo, a.p.zijlstra, efault,
	peterz, fweisbec, tglx, mingo

Commit-ID:  75c9f3284a7ff957829f44baace82406a6354ceb
Gitweb:     http://git.kernel.org/tip/75c9f3284a7ff957829f44baace82406a6354ceb
Author:     Peter Zijlstra <peterz@infradead.org>
AuthorDate: Fri, 29 Jan 2010 09:04:26 +0100
Committer:  Ingo Molnar <mingo@elte.hu>
CommitDate: Fri, 29 Jan 2010 09:15:26 +0100

perf_events: Fix sample_period transfer on inherit

One problem with frequency driven counters is that we cannot
predict the rate at which they trigger, so we have to start
them at period=1, which causes a ramp-up effect. However, if we
fail to propagate the stable state on fork, each new child will
have to ramp up again. This can lead to significant artifacts in
sample data.

Signed-off-by: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: eranian@google.com
Cc: Mike Galbraith <efault@gmx.de>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@redhat.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
LKML-Reference: <1264752266.4283.2121.camel@laptop>
Signed-off-by: Ingo Molnar <mingo@elte.hu>
---
 kernel/perf_event.c |   11 +++++++++--
 1 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index 251fb95..53dc2a3 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -5002,8 +5002,15 @@ inherit_event(struct perf_event *parent_event,
 	else
 		child_event->state = PERF_EVENT_STATE_OFF;
 
-	if (parent_event->attr.freq)
-		child_event->hw.sample_period = parent_event->hw.sample_period;
+	if (parent_event->attr.freq) {
+		u64 sample_period = parent_event->hw.sample_period;
+		struct hw_perf_event *hwc = &child_event->hw;
+
+		hwc->sample_period = sample_period;
+		hwc->last_period   = sample_period;
+
+		atomic64_set(&hwc->period_left, sample_period);
+	}
 
 	child_event->overflow_handler = parent_event->overflow_handler;
 

^ permalink raw reply related	[flat|nested] 12+ messages in thread

end of thread, other threads:[~2010-01-29  9:31 UTC | newest]

Thread overview: 12+ messages (download: mbox.gz / follow: Atom feed)
-- links below jump to the message on this page --
2010-01-27 22:07 [PATCH 0/4] perf x86 fixes Peter Zijlstra
2010-01-27 22:07 ` [PATCH 1/4] perf_events: x86: Fix event constraint masks Peter Zijlstra
2010-01-29  9:29   ` [tip:perf/core] perf_events, " tip-bot for Peter Zijlstra
2010-01-27 22:07 ` [PATCH 2/4] perf_events: x86: Clean up hw_perf_*_all() implementation Peter Zijlstra
2010-01-29  9:30   ` [tip:perf/core] perf_events, " tip-bot for Peter Zijlstra
2010-01-27 22:07 ` [PATCH 3/4] perf_events: x86: Implement westmere support Peter Zijlstra
2010-01-29  9:30   ` [tip:perf/core] perf_events, x86: Implement Intel Westmere support tip-bot for Peter Zijlstra
2010-01-27 22:07 ` [PATCH 4/4] perf_events: x86: Remove spurious counter reset from x86_pmu_enable() Peter Zijlstra
2010-01-29  9:30   ` [tip:perf/core] perf_events, " tip-bot for Peter Zijlstra
2010-01-29  7:55 ` [PATCH 0/4] perf x86 fixes Ingo Molnar
2010-01-29  8:04 ` [PATCH 5/4] perf_events: Fix sample_period transfer on inherit Peter Zijlstra
2010-01-29  9:30   ` [tip:perf/core] " tip-bot for Peter Zijlstra

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.