public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [PATCH 1/2] perf-events: Add support for supplementary event registers v2
@ 2010-11-12 16:55 Andi Kleen
  2010-11-12 16:55 ` [PATCH 2/2] perf-events: Fix LLC-* events on Intel Nehalem/Westmere v2 Andi Kleen
                   ` (3 more replies)
  0 siblings, 4 replies; 19+ messages in thread
From: Andi Kleen @ 2010-11-12 16:55 UTC (permalink / raw)
  To: a.p.zijlstra; +Cc: eranian, linux-kernel, cjashfor, mingo, fweisbec, Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

Intel Nehalem/Westmere have a special OFFCORE_RESPONSE event
that can be used to monitor any offcore accesses from a core.
This is a very useful event for various tunings, and it's
also needed to implement the generic LLC-* events correctly.

Unfortunately this event requires programming a mask in a separate
register. And worse this separate register is per core, not per
CPU thread.

This patch adds:
- Teaches perf_events that OFFCORE_RESPONSE needs extra parameters.
The extra parameters are passed by user space in the unused upper
32bits of the config word.
- Add support to the Intel perf_event core to schedule the per
core resource. I tried to add generic infrastructure for this
that could be also used for other core resources.
The basic code is patterned after the similar AMD northbridge
constraints code.

Thanks to Stephane Eranian who pointed out some problems
in the original version and suggested improvements.

Full git tree:
git://git.kernel.org/pub/scm/linux/kernel/git/ak/linux-misc-2.6.git perf-offcore2
Cc: eranian@google.com
v2: Lots of updates based on review feedback. Also fixes some issues
Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/kernel/cpu/perf_event.c       |   70 ++++++++++++++
 arch/x86/kernel/cpu/perf_event_intel.c |  159 ++++++++++++++++++++++++++++++++
 include/linux/perf_event.h             |    2 +
 3 files changed, 231 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index ed63101..346006a 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -93,6 +93,8 @@ struct amd_nb {
 	struct event_constraint event_constraints[X86_PMC_IDX_MAX];
 };
 
+struct intel_percore;
+
 #define MAX_LBR_ENTRIES		16
 
 struct cpu_hw_events {
@@ -127,6 +129,13 @@ struct cpu_hw_events {
 	struct perf_branch_stack	lbr_stack;
 	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
 
+	/* 
+	 * Intel percore register state.
+	 * Coordinate shared resources between HT threads.
+	 */
+	int 				percore_used; /* Used by this CPU? */
+	struct intel_percore		*per_core;
+
 	/*
 	 * AMD specific bits
 	 */
@@ -175,6 +184,32 @@ struct cpu_hw_events {
 #define for_each_event_constraint(e, c)	\
 	for ((e) = (c); (e)->weight; (e)++)
 
+/*
+ * Extra registers for specific events.
+ * Some events need large masks and require external MSRs.
+ * Define a mapping to these extra registers.
+ * The actual contents are still encoded in unused parts of the
+ * original config u64.
+ */
+struct extra_reg {
+	unsigned int		event;
+	unsigned int		msr;
+	unsigned int		extra_shift;
+	u64 			config_mask;
+	u64 			valid_mask;
+};
+
+#define EVENT_EXTRA_REG(e, ms, m, vm, es) {	\
+	.event = (e),    	\
+	.msr = (ms),	     	\
+	.config_mask = (m),  	\
+	.valid_mask = (vm),	\
+	.extra_shift = (es),	\
+	}
+#define INTEL_EVENT_EXTRA_REG(event, msr, vm, es)	\
+	EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, es)
+#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, 0)
+
 union perf_capabilities {
 	struct {
 		u64	lbr_format    : 6;
@@ -219,6 +254,7 @@ struct x86_pmu {
 	void		(*put_event_constraints)(struct cpu_hw_events *cpuc,
 						 struct perf_event *event);
 	struct event_constraint *event_constraints;
+	struct event_constraint *percore_constraints;
 	void		(*quirks)(void);
 	int		perfctr_second_write;
 
@@ -247,6 +283,11 @@ struct x86_pmu {
 	 */
 	unsigned long	lbr_tos, lbr_from, lbr_to; /* MSR base regs       */
 	int		lbr_nr;			   /* hardware stack size */
+
+	/*
+	 * Extra registers for events
+	 */
+	struct extra_reg *extra_regs;
 };
 
 static struct x86_pmu x86_pmu __read_mostly;
@@ -321,6 +362,33 @@ again:
 	return new_raw_count;
 }
 
+/*
+ * Find and validate any extra registers to set up.
+ */
+static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
+{
+	struct extra_reg *er;
+	u64 extra;
+
+	event->hw.extra_reg = 0;
+	event->hw.extra_config = 0;
+
+	if (!x86_pmu.extra_regs)
+		return 0;
+
+	for (er = x86_pmu.extra_regs; er->msr; er++) {
+		if (er->event != (config & er->config_mask))
+			continue;
+		event->hw.extra_reg = er->msr;
+		extra = config >> er->extra_shift;
+		if (extra & ~er->valid_mask)
+			return -EINVAL;
+		event->hw.extra_config = extra;
+		break;
+	}
+	return 0;
+}
+
 static atomic_t active_events;
 static DEFINE_MUTEX(pmc_reserve_mutex);
 
@@ -876,6 +944,8 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
 					  u64 enable_mask)
 {
 	wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask);
+	if (hwc->extra_reg)
+		wrmsrl(hwc->extra_reg, hwc->extra_config);
 }
 
 static inline void x86_pmu_disable_event(struct perf_event *event)
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index c8f5c08..a84de7e 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1,5 +1,20 @@
 #ifdef CONFIG_CPU_SUP_INTEL
 
+/* 
+ * Per core state
+ * This used to coordinate shared resources for HT threads.
+ * Exists per CPU, but only the entry for the first CPU
+ * in the core is used.
+ */
+struct intel_percore {
+	raw_spinlock_t 		lock;		/* protect structure */
+	int 			ref;		/* reference count */
+	u64 			config;	        /* main counter config */
+	unsigned int 		extra_reg;	/* extra MSR number */
+	u64 			extra_config;	/* extra MSR config */
+};
+static struct intel_percore __percpu *intel_percore;
+
 /*
  * Intel PerfMon, used on Core and later.
  */
@@ -64,6 +79,18 @@ static struct event_constraint intel_nehalem_event_constraints[] =
 	EVENT_CONSTRAINT_END
 };
 
+static struct extra_reg intel_nehalem_extra_regs[] =
+{
+	INTEL_EVENT_EXTRA_REG(0xb7, 0x1a6, 0xffff, 32), /* OFFCORE_RESPONSE_0 */
+	EVENT_EXTRA_END
+};
+
+static struct event_constraint intel_nehalem_percore_constraints[] =
+{
+	INTEL_EVENT_CONSTRAINT(0xb7, 0),
+	EVENT_CONSTRAINT_END
+};
+
 static struct event_constraint intel_westmere_event_constraints[] =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -76,6 +103,20 @@ static struct event_constraint intel_westmere_event_constraints[] =
 	EVENT_CONSTRAINT_END
 };
 
+static struct extra_reg intel_westmere_extra_regs[] =
+{
+	INTEL_EVENT_EXTRA_REG(0xb7, 0x1a6, 0xffff, 32), /* OFFCORE_RESPONSE_0 */
+	INTEL_EVENT_EXTRA_REG(0xbb, 0x1a7, 0xffff, 32), /* OFFCORE_RESPONSE_1 */
+	EVENT_EXTRA_END
+};
+
+static struct event_constraint intel_westmere_percore_constraints[] =
+{
+	INTEL_EVENT_CONSTRAINT(0xb7, 0),
+	INTEL_EVENT_CONSTRAINT(0xbb, 0),
+	EVENT_CONSTRAINT_END
+};
+
 static struct event_constraint intel_gen_event_constraints[] =
 {
 	FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
@@ -794,6 +835,56 @@ intel_bts_constraints(struct perf_event *event)
 }
 
 static struct event_constraint *
+intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
+{
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
+	struct event_constraint *c;
+	struct intel_percore *pc;
+
+	if (!x86_pmu.percore_constraints)
+		return NULL;
+
+	for (c = x86_pmu.percore_constraints; c->cmask; c++) { 
+		if (e != c->code)
+			continue;
+
+		c = NULL;
+
+		/*
+		 * Allocate resource per core.
+		 * Currently only one such per core resource can be allocated.
+		 */
+		pc = cpuc->per_core;
+		if (!pc)
+			break;
+		raw_spin_lock(&pc->lock);
+		if (pc->ref > 0) {
+			/* Allow identical settings */
+			if (hwc->config == pc->config &&
+			    hwc->extra_reg == pc->extra_reg &&
+			    hwc->extra_config == pc->extra_config) {
+				pc->ref++;
+				cpuc->percore_used = 1;
+			} else {
+				/* Deny due to conflict */
+				c = &emptyconstraint;
+			}
+		} else {
+			pc->config = hwc->config;
+			pc->extra_reg = hwc->extra_reg;
+			pc->extra_config = hwc->extra_config;
+			pc->ref = 1;
+			cpuc->percore_used = 1;
+		}
+		raw_spin_unlock(&pc->lock);
+		return c;
+	}
+
+	return NULL;
+}
+
+static struct event_constraint *
 intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
 {
 	struct event_constraint *c;
@@ -806,9 +897,38 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
 	if (c)
 		return c;
 
+	c = intel_percore_constraints(cpuc, event);
+	if (c)
+		return c;
+
 	return x86_get_event_constraints(cpuc, event);
 }
 
+static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
+					struct perf_event *event)
+{
+	struct event_constraint *c;
+	struct intel_percore *pc;
+	struct hw_perf_event *hwc = &event->hw;
+	unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
+
+	if (!cpuc->percore_used)
+		return;
+
+	for (c = x86_pmu.percore_constraints; c->cmask; c++) { 
+		if (e != c->code)
+			continue;
+
+		pc = cpuc->per_core;
+		raw_spin_lock(&pc->lock);
+		pc->ref--;
+		BUG_ON(pc->ref < 0);
+		raw_spin_unlock(&pc->lock);
+		cpuc->percore_used = 0;
+		break;
+	}
+}
+
 static int intel_pmu_hw_config(struct perf_event *event)
 {
 	int ret = x86_pmu_hw_config(event);
@@ -854,6 +974,7 @@ static __initconst const struct x86_pmu core_pmu = {
 	 */
 	.max_period		= (1ULL << 31) - 1,
 	.get_event_constraints	= intel_get_event_constraints,
+	.put_event_constraints	= intel_put_event_constraints,
 	.event_constraints	= intel_core_event_constraints,
 };
 
@@ -892,6 +1013,7 @@ static __initconst const struct x86_pmu intel_pmu = {
 	 */
 	.max_period		= (1ULL << 31) - 1,
 	.get_event_constraints	= intel_get_event_constraints,
+	.put_event_constraints	= intel_put_event_constraints,
 
 	.cpu_starting		= intel_pmu_cpu_starting,
 	.cpu_dying		= intel_pmu_cpu_dying,
@@ -923,6 +1045,35 @@ static void intel_clovertown_quirks(void)
 	x86_pmu.pebs_constraints = NULL;
 }
 
+static __initdata int needs_percore = 1; /* CHANGEME */
+
+static __init int init_intel_percore(void)
+{
+	int cpu;
+
+	if (!needs_percore)
+		return 0;
+
+	intel_percore = alloc_percpu(struct intel_percore);
+	if (!intel_percore)
+		return -ENOMEM;
+
+	for_each_possible_cpu(cpu) {
+		raw_spin_lock_init(&per_cpu_ptr(intel_percore, cpu)->lock);
+		per_cpu(cpu_hw_events, cpu).per_core =
+			per_cpu_ptr(intel_percore,
+			    cpumask_first(topology_thread_cpumask(cpu)));
+		printk("cpu %d core %d\n", 
+		       cpu, cpumask_first(topology_thread_cpumask(cpu)));
+	}
+
+	return 0;
+}
+/* 
+ * Runs later because per cpu allocations don't work early on.
+ */
+__initcall(init_intel_percore);
+
 static __init int intel_pmu_init(void)
 {
 	union cpuid10_edx edx;
@@ -1010,7 +1161,11 @@ static __init int intel_pmu_init(void)
 		intel_pmu_lbr_init_nhm();
 
 		x86_pmu.event_constraints = intel_nehalem_event_constraints;
+		x86_pmu.percore_constraints =
+			intel_nehalem_percore_constraints;
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
+		x86_pmu.extra_regs = intel_nehalem_extra_regs;
+		needs_percore = 1;
 		pr_cont("Nehalem events, ");
 		break;
 
@@ -1032,7 +1187,11 @@ static __init int intel_pmu_init(void)
 		intel_pmu_lbr_init_nhm();
 
 		x86_pmu.event_constraints = intel_westmere_event_constraints;
+		x86_pmu.percore_constraints =
+			intel_westmere_percore_constraints;
 		x86_pmu.enable_all = intel_pmu_nhm_enable_all;
+		x86_pmu.extra_regs = intel_westmere_extra_regs;
+		needs_percore = 1;
 		pr_cont("Westmere events, ");
 		break;
 
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 057bf22..6124e93 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -529,6 +529,8 @@ struct hw_perf_event {
 			unsigned long	event_base;
 			int		idx;
 			int		last_cpu;
+			unsigned int	extra_reg;
+			u64		extra_config;
 		};
 		struct { /* software */
 			struct hrtimer	hrtimer;
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* [PATCH 2/2] perf-events: Fix LLC-* events on Intel Nehalem/Westmere v2
  2010-11-12 16:55 [PATCH 1/2] perf-events: Add support for supplementary event registers v2 Andi Kleen
@ 2010-11-12 16:55 ` Andi Kleen
  2010-11-12 17:17 ` [PATCH 1/2] perf-events: Add support for supplementary event registers v2 Peter Zijlstra
                   ` (2 subsequent siblings)
  3 siblings, 0 replies; 19+ messages in thread
From: Andi Kleen @ 2010-11-12 16:55 UTC (permalink / raw)
  To: a.p.zijlstra; +Cc: eranian, linux-kernel, cjashfor, mingo, fweisbec, Andi Kleen

From: Andi Kleen <ak@linux.intel.com>

The generic perf LLC-* events do count the L2 caches, not the real
L3 LLC on Intel Nehalem and Westmere. This led to quite some confusion.

Fixing this properly requires use of the special OFFCORE_RESPONSE
events which need a separate mask register. This has been implemented
in an earlier patch.

Now use this infrastructure to set correct events for the LLC-*
on Nehalem and Westmere.

Signed-off-by: Andi Kleen <ak@linux.intel.com>
---
 arch/x86/kernel/cpu/perf_event.c       |   12 ++++----
 arch/x86/kernel/cpu/perf_event_intel.c |   48 ++++++++++++++++++++++---------
 2 files changed, 40 insertions(+), 20 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 346006a..b1abe89 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -467,8 +467,9 @@ static inline int x86_pmu_initialized(void)
 }
 
 static inline int
-set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
+set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event *event)
 {
+	struct perf_event_attr *attr = &event->attr;
 	unsigned int cache_type, cache_op, cache_result;
 	u64 config, val;
 
@@ -494,9 +495,8 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr)
 	if (val == -1)
 		return -EINVAL;
 
-	hwc->config |= val;
-
-	return 0;
+	hwc->config |= val & X86_RAW_EVENT_MASK;
+	return x86_pmu_extra_regs(val, event);
 }
 
 static int x86_setup_perfctr(struct perf_event *event)
@@ -521,10 +521,10 @@ static int x86_setup_perfctr(struct perf_event *event)
 	}
 
 	if (attr->type == PERF_TYPE_RAW)
-		return 0;
+		return x86_pmu_extra_regs(event->attr.config, event);
 
 	if (attr->type == PERF_TYPE_HW_CACHE)
-		return set_ext_hw_attr(hwc, attr);
+		return set_ext_hw_attr(hwc, event);
 
 	if (attr->config >= x86_pmu.max_events)
 		return -EINVAL;
diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index a84de7e..2ed1e9d 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -165,16 +165,26 @@ static __initconst const u64 westmere_hw_cache_event_ids
  },
  [ C(LL  ) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
-		[ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
-	},
+		/* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x00000711004301b7,
+		/* OFFCORE_RESPONSE_0.ANY_DATA.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x0000f811004301b7, 
+	},
+	/* 
+	 * Use RFO, not WRITEBACK, because a write miss would typically occur 
+	 * on RFO. 
+	 */
 	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
-		[ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
+		/* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x00000722004301b7,
+		/* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x0000f822004301b7,
 	},
 	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
-		[ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
+		/* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x00000770004301b7,
+		/* OFFCORE_RESPONSE_0.PREFETCH.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x0000f870004301b7,
 	},
  },
  [ C(DTLB) ] = {
@@ -256,16 +266,26 @@ static __initconst const u64 nehalem_hw_cache_event_ids
  },
  [ C(LL  ) ] = {
 	[ C(OP_READ) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS               */
-		[ C(RESULT_MISS)   ] = 0x0224, /* L2_RQSTS.LD_MISS             */
-	},
+		/* OFFCORE_RESPONSE_0.ANY_DATA.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x00000711004301b7,
+		/* OFFCORE_RESPONSE_0.ANY_DATA.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x0000f811004301b7, 
+	},
+	/* 
+	 * Use RFO, not WRITEBACK, because a write miss would typically occur 
+	 * on RFO. 
+	 */
 	[ C(OP_WRITE) ] = {
-		[ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS                */
-		[ C(RESULT_MISS)   ] = 0x0824, /* L2_RQSTS.RFO_MISS            */
+		/* OFFCORE_RESPONSE_0.ANY_RFO.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x00000722004301b7,
+		/* OFFCORE_RESPONSE_0.ANY_RFO.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x0000f822004301b7,
 	},
 	[ C(OP_PREFETCH) ] = {
-		[ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference                */
-		[ C(RESULT_MISS)   ] = 0x412e, /* LLC Misses                   */
+		/* OFFCORE_RESPONSE_0.PREFETCH.LOCAL_CACHE */
+		[ C(RESULT_ACCESS) ] = 0x00000770004301b7,
+		/* OFFCORE_RESPONSE_0.PREFETCH.ANY_LLC_MISS */
+		[ C(RESULT_MISS)   ] = 0x0000f870004301b7,
 	},
  },
  [ C(DTLB) ] = {
-- 
1.7.1


^ permalink raw reply related	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/2] perf-events: Add support for supplementary event registers v2
  2010-11-12 16:55 [PATCH 1/2] perf-events: Add support for supplementary event registers v2 Andi Kleen
  2010-11-12 16:55 ` [PATCH 2/2] perf-events: Fix LLC-* events on Intel Nehalem/Westmere v2 Andi Kleen
@ 2010-11-12 17:17 ` Peter Zijlstra
  2010-11-12 17:17 ` Stephane Eranian
  2010-11-12 17:23 ` Peter Zijlstra
  3 siblings, 0 replies; 19+ messages in thread
From: Peter Zijlstra @ 2010-11-12 17:17 UTC (permalink / raw)
  To: Andi Kleen; +Cc: eranian, linux-kernel, cjashfor, mingo, fweisbec, Andi Kleen

On Fri, 2010-11-12 at 17:55 +0100, Andi Kleen wrote:

> @@ -127,6 +129,13 @@ struct cpu_hw_events {
>  	struct perf_branch_stack	lbr_stack;
>  	struct perf_branch_entry	lbr_entries[MAX_LBR_ENTRIES];
>  
> +	/* 
> +	 * Intel percore register state.
> +	 * Coordinate shared resources between HT threads.
> +	 */
> +	int 				percore_used; /* Used by this CPU? */
> +	struct intel_percore		*per_core;
> +
>  	/*
>  	 * AMD specific bits
>  	 */

> +/* 
> + * Per core state
> + * This used to coordinate shared resources for HT threads.
> + * Exists per CPU, but only the entry for the first CPU
> + * in the core is used.
> + */
> +struct intel_percore {
> +	raw_spinlock_t 		lock;		/* protect structure */
> +	int 			ref;		/* reference count */
> +	u64 			config;	        /* main counter config */
> +	unsigned int 		extra_reg;	/* extra MSR number */
> +	u64 			extra_config;	/* extra MSR config */
> +};
> +static struct intel_percore __percpu *intel_percore;

Why have this pointer?

> @@ -923,6 +1045,35 @@ static void intel_clovertown_quirks(void)
>  	x86_pmu.pebs_constraints = NULL;
>  }
>  
> +static __initdata int needs_percore = 1; /* CHANGEME */
> +
> +static __init int init_intel_percore(void)
> +{
> +	int cpu;
> +
> +	if (!needs_percore)
> +		return 0;
> +
> +	intel_percore = alloc_percpu(struct intel_percore);
> +	if (!intel_percore)
> +		return -ENOMEM;
> +
> +	for_each_possible_cpu(cpu) {
> +		raw_spin_lock_init(&per_cpu_ptr(intel_percore, cpu)->lock);
> +		per_cpu(cpu_hw_events, cpu).per_core =
> +			per_cpu_ptr(intel_percore,
> +			    cpumask_first(topology_thread_cpumask(cpu)));
> +		printk("cpu %d core %d\n", 
> +		       cpu, cpumask_first(topology_thread_cpumask(cpu)));
> +	}
> +
> +	return 0;
> +}
> +/* 
> + * Runs later because per cpu allocations don't work early on.
> + */
> +__initcall(init_intel_percore);

they should, perf is initialized from sched_init() [ I really should
move it into init/main.c some time ], which is after mm_init(), which
should be sufficient for all allocators to work.

Oh, wait, I ran into this a few days ago, the arch specific init call
actually runs before the main perf init, which is mightily strange, I
have this hunk (http://lkml.org/lkml/2010/11/9/530):

-void __init init_hw_perf_events(void)
+static int __init init_hw_perf_events(void)
 {
 	struct event_constraint *c;
 	int err;
@@ -1363,11 +1363,11 @@ void __init init_hw_perf_events(void)
 		err = amd_pmu_init();
 		break;
 	default:
-		return;
+		return 0;
 	}
 	if (err != 0) {
 		pr_cont("no PMU driver, software events only.\n");
-		return;
+		return 0;
 	}
 
 	pmu_check_apic();
@@ -1418,9 +1418,12 @@ void __init init_hw_perf_events(void)
 	pr_info("... fixed-purpose events:   %d\n",     x86_pmu.num_counters_fixed);
 	pr_info("... event mask:             %016Lx\n", x86_pmu.intel_ctrl);
 
	perf_pmu_register(&pmu);
 	perf_cpu_notifier(x86_pmu_notifier);
+
+	return 0;
 }
+early_initcall(init_hw_perf_events);

To cure that.


That said, why not simply:
  kmalloc_node(sizeof(struct intel_percore), GFP_KERNEL | __GFP_ZERO, 
	       cpu_to_node(cpu))

For each core, just like amd_alloc_nb() does. The amd code even frees
the allocation on cpu-hot-unplug after all relevant cpus are gone.


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/2] perf-events: Add support for supplementary event registers v2
  2010-11-12 16:55 [PATCH 1/2] perf-events: Add support for supplementary event registers v2 Andi Kleen
  2010-11-12 16:55 ` [PATCH 2/2] perf-events: Fix LLC-* events on Intel Nehalem/Westmere v2 Andi Kleen
  2010-11-12 17:17 ` [PATCH 1/2] perf-events: Add support for supplementary event registers v2 Peter Zijlstra
@ 2010-11-12 17:17 ` Stephane Eranian
  2010-11-12 17:33   ` Peter Zijlstra
  2010-11-12 17:23 ` Peter Zijlstra
  3 siblings, 1 reply; 19+ messages in thread
From: Stephane Eranian @ 2010-11-12 17:17 UTC (permalink / raw)
  To: Andi Kleen
  Cc: a.p.zijlstra, linux-kernel, cjashfor, mingo, fweisbec, Andi Kleen

I looked at this patch thinking how could this be reused for LBR_SELECT.

I am wondering if storing the extra MSR value in attr.config is really the way
to go now as opposed to adding/overloading a field.

For OFFCORE_RESPONSE, it makes sense to use attr.config because this is
a refinement of the event (a sub-sub event if you want).

For LBR_SELECT, you also need to pass a value, but there is no specific event
associated with it. You can enable LBR with any event you want. Thus,
storing the
LBR_SELECT value independently would also make sense. And if we have this
field for LBR_SELECT then we may as well use it for OFFCORE_REPONSE.

The alternative would be to consider LBR_SELECT also as a refinement of
the event being measured. Though by itself, it wouldn't do anything, it would
have to be combined with a PERF_SAMPLE_BRANCH_STACK in attr.sample_type.

What do you think?

On Fri, Nov 12, 2010 at 5:55 PM, Andi Kleen <andi@firstfloor.org> wrote:
> From: Andi Kleen <ak@linux.intel.com>
>
> Intel Nehalem/Westmere have a special OFFCORE_RESPONSE event
> that can be used to monitor any offcore accesses from a core.
> This is a very useful event for various tunings, and it's
> also needed to implement the generic LLC-* events correctly.
>
> Unfortunately this event requires programming a mask in a separate
> register. And worse this separate register is per core, not per
> CPU thread.
>
> This patch adds:
> - Teaches perf_events that OFFCORE_RESPONSE need extra parameters.
> The extra parameters are passed by user space in the unused upper
> 32bits of the config word.
> - Add support to the Intel perf_event core to schedule the per
> core resource. I tried to add generic infrastructure for this
> that could be also used for other core resources.
> The basic code has is patterned after the similar AMD northbridge
> constraints code.
>
> Thanks to Stephane Eranian who pointed out some problems
> in the original version and suggested improvements.
>
> Full git tree:
> git://git.kernel.org/pub/scm/linux/kernel/git/ak/linux-misc-2.6.git perf-offcore2
> Cc: eranian@google.com
> v2: Lots of updates based on review feedback. Also fixes some issues
> Signed-off-by: Andi Kleen <ak@linux.intel.com>
> ---
>  arch/x86/kernel/cpu/perf_event.c       |   70 ++++++++++++++
>  arch/x86/kernel/cpu/perf_event_intel.c |  159 ++++++++++++++++++++++++++++++++
>  include/linux/perf_event.h             |    2 +
>  3 files changed, 231 insertions(+), 0 deletions(-)
>
> diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
> index ed63101..346006a 100644
> --- a/arch/x86/kernel/cpu/perf_event.c
> +++ b/arch/x86/kernel/cpu/perf_event.c
> @@ -93,6 +93,8 @@ struct amd_nb {
>        struct event_constraint event_constraints[X86_PMC_IDX_MAX];
>  };
>
> +struct intel_percore;
> +
>  #define MAX_LBR_ENTRIES                16
>
>  struct cpu_hw_events {
> @@ -127,6 +129,13 @@ struct cpu_hw_events {
>        struct perf_branch_stack        lbr_stack;
>        struct perf_branch_entry        lbr_entries[MAX_LBR_ENTRIES];
>
> +       /*
> +        * Intel percore register state.
> +        * Coordinate shared resources between HT threads.
> +        */
> +       int                             percore_used; /* Used by this CPU? */
> +       struct intel_percore            *per_core;
> +
>        /*
>         * AMD specific bits
>         */
> @@ -175,6 +184,32 @@ struct cpu_hw_events {
>  #define for_each_event_constraint(e, c)        \
>        for ((e) = (c); (e)->weight; (e)++)
>
> +/*
> + * Extra registers for specific events.
> + * Some events need large masks and require external MSRs.
> + * Define a mapping to these extra registers.
> + * The actual contents are still encoded in unused parts of the
> + * original config u64.
> + */
> +struct extra_reg {
> +       unsigned int            event;
> +       unsigned int            msr;
> +       unsigned int            extra_shift;
> +       u64                     config_mask;
> +       u64                     valid_mask;
> +};
> +
> +#define EVENT_EXTRA_REG(e, ms, m, vm, es) {    \
> +       .event = (e),           \
> +       .msr = (ms),            \
> +       .config_mask = (m),     \
> +       .valid_mask = (vm),     \
> +       .extra_shift = (es),    \
> +       }
> +#define INTEL_EVENT_EXTRA_REG(event, msr, vm, es)      \
> +       EVENT_EXTRA_REG(event, msr, ARCH_PERFMON_EVENTSEL_EVENT, vm, es)
> +#define EVENT_EXTRA_END EVENT_EXTRA_REG(0, 0, 0, 0, 0)
> +
>  union perf_capabilities {
>        struct {
>                u64     lbr_format    : 6;
> @@ -219,6 +254,7 @@ struct x86_pmu {
>        void            (*put_event_constraints)(struct cpu_hw_events *cpuc,
>                                                 struct perf_event *event);
>        struct event_constraint *event_constraints;
> +       struct event_constraint *percore_constraints;
>        void            (*quirks)(void);
>        int             perfctr_second_write;
>
> @@ -247,6 +283,11 @@ struct x86_pmu {
>         */
>        unsigned long   lbr_tos, lbr_from, lbr_to; /* MSR base regs       */
>        int             lbr_nr;                    /* hardware stack size */
> +
> +       /*
> +        * Extra registers for events
> +        */
> +       struct extra_reg *extra_regs;
>  };
>
>  static struct x86_pmu x86_pmu __read_mostly;
> @@ -321,6 +362,33 @@ again:
>        return new_raw_count;
>  }
>
> +/*
> + * Find and validate any extra registers to set up.
> + */
> +static int x86_pmu_extra_regs(u64 config, struct perf_event *event)
> +{
> +       struct extra_reg *er;
> +       u64 extra;
> +
> +       event->hw.extra_reg = 0;
> +       event->hw.extra_config = 0;
> +
> +       if (!x86_pmu.extra_regs)
> +               return 0;
> +
> +       for (er = x86_pmu.extra_regs; er->msr; er++) {
> +               if (er->event != (config & er->config_mask))
> +                       continue;
> +               event->hw.extra_reg = er->msr;
> +               extra = config >> er->extra_shift;
> +               if (extra & ~er->valid_mask)
> +                       return -EINVAL;
> +               event->hw.extra_config = extra;
> +               break;
> +       }
> +       return 0;
> +}
> +
>  static atomic_t active_events;
>  static DEFINE_MUTEX(pmc_reserve_mutex);
>
> @@ -876,6 +944,8 @@ static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc,
>                                          u64 enable_mask)
>  {
>        wrmsrl(hwc->config_base + hwc->idx, hwc->config | enable_mask);
> +       if (hwc->extra_reg)
> +               wrmsrl(hwc->extra_reg, hwc->extra_config);
>  }
>
>  static inline void x86_pmu_disable_event(struct perf_event *event)
> diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
> index c8f5c08..a84de7e 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel.c
> @@ -1,5 +1,20 @@
>  #ifdef CONFIG_CPU_SUP_INTEL
>
> +/*
> + * Per core state
> + * This used to coordinate shared resources for HT threads.
> + * Exists per CPU, but only the entry for the first CPU
> + * in the core is used.
> + */
> +struct intel_percore {
> +       raw_spinlock_t          lock;           /* protect structure */
> +       int                     ref;            /* reference count */
> +       u64                     config;         /* main counter config */
> +       unsigned int            extra_reg;      /* extra MSR number */
> +       u64                     extra_config;   /* extra MSR config */
> +};
> +static struct intel_percore __percpu *intel_percore;
> +
>  /*
>  * Intel PerfMon, used on Core and later.
>  */
> @@ -64,6 +79,18 @@ static struct event_constraint intel_nehalem_event_constraints[] =
>        EVENT_CONSTRAINT_END
>  };
>
> +static struct extra_reg intel_nehalem_extra_regs[] =
> +{
> +       INTEL_EVENT_EXTRA_REG(0xb7, 0x1a6, 0xffff, 32), /* OFFCORE_RESPONSE_0 */
> +       EVENT_EXTRA_END
> +};
> +
> +static struct event_constraint intel_nehalem_percore_constraints[] =
> +{
> +       INTEL_EVENT_CONSTRAINT(0xb7, 0),
> +       EVENT_CONSTRAINT_END
> +};
> +
>  static struct event_constraint intel_westmere_event_constraints[] =
>  {
>        FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
> @@ -76,6 +103,20 @@ static struct event_constraint intel_westmere_event_constraints[] =
>        EVENT_CONSTRAINT_END
>  };
>
> +static struct extra_reg intel_westmere_extra_regs[] =
> +{
> +       INTEL_EVENT_EXTRA_REG(0xb7, 0x1a6, 0xffff, 32), /* OFFCORE_RESPONSE_0 */
> +       INTEL_EVENT_EXTRA_REG(0xbb, 0x1a7, 0xffff, 32), /* OFFCORE_RESPONSE_1 */
> +       EVENT_EXTRA_END
> +};
> +
> +static struct event_constraint intel_westmere_percore_constraints[] =
> +{
> +       INTEL_EVENT_CONSTRAINT(0xb7, 0),
> +       INTEL_EVENT_CONSTRAINT(0xbb, 0),
> +       EVENT_CONSTRAINT_END
> +};
> +
>  static struct event_constraint intel_gen_event_constraints[] =
>  {
>        FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */
> @@ -794,6 +835,56 @@ intel_bts_constraints(struct perf_event *event)
>  }
>
>  static struct event_constraint *
> +intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
> +{
> +       struct hw_perf_event *hwc = &event->hw;
> +       unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
> +       struct event_constraint *c;
> +       struct intel_percore *pc;
> +
> +       if (!x86_pmu.percore_constraints)
> +               return NULL;
> +
> +       for (c = x86_pmu.percore_constraints; c->cmask; c++) {
> +               if (e != c->code)
> +                       continue;
> +
> +               c = NULL;
> +
> +               /*
> +                * Allocate resource per core.
> +                * Currently only one such per core resource can be allocated.
> +                */
> +               pc = cpuc->per_core;
> +               if (!pc)
> +                       break;
> +               raw_spin_lock(&pc->lock);
> +               if (pc->ref > 0) {
> +                       /* Allow identical settings */
> +                       if (hwc->config == pc->config &&
> +                           hwc->extra_reg == pc->extra_reg &&
> +                           hwc->extra_config == pc->extra_config) {
> +                               pc->ref++;
> +                               cpuc->percore_used = 1;
> +                       } else {
> +                               /* Deny due to conflict */
> +                               c = &emptyconstraint;
> +                       }
> +               } else {
> +                       pc->config = hwc->config;
> +                       pc->extra_reg = hwc->extra_reg;
> +                       pc->extra_config = hwc->extra_config;
> +                       pc->ref = 1;
> +                       cpuc->percore_used = 1;
> +               }
> +               raw_spin_unlock(&pc->lock);
> +               return c;
> +       }
> +
> +       return NULL;
> +}
> +
> +static struct event_constraint *
>  intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
>  {
>        struct event_constraint *c;
> @@ -806,9 +897,38 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event
>        if (c)
>                return c;
>
> +       c = intel_percore_constraints(cpuc, event);
> +       if (c)
> +               return c;
> +
>        return x86_get_event_constraints(cpuc, event);
>  }
>
> +static void intel_put_event_constraints(struct cpu_hw_events *cpuc,
> +                                       struct perf_event *event)
> +{
> +       struct event_constraint *c;
> +       struct intel_percore *pc;
> +       struct hw_perf_event *hwc = &event->hw;
> +       unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
> +
> +       if (!cpuc->percore_used)
> +               return;
> +
> +       for (c = x86_pmu.percore_constraints; c->cmask; c++) {
> +               if (e != c->code)
> +                       continue;
> +
> +               pc = cpuc->per_core;
> +               raw_spin_lock(&pc->lock);
> +               pc->ref--;
> +               BUG_ON(pc->ref < 0);
> +               raw_spin_unlock(&pc->lock);
> +               cpuc->percore_used = 0;
> +               break;
> +       }
> +}
> +
>  static int intel_pmu_hw_config(struct perf_event *event)
>  {
>        int ret = x86_pmu_hw_config(event);
> @@ -854,6 +974,7 @@ static __initconst const struct x86_pmu core_pmu = {
>         */
>        .max_period             = (1ULL << 31) - 1,
>        .get_event_constraints  = intel_get_event_constraints,
> +       .put_event_constraints  = intel_put_event_constraints,
>        .event_constraints      = intel_core_event_constraints,
>  };
>
> @@ -892,6 +1013,7 @@ static __initconst const struct x86_pmu intel_pmu = {
>         */
>        .max_period             = (1ULL << 31) - 1,
>        .get_event_constraints  = intel_get_event_constraints,
> +       .put_event_constraints  = intel_put_event_constraints,
>
>        .cpu_starting           = intel_pmu_cpu_starting,
>        .cpu_dying              = intel_pmu_cpu_dying,
> @@ -923,6 +1045,35 @@ static void intel_clovertown_quirks(void)
>        x86_pmu.pebs_constraints = NULL;
>  }
>
> +static __initdata int needs_percore = 1; /* CHANGEME */
> +
> +static __init int init_intel_percore(void)
> +{
> +       int cpu;
> +
> +       if (!needs_percore)
> +               return 0;
> +
> +       intel_percore = alloc_percpu(struct intel_percore);
> +       if (!intel_percore)
> +               return -ENOMEM;
> +
> +       for_each_possible_cpu(cpu) {
> +               raw_spin_lock_init(&per_cpu_ptr(intel_percore, cpu)->lock);
> +               per_cpu(cpu_hw_events, cpu).per_core =
> +                       per_cpu_ptr(intel_percore,
> +                           cpumask_first(topology_thread_cpumask(cpu)));
> +               printk("cpu %d core %d\n",
> +                      cpu, cpumask_first(topology_thread_cpumask(cpu)));
> +       }
> +
> +       return 0;
> +}
> +/*
> + * Runs later because per cpu allocations don't work early on.
> + */
> +__initcall(init_intel_percore);
> +
>  static __init int intel_pmu_init(void)
>  {
>        union cpuid10_edx edx;
> @@ -1010,7 +1161,11 @@ static __init int intel_pmu_init(void)
>                intel_pmu_lbr_init_nhm();
>
>                x86_pmu.event_constraints = intel_nehalem_event_constraints;
> +               x86_pmu.percore_constraints =
> +                       intel_nehalem_percore_constraints;
>                x86_pmu.enable_all = intel_pmu_nhm_enable_all;
> +               x86_pmu.extra_regs = intel_nehalem_extra_regs;
> +               needs_percore = 1;
>                pr_cont("Nehalem events, ");
>                break;
>
> @@ -1032,7 +1187,11 @@ static __init int intel_pmu_init(void)
>                intel_pmu_lbr_init_nhm();
>
>                x86_pmu.event_constraints = intel_westmere_event_constraints;
> +               x86_pmu.percore_constraints =
> +                       intel_westmere_percore_constraints;
>                x86_pmu.enable_all = intel_pmu_nhm_enable_all;
> +               x86_pmu.extra_regs = intel_westmere_extra_regs;
> +               needs_percore = 1;
>                pr_cont("Westmere events, ");
>                break;
>
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index 057bf22..6124e93 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -529,6 +529,8 @@ struct hw_perf_event {
>                        unsigned long   event_base;
>                        int             idx;
>                        int             last_cpu;
> +                       unsigned int    extra_reg;
> +                       u64             extra_config;
>                };
>                struct { /* software */
>                        struct hrtimer  hrtimer;
> --
> 1.7.1
>
>

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/2] perf-events: Add support for supplementary event registers v2
  2010-11-12 16:55 [PATCH 1/2] perf-events: Add support for supplementary event registers v2 Andi Kleen
                   ` (2 preceding siblings ...)
  2010-11-12 17:17 ` Stephane Eranian
@ 2010-11-12 17:23 ` Peter Zijlstra
  2010-11-13 10:13   ` Andi Kleen
  3 siblings, 1 reply; 19+ messages in thread
From: Peter Zijlstra @ 2010-11-12 17:23 UTC (permalink / raw)
  To: Andi Kleen; +Cc: eranian, linux-kernel, cjashfor, mingo, fweisbec, Andi Kleen

On Fri, 2010-11-12 at 17:55 +0100, Andi Kleen wrote:
>  static struct event_constraint *
> +intel_percore_constraints(struct cpu_hw_events *cpuc, struct perf_event *event)
> +{
> +       struct hw_perf_event *hwc = &event->hw;
> +       unsigned int e = hwc->config & ARCH_PERFMON_EVENTSEL_EVENT;
> +       struct event_constraint *c;
> +       struct intel_percore *pc;
> +
> +       if (!x86_pmu.percore_constraints)
> +               return NULL;
> +
> +       for (c = x86_pmu.percore_constraints; c->cmask; c++) { 
> +               if (e != c->code)
> +                       continue;
> +
> +               c = NULL;
> +
> +               /*
> +                * Allocate resource per core.
> +                * Currently only one such per core resource can be allocated.
> +                */
> +               pc = cpuc->per_core;
> +               if (!pc)
> +                       break;
> +               raw_spin_lock(&pc->lock);
> +               if (pc->ref > 0) {
> +                       /* Allow identical settings */
> +                       if (hwc->config == pc->config &&
> +                           hwc->extra_reg == pc->extra_reg &&
> +                           hwc->extra_config == pc->extra_config) {

Hrmm,. this doesn't really scale right wrt multiple extra_regs.

You made the extra_regs thing fairly extensible, but the above doesn't
really work well if there's multiple extra regs (say OFFCORE and
LBR_CONFIG -- possibly even the two OFFCORE regs present on westmere).

It basically needs a per-core state for each extra_reg possible.

> +                               pc->ref++;
> +                               cpuc->percore_used = 1;
> +                       } else {
> +                               /* Deny due to conflict */
> +                               c = &emptyconstraint;
> +                       }
> +               } else {
> +                       pc->config = hwc->config;
> +                       pc->extra_reg = hwc->extra_reg;
> +                       pc->extra_config = hwc->extra_config;
> +                       pc->ref = 1;
> +                       cpuc->percore_used = 1;
> +               }
> +               raw_spin_unlock(&pc->lock);
> +               return c;
> +       }
> +
> +       return NULL;
> +} 

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/2] perf-events: Add support for supplementary event registers v2
  2010-11-12 17:17 ` Stephane Eranian
@ 2010-11-12 17:33   ` Peter Zijlstra
  2010-11-12 21:26     ` Stephane Eranian
  2010-11-13 10:17     ` Andi Kleen
  0 siblings, 2 replies; 19+ messages in thread
From: Peter Zijlstra @ 2010-11-12 17:33 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: Andi Kleen, linux-kernel, cjashfor, mingo, fweisbec, Andi Kleen

On Fri, 2010-11-12 at 18:17 +0100, Stephane Eranian wrote:
> I looked at this patch thinking how could this be reused for LBR_SELECT.
> 
> I am wondering if storing the extra MSR value in attr.config is really the way
> to go now as opposed to adding/overloading a field.
> 
> For OFFCORE_RESPONSE, it makes sense to use attr.config because this is
> a refinement of the event (a sub-sub event if you want).

Correct, it makes sense for offcore and load-latency, not so much for
lbr_config.

> For LBR_SELECT, you also need to pass a value, but there is no specific event
> associated with it. You can enable LBR with any event you want. Thus,
> storing the
> LBR_SELECT value independently would also make sense. And if we have this
> field for LBR_SELECT then we may as well use it for OFFCORE_RESPONSE.

That would assume a single event doesn't contain offcore and lbr, no?
Currently the extra_reg thing assumes there's only one extra reg encoded
in the config word.

> The alternative would be to consider LBR_SELECT also as a refinement of
> the event being measured. Though by itself, it wouldn't do anything, it would
> have to be combined with a PERF_SAMPLE_BRANCH_STACK to attr.sample_type.

A separate field for the lbr_config seems to make most sense, we could
of course use the top 16 bits for lbr, the next 16 for offcore/ll and
the bottom 32 for eventsel, but that's mighty crowded.



^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/2] perf-events: Add support for supplementary event registers v2
  2010-11-12 17:33   ` Peter Zijlstra
@ 2010-11-12 21:26     ` Stephane Eranian
  2010-11-13 10:17     ` Andi Kleen
  1 sibling, 0 replies; 19+ messages in thread
From: Stephane Eranian @ 2010-11-12 21:26 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Andi Kleen, linux-kernel, cjashfor, mingo, fweisbec, Andi Kleen

On Fri, Nov 12, 2010 at 6:33 PM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> On Fri, 2010-11-12 at 18:17 +0100, Stephane Eranian wrote:
>> I looked at this patch thinking how could this be reused for LBR_SELECT.
>>
>> I am wondering if storing the extra MSR value in attr.config is really the way
>> to go now as opposed to adding/overloading a field.
>>
>> For OFFCORE_RESPONSE, it makes sense to use attr.config because this is
>> a refinement of the event (a sub-sub event if you want).
>
> Correct, it makes sense for offcore and load-latency, not so much for
> lbr_config.
>
Good.

>> For LBR_SELECT, you also need to pass a value, but there is no specific event
>> associated with it. You can enable LBR with any event you want. Thus,
>> storing the
>> LBR_SELECT value independently would also make sense. And if we have this
>> field for LBR_SELECT then we may as well use it for OFFCORE_REPONSE.
>
> That would assume a single event doesn't contain offcore and lbr, no?
> Currently the extra_reg thing assumes there's only one extra reg encoded
> in the config word.
>
Good point. You want to allow simultaneous LBR + OFFCORE, i.e., to figure
out the path that led to the event (with some skid). So those need to be in
separate fields. I would go with offcore in attr.config, lbr somewhere else.
If you do this then you have the issue of how generic in the content of that
extra field.


>> The alternative would be to consider LBR_SELECT also as a refinement of
>> the event being measured. Though by itself, it wouldn't do anything, it would
>> have to be combined with a PERF_SAMPLE_BRANCH_STACK to attr.sample_type.
>
> A separate field for the lbr_config seems to make most sense, we could
> of course use the top 16 bits for lbr, the next 16 for offcore/ll and
> the bottom 32 for eventsel, but that's mighty crowded.
>
Agreed, not such a good idea. I like the separate lbr_config better.
Could be made
more generic, e.g., branch_stack_config. This is how you've named LBR in the
generic code currently.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/2] perf-events: Add support for supplementary event registers v2
  2010-11-12 17:23 ` Peter Zijlstra
@ 2010-11-13 10:13   ` Andi Kleen
  2010-11-13 10:32     ` Peter Zijlstra
  0 siblings, 1 reply; 19+ messages in thread
From: Andi Kleen @ 2010-11-13 10:13 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: eranian, linux-kernel, cjashfor, mingo, fweisbec, Andi Kleen

> Hrmm,. this doesn't really scale right wrt multiple extra_regs.
> 
> You made the extra_regs thing fairly extensible, but the above doesn't
> really work well if there's multiple extra regs (say OFFCORE and
> LBR_CONFIG -- possibly even the two OFFCORE regs present on westmere).
> 
> It basically needs a per-core state for each extra_reg possible.

It shouldn't be too hard to extend it to use a table. I didn't want
to do that for the first iteration. I guess it could be done as followons
once LBR is implemented.

-Andi

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/2] perf-events: Add support for supplementary event registers v2
  2010-11-12 17:33   ` Peter Zijlstra
  2010-11-12 21:26     ` Stephane Eranian
@ 2010-11-13 10:17     ` Andi Kleen
  2010-11-13 10:34       ` Peter Zijlstra
  2010-11-15 11:00       ` Stephane Eranian
  1 sibling, 2 replies; 19+ messages in thread
From: Andi Kleen @ 2010-11-13 10:17 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Stephane Eranian, linux-kernel, cjashfor, mingo, fweisbec,
	Andi Kleen

> That would assume a single event doesn't contain offcore and lbr, no?
> Currently the extra_reg thing assumes there's only one extra reg encoded
> in the config word.

It would not be too hard to make it an array if that's needed.

Before going fancy I would prefer to include the plain base patch
first though.

-Andi

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/2] perf-events: Add support for supplementary event registers v2
  2010-11-13 10:13   ` Andi Kleen
@ 2010-11-13 10:32     ` Peter Zijlstra
  2010-11-15 11:01       ` Stephane Eranian
  0 siblings, 1 reply; 19+ messages in thread
From: Peter Zijlstra @ 2010-11-13 10:32 UTC (permalink / raw)
  To: Andi Kleen; +Cc: eranian, linux-kernel, cjashfor, mingo, fweisbec, Andi Kleen

On Sat, 2010-11-13 at 11:13 +0100, Andi Kleen wrote:
> > Hrmm,. this doesn't really scale right wrt multiple extra_regs.
> > 
> > You made the extra_regs thing fairly extensible, but the above doesn't
> > really work well if there's multiple extra regs (say OFFCORE and
> > LBR_CONFIG -- possibly even the two OFFCORE regs present on westmere).
> > 
> > It basically needs a per-core state for each extra_reg possible.
> 
> It shouldn't be too hard to extend it to use a table. I didn't want
> to do that for the first iteration. I guess it could be done as followons
> once LBR is implemented.

Isn't it also relevant for the two offcore regs for wsm?

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/2] perf-events: Add support for supplementary event registers v2
  2010-11-13 10:17     ` Andi Kleen
@ 2010-11-13 10:34       ` Peter Zijlstra
  2010-11-15 11:03         ` Stephane Eranian
  2010-11-15 11:00       ` Stephane Eranian
  1 sibling, 1 reply; 19+ messages in thread
From: Peter Zijlstra @ 2010-11-13 10:34 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Stephane Eranian, linux-kernel, cjashfor, mingo, fweisbec,
	Andi Kleen

On Sat, 2010-11-13 at 11:17 +0100, Andi Kleen wrote:
> > That would assume a single event doesn't contain offcore and lbr, no?
> > Currently the extra_reg thing assumes there's only one extra reg encoded
> > in the config word.
> 
> It would not be too hard to make it an array if that's needed.

I don't think we want to go there anyway, one extra_reg per config value
is fine.


^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/2] perf-events: Add support for supplementary event registers v2
  2010-11-13 10:17     ` Andi Kleen
  2010-11-13 10:34       ` Peter Zijlstra
@ 2010-11-15 11:00       ` Stephane Eranian
  1 sibling, 0 replies; 19+ messages in thread
From: Stephane Eranian @ 2010-11-15 11:00 UTC (permalink / raw)
  To: Andi Kleen
  Cc: Peter Zijlstra, linux-kernel, cjashfor, mingo, fweisbec,
	Andi Kleen

On Sat, Nov 13, 2010 at 11:17 AM, Andi Kleen <andi@firstfloor.org> wrote:
>
> > That would assume a single event doesn't contain offcore and lbr, no?
> > Currently the extra_reg thing assumes there's only one extra reg encoded
> > in the config word.
>
> It would not be too hard to make it an array if that's needed.
>
> Before going fancy I would prefer to include the plain base patch
> first though.
>
I agree. Let's do offcore first.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/2] perf-events: Add support for supplementary event registers v2
  2010-11-13 10:32     ` Peter Zijlstra
@ 2010-11-15 11:01       ` Stephane Eranian
  2010-11-15 11:06         ` Peter Zijlstra
  0 siblings, 1 reply; 19+ messages in thread
From: Stephane Eranian @ 2010-11-15 11:01 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Andi Kleen, linux-kernel, cjashfor, mingo, fweisbec, Andi Kleen

On Sat, Nov 13, 2010 at 11:32 AM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> On Sat, 2010-11-13 at 11:13 +0100, Andi Kleen wrote:
>> > Hrmm,. this doesn't really scale right wrt multiple extra_regs.
>> >
>> > You made the extra_regs thing fairly extensible, but the above doesn't
>> > really work well if there's multiple extra regs (say OFFCORE and
>> > LBR_CONFIG -- possibly even the two OFFCORE regs present on westmere).
>> >
>> > It basically needs a per-core state for each extra_reg possible.
>>
>> It shouldn't be too hard to extend it to use a table. I didn't want
>> to do that for the first iteration. I guess it could be done as followons
>> once LBR is implemented.
>
> Isn't it also relevant for the two offcore regs for wsm?
>
But that's two distinct events (0xbb and 0xb7), though they behave
exactly the same and have the same set of umasks and extra MSR
values.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/2] perf-events: Add support for supplementary event registers v2
  2010-11-13 10:34       ` Peter Zijlstra
@ 2010-11-15 11:03         ` Stephane Eranian
  2010-11-15 11:06           ` Andi Kleen
  0 siblings, 1 reply; 19+ messages in thread
From: Stephane Eranian @ 2010-11-15 11:03 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Andi Kleen, linux-kernel, cjashfor, mingo, fweisbec, Andi Kleen

On Sat, Nov 13, 2010 at 11:34 AM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> On Sat, 2010-11-13 at 11:17 +0100, Andi Kleen wrote:
>> > That would assume a single event doesn't contain offcore and lbr, no?
>> > Currently the extra_reg thing assumes there's only one extra reg encoded
>> > in the config word.
>>
>> It would not be too hard to make it an array if that's needed.
>
> I don't think we want to go there anyway, one extra_reg per config value
> is fine.
>
I think offcore goes into .config, lbr goes into extra_reg. That should take
care of combinations as well.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/2] perf-events: Add support for supplementary event registers v2
  2010-11-15 11:03         ` Stephane Eranian
@ 2010-11-15 11:06           ` Andi Kleen
  0 siblings, 0 replies; 19+ messages in thread
From: Andi Kleen @ 2010-11-15 11:06 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: Peter Zijlstra, Andi Kleen, linux-kernel, cjashfor, mingo,
	fweisbec, Andi Kleen

On Mon, Nov 15, 2010 at 12:03:28PM +0100, Stephane Eranian wrote:
> On Sat, Nov 13, 2010 at 11:34 AM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> > On Sat, 2010-11-13 at 11:17 +0100, Andi Kleen wrote:
> >> > That would assume a single event doesn't contain offcore and lbr, no?
> >> > Currently the extra_reg thing assumes there's only one extra reg encoded
> >> > in the config word.
> >>
> >> It would not be too hard to make it an array if that's needed.
> >
> > I don't think we want to go there anyway, one extra_reg per config value
> > is fine.
> >
> I think offcore goes into .config, lbr goes into extra_reg. That should take
> care of combinations as well.

The context switch code needs extra_reg for any extra regs including 
offcore, I don't want to walk the event table for that.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/2] perf-events: Add support for supplementary event registers v2
  2010-11-15 11:01       ` Stephane Eranian
@ 2010-11-15 11:06         ` Peter Zijlstra
  2010-11-15 11:17           ` Andi Kleen
  2010-11-15 11:18           ` Stephane Eranian
  0 siblings, 2 replies; 19+ messages in thread
From: Peter Zijlstra @ 2010-11-15 11:06 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: Andi Kleen, linux-kernel, cjashfor, mingo, fweisbec, Andi Kleen

On Mon, 2010-11-15 at 12:01 +0100, Stephane Eranian wrote:
> On Sat, Nov 13, 2010 at 11:32 AM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> > On Sat, 2010-11-13 at 11:13 +0100, Andi Kleen wrote:
> >> > Hrmm,. this doesn't really scale right wrt multiple extra_regs.
> >> >
> >> > You made the extra_regs thing fairly extensible, but the above doesn't
> >> > really work well if there's multiple extra regs (say OFFCORE and
> >> > LBR_CONFIG -- possibly even the two OFFCORE regs present on westmere).
> >> >
> >> > It basically needs a per-core state for each extra_reg possible.
> >>
> >> It shouldn't be too hard to extend it to use a table. I didn't want
> >> to do that for the first iteration. I guess it could be done as followons
> >> once LBR is implemented.
> >
> > Isn't it also relevant for the two offcore regs for wsm?
> >
> But that's two distinct events (0xbb and 0xb7), though they behave
> exactly the same and have the same set of umasks and extra MSR
> values.

Ah, my bad, I thought there were two separate msrs, ok I'll take a last
look at these patches and if nothing else pops out I'll take them.

Thanks!

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/2] perf-events: Add support for supplementary event registers v2
  2010-11-15 11:06         ` Peter Zijlstra
@ 2010-11-15 11:17           ` Andi Kleen
  2010-11-15 11:18           ` Stephane Eranian
  1 sibling, 0 replies; 19+ messages in thread
From: Andi Kleen @ 2010-11-15 11:17 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Stephane Eranian, Andi Kleen, linux-kernel, cjashfor, mingo,
	fweisbec, Andi Kleen

> Ah, my bad, I thought there were two separate msrs, ok I'll take a last
> look at these patches and if nothing else pops out I'll take them.

There are actually two separate MSRs. I'm redoing my patchkit to handle
them. I also found another problem I'm fixing.

-Andi
-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/2] perf-events: Add support for supplementary event registers v2
  2010-11-15 11:06         ` Peter Zijlstra
  2010-11-15 11:17           ` Andi Kleen
@ 2010-11-15 11:18           ` Stephane Eranian
  2010-11-15 11:31             ` Andi Kleen
  1 sibling, 1 reply; 19+ messages in thread
From: Stephane Eranian @ 2010-11-15 11:18 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Andi Kleen, linux-kernel, cjashfor, mingo, fweisbec, Andi Kleen

On Mon, Nov 15, 2010 at 12:06 PM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
> On Mon, 2010-11-15 at 12:01 +0100, Stephane Eranian wrote:
>> On Sat, Nov 13, 2010 at 11:32 AM, Peter Zijlstra <a.p.zijlstra@chello.nl> wrote:
>> > On Sat, 2010-11-13 at 11:13 +0100, Andi Kleen wrote:
>> >> > Hrmm,. this doesn't really scale right wrt multiple extra_regs.
>> >> >
>> >> > You made the extra_regs thing fairly extensible, but the above doesn't
>> >> > really work well if there's multiple extra regs (say OFFCORE and
>> >> > LBR_CONFIG -- possibly even the two OFFCORE regs present on westmere).
>> >> >
>> >> > It basically needs a per-core state for each extra_reg possible.
>> >>
>> >> It shouldn't be too hard to extend it to use a table. I didn't want
>> >> to do that for the first iteration. I guess it could be done as followons
>> >> once LBR is implemented.
>> >
>> > Isn't it also relevant for the two offcore regs for wsm?
>> >
>> But that's two distinct events (0xbb and 0xb7), though they behave
>> exactly the same and have the same set of umasks and extra MSR
>> values.
>
> Ah, my bad, I thought there were two separate msrs, ok I'll take a last
> look at these patches and if nothing else pops out I'll take them.
>
OFFCORE_RESPONSE_0, event code 0xb7 -> extra MSR@0x1a6
OFFCORE_RESPONSE_1, event code 0xbb -> extra MSR@0x1a7

Why would you want to measure both at the same time?
Because you can measure two distinct request/response in one run.

If you want to measure both you need to create two distinct perf events.

^ permalink raw reply	[flat|nested] 19+ messages in thread

* Re: [PATCH 1/2] perf-events: Add support for supplementary event registers v2
  2010-11-15 11:18           ` Stephane Eranian
@ 2010-11-15 11:31             ` Andi Kleen
  0 siblings, 0 replies; 19+ messages in thread
From: Andi Kleen @ 2010-11-15 11:31 UTC (permalink / raw)
  To: Stephane Eranian
  Cc: Peter Zijlstra, Andi Kleen, linux-kernel, cjashfor, mingo,
	fweisbec, Andi Kleen

> Why would you want to measure both at the same time?
> Because you can measure two distinct request/response in one run.
> 
> If you want to measure both you need to create two distinct perf events.

What I did currently was to assign some cache events to 0xb7 and
others to bb on Westmere. That's a bit arbitrary, but allows to 
measure some common combinations together at least.

In theory one could teach the scheduler that these events
are equivalent, but that seems like too much complexity for 
me currently.
-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 19+ messages in thread

end of thread, other threads:[~2010-11-15 11:31 UTC | newest]

Thread overview: 19+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2010-11-12 16:55 [PATCH 1/2] perf-events: Add support for supplementary event registers v2 Andi Kleen
2010-11-12 16:55 ` [PATCH 2/2] perf-events: Fix LLC-* events on Intel Nehalem/Westmere v2 Andi Kleen
2010-11-12 17:17 ` [PATCH 1/2] perf-events: Add support for supplementary event registers v2 Peter Zijlstra
2010-11-12 17:17 ` Stephane Eranian
2010-11-12 17:33   ` Peter Zijlstra
2010-11-12 21:26     ` Stephane Eranian
2010-11-13 10:17     ` Andi Kleen
2010-11-13 10:34       ` Peter Zijlstra
2010-11-15 11:03         ` Stephane Eranian
2010-11-15 11:06           ` Andi Kleen
2010-11-15 11:00       ` Stephane Eranian
2010-11-12 17:23 ` Peter Zijlstra
2010-11-13 10:13   ` Andi Kleen
2010-11-13 10:32     ` Peter Zijlstra
2010-11-15 11:01       ` Stephane Eranian
2010-11-15 11:06         ` Peter Zijlstra
2010-11-15 11:17           ` Andi Kleen
2010-11-15 11:18           ` Stephane Eranian
2010-11-15 11:31             ` Andi Kleen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox