Re: [RFC perf,x86] P4 PMU early draft

public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed

From: Cyrill Gorcunov <gorcunov@gmail.com>
To: Ingo Molnar <mingo@elte.hu>,
	Peter Zijlstra <peterz@infradead.org>,
	Stephane Eranian <eranian@google.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>,
	Don Zickus <dzickus@redhat.com>,
	LKML <linux-kernel@vger.kernel.org>
Subject: Re: [RFC perf,x86] P4 PMU early draft
Date: Tue, 9 Feb 2010 00:56:14 +0300	[thread overview]
Message-ID: <20100208215614.GC5130@lenovo> (raw)
In-Reply-To: <20100208184504.GB5130@lenovo>

On Mon, Feb 08, 2010 at 09:45:04PM +0300, Cyrill Gorcunov wrote:
> Hi all,
> 
...
> All in one -- If you have some spare minutes, please take a glance. I can't
> say I like this code -- it's overcomplicated and, I fear, hard to understand,
> and still a bit buggy. Complaints are welcome!
> 
...

I just updated some snippets; here is an interdiff on top of the previous post
(so as not to post too much). The key point is to pass cpu to the new
x86_pmu.schedule_events routine. This allows us to detect whether
hw_perf_event::config has been migrated to a different logical cpu when HT is
turned on. On a non-HT machine it will have no effect. As a result, I think we
should swap the ESCR+CCCR thread-specific flags.

	-- Cyrill
---

diff -u linux-2.6.git/arch/x86/include/asm/perf_p4.h linux-2.6.git/arch/x86/include/asm/perf_p4.h
--- linux-2.6.git/arch/x86/include/asm/perf_p4.h
+++ linux-2.6.git/arch/x86/include/asm/perf_p4.h
@@ -120,7 +120,7 @@
 	return !!((P4_EVENT_UNPACK_SELECTOR(config)) & P4_CCCR_CASCADE);
 }
 
-static inline bool p4_is_ht_slave(u64 config)
+static inline int p4_is_ht_slave(u64 config)
 {
 	return !!(config & P4_CONFIG_HT);
 }
@@ -146,7 +146,8 @@
 static inline int p4_ht_thread(int cpu)
 {
 #ifdef CONFIG_SMP
-	return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map));
+	if (smp_num_siblings == 2)
+		return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map));
 #endif
 	return 0;
 }
diff -u linux-2.6.git/arch/x86/kernel/cpu/perf_event.c linux-2.6.git/arch/x86/kernel/cpu/perf_event.c
--- linux-2.6.git/arch/x86/kernel/cpu/perf_event.c
+++ linux-2.6.git/arch/x86/kernel/cpu/perf_event.c
@@ -141,7 +141,7 @@
 	u64		max_period;
 	u64		intel_ctrl;
 	int		(*hw_config)(struct perf_event_attr *attr, struct hw_perf_event *hwc);
-	int		(*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
+	int		(*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign, int cpu);
 	void		(*enable_bts)(u64 config);
 	void		(*disable_bts)(void);
 
@@ -1236,7 +1236,7 @@
 }
 
 /*
- * offset: 0 - BSP thread, 1 - secondary thread
+ * offset: 0,1 - HT threads
  * used in HT enabled cpu
  */
 struct p4_event_template {
@@ -1254,19 +1254,6 @@
 } p4_pmu_config;
 
 /*
- * Netburst is heavily constrained :(
- */
-#if 0
-#define P4_EVENT_CONSTRAINT(c, n)	\
-	EVENT_CONSTRAINT(c, n, (P4_EVNTSEL_MASK | P4_CCCR_MASK))
-
-static struct event_constraint p4_event_constraints[] =
-{
-	EVENT_CONSTRAINT_END
-};
-#endif
-
-/*
  * CCCR1 doesn't have a working enable bit so do not use it ever
  *
  * Also as only we start to support raw events we will need to
@@ -1382,6 +1369,11 @@
 	return config;
 }
 
+/*
+ * note-to-self: this could be a bottleneck and we may need some hash structure
+ * based on "packed" event CRC; currently, even if we had almost ideal
+ * hashing, we would still have 5 intersecting opcodes -- introduce a kind of salt?
+ */
 static struct p4_event_template *p4_pmu_template_lookup(u64 config)
 {
 	u32 opcode = p4_config_unpack_opcode(config);
@@ -1411,6 +1403,12 @@
 {
 	int cpu = smp_processor_id();
 
+	/*
+	 * the reason we use cpu this early is that if we get scheduled
+	 * for the first time on the same cpu, we will not need to swap the
+	 * thread-specific flags in config, which will save some cycles
+	 */
+
 	/* CCCR by default */
 	hwc->config = p4_config_pack_cccr(p4_default_cccr_conf(cpu));
 
@@ -1522,15 +1520,26 @@
 	return handled;
 }
 
+/* swap some thread-specific flags in config */
+static u64 p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu)
+{
+	u64 conf = hwc->config;
+
+	if ((p4_is_ht_slave(hwc->config) ^ p4_ht_thread(cpu))) {
+		/* FIXME: swap them here */
+	}
+
+	return conf;
+}
+
 /*
  * Netburst has a quite constrained architecture
  */
-static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
+static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign, int cpu)
 {
 	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	struct p4_event_template *tpl;
 	struct hw_perf_event *hwc;
-	int cpu = smp_processor_id();
 	int thread;
 	int i, j, num = 0;
 
@@ -1678,7 +1687,8 @@
 	return event->pmu == &pmu;
 }
 
-static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
+/* we don't use cpu argument here at all */
+static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign, int cpu)
 {
 	struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
 	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
@@ -2143,7 +2153,7 @@
 	if (n < 0)
 		return n;
 
-	ret = x86_schedule_events(cpuc, n, assign);
+	ret = x86_pmu.schedule_events(cpuc, n, assign, 0);
 	if (ret)
 		return ret;
 	/*
@@ -2660,7 +2670,7 @@
 	if (n0 < 0)
 		return n0;
 
-	ret = x86_pmu.schedule_events(cpuc, n0, assign);
+	ret = x86_pmu.schedule_events(cpuc, n0, assign, cpu);
 	if (ret)
 		return ret;
 
@@ -3070,6 +3080,7 @@
 {
 	struct perf_event *leader = event->group_leader;
 	struct cpu_hw_events *fake_cpuc;
+	int cpu = smp_processor_id();
 	int ret, n;
 
 	ret = -ENOMEM;
@@ -3095,7 +3106,7 @@
 
 	fake_cpuc->n_events = n;
 
-	ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
+	ret = x86_pmu.schedule_events(fake_cpuc, n, NULL, cpu);
 
 out_free:
 	kfree(fake_cpuc);

next prev parent reply	other threads:[~2010-02-08 21:56 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-02-08 18:45 [RFC perf,x86] P4 PMU early draft Cyrill Gorcunov
2010-02-08 21:56 ` Cyrill Gorcunov [this message]
2010-02-09  4:17 ` Ingo Molnar
2010-02-09  6:54   ` Cyrill Gorcunov
2010-02-09 22:39   ` Cyrill Gorcunov
2010-02-10 10:12     ` Peter Zijlstra
2010-02-10 10:38       ` Cyrill Gorcunov
2010-02-10 10:52         ` Peter Zijlstra
2010-02-10 11:23           ` Cyrill Gorcunov
2010-02-11 12:21           ` Peter Zijlstra
2010-02-11 15:22             ` Cyrill Gorcunov
2010-02-26 10:25             ` [tip:perf/core] perf_events: Simplify code by removing cpu argument to hw_perf_group_sched_in() tip-bot for Peter Zijlstra
2010-02-09  4:23 ` [RFC perf,x86] P4 PMU early draft Paul Mackerras
2010-02-09  6:57   ` Cyrill Gorcunov
2010-02-09  8:54   ` Peter Zijlstra
2010-02-09 21:13 ` Stephane Eranian
2010-02-09 21:35   ` Cyrill Gorcunov
2010-02-15 20:11 ` Robert Richter
2010-02-15 20:32   ` Cyrill Gorcunov
2010-02-17 22:14   ` Cyrill Gorcunov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100208215614.GC5130@lenovo \
    --to=gorcunov@gmail.com \
    --cc=dzickus@redhat.com \
    --cc=eranian@google.com \
    --cc=fweisbec@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox