All of lore.kernel.org
 help / color / mirror / Atom feed
From: Cyrill Gorcunov <gorcunov@gmail.com>
To: Ingo Molnar <mingo@elte.hu>,
	Peter Zijlstra <peterz@infradead.org>,
	Stephane Eranian <eranian@google.com>
Cc: Frederic Weisbecker <fweisbec@gmail.com>,
	Don Zickus <dzickus@redhat.com>,
	LKML <linux-kernel@vger.kernel.org>
Subject: Re: [RFC perf,x86] P4 PMU early draft
Date: Tue, 9 Feb 2010 00:56:14 +0300	[thread overview]
Message-ID: <20100208215614.GC5130@lenovo> (raw)
In-Reply-To: <20100208184504.GB5130@lenovo>

On Mon, Feb 08, 2010 at 09:45:04PM +0300, Cyrill Gorcunov wrote:
> Hi all,
> 
...
> All in one -- If you have some spare minutes, please take a glance. I can't
> say I like this code -- it's overcomplicated and I fear hard to understand.
> and still a bit buggy. Complains are welcome!
> 
...

Just updated some snippets, here is an interdiff on top of previous post
(just to not post too much). The key moment is to use cpu in new
x86_pmu.schedule_events routine. This will allow to find if hw_perf_event::config
has been migrated to a different logical cpu if HT is turned on. On non-HT
machine it will have no effect. As result we should swap the ESCR+CCCR thread
specific flags I think.

	-- Cyrill
---

diff -u linux-2.6.git/arch/x86/include/asm/perf_p4.h linux-2.6.git/arch/x86/include/asm/perf_p4.h
--- linux-2.6.git/arch/x86/include/asm/perf_p4.h
+++ linux-2.6.git/arch/x86/include/asm/perf_p4.h
@@ -120,7 +120,7 @@
 	return !!((P4_EVENT_UNPACK_SELECTOR(config)) & P4_CCCR_CASCADE);
 }
 
-static inline bool p4_is_ht_slave(u64 config)
+static inline int p4_is_ht_slave(u64 config)
 {
 	return !!(config & P4_CONFIG_HT);
 }
@@ -146,7 +146,8 @@
 static inline int p4_ht_thread(int cpu)
 {
 #ifdef CONFIG_SMP
-	return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map));
+	if (smp_num_siblings == 2)
+		return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map));
 #endif
 	return 0;
 }
diff -u linux-2.6.git/arch/x86/kernel/cpu/perf_event.c linux-2.6.git/arch/x86/kernel/cpu/perf_event.c
--- linux-2.6.git/arch/x86/kernel/cpu/perf_event.c
+++ linux-2.6.git/arch/x86/kernel/cpu/perf_event.c
@@ -141,7 +141,7 @@
 	u64		max_period;
 	u64		intel_ctrl;
 	int		(*hw_config)(struct perf_event_attr *attr, struct hw_perf_event *hwc);
-	int		(*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
+	int		(*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign, int cpu);
 	void		(*enable_bts)(u64 config);
 	void		(*disable_bts)(void);
 
@@ -1236,7 +1236,7 @@
 }
 
 /*
- * offset: 0 - BSP thread, 1 - secondary thread
+ * offset: 0,1 - HT threads
  * used in HT enabled cpu
  */
 struct p4_event_template {
@@ -1254,19 +1254,6 @@
 } p4_pmu_config;
 
 /*
- * Netburst is heavily constrained :(
- */
-#if 0
-#define P4_EVENT_CONSTRAINT(c, n)	\
-	EVENT_CONSTRAINT(c, n, (P4_EVNTSEL_MASK | P4_CCCR_MASK))
-
-static struct event_constraint p4_event_constraints[] =
-{
-	EVENT_CONSTRAINT_END
-};
-#endif
-
-/*
  * CCCR1 doesn't have a working enable bit so do not use it ever
  *
  * Also as only we start to support raw events we will need to
@@ -1382,6 +1369,11 @@
 	return config;
 }
 
+/*
+ * note-to-self: this could be a bottleneck and we may need some hash structure
+ * based on "packed" event CRC, currently even if we may almost ideal
+ * hashing we will still have 5 intersected opcodes, introduce kind of salt?
+ */
 static struct p4_event_template *p4_pmu_template_lookup(u64 config)
 {
 	u32 opcode = p4_config_unpack_opcode(config);
@@ -1411,6 +1403,12 @@
 {
 	int cpu = smp_processor_id();
 
+	/*
+	 * the reason we use cpu that early is that if we get scheduled
+	 * first time on the same cpu -- we will not need swap thread
+	 * specific flags in config which will save some cycles
+	 */
+
 	/* CCCR by default */
 	hwc->config = p4_config_pack_cccr(p4_default_cccr_conf(cpu));
 
@@ -1522,15 +1520,26 @@
 	return handled;
 }
 
+/* swap some thread specific flags in cofing */
+static u64 p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu)
+{
+	u64 conf = hwc->config;
+
+	if ((p4_is_ht_slave(hwc->config) ^ p4_ht_thread(cpu))) {
+		/* FIXME: swap them here */
+	}
+
+	return conf;
+}
+
 /*
  * Netburst has a quite constrained architecture
  */
-static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
+static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign, int cpu)
 {
 	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	struct p4_event_template *tpl;
 	struct hw_perf_event *hwc;
-	int cpu = smp_processor_id();
 	int thread;
 	int i, j, num = 0;
 
@@ -1678,7 +1687,8 @@
 	return event->pmu == &pmu;
 }
 
-static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
+/* we don't use cpu argument here at all */
+static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign, int cpu)
 {
 	struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
 	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
@@ -2143,7 +2153,7 @@
 	if (n < 0)
 		return n;
 
-	ret = x86_schedule_events(cpuc, n, assign);
+	ret = x86_pmu.schedule_events(cpuc, n, assign, 0);
 	if (ret)
 		return ret;
 	/*
@@ -2660,7 +2670,7 @@
 	if (n0 < 0)
 		return n0;
 
-	ret = x86_pmu.schedule_events(cpuc, n0, assign);
+	ret = x86_pmu.schedule_events(cpuc, n0, assign, cpu);
 	if (ret)
 		return ret;
 
@@ -3070,6 +3080,7 @@
 {
 	struct perf_event *leader = event->group_leader;
 	struct cpu_hw_events *fake_cpuc;
+	int cpu = smp_processor_id();
 	int ret, n;
 
 	ret = -ENOMEM;
@@ -3095,7 +3106,7 @@
 
 	fake_cpuc->n_events = n;
 
-	ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
+	ret = x86_pmu.schedule_events(fake_cpuc, n, NULL, cpu);
 
 out_free:
 	kfree(fake_cpuc);

  reply	other threads:[~2010-02-08 21:56 UTC|newest]

Thread overview: 20+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2010-02-08 18:45 [RFC perf,x86] P4 PMU early draft Cyrill Gorcunov
2010-02-08 21:56 ` Cyrill Gorcunov [this message]
2010-02-09  4:17 ` Ingo Molnar
2010-02-09  6:54   ` Cyrill Gorcunov
2010-02-09 22:39   ` Cyrill Gorcunov
2010-02-10 10:12     ` Peter Zijlstra
2010-02-10 10:38       ` Cyrill Gorcunov
2010-02-10 10:52         ` Peter Zijlstra
2010-02-10 11:23           ` Cyrill Gorcunov
2010-02-11 12:21           ` Peter Zijlstra
2010-02-11 15:22             ` Cyrill Gorcunov
2010-02-26 10:25             ` [tip:perf/core] perf_events: Simplify code by removing cpu argument to hw_perf_group_sched_in() tip-bot for Peter Zijlstra
2010-02-09  4:23 ` [RFC perf,x86] P4 PMU early draft Paul Mackerras
2010-02-09  6:57   ` Cyrill Gorcunov
2010-02-09  8:54   ` Peter Zijlstra
2010-02-09 21:13 ` Stephane Eranian
2010-02-09 21:35   ` Cyrill Gorcunov
2010-02-15 20:11 ` Robert Richter
2010-02-15 20:32   ` Cyrill Gorcunov
2010-02-17 22:14   ` Cyrill Gorcunov

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20100208215614.GC5130@lenovo \
    --to=gorcunov@gmail.com \
    --cc=dzickus@redhat.com \
    --cc=eranian@google.com \
    --cc=fweisbec@gmail.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@elte.hu \
    --cc=peterz@infradead.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.