public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
* [tip:perf/core] perf/x86/intel: Clean-up/reduce PEBS code
@ 2013-09-12 18:04 tip-bot for Peter Zijlstra
  2013-09-14  6:18 ` Andi Kleen
  0 siblings, 1 reply; 7+ messages in thread
From: tip-bot for Peter Zijlstra @ 2013-09-12 18:04 UTC (permalink / raw)
  To: linux-tip-commits; +Cc: linux-kernel, hpa, mingo, peterz, tglx

Commit-ID:  d2beea4a3419e63804094e9ac4b6d1518bc17a9b
Gitweb:     http://git.kernel.org/tip/d2beea4a3419e63804094e9ac4b6d1518bc17a9b
Author:     Peter Zijlstra <peterz@infradead.org>
AuthorDate: Thu, 12 Sep 2013 13:00:47 +0200
Committer:  Ingo Molnar <mingo@kernel.org>
CommitDate: Thu, 12 Sep 2013 19:13:37 +0200

perf/x86/intel: Clean-up/reduce PEBS code

Get rid of some pointless duplication introduced by the Haswell code.

Signed-off-by: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/n/tip-8q6y4davda9aawwv5yxe7klp@git.kernel.org
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 94 +++++++++----------------------
 1 file changed, 26 insertions(+), 68 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 104cbba..f364c13 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -188,8 +188,7 @@ struct pebs_record_hsw {
 	u64 r8,  r9,  r10, r11;
 	u64 r12, r13, r14, r15;
 	u64 status, dla, dse, lat;
-	u64 real_ip; /* the actual eventing ip */
-	u64 tsx_tuning; /* TSX abort cycles and flags */
+	u64 real_ip, tsx_tuning;
 };
 
 union hsw_tsx_tuning {
@@ -811,10 +810,8 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 				   struct pt_regs *iregs, void *__pebs)
 {
 	/*
-	 * We cast to pebs_record_nhm to get the load latency data
-	 * if extra_reg MSR_PEBS_LD_LAT_THRESHOLD used
-	 * We cast to the biggest PEBS record are careful not
-	 * to access out-of-bounds members.
+	 * We cast to the biggest pebs_record but are careful not to
+	 * unconditionally access the 'extra' entries.
 	 */
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct pebs_record_hsw *pebs = __pebs;
@@ -884,12 +881,11 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
 		regs.flags &= ~PERF_EFLAGS_EXACT;
 
 	if ((event->attr.sample_type & PERF_SAMPLE_ADDR) &&
-		x86_pmu.intel_cap.pebs_format >= 1)
+	    x86_pmu.intel_cap.pebs_format >= 1)
 		data.addr = pebs->dla;
 
 	/* Only set the TSX weight when no memory weight was requested. */
-	if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) &&
-	    !fll &&
+	if ((event->attr.sample_type & PERF_SAMPLE_WEIGHT) && !fll &&
 	    (x86_pmu.intel_cap.pebs_format >= 2))
 		data.weight = intel_hsw_weight(pebs);
 
@@ -941,17 +937,34 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
 	__intel_pmu_pebs_event(event, iregs, at);
 }
 
-static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at,
-					void *top)
+static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 {
 	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
 	struct debug_store *ds = cpuc->ds;
 	struct perf_event *event = NULL;
+	void *at, *top;
 	u64 status = 0;
-	int bit;
+	int bit, n;
+
+	if (!x86_pmu.pebs_active)
+		return;
+
+	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
+	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
 
 	ds->pebs_index = ds->pebs_buffer_base;
 
+	n = (top - at) / x86_pmu.pebs_record_size;
+	if (n <= 0)
+		return;
+
+	/*
+	 * Should not happen, we program the threshold at 1 and do not
+	 * set a reset value.
+	 */
+	WARN_ONCE(n > x86_pmu.max_pebs_events,
+		  "Unexpected number of pebs records %d\n", n);
+
 	for (; at < top; at += x86_pmu.pebs_record_size) {
 		struct pebs_record_nhm *p = at;
 
@@ -979,61 +992,6 @@ static void __intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, void *at,
 	}
 }
 
-static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
-{
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	struct debug_store *ds = cpuc->ds;
-	struct pebs_record_nhm *at, *top;
-	int n;
-
-	if (!x86_pmu.pebs_active)
-		return;
-
-	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
-	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
-
-	ds->pebs_index = ds->pebs_buffer_base;
-
-	n = top - at;
-	if (n <= 0)
-		return;
-
-	/*
-	 * Should not happen, we program the threshold at 1 and do not
-	 * set a reset value.
-	 */
-	WARN_ONCE(n > x86_pmu.max_pebs_events,
-		  "Unexpected number of pebs records %d\n", n);
-
-	return __intel_pmu_drain_pebs_nhm(iregs, at, top);
-}
-
-static void intel_pmu_drain_pebs_hsw(struct pt_regs *iregs)
-{
-	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
-	struct debug_store *ds = cpuc->ds;
-	struct pebs_record_hsw *at, *top;
-	int n;
-
-	if (!x86_pmu.pebs_active)
-		return;
-
-	at  = (struct pebs_record_hsw *)(unsigned long)ds->pebs_buffer_base;
-	top = (struct pebs_record_hsw *)(unsigned long)ds->pebs_index;
-
-	n = top - at;
-	if (n <= 0)
-		return;
-	/*
-	 * Should not happen, we program the threshold at 1 and do not
-	 * set a reset value.
-	 */
-	WARN_ONCE(n > x86_pmu.max_pebs_events,
-		  "Unexpected number of pebs records %d\n", n);
-
-	return __intel_pmu_drain_pebs_nhm(iregs, at, top);
-}
-
 /*
  * BTS, PEBS probe and setup
  */
@@ -1068,7 +1026,7 @@ void intel_ds_init(void)
 		case 2:
 			pr_cont("PEBS fmt2%c, ", pebs_type);
 			x86_pmu.pebs_record_size = sizeof(struct pebs_record_hsw);
-			x86_pmu.drain_pebs = intel_pmu_drain_pebs_hsw;
+			x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm;
 			break;
 
 		default:

^ permalink raw reply related	[flat|nested] 7+ messages in thread

* Re: [tip:perf/core] perf/x86/intel: Clean-up/reduce PEBS code
  2013-09-12 18:04 [tip:perf/core] perf/x86/intel: Clean-up/reduce PEBS code tip-bot for Peter Zijlstra
@ 2013-09-14  6:18 ` Andi Kleen
  2013-09-16  6:07   ` Peter Zijlstra
  0 siblings, 1 reply; 7+ messages in thread
From: Andi Kleen @ 2013-09-14  6:18 UTC (permalink / raw)
  To: mingo; +Cc: hpa, linux-kernel, peterz, tglx, linux-tip-commits

tip-bot for Peter Zijlstra <tipbot@zytor.com> writes:
> +
> +	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
> +	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
>  
>  	ds->pebs_index = ds->pebs_buffer_base;
>  
> +	n = (top - at) / x86_pmu.pebs_record_size;


This adds a full slow division to the PEBS hot path.

Does not seem like an improvement to me.

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [tip:perf/core] perf/x86/intel: Clean-up/reduce PEBS code
  2013-09-14  6:18 ` Andi Kleen
@ 2013-09-16  6:07   ` Peter Zijlstra
  2013-09-16  6:17     ` H. Peter Anvin
                       ` (2 more replies)
  0 siblings, 3 replies; 7+ messages in thread
From: Peter Zijlstra @ 2013-09-16  6:07 UTC (permalink / raw)
  To: Andi Kleen; +Cc: mingo, hpa, linux-kernel, tglx, linux-tip-commits

On Fri, Sep 13, 2013 at 11:18:21PM -0700, Andi Kleen wrote:
> tip-bot for Peter Zijlstra <tipbot@zytor.com> writes:
> > +
> > +	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
> > +	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
> >  
> >  	ds->pebs_index = ds->pebs_buffer_base;
> >  
> > +	n = (top - at) / x86_pmu.pebs_record_size;
> 
> 
> This adds a full slow division to the PEBS hot path.
> 
> Does not seem like an improvement to me.

There already was an implicit division there, and
sizeof(pebs_record_hsw) = 176, can it really optimize that constant
division?

I suppose we could go and introduce CONFIG_PERF_DEBUG and stuff sanity
checks under that.. :/

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [tip:perf/core] perf/x86/intel: Clean-up/reduce PEBS code
  2013-09-16  6:07   ` Peter Zijlstra
@ 2013-09-16  6:17     ` H. Peter Anvin
  2013-09-16  7:24     ` Peter Zijlstra
  2013-09-16 16:03     ` Andi Kleen
  2 siblings, 0 replies; 7+ messages in thread
From: H. Peter Anvin @ 2013-09-16  6:17 UTC (permalink / raw)
  To: Peter Zijlstra; +Cc: Andi Kleen, mingo, linux-kernel, tglx, linux-tip-commits

On 09/16/2013 01:07 AM, Peter Zijlstra wrote:
> On Fri, Sep 13, 2013 at 11:18:21PM -0700, Andi Kleen wrote:
>> tip-bot for Peter Zijlstra <tipbot@zytor.com> writes:
>>> +
>>> +	at  = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
>>> +	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
>>>
>>>   	ds->pebs_index = ds->pebs_buffer_base;
>>>
>>> +	n = (top - at) / x86_pmu.pebs_record_size;
>>
>>
>> This adds a full slow division to the PEBS hot path.
>>
>> Does not seem like an improvement to me.
>
> There already was an implicit division there, and
> sizeof(pebs_record_hsw) = 176, can it really optimize that constant
> division?
>

gcc can optimize ANY constant division, if nothing else then by 
reciprocal multiplication.

	-hpa



^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [tip:perf/core] perf/x86/intel: Clean-up/reduce PEBS code
  2013-09-16  6:07   ` Peter Zijlstra
  2013-09-16  6:17     ` H. Peter Anvin
@ 2013-09-16  7:24     ` Peter Zijlstra
  2013-09-16 16:09       ` Andi Kleen
  2013-09-16 16:03     ` Andi Kleen
  2 siblings, 1 reply; 7+ messages in thread
From: Peter Zijlstra @ 2013-09-16  7:24 UTC (permalink / raw)
  To: Andi Kleen; +Cc: mingo, hpa, linux-kernel, tglx, linux-tip-commits

On Mon, Sep 16, 2013 at 08:07:36AM +0200, Peter Zijlstra wrote:
> There already was an implicit division there, and
> sizeof(pebs_record_hsw) = 176, can it really optimize that constant
> division?
> 
> I suppose we could go and introduce CONFIG_PERF_DEBUG and stuff sanity
> checks under that.. :/

Or we could write it like so:

---
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -954,16 +954,16 @@ static void intel_pmu_drain_pebs_nhm(str
 
 	ds->pebs_index = ds->pebs_buffer_base;
 
-	n = (top - at) / x86_pmu.pebs_record_size;
-	if (n <= 0)
+	if (unlikely(at > top))
 		return;
 
 	/*
 	 * Should not happen, we program the threshold at 1 and do not
 	 * set a reset value.
 	 */
-	WARN_ONCE(n > x86_pmu.max_pebs_events,
-		  "Unexpected number of pebs records %d\n", n);
+	WARN_ONCE(top - at > x86_pmu.max_pebs_events * x86_pmu.pebs_record_size,
+		  "Unexpected number of pebs records %d\n", 
+		  (top - at) / x86_pmu.pebs_record_size);
 
 	for (; at < top; at += x86_pmu.pebs_record_size) {
 		struct pebs_record_nhm *p = at;

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [tip:perf/core] perf/x86/intel: Clean-up/reduce PEBS code
  2013-09-16  6:07   ` Peter Zijlstra
  2013-09-16  6:17     ` H. Peter Anvin
  2013-09-16  7:24     ` Peter Zijlstra
@ 2013-09-16 16:03     ` Andi Kleen
  2 siblings, 0 replies; 7+ messages in thread
From: Andi Kleen @ 2013-09-16 16:03 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Andi Kleen, mingo, hpa, linux-kernel, tglx, linux-tip-commits

> There already was an implicit division there, and
> sizeof(pebs_record_hsw) = 176, can it really optimize that constant
> division?

Yes it can (if nothing else then by converting it to * 1/x)

The only exception is with -Os, but if you want performance
you should not be using that anyways.

You could do that manually, but it's easiest to just keep
the code paths separate and let the compiler worry about it.

> I suppose we could go and introduce CONFIG_PERF_DEBUG and stuff sanity
> checks under that.. :/

Hmm, it's not a sanity check?

-Andi

-- 
ak@linux.intel.com -- Speaking for myself only.

^ permalink raw reply	[flat|nested] 7+ messages in thread

* Re: [tip:perf/core] perf/x86/intel: Clean-up/reduce PEBS code
  2013-09-16  7:24     ` Peter Zijlstra
@ 2013-09-16 16:09       ` Andi Kleen
  0 siblings, 0 replies; 7+ messages in thread
From: Andi Kleen @ 2013-09-16 16:09 UTC (permalink / raw)
  To: Peter Zijlstra
  Cc: Andi Kleen, mingo, hpa, linux-kernel, tglx, linux-tip-commits

>  
>  	/*
>  	 * Should not happen, we program the threshold at 1 and do not
>  	 * set a reset value.
>  	 */
> -	WARN_ONCE(n > x86_pmu.max_pebs_events,
> -		  "Unexpected number of pebs records %d\n", n);
> +	WARN_ONCE(top - at > x86_pmu.max_pebs_events * x86_pmu.pebs_record_size,

That should work yes.

-Andi

^ permalink raw reply	[flat|nested] 7+ messages in thread

end of thread, other threads:[~2013-09-16 16:09 UTC | newest]

Thread overview: 7+ messages (download: mbox.gz follow: Atom feed
-- links below jump to the message on this page --
2013-09-12 18:04 [tip:perf/core] perf/x86/intel: Clean-up/reduce PEBS code tip-bot for Peter Zijlstra
2013-09-14  6:18 ` Andi Kleen
2013-09-16  6:07   ` Peter Zijlstra
2013-09-16  6:17     ` H. Peter Anvin
2013-09-16  7:24     ` Peter Zijlstra
2013-09-16 16:09       ` Andi Kleen
2013-09-16 16:03     ` Andi Kleen

This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox