All of lore.kernel.org
 help / color / mirror / Atom feed
From: Peter Zijlstra <peterz@infradead.org>
To: "Yan, Zheng" <zheng.z.yan@intel.com>
Cc: linux-kernel@vger.kernel.org, mingo@kernel.org,
	acme@infradead.org, eranian@google.com, andi@firstfloor.org
Subject: Re: [PATCH v2 4/7] perf, x86: large PEBS interrupt threshold
Date: Tue, 15 Jul 2014 13:38:41 +0200	[thread overview]
Message-ID: <20140715113841.GC9918@twins.programming.kicks-ass.net> (raw)
In-Reply-To: <1405414739-31455-5-git-send-email-zheng.z.yan@intel.com>

[-- Attachment #1: Type: text/plain, Size: 7181 bytes --]

On Tue, Jul 15, 2014 at 04:58:56PM +0800, Yan, Zheng wrote:
> Signed-off-by: Yan, Zheng <zheng.z.yan@intel.com>
> ---
>  arch/x86/kernel/cpu/perf_event.h           |  1 +
>  arch/x86/kernel/cpu/perf_event_intel_ds.c  | 98 +++++++++++++++++++++---------
>  arch/x86/kernel/cpu/perf_event_intel_lbr.c |  5 --
>  3 files changed, 71 insertions(+), 33 deletions(-)
> 
> diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h
> index d8165f3..cb7cda8 100644
> --- a/arch/x86/kernel/cpu/perf_event.h
> +++ b/arch/x86/kernel/cpu/perf_event.h
> @@ -450,6 +450,7 @@ struct x86_pmu {
>  	struct event_constraint *pebs_constraints;
>  	void		(*pebs_aliases)(struct perf_event *event);
>  	int 		max_pebs_events;
> +	bool		multi_pebs;

This needs to die.

>  	/*
>  	 * Intel LBR
> diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> index 1db4ce5..e17eb5b 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
> @@ -11,7 +11,7 @@
>  #define BTS_RECORD_SIZE		24
>  
>  #define BTS_BUFFER_SIZE		(PAGE_SIZE << 4)
> -#define PEBS_BUFFER_SIZE	PAGE_SIZE
> +#define PEBS_BUFFER_SIZE	(PAGE_SIZE << 4)

See: http://lkml.kernel.org/r/alpine.DEB.2.02.1406301600460.26302@chino.kir.corp.google.com

Also talk about why 64k, mention NMI duration/processing overhead etc..

> @@ -708,14 +705,29 @@ struct event_constraint *intel_pebs_constraints(struct perf_event *event)
>  	return &emptyconstraint;
>  }
>  
> +/*
> + * Flags PEBS can handle without an PMI.
> + *
> + * TID can only be handled by flushing at context switch.
> + */
> +#define PEBS_FREERUNNING_FLAGS \
> +	(PERF_SAMPLE_IP | PERF_SAMPLE_TID | PERF_SAMPLE_ADDR | \
> +	 PERF_SAMPLE_ID | PERF_SAMPLE_CPU | PERF_SAMPLE_STREAM_ID | \
> +	 PERF_SAMPLE_DATA_SRC | PERF_SAMPLE_IDENTIFIER | \
> +	 PERF_SAMPLE_TRANSACTION)
> +
>  void intel_pmu_pebs_enable(struct perf_event *event)
>  {
>  	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
>  	struct hw_perf_event *hwc = &event->hw;
> +	struct debug_store *ds = cpuc->ds;
> +	u64 threshold;
> +	bool first_pebs;

flip those two lines

>  
>  	hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT;
>  	hwc->autoreload = !event->attr.freq;
>  
> +	first_pebs = !(cpuc->pebs_enabled & ((1ULL << MAX_PEBS_EVENTS) - 1));
>  	cpuc->pebs_enabled |= 1ULL << hwc->idx;
>  
>  	if (event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT)
> @@ -723,6 +735,20 @@ void intel_pmu_pebs_enable(struct perf_event *event)
>  	else if (event->hw.flags & PERF_X86_EVENT_PEBS_ST)
>  		cpuc->pebs_enabled |= 1ULL << 63;
>  
> +	/*
> +	 * When the event is constrained enough we can use a larger
> +	 * threshold and run the event with less frequent PMI.
> +	 */
> +	if (x86_pmu.multi_pebs && hwc->autoreload &&
> +	    !(event->attr.sample_type & ~PEBS_FREERUNNING_FLAGS)) {
> +		threshold = ds->pebs_absolute_maximum -
> +			x86_pmu.max_pebs_events * x86_pmu.pebs_record_size;
> +	} else {
> +		threshold = ds->pebs_buffer_base + x86_pmu.pebs_record_size;
> +	}

	threshold = 1;
	if ((hwc->flags & PERF_X86_EVENT_PEBS_RELOAD) &&
	    !(event->attr.sample_type & ~PEBS_FREERUNNING_FLAGS))
		threshold = x86_pmu.max_pebs_events;

	threshold = ds->pebs_buffer_base + threshold * x86_pmu.pebs_record_size;

> +	if (first_pebs || ds->pebs_interrupt_threshold > threshold)
> +		ds->pebs_interrupt_threshold = threshold;
> +
>  	/* Use auto-reload if possible to save a MSR write in the PMI */
>  	if (hwc->autoreload)
>  		ds->pebs_event_reset[hwc->idx] =

> @@ -880,7 +907,7 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
>  	u64 sample_type;
>  	int fll, fst;
>  
> -	if (!intel_pmu_save_and_restart(event))
> +	if (first_record && !intel_pmu_save_and_restart(event))
>  		return;
>  
>  	fll = event->hw.flags & PERF_X86_EVENT_PEBS_LDLAT;
> @@ -956,8 +983,22 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
>  	if (has_branch_stack(event))
>  		data.br_stack = &cpuc->lbr_stack;
>  
> -	if (perf_event_overflow(event, &data, &regs))
> -		x86_pmu_stop(event, 0);
> +	if (first_record) {
> +		if (perf_event_overflow(event, &data, &regs))
> +			x86_pmu_stop(event, 0);
> +	} else {
> +		struct perf_output_handle handle;
> +		struct perf_event_header header;
> +
> +		perf_prepare_sample(&header, &data, event, &regs);
> +
> +		if (perf_output_begin(&handle, event, header.size))
> +			return;
> +
> +		perf_output_sample(&handle, &header, &data, event);
> +
> +		perf_output_end(&handle);
> +	}

That is disgusting, have a look at drain_bts_buffer() and try again.

>  }
>  
>  static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
> @@ -998,17 +1039,18 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
>  	WARN_ONCE(n > 1, "bad leftover pebs %d\n", n);
>  	at += n - 1;
>  
> -	__intel_pmu_pebs_event(event, iregs, at);
> +	__intel_pmu_pebs_event(event, iregs, at, true);
>  }
>  
>  static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
>  {
>  	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
>  	struct debug_store *ds = cpuc->ds;
> -	struct perf_event *event = NULL;
> +	struct perf_event *event;
>  	void *at, *top;
>  	u64 status = 0;
>  	int bit;
> +	bool multi_pebs, first_record;

These should not be needed, but its also at the wrong place if it were.

>  	if (!x86_pmu.pebs_active)
>  		return;

> @@ -1042,17 +1086,15 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
>  
>  			if (!event->attr.precise_ip)
>  				continue;
> -
> -			if (__test_and_set_bit(bit, (unsigned long *)&status))
> -				continue;
> -
> -			break;
> +			if (!__test_and_set_bit(bit, (unsigned long *)&status)) {
> +				first_record = true;
> +			} else {
> +				if (!multi_pebs)
> +					continue;
> +				first_record = false;
> +			}
> +			__intel_pmu_pebs_event(event, iregs, at, first_record);
>  		}
> -
> -		if (!event || bit >= x86_pmu.max_pebs_events)
> -			continue;
> -
> -		__intel_pmu_pebs_event(event, iregs, at);

Distinct lack of properly handling the multi overflow case.

>  	}
>  }
>  
> diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
> index d6d5fcf..430f1ad 100644
> --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c
> +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c
> @@ -184,10 +184,6 @@ void intel_pmu_lbr_reset(void)
>  void intel_pmu_lbr_sched_task(struct perf_event_context *ctx, bool sched_in)
>  {
>  	struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events);
> -
> -	if (!x86_pmu.lbr_nr)
> -		return;
> -
>  	/*
>  	 * It is necessary to flush the stack on context switch. This happens
>  	 * when the branch stack does not tag its entries with the pid of the
> @@ -408,7 +404,6 @@ static void intel_pmu_setup_sw_lbr_filter(struct perf_event *event)
>  
>  	if (br_type & PERF_SAMPLE_BRANCH_COND)
>  		mask |= X86_BR_JCC;
> -
>  	/*
>  	 * stash actual user request into reg, it may
>  	 * be used by fixup code for some CPU

WTF?

[-- Attachment #2: Type: application/pgp-signature, Size: 836 bytes --]

  parent reply	other threads:[~2014-07-15 11:38 UTC|newest]

Thread overview: 16+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-07-15  8:58 [PATCH v2 0/7] perf, x86: large PEBS interrupt threshold Yan, Zheng
2014-07-15  8:58 ` [PATCH v2 1/7] perf, core: introduce pmu context switch callback Yan, Zheng
2014-07-15 11:39   ` Peter Zijlstra
2014-07-15  8:58 ` [PATCH v2 2/7] perf, x86: use context switch callback to flush LBR stack Yan, Zheng
2014-07-15  8:58 ` [PATCH v2 3/7] perf, x86: use the PEBS auto reload mechanism when possible Yan, Zheng
2014-07-15 10:14   ` Peter Zijlstra
2014-07-15  8:58 ` [PATCH v2 4/7] perf, x86: large PEBS interrupt threshold Yan, Zheng
2014-07-15 10:41   ` Peter Zijlstra
2014-07-15 11:38   ` Peter Zijlstra [this message]
2014-07-15  8:58 ` [PATCH v2 5/7] perf, x86: drain PEBS buffer during context switch Yan, Zheng
2014-07-15 11:57   ` Peter Zijlstra
2014-07-15  8:58 ` [PATCH v2 6/7] perf, x86: enable large PEBS interrupt threshold for SNB/IVB/HSW Yan, Zheng
2014-07-15 11:12   ` Peter Zijlstra
2014-07-15  8:58 ` [PATCH v2 7/7] tools, perf: Allow the user to disable time stamps Yan, Zheng
2014-07-15 10:02 ` [PATCH v2 0/7] perf, x86: large PEBS interrupt threshold Peter Zijlstra
2014-07-16  1:12   ` Yan, Zheng

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20140715113841.GC9918@twins.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=acme@infradead.org \
    --cc=andi@firstfloor.org \
    --cc=eranian@google.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=zheng.z.yan@intel.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.