Re: [PATCH V7 3/6] perf, x86: handle multiple records in PEBS buffer

All of lore.kernel.org
 help / color / mirror / Atom feed

From: Peter Zijlstra <peterz@infradead.org>
To: Kan Liang <kan.liang@intel.com>
Cc: mingo@kernel.org, acme@infradead.org, eranian@google.com,
	andi@firstfloor.org, linux-kernel@vger.kernel.org
Subject: Re: [PATCH V7 3/6] perf, x86: handle multiple records in PEBS buffer
Date: Tue, 5 May 2015 15:16:37 +0200	[thread overview]
Message-ID: <20150505131637.GO23123@twins.programming.kicks-ass.net> (raw)
In-Reply-To: <1429517270-8079-4-git-send-email-kan.liang@intel.com>

On Mon, Apr 20, 2015 at 04:07:47AM -0400, Kan Liang wrote:
> +static inline void *
> +get_next_pebs_record_by_bit(void *base, void *top, int bit)
> +{
> +	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
> +	void *at;
> +	u64 pebs_status;
> +
> +	if (base == NULL)
> +		return NULL;
> +
> +	for (at = base; at < top; at += x86_pmu.pebs_record_size) {
> +		struct pebs_record_nhm *p = at;
> +
> +		if (test_bit(bit, (unsigned long *)&p->status)) {

Just wondering, is that BT better than: p->status & (1 << bit) ?

> +
> +			if (p->status == (1 << bit))
> +				return at;
> +
> +			/* clear non-PEBS bit and re-check */
> +			pebs_status = p->status & cpuc->pebs_enabled;
> +			pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
> +			if (pebs_status == (1 << bit))
> +				return at;
> +		}
> +	}
> +	return NULL;
> +}
> +
>  static void __intel_pmu_pebs_event(struct perf_event *event,
> +				   struct pt_regs *iregs,
> +				   void *base, void *top,
> +				   int bit, int count)
>  {
>  	struct perf_sample_data data;
>  	struct pt_regs regs;
> +	int i;
> +	void *at = get_next_pebs_record_by_bit(base, top, bit);
>  
> +	if (!intel_pmu_save_and_restart(event) &&
> +	    !(event->hw.flags & PERF_X86_EVENT_AUTO_RELOAD))
>  		return;
>  
> +	if (count > 1) {
> +		for (i = 0; i < count - 1; i++) {
> +			setup_pebs_sample_data(event, iregs, at, &data, &regs);
> +			perf_event_output(event, &data, &regs);
> +			at += x86_pmu.pebs_record_size;
> +			at = get_next_pebs_record_by_bit(at, top, bit);
> +		}
> +	}
> +
> +	setup_pebs_sample_data(event, iregs, at, &data, &regs);
>  
> +	/* all records are processed, handle event overflow now */

All but the last. One record is explicitly left over so that the
overflow handler can be called for it, is it not?

> +	if (perf_event_overflow(event, &data, &regs)) {
>  		x86_pmu_stop(event, 0);
> +		return;
> +	}
> +
>  }
>  
>  static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
> @@ -1000,72 +1081,86 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
>  	if (!event->attr.precise_ip)
>  		return;
>  
> +	n = (top - at) / x86_pmu.pebs_record_size;
>  	if (n <= 0)
>  		return;
>  
> +	__intel_pmu_pebs_event(event, iregs, at,
> +			       top, 0, n);
>  }
>  
>  static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
>  {
>  	struct cpu_hw_events *cpuc = this_cpu_ptr(&cpu_hw_events);
>  	struct debug_store *ds = cpuc->ds;
> +	struct perf_event *event;
> +	void *base, *at, *top;
>  	int bit;
> +	int counts[MAX_PEBS_EVENTS] = {};
>  
>  	if (!x86_pmu.pebs_active)
>  		return;
>  
> +	base = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
>  	top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index;
>  
>  	ds->pebs_index = ds->pebs_buffer_base;
>  
> +	if (unlikely(base >= top))
>  		return;
>  
> +	for (at = base; at < top; at += x86_pmu.pebs_record_size) {
>  		struct pebs_record_nhm *p = at;
>  
>  		for_each_set_bit(bit, (unsigned long *)&p->status,
>  				 x86_pmu.max_pebs_events) {
>  			event = cpuc->events[bit];
>  			WARN_ON_ONCE(!event);
>  
> +			if (event->attr.precise_ip)
> +				break;
> +		}

Would it make sense to delay looking up the event until you've found
that there is a single bit set -- and already know which bit that is?

>  
> +		if (bit >= x86_pmu.max_pebs_events)
> +			continue;
> +		if (!test_bit(bit, cpuc->active_mask))
> +			continue;
> +		/*
> +		 * The PEBS hardware does not deal well with the situation
> +		 * when events happen near to each other and multiple bits
> +		 * are set. But it should happen rarely.
> +		 *
> +		 * If these events include one PEBS and multiple non-PEBS
> +		 * events, it doesn't impact PEBS record. The record will
> +		 * be handled normally. (slow path)
> +		 *
> +		 * If these events include two or more PEBS events, the
> +		 * records for the events can be collapsed into a single
> +		 * one, and it's not possible to reconstruct all events
> +		 * that caused the PEBS record. It's called collision.
> +		 * If collision happened, the record will be dropped.
> +		 *
> +		 */
> +		if (p->status != (1 << bit)) {
> +			u64 pebs_status;
> +
> +			/* slow path */
> +			pebs_status = p->status & cpuc->pebs_enabled;
> +			pebs_status &= (1ULL << MAX_PEBS_EVENTS) - 1;
> +			if (pebs_status != (1 << bit)) {
> +				perf_log_lost(event);

Does it make sense to keep an error[bit] count and only log once, with
the actual number of lost records in it -- when it is non-zero, obviously?

>  				continue;
> +			}
>  		}
> +		counts[bit]++;
> +	}
>  
> +	for (bit = 0; bit < x86_pmu.max_pebs_events; bit++) {
> +		if (counts[bit] == 0)
>  			continue;
> +		event = cpuc->events[bit];
> +		__intel_pmu_pebs_event(event, iregs, base,
> +				       top, bit, counts[bit]);
>  	}
>  }

next prev parent reply	other threads:[~2015-05-05 13:17 UTC|newest]

Thread overview: 17+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2015-04-20  8:07 [PATCH V7 0/6] large PEBS interrupt threshold Kan Liang
2015-04-20  8:07 ` [PATCH V7 1/6] perf, x86: use the PEBS auto reload mechanism when possible Kan Liang
2015-04-20  8:07 ` [PATCH V7 2/6] perf, x86: introduce setup_pebs_sample_data() Kan Liang
2015-04-20  8:07 ` [PATCH V7 3/6] perf, x86: handle multiple records in PEBS buffer Kan Liang
2015-05-05 13:07   ` Peter Zijlstra
2015-05-05 13:17     ` Peter Zijlstra
2015-05-05 16:36       ` Liang, Kan
2015-05-05 17:00         ` Peter Zijlstra
2015-05-05 13:16   ` Peter Zijlstra [this message]
2015-05-05 16:30     ` Liang, Kan
2015-05-05 17:08       ` Peter Zijlstra
2015-05-05 17:22         ` Liang, Kan
2015-05-06 13:01       ` Andi Kleen
2015-05-06 13:13         ` Peter Zijlstra
2015-04-20  8:07 ` [PATCH V7 4/6] perf, x86: large PEBS interrupt threshold Kan Liang
2015-04-20  8:07 ` [PATCH V7 5/6] perf, x86: drain PEBS buffer during context switch Kan Liang
2015-04-20  8:07 ` [PATCH V7 6/6] perf, x86: enlarge PEBS buffer Kan Liang

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20150505131637.GO23123@twins.programming.kicks-ass.net \
    --to=peterz@infradead.org \
    --cc=acme@infradead.org \
    --cc=andi@firstfloor.org \
    --cc=eranian@google.com \
    --cc=kan.liang@intel.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link

Be sure your reply has a Subject: header at the top and a blank line before the message body.

This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.