All of lore.kernel.org
 help / color / mirror / Atom feed
From: Adrian Hunter <adrian.hunter@intel.com>
To: Andi Kleen <andi@firstfloor.org>, acme@kernel.org
Cc: jolsa@kernel.org, linux-kernel@vger.kernel.org,
	Andi Kleen <ak@linux.intel.com>
Subject: Re: [PATCH 1/3] perf, tools: Add support for skipping itrace instructions
Date: Wed, 2 Mar 2016 11:43:49 +0200	[thread overview]
Message-ID: <56D6B5D5.3070302@intel.com> (raw)
In-Reply-To: <1456787103-22496-1-git-send-email-andi@firstfloor.org>

On 01/03/16 01:05, Andi Kleen wrote:
> From: Andi Kleen <ak@linux.intel.com>
> 
> When using perf script to look at PT traces it is often
> useful to ignore the initialization code at the beginning.
> 
> On larger traces which may have many millions of instructions
> in initialization code doing that in a pipeline can be very
> slow, with perf script spending a lot of CPU time calling
> printf and writing data.
> 
> This patch adds an extension to the --itrace argument
> that skips 'n' events (instructions, branches or transactions)
> at the beginning. This is much more efficient.
> 
> Cc: Adrian Hunter <adrian.hunter@intel.com>
> Signed-off-by: Andi Kleen <ak@linux.intel.com>
> ---
>  tools/perf/Documentation/intel-pt.txt |  7 +++++++
>  tools/perf/util/auxtrace.c            |  7 +++++++
>  tools/perf/util/auxtrace.h            |  2 ++
>  tools/perf/util/intel-pt.c            | 14 ++++++++++++--
>  4 files changed, 28 insertions(+), 2 deletions(-)
> 
> diff --git a/tools/perf/Documentation/intel-pt.txt b/tools/perf/Documentation/intel-pt.txt
> index be764f9..c6c8318 100644
> --- a/tools/perf/Documentation/intel-pt.txt
> +++ b/tools/perf/Documentation/intel-pt.txt
> @@ -672,6 +672,7 @@ The letters are:
>  	d	create a debug log
>  	g	synthesize a call chain (use with i or x)
>  	l	synthesize last branch entries (use with i or x)
> +	s	skip initial number of events
>  
>  "Instructions" events look like they were recorded by "perf record -e
>  instructions".
> @@ -730,6 +731,12 @@ from one sample to the next.
>  
>  To disable trace decoding entirely, use the option --no-itrace.
>  
> +It is also possible to skip events generated (instructions, branches, transactions)
> +at the beginning. This is useful to ignore initialization code.
> +
> +	--itrace=i0nss1000000
> +
> +skips the first million instructions.
>  
>  dump option
>  -----------

Also need to update tools/perf/Documentation/itrace.txt

> diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
> index 360fda0..a013c1f 100644
> --- a/tools/perf/util/auxtrace.c
> +++ b/tools/perf/util/auxtrace.c
> @@ -939,6 +939,7 @@ void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts)
>  	synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD;
>  	synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ;
>  	synth_opts->last_branch_sz = PERF_ITRACE_DEFAULT_LAST_BRANCH_SZ;
> +	synth_opts->initial_skip = 0;
>  }
>  
>  /*
> @@ -1063,6 +1064,12 @@ int itrace_parse_synth_opts(const struct option *opt, const char *str,
>  				synth_opts->last_branch_sz = val;
>  			}
>  			break;
> +		case 's':
> +			synth_opts->initial_skip = strtoul(p, &endptr, 10);
> +			if (p == endptr)
> +				goto out_err;
> +			p = endptr;
> +			break;
>  		case ' ':
>  		case ',':
>  			break;
> diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
> index b86f90db..235d3c3 100644
> --- a/tools/perf/util/auxtrace.h
> +++ b/tools/perf/util/auxtrace.h
> @@ -68,6 +68,7 @@ enum itrace_period_type {
>   * @last_branch_sz: branch context size
>   * @period: 'instructions' events period
>   * @period_type: 'instructions' events period type
> + * @initial_skip: skip N events at the beginning.
>   */
>  struct itrace_synth_opts {
>  	bool			set;
> @@ -86,6 +87,7 @@ struct itrace_synth_opts {
>  	unsigned int		last_branch_sz;
>  	unsigned long long	period;
>  	enum itrace_period_type	period_type;
> +	unsigned long		initial_skip;
>  };
>  
>  /**
> diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
> index 05d8158..a0c5e7d 100644
> --- a/tools/perf/util/intel-pt.c
> +++ b/tools/perf/util/intel-pt.c
> @@ -100,6 +100,8 @@ struct intel_pt {
>  	u64 cyc_bit;
>  	u64 noretcomp_bit;
>  	unsigned max_non_turbo_ratio;
> +
> +	unsigned long num_events;
>  };
>  
>  enum switch_state {
> @@ -972,6 +974,10 @@ static int intel_pt_synth_branch_sample(struct intel_pt_queue *ptq)
>  	if (pt->branches_filter && !(pt->branches_filter & ptq->flags))
>  		return 0;
>  
> +	if (pt->synth_opts.initial_skip &&
> +	    pt->num_events++ >= pt->synth_opts.initial_skip)

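The '>=' looks the wrong way around: as written, samples are dropped once
num_events has reached initial_skip, rather than while it is still below it.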

> +		return 0;
> +

For consistency, wouldn't it be better to do it this way for "instructions"
and "transactions" too, i.e. in intel_pt_synth_instruction_sample() and
intel_pt_synth_transaction_sample()?

>  	event->sample.header.type = PERF_RECORD_SAMPLE;
>  	event->sample.header.misc = PERF_RECORD_MISC_USER;
>  	event->sample.header.size = sizeof(struct perf_event_header);
> @@ -1196,14 +1202,18 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
>  	ptq->have_sample = false;
>  
>  	if (pt->sample_instructions &&
> -	    (state->type & INTEL_PT_INSTRUCTION)) {
> +	    (state->type & INTEL_PT_INSTRUCTION) &&
> +	    (!pt->synth_opts.initial_skip ||
> +	     pt->num_events++ >= pt->synth_opts.initial_skip)) {
>  		err = intel_pt_synth_instruction_sample(ptq);
>  		if (err)
>  			return err;
>  	}
>  
>  	if (pt->sample_transactions &&
> -	    (state->type & INTEL_PT_TRANSACTION)) {
> +	    (state->type & INTEL_PT_TRANSACTION) &&
> +	    (!pt->synth_opts.initial_skip ||
> +	     pt->num_events++ >= pt->synth_opts.initial_skip)) {
>  		err = intel_pt_synth_transaction_sample(ptq);
>  		if (err)
>  			return err;
> 

The BTS code is almost the same, so I would suggest doing BTS as well as PT.
i.e.

diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
index eb0e7f8bf515..e3cc1057a097 100644
--- a/tools/perf/util/intel-bts.c
+++ b/tools/perf/util/intel-bts.c
@@ -66,6 +66,7 @@ struct intel_bts {
 	u64				branches_id;
 	size_t				branches_event_size;
 	bool				synth_needs_swap;
+	unsigned long			num_events;
 };
 
 struct intel_bts_queue {
@@ -275,6 +276,10 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
 	union perf_event event;
 	struct perf_sample sample = { .ip = 0, };
 
+	if (bts->synth_opts.initial_skip &&
+	    bts->num_events++ <= bts->synth_opts.initial_skip)
+		return 0;
+
 	event.sample.header.type = PERF_RECORD_SAMPLE;
 	event.sample.header.misc = PERF_RECORD_MISC_USER;
 	event.sample.header.size = sizeof(struct perf_event_header);

      parent reply	other threads:[~2016-03-02  9:47 UTC|newest]

Thread overview: 4+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2016-02-29 23:05 [PATCH 1/3] perf, tools: Add support for skipping itrace instructions Andi Kleen
2016-02-29 23:05 ` [PATCH 2/3] perf, tools: Add support for probing for udev86 Andi Kleen
2016-02-29 23:05 ` [PATCH 3/3] perf, tools, script: Add support for printing assembler Andi Kleen
2016-03-02  9:43 ` Adrian Hunter [this message]

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=56D6B5D5.3070302@intel.com \
    --to=adrian.hunter@intel.com \
    --cc=acme@kernel.org \
    --cc=ak@linux.intel.com \
    --cc=andi@firstfloor.org \
    --cc=jolsa@kernel.org \
    --cc=linux-kernel@vger.kernel.org \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.