linux-kernel.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Arnaldo Carvalho de Melo <acme@kernel.org>
To: Stanislav Fomichev <stfomichev@yandex-team.ru>
Cc: a.p.zijlstra@chello.nl, paulus@samba.org, mingo@redhat.com,
	dsahern@gmail.com, jolsa@redhat.com,
	xiaoguangrong@linux.vnet.ibm.com, yangds.fnst@cn.fujitsu.com,
	adrian.hunter@intel.com, namhyung@kernel.org,
	linux-kernel@vger.kernel.org
Subject: Re: [PATCH 2/7] perf trace: add support for pagefault tracing
Date: Fri, 20 Jun 2014 11:59:55 -0300	[thread overview]
Message-ID: <20140620145955.GG31524@kernel.org> (raw)
In-Reply-To: <1403261389-13423-3-git-send-email-stfomichev@yandex-team.ru>

Em Fri, Jun 20, 2014 at 02:49:44PM +0400, Stanislav Fomichev escreveu:
> This patch adds optional pagefault tracing support to 'perf trace'.
> Using -F/--pf option user can specify whether he wants minor, major or
> all pagefault events to be traced. This patch adds only live mode,
> record and replay will come in a separate patch.
> 
> Example output:
> 1756272.905 ( 0.000 ms): curl/5937 majfault [0x7fa7261978b6] => /usr/lib/x86_64-linux-gnu/libkrb5.so.26.0.0+0x85288 (d.)

Things like the above may be better expressed using dso@symbol+offset,
even more when the symbol is not resolved within a DSO, due to
insufficient expressivity in the available symtab, i.e. perhaps the user
needs to install a debuginfo package.

> 1862866.036 ( 0.000 ms): wget/8460 majfault [__clear_user+0x3f] => 0x659cb4 (?k)

And here, where is this __clear_user symbol from? The kernel? Probably
that 'k' there means that? If so, probably that deserves an entry in the
documentation.

WRT documentation, please consider sending followup patches providing
examples in the documentation, talking about the possible need of
installing debuginfo packages as well.

But it's shaping up nicely, good work!

- Arnaldo

 
> Signed-off-by: Stanislav Fomichev <stfomichev@yandex-team.ru>
> ---
>  tools/perf/Documentation/perf-trace.txt |  12 ++++
>  tools/perf/builtin-trace.c              | 116 +++++++++++++++++++++++++++++++-
>  2 files changed, 127 insertions(+), 1 deletion(-)
> 
> diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt
> index fae38d9a44a4..8e5f710aa45d 100644
> --- a/tools/perf/Documentation/perf-trace.txt
> +++ b/tools/perf/Documentation/perf-trace.txt
> @@ -107,6 +107,18 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs.
>  	Show tool stats such as number of times fd->pathname was discovered thru
>  	hooking the open syscall return + vfs_getname or via reading /proc/pid/fd, etc.
>  
> +-F=[all|min|maj]::
> +--pf=[all|min|maj]::
> +	Trace pagefaults. Optionally, you can specify whether you want minor,
> +	major or all pagefaults. Default value is maj.
> +
> +EXAMPLES
> +--------
> +
> +Trace syscalls, major and minor pagefaults:
> +
> + $ perf trace -F all
> +
>  SEE ALSO
>  --------
>  linkperf:perf-record[1], linkperf:perf-script[1]
> diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c
> index a9b542918da0..a80aae2bba40 100644
> --- a/tools/perf/builtin-trace.c
> +++ b/tools/perf/builtin-trace.c
> @@ -1177,6 +1177,9 @@ fail:
>  	return NULL;
>  }
>  
> +#define TRACE_PFMAJ		(1 << 0)
> +#define TRACE_PFMIN		(1 << 1)
> +
>  struct trace {
>  	struct perf_tool	tool;
>  	struct {
> @@ -1211,6 +1214,7 @@ struct trace {
>  	bool			summary_only;
>  	bool			show_comm;
>  	bool			show_tool_stats;
> +	int			trace_pgfaults;
>  };
>  
>  static int trace__set_fd_pathname(struct thread *thread, int fd, const char *pathname)
> @@ -1773,6 +1777,59 @@ out_dump:
>  	return 0;
>  }
>  
> +static int trace__pgfault(struct trace *trace,
> +			  struct perf_evsel *evsel,
> +			  union perf_event *event,
> +			  struct perf_sample *sample)
> +{
> +	struct thread *thread;
> +	u8 cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK;
> +	struct addr_location al;
> +	char map_type = 'd';
> +
> +	thread = machine__findnew_thread(trace->host, sample->pid, sample->tid);
> +
> +	thread__find_addr_location(thread, trace->host, cpumode, MAP__FUNCTION,
> +			      sample->ip, &al);
> +
> +	trace__fprintf_entry_head(trace, thread, 0, sample->time, trace->output);
> +
> +	fprintf(trace->output, "%sfault ",
> +		evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ ?
> +		"maj" : "min");
> +
> +	if (al.sym)
> +		fprintf(trace->output, "[%s+0x%lx]",
> +			al.sym->name, al.addr - al.sym->start);
> +	else
> +		fprintf(trace->output, "[0x%lx]", sample->ip);
> +
> +	fprintf(trace->output, " => ");
> +
> +	thread__find_addr_map(thread, trace->host, cpumode, MAP__VARIABLE,
> +			      sample->addr, &al);
> +
> +	if (!al.map) {
> +		thread__find_addr_map(thread, trace->host, cpumode,
> +				      MAP__FUNCTION, sample->addr, &al);
> +
> +		if (al.map)
> +			map_type = 'x';
> +	}
> +
> +	if (al.map) {
> +		fprintf(trace->output, "%s+0x%lx",
> +			al.map->dso->long_name, al.addr);
> +	} else {
> +		map_type = '?';
> +		fprintf(trace->output, "0x%lx", sample->addr);
> +	}
> +
> +	fprintf(trace->output, " (%c%c)\n", map_type, al.level);
> +
> +	return 0;
> +}
> +
>  static bool skip_sample(struct trace *trace, struct perf_sample *sample)
>  {
>  	if ((trace->pid_list && intlist__find(trace->pid_list, sample->pid)) ||
> @@ -1887,6 +1944,30 @@ static void perf_evlist__add_vfs_getname(struct perf_evlist *evlist)
>  	perf_evlist__add(evlist, evsel);
>  }
>  
> +static int perf_evlist__add_pgfault(struct perf_evlist *evlist,
> +				    u64 config)
> +{
> +	struct perf_evsel *evsel;
> +	struct perf_event_attr attr = {
> +		.type = PERF_TYPE_SOFTWARE,
> +		.mmap_data = 1,
> +		.sample_period = 1,
> +	};
> +
> +	attr.config = config;
> +
> +	event_attr_init(&attr);
> +
> +	evsel = perf_evsel__new(&attr);
> +	if (!evsel)
> +		return -ENOMEM;
> +
> +	evsel->handler = trace__pgfault;
> +	perf_evlist__add(evlist, evsel);
> +
> +	return 0;
> +}
> +
>  static int trace__run(struct trace *trace, int argc, const char **argv)
>  {
>  	struct perf_evlist *evlist = perf_evlist__new();
> @@ -1907,6 +1988,14 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
>  
>  	perf_evlist__add_vfs_getname(evlist);
>  
> +	if ((trace->trace_pgfaults & TRACE_PFMAJ) &&
> +	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MAJ))
> +		goto out_error_tp;
> +
> +	if ((trace->trace_pgfaults & TRACE_PFMIN) &&
> +	    perf_evlist__add_pgfault(evlist, PERF_COUNT_SW_PAGE_FAULTS_MIN))
> +		goto out_error_tp;
> +
>  	if (trace->sched &&
>  		perf_evlist__add_newtp(evlist, "sched", "sched_stat_runtime",
>  				trace__sched_stat_runtime))
> @@ -1987,7 +2076,8 @@ again:
>  				goto next_event;
>  			}
>  
> -			if (sample.raw_data == NULL) {
> +			if (evsel->attr.type == PERF_TYPE_TRACEPOINT &&
> +			    sample.raw_data == NULL) {
>  				fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n",
>  				       perf_evsel__name(evsel), sample.tid,
>  				       sample.cpu, sample.raw_size);
> @@ -2269,6 +2359,23 @@ static int trace__open_output(struct trace *trace, const char *filename)
>  	return trace->output == NULL ? -errno : 0;
>  }
>  
> +static int parse_pagefaults(const struct option *opt, const char *str,
> +			    int unset __maybe_unused)
> +{
> +	int *trace_pgfaults = opt->value;
> +
> +	if (strcmp(str, "all") == 0)
> +		*trace_pgfaults |= TRACE_PFMAJ | TRACE_PFMIN;
> +	else if (strcmp(str, "maj") == 0)
> +		*trace_pgfaults |= TRACE_PFMAJ;
> +	else if (strcmp(str, "min") == 0)
> +		*trace_pgfaults |= TRACE_PFMIN;
> +	else
> +		return -1;
> +
> +	return 0;
> +}
> +
>  int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
>  {
>  	const char * const trace_usage[] = {
> @@ -2335,6 +2442,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
>  		    "Show only syscall summary with statistics"),
>  	OPT_BOOLEAN('S', "with-summary", &trace.summary,
>  		    "Show all syscalls and summary with statistics"),
> +	OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min",
> +		     "Trace pagefaults", parse_pagefaults, "maj"),
>  	OPT_END()
>  	};
>  	int err;
> @@ -2349,6 +2458,11 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused)
>  	if (trace.summary_only)
>  		trace.summary = trace.summary_only;
>  
> +	if (trace.trace_pgfaults) {
> +		trace.opts.sample_address = true;
> +		trace.opts.sample_time = true;
> +	}
> +
>  	if (output_name != NULL) {
>  		err = trace__open_output(&trace, output_name);
>  		if (err < 0) {
> -- 
> 1.8.3.2

  reply	other threads:[~2014-06-20 15:00 UTC|newest]

Thread overview: 27+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2014-06-20 10:49 [PATCH v2 0/7] perf trace pagefaults Stanislav Fomichev
2014-06-20 10:49 ` [PATCH 1/7] perf trace: add perf_event parameter to tracepoint_handler Stanislav Fomichev
2014-06-20 10:49 ` [PATCH 2/7] perf trace: add support for pagefault tracing Stanislav Fomichev
2014-06-20 14:59   ` Arnaldo Carvalho de Melo [this message]
2014-06-20 15:49     ` Stanislav Fomichev
2014-06-20 16:11       ` Arnaldo Carvalho de Melo
2014-06-24 12:46         ` Stanislav Fomichev
2014-06-24 15:21           ` Arnaldo Carvalho de Melo
2014-06-20 10:49 ` [PATCH 3/7] perf trace: add pagefaults record and replay support Stanislav Fomichev
2014-06-20 10:49 ` [PATCH 4/7] perf trace: add pagefault statistics Stanislav Fomichev
2014-06-20 10:49 ` [PATCH 5/7] perf trace: add possibility to switch off syscall events Stanislav Fomichev
2014-06-20 10:49 ` [PATCH 6/7] perf kvm: move perf_kvm__mmap_read into session utils Stanislav Fomichev
2014-06-20 14:44   ` Arnaldo Carvalho de Melo
2014-06-20 15:07     ` Stanislav Fomichev
2014-06-20 15:25       ` Arnaldo Carvalho de Melo
2014-06-23 14:06     ` David Ahern
2014-06-23 14:14       ` Stanislav Fomichev
2014-06-20 10:49 ` [PATCH 7/7] perf trace: add events cache Stanislav Fomichev
2014-06-20 13:21 ` [PATCH v2 0/7] perf trace pagefaults Arnaldo Carvalho de Melo
2014-06-20 15:03   ` Stanislav Fomichev
2014-06-20 15:24     ` Arnaldo Carvalho de Melo
2014-06-20 16:18       ` Stanislav Fomichev
2014-06-20 18:30         ` Arnaldo Carvalho de Melo
2014-06-23 11:41           ` Stanislav Fomichev
2014-06-24 14:15             ` Arnaldo Carvalho de Melo
2014-06-23 14:00       ` David Ahern
2014-06-24  7:17   ` Namhyung Kim

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20140620145955.GG31524@kernel.org \
    --to=acme@kernel.org \
    --cc=a.p.zijlstra@chello.nl \
    --cc=adrian.hunter@intel.com \
    --cc=dsahern@gmail.com \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=mingo@redhat.com \
    --cc=namhyung@kernel.org \
    --cc=paulus@samba.org \
    --cc=stfomichev@yandex-team.ru \
    --cc=xiaoguangrong@linux.vnet.ibm.com \
    --cc=yangds.fnst@cn.fujitsu.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).