From: Adrian Hunter <adrian.hunter@intel.com>
To: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: linux-kernel@vger.kernel.org, Jiri Olsa <jolsa@redhat.com>
Subject: Re: [PATCH 2/2] perf tools: Add support for PERF_RECORD_SWITCH to Intel PT
Date: Wed, 2 Sep 2015 16:09:47 +0300 [thread overview]
Message-ID: <55E6F51B.9010203@intel.com> (raw)
In-Reply-To: <1439458857-30636-3-git-send-email-adrian.hunter@intel.com>
On 13/08/15 12:40, Adrian Hunter wrote:
> Add support for selecting and processing PERF_RECORD_SWITCH
> events for use by Intel PT. If they are available, they will be
> used in preference to sched_switch events.
>
> This enables an unprivileged user to trace multi-threaded or
> multi-process workloads with any level of perf_event_paranoid.
> However, it depends on kernel support for PERF_RECORD_SWITCH.
>
> Without this patch, tracing a multi-threaded workload will
> decode without error but all the data will be attributed to
> the main thread.
>
> Without this patch, tracing a multi-process workload will
> result in decoder errors because the decoder will not know
> which executable is executing.
>
> Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
This one still applies.
> ---
> tools/perf/arch/x86/util/intel-pt.c | 55 ++++++++++++---
> tools/perf/util/intel-pt.c | 129 +++++++++++++++++++++++++++++-------
> 2 files changed, 151 insertions(+), 33 deletions(-)
>
> diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
> index 2ca10d796c0b..b02af064f0f9 100644
> --- a/tools/perf/arch/x86/util/intel-pt.c
> +++ b/tools/perf/arch/x86/util/intel-pt.c
> @@ -624,13 +624,49 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
> * threads.
> */
> if (have_timing_info && !cpu_map__empty(cpus)) {
> - err = intel_pt_track_switches(evlist);
> - if (err == -EPERM)
> - pr_debug2("Unable to select sched:sched_switch\n");
> - else if (err)
> - return err;
> - else
> - ptr->have_sched_switch = 1;
> + if (perf_can_record_switch_events()) {
> + bool cpu_wide = !target__none(&opts->target) &&
> + !target__has_task(&opts->target);
> +
> + if (!cpu_wide && perf_can_record_cpu_wide()) {
> + struct perf_evsel *switch_evsel;
> +
> + err = parse_events(evlist, "dummy:u", NULL);
> + if (err)
> + return err;
> +
> + switch_evsel = perf_evlist__last(evlist);
> +
> + switch_evsel->attr.freq = 0;
> + switch_evsel->attr.sample_period = 1;
> + switch_evsel->attr.context_switch = 1;
> +
> + switch_evsel->system_wide = true;
> + switch_evsel->no_aux_samples = true;
> + switch_evsel->immediate = true;
> +
> + perf_evsel__set_sample_bit(switch_evsel, TID);
> + perf_evsel__set_sample_bit(switch_evsel, TIME);
> + perf_evsel__set_sample_bit(switch_evsel, CPU);
> +
> + opts->record_switch_events = false;
> + ptr->have_sched_switch = 3;
> + } else {
> + opts->record_switch_events = true;
> + if (cpu_wide)
> + ptr->have_sched_switch = 3;
> + else
> + ptr->have_sched_switch = 2;
> + }
> + } else {
> + err = intel_pt_track_switches(evlist);
> + if (err == -EPERM)
> + pr_debug2("Unable to select sched:sched_switch\n");
> + else if (err)
> + return err;
> + else
> + ptr->have_sched_switch = 1;
> + }
> }
>
> if (intel_pt_evsel) {
> @@ -663,8 +699,11 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
> tracking_evsel->attr.sample_period = 1;
>
> /* In per-cpu case, always need the time of mmap events etc */
> - if (!cpu_map__empty(cpus))
> + if (!cpu_map__empty(cpus)) {
> perf_evsel__set_sample_bit(tracking_evsel, TIME);
> + /* And the CPU for switch events */
> + perf_evsel__set_sample_bit(tracking_evsel, CPU);
> + }
> }
>
> /*
> diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
> index 4bae958096d4..1f6aab58e931 100644
> --- a/tools/perf/util/intel-pt.c
> +++ b/tools/perf/util/intel-pt.c
> @@ -1145,11 +1145,13 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
> return 0;
> }
>
> -static u64 intel_pt_switch_ip(struct machine *machine, u64 *ptss_ip)
> +static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip)
> {
> + struct machine *machine = pt->machine;
> struct map *map;
> struct symbol *sym, *start;
> u64 ip, switch_ip = 0;
> + const char *ptss;
>
> if (ptss_ip)
> *ptss_ip = 0;
> @@ -1177,8 +1179,13 @@ static u64 intel_pt_switch_ip(struct machine *machine, u64 *ptss_ip)
> if (!switch_ip || !ptss_ip)
> return 0;
>
> + if (pt->have_sched_switch == 1)
> + ptss = "perf_trace_sched_switch";
> + else
> + ptss = "__perf_event_task_sched_out";
> +
> for (sym = start; sym; sym = dso__next_symbol(sym)) {
> - if (!strcmp(sym->name, "perf_trace_sched_switch")) {
> + if (!strcmp(sym->name, ptss)) {
> ip = map->unmap_ip(map, sym->start);
> if (ip >= map->start && ip < map->end) {
> *ptss_ip = ip;
> @@ -1198,11 +1205,11 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
>
> if (!pt->kernel_start) {
> pt->kernel_start = machine__kernel_start(pt->machine);
> - if (pt->per_cpu_mmaps && pt->have_sched_switch &&
> + if (pt->per_cpu_mmaps &&
> + (pt->have_sched_switch == 1 || pt->have_sched_switch == 3) &&
> !pt->timeless_decoding && intel_pt_tracing_kernel(pt) &&
> !pt->sampling_mode) {
> - pt->switch_ip = intel_pt_switch_ip(pt->machine,
> - &pt->ptss_ip);
> + pt->switch_ip = intel_pt_switch_ip(pt, &pt->ptss_ip);
> if (pt->switch_ip) {
> intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n",
> pt->switch_ip, pt->ptss_ip);
> @@ -1387,31 +1394,18 @@ static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu)
> return NULL;
> }
>
> -static int intel_pt_process_switch(struct intel_pt *pt,
> - struct perf_sample *sample)
> +static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
> + u64 timestamp)
> {
> struct intel_pt_queue *ptq;
> - struct perf_evsel *evsel;
> - pid_t tid;
> - int cpu, err;
> -
> - evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id);
> - if (evsel != pt->switch_evsel)
> - return 0;
> -
> - tid = perf_evsel__intval(evsel, sample, "next_pid");
> - cpu = sample->cpu;
> -
> - intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
> - cpu, tid, sample->time, perf_time_to_tsc(sample->time,
> - &pt->tc));
> + int err;
>
> if (!pt->sync_switch)
> - goto out;
> + return 1;
>
> ptq = intel_pt_cpu_to_ptq(pt, cpu);
> if (!ptq)
> - goto out;
> + return 1;
>
> switch (ptq->switch_state) {
> case INTEL_PT_SS_NOT_TRACING:
> @@ -1424,7 +1418,7 @@ static int intel_pt_process_switch(struct intel_pt *pt,
> return 0;
> case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
> if (!ptq->on_heap) {
> - ptq->timestamp = perf_time_to_tsc(sample->time,
> + ptq->timestamp = perf_time_to_tsc(timestamp,
> &pt->tc);
> err = auxtrace_heap__add(&pt->heap, ptq->queue_nr,
> ptq->timestamp);
> @@ -1441,10 +1435,76 @@ static int intel_pt_process_switch(struct intel_pt *pt,
> default:
> break;
> }
> -out:
> +
> + return 1;
> +}
> +
> +static int intel_pt_process_switch(struct intel_pt *pt,
> + struct perf_sample *sample)
> +{
> + struct perf_evsel *evsel;
> + pid_t tid;
> + int cpu, ret;
> +
> + evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id);
> + if (evsel != pt->switch_evsel)
> + return 0;
> +
> + tid = perf_evsel__intval(evsel, sample, "next_pid");
> + cpu = sample->cpu;
> +
> + intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
> + cpu, tid, sample->time, perf_time_to_tsc(sample->time,
> + &pt->tc));
> +
> + ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
> + if (ret <= 0)
> + return ret;
> +
> return machine__set_current_tid(pt->machine, cpu, -1, tid);
> }
>
> +static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
> + struct perf_sample *sample)
> +{
> + bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
> + pid_t pid, tid;
> + int cpu, ret;
> +
> + cpu = sample->cpu;
> +
> + if (pt->have_sched_switch == 3) {
> + if (!out)
> + return 0;
> + if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) {
> + pr_err("Expecting CPU-wide context switch event\n");
> + return -EINVAL;
> + }
> + pid = event->context_switch.next_prev_pid;
> + tid = event->context_switch.next_prev_tid;
> + } else {
> + if (out)
> + return 0;
> + pid = sample->pid;
> + tid = sample->tid;
> + }
> +
> + if (tid == -1) {
> + pr_err("context_switch event has no tid\n");
> + return -EINVAL;
> + }
> +
> + intel_pt_log("context_switch: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
> + cpu, pid, tid, sample->time, perf_time_to_tsc(sample->time,
> + &pt->tc));
> +
> + ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
> + if (ret <= 0)
> + return ret;
> +
> + return machine__set_current_tid(pt->machine, cpu, pid, tid);
> +}
> +
> static int intel_pt_process_itrace_start(struct intel_pt *pt,
> union perf_event *event,
> struct perf_sample *sample)
> @@ -1515,6 +1575,9 @@ static int intel_pt_process_event(struct perf_session *session,
> err = intel_pt_process_switch(pt, sample);
> else if (event->header.type == PERF_RECORD_ITRACE_START)
> err = intel_pt_process_itrace_start(pt, event, sample);
> + else if (event->header.type == PERF_RECORD_SWITCH ||
> + event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
> + err = intel_pt_context_switch(pt, event, sample);
>
> intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n",
> perf_event__name(event->header.type), event->header.type,
> @@ -1777,6 +1840,18 @@ static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist)
> return NULL;
> }
>
> +static bool intel_pt_find_switch(struct perf_evlist *evlist)
> +{
> + struct perf_evsel *evsel;
> +
> + evlist__for_each(evlist, evsel) {
> + if (evsel->attr.context_switch)
> + return true;
> + }
> +
> + return false;
> +}
> +
> static const char * const intel_pt_info_fmts[] = {
> [INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n",
> [INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n",
> @@ -1888,6 +1963,10 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
> pr_err("%s: missing sched_switch event\n", __func__);
> goto err_delete_thread;
> }
> + } else if (pt->have_sched_switch == 2 &&
> + !intel_pt_find_switch(session->evlist)) {
> + pr_err("%s: missing context_switch attribute flag\n", __func__);
> + goto err_delete_thread;
> }
>
> if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
>
next prev parent reply other threads:[~2015-09-02 13:14 UTC|newest]
Thread overview: 6+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-08-13 9:40 [PATCH 0/2] perf tools: Add support for PERF_RECORD_SWITCH to Intel PT Adrian Hunter
2015-08-13 9:40 ` [PATCH 1/2] perf tools: Add a helper function to probe whether cpu-wide tracing is possible Adrian Hunter
2015-08-20 9:56 ` [tip:perf/core] " tip-bot for Adrian Hunter
2015-08-13 9:40 ` [PATCH 2/2] perf tools: Add support for PERF_RECORD_SWITCH to Intel PT Adrian Hunter
2015-09-02 13:09 ` Adrian Hunter [this message]
2015-09-08 14:38 ` [tip:perf/core] perf intel-pt: Add support for PERF_RECORD_SWITCH tip-bot for Adrian Hunter
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=55E6F51B.9010203@intel.com \
--to=adrian.hunter@intel.com \
--cc=acme@kernel.org \
--cc=jolsa@redhat.com \
--cc=linux-kernel@vger.kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes;
see the mirroring instructions for how to clone and mirror
all data and code used by this external index.