From: Arnaldo Carvalho de Melo <acme@kernel.org>
To: Ingo Molnar <mingo@kernel.org>
Cc: linux-kernel@vger.kernel.org,
Adrian Hunter <adrian.hunter@intel.com>,
Jiri Olsa <jolsa@redhat.com>,
Arnaldo Carvalho de Melo <acme@redhat.com>
Subject: [PATCH 25/31] perf intel-pt: Add support for PERF_RECORD_SWITCH
Date: Fri, 4 Sep 2015 13:45:09 -0300 [thread overview]
Message-ID: <1441385115-6744-26-git-send-email-acme@kernel.org> (raw)
In-Reply-To: <1441385115-6744-1-git-send-email-acme@kernel.org>
From: Adrian Hunter <adrian.hunter@intel.com>
Add support for selecting and processing PERF_RECORD_SWITCH events for
use by Intel PT. If they are available, they will be used in preference
to sched_switch events.
This enables an unprivileged user to trace multi-threaded or
multi-process workloads with any level of perf_event_paranoid. However
it depends on kernel support for PERF_RECORD_SWITCH.
Without this patch, tracing a multi-threaded workload will decode
without error but all the data will be attributed to the main thread.
Without this patch, tracing a multi-process workload will result in
decoder errors because the decoder will not know which executable is
executing.
Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lkml.kernel.org/r/1439458857-30636-3-git-send-email-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
tools/perf/arch/x86/util/intel-pt.c | 55 ++++++++++++---
tools/perf/util/intel-pt.c | 129 +++++++++++++++++++++++++++++-------
2 files changed, 151 insertions(+), 33 deletions(-)
diff --git a/tools/perf/arch/x86/util/intel-pt.c b/tools/perf/arch/x86/util/intel-pt.c
index 2ca10d796c0b..b02af064f0f9 100644
--- a/tools/perf/arch/x86/util/intel-pt.c
+++ b/tools/perf/arch/x86/util/intel-pt.c
@@ -624,13 +624,49 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
* threads.
*/
if (have_timing_info && !cpu_map__empty(cpus)) {
- err = intel_pt_track_switches(evlist);
- if (err == -EPERM)
- pr_debug2("Unable to select sched:sched_switch\n");
- else if (err)
- return err;
- else
- ptr->have_sched_switch = 1;
+ if (perf_can_record_switch_events()) {
+ bool cpu_wide = !target__none(&opts->target) &&
+ !target__has_task(&opts->target);
+
+ if (!cpu_wide && perf_can_record_cpu_wide()) {
+ struct perf_evsel *switch_evsel;
+
+ err = parse_events(evlist, "dummy:u", NULL);
+ if (err)
+ return err;
+
+ switch_evsel = perf_evlist__last(evlist);
+
+ switch_evsel->attr.freq = 0;
+ switch_evsel->attr.sample_period = 1;
+ switch_evsel->attr.context_switch = 1;
+
+ switch_evsel->system_wide = true;
+ switch_evsel->no_aux_samples = true;
+ switch_evsel->immediate = true;
+
+ perf_evsel__set_sample_bit(switch_evsel, TID);
+ perf_evsel__set_sample_bit(switch_evsel, TIME);
+ perf_evsel__set_sample_bit(switch_evsel, CPU);
+
+ opts->record_switch_events = false;
+ ptr->have_sched_switch = 3;
+ } else {
+ opts->record_switch_events = true;
+ if (cpu_wide)
+ ptr->have_sched_switch = 3;
+ else
+ ptr->have_sched_switch = 2;
+ }
+ } else {
+ err = intel_pt_track_switches(evlist);
+ if (err == -EPERM)
+ pr_debug2("Unable to select sched:sched_switch\n");
+ else if (err)
+ return err;
+ else
+ ptr->have_sched_switch = 1;
+ }
}
if (intel_pt_evsel) {
@@ -663,8 +699,11 @@ static int intel_pt_recording_options(struct auxtrace_record *itr,
tracking_evsel->attr.sample_period = 1;
/* In per-cpu case, always need the time of mmap events etc */
- if (!cpu_map__empty(cpus))
+ if (!cpu_map__empty(cpus)) {
perf_evsel__set_sample_bit(tracking_evsel, TIME);
+ /* And the CPU for switch events */
+ perf_evsel__set_sample_bit(tracking_evsel, CPU);
+ }
}
/*
diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index bb41c20e6005..2968b37ed9b0 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -1145,11 +1145,13 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
return 0;
}
-static u64 intel_pt_switch_ip(struct machine *machine, u64 *ptss_ip)
+static u64 intel_pt_switch_ip(struct intel_pt *pt, u64 *ptss_ip)
{
+ struct machine *machine = pt->machine;
struct map *map;
struct symbol *sym, *start;
u64 ip, switch_ip = 0;
+ const char *ptss;
if (ptss_ip)
*ptss_ip = 0;
@@ -1177,8 +1179,13 @@ static u64 intel_pt_switch_ip(struct machine *machine, u64 *ptss_ip)
if (!switch_ip || !ptss_ip)
return 0;
+ if (pt->have_sched_switch == 1)
+ ptss = "perf_trace_sched_switch";
+ else
+ ptss = "__perf_event_task_sched_out";
+
for (sym = start; sym; sym = dso__next_symbol(sym)) {
- if (!strcmp(sym->name, "perf_trace_sched_switch")) {
+ if (!strcmp(sym->name, ptss)) {
ip = map->unmap_ip(map, sym->start);
if (ip >= map->start && ip < map->end) {
*ptss_ip = ip;
@@ -1198,11 +1205,11 @@ static int intel_pt_run_decoder(struct intel_pt_queue *ptq, u64 *timestamp)
if (!pt->kernel_start) {
pt->kernel_start = machine__kernel_start(pt->machine);
- if (pt->per_cpu_mmaps && pt->have_sched_switch &&
+ if (pt->per_cpu_mmaps &&
+ (pt->have_sched_switch == 1 || pt->have_sched_switch == 3) &&
!pt->timeless_decoding && intel_pt_tracing_kernel(pt) &&
!pt->sampling_mode) {
- pt->switch_ip = intel_pt_switch_ip(pt->machine,
- &pt->ptss_ip);
+ pt->switch_ip = intel_pt_switch_ip(pt, &pt->ptss_ip);
if (pt->switch_ip) {
intel_pt_log("switch_ip: %"PRIx64" ptss_ip: %"PRIx64"\n",
pt->switch_ip, pt->ptss_ip);
@@ -1387,31 +1394,18 @@ static struct intel_pt_queue *intel_pt_cpu_to_ptq(struct intel_pt *pt, int cpu)
return NULL;
}
-static int intel_pt_process_switch(struct intel_pt *pt,
- struct perf_sample *sample)
+static int intel_pt_sync_switch(struct intel_pt *pt, int cpu, pid_t tid,
+ u64 timestamp)
{
struct intel_pt_queue *ptq;
- struct perf_evsel *evsel;
- pid_t tid;
- int cpu, err;
-
- evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id);
- if (evsel != pt->switch_evsel)
- return 0;
-
- tid = perf_evsel__intval(evsel, sample, "next_pid");
- cpu = sample->cpu;
-
- intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
- cpu, tid, sample->time, perf_time_to_tsc(sample->time,
- &pt->tc));
+ int err;
if (!pt->sync_switch)
- goto out;
+ return 1;
ptq = intel_pt_cpu_to_ptq(pt, cpu);
if (!ptq)
- goto out;
+ return 1;
switch (ptq->switch_state) {
case INTEL_PT_SS_NOT_TRACING:
@@ -1424,7 +1418,7 @@ static int intel_pt_process_switch(struct intel_pt *pt,
return 0;
case INTEL_PT_SS_EXPECTING_SWITCH_EVENT:
if (!ptq->on_heap) {
- ptq->timestamp = perf_time_to_tsc(sample->time,
+ ptq->timestamp = perf_time_to_tsc(timestamp,
&pt->tc);
err = auxtrace_heap__add(&pt->heap, ptq->queue_nr,
ptq->timestamp);
@@ -1441,10 +1435,76 @@ static int intel_pt_process_switch(struct intel_pt *pt,
default:
break;
}
-out:
+
+ return 1;
+}
+
+static int intel_pt_process_switch(struct intel_pt *pt,
+ struct perf_sample *sample)
+{
+ struct perf_evsel *evsel;
+ pid_t tid;
+ int cpu, ret;
+
+ evsel = perf_evlist__id2evsel(pt->session->evlist, sample->id);
+ if (evsel != pt->switch_evsel)
+ return 0;
+
+ tid = perf_evsel__intval(evsel, sample, "next_pid");
+ cpu = sample->cpu;
+
+ intel_pt_log("sched_switch: cpu %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
+ cpu, tid, sample->time, perf_time_to_tsc(sample->time,
+ &pt->tc));
+
+ ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
+ if (ret <= 0)
+ return ret;
+
return machine__set_current_tid(pt->machine, cpu, -1, tid);
}
+static int intel_pt_context_switch(struct intel_pt *pt, union perf_event *event,
+ struct perf_sample *sample)
+{
+ bool out = event->header.misc & PERF_RECORD_MISC_SWITCH_OUT;
+ pid_t pid, tid;
+ int cpu, ret;
+
+ cpu = sample->cpu;
+
+ if (pt->have_sched_switch == 3) {
+ if (!out)
+ return 0;
+ if (event->header.type != PERF_RECORD_SWITCH_CPU_WIDE) {
+ pr_err("Expecting CPU-wide context switch event\n");
+ return -EINVAL;
+ }
+ pid = event->context_switch.next_prev_pid;
+ tid = event->context_switch.next_prev_tid;
+ } else {
+ if (out)
+ return 0;
+ pid = sample->pid;
+ tid = sample->tid;
+ }
+
+ if (tid == -1) {
+ pr_err("context_switch event has no tid\n");
+ return -EINVAL;
+ }
+
+ intel_pt_log("context_switch: cpu %d pid %d tid %d time %"PRIu64" tsc %#"PRIx64"\n",
+ cpu, pid, tid, sample->time, perf_time_to_tsc(sample->time,
+ &pt->tc));
+
+ ret = intel_pt_sync_switch(pt, cpu, tid, sample->time);
+ if (ret <= 0)
+ return ret;
+
+ return machine__set_current_tid(pt->machine, cpu, pid, tid);
+}
+
static int intel_pt_process_itrace_start(struct intel_pt *pt,
union perf_event *event,
struct perf_sample *sample)
@@ -1515,6 +1575,9 @@ static int intel_pt_process_event(struct perf_session *session,
err = intel_pt_process_switch(pt, sample);
else if (event->header.type == PERF_RECORD_ITRACE_START)
err = intel_pt_process_itrace_start(pt, event, sample);
+ else if (event->header.type == PERF_RECORD_SWITCH ||
+ event->header.type == PERF_RECORD_SWITCH_CPU_WIDE)
+ err = intel_pt_context_switch(pt, event, sample);
intel_pt_log("event %s (%u): cpu %d time %"PRIu64" tsc %#"PRIx64"\n",
perf_event__name(event->header.type), event->header.type,
@@ -1777,6 +1840,18 @@ static struct perf_evsel *intel_pt_find_sched_switch(struct perf_evlist *evlist)
return NULL;
}
+static bool intel_pt_find_switch(struct perf_evlist *evlist)
+{
+ struct perf_evsel *evsel;
+
+ evlist__for_each(evlist, evsel) {
+ if (evsel->attr.context_switch)
+ return true;
+ }
+
+ return false;
+}
+
static const char * const intel_pt_info_fmts[] = {
[INTEL_PT_PMU_TYPE] = " PMU Type %"PRId64"\n",
[INTEL_PT_TIME_SHIFT] = " Time Shift %"PRIu64"\n",
@@ -1888,6 +1963,10 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
pr_err("%s: missing sched_switch event\n", __func__);
goto err_delete_thread;
}
+ } else if (pt->have_sched_switch == 2 &&
+ !intel_pt_find_switch(session->evlist)) {
+ pr_err("%s: missing context_switch attribute flag\n", __func__);
+ goto err_delete_thread;
}
if (session->itrace_synth_opts && session->itrace_synth_opts->set) {
--
2.1.0
next prev parent reply other threads:[~2015-09-04 16:45 UTC|newest]
Thread overview: 36+ messages / expand[flat|nested] mbox.gz Atom feed top
2015-09-04 16:44 [GIT PULL 00/31] perf/core improvements and fixes Arnaldo Carvalho de Melo
2015-09-04 16:44 ` [PATCH 01/31] perf tools: Always use non inlined file name for 'srcfile' sort key Arnaldo Carvalho de Melo
2015-09-04 16:44 ` [PATCH 02/31] perf tools: Copy linux/filter.h to tools/include Arnaldo Carvalho de Melo
2015-09-04 16:44 ` [PATCH 03/31] tools lib traceevent: Support function __get_dynamic_array_len Arnaldo Carvalho de Melo
2015-09-04 16:44 ` [PATCH 04/31] perf cpumap: Factor out functions to get core_id and socket_id Arnaldo Carvalho de Melo
2015-09-04 16:44 ` [PATCH 05/31] perf tools: Store the cpu socket and core ids in the perf.data header Arnaldo Carvalho de Melo
2015-09-04 16:44 ` [PATCH 06/31] perf stat: Quieten failed to read counter message Arnaldo Carvalho de Melo
2015-09-04 16:44 ` [PATCH 07/31] perf tools: Remove mountpoint arg from perf_debugfs_mount Arnaldo Carvalho de Melo
2015-09-04 16:44 ` [PATCH 08/31] perf tools: Move tracing_path stuff under same namespace Arnaldo Carvalho de Melo
2015-09-04 16:44 ` [PATCH 09/31] tools lib api fs: Move tracing_path interface into api/fs/tracing_path.c Arnaldo Carvalho de Melo
2015-09-04 16:44 ` [PATCH 10/31] tools lib api fs: Move debugfs__strerror_open into tracing_path.c object Arnaldo Carvalho de Melo
2015-09-04 16:57 ` Raphaël Beamonte
2015-09-04 20:38 ` Arnaldo Carvalho de Melo
2015-09-04 16:44 ` [PATCH 11/31] tools lib api fs: Add STR and PATH_MAX macros to fs object Arnaldo Carvalho de Melo
2015-09-04 16:44 ` [PATCH 12/31] tools lib api fs: Move SYSFS_MAGIC PROC_SUPER_MAGIC into fs.c Arnaldo Carvalho de Melo
2015-09-04 16:44 ` [PATCH 13/31] tools lib api fs: Add debugfs into fs.c object Arnaldo Carvalho de Melo
2015-09-04 16:44 ` [PATCH 14/31] tools lib api fs: Add tracefs " Arnaldo Carvalho de Melo
2015-09-04 16:44 ` [PATCH 15/31] tools lib api fs: Add FSTYPE__mount() method Arnaldo Carvalho de Melo
2015-09-04 17:02 ` Raphaël Beamonte
2015-09-04 16:45 ` [PATCH 16/31] tools lib api fs: Add FSTYPE__configured() method Arnaldo Carvalho de Melo
2015-09-04 16:45 ` [PATCH 17/31] perf tools: Display build warning if x86 instruction decoder differs from kernel Arnaldo Carvalho de Melo
2015-09-04 16:45 ` [PATCH 18/31] perf tools: Add a test for decoding of new x86 instructions Arnaldo Carvalho de Melo
2015-09-04 16:45 ` [PATCH 19/31] x86/insn: perf tools: Pedantically tweak opcode map for MPX instructions Arnaldo Carvalho de Melo
2015-09-04 16:45 ` [PATCH 20/31] x86/insn: perf tools: Add new SHA instructions Arnaldo Carvalho de Melo
2015-09-04 16:45 ` [PATCH 21/31] x86/insn: perf tools: Add new memory instructions Arnaldo Carvalho de Melo
2015-09-04 16:45 ` [PATCH 22/31] x86/insn: perf tools: Add new memory protection keys instructions Arnaldo Carvalho de Melo
2015-09-04 16:45 ` [PATCH 23/31] x86/insn: perf tools: Add new xsave instructions Arnaldo Carvalho de Melo
2015-09-04 16:45 ` [PATCH 24/31] perf session: Don't call dump_sample() when evsel is NULL Arnaldo Carvalho de Melo
2015-09-04 16:45 ` Arnaldo Carvalho de Melo [this message]
2015-09-04 16:45 ` [PATCH 26/31] perf probe: Split add_perf_probe_events() Arnaldo Carvalho de Melo
2015-09-04 16:45 ` [PATCH 27/31] perf probe: Link trace_probe_event into perf_probe_event Arnaldo Carvalho de Melo
2015-09-04 16:45 ` [PATCH 28/31] perf probe: Move print logic into cmd_probe() Arnaldo Carvalho de Melo
2015-09-04 16:45 ` [PATCH 29/31] perf probe: Split del_perf_probe_events() Arnaldo Carvalho de Melo
2015-09-04 16:45 ` [PATCH 30/31] perf probe: Print deleted events in cmd_probe() Arnaldo Carvalho de Melo
2015-09-04 16:45 ` [PATCH 31/31] perf trace: Add read/write to the file group Arnaldo Carvalho de Melo
2015-09-08 14:24 ` [GIT PULL 00/31] perf/core improvements and fixes Ingo Molnar
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=1441385115-6744-26-git-send-email-acme@kernel.org \
--to=acme@kernel.org \
--cc=acme@redhat.com \
--cc=adrian.hunter@intel.com \
--cc=jolsa@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=mingo@kernel.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is an external index of several public inboxes,
see mirroring instructions on how to clone and mirror
all data and code used by this external index.