public inbox for linux-kernel@vger.kernel.org
 help / color / mirror / Atom feed
From: Arnaldo Carvalho de Melo <acme@kernel.org>
To: Ingo Molnar <mingo@kernel.org>, Thomas Gleixner <tglx@linutronix.de>
Cc: Jiri Olsa <jolsa@kernel.org>, Namhyung Kim <namhyung@kernel.org>,
	Clark Williams <williams@redhat.com>,
	linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org,
	Adrian Hunter <adrian.hunter@intel.com>,
	Andi Kleen <ak@linux.intel.com>, Jiri Olsa <jolsa@redhat.com>,
	Arnaldo Carvalho de Melo <acme@redhat.com>
Subject: [PATCH 45/91] perf intel-pt: Add support for synthesizing branch stacks for regular events
Date: Wed,  6 May 2020 12:21:48 -0300	[thread overview]
Message-ID: <20200506152234.21977-46-acme@kernel.org> (raw)
In-Reply-To: <20200506152234.21977-1-acme@kernel.org>

From: Adrian Hunter <adrian.hunter@intel.com>

Use the new thread_stack__br_sample_late() function to synthesize a
branch stack for regular events.

Example:

 # perf record --kcore --aux-sample -e '{intel_pt//,cycles:ppp}' -c 10000 uname
 Linux
 [ perf record: Woken up 2 times to write data ]
 [ perf record: Captured and wrote 0.743 MB perf.data ]
 # perf report --itrace=Le --stdio | head -30 | tail -18

 # Samples: 11K of event 'cycles:ppp'
 # Event count (approx.): 11648
 #
 # Overhead  Command  Source Shared Object  Source Symbol                 Target Symbol                 Basic Block Cycles
 # ........  .......  ....................  ............................  ............................  ..................
 #
      5.49%  uname    libc-2.30.so          [.] _dl_addr                  [.] _dl_addr                  -
      2.41%  uname    ld-2.30.so            [.] _dl_relocate_object       [.] _dl_relocate_object       -
      2.31%  uname    ld-2.30.so            [.] do_lookup_x               [.] do_lookup_x               -
      2.17%  uname    [kernel.kallsyms]     [k] unmap_page_range          [k] unmap_page_range          -
      2.05%  uname    ld-2.30.so            [k] _dl_start                 [k] _dl_start                 -
      1.97%  uname    ld-2.30.so            [.] _dl_lookup_symbol_x       [.] _dl_lookup_symbol_x       -
      1.94%  uname    [kernel.kallsyms]     [k] filemap_map_pages         [k] filemap_map_pages         -
      1.60%  uname    [kernel.kallsyms]     [k] __handle_mm_fault         [k] __handle_mm_fault         -
      1.44%  uname    [kernel.kallsyms]     [k] page_add_file_rmap        [k] page_add_file_rmap        -
      1.12%  uname    [kernel.kallsyms]     [k] vma_interval_tree_insert  [k] vma_interval_tree_insert  -
      0.94%  uname    [kernel.kallsyms]     [k] perf_iterate_ctx          [k] perf_iterate_ctx          -

Signed-off-by: Adrian Hunter <adrian.hunter@intel.com>
Cc: Andi Kleen <ak@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Link: http://lore.kernel.org/lkml/20200429150751.12570-8-adrian.hunter@intel.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/intel-pt.c | 73 ++++++++++++++++++++++++++++++++++----
 1 file changed, 66 insertions(+), 7 deletions(-)

diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c
index 03b76904ca52..59811b39430c 100644
--- a/tools/perf/util/intel-pt.c
+++ b/tools/perf/util/intel-pt.c
@@ -72,6 +72,7 @@ struct intel_pt {
 	bool use_thread_stack;
 	bool callstack;
 	unsigned int br_stack_sz;
+	unsigned int br_stack_sz_plus;
 	int have_sched_switch;
 	u32 pmu_type;
 	u64 kernel_start;
@@ -130,6 +131,7 @@ struct intel_pt {
 	unsigned int range_cnt;
 
 	struct ip_callchain *chain;
+	struct branch_stack *br_stack;
 };
 
 enum switch_state {
@@ -911,6 +913,44 @@ static void intel_pt_add_callchain(struct intel_pt *pt,
 	sample->callchain = pt->chain;
 }
 
+static struct branch_stack *intel_pt_alloc_br_stack(struct intel_pt *pt)
+{
+	size_t sz = sizeof(struct branch_stack);
+
+	sz += pt->br_stack_sz * sizeof(struct branch_entry);
+	return zalloc(sz);
+}
+
+static int intel_pt_br_stack_init(struct intel_pt *pt)
+{
+	struct evsel *evsel;
+
+	evlist__for_each_entry(pt->session->evlist, evsel) {
+		if (!(evsel->core.attr.sample_type & PERF_SAMPLE_BRANCH_STACK))
+			evsel->synth_sample_type |= PERF_SAMPLE_BRANCH_STACK;
+	}
+
+	pt->br_stack = intel_pt_alloc_br_stack(pt);
+	if (!pt->br_stack)
+		return -ENOMEM;
+
+	return 0;
+}
+
+static void intel_pt_add_br_stack(struct intel_pt *pt,
+				  struct perf_sample *sample)
+{
+	struct thread *thread = machine__findnew_thread(pt->machine,
+							sample->pid,
+							sample->tid);
+
+	thread_stack__br_sample_late(thread, sample->cpu, pt->br_stack,
+				     pt->br_stack_sz, sample->ip,
+				     pt->kernel_start);
+
+	sample->branch_stack = pt->br_stack;
+}
+
 static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
 						   unsigned int queue_nr)
 {
@@ -929,10 +969,7 @@ static struct intel_pt_queue *intel_pt_alloc_queue(struct intel_pt *pt,
 	}
 
 	if (pt->synth_opts.last_branch) {
-		size_t sz = sizeof(struct branch_stack);
-
-		sz += pt->br_stack_sz * sizeof(struct branch_entry);
-		ptq->last_branch = zalloc(sz);
+		ptq->last_branch = intel_pt_alloc_br_stack(pt);
 		if (!ptq->last_branch)
 			goto out_free;
 	}
@@ -1963,7 +2000,7 @@ static int intel_pt_sample(struct intel_pt_queue *ptq)
 		thread_stack__event(ptq->thread, ptq->cpu, ptq->flags,
 				    state->from_ip, state->to_ip, ptq->insn_len,
 				    state->trace_nr, pt->callstack,
-				    pt->br_stack_sz,
+				    pt->br_stack_sz_plus,
 				    pt->mispred_all);
 	} else {
 		thread_stack__set_trace_nr(ptq->thread, ptq->cpu, state->trace_nr);
@@ -2609,6 +2646,8 @@ static int intel_pt_process_event(struct perf_session *session,
 	if (event->header.type == PERF_RECORD_SAMPLE) {
 		if (pt->synth_opts.add_callchain && !sample->callchain)
 			intel_pt_add_callchain(pt, sample);
+		if (pt->synth_opts.add_last_branch && !sample->branch_stack)
+			intel_pt_add_br_stack(pt, sample);
 	}
 
 	if (event->header.type == PERF_RECORD_AUX &&
@@ -3370,13 +3409,33 @@ int intel_pt_process_auxtrace_info(union perf_event *event,
 			goto err_delete_thread;
 	}
 
-	if (pt->synth_opts.last_branch)
+	if (pt->synth_opts.last_branch || pt->synth_opts.add_last_branch) {
 		pt->br_stack_sz = pt->synth_opts.last_branch_sz;
+		pt->br_stack_sz_plus = pt->br_stack_sz;
+	}
+
+	if (pt->synth_opts.add_last_branch) {
+		err = intel_pt_br_stack_init(pt);
+		if (err)
+			goto err_delete_thread;
+		/*
+		 * Additional branch stack size to cater for tracing from the
+		 * actual sample ip to where the sample time is recorded.
+		 * Measured at about 200 branches, but generously set to 1024.
+		 * If kernel space is not being traced, then add just 1 for the
+		 * branch to kernel space.
+		 */
+		if (intel_pt_tracing_kernel(pt))
+			pt->br_stack_sz_plus += 1024;
+		else
+			pt->br_stack_sz_plus += 1;
+	}
 
 	pt->use_thread_stack = pt->synth_opts.callchain ||
 			       pt->synth_opts.add_callchain ||
 			       pt->synth_opts.thread_stack ||
-			       pt->synth_opts.last_branch;
+			       pt->synth_opts.last_branch ||
+			       pt->synth_opts.add_last_branch;
 
 	pt->callstack = pt->synth_opts.callchain ||
 			pt->synth_opts.add_callchain ||
-- 
2.21.1


  parent reply	other threads:[~2020-05-06 15:25 UTC|newest]

Thread overview: 92+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2020-05-06 15:21 [GIT PULL] perf/core improvements and fixes Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 01/91] perf cgroup: Avoid needless closing of unopened fd Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 02/91] perf bench: Fix div-by-zero if runtime is zero Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 03/91] perf evlist: Remove duplicate headers Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 04/91] perf script: Avoid NULL dereference on symbol Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 05/91] perf stat: Zero all the 'ena' and 'run' array slot stats for interval mode Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 06/91] perf stat: Improve runtime stat " Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 07/91] perf test session topology: Fix data path Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 08/91] perf record: Add num-synthesize-threads option Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 09/91] perf bench: Add a multi-threaded synthesize benchmark Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 10/91] tools api: Add a lightweight buffered reading api Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 11/91] perf synthetic events: Remove use of sscanf from /proc reading Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 12/91] perf script: Remove extraneous newline in perf_sample__fprintf_regs() Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 13/91] libtraceevent: Remove unneeded semicolon Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 14/91] perf c2c: " Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 15/91] perf tools: Remove unneeded semicolons Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 16/91] perf report: Fix warning assignment of 0/1 to bool variable Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 17/91] perf pmu: Fix function name in comment, its get_cpuid_str(), not get_cpustr() Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 18/91] perf metricgroups: Enhance JSON/metric infrastructure to handle "?" Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 19/91] perf tests expr: Added test for runtime param in metric expression Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 20/91] perf tools: Enable Hz/hz prinitg for --metric-only option Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 21/91] perf vendor events power9: Add hv_24x7 socket/chip level metric events Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 22/91] perf tools: Move routines that probe for perf API features to separate file Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 23/91] perf record: Move sb_evlist to 'struct record' Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 24/91] perf top: Move sb_evlist to 'struct perf_top' Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 25/91] perf bpf: Decouple creating the evlist from adding the SB event Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 26/91] perf parse-events: Add parse_events_option() variant that creates evlist Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 27/91] perf evlist: Move the sideband thread routines to separate object Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 28/91] perf evlist: Allow reusing the side band thread for more purposes Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 29/91] libsubcmd: Introduce OPT_CALLBACK_SET() Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 30/91] perf record: Introduce --switch-output-event Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 31/91] perf record: Move side band evlist setup to separate routine Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 32/91] libperf: Add NULL pointer check for cpu_map iteration and NULL assignment for all_cpus Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 33/91] perf parse-events: Fix memory leaks found on parse_events Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 34/91] " Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 35/91] perf parse-events: Fix another memory leaks found on parse_events() Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 36/91] libperf evlist: Fix a refcount leak Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 37/91] perf tools: Fix reading new topology attribute "core_cpus" Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 38/91] perf tools: Simplify checking if SMT is active Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 39/91] perf thread-stack: Add branch stack support Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 40/91] perf intel-pt: Consolidate thread-stack use condition Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 41/91] perf intel-pt: Change branch stack support to use thread-stacks Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 42/91] perf auxtrace: Add option to synthesize branch stack for regular events Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 43/91] perf evsel: Add support for synthesized branch stack sample type Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 44/91] perf thread-stack: Add thread_stack__br_sample_late() Arnaldo Carvalho de Melo
2020-05-06 15:21 ` Arnaldo Carvalho de Melo [this message]
2020-05-06 15:21 ` [PATCH 46/91] perf intel-pt: Update documentation about itrace G and L options Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 47/91] perf intel-pt: Update documentation about using /proc/kcore Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 48/91] perf evsel: Rename 'struct perf_evsel__sb_cb_t' to 'struct evsel__sb_cb_t' Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 49/91] perf evsel: Rename perf_evsel__nr_cpus() to evsel__nr_cpus() Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 50/91] perf evsel: Rename perf_evsel__compute_deltas() to evsel__compute_deltas() Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 51/91] perf evsel: Rename perf_evsel__find_pmu() to evsel__find_pmu() Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 52/91] perf evsel: Rename perf_evsel__is_aux_event() to evsel__is_aux_event() Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 53/91] perf evsel: Rename perf_evsel__exit() to evsel__exit() Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 54/91] perf evsel: Rename perf_evsel__config*() to evsel__config*() Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 55/91] perf evsel: Rename perf_evsel__calc_id_pos() to evsel__calc_id_pos() Arnaldo Carvalho de Melo
2020-05-06 15:21 ` [PATCH 56/91] perf evsel: Rename __perf_evsel__sample_size() to __evsel__sample_size() Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 57/91] perf evsel: Rename *perf_evsel__*name() to *evsel__*name() Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 58/91] perf evsel: Rename perf_evsel__group_desc() to evsel__group_desc() Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 59/91] perf evsel: Rename *perf_evsel__*set_sample_*() to *evsel__*set_sample_*() Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 60/91] perf evsel: Rename perf_evsel__*filter*() to evsel__*filter*() Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 61/91] perf evsel: Rename perf_evsel__open_per_*() to evsel__open_per_*() Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 62/91] perf evsel: Rename perf_evsel__{str,int}val() and other tracepoint field metehods to to evsel__*() Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 63/91] perf mem2node: Avoid double free related to realloc Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 64/91] perf doc: Pass ASCIIDOC_EXTRA as an argument Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 65/91] tools feature: Add support for detecting libpfm4 Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 66/91] perf pmu: Add perf_pmu__find_by_type helper Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 67/91] perf evsel: Rename perf_evsel__is_*() to evsel__is*() Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 68/91] perf evsel: Ditch perf_evsel__cmp(), not used for quite a while Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 69/91] perf evsel: Rename *perf_evsel__read*() to *evsel__read() Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 70/91] perf evsel: Rename perf_evsel__parse_sample*() to evsel__parse_sample*() Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 71/91] perf evsel: Rename perf_evsel__{prev,next}() to evsel__{prev,next}() Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 72/91] perf evsel: Rename perf_evsel__has*() to evsel__has*() Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 73/91] perf evsel: Rename perf_evsel__fallback() to evsel__fallback() Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 74/91] perf evsel: Rename perf_evsel__group_idx() to evsel__group_idx() Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 75/91] perf evsel: Rename perf_evsel__env() to evsel__env() Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 76/91] perf evsel: Rename perf_evsel__store_ids() to evsel__store_id() Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 77/91] perf stat: Rename perf_evsel__*() operating on 'struct evsel *' to evsel__*() Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 78/91] perf kmem: " Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 79/91] perf lock: " Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 80/91] perf sched: " Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 81/91] perf script: " Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 82/91] perf trace: " Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 83/91] perf annotate: " Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 84/91] perf inject: " Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 85/91] perf symbol: Fix kernel symbol address display Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 86/91] perf: cs-etm: Update to build with latest opencsd version Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 87/91] perf bench: Add kallsyms parsing Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 88/91] libsymbols kallsyms: Parse using io api Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 89/91] libsymbols kallsyms: Move hex2u64 out of header Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 90/91] perf cs-etm: Move definition of 'traceid_list' global variable from header file Arnaldo Carvalho de Melo
2020-05-06 15:22 ` [PATCH 91/91] perf flamegraph: Use /bin/bash for report and record scripts Arnaldo Carvalho de Melo

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20200506152234.21977-46-acme@kernel.org \
    --to=acme@kernel.org \
    --cc=acme@redhat.com \
    --cc=adrian.hunter@intel.com \
    --cc=ak@linux.intel.com \
    --cc=jolsa@kernel.org \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=namhyung@kernel.org \
    --cc=tglx@linutronix.de \
    --cc=williams@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox