From mboxrd@z Thu Jan 1 00:00:00 1970 Return-Path: X-Spam-Checker-Version: SpamAssassin 3.4.0 (2014-02-07) on aws-us-west-2-korg-lkml-1.web.codeaurora.org X-Spam-Level: X-Spam-Status: No, score=-9.0 required=3.0 tests=HEADER_FROM_DIFFERENT_DOMAINS, INCLUDES_PATCH,MAILING_LIST_MULTI,SIGNED_OFF_BY,SPF_PASS,URIBL_BLOCKED, USER_AGENT_GIT autolearn=ham autolearn_force=no version=3.4.0 Received: from mail.kernel.org (mail.kernel.org [198.145.29.99]) by smtp.lore.kernel.org (Postfix) with ESMTP id A956CC43387 for ; Wed, 9 Jan 2019 09:20:30 +0000 (UTC) Received: from vger.kernel.org (vger.kernel.org [209.132.180.67]) by mail.kernel.org (Postfix) with ESMTP id 764A420821 for ; Wed, 9 Jan 2019 09:20:30 +0000 (UTC) Received: (majordomo@vger.kernel.org) by vger.kernel.org via listexpand id S1730298AbfAIJUY (ORCPT ); Wed, 9 Jan 2019 04:20:24 -0500 Received: from mga07.intel.com ([134.134.136.100]:13930 "EHLO mga07.intel.com" rhost-flags-OK-OK-OK-OK) by vger.kernel.org with ESMTP id S1730281AbfAIJUU (ORCPT ); Wed, 9 Jan 2019 04:20:20 -0500 X-Amp-Result: SKIPPED(no attachment in message) X-Amp-File-Uploaded: False Received: from orsmga007.jf.intel.com ([10.7.209.58]) by orsmga105.jf.intel.com with ESMTP/TLS/DHE-RSA-AES256-GCM-SHA384; 09 Jan 2019 01:20:20 -0800 X-ExtLoop1: 1 X-IronPort-AV: E=Sophos;i="5.56,456,1539673200"; d="scan'208";a="105163049" Received: from ahunter-desktop.fi.intel.com ([10.237.72.56]) by orsmga007.jf.intel.com with ESMTP; 09 Jan 2019 01:20:18 -0800 From: Adrian Hunter To: Arnaldo Carvalho de Melo Cc: Jiri Olsa , linux-kernel@vger.kernel.org Subject: [PATCH 4/6] perf thread-stack: Represent jmps to the start of a different symbol Date: Wed, 9 Jan 2019 11:18:33 +0200 Message-Id: <20190109091835.5570-5-adrian.hunter@intel.com> X-Mailer: git-send-email 2.17.1 In-Reply-To: <20190109091835.5570-1-adrian.hunter@intel.com> References: <20190109091835.5570-1-adrian.hunter@intel.com> Organization: Intel Finland Oy, Registered Address: PL 281, 00181 Helsinki, Business Identity Code: 0357606 - 4, Domiciled in Helsinki Sender: linux-kernel-owner@vger.kernel.org Precedence: bulk List-ID: X-Mailing-List: linux-kernel@vger.kernel.org The compiler might optimize a call/ret combination by making it a jmp. However the thread-stack does not presently cater for that, so that such control flow is not visible in the call graph. Make it visible by recording on the stack a branch to the start of a different symbol. Note, that means when a ret pops the stack, all jmps must be popped off first. Example: $ cat jmp-to-fn.c __attribute__((noinline)) int bar(void) { return -1; } __attribute__((noinline)) int foo(void) { return bar() + 1; } int main() { return foo(); } $ gcc -ggdb3 -Wall -Wextra -O2 -o jmp-to-fn jmp-to-fn.c $ objdump -d jmp-to-fn 0000000000001040
: 1040: 31 c0 xor %eax,%eax 1042: e9 09 01 00 00 jmpq 1150 0000000000001140 : 1140: b8 ff ff ff ff mov $0xffffffff,%eax 1145: c3 retq 0000000000001150 : 1150: 31 c0 xor %eax,%eax 1152: e8 e9 ff ff ff callq 1140 1157: 83 c0 01 add $0x1,%eax 115a: c3 retq $ perf record -o jmp-to-fn.perf.data -e intel_pt/cyc/u ./jmp-to-fn [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0,017 MB jmp-to-fn.perf.data ] $ perf script -i jmp-to-fn.perf.data --itrace=be -s ~/libexec/perf-core/scripts/python/export-to-sqlite.py jmp-to-fn.db branches calls 2019-01-08 13:24:58.783069 Creating database... 2019-01-08 13:24:58.794650 Writing records... 2019-01-08 13:24:59.008050 Adding indexes 2019-01-08 13:24:59.015802 Done $ ~/libexec/perf-core/scripts/python/exported-sql-viewer.py jmp-to-fn.db Before: main -> bar After: main -> foo -> bar Signed-off-by: Adrian Hunter --- .../scripts/python/export-to-postgresql.py | 2 +- tools/perf/scripts/python/export-to-sqlite.py | 2 +- tools/perf/util/thread-stack.c | 30 +++++++++++++++++-- tools/perf/util/thread-stack.h | 3 ++ 4 files changed, 33 insertions(+), 4 deletions(-) diff --git a/tools/perf/scripts/python/export-to-postgresql.py b/tools/perf/scripts/python/export-to-postgresql.py index 0564dd7377f2..30130213da7e 100644 --- a/tools/perf/scripts/python/export-to-postgresql.py +++ b/tools/perf/scripts/python/export-to-postgresql.py @@ -478,7 +478,7 @@ if perf_db_export_calls: 'branch_count,' 'call_id,' 'return_id,' - 'CASE WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' ELSE \'\' END AS flags,' + 'CASE WHEN flags=0 THEN \'\' WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' WHEN flags=6 THEN \'jump\' ELSE flags END AS flags,' 'parent_call_path_id' ' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id') diff --git a/tools/perf/scripts/python/export-to-sqlite.py b/tools/perf/scripts/python/export-to-sqlite.py index 245caf2643ed..ed237f2ed03f 100644 --- a/tools/perf/scripts/python/export-to-sqlite.py +++ b/tools/perf/scripts/python/export-to-sqlite.py @@ -320,7 +320,7 @@ if perf_db_export_calls: 'branch_count,' 'call_id,' 'return_id,' - 'CASE WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' ELSE \'\' END AS flags,' + 'CASE WHEN flags=0 THEN \'\' WHEN flags=1 THEN \'no call\' WHEN flags=2 THEN \'no return\' WHEN flags=3 THEN \'no call/return\' WHEN flags=6 THEN \'jump\' ELSE flags END AS flags,' 'parent_call_path_id' ' FROM calls INNER JOIN call_paths ON call_paths.id = call_path_id') diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c index 7f8eff018c16..f52c0f90915d 100644 --- a/tools/perf/util/thread-stack.c +++ b/tools/perf/util/thread-stack.c @@ -38,6 +38,7 @@ * @cp: call path * @no_call: a 'call' was not seen * @trace_end: a 'call' but trace ended + * @non_call: a branch but not a 'call' to the start of a different symbol */ struct thread_stack_entry { u64 ret_addr; @@ -47,6 +48,7 @@ struct thread_stack_entry { struct call_path *cp; bool no_call; bool trace_end; + bool non_call; }; /** @@ -268,6 +270,8 @@ static int thread_stack__call_return(struct thread *thread, cr.flags |= CALL_RETURN_NO_CALL; if (no_return) cr.flags |= CALL_RETURN_NO_RETURN; + if (tse->non_call) + cr.flags |= CALL_RETURN_NON_CALL; return crp->process(&cr, crp->data); } @@ -510,6 +514,7 @@ static int thread_stack__push_cp(struct thread_stack *ts, u64 ret_addr, tse->cp = cp; tse->no_call = no_call; tse->trace_end = trace_end; + tse->non_call = false; return 0; } @@ -531,14 +536,16 @@ static int thread_stack__pop_cp(struct thread *thread, struct thread_stack *ts, timestamp, ref, false); } - if (ts->stack[ts->cnt - 1].ret_addr == ret_addr) { + if (ts->stack[ts->cnt - 1].ret_addr == ret_addr && + !ts->stack[ts->cnt - 1].non_call) { return thread_stack__call_return(thread, ts, --ts->cnt, timestamp, ref, false); } else { size_t i = ts->cnt - 1; while (i--) { - if (ts->stack[i].ret_addr != ret_addr) + if (ts->stack[i].ret_addr != ret_addr || + ts->stack[i].non_call) continue; i += 1; while (ts->cnt > i) { @@ -757,6 +764,25 @@ int thread_stack__process(struct thread *thread, struct comm *comm, err = thread_stack__trace_begin(thread, ts, sample->time, ref); } else if (sample->flags & PERF_IP_FLAG_TRACE_END) { err = thread_stack__trace_end(ts, sample, ref); + } else if (sample->flags & PERF_IP_FLAG_BRANCH && + from_al->sym != to_al->sym && to_al->sym && + to_al->addr == to_al->sym->start) { + struct call_path_root *cpr = ts->crp->cpr; + struct call_path *cp; + + /* + * The compiler might optimize a call/ret combination by making + * it a jmp. Make that visible by recording on the stack a + * branch to the start of a different symbol. Note, that means + * when a ret pops the stack, all jmps must be popped off first. + */ + cp = call_path__findnew(cpr, ts->stack[ts->cnt - 1].cp, + to_al->sym, sample->addr, + ts->kernel_start); + err = thread_stack__push_cp(ts, 0, sample->time, ref, cp, false, + false); + if (!err) + ts->stack[ts->cnt - 1].non_call = true; } return err; diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h index 1f626f4a1c40..b7c04e19ad41 100644 --- a/tools/perf/util/thread-stack.h +++ b/tools/perf/util/thread-stack.h @@ -35,10 +35,13 @@ struct call_path; * * CALL_RETURN_NO_CALL: 'return' but no matching 'call' * CALL_RETURN_NO_RETURN: 'call' but no matching 'return' + * CALL_RETURN_NON_CALL: a branch but not a 'call' to the start of a different + * symbol */ enum { CALL_RETURN_NO_CALL = 1 << 0, CALL_RETURN_NO_RETURN = 1 << 1, + CALL_RETURN_NON_CALL = 1 << 2, }; /** -- 2.17.1