linux-perf-users.vger.kernel.org archive mirror
 help / color / mirror / Atom feed
From: Arnaldo Carvalho de Melo <acme@kernel.org>
To: Ingo Molnar <mingo@kernel.org>
Cc: Clark Williams <williams@redhat.com>,
	linux-kernel@vger.kernel.org, linux-perf-users@vger.kernel.org,
	Eric Saint-Etienne <eric.saint.etienne@oracle.com>,
	Alexander Shishkin <alexander.shishkin@linux.intel.com>,
	Jiri Olsa <jolsa@redhat.com>, Namhyung Kim <namhyung@kernel.org>,
	Peter Zijlstra <peterz@infradead.org>,
	Arnaldo Carvalho de Melo <acme@redhat.com>
Subject: [PATCH 27/28] perf symbols: Fix slowness due to -ffunction-section
Date: Thu, 22 Nov 2018 00:36:10 -0300	[thread overview]
Message-ID: <20181122033611.15890-28-acme@kernel.org> (raw)
In-Reply-To: <20181122033611.15890-1-acme@kernel.org>

From: Eric Saint-Etienne <eric.saint.etienne@oracle.com>

Perf can take minutes to parse an image when -ffunction-sections is used.
This is especially true with the kernel image when it is compiled this
way, which is the arm64 default since the patchset "Enable deadcode
elimination at link time".

Perf organizes maps using a rbtree. Whenever perf finds a new symbol, it
first searches this rbtree for the map it belongs to, by strcmp()'ing
section names.  When it finds the map with the right name, it uses it to
add the symbol. With a usual image there aren't so many maps, but when
using -ffunction-sections there's basically one map per function.  With
the kernel image that's north of 40,000 maps. For most symbols perf has
to parse the entire rbtree to eventually create a new map and add it.
Consequently perf spends most of the time browsing a rbtree that keeps
getting larger.

This performance fix introduces a secondary rbtree that indexes maps
based on the section name.

Signed-off-by: Eric Saint-Etienne <eric.saint.etienne@oracle.com>
Reviewed-by: Dave Kleikamp <dave.kleikamp@oracle.com>
Reviewed-by: David Aldridge <david.aldridge@oracle.com>
Reviewed-by: Rob Gardner <rob.gardner@oracle.com>
Cc: Alexander Shishkin <alexander.shishkin@linux.intel.com>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Namhyung Kim <namhyung@kernel.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: http://lkml.kernel.org/r/1542822679-25591-1-git-send-email-eric.saint.etienne@oracle.com
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
 tools/perf/util/map.c    | 27 +++++++++++++++++++++++++++
 tools/perf/util/map.h    |  2 ++
 tools/perf/util/symbol.c | 15 +++++++++++++--
 3 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c
index 354e54550d2b..781eed8e3265 100644
--- a/tools/perf/util/map.c
+++ b/tools/perf/util/map.c
@@ -21,6 +21,7 @@
 #include "unwind.h"
 
 static void __maps__insert(struct maps *maps, struct map *map);
+static void __maps__insert_name(struct maps *maps, struct map *map);
 
 static inline int is_anon_memory(const char *filename, u32 flags)
 {
@@ -496,6 +497,7 @@ u64 map__objdump_2mem(struct map *map, u64 ip)
 static void maps__init(struct maps *maps)
 {
 	maps->entries = RB_ROOT;
+	maps->names = RB_ROOT;
 	init_rwsem(&maps->lock);
 }
 
@@ -664,6 +666,7 @@ size_t map_groups__fprintf(struct map_groups *mg, FILE *fp)
 static void __map_groups__insert(struct map_groups *mg, struct map *map)
 {
 	__maps__insert(&mg->maps, map);
+	__maps__insert_name(&mg->maps, map);
 	map->groups = mg;
 }
 
@@ -824,10 +827,34 @@ static void __maps__insert(struct maps *maps, struct map *map)
 	map__get(map);
 }
 
+static void __maps__insert_name(struct maps *maps, struct map *map)
+{
+	struct rb_node **p = &maps->names.rb_node;
+	struct rb_node *parent = NULL;
+	struct map *m;
+	int rc;
+
+	while (*p != NULL) {
+		parent = *p;
+		m = rb_entry(parent, struct map, rb_node_name);
+		rc = strcmp(m->dso->short_name, map->dso->short_name);
+		if (rc < 0)
+			p = &(*p)->rb_left;
+		else if (rc  > 0)
+			p = &(*p)->rb_right;
+		else
+			return;
+	}
+	rb_link_node(&map->rb_node_name, parent, p);
+	rb_insert_color(&map->rb_node_name, &maps->names);
+	map__get(map);
+}
+
 void maps__insert(struct maps *maps, struct map *map)
 {
 	down_write(&maps->lock);
 	__maps__insert(maps, map);
+	__maps__insert_name(maps, map);
 	up_write(&maps->lock);
 }
 
diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h
index e0f327b51e66..5c792c90fc4c 100644
--- a/tools/perf/util/map.h
+++ b/tools/perf/util/map.h
@@ -25,6 +25,7 @@ struct map {
 		struct rb_node	rb_node;
 		struct list_head node;
 	};
+	struct rb_node          rb_node_name;
 	u64			start;
 	u64			end;
 	bool			erange_warned;
@@ -57,6 +58,7 @@ struct kmap {
 
 struct maps {
 	struct rb_root	 entries;
+	struct rb_root	 names;
 	struct rw_semaphore lock;
 };
 
diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c
index d188b7588152..dcce74bae6de 100644
--- a/tools/perf/util/symbol.c
+++ b/tools/perf/util/symbol.c
@@ -1680,11 +1680,22 @@ struct map *map_groups__find_by_name(struct map_groups *mg, const char *name)
 {
 	struct maps *maps = &mg->maps;
 	struct map *map;
+	struct rb_node *node;
 
 	down_read(&maps->lock);
 
-	for (map = maps__first(maps); map; map = map__next(map)) {
-		if (map->dso && strcmp(map->dso->short_name, name) == 0)
+	for (node = maps->names.rb_node; node; ) {
+		int rc;
+
+		map = rb_entry(node, struct map, rb_node_name);
+
+		rc = strcmp(map->dso->short_name, name);
+		if (rc < 0)
+			node = node->rb_left;
+		else if (rc > 0)
+			node = node->rb_right;
+		else
+
 			goto out_unlock;
 	}
 
-- 
2.14.5

  parent reply	other threads:[~2018-11-22  3:36 UTC|newest]

Thread overview: 30+ messages / expand[flat|nested]  mbox.gz  Atom feed  top
2018-11-22  3:35 [GIT PULL 00/28] perf/core improvements and fixes Arnaldo Carvalho de Melo
2018-11-22  3:35 ` [PATCH 01/28] perf bpf: Add unistd.h to the headers accessible to bpf proggies Arnaldo Carvalho de Melo
2018-11-22  3:35 ` [PATCH 02/28] perf augmented_syscalls: Filter on a hard coded pid Arnaldo Carvalho de Melo
2018-11-22  3:35 ` [PATCH 03/28] perf augmented_syscalls: Remove needless linux/socket.h include Arnaldo Carvalho de Melo
2018-11-22  3:35 ` [PATCH 04/28] perf bpf: Add defines for map insertion/lookup Arnaldo Carvalho de Melo
2018-11-22  3:35 ` [PATCH 05/28] perf bpf: Add simple pid_filter class accessible to BPF proggies Arnaldo Carvalho de Melo
2018-11-22  3:35 ` [PATCH 06/28] perf augmented_syscalls: Drop 'write', 'poll' for testing without self pid filter Arnaldo Carvalho de Melo
2018-11-22  3:35 ` [PATCH 07/28] perf augmented_syscalls: Use pid_filter Arnaldo Carvalho de Melo
2018-11-22  3:35 ` [PATCH 08/28] perf evlist: Rename perf_evlist__set_filter* to perf_evlist__set_tp_filter* Arnaldo Carvalho de Melo
2018-11-22  3:35 ` [PATCH 09/28] perf trace: Add "_from_option" suffix to trace__set_filter() Arnaldo Carvalho de Melo
2018-11-22  3:35 ` [PATCH 10/28] perf trace: See if there is a map named "filtered_pids" Arnaldo Carvalho de Melo
2018-11-22  3:35 ` [PATCH 11/28] perf trace: Fill in BPF "filtered_pids" map when present Arnaldo Carvalho de Melo
2018-11-22  3:35 ` [PATCH 12/28] perf augmented_syscalls: Remove example hardcoded set of filtered pids Arnaldo Carvalho de Melo
2018-11-22  3:35 ` [PATCH 13/28] Revert "perf augmented_syscalls: Drop 'write', 'poll' for testing without self pid filter" Arnaldo Carvalho de Melo
2018-11-22  3:35 ` [PATCH 14/28] perf script: Add newline after uregs output Arnaldo Carvalho de Melo
2018-11-22  3:35 ` [PATCH 15/28] perf bpf: Reduce the hardcoded .max_entries for pid_maps Arnaldo Carvalho de Melo
2018-11-22  3:35 ` [PATCH 16/28] perf script: Share code and output format for uregs and iregs output Arnaldo Carvalho de Melo
2018-11-22  3:36 ` [PATCH 17/28] perf bench: Move HAVE_PTHREAD_ATTR_SETAFFINITY_NP into bench.h Arnaldo Carvalho de Melo
2018-11-22  3:36 ` [PATCH 18/28] tools build feature: Check if eventfd() is available Arnaldo Carvalho de Melo
2018-11-22  3:36 ` [PATCH 19/28] perf bench: Add epoll parallel epoll_wait benchmark Arnaldo Carvalho de Melo
2024-07-29 11:53   ` Like Xu
2018-11-22  3:36 ` [PATCH 20/28] perf bench: Add epoll_ctl(2) benchmark Arnaldo Carvalho de Melo
2018-11-22  3:36 ` [PATCH 21/28] perf tools: Add Hygon Dhyana support Arnaldo Carvalho de Melo
2018-11-22  3:36 ` [PATCH 22/28] perf pmu: Suppress potential format-truncation warning Arnaldo Carvalho de Melo
2018-11-22  3:36 ` [PATCH 23/28] perf stat: Use perf_evsel__is_clocki() for clock events Arnaldo Carvalho de Melo
2018-11-22  3:36 ` [PATCH 24/28] perf vendor events: Add stepping in CPUID string for x86 Arnaldo Carvalho de Melo
2018-11-22  3:36 ` [PATCH 26/28] perf jvmti: Separate jvmti cmlr check Arnaldo Carvalho de Melo
2018-11-22  3:36 ` Arnaldo Carvalho de Melo [this message]
2018-11-22  3:36 ` [PATCH 28/28] perf pmu: Move *_cpuid_str() weak functions to header.c Arnaldo Carvalho de Melo
2018-11-22  6:54 ` [GIT PULL 00/28] perf/core improvements and fixes Ingo Molnar

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

  Avoid top-posting and favor interleaved quoting:
  https://en.wikipedia.org/wiki/Posting_style#Interleaved_style

* Reply using the --to, --cc, and --in-reply-to
  switches of git-send-email(1):

  git send-email \
    --in-reply-to=20181122033611.15890-28-acme@kernel.org \
    --to=acme@kernel.org \
    --cc=acme@redhat.com \
    --cc=alexander.shishkin@linux.intel.com \
    --cc=eric.saint.etienne@oracle.com \
    --cc=jolsa@redhat.com \
    --cc=linux-kernel@vger.kernel.org \
    --cc=linux-perf-users@vger.kernel.org \
    --cc=mingo@kernel.org \
    --cc=namhyung@kernel.org \
    --cc=peterz@infradead.org \
    --cc=williams@redhat.com \
    /path/to/YOUR_REPLY

  https://kernel.org/pub/software/scm/git/docs/git-send-email.html

* If your mail client supports setting the In-Reply-To header
  via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).