From: tip-bot for Namhyung Kim <tipbot@zytor.com>
To: linux-tip-commits@vger.kernel.org
Cc: acme@redhat.com, linux-kernel@vger.kernel.org, paulus@samba.org,
hpa@zytor.com, mingo@kernel.org, a.p.zijlstra@chello.nl,
torvalds@linux-foundation.org, namhyung.kim@lge.com,
namhyung@kernel.org, jolsa@redhat.com, fweisbec@gmail.com,
tglx@linutronix.de
Subject: [tip:perf/core] perf callchain: Convert children list to rbtree
Date: Wed, 23 Oct 2013 00:54:48 -0700 [thread overview]
Message-ID: <tip-e369517ce5f796945c6af047b4e8b1d650e03458@git.kernel.org> (raw)
In-Reply-To: <1381468543-25334-2-git-send-email-namhyung@kernel.org>
Commit-ID: e369517ce5f796945c6af047b4e8b1d650e03458
Gitweb: http://git.kernel.org/tip/e369517ce5f796945c6af047b4e8b1d650e03458
Author: Namhyung Kim <namhyung.kim@lge.com>
AuthorDate: Fri, 11 Oct 2013 14:15:36 +0900
Committer: Arnaldo Carvalho de Melo <acme@redhat.com>
CommitDate: Mon, 21 Oct 2013 17:33:23 -0300
perf callchain: Convert children list to rbtree
Current collapse stage has a scalability problem which can be reproduced
easily with a parallel kernel build.
This is because it needs to traverse every children of callchains
linearly during the collapse/merge stage.
Converting it to a rbtree reduced the overhead significantly.
On my 400MB perf.data file which recorded with make -j32 kernel build:
$ time perf --no-pager report --stdio > /dev/null
before:
real 6m22.073s
user 6m18.683s
sys 0m0.706s
after:
real 0m20.780s
user 0m19.962s
sys 0m0.689s
During the perf report the overhead on append_chain_children went down
from 96.69% to 18.16%:
- 18.16% perf perf [.] append_chain_children
- append_chain_children
- 77.48% append_chain_children
+ 69.79% merge_chain_branch
- 22.96% append_chain_children
+ 67.44% merge_chain_branch
+ 30.15% append_chain_children
+ 2.41% callchain_append
+ 7.25% callchain_append
+ 12.26% callchain_append
+ 10.22% merge_chain_branch
+ 11.58% perf perf [.] dso__find_symbol
+ 8.02% perf perf [.] sort__comm_cmp
+ 5.48% perf libc-2.17.so [.] malloc_consolidate
Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Namhyung Kim <namhyung@kernel.org>
Cc: Frederic Weisbecker <fweisbec@gmail.com>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Jiri Olsa <jolsa@redhat.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: http://lkml.kernel.org/r/1381468543-25334-2-git-send-email-namhyung@kernel.org
Signed-off-by: Arnaldo Carvalho de Melo <acme@redhat.com>
---
tools/perf/util/callchain.c | 147 +++++++++++++++++++++++++++++++++-----------
tools/perf/util/callchain.h | 11 ++--
2 files changed, 116 insertions(+), 42 deletions(-)
diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c
index 482f680..e3970e3 100644
--- a/tools/perf/util/callchain.c
+++ b/tools/perf/util/callchain.c
@@ -21,12 +21,6 @@
__thread struct callchain_cursor callchain_cursor;
-#define chain_for_each_child(child, parent) \
- list_for_each_entry(child, &parent->children, siblings)
-
-#define chain_for_each_child_safe(child, next, parent) \
- list_for_each_entry_safe(child, next, &parent->children, siblings)
-
static void
rb_insert_callchain(struct rb_root *root, struct callchain_node *chain,
enum chain_mode mode)
@@ -71,10 +65,16 @@ static void
__sort_chain_flat(struct rb_root *rb_root, struct callchain_node *node,
u64 min_hit)
{
+ struct rb_node *n;
struct callchain_node *child;
- chain_for_each_child(child, node)
+ n = rb_first(&node->rb_root_in);
+ while (n) {
+ child = rb_entry(n, struct callchain_node, rb_node_in);
+ n = rb_next(n);
+
__sort_chain_flat(rb_root, child, min_hit);
+ }
if (node->hit && node->hit >= min_hit)
rb_insert_callchain(rb_root, node, CHAIN_FLAT);
@@ -94,11 +94,16 @@ sort_chain_flat(struct rb_root *rb_root, struct callchain_root *root,
static void __sort_chain_graph_abs(struct callchain_node *node,
u64 min_hit)
{
+ struct rb_node *n;
struct callchain_node *child;
node->rb_root = RB_ROOT;
+ n = rb_first(&node->rb_root_in);
+
+ while (n) {
+ child = rb_entry(n, struct callchain_node, rb_node_in);
+ n = rb_next(n);
- chain_for_each_child(child, node) {
__sort_chain_graph_abs(child, min_hit);
if (callchain_cumul_hits(child) >= min_hit)
rb_insert_callchain(&node->rb_root, child,
@@ -117,13 +122,18 @@ sort_chain_graph_abs(struct rb_root *rb_root, struct callchain_root *chain_root,
static void __sort_chain_graph_rel(struct callchain_node *node,
double min_percent)
{
+ struct rb_node *n;
struct callchain_node *child;
u64 min_hit;
node->rb_root = RB_ROOT;
min_hit = ceil(node->children_hit * min_percent);
- chain_for_each_child(child, node) {
+ n = rb_first(&node->rb_root_in);
+ while (n) {
+ child = rb_entry(n, struct callchain_node, rb_node_in);
+ n = rb_next(n);
+
__sort_chain_graph_rel(child, min_percent);
if (callchain_cumul_hits(child) >= min_hit)
rb_insert_callchain(&node->rb_root, child,
@@ -173,19 +183,26 @@ create_child(struct callchain_node *parent, bool inherit_children)
return NULL;
}
new->parent = parent;
- INIT_LIST_HEAD(&new->children);
INIT_LIST_HEAD(&new->val);
if (inherit_children) {
- struct callchain_node *next;
+ struct rb_node *n;
+ struct callchain_node *child;
+
+ new->rb_root_in = parent->rb_root_in;
+ parent->rb_root_in = RB_ROOT;
- list_splice(&parent->children, &new->children);
- INIT_LIST_HEAD(&parent->children);
+ n = rb_first(&new->rb_root_in);
+ while (n) {
+ child = rb_entry(n, struct callchain_node, rb_node_in);
+ child->parent = new;
+ n = rb_next(n);
+ }
- chain_for_each_child(next, new)
- next->parent = new;
+ /* make it the first child */
+ rb_link_node(&new->rb_node_in, NULL, &parent->rb_root_in.rb_node);
+ rb_insert_color(&new->rb_node_in, &parent->rb_root_in);
}
- list_add_tail(&new->siblings, &parent->children);
return new;
}
@@ -223,7 +240,7 @@ fill_node(struct callchain_node *node, struct callchain_cursor *cursor)
}
}
-static void
+static struct callchain_node *
add_child(struct callchain_node *parent,
struct callchain_cursor *cursor,
u64 period)
@@ -235,6 +252,19 @@ add_child(struct callchain_node *parent,
new->children_hit = 0;
new->hit = period;
+ return new;
+}
+
+static s64 match_chain(struct callchain_cursor_node *node,
+ struct callchain_list *cnode)
+{
+ struct symbol *sym = node->sym;
+
+ if (cnode->ms.sym && sym &&
+ callchain_param.key == CCKEY_FUNCTION)
+ return cnode->ms.sym->start - sym->start;
+ else
+ return cnode->ip - node->ip;
}
/*
@@ -272,9 +302,33 @@ split_add_child(struct callchain_node *parent,
/* create a new child for the new branch if any */
if (idx_total < cursor->nr) {
+ struct callchain_node *first;
+ struct callchain_list *cnode;
+ struct callchain_cursor_node *node;
+ struct rb_node *p, **pp;
+
parent->hit = 0;
- add_child(parent, cursor, period);
parent->children_hit += period;
+
+ node = callchain_cursor_current(cursor);
+ new = add_child(parent, cursor, period);
+
+ /*
+ * This is second child since we moved parent's children
+ * to new (first) child above.
+ */
+ p = parent->rb_root_in.rb_node;
+ first = rb_entry(p, struct callchain_node, rb_node_in);
+ cnode = list_first_entry(&first->val, struct callchain_list,
+ list);
+
+ if (match_chain(node, cnode) < 0)
+ pp = &p->rb_left;
+ else
+ pp = &p->rb_right;
+
+ rb_link_node(&new->rb_node_in, p, pp);
+ rb_insert_color(&new->rb_node_in, &parent->rb_root_in);
} else {
parent->hit = period;
}
@@ -291,16 +345,40 @@ append_chain_children(struct callchain_node *root,
u64 period)
{
struct callchain_node *rnode;
+ struct callchain_cursor_node *node;
+ struct rb_node **p = &root->rb_root_in.rb_node;
+ struct rb_node *parent = NULL;
+
+ node = callchain_cursor_current(cursor);
+ if (!node)
+ return;
/* lookup in childrens */
- chain_for_each_child(rnode, root) {
- unsigned int ret = append_chain(rnode, cursor, period);
+ while (*p) {
+ s64 ret;
+ struct callchain_list *cnode;
- if (!ret)
+ parent = *p;
+ rnode = rb_entry(parent, struct callchain_node, rb_node_in);
+ cnode = list_first_entry(&rnode->val, struct callchain_list,
+ list);
+
+ /* just check first entry */
+ ret = match_chain(node, cnode);
+ if (ret == 0) {
+ append_chain(rnode, cursor, period);
goto inc_children_hit;
+ }
+
+ if (ret < 0)
+ p = &parent->rb_left;
+ else
+ p = &parent->rb_right;
}
/* nothing in children, add to the current node */
- add_child(root, cursor, period);
+ rnode = add_child(root, cursor, period);
+ rb_link_node(&rnode->rb_node_in, parent, p);
+ rb_insert_color(&rnode->rb_node_in, &root->rb_root_in);
inc_children_hit:
root->children_hit += period;
@@ -325,28 +403,20 @@ append_chain(struct callchain_node *root,
*/
list_for_each_entry(cnode, &root->val, list) {
struct callchain_cursor_node *node;
- struct symbol *sym;
node = callchain_cursor_current(cursor);
if (!node)
break;
- sym = node->sym;
-
- if (cnode->ms.sym && sym &&
- callchain_param.key == CCKEY_FUNCTION) {
- if (cnode->ms.sym->start != sym->start)
- break;
- } else if (cnode->ip != node->ip)
+ if (match_chain(node, cnode) != 0)
break;
- if (!found)
- found = true;
+ found = true;
callchain_cursor_advance(cursor);
}
- /* matches not, relay on the parent */
+ /* matches not, relay no the parent */
if (!found) {
cursor->curr = curr_snap;
cursor->pos = start;
@@ -395,8 +465,9 @@ merge_chain_branch(struct callchain_cursor *cursor,
struct callchain_node *dst, struct callchain_node *src)
{
struct callchain_cursor_node **old_last = cursor->last;
- struct callchain_node *child, *next_child;
+ struct callchain_node *child;
struct callchain_list *list, *next_list;
+ struct rb_node *n;
int old_pos = cursor->nr;
int err = 0;
@@ -412,12 +483,16 @@ merge_chain_branch(struct callchain_cursor *cursor,
append_chain_children(dst, cursor, src->hit);
}
- chain_for_each_child_safe(child, next_child, src) {
+ n = rb_first(&src->rb_root_in);
+ while (n) {
+ child = container_of(n, struct callchain_node, rb_node_in);
+ n = rb_next(n);
+ rb_erase(&child->rb_node_in, &src->rb_root_in);
+
err = merge_chain_branch(cursor, dst, child);
if (err)
break;
- list_del(&child->siblings);
free(child);
}
diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
index 2b585bc..7bb3602 100644
--- a/tools/perf/util/callchain.h
+++ b/tools/perf/util/callchain.h
@@ -21,11 +21,11 @@ enum chain_order {
struct callchain_node {
struct callchain_node *parent;
- struct list_head siblings;
- struct list_head children;
struct list_head val;
- struct rb_node rb_node; /* to sort nodes in an rbtree */
- struct rb_root rb_root; /* sorted tree of children */
+ struct rb_node rb_node_in; /* to insert nodes in an rbtree */
+ struct rb_node rb_node; /* to sort nodes in an output tree */
+ struct rb_root rb_root_in; /* input tree of children */
+ struct rb_root rb_root; /* sorted output tree of children */
unsigned int val_nr;
u64 hit;
u64 children_hit;
@@ -86,13 +86,12 @@ extern __thread struct callchain_cursor callchain_cursor;
static inline void callchain_init(struct callchain_root *root)
{
- INIT_LIST_HEAD(&root->node.siblings);
- INIT_LIST_HEAD(&root->node.children);
INIT_LIST_HEAD(&root->node.val);
root->node.parent = NULL;
root->node.hit = 0;
root->node.children_hit = 0;
+ root->node.rb_root_in = RB_ROOT;
root->max_depth = 0;
}
next prev parent reply other threads:[~2013-10-23 7:55 UTC|newest]
Thread overview: 88+ messages / expand[flat|nested] mbox.gz Atom feed top
2013-10-11 5:15 [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Namhyung Kim
2013-10-11 5:15 ` [PATCH 1/8] perf callchain: Convert children list to rbtree Namhyung Kim
2013-10-23 7:54 ` tip-bot for Namhyung Kim [this message]
2013-10-23 11:07 ` [tip:perf/core] " Frederic Weisbecker
2013-10-23 12:45 ` Arnaldo Carvalho de Melo
2013-10-11 5:15 ` [PATCH 2/8] perf ui/progress: Add new helper functions for progress bar Namhyung Kim
2013-10-21 18:13 ` Arnaldo Carvalho de Melo
2013-10-22 18:12 ` Namhyung Kim
2013-10-22 13:06 ` Arnaldo Carvalho de Melo
2013-10-11 5:15 ` [PATCH 3/8] perf tools: Show progress on histogram collapsing Namhyung Kim
2013-10-25 10:33 ` [tip:perf/core] " tip-bot for Namhyung Kim
2013-10-28 15:03 ` [tip:perf/urgent] perf script python: Fix mem leak due to missing Py_DECREFs on dict entries tip-bot for Joseph Schuchart
2013-10-11 5:15 ` [PATCH 4/8] perf tools: Use an accessor to read thread comm Namhyung Kim
2013-10-11 5:15 ` [PATCH 5/8] perf tools: Add time argument on comm setting Namhyung Kim
2013-10-11 5:15 ` [PATCH 6/8] perf tools: Add new comm infrastructure Namhyung Kim
2013-10-25 10:56 ` Frederic Weisbecker
2013-10-25 13:04 ` Arnaldo Carvalho de Melo
2013-10-25 15:33 ` David Ahern
2013-10-25 18:12 ` Frederic Weisbecker
2013-10-25 18:14 ` Arnaldo Carvalho de Melo
2013-10-25 18:19 ` David Ahern
2013-10-28 5:38 ` Namhyung Kim
2013-10-28 9:09 ` Frederic Weisbecker
2013-10-28 9:15 ` Namhyung Kim
2013-10-28 10:12 ` Frederic Weisbecker
2013-10-28 12:43 ` Arnaldo Carvalho de Melo
2013-10-28 14:29 ` Arnaldo Carvalho de Melo
2013-10-28 16:05 ` Frederic Weisbecker
2013-10-28 17:01 ` Arnaldo Carvalho de Melo
2013-10-28 17:48 ` Arnaldo Carvalho de Melo
2013-10-29 9:20 ` Frederic Weisbecker
2013-10-29 13:06 ` Arnaldo Carvalho de Melo
2013-10-11 5:15 ` [PATCH 7/8] perf tools: Compare hists comm by addresses Namhyung Kim
2013-11-04 20:19 ` [tip:perf/core] " tip-bot for Frederic Weisbecker
2013-10-11 5:15 ` [PATCH 8/8] perf tools: Get current comm instead of last one Namhyung Kim
2013-10-11 5:58 ` [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Ingo Molnar
2013-10-11 7:34 ` Jiri Olsa
2013-10-11 8:24 ` Namhyung Kim
2013-10-11 12:59 ` Ingo Molnar
2013-10-11 13:04 ` Peter Zijlstra
2013-10-11 15:11 ` David Ahern
2013-10-11 15:20 ` David Ahern
2013-10-11 21:51 ` Andi Kleen
2013-10-11 22:04 ` David Ahern
2013-10-13 10:25 ` Jiri Olsa
2013-10-13 21:18 ` [RFC] perf record,top: Add callchain option into .perfconfig Jiri Olsa
2013-10-13 21:32 ` Andi Kleen
2013-10-14 7:56 ` [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Ingo Molnar
2013-10-12 16:53 ` Ingo Molnar
2013-10-12 19:42 ` David Ahern
2013-10-13 5:23 ` Ingo Molnar
2013-10-25 19:09 ` RFP: Fixing "-ga -ag -g fp -g dwarf" was " Arnaldo Carvalho de Melo
2013-10-25 19:22 ` David Ahern
2013-10-25 19:46 ` Arnaldo Carvalho de Melo
2013-10-26 12:03 ` Ingo Molnar
2013-10-26 12:35 ` Jiri Olsa
2013-10-26 14:25 ` [PATCH 0/4] perf tools: Fix -g option handling Jiri Olsa
2013-10-26 14:25 ` [PATCH 1/4] perf tools: Split -g and --call-graph for record command Jiri Olsa
2013-10-27 15:30 ` David Ahern
2013-10-28 17:46 ` Arnaldo Carvalho de Melo
2013-10-28 18:20 ` David Ahern
2013-10-29 5:13 ` Namhyung Kim
2013-10-28 7:59 ` Namhyung Kim
2013-10-29 10:18 ` Jiri Olsa
2013-10-29 12:42 ` Arnaldo Carvalho de Melo
2013-10-29 8:22 ` [tip:perf/urgent] perf record: Split -g and --call-graph tip-bot for Jiri Olsa
2013-10-26 14:25 ` [PATCH 2/4] perf tools: Split -G and --call-graph for top command Jiri Olsa
2013-10-27 15:34 ` David Ahern
2013-10-28 8:06 ` Namhyung Kim
2013-10-29 8:22 ` [tip:perf/urgent] perf top: Split -G and --call-graph tip-bot for Jiri Olsa
2013-10-26 14:25 ` [PATCH 3/4] perf tools: Add call-graph option support into .perfconfig Jiri Olsa
2013-10-27 15:36 ` David Ahern
2013-10-28 8:10 ` Namhyung Kim
2013-10-29 10:18 ` Jiri Olsa
2013-10-29 12:43 ` Arnaldo Carvalho de Melo
2013-10-29 12:46 ` Ingo Molnar
2013-11-01 15:20 ` Jiri Olsa
2013-10-26 14:25 ` [PATCH 4/4] perf tools: Add readable output for callchain debug Jiri Olsa
2013-10-27 15:39 ` David Ahern
2013-10-26 14:32 ` [PATCH 0/4] perf tools: Fix -g option handling Jiri Olsa
2013-10-27 6:56 ` Ingo Molnar
2013-10-29 10:21 ` Jiri Olsa
2013-10-29 10:25 ` Ingo Molnar
2013-10-13 12:34 ` [PATCHSET 0/8] perf tools: Fix scalability problem on callchain merging (v5) Jiri Olsa
2013-10-14 1:06 ` Namhyung Kim
2013-10-14 4:50 ` Namhyung Kim
2013-10-14 8:01 ` Jiri Olsa
2013-10-14 8:41 ` Namhyung Kim
Reply instructions:
You may reply publicly to this message via plain-text email
using any one of the following methods:
* Save the following mbox file, import it into your mail client,
and reply-to-all from there: mbox
Avoid top-posting and favor interleaved quoting:
https://en.wikipedia.org/wiki/Posting_style#Interleaved_style
* Reply using the --to, --cc, and --in-reply-to
switches of git-send-email(1):
git send-email \
--in-reply-to=tip-e369517ce5f796945c6af047b4e8b1d650e03458@git.kernel.org \
--to=tipbot@zytor.com \
--cc=a.p.zijlstra@chello.nl \
--cc=acme@redhat.com \
--cc=fweisbec@gmail.com \
--cc=hpa@zytor.com \
--cc=jolsa@redhat.com \
--cc=linux-kernel@vger.kernel.org \
--cc=linux-tip-commits@vger.kernel.org \
--cc=mingo@kernel.org \
--cc=namhyung.kim@lge.com \
--cc=namhyung@kernel.org \
--cc=paulus@samba.org \
--cc=tglx@linutronix.de \
--cc=torvalds@linux-foundation.org \
/path/to/YOUR_REPLY
https://kernel.org/pub/software/scm/git/docs/git-send-email.html
* If your mail client supports setting the In-Reply-To header
via mailto: links, try the mailto: link
Be sure your reply has a Subject: header at the top and a blank line
before the message body.
This is a public inbox, see mirroring instructions
for how to clone and mirror all data and code used for this inbox;
as well as URLs for NNTP newsgroup(s).